book.rb

   1
require 'cgi/escape'

require 'nokogiri'
require 'zip'

require 'author'
require 'cover'
require 'store'
   8
   9 class Book
  10   @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
  11
  12   def initialize(store)
  13     @author = nil
  14     @cover = nil
  15     @description = nil
  16     @path = nil
  17     @series_id = nil
  18     @store = store
  19     @title = nil
  20     @volume = nil
  21   end
  22
  23   def load_from_file(fileName)
  24     @path = fileName
  25     parse_file_name!(fileName)
  26   end
  27
  28   def self.can_handle?(fileName)
  29     if nil == fileName
  30       return false
  31     end
  32
  33     #puts "Filename:  " + fileName.to_s
  34     lowerName = fileName.downcase()
  35
  36     if lowerName.end_with?(".epub")
  37       return true
  38     end
  39
  40     if lowerName.end_with?(".pdf")
  41       return true
  42     end
  43
  44     return false
  45   end
  46
  47   def author
  48     return @author
  49   end
  50
  51   def author=(value)
  52     @author = value
  53   end
  54
  55   def cover
  56     return @cover
  57   end
  58
  59   def cover=(value)
  60     @cover = value
  61   end
  62
  63   def description
  64     @description
  65   end
  66
  67   def description=(value)
  68     @description = value
  69   end
  70
  71   def heading
  72     result = []
  73
  74     if nil != @title
  75       result.push('<b>' + @title + '</b>')
  76     else
  77       result.push('<i>(Unknown title)</i>')
  78     end
  79     if nil != @author
  80       result.push('<i>by ' + @author.reading_order + '</i>')
  81     end
  82
  83     seriesInfo = []
  84     series = @store.load_series(@series_id)
  85     if nil != series and nil != series.descr
  86       seriesInfo.push(series.descr.to_s)
  87     end
  88     if nil != @volume
  89       seriesInfo.push(@volume.to_s)
  90     end
  91     if seriesInfo.length > 0
  92       result.push(seriesInfo.join(' '))
  93     end
  94
  95     return result.join('<br/>')
  96   end
  97
  98   def inspect
  99     data = []
 100     if nil != @author
 101       data.push('author="' + @author.inspect + '"')
 102     end
 103     if nil != @series_id
 104       data.push('series_id="' + @series_id.to_s() + '"')
 105     end
 106     if nil != @volume
 107       data.push('volume="' + @volume + '"')
 108     end
 109     if nil != @title
 110       data.push('title="' + @title + '"')
 111     end
 112     if nil != @cover
 113       data.push(@cover.inspect())
 114     end
 115     if nil != @path
 116       data.push('path="' + @path + '"')
 117     end
 118     return '(Book:' + data.join(',') + ')'
 119   end
 120
 121   def path
 122     @path
 123   end
 124
 125   def path=(value)
 126     @path = value
 127   end
 128
 129   def series_id
 130     @series_id
 131   end
 132
 133   def series_id=(value)
 134     @series_id = value
 135   end
 136
 137   def to_s
 138     return inspect()
 139   end
 140
 141   def title
 142     @title
 143   end
 144
 145   def title=(value)
 146     @title = value
 147   end
 148
 149   def volume
 150     @volume
 151   end
 152
 153   def volume=(value)
 154     @volume = value
 155   end
 156
 157   protected
 158   def isUpper?(c)
 159     return /[[:upper:]]/.match(c)
 160   end
 161
 162   protected
 163   def massage_author(input)
 164     if nil == input
 165       return nil
 166     end
 167
 168     reading_order = ""
 169     input.each_char do |c|
 170       if isUpper?(c) and (reading_order.length > 0)
 171         reading_order += " "
 172      end
 173       reading_order += c
 174     end
 175
 176     return reading_order
 177   end
 178
 179   # Returns (series, volumeNo, titleText)
 180   protected
 181   def processTitle(input)
 182     if nil == input
 183       return nil
 184     end
 185
 186     arr = input.split('_')
 187
 188     series = nil
 189     vol = nil
 190
 191     first = arr[0]
 192     matchData = (arr[0]).match(/^([A-Z]+)([0-9]+)$/)
 193     if nil != matchData
 194       capt = matchData.captures
 195       series = capt[0]
 196       vol = capt[1]
 197       arr.shift
 198     end
 199
 200     pos = arr[-1].rindex('.')
 201     if nil != pos
 202       arr[-1] = arr[-1].slice(0, pos)
 203     end
 204
 205     title = arr.join(' ')
 206
 207     return series, vol, title
 208   end
 209
 210   protected
 211   def parse_file_name!(file_name)
 212     parts = file_name.split('/')
 213     (series_code, @volume, @title) = processTitle(parts[-1])
 214     if parts.length > 1
 215       grouping = parts[-2]
 216       reading_order = massage_author(grouping)
 217       sort_order = nil
 218       @author = Author.new(grouping, reading_order, sort_order)
 219       @series_id = @store.get_series(grouping, series_code)
 220     end
 221
 222     lc_file_name = file_name.downcase
 223     if lc_file_name.end_with?(".epub")
 224       scanEpub!(file_name)
 225     elsif lc_file_name.end_with?(".pdf")
 226       scan_pdf!(file_name)
 227     end
 228   end
 229
 230   protected
 231   def scanEpub!(fileName)
 232     #puts 'Scanning "' + fileName.to_s + '"...'
 233     begin
 234       Zip::File.open(fileName) do |zipfile|
 235         entry = zipfile.find_entry('META-INF/container.xml')
 236         if nil == entry
 237           puts 'No META-INF/container.xml, skipping book ' + fileName
 238           return
 239         end
 240         contXml = zipfile.read('META-INF/container.xml')
 241         contDoc = Nokogiri::XML(contXml)
 242         opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
 243
 244         scanOpf!(zipfile, opfPath)
 245       end
 246     rescue Zip::Error => exc
 247       puts 'ERROR processing file "' + fileName + '":'
 248       puts exc.message
 249       puts exc.backtrace
 250     end
 251   end
 252
 253   protected
 254   def scan_pdf!(file_name)
 255     #puts 'Scanning "' + file_name.to_s + '"...'
 256
 257     pdf_path = File.expand_path(file_name).to_s
 258     if ! pdf_path.end_with?('.pdf')
 259       puts 'Unexpected internal error:  path "' + file_name.to_s + '" does not end with ".pdf".'
 260       return
 261     end
 262
 263     jpeg_path = pdf_path[0..-5] + '.jpeg'
 264     if File.file?(jpeg_path)
 265       File.open(jpeg_path, 'r') do |is|
 266         @cover = Cover.new(is, jpeg_path, 'image/jpeg')
 267       end
 268     end
 269   end
 270
 271
 272   protected
 273   def scanOpf!(zipfile, opfPath)
 274     coverId = nil
 275
 276     opfXml = zipfile.read(opfPath)
 277     opfDoc = Nokogiri::XML(opfXml)
 278
 279     #-------
 280     # Author
 281
 282     grouping = @author.grouping
 283     reading_order = @author.reading_order
 284     sort_order = @author.sort_order
 285
 286     creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
 287     if (creators.length > 0)
 288       creator = creators[0]
 289       if nil != creator
 290         role = creator['opf:role']
 291         if 'aut' == role
 292           reading_order = creator.content
 293
 294           file_as = creator['opf:file-as']
 295           if nil != file_as
 296             sort_order = file_as
 297           end
 298         end
 299
 300         @author = Author.new(grouping, reading_order, sort_order)
 301       end
 302     end
 303
 304     #---------------------------------------
 305     # Title
 306
 307     titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
 308     if titles.length > 0
 309       title = titles[0]
 310       if nil != title
 311         @title = title.content
 312       end
 313     end
 314
 315     #---------------------------------------
 316     # Description
 317
 318     descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
 319     if (descrNodes.length > 0)
 320       descrNode = descrNodes[0]
 321       if nil != descrNode
 322         @description = descrNode.content
 323       end
 324     end
 325
 326     #---------------------------------------
 327     # Other metadata:  series, volume, cover
 328
 329     metas = opfDoc.css('package metadata meta')
 330     for m in metas
 331       name = m['name']
 332       content = m['content']
 333
 334       if 'calibre:series' == name
 335         # TODO:  Dynamically create a new series?
 336         # @series_id = content
 337       elsif 'calibre:series-index' == name
 338         @volume = content
 339       elsif 'cover' == name
 340         coverId = content
 341         #puts 'File ' + @path + ' coverId ' + coverId
 342       end
 343     end
 344
 345     #---------------
 346     # Load the cover
 347
 348     @cover = load_cover(zipfile, opfPath, opfDoc, coverId)
 349   end
 350
 351   protected
 352   def load_cover(zipfile, opfPath, opfDoc, coverId)
 353     coverFile = nil
 354     if nil == coverId
 355       coverId = "cover-image"
 356     end
 357
 358     items = opfDoc.css('package manifest item')
 359     for i in items
 360       href = i['href']
 361       id = i['id']
 362       mimeType = i['media-type']
 363
 364       if coverId == id
 365         entry = zipfile.find_entry(href)
 366
 367         if nil == entry
 368           # Although the epub standard requires the path to be relative
 369           # to the base of the epub (zip), some books encountered in the
 370           # wild have been found to use a bath relative to the location
 371           # of the opf file.
 372           parts = opfPath.split('/')
 373           opfBasePath = opfPath.split('/')[0..-2].join('/')
 374           coverPath = opfBasePath + '/' + href
 375           entry = zipfile.find_entry(coverPath)
 376         end
 377
 378         if nil == entry
 379           puts 'WARNING!  Cover image "' + href + '" not found in file "' + @path + '".'
 380           return nil
 381         else
 382           entry.get_input_stream() do |is|
 383             return Cover.new(is, href, mimeType)
 384           end
 385         end
 386       end
 387     end
 388     return nil
 389   end
 390 end
 391