book.rb

   1
   2 require 'nokogiri'
   3 require 'zip'
   4
   5 require 'author'
   6 require 'cover'
   7
   8 class Book
   9   @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
  10
  11   def initialize(fileName)
  12     #puts 'InitBook(' + fileName + ')'
  13     @author = nil
  14     @cover = nil
  15     @description = nil
  16     @path = fileName
  17     @series = nil
  18     @title = nil
  19     @volume = nil
  20
  21     parseFileName!(fileName)
  22   end
  23
  24   def self.canHandle?(fileName)
  25     if nil == fileName
  26       return false
  27     end
  28
  29     #puts "Filename:  " + fileName.to_s
  30     lowerName = fileName.downcase()
  31
  32     if lowerName.end_with?(".epub")
  33       return true
  34     end
  35
  36     return false
  37   end
  38
  39   def author
  40     return @author
  41   end
  42
  43   def cover
  44     return @cover
  45   end
  46
  47   def description
  48     @description
  49   end
  50
  51   def heading
  52     result = []
  53
  54     if nil != @title
  55       result.push('<b>' + @title + '</b>')
  56     else
  57       result.push('<i>(Unknown title)</i>')
  58     end
  59     if nil != @author
  60       result.push('<i>by ' + @author.to_s() + '</i>')
  61     end
  62
  63     seriesInfo = []
  64     if nil != @series
  65       seriesInfo.push(@series.to_s)
  66     end
  67     if nil != @volume
  68       seriesInfo.push(@volume.to_s)
  69     end
  70     if seriesInfo.length > 0
  71       result.push(seriesInfo.join(' '))
  72     end
  73
  74     return result.join('<br/>')
  75   end
  76
  77   def inspect
  78     data = []
  79     if nil != @author
  80       data.push('author="' + @author.inspect + '"')
  81     end
  82     if nil != @series
  83       data.push('series="' + @series + '"')
  84     end
  85     if nil != @volume
  86       data.push('volume="' + @volume + '"')
  87     end
  88     if nil != @title
  89       data.push('title="' + @title + '"')
  90     end
  91     if nil != @cover
  92       data.push(@cover.inspect())
  93     end
  94     if nil != @path
  95       data.push('path="' + @path + '"')
  96     end
  97     return '(Book:' + data.join(',') + ')'
  98   end
  99
 100   def path
 101     @path
 102   end
 103
 104   def series
 105     @series
 106   end
 107
 108   def to_s
 109     return inspect()
 110   end
 111
 112   def title
 113     @title
 114   end
 115
 116   def volume
 117     @volume
 118   end
 119
 120   protected
 121   def isUpper?(c)
 122     return /[[:upper:]]/.match(c)
 123   end
 124
 125   protected
 126   def massage_author(input)
 127     if nil == input
 128       return nil
 129     end
 130
 131     reading_order = ""
 132     input.each_char do |c|
 133       if isUpper?(c) and (reading_order.length > 0)
 134         reading_order += " "
 135      end
 136       reading_order += c
 137     end
 138
 139     return reading_order
 140   end
 141
 142   # Returns (series, volumeNo, titleText)
 143   protected
 144   def processTitle(input)
 145     if nil == input
 146       return nil
 147     end
 148
 149     arr = input.split('_')
 150
 151     series = nil
 152     vol = nil
 153
 154     first = arr[0]
 155     matchData = (arr[0]).match(/^([A-Z]+)([0-9]+)$/)
 156     if nil != matchData
 157       capt = matchData.captures
 158       series = capt[0]
 159       vol = capt[1]
 160       arr.shift
 161     end
 162
 163     pos = arr[-1].rindex('.')
 164     if nil != pos
 165       arr[-1] = arr[-1].slice(0, pos)
 166     end
 167
 168     title = arr.join(' ')
 169
 170     return series, vol, title
 171   end
 172
 173   protected
 174   def parseFileName!(fileName)
 175     parts = fileName.split('/')
 176     (@series, @volume, @title) = processTitle(parts[-1])
 177     if parts.length > 1
 178       grouping = parts[-2]
 179       reading_order = massage_author(grouping)
 180       sort_order = nil
 181       @author = Author.new(grouping, reading_order, sort_order)
 182     end
 183
 184     if fileName.downcase.end_with?(".epub")
 185       scanEpub!(fileName)
 186     end
 187   end
 188
 189   protected
 190   def scanEpub!(fileName)
 191     #puts 'Scanning "' + fileName.to_s + '"...'
 192     begin
 193       Zip::File.open(fileName) do |zipfile|
 194         entry = zipfile.find_entry('META-INF/container.xml')
 195         if nil == entry
 196           puts 'No META-INF/container.xml, skipping book ' + fileName
 197           return
 198         end
 199         contXml = zipfile.read('META-INF/container.xml')
 200         contDoc = Nokogiri::XML(contXml)
 201         opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
 202
 203         scanOpf!(zipfile, opfPath)
 204       end
 205     rescue Zip::Error => exc
 206       puts 'ERROR processing file "' + fileName + '":'
 207       puts exc.message
 208       puts exc.backtrace
 209     end
 210   end
 211
 212   protected
 213   def scanOpf!(zipfile, opfPath)
 214     coverId = nil
 215
 216     opfXml = zipfile.read(opfPath)
 217     opfDoc = Nokogiri::XML(opfXml)
 218
 219     #-------
 220     # Author
 221
 222     grouping = @author.grouping
 223     reading_order = @author.reading_order
 224     sort_order = @author.sort_order
 225
 226     creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
 227     if (creators.length > 0)
 228       creator = creators[0]
 229       if nil != creator
 230         role = creator['opf:role']
 231         if 'aut' == role
 232           reading_order = creator.content
 233
 234           file_as = creator['opf:file-as']
 235           if nil != file_as
 236             sort_order = file_as
 237           end
 238         end
 239
 240         @author = Author.new(grouping, reading_order, sort_order)
 241       end
 242     end
 243
 244     #---------------------------------------
 245     # Title
 246
 247     titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
 248     if titles.length > 0
 249       title = titles[0]
 250       if nil != title
 251         @title = title.content
 252       end
 253     end
 254
 255     #---------------------------------------
 256     # Description
 257
 258     descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
 259     if (descrNodes.length > 0)
 260       descrNode = descrNodes[0]
 261       if nil != descrNode
 262         @description = descrNode.content
 263       end
 264     end
 265
 266     #---------------------------------------
 267     # Other metadata:  series, volume, cover
 268
 269     metas = opfDoc.css('package metadata meta')
 270     for m in metas
 271       name = m['name']
 272       content = m['content']
 273
 274       if 'calibre:series' == name
 275         @series = content
 276       elsif 'calibre:series-index' == name
 277         @volume = content
 278       elsif 'cover' == name
 279         coverId = content
 280         #puts 'File ' + @path + ' coverId ' + coverId
 281       end
 282     end
 283
 284     #---------------
 285     # Load the cover
 286
 287     @cover = loadCover(zipfile, opfPath, opfDoc, coverId)
 288   end
 289
 290   protected
 291   def loadCover(zipfile, opfPath, opfDoc, coverId)
 292     coverFile = nil
 293     if nil == coverId
 294       coverId = "cover-image"
 295     end
 296
 297     items = opfDoc.css('package manifest item')
 298     for i in items
 299       href = i['href']
 300       id = i['id']
 301       mimeType = i['media-type']
 302
 303       if coverId == id
 304         entry = zipfile.find_entry(href)
 305
 306         if nil == entry
 307           # Although the epub standard requires the path to be relative
 308           # to the base of the epub (zip), some books encountered in the
 309           # wild have been found to use a bath relative to the location
 310           # of the opf file.
 311           parts = opfPath.split('/')
 312           opfBasePath = opfPath.split('/')[0..-2].join('/')
 313           coverPath = opfBasePath + '/' + href
 314           entry = zipfile.find_entry(coverPath)
 315         end
 316
 317         if nil == entry
 318           puts 'WARNING!  Cover image "' + href + '" not found in file "' + @path + '".'
 319           return nil
 320         else
 321           entry.get_input_stream() do |is|
 322             return Cover.new(is, href, mimeType)
 323           end
 324         end
 325       end
 326     end
 327     return nil
 328   end
 329 end
 330