book.rb

   1
   2 require 'nokogiri'
   3 require 'zip'
   4
   5 require 'author'
   6 require 'cover'
   7
   8 class Book
   9   @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
  10
  11   def initialize
  12     @author = nil
  13     @cover = nil
  14     @description = nil
  15     @path = nil
  16     @series = nil
  17     @title = nil
  18     @volume = nil
  19   end
  20
  21   def loadFromFile(fileName)
  22     @path = fileName
  23     parseFileName!(fileName)
  24   end
  25
  26   def self.canHandle?(fileName)
  27     if nil == fileName
  28       return false
  29     end
  30
  31     #puts "Filename:  " + fileName.to_s
  32     lowerName = fileName.downcase()
  33
  34     if lowerName.end_with?(".epub")
  35       return true
  36     end
  37
  38     return false
  39   end
  40
  41   def author
  42     return @author
  43   end
  44
  45   def cover
  46     return @cover
  47   end
  48
  49   def description
  50     @description
  51   end
  52
  53   def heading
  54     result = []
  55
  56     if nil != @title
  57       result.push('<b>' + @title + '</b>')
  58     else
  59       result.push('<i>(Unknown title)</i>')
  60     end
  61     if nil != @author
  62       result.push('<i>by ' + @author.reading_order + '</i>')
  63     end
  64
  65     seriesInfo = []
  66     if nil != @series
  67       seriesInfo.push(@series.to_s)
  68     end
  69     if nil != @volume
  70       seriesInfo.push(@volume.to_s)
  71     end
  72     if seriesInfo.length > 0
  73       result.push(seriesInfo.join(' '))
  74     end
  75
  76     return result.join('<br/>')
  77   end
  78
  79   def inspect
  80     data = []
  81     if nil != @author
  82       data.push('author="' + @author.inspect + '"')
  83     end
  84     if nil != @series
  85       data.push('series="' + @series + '"')
  86     end
  87     if nil != @volume
  88       data.push('volume="' + @volume + '"')
  89     end
  90     if nil != @title
  91       data.push('title="' + @title + '"')
  92     end
  93     if nil != @cover
  94       data.push(@cover.inspect())
  95     end
  96     if nil != @path
  97       data.push('path="' + @path + '"')
  98     end
  99     return '(Book:' + data.join(',') + ')'
 100   end
 101
 102   def path
 103     @path
 104   end
 105
 106   def series
 107     @series
 108   end
 109
 110   def to_s
 111     return inspect()
 112   end
 113
 114   def title
 115     @title
 116   end
 117
 118   def volume
 119     @volume
 120   end
 121
 122   protected
 123   def isUpper?(c)
 124     return /[[:upper:]]/.match(c)
 125   end
 126
 127   protected
 128   def massage_author(input)
 129     if nil == input
 130       return nil
 131     end
 132
 133     reading_order = ""
 134     input.each_char do |c|
 135       if isUpper?(c) and (reading_order.length > 0)
 136         reading_order += " "
 137      end
 138       reading_order += c
 139     end
 140
 141     return reading_order
 142   end
 143
 144   # Returns (series, volumeNo, titleText)
 145   protected
 146   def processTitle(input)
 147     if nil == input
 148       return nil
 149     end
 150
 151     arr = input.split('_')
 152
 153     series = nil
 154     vol = nil
 155
 156     first = arr[0]
 157     matchData = (arr[0]).match(/^([A-Z]+)([0-9]+)$/)
 158     if nil != matchData
 159       capt = matchData.captures
 160       series = capt[0]
 161       vol = capt[1]
 162       arr.shift
 163     end
 164
 165     pos = arr[-1].rindex('.')
 166     if nil != pos
 167       arr[-1] = arr[-1].slice(0, pos)
 168     end
 169
 170     title = arr.join(' ')
 171
 172     return series, vol, title
 173   end
 174
 175   protected
 176   def parseFileName!(fileName)
 177     parts = fileName.split('/')
 178     (@series, @volume, @title) = processTitle(parts[-1])
 179     if parts.length > 1
 180       grouping = parts[-2]
 181       reading_order = massage_author(grouping)
 182       sort_order = nil
 183       @author = Author.new(grouping, reading_order, sort_order)
 184     end
 185
 186     if fileName.downcase.end_with?(".epub")
 187       scanEpub!(fileName)
 188     end
 189   end
 190
 191   protected
 192   def scanEpub!(fileName)
 193     #puts 'Scanning "' + fileName.to_s + '"...'
 194     begin
 195       Zip::File.open(fileName) do |zipfile|
 196         entry = zipfile.find_entry('META-INF/container.xml')
 197         if nil == entry
 198           puts 'No META-INF/container.xml, skipping book ' + fileName
 199           return
 200         end
 201         contXml = zipfile.read('META-INF/container.xml')
 202         contDoc = Nokogiri::XML(contXml)
 203         opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
 204
 205         scanOpf!(zipfile, opfPath)
 206       end
 207     rescue Zip::Error => exc
 208       puts 'ERROR processing file "' + fileName + '":'
 209       puts exc.message
 210       puts exc.backtrace
 211     end
 212   end
 213
 214   protected
 215   def scanOpf!(zipfile, opfPath)
 216     coverId = nil
 217
 218     opfXml = zipfile.read(opfPath)
 219     opfDoc = Nokogiri::XML(opfXml)
 220
 221     #-------
 222     # Author
 223
 224     grouping = @author.grouping
 225     reading_order = @author.reading_order
 226     sort_order = @author.sort_order
 227
 228     creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
 229     if (creators.length > 0)
 230       creator = creators[0]
 231       if nil != creator
 232         role = creator['opf:role']
 233         if 'aut' == role
 234           reading_order = creator.content
 235
 236           file_as = creator['opf:file-as']
 237           if nil != file_as
 238             sort_order = file_as
 239           end
 240         end
 241
 242         @author = Author.new(grouping, reading_order, sort_order)
 243       end
 244     end
 245
 246     #---------------------------------------
 247     # Title
 248
 249     titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
 250     if titles.length > 0
 251       title = titles[0]
 252       if nil != title
 253         @title = title.content
 254       end
 255     end
 256
 257     #---------------------------------------
 258     # Description
 259
 260     descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
 261     if (descrNodes.length > 0)
 262       descrNode = descrNodes[0]
 263       if nil != descrNode
 264         @description = descrNode.content
 265       end
 266     end
 267
 268     #---------------------------------------
 269     # Other metadata:  series, volume, cover
 270
 271     metas = opfDoc.css('package metadata meta')
 272     for m in metas
 273       name = m['name']
 274       content = m['content']
 275
 276       if 'calibre:series' == name
 277         @series = content
 278       elsif 'calibre:series-index' == name
 279         @volume = content
 280       elsif 'cover' == name
 281         coverId = content
 282         #puts 'File ' + @path + ' coverId ' + coverId
 283       end
 284     end
 285
 286     #---------------
 287     # Load the cover
 288
 289     @cover = loadCover(zipfile, opfPath, opfDoc, coverId)
 290   end
 291
 292   protected
 293   def loadCover(zipfile, opfPath, opfDoc, coverId)
 294     coverFile = nil
 295     if nil == coverId
 296       coverId = "cover-image"
 297     end
 298
 299     items = opfDoc.css('package manifest item')
 300     for i in items
 301       href = i['href']
 302       id = i['id']
 303       mimeType = i['media-type']
 304
 305       if coverId == id
 306         entry = zipfile.find_entry(href)
 307
 308         if nil == entry
 309           # Although the epub standard requires the path to be relative
 310           # to the base of the epub (zip), some books encountered in the
 311           # wild have been found to use a bath relative to the location
 312           # of the opf file.
 313           parts = opfPath.split('/')
 314           opfBasePath = opfPath.split('/')[0..-2].join('/')
 315           coverPath = opfBasePath + '/' + href
 316           entry = zipfile.find_entry(coverPath)
 317         end
 318
 319         if nil == entry
 320           puts 'WARNING!  Cover image "' + href + '" not found in file "' + @path + '".'
 321           return nil
 322         else
 323           entry.get_input_stream() do |is|
 324             return Cover.new(is, href, mimeType)
 325           end
 326         end
 327       end
 328     end
 329     return nil
 330   end
 331 end
 332