book.rb

   1
   2 require 'nokogiri'
   3 require 'zip'
   4
   5 require 'author'
   6 require 'cover'
   7
   8 class Book
   9   @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
  10
  11   def initialize(fileName)
  12     @author = nil
  13     @cover = nil
  14     @description = nil
  15     @path = fileName
  16     @series = nil
  17     @title = nil
  18     @volume = nil
  19
  20     parseFileName!(fileName)
  21   end
  22
  23   def self.canHandle?(fileName)
  24     if nil == fileName
  25       return false
  26     end
  27
  28     #puts "Filename:  " + fileName.to_s
  29     lowerName = fileName.downcase()
  30
  31     if lowerName.end_with?(".epub")
  32       return true
  33     end
  34
  35     return false
  36   end
  37
  38   def cover
  39     return @cover
  40   end
  41
  42   def description
  43     @description
  44   end
  45
  46   def heading
  47     result = []
  48
  49     if nil != @title
  50       result.push('<b>' + @title + '</b>')
  51     else
  52       result.push('<i>(Unknown title)</i>')
  53     end
  54     if nil != @author
  55       result.push('<i>by ' + @author.to_s() + '</i>')
  56     end
  57
  58     seriesInfo = []
  59     if nil != @series
  60       seriesInfo.push(@series.to_s)
  61     end
  62     if nil != @volume
  63       seriesInfo.push(@volume.to_s)
  64     end
  65     if seriesInfo.length > 0
  66       result.push(seriesInfo.join(' '))
  67     end
  68
  69     return result.join('<br/>')
  70   end
  71
  72   def inspect
  73     data = []
  74     if nil != @author
  75       data.push('author="' + @author.to_s + '"')
  76     end
  77     if nil != @series
  78       data.push('series="' + @series + '"')
  79     end
  80     if nil != @volume
  81       data.push('volume="' + @volume + '"')
  82     end
  83     if nil != @title
  84       data.push('title="' + @title + '"')
  85     end
  86     if nil != @cover
  87       data.push(@cover.inspect())
  88     end
  89     if nil != @path
  90       data.push('path="' + @path + '"')
  91     end
  92     return '(Book:' + data.join(',') + ')'
  93   end
  94
  95   def path
  96     @path
  97   end
  98
  99   def to_s
 100     return inspect()
 101   end
 102
 103   protected
 104   def isUpper?(c)
 105     return /[[:upper:]]/.match(c)
 106   end
 107
 108   protected
 109   def massageAuthor(input)
 110     if nil == input
 111       return nil
 112     end
 113
 114     result = ""
 115     input.each_char do |c|
 116       if isUpper?(c) and (result.length > 0)
 117         result += " "
 118       end
 119       result += c
 120     end
 121
 122     return result
 123   end
 124
 125   # Returns (series, volumeNo, titleText)
 126   protected
 127   def processTitle(input)
 128     if nil == input
 129       return nil
 130     end
 131
 132     arr = input.split('_')
 133
 134     series = nil
 135     vol = nil
 136
 137     first = arr[0]
 138     matchData = (arr[0]).match(/^([A-Z]+)([0-9]+)$/)
 139     if nil != matchData
 140       capt = matchData.captures
 141       series = capt[0]
 142       vol = capt[1]
 143       arr.shift
 144     end
 145
 146     pos = arr[-1].rindex('.')
 147     if nil != pos
 148       arr[-1] = arr[-1].slice(0, pos)
 149     end
 150
 151     title = arr.join(' ')
 152
 153     return series, vol, title
 154   end
 155
 156   protected
 157   def parseFileName!(fileName)
 158     parts = fileName.split('/')
 159     (@series, @volume, @title) = processTitle(parts[-1])
 160     if parts.length > 1
 161       @author = massageAuthor(parts[-2])
 162     end
 163
 164     if fileName.downcase.end_with?(".epub")
 165       scanEpub!(fileName)
 166     end
 167   end
 168
 169   protected
 170   def scanEpub!(fileName)
 171     #puts 'Scanning "' + fileName.to_s + '"...'
 172     begin
 173       Zip::File.open(fileName) do |zipfile|
 174         entry = zipfile.find_entry('META-INF/container.xml')
 175         if nil == entry
 176           return
 177         end
 178         contXml = zipfile.read('META-INF/container.xml')
 179         contDoc = Nokogiri::XML(contXml)
 180         opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
 181
 182         scanOpf!(zipfile, opfPath)
 183       end
 184     rescue Zip::Error => exc
 185       puts 'ERROR processing file "' + fileName + '":'
 186       puts exc.message
 187       puts exc.backtrace
 188     end
 189   end
 190
 191   protected
 192   def scanOpf!(zipfile, opfPath)
 193     coverId = nil
 194
 195     opfXml = zipfile.read(opfPath)
 196     opfDoc = Nokogiri::XML(opfXml)
 197
 198     #-------
 199     # Author
 200
 201     creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
 202     if (creators.length > 0)
 203       creator = creators[0]
 204       if nil != creator
 205         role = creator['opf:role']
 206         if 'aut' == role
 207           name = creator.content
 208           parts = name.split(' ')
 209           if parts.length > 1
 210             surname = parts[-1]
 211             givenNames = parts[0..-2].join(' ')
 212             @author = Author.new(surname, givenNames)
 213           else
 214             @author = Author.new(name, '')
 215           end
 216         end
 217       end
 218     end
 219
 220     #---------------------------------------
 221     # Title
 222
 223     titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
 224     if titles.length > 0
 225       title = titles[0]
 226       if nil != title
 227         @title = title.content
 228       end
 229     end
 230
 231     #---------------------------------------
 232     # Description
 233
 234     descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
 235     if (descrNodes.length > 0)
 236       descrNode = descrNodes[0]
 237       if nil != descrNode
 238         @description = descrNode.content
 239       end
 240     end
 241
 242     #---------------------------------------
 243     # Other metadata:  series, volume, cover
 244
 245     metas = opfDoc.css('package metadata meta')
 246     for m in metas
 247       name = m['name']
 248       content = m['content']
 249
 250       if 'calibre:series' == name
 251         @series = content
 252       elsif 'calibre:series-index' == name
 253         @volume = content
 254       elsif 'cover' == name
 255         coverId = content
 256       end
 257     end
 258
 259     #---------------
 260     # Load the cover
 261
 262     @cover = loadCover(zipfile, opfPath, opfDoc, coverId)
 263   end
 264
 265   protected
 266   def loadCover(zipfile, opfPath, opfDoc, coverId)
 267     coverFile = nil
 268     if nil == coverId
 269       coverId = "cover-image"
 270     end
 271
 272     items = opfDoc.css('package manifest item')
 273     for i in items
 274       href = i['href']
 275       id = i['id']
 276       mimeType = i['media-type']
 277
 278       if coverId == id
 279         entry = zipfile.find_entry(href)
 280
 281         if nil == entry
 282           # Although the epub standard requires the path to be relative
 283           # to the base of the epub (zip), some books encountered in the
 284           # wild have been found to use a bath relative to the location
 285           # of the opf file.
 286           parts = opfPath.split('/')
 287           opfBasePath = opfPath.split('/')[0..-2].join('/')
 288           coverPath = opfBasePath + '/' + href
 289           entry = zipfile.find_entry(coverPath)
 290         end
 291
 292         if nil == entry
 293           puts 'WARNING!  Cover image "' + href + '" not found in file "' + @path + '".'
 294           return nil
 295         else
 296           entry.get_input_stream() do |is|
 297             return Cover.new(is, href, mimeType)
 298           end
 299         end
 300       end
 301     end
 302     return nil
 303   end
 304 end
 305