X-Git-Url: http://jaekl.net/gitweb/?a=blobdiff_plain;f=book.rb;h=d7a2a26e24d27d6d99cc3df50acb1d75818b6397;hb=291671015d4e88cbb7e7f3995109a88ee993aac6;hp=814d890a382c9895978746f0df1bcf012216f504;hpb=c49d5c3ea36b85b8628d99c68da464d79556176d;p=quanlib.git diff --git a/book.rb b/book.rb index 814d890..d7a2a26 100644 --- a/book.rb +++ b/book.rb @@ -1,10 +1,18 @@ -require './author.rb' +require 'nokogiri' +require 'zip' + +require 'author' +require 'cover' class Book + @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/' + def initialize(fileName) + #puts 'InitBook(' + fileName + ')' @author = nil @cover = nil + @description = nil @path = fileName @series = nil @title = nil @@ -18,6 +26,7 @@ class Book return false end + #puts "Filename: " + fileName.to_s lowerName = fileName.downcase() if lowerName.end_with?(".epub") @@ -27,10 +36,48 @@ class Book return false end + def author + return @author + end + + def cover + return @cover + end + + def description + @description + end + + def heading + result = [] + + if nil != @title + result.push('' + @title + '') + else + result.push('(Unknown title)') + end + if nil != @author + result.push('by ' + @author.to_s() + '') + end + + seriesInfo = [] + if nil != @series + seriesInfo.push(@series.to_s) + end + if nil != @volume + seriesInfo.push(@volume.to_s) + end + if seriesInfo.length > 0 + result.push(seriesInfo.join(' ')) + end + + return result.join('
') + end + def inspect data = [] if nil != @author - data.push('author="' + @author + '"') + data.push('author="' + @author.inspect + '"') end if nil != @series data.push('series="' + @series + '"') @@ -41,36 +88,55 @@ class Book if nil != @title data.push('title="' + @title + '"') end + if nil != @cover + data.push(@cover.inspect()) + end if nil != @path data.push('path="' + @path + '"') end return '(Book:' + data.join(',') + ')' end + def path + @path + end + + def series + @series + end + def to_s return inspect() end + def title + @title + end + + def volume + @volume + end + protected def isUpper?(c) return /[[:upper:]]/.match(c) end protected - def massageAuthor(input) + def massage_author(input) if nil == input return nil end - result = "" + reading_order = "" input.each_char do |c| - if isUpper?(c) and (result.length > 0) - result += " " - end - result += c + if isUpper?(c) and (reading_order.length > 0) + reading_order += " " + end + reading_order += c end - - return result + + return reading_order end # Returns (series, volumeNo, titleText) @@ -109,7 +175,156 @@ class Book parts = fileName.split('/') (@series, @volume, @title) = processTitle(parts[-1]) if parts.length > 1 - @author = massageAuthor(parts[-2]) + grouping = parts[-2] + reading_order = massage_author(grouping) + sort_order = nil + @author = Author.new(grouping, reading_order, sort_order) + end + + if fileName.downcase.end_with?(".epub") + scanEpub!(fileName) + end + end + + protected + def scanEpub!(fileName) + #puts 'Scanning "' + fileName.to_s + '"...' + begin + Zip::File.open(fileName) do |zipfile| + entry = zipfile.find_entry('META-INF/container.xml') + if nil == entry + puts 'No META-INF/container.xml, skipping book ' + fileName + return + end + contXml = zipfile.read('META-INF/container.xml') + contDoc = Nokogiri::XML(contXml) + opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path'] + + scanOpf!(zipfile, opfPath) + end + rescue Zip::Error => exc + puts 'ERROR processing file "' + fileName + '":' + puts exc.message + puts exc.backtrace end end + + protected + def scanOpf!(zipfile, opfPath) + coverId = nil + + opfXml = zipfile.read(opfPath) + opfDoc = Nokogiri::XML(opfXml) + + #------- + # Author + + grouping = @author.grouping + reading_order = @author.reading_order + sort_order = @author.sort_order + + creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL) + if (creators.length > 0) + creator = creators[0] + if nil != creator + role = creator['opf:role'] + if 'aut' == role + reading_order = creator.content + + file_as = creator['opf:file-as'] + if nil != file_as + sort_order = file_as + end + end + + @author = Author.new(grouping, reading_order, sort_order) + end + end + + #--------------------------------------- + # Title + + titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL) + if titles.length > 0 + title = titles[0] + if nil != title + @title = title.content + end + end + + #--------------------------------------- + # Description + + descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL) + if (descrNodes.length > 0) + descrNode = descrNodes[0] + if nil != descrNode + @description = descrNode.content + end + end + + #--------------------------------------- + # Other metadata: series, volume, cover + + metas = opfDoc.css('package metadata meta') + for m in metas + name = m['name'] + content = m['content'] + + if 'calibre:series' == name + @series = content + elsif 'calibre:series-index' == name + @volume = content + elsif 'cover' == name + coverId = content + #puts 'File ' + @path + ' coverId ' + coverId + end + end + + #--------------- + # Load the cover + + @cover = loadCover(zipfile, opfPath, opfDoc, coverId) + end + + protected + def loadCover(zipfile, opfPath, opfDoc, coverId) + coverFile = nil + if nil == coverId + coverId = "cover-image" + end + + items = opfDoc.css('package manifest item') + for i in items + href = i['href'] + id = i['id'] + mimeType = i['media-type'] + + if coverId == id + entry = zipfile.find_entry(href) + + if nil == entry + # Although the epub standard requires the path to be relative + # to the base of the epub (zip), some books encountered in the + # wild have been found to use a bath relative to the location + # of the opf file. + parts = opfPath.split('/') + opfBasePath = opfPath.split('/')[0..-2].join('/') + coverPath = opfBasePath + '/' + href + entry = zipfile.find_entry(coverPath) + end + + if nil == entry + puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".' + return nil + else + entry.get_input_stream() do |is| + return Cover.new(is, href, mimeType) + end + end + end + end + return nil + end end +