+
+ protected
# Scans an EPUB archive: reads META-INF/container.xml to find the path of
# the OPF package document and passes that document on to scanOpf!.
#
# fileName - path of the EPUB file; anything that converts with to_s is
#            accepted in the diagnostic messages.
#
# Returns whatever scanOpf! returns, or nil when the book is skipped or a
# Zip::Error was reported. Problems are printed to standard output rather
# than raised.
def scanEpub!(fileName)
  # NOTE(review): presumably silences rubyzip's warnings about invalid
  # entry dates; this is a process-wide setting.
  Zip.warn_invalid_date = false

  Zip::File.open(fileName) do |zipfile|
    unless zipfile.find_entry('META-INF/container.xml')
      puts "No META-INF/container.xml, skipping book #{fileName}"
      return
    end

    contDoc = Nokogiri::XML(zipfile.read('META-INF/container.xml'))

    # A container.xml without a <rootfile full-path="..."> entry gives us
    # nothing to scan; skip the book instead of failing on nil.
    rootfile = contDoc.css('container rootfiles rootfile').first
    opfPath = rootfile && rootfile['full-path']
    if opfPath.nil? || opfPath.empty?
      puts "No rootfile declared in META-INF/container.xml, skipping book #{fileName}"
      return
    end

    scanOpf!(zipfile, opfPath)
  end
rescue Zip::Error => exc
  puts "ERROR processing file \"#{fileName}\":"
  puts exc.message
  puts exc.backtrace
end
+
+ protected
# Looks for a cover image stored next to a PDF book: a file with the same
# absolute path but the extension ".jpeg" instead of ".pdf". When that file
# exists, a Cover built from its contents is stored in @cover.
#
# file_name - path of the PDF file; must end with ".pdf".
#
# Returns the new Cover when a JPEG was found, otherwise nil.
def scan_pdf!(file_name)
  pdf_path = File.expand_path(file_name)
  unless pdf_path.end_with?('.pdf')
    puts "Unexpected internal error: path \"#{file_name}\" does not end with \".pdf\"."
    return
  end

  # Swap the four-character ".pdf" suffix for ".jpeg".
  jpeg_path = "#{pdf_path[0...-4]}.jpeg"
  return unless File.file?(jpeg_path)

  # JPEG data is binary: open with 'rb' so the stream yields raw bytes and
  # no text-mode newline or encoding handling is applied.
  File.open(jpeg_path, 'rb') do |is|
    @cover = Cover.new(is, jpeg_path, 'image/jpeg')
  end
end
+
+
+ protected
# Reads the OPF package document at opfPath inside zipfile and copies its
# metadata into this object: @author, @title, @description, @language,
# @volume and, finally, @cover (via load_cover).
#
# zipfile - open archive; the OPF text is obtained with zipfile.read
# opfPath - path of the OPF document inside the archive
#
# Returns the value assigned to @cover.
def scanOpf!(zipfile, opfPath)
  opfDoc = Nokogiri::XML(zipfile.read(opfPath))

  # First Dublin Core element with the given local name, or nil if absent.
  first_dc = ->(tag) { opfDoc.css("dc|#{tag}", 'dc' => @@DC_NS_URL).first }

  # --- Author ---------------------------------------------------------
  # Start from the current author's values; only the first <dc:creator> is
  # consulted, and only a creator with role "aut" overrides them.
  grouping = @author.grouping
  reading_order = @author.reading_order
  sort_order = @author.sort_order

  creator = first_dc.call('creator')
  if creator
    if creator['opf:role'] == 'aut'
      reading_order = creator.content
      sort_order = creator['opf:file-as'] || sort_order
    end
    @author = Author.new(grouping, reading_order, sort_order)
  end

  # --- Title ----------------------------------------------------------
  title_node = first_dc.call('title')
  @title = title_node.content if title_node

  # --- Description ----------------------------------------------------
  description_node = first_dc.call('description')
  @description = description_node.content if description_node

  # --- Language -------------------------------------------------------
  language_node = first_dc.call('language')
  @language = language_node.content if language_node

  # --- Other metadata: series, volume, cover ----------------------------
  coverId = nil
  opfDoc.css('package metadata meta').each do |meta|
    value = meta['content']
    case meta['name']
    when 'calibre:series'
      # TODO: Dynamically create a new series?
      # @series_id = value
    when 'calibre:series-index'
      @volume = value
    when 'cover'
      coverId = value
    end
  end

  # --- Cover ------------------------------------------------------------
  @cover = load_cover(zipfile, opfPath, opfDoc, coverId)
end
+
+ protected
# Locates the cover image named in the OPF manifest and wraps it in a Cover.
#
# zipfile - open archive; queried with find_entry
# opfPath - path of the OPF document inside the archive
# opfDoc  - parsed OPF document; queried with css
# coverId - manifest id of the cover item; "cover-image" is used when nil
#
# Returns the Cover built from the image stream, or nil when no manifest
# item has the requested id or its file cannot be found in the archive (the
# latter also prints a warning).
def load_cover(zipfile, opfPath, opfDoc, coverId)
  coverId ||= 'cover-image'

  item = opfDoc.css('package manifest item').find { |candidate| candidate['id'] == coverId }
  return nil unless item

  href = item['href']
  entry = nil

  # A manifest item without a usable href cannot point at any file; treat
  # it as "not found" instead of failing while building candidate paths.
  unless href.to_s.empty?
    # Books found in the wild disagree on what the href is relative to, so
    # try it as given (relative to the archive root) first, then relative
    # to the directory that holds the OPF file.
    opf_dir = opfPath.split('/')[0..-2].join('/')
    entry = zipfile.find_entry(href) || zipfile.find_entry("#{opf_dir}/#{href}")

    # Another case found in the wild: the cover image is at the root, but
    # the path is '../cover.jpeg'.
    entry ||= zipfile.find_entry(href[3..-1]) if href.start_with?('../')
  end

  unless entry
    puts "WARNING! Cover image \"#{href}\" not found in file \"#{@path}\"."
    return nil
  end

  entry.get_input_stream do |is|
    return Cover.new(is, href, item['media-type'])
  end
  nil
end