require "nokogiri"
require "zip"
+require "scanner/opf"
require "scanner/scanner"
module Scanner
puts "No META-INF/container.xml, skipping book #{filename.inspect}"
return nil
end
- cont_xml = zipfile.read("META-INF/container.xml")
- cont_doc = Nokogiri::XML(cont_xml)
- opf_path = cont_doc.css("container rootfiles rootfile")[0]["full-path"]
-
book = scan_base_attributes(filename)
- scan_opf(book, zipfile, opf_path)
+ Opf.new(book).scan_opf(zipfile, opf_path(zipfile))
end
rescue Zip::Error => e
puts "ERROR processing file #{filename.inspect}:"
private
- DC_NS_URL = "http://purl.org/dc/elements/1.1/"
- private_constant :DC_NS_URL
-
- def scan_opf(book, zipfile, opf_path)
- cover_id = nil
-
- opf_xml = zipfile.read(opf_path)
- opf_doc = Nokogiri::XML(opf_xml)
-
- #-------
- # Author
-
- grouping = book.author.grouping
- reading_order = book.author.reading_order
- sort_order = book.author.sort_order
-
- creators = opf_doc.css("dc|creator", "dc" => DC_NS_URL)
- unless creators.empty?
- creator = creators[0]
-
- return if creator.nil?
-
- role = creator["opf:role"]
- if role == "aut"
- reading_order = creator.content
-
- file_as = creator["opf:file-as"]
- sort_order = file_as unless file_as.nil?
- end
-
- book.author = Author.new(grouping, reading_order, sort_order)
- end
-
- #---------------------------------------
- # Title
-
- titles = opf_doc.css("dc|title", "dc" => DC_NS_URL)
- unless titles.empty?
- title = titles[0]
- book.title = title.content unless title.nil?
- end
-
- #---------------------------------------
- # Description
-
- descr_nodes = opf_doc.css("dc|description", "dc" => DC_NS_URL)
- unless descr_nodes.empty?
- descr_node = descr_nodes[0]
- book.description = descr_node.content unless descr_node.nil?
- end
-
- #---------------------------------------
- # Language
-
- lang_nodes = opf_doc.css("dc|language", "dc" => DC_NS_URL)
- unless lang_nodes.empty?
- lang_node = lang_nodes[0]
- book.language = lang_node.content if lang_node
- end
-
- #---------------------------------------
- # Other metadata: series, volume, cover
-
- metas = opf_doc.css("package metadata meta")
- metas.each do |m|
- name = m["name"]
- content = m["content"]
-
- case name
- when "calibre:series"
- # TODO: Dynamically create a new series?
- # @series_id = content
- when "calibre:series-index"
- book.volume = content
- when "cover"
- cover_id = content
- end
- end
-
- #---------------
- # Load the cover
-
- book.cover = load_cover(zipfile, opf_path, opf_doc, cover_id)
-
- book
- end
-
- def load_cover(zipfile, opf_path, opf_doc, cover_id)
- cover_id = "cover-image" if cover_id.nil?
-
- items = opf_doc.css("package manifest item")
- items.each do |i|
- href = i["href"]
- id = i["id"]
- mime_type = i["media-type"]
-
- next unless cover_id == id
-
- entry = zipfile.find_entry(href)
-
- if entry.nil?
- # Although the epub standard requires the path to be relative
- # to the base of the epub (zip), some books encountered in the
- # wild have been found to use a bath relative to the location
- # of the opf file.
- parts = opf_path.split("/")
- opf_base_path = parts[0..-2].join("/")
- cover_path = "#{opf_base_path}/#{href}"
- entry = zipfile.find_entry(cover_path)
- end
-
- if !entry && href.start_with?("../")
- # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg'
- cover_path = href[3..]
- entry = zipfile.find_entry(cover_path)
- end
-
- if entry.nil?
- puts "WARNING! Cover image #{href.inspect} not found in file #{@book.path.inspect}."
- return nil
- else
- entry.get_input_stream do |is|
- return Cover.new(is, href, mime_type)
- end
- end
- end
-
- nil
+ def opf_path(zipfile)
+ cont_xml = zipfile.read("META-INF/container.xml")
+ cont_doc = Nokogiri::XML(cont_xml)
+ cont_doc.css("container rootfiles rootfile")[0]["full-path"]
end
end
end