--- /dev/null
+# frozen_string_literal: true
+
+require "nokogiri"
+require "zip"
+
+require "scanner/scanner"
+
+module Scanner
+ # Scans an .epub file for information about the book it represents
+ class Opf
+ def initialize(book)
+ @book = book
+ end
+
+ def scan_opf(zipfile, opf_path)
+ @cover_id = "cover-image"
+
+ opf_xml = zipfile.read(opf_path)
+ opf_doc = Nokogiri::XML(opf_xml)
+
+ load_author!(opf_doc)
+ load_title!(opf_doc)
+ load_description!(opf_doc)
+ load_language!(opf_doc)
+ load_other_metadata!(opf_doc)
+ @book.cover = load_cover(zipfile, opf_path, opf_doc, @cover_id)
+
+ @book
+ end
+
+ private
+
+ DC_NS_URL = "http://purl.org/dc/elements/1.1/"
+ private_constant :DC_NS_URL
+
+ def find_cover_entry(zipfile, href, opf_path)
+ entry = zipfile.find_entry(href)
+
+ if entry.nil?
+ # Although the epub standard requires the path to be relative
+ # to the base of the epub (zip), some books encountered in the
+ # wild have been found to use a path relative to the location
+ # of the opf file.
+ parts = opf_path.split("/")
+ opf_base_path = parts[0..-2].join("/")
+ cover_path = "#{opf_base_path}/#{href}"
+ entry = zipfile.find_entry(cover_path)
+ end
+
+ if !entry && href.start_with?("../")
+ # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg'
+ cover_path = href[3..]
+ entry = zipfile.find_entry(cover_path)
+ end
+
+ entry
+ end
+
+ def load_author!(opf_doc)
+ grouping = @book.author.grouping
+ reading_order = @book.author.reading_order
+ sort_order = @book.author.sort_order
+
+ creators = opf_doc.css("dc|creator", "dc" => DC_NS_URL)
+
+ return if creators.empty?
+
+ creator = creators[0]
+
+ return if creator.nil?
+
+ role = creator["opf:role"]
+ if role == "aut"
+ reading_order = creator.content
+
+ file_as = creator["opf:file-as"]
+ sort_order = file_as unless file_as.nil?
+ end
+
+ @book.author = Author.new(grouping, reading_order, sort_order)
+ end
+
+ def load_cover(zipfile, opf_path, opf_doc, cover_id)
+ items = opf_doc.css("package manifest item")
+ items.each do |i|
+ href = i["href"]
+ id = i["id"]
+ mime_type = i["media-type"]
+
+ next unless cover_id == id
+
+ entry = find_cover_entry(zipfile, href, opf_path)
+
+ if entry.nil?
+ puts "WARNING! Cover image #{href.inspect} not found in file #{@book.path.inspect}."
+ return nil
+ else
+ entry.get_input_stream do |is|
+ return Cover.new(is, href, mime_type)
+ end
+ end
+ end
+
+ nil
+ end
+
+ def load_description!(opf_doc)
+ descr_nodes = opf_doc.css("dc|description", "dc" => DC_NS_URL)
+ return if descr_nodes.empty?
+
+ descr_node = descr_nodes[0]
+ @book.description = descr_node.content unless descr_node.nil?
+ end
+
+ def load_language!(opf_doc)
+ lang_nodes = opf_doc.css("dc|language", "dc" => DC_NS_URL)
+ return if lang_nodes.empty?
+
+ lang_node = lang_nodes[0]
+ @book.language = lang_node.content if lang_node
+ end
+
+ def load_other_metadata!(opf_doc)
+ # Other metadata: series, volume, cover
+
+ metas = opf_doc.css("package metadata meta")
+ metas.each do |m|
+ name = m["name"]
+ content = m["content"]
+
+ case name
+ when "calibre:series"
+ # TODO: Dynamically create a new series?
+ # @series_id = content
+ when "calibre:series-index"
+ @book.volume = content
+ when "cover"
+ @cover_id = content
+ end
+ end
+ end
+
+ def load_title!(opf_doc)
+ titles = opf_doc.css("dc|title", "dc" => DC_NS_URL)
+ return if titles.empty?
+
+ title = titles[0]
+ @book.title = title.content unless title.nil?
+ end
+ end
+end