From 787e9d26e094cbc2a8dab89d15926663ae659a3c Mon Sep 17 00:00:00 2001 From: Chris Jaekl Date: Wed, 26 Jun 2024 22:19:56 -0400 Subject: [PATCH] Refactor `Scanner::Opf` to reduce complexity --- app/scanner/epub.rb | 139 ++------------------------------------------ app/tconn.rb | 4 +- 2 files changed, 8 insertions(+), 135 deletions(-) diff --git a/app/scanner/epub.rb b/app/scanner/epub.rb index bfc1e0d..119f854 100644 --- a/app/scanner/epub.rb +++ b/app/scanner/epub.rb @@ -3,6 +3,7 @@ require "nokogiri" require "zip" +require "scanner/opf" require "scanner/scanner" module Scanner @@ -20,12 +21,8 @@ module Scanner puts "No META-INF/container.xml, skipping book #{filename.inspect}" return nil end - cont_xml = zipfile.read("META-INF/container.xml") - cont_doc = Nokogiri::XML(cont_xml) - opf_path = cont_doc.css("container rootfiles rootfile")[0]["full-path"] - book = scan_base_attributes(filename) - scan_opf(book, zipfile, opf_path) + Opf.new(book).scan_opf(zipfile, opf_path(zipfile)) end rescue Zip::Error => e puts "ERROR processing file #{filename.inspect}:" @@ -35,134 +32,10 @@ module Scanner private - DC_NS_URL = "http://purl.org/dc/elements/1.1/" - private_constant :DC_NS_URL - - def scan_opf(book, zipfile, opf_path) - cover_id = nil - - opf_xml = zipfile.read(opf_path) - opf_doc = Nokogiri::XML(opf_xml) - - #------- - # Author - - grouping = book.author.grouping - reading_order = book.author.reading_order - sort_order = book.author.sort_order - - creators = opf_doc.css("dc|creator", "dc" => DC_NS_URL) - unless creators.empty? - creator = creators[0] - - return if creator.nil? - - role = creator["opf:role"] - if role == "aut" - reading_order = creator.content - - file_as = creator["opf:file-as"] - sort_order = file_as unless file_as.nil? - end - - book.author = Author.new(grouping, reading_order, sort_order) - end - - #--------------------------------------- - # Title - - titles = opf_doc.css("dc|title", "dc" => DC_NS_URL) - unless titles.empty? - title = titles[0] - book.title = title.content unless title.nil? - end - - #--------------------------------------- - # Description - - descr_nodes = opf_doc.css("dc|description", "dc" => DC_NS_URL) - unless descr_nodes.empty? - descr_node = descr_nodes[0] - book.description = descr_node.content unless descr_node.nil? - end - - #--------------------------------------- - # Language - - lang_nodes = opf_doc.css("dc|language", "dc" => DC_NS_URL) - unless lang_nodes.empty? - lang_node = lang_nodes[0] - book.language = lang_node.content if lang_node - end - - #--------------------------------------- - # Other metadata: series, volume, cover - - metas = opf_doc.css("package metadata meta") - metas.each do |m| - name = m["name"] - content = m["content"] - - case name - when "calibre:series" - # TODO: Dynamically create a new series? - # @series_id = content - when "calibre:series-index" - book.volume = content - when "cover" - cover_id = content - end - end - - #--------------- - # Load the cover - - book.cover = load_cover(zipfile, opf_path, opf_doc, cover_id) - - book - end - - def load_cover(zipfile, opf_path, opf_doc, cover_id) - cover_id = "cover-image" if cover_id.nil? - - items = opf_doc.css("package manifest item") - items.each do |i| - href = i["href"] - id = i["id"] - mime_type = i["media-type"] - - next unless cover_id == id - - entry = zipfile.find_entry(href) - - if entry.nil? - # Although the epub standard requires the path to be relative - # to the base of the epub (zip), some books encountered in the - # wild have been found to use a bath relative to the location - # of the opf file. - parts = opf_path.split("/") - opf_base_path = parts[0..-2].join("/") - cover_path = "#{opf_base_path}/#{href}" - entry = zipfile.find_entry(cover_path) - end - - if !entry && href.start_with?("../") - # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg' - cover_path = href[3..] - entry = zipfile.find_entry(cover_path) - end - - if entry.nil? - puts "WARNING! Cover image #{href.inspect} not found in file #{@book.path.inspect}." - return nil - else - entry.get_input_stream do |is| - return Cover.new(is, href, mime_type) - end - end - end - - nil + def opf_path(zipfile) + cont_xml = zipfile.read("META-INF/container.xml") + cont_doc = Nokogiri::XML(cont_xml) + cont_doc.css("container rootfiles rootfile")[0]["full-path"] end end end diff --git a/app/tconn.rb b/app/tconn.rb index eb1faf0..437f301 100644 --- a/app/tconn.rb +++ b/app/tconn.rb @@ -24,7 +24,7 @@ class TimedConn end end - def exec(*args, &block) + def exec(*args) before = Time.now result = @conn.exec(*args) after = Time.now @@ -37,7 +37,7 @@ class TimedConn yield(result) end - def exec_params(*args, &block) + def exec_params(*args) before = Time.now result = @conn.exec_params(*args) after = Time.now -- 2.39.2