From: Chris Jaekl Date: Mon, 8 Jul 2019 23:45:48 +0000 (-0400) Subject: Handle a few extra edge cases when parsing epubs. X-Git-Url: https://jaekl.net/gitweb/?a=commitdiff_plain;h=530ad9b3f9bc70d7dd8c80ba5b9f35e98d8cc03d;p=quanlib.git Handle a few extra edge cases when parsing epubs. Also, remove trailing white space. --- diff --git a/book.rb b/book.rb index 6d90c0e..cd14ab6 100644 --- a/book.rb +++ b/book.rb @@ -90,7 +90,7 @@ class Book if nil != @author result.push('by ' + @author.reading_order + '') end - + seriesInfo = [] series = @store.load_series(@series_id) if nil != series and nil != series.descr @@ -153,7 +153,7 @@ class Book def series_id @series_id end - + def series_id=(value) @series_id = value end @@ -270,14 +270,15 @@ class Book if ('01_nonfic' == category) && (nil == classification_id) open(Store.unclassified_csv, 'a') do |fd| fd.puts('"' + grouping.to_s + '","' + path + '"') - end + end end end - protected + protected def scanEpub!(fileName) #puts 'Scanning "' + fileName.to_s + '"...' begin + Zip.warn_invalid_date = false Zip::File.open(fileName) do |zipfile| entry = zipfile.find_entry('META-INF/container.xml') if nil == entry @@ -361,7 +362,7 @@ class Book #--------------------------------------- # Description - + descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL) if (descrNodes.length > 0) descrNode = descrNodes[0] @@ -412,9 +413,9 @@ class Book entry = zipfile.find_entry(href) if nil == entry - # Although the epub standard requires the path to be relative + # Although the epub standard requires the path to be relative # to the base of the epub (zip), some books encountered in the - # wild have been found to use a bath relative to the location + # wild have been found to use a bath relative to the location # of the opf file. parts = opfPath.split('/') opfBasePath = opfPath.split('/')[0..-2].join('/') @@ -422,6 +423,14 @@ class Book entry = zipfile.find_entry(coverPath) end + unless entry + # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg' + if href.start_with? '../' + coverPath = href[3..-1] + entry = zipfile.find_entry(coverPath) + end + end + if nil == entry puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".' return nil