From e9d890ae4d346ce3abe93a8db94d3a3ddf9819d9 Mon Sep 17 00:00:00 2001 From: Chris Jaekl Date: Sat, 25 Feb 2017 20:42:52 +0900 Subject: [PATCH] Improve HTML formatting. Handle more variants of EPUB. --- book.rb | 76 +++++++++++++++++++++++++++++++++++++++++------------- main.rb | 24 ++++++++++++----- walkdir.rb | 4 +-- 3 files changed, 78 insertions(+), 26 deletions(-) diff --git a/book.rb b/book.rb index 370093d..f59b61a 100644 --- a/book.rb +++ b/book.rb @@ -22,6 +22,7 @@ class Book return false end + #puts "Filename: " + fileName.to_s lowerName = fileName.downcase() if lowerName.end_with?(".epub") @@ -84,6 +85,10 @@ class Book return '(Book:' + data.join(',') + ')' end + def path + @path + end + def to_s return inspect() end @@ -156,13 +161,23 @@ class Book protected def scanEpub!(fileName) - puts 'Scanning "' + fileName.to_s + '"...' - Zip::File.open(fileName) do |zipfile| - contXml = zipfile.read('META-INF/container.xml') - contDoc = Nokogiri::XML(contXml) - opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path'] + #puts 'Scanning "' + fileName.to_s + '"...' 
+ begin + Zip::File.open(fileName) do |zipfile| + entry = zipfile.find_entry('META-INF/container.xml') + if nil == entry + return + end + contXml = zipfile.read('META-INF/container.xml') + contDoc = Nokogiri::XML(contXml) + opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path'] - scanOpf!(zipfile, opfPath) + scanOpf!(zipfile, opfPath) + end + rescue Zip::Error => exc + puts 'ERROR processing file "' + fileName + '":' + puts exc.message + puts exc.backtrace end end @@ -177,11 +192,11 @@ class Book # Author creator = opfDoc.css('dc|creator', 'dc' => 'http://purl.org/dc/elements/1.1/') - if nil != creator + if (nil != creator) and (creator.length > 0) roleNode = creator.attr('role') if nil != roleNode role = roleNode.value - if 'aut' == role + if ('aut' == role) and (creator.children.length > 0) and (nil != creator.children[0]) name = creator.children[0].content parts = name.split(' ') if parts.length > 1 @@ -215,22 +230,47 @@ class Book #--------------- # Load the cover + @cover = loadCover(zipfile, opfPath, opfDoc, coverId) + end + + protected + def loadCover(zipfile, opfPath, opfDoc, coverId) coverFile = nil - if nil != coverId - items = opfDoc.css('package manifest item') - for i in items - href = i['href'] - id = i['id'] - mimeType = i['media-type'] - - if coverId == id - entry = zipfile.find_entry(href) + if nil == coverId + coverId = "cover-image" + end + + items = opfDoc.css('package manifest item') + for i in items + href = i['href'] + id = i['id'] + mimeType = i['media-type'] + + if coverId == id + entry = zipfile.find_entry(href) + + if nil == entry + # Although the epub standard requires the path to be relative + # to the base of the epub (zip), some books encountered in the + # wild have been found to use a path relative to the location + # of the opf file. 
+ parts = opfPath.split('/') + opfBasePath = opfPath.split('/')[0..-2].join('/') + coverPath = opfBasePath + '/' + href + entry = zipfile.find_entry(coverPath) + end + + if nil == entry + puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".' + return nil + else entry.get_input_stream() do |is| - @cover = Cover.new(is, href, mimeType) + return Cover.new(is, href, mimeType) end end end end + return nil end end diff --git a/main.rb b/main.rb index 4e73b07..8617ab0 100644 --- a/main.rb +++ b/main.rb @@ -15,22 +15,34 @@ if ! Dir.exist?(outputDir) end open(outputDir + '/index.html', 'w') do |fd| - fd.puts "" - fd.puts " Books" - fd.puts " " - fd.puts " " + fd.puts '' + fd.puts ' ' + fd.puts ' ' + fd.puts ' Books' + fd.puts ' ' + fd.puts ' ' + fd.puts ' ' for book in books image = nil if nil != book.cover imageCount += 1 (path, mimeType) = book.cover.writeImage(outputDir, 'image' + imageCount.to_s) - image = '' + image = '' else image = '(No cover image)' end - fd.puts " " + fd.puts '
" + image + "" + book.describe() + "
' + fd.puts ' ' + fd.puts '
' + image + '' + book.describe() + '
' end fd.puts " " diff --git a/walkdir.rb b/walkdir.rb index df73e4e..035752f 100644 --- a/walkdir.rb +++ b/walkdir.rb @@ -28,7 +28,7 @@ class WalkDir def books result = [] - for file in @files + for file in @files.sort if Book.canHandle?(file) book = Book.new(file) result.push(book) @@ -42,7 +42,7 @@ class WalkDir children = Dir.entries(path) for child in children fullName = (path.chomp("/")) + "/" + child - if (File.directory?(fullName)) and (child != ".") and (child != "..") + if (File.directory?(fullName)) and (child != ".") and (child != "..") and (!File.symlink?(fullName)) sub = walk(fullName) if (sub != nil) and (sub.length > 0) result.concat(sub) -- 2.39.2