Improve HTML formatting. Handle more variants of EPUB.

author Chris Jaekl <cejaekl@yahoo.com>

Sat, 25 Feb 2017 11:42:52 +0000 (20:42 +0900)

committer Chris Jaekl <cejaekl@yahoo.com>

Sat, 25 Feb 2017 11:42:52 +0000 (20:42 +0900)
author Chris Jaekl <cejaekl@yahoo.com>
Sat, 25 Feb 2017 11:42:52 +0000 (20:42 +0900)
committer Chris Jaekl <cejaekl@yahoo.com>
Sat, 25 Feb 2017 11:42:52 +0000 (20:42 +0900)
diff --git a/book.rb b/book.rb

index 370093da92964413102b286792ab27a4e8ed6415..f59b61a66ea44d17a86e7d999e34496b84fbea99 100644 (file)
--- a/book.rb
+++ b/book.rb
@@ -22,6 +22,7 @@ class Book
        return false
      end
  
+    #puts "Filename:  " + fileName.to_s
      lowerName = fileName.downcase()
  
      if lowerName.end_with?(".epub")
@@ -84,6 +85,10 @@ class Book
      return '(Book:' + data.join(',') + ')'
    end
  
+  def path
+    @path
+  end
+
    def to_s
      return inspect()
    end
@@ -156,13 +161,23 @@ class Book
  
    protected 
    def scanEpub!(fileName)
-    puts 'Scanning "' + fileName.to_s + '"...'
-    Zip::File.open(fileName) do |zipfile|
-      contXml = zipfile.read('META-INF/container.xml')
-      contDoc = Nokogiri::XML(contXml)
-      opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
+    #puts 'Scanning "' + fileName.to_s + '"...'
+    begin
+      Zip::File.open(fileName) do |zipfile|
+        entry = zipfile.find_entry('META-INF/container.xml')
+        if nil == entry
+          return
+        end
+        contXml = zipfile.read('META-INF/container.xml')
+        contDoc = Nokogiri::XML(contXml)
+        opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
  
-      scanOpf!(zipfile, opfPath)
+        scanOpf!(zipfile, opfPath)
+      end
+    rescue Zip::Error => exc
+      puts 'ERROR processing file "' + fileName + '":'
+      puts exc.message
+      puts exc.backtrace
      end
    end
  
@@ -177,11 +192,11 @@ class Book
      # Author
  
      creator = opfDoc.css('dc|creator', 'dc' => 'http://purl.org/dc/elements/1.1/')
-    if nil != creator
+    if (nil != creator) and (creator.length > 0)
        roleNode = creator.attr('role')
        if nil != roleNode
          role = roleNode.value
-        if 'aut' == role
+        if ('aut' == role) and (creator.children.length > 0) and (nil != creator.children[0])
            name = creator.children[0].content
            parts = name.split(' ')
            if parts.length > 1
@@ -215,22 +230,47 @@ class Book
      #---------------
      # Load the cover
  
+    @cover = loadCover(zipfile, opfPath, opfDoc, coverId)
+  end
+
+  protected
+  def loadCover(zipfile, opfPath, opfDoc, coverId)
      coverFile = nil
-    if nil != coverId
-      items = opfDoc.css('package manifest item')
-      for i in items
-        href = i['href']
-        id = i['id']
-        mimeType = i['media-type']
-
-        if coverId == id
-          entry = zipfile.find_entry(href)
+    if nil == coverId
+      coverId = "cover-image"
+    end
+
+    items = opfDoc.css('package manifest item')
+    for i in items
+      href = i['href']
+      id = i['id']
+      mimeType = i['media-type']
+
+      if coverId == id
+        entry = zipfile.find_entry(href)
+
+        if nil == entry
+          # Although the epub standard requires the path to be relative 
+          # to the base of the epub (zip), some books encountered in the
+          # wild have been found to use a path relative to the location 
+          # of the opf file.
+          parts = opfPath.split('/')
+          opfBasePath = opfPath.split('/')[0..-2].join('/')
+          coverPath = opfBasePath + '/' + href
+          entry = zipfile.find_entry(coverPath)
+        end
+
+        if nil == entry
+          puts 'WARNING!  Cover image "' + href + '" not found in file "' + @path + '".'
+          return nil
+        else
            entry.get_input_stream() do |is|
-            @cover = Cover.new(is, href, mimeType)
+            return Cover.new(is, href, mimeType)
            end
          end
        end
      end
+    return nil
    end
  end
  
diff --git a/main.rb b/main.rb

index 4e73b07c0b490eca6eef199f504055406bfb907d..8617ab0146577146d30e6ec73c21c7856ab03a1c 100644 (file)
--- a/main.rb
+++ b/main.rb
@@ -15,22 +15,34 @@ if ! Dir.exist?(outputDir)
  end
  
  open(outputDir + '/index.html', 'w') do |fd|
-  fd.puts "<html>"
-  fd.puts "  <head><title>Books</title></head>"
-  fd.puts "  <body>"
-  fd.puts "    <table>"
+  fd.puts '<html>'
+  fd.puts '  <head>'
+  fd.puts '    <meta charset="utf-8"/>'
+  fd.puts '    <title>Books</title>'
+  fd.puts '    <style>'
+  fd.puts 'div { '
+  fd.puts '  display: inline-block;'
+  fd.puts '  width: 400px;'
+  fd.puts '  margin: 10px;'
+  fd.puts '  border 3px solid #73ad21;'
+  fd.puts '}'
+  fd.puts '    </style>'
+  fd.puts '  </head>'
+  fd.puts '  <body>'
    
    for book in books
      image = nil
      if nil != book.cover
        imageCount += 1
        (path, mimeType) = book.cover.writeImage(outputDir, 'image' + imageCount.to_s)
-      image = '<img src="' + path + '"/>'
+      image = '<img height="200px" src="' + path + '"/>'
      else
        image = '(No cover image)'
      end
  
-    fd.puts "      <tr><td>" + image + "</td><td>" + book.describe() + "</td></tr>"
+    fd.puts '    <div><table>'
+    fd.puts '      <tr><td><a href="' + book.path + '">' + image + '</a></td><td>' + book.describe() + '</td></tr>'
+    fd.puts '    </table></div>'
    end
    
    fd.puts "    </table>"
diff --git a/walkdir.rb b/walkdir.rb

index df73e4e9f393b0f9b9cf956bb50b0e87601fbb11..035752f05d48785d10db87d3b6e6bf91abe855a2 100644 (file)
--- a/walkdir.rb
+++ b/walkdir.rb
@@ -28,7 +28,7 @@ class WalkDir
  
    def books
      result = []
-    for file in @files
+    for file in @files.sort
        if Book.canHandle?(file)
          book = Book.new(file)
          result.push(book)
@@ -42,7 +42,7 @@ class WalkDir
      children = Dir.entries(path)
      for child in children
        fullName = (path.chomp("/")) + "/" + child
-      if (File.directory?(fullName)) and (child != ".") and (child != "..")
+      if (File.directory?(fullName)) and (child != ".") and (child != "..") and (!File.symlink?(fullName))
          sub = walk(fullName)
          if (sub != nil) and (sub.length > 0)
            result.concat(sub)
author	Chris Jaekl <cejaekl@yahoo.com>
	Sat, 25 Feb 2017 11:42:52 +0000 (20:42 +0900)
committer	Chris Jaekl <cejaekl@yahoo.com>
	Sat, 25 Feb 2017 11:42:52 +0000 (20:42 +0900)
book.rb		patch \| blob \| history
main.rb		patch \| blob \| history
walkdir.rb		patch \| blob \| history