Parse .epub files to extract metadata and cover image.
author Chris Jaekl <cejaekl@yahoo.com>
Fri, 24 Feb 2017 14:23:31 +0000 (23:23 +0900)
committer Chris Jaekl <cejaekl@yahoo.com>
Fri, 24 Feb 2017 14:23:31 +0000 (23:23 +0900)
author.rb
book.rb
main.rb
walkdir.rb

diff --git a/author.rb b/author.rb
index 6bfe610495b6e7a090499141ba890b7e5ec17208..a0a95e89f340fe4c996a540947e2432202011be1 100644 (file)
--- a/author.rb
+++ b/author.rb
@@ -4,4 +4,12 @@ class Author
     @surname = surname
     @givenNames = givenNames
   end
+
+  def inspect
+    @givenNames + ' ' + @surname
+  end
+
+  def to_s
+    inspect
+  end
 end
diff --git a/book.rb b/book.rb
index 814d890a382c9895978746f0df1bcf012216f504..72bb83ad1170f77b29e58279eac85cb32c505146 100644 (file)
--- a/book.rb
+++ b/book.rb
@@ -1,5 +1,9 @@
 
-require './author.rb'
+require 'nokogiri'
+require 'zip'
+
+require './author'
+require './cover'
 
 class Book
   def initialize(fileName)
@@ -30,7 +34,7 @@ class Book
   def inspect
     data = []
     if nil != @author
-      data.push('author="' + @author + '"')
+      data.push('author="' + @author.to_s + '"')
     end
     if nil != @series
       data.push('series="' + @series + '"')
@@ -41,6 +45,9 @@ class Book
     if nil != @title
       data.push('title="' + @title + '"')
     end
+    if nil != @cover
+      data.push(@cover.inspect())
+    end
     if nil != @path
       data.push('path="' + @path + '"')
     end
@@ -111,5 +118,88 @@ class Book
     if parts.length > 1
       @author = massageAuthor(parts[-2])
     end
+
+    if fileName.downcase.end_with?(".epub")
+      scanEpub!(fileName)
+    end
+  end
+
+  protected 
+  def scanEpub!(fileName)
+    Zip::File.open(fileName) do |zipfile|
+      contXml = zipfile.read('META-INF/container.xml')
+      contDoc = Nokogiri::XML(contXml)
+      opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
+
+      scanOpf!(zipfile, opfPath)
+    end
+  end
+
+  protected
+  def scanOpf!(zipfile, opfPath)
+    coverId = nil
+
+    opfXml = zipfile.read(opfPath)
+    opfDoc = Nokogiri::XML(opfXml)
+
+    #-------
+    # Author
+
+    creator = opfDoc.css('dc|creator', 'dc' => 'http://purl.org/dc/elements/1.1/')
+    if nil != creator
+      roleNode = creator.attr('role')
+      if nil != roleNode
+        role = roleNode.value
+        if 'aut' == role
+          name = creator.children[0].content
+          parts = name.split(' ')
+          if parts.length > 1
+            surname = parts[-1]
+            givenNames = parts[0..-2].join(' ')
+            @author = Author.new(surname, givenNames)
+          else
+            @author = Author.new(name, '')
+          end
+        end
+      end
+    end
+
+    #---------------------------------------
+    # Other metadata:  series, volume, cover
+
+    metas = opfDoc.css('package metadata meta')
+    for m in metas
+      name = m['name']
+      content = m['content']
+
+      if 'calibre:series' == name
+        @series = content
+      elsif 'calibre:series-index' == name
+        @volume = content
+      elsif 'cover' == name
+        coverId = content
+      end
+    end
+
+    #---------------
+    # Load the cover
+
+    coverFile = nil
+    if nil != coverId
+      items = opfDoc.css('package manifest item')
+      for i in items
+        href = i['href']
+        id = i['id']
+        mimeType = i['media-type']
+
+        if coverId == id
+          entry = zipfile.find_entry(href)
+          entry.get_input_stream() do |is|
+            @cover = Cover.new(is, href, mimeType)
+          end
+        end
+      end
+    end
   end
 end
+
diff --git a/main.rb b/main.rb
index 98f5a3555656ba398afcc59efad7e7ffc2f0e2a2..76f543e61cffd9019e2a1eee49b4860e7e6ebbf4 100644 (file)
--- a/main.rb
+++ b/main.rb
@@ -1,4 +1,4 @@
-require './walkdir.rb'
+require './walkdir'
 
 books = []
 
diff --git a/walkdir.rb b/walkdir.rb
index b54d0a2b4134b970e7999752e33df546e9a4da5a..c776dbfd7e67822042dd8bd3c825bc35ac338625 100644 (file)
--- a/walkdir.rb
+++ b/walkdir.rb
@@ -18,7 +18,7 @@
 # and Mrs. Pollifax volume 6, On the China Station, is
 # .../DorothyGilman/P06_On_the_China_Station.epub.
 
-require './book.rb'
+require './book'
 
 class WalkDir
   def initialize(root)