Stores book metadata to PostgreSQL database.
author Chris Jaekl <cejaekl@yahoo.com>
Mon, 15 May 2017 12:22:06 +0000 (21:22 +0900)
committer Chris Jaekl <cejaekl@yahoo.com>
Mon, 15 May 2017 12:22:06 +0000 (21:22 +0900)
author.rb
book.rb
main.rb
store.rb [new file with mode: 0644]
store_test.rb [new file with mode: 0644]
walkdir.rb

diff --git a/author.rb b/author.rb
index a0a95e89f340fe4c996a540947e2432202011be1..fb2003bf43b87bbd3a81c566065d0e84347a12cb 100644 (file)
--- a/author.rb
+++ b/author.rb
@@ -1,15 +1,57 @@
 
 class Author
-  def initialize(surname, givenNames)
-    @surname = surname
-    @givenNames = givenNames
+  def initialize(grouping, reading_order, sort_order)
+    @grouping = grouping
+    @reading_order = reading_order
+    @sort_order = sort_order
+
+    if (nil == sort_order) || ('Unknown' == sort_order)
+      @sort_order = reading_to_sort_order(reading_order)
+    end
+  end
+
+  def grouping
+    @grouping
+  end
+
+  def reading_order
+    @reading_order
+  end
+
+  def sort_order
+    @sort_order
   end
 
   def inspect
-    @givenNames + ' ' + @surname
+    result = '(Author:'
+    if nil != @grouping
+      result += ' grouping="' + @grouping + '"'
+    end
+    if nil != @reading_order
+      result += ' reading_order="' + @reading_order + '"'
+    end
+    if nil != @sort_order
+      result += ' sort_order="' + @sort_order + '"'
+    end
+    result += ')'
+
+    return result
   end
 
   def to_s
     inspect
   end
+
+  protected
+  def reading_to_sort_order(reading_order)
+    sort_order = reading_order
+
+    parts = reading_order.split(' ')
+    if parts.length > 1
+      sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
+    end
+
+    return sort_order
+  end
 end
+
diff --git a/book.rb b/book.rb
index 3cf513957c0a2cc4173266857c95952b8cc0bb14..d7a2a26e24d27d6d99cc3df50acb1d75818b6397 100644 (file)
--- a/book.rb
+++ b/book.rb
@@ -9,6 +9,7 @@ class Book
   @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
 
   def initialize(fileName)
+    #puts 'InitBook(' + fileName + ')'
     @author = nil
     @cover = nil
     @description = nil
@@ -35,6 +36,10 @@ class Book
     return false
   end
 
+  def author
+    return @author
+  end
+
   def cover
     return @cover
   end
@@ -72,7 +77,7 @@ class Book
   def inspect
     data = []
     if nil != @author
-      data.push('author="' + @author.to_s + '"')
+      data.push('author="' + @author.inspect + '"')
     end
     if nil != @series
       data.push('series="' + @series + '"')
@@ -96,30 +101,42 @@ class Book
     @path
   end
 
+  def series
+    @series
+  end
+
   def to_s
     return inspect()
   end
 
+  def title
+    @title
+  end
+
+  def volume
+    @volume
+  end
+
   protected
   def isUpper?(c)
     return /[[:upper:]]/.match(c)
   end
 
   protected
-  def massageAuthor(input)
+  def massage_author(input)
     if nil == input
       return nil
     end
 
-    result = ""
+    reading_order = ""
     input.each_char do |c|
-      if isUpper?(c) and (result.length > 0)
-        result += " "
-      end
-      result += c
+      if isUpper?(c) and (reading_order.length > 0)
+        reading_order += " "
+     end
+      reading_order += c
     end
-    
-    return result
+
+    return reading_order
   end
 
   # Returns (series, volumeNo, titleText)
@@ -158,7 +175,10 @@ class Book
     parts = fileName.split('/')
     (@series, @volume, @title) = processTitle(parts[-1])
     if parts.length > 1
-      @author = massageAuthor(parts[-2])
+      grouping = parts[-2]
+      reading_order = massage_author(grouping)
+      sort_order = nil
+      @author = Author.new(grouping, reading_order, sort_order)
     end
 
     if fileName.downcase.end_with?(".epub")
@@ -173,6 +193,7 @@ class Book
       Zip::File.open(fileName) do |zipfile|
         entry = zipfile.find_entry('META-INF/container.xml')
         if nil == entry
+          puts 'No META-INF/container.xml, skipping book ' + fileName
           return
         end
         contXml = zipfile.read('META-INF/container.xml')
@@ -198,22 +219,25 @@ class Book
     #-------
     # Author
 
+    grouping = @author.grouping
+    reading_order = @author.reading_order
+    sort_order = @author.sort_order
+
     creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
     if (creators.length > 0)
       creator = creators[0]
       if nil != creator
         role = creator['opf:role']
         if 'aut' == role
-          name = creator.content
-          parts = name.split(' ')
-          if parts.length > 1
-            surname = parts[-1]
-            givenNames = parts[0..-2].join(' ')
-            @author = Author.new(surname, givenNames)
-          else
-            @author = Author.new(name, '')
+          reading_order = creator.content
+
+          file_as = creator['opf:file-as']
+          if nil != file_as
+            sort_order = file_as
           end
         end
+
+        @author = Author.new(grouping, reading_order, sort_order)
       end
     end
 
@@ -253,6 +277,7 @@ class Book
         @volume = content
       elsif 'cover' == name
         coverId = content
+        #puts 'File ' + @path + ' coverId ' + coverId
       end
     end
 
diff --git a/main.rb b/main.rb
index 2d7f68b25c33c6bf45dbb0e909e17e3146f53e5b..bc175fbf51ae33205a37735d1390872a3ea630e1 100644 (file)
--- a/main.rb
+++ b/main.rb
@@ -1,3 +1,4 @@
+require 'store'
 require 'walkdir'
 
 outputDir = 'output'
@@ -5,11 +6,34 @@ outputDir = 'output'
 books = []
 imageCount = 0
 
+def handleArg(arg)
+  if "--purge" == arg
+    puts 'Purging database...'
+    @store.dropSchema()
+  elsif arg.start_with?("--")
+    abort('ERROR:  Unrecognized option "' + arg + '".')
+  end
+end
+
+@store = Store.new()
+@store.connect()
+
+for arg in ARGV
+  handleArg(arg)
+end
+
+@store.init_db()
+
 for arg in ARGV
-  w = WalkDir.new(arg)
-  books += (w.books)
+  if ! arg.start_with?("--")
+    puts 'Scanning directory "' + arg + '"...'
+    w = WalkDir.new(@store, arg)
+    books += (w.books)
+  end
 end
 
+puts 'Creating output...'
+
 if ! Dir.exist?(outputDir)
   Dir.mkdir(outputDir)
 end
@@ -65,3 +89,4 @@ open(outputDir + '/index.html', 'w') do |fd|
   fd.puts "</html>"
 end
 
+@store.disconnect()
diff --git a/store.rb b/store.rb
new file mode 100644 (file)
index 0000000..0b7b476
--- /dev/null
+++ b/store.rb
@@ -0,0 +1,190 @@
+
+require 'fileutils'
+require 'pg'
+
+class Store
+  def initialize
+    @basepath = '/home/chris/prog/quanlib/efs' # TODO: FIXME: configure this in a sane way
+    @conn = nil
+
+    #@dburl = 'dbi:Pg:quanlib:localhost'
+    @dbhost = "localhost"
+    @dbport = 5432
+    @dbname = 'quanlib'
+    @dbuser = 'quanlib'
+    @dbpass = 'quanlib'
+  end
+
+  def connect
+    # @conn = PGconn.connect('localhost', 5432, '', '', 'quanlib', 'quanlib', 'quanlib')
+    @conn = PG.connect('localhost', 5432, '', '', 'quanlib', 'quanlib', 'quanlib')
+    return @conn
+  end
+
+  def disconnect
+    @conn.close()
+  end
+
+  def construct_efs_path(efs_id)
+    id_str = sprintf('%010d', efs_id)
+    path = sprintf('%s/%s/%s/%s', id_str[0,2], id_str[2,2], id_str[4,2], id_str[6,2])
+    name = id_str + '.dat'
+    return path, name
+  end
+
+  def create_schema
+    create_authors = 
+<<EOS
+      CREATE TABLE Authors (
+        id          SERIAL PRIMARY KEY,
+        grouping    VARCHAR(64),
+        reading     VARCHAR(128),
+        sort        VARCHAR(128)
+      );
+EOS
+
+    create_books = 
+<<EOS
+      CREATE TABLE Books (
+        id          SERIAL PRIMARY KEY,
+        author      INTEGER REFERENCES Authors(id),
+        cover       INTEGER,
+        description TEXT,
+        path        VARCHAR(256),
+        series      VARCHAR(128),
+        title       VARCHAR(196),
+        volume      VARCHAR(16)
+      );
+EOS
+
+    create_efs = 
+<<EOS
+      CREATE TABLE EFS (
+        id          INTEGER,
+        mimetype    VARCHAR(64)
+      );
+EOS
+
+    stmts = [
+      create_authors,
+      create_books,
+      create_efs,
+      'CREATE SEQUENCE efs_id;'
+    ]
+
+    for stmt in stmts
+      @conn.exec(stmt)
+    end
+  end
+
+  def dropSchema
+    stmts = [
+      'DROP TABLE Books;',
+      'DROP TABLE Authors;',
+      'DROP TABLE EFS;',
+      'DROP SEQUENCE efs_id;'
+    ]
+
+    for stmt in stmts do
+      @conn.exec(stmt)
+    end
+  end
+
+  def init_db
+    sql = "SELECT 1 FROM pg_tables WHERE tableowner='quanlib' AND tablename='books'"
+    found = false
+    @conn.exec(sql).each do |row|
+      found = true
+    end
+
+    if ! found
+      create_schema()
+    end
+  end
+
+  def find_author(author)
+    sqlSelect = "SELECT id FROM Authors WHERE grouping=$1 AND reading=$2 AND sort=$3;"
+    args = [author.grouping, author.reading_order, author.sort_order]
+    @conn.exec_params(sqlSelect, args) do |rs|
+      if rs.ntuples > 0
+        return rs[0]['id']
+      end
+    end
+    return nil
+  end
+
+  def store_author(author)
+    id = find_author(author)
+    if nil == id
+      sqlInsert = "INSERT INTO Authors(grouping, reading, sort) VALUES ($1, $2, $3);"
+      args = [author.grouping, author.reading_order, author.sort_order]
+      begin 
+        rs = @conn.exec_params(sqlInsert, args)
+      rescue Exception => e
+        puts sqlInsert + ":  " + args.inspect()
+        puts e.message
+        puts $@
+      ensure
+        rs.clear if rs
+      end
+    end
+    return find_author(author)
+  end
+
+  def store_book(book)
+    sql = "INSERT INTO Books (author, cover, description, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7);"
+
+    author_id = store_author(book.author)
+    (efs_id, mime_type) = store_cover(book)
+
+    args = [author_id, efs_id, book.description(), book.path(), book.series(), book.title(), book.volume()]
+
+    begin
+      rs = @conn.exec_params(sql, args)
+    rescue Exception => e
+      puts sql + ": " + args.inspect()
+      puts e.message 
+      puts $@
+    ensure
+      rs.clear if rs
+    end
+  end
+
+  def store_cover(book)
+    efs_id = nil
+    cover = book.cover()
+
+    if nil == cover
+      return nil
+    end
+
+    @conn.exec("SELECT nextval('efs_id')") do |rs|
+      efs_id = rs[0]['nextval']
+    end
+
+    if nil == efs_id
+      return nil
+    end
+
+    (efspath, efsname) = construct_efs_path(efs_id)
+
+    efspath = @basepath + '/' + efspath
+
+    FileUtils.mkdir_p(efspath)
+
+    (filepath, mimetype) = cover.writeImage(efspath, efsname)
+
+    sql = "INSERT INTO efs VALUES ($1, $2)"
+    begin
+      rs = @conn.exec_params(sql, [efs_id, mimetype])
+    rescue Exception => e
+      puts e.message
+      puts $@
+    ensure
+      rs.clear if rs
+    end
+    
+    return efs_id, mimetype
+  end
+end
+
diff --git a/store_test.rb b/store_test.rb
new file mode 100644 (file)
index 0000000..301be76
--- /dev/null
+++ b/store_test.rb
@@ -0,0 +1,21 @@
+
+require 'minitest/autorun'
+require 'store'
+
+class StoreTest < MiniTest::Test
+  def test_construct_efs_path
+    data = [ [      1234, '00/00/00/12', '0000001234.dat'],
+             [         1, '00/00/00/00', '0000000001.dat'],
+            [1234567890, '12/34/56/78', '1234567890.dat'],
+            [   7778123, '00/07/77/81', '0007778123.dat'],
+            [      0x1b, '00/00/00/00', '0000000027.dat'] ]
+
+    store = Store.new()
+
+    for (input, expectedPath, expectedName) in data
+      (actualPath, actualName) = store.construct_efs_path(input)
+      assert_equal(expectedPath, actualPath)
+      assert_equal(expectedName, actualName)
+    end
+  end
+end
diff --git a/walkdir.rb b/walkdir.rb
index 035752f05d48785d10db87d3b6e6bf91abe855a2..5d5bf84ec90804e5c4b4be4ff30bdb69ab62a741 100644 (file)
--- a/walkdir.rb
+++ b/walkdir.rb
 # .../DorothyGilman/P06_On_the_China_Station.epub.
 
 require 'book'
+require 'store'
 
 class WalkDir
-  def initialize(root)
+  def initialize(store, root)
     @root = root
+    @store = store
     @files = walk(@root)
   end
 
@@ -31,6 +33,7 @@ class WalkDir
     for file in @files.sort
       if Book.canHandle?(file)
         book = Book.new(file)
+        @store.store_book(book)
         result.push(book)
       end
     end
@@ -51,6 +54,7 @@ class WalkDir
         result.push(fullName)
       end
     end
+    #puts result
     return result
   end
 end