From: Chris Jaekl Date: Mon, 15 May 2017 12:22:06 +0000 (+0900) Subject: Stores book metadata to PostgreSQL database. X-Git-Url: https://jaekl.net/gitweb/?a=commitdiff_plain;h=291671015d4e88cbb7e7f3995109a88ee993aac6;p=quanlib.git Stores book metadata to PostgreSQL database. --- diff --git a/author.rb b/author.rb index a0a95e8..fb2003b 100644 --- a/author.rb +++ b/author.rb @@ -1,15 +1,57 @@ class Author - def initialize(surname, givenNames) - @surname = surname - @givenNames = givenNames + def initialize(grouping, reading_order, sort_order) + @grouping = grouping + @reading_order = reading_order + @sort_order = sort_order + + if (nil == sort_order) || ('Unknown' == sort_order) + @sort_order = reading_to_sort_order(reading_order) + end + end + + def grouping + @grouping + end + + def reading_order + @reading_order + end + + def sort_order + @sort_order end def inspect - @givenNames + ' ' + @surname + result = '(Author:' + if nil != @grouping + result += ' grouping="' + @grouping + '"' + end + if nil != @reading_order + result += ' reading_order="' + @reading_order + '"' + end + if nil != @sort_order + result += ' sort_order="' + @sort_order + '"' + end + result += ')' + + return result end def to_s inspect end + + protected + def reading_to_sort_order(reading_order) + sort_order = reading_order + + parts = reading_order.split(' ') + if parts.length > 1 + sort_order = parts[-1] + ', ' + parts[0..-2].join(' ') + end + + return sort_order + end end + diff --git a/book.rb b/book.rb index 3cf5139..d7a2a26 100644 --- a/book.rb +++ b/book.rb @@ -9,6 +9,7 @@ class Book @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/' def initialize(fileName) + #puts 'InitBook(' + fileName + ')' @author = nil @cover = nil @description = nil @@ -35,6 +36,10 @@ class Book return false end + def author + return @author + end + def cover return @cover end @@ -72,7 +77,7 @@ class Book def inspect data = [] if nil != @author - data.push('author="' + @author.to_s + '"') + data.push('author="' + @author.inspect + '"') end if nil != @series data.push('series="' + @series + '"') @@ -96,30 +101,42 @@ class Book @path end + def series + @series + end + def to_s return inspect() end + def title + @title + end + + def volume + @volume + end + protected def isUpper?(c) return /[[:upper:]]/.match(c) end protected - def massageAuthor(input) + def massage_author(input) if nil == input return nil end - result = "" + reading_order = "" input.each_char do |c| - if isUpper?(c) and (result.length > 0) - result += " " - end - result += c + if isUpper?(c) and (reading_order.length > 0) + reading_order += " " + end + reading_order += c end - - return result + + return reading_order end # Returns (series, volumeNo, titleText) @@ -158,7 +175,10 @@ class Book parts = fileName.split('/') (@series, @volume, @title) = processTitle(parts[-1]) if parts.length > 1 - @author = massageAuthor(parts[-2]) + grouping = parts[-2] + reading_order = massage_author(grouping) + sort_order = nil + @author = Author.new(grouping, reading_order, sort_order) end if fileName.downcase.end_with?(".epub") @@ -173,6 +193,7 @@ class Book Zip::File.open(fileName) do |zipfile| entry = zipfile.find_entry('META-INF/container.xml') if nil == entry + puts 'No META-INF/container.xml, skipping book ' + fileName return end contXml = zipfile.read('META-INF/container.xml') @@ -198,22 +219,25 @@ class Book #------- # Author + grouping = @author.grouping + reading_order = @author.reading_order + sort_order = @author.sort_order + creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL) if (creators.length > 0) creator = creators[0] if nil != creator role = creator['opf:role'] if 'aut' == role - name = creator.content - parts = name.split(' ') - if parts.length > 1 - surname = parts[-1] - givenNames = parts[0..-2].join(' ') - @author = Author.new(surname, givenNames) - else - @author = Author.new(name, '') + reading_order = creator.content + + file_as = creator['opf:file-as'] + if nil != file_as + sort_order = file_as end end + + @author = Author.new(grouping, reading_order, sort_order) end end @@ -253,6 +277,7 @@ class Book @volume = content elsif 'cover' == name coverId = content + #puts 'File ' + @path + ' coverId ' + coverId end end diff --git a/main.rb b/main.rb index 2d7f68b..bc175fb 100644 --- a/main.rb +++ b/main.rb @@ -1,3 +1,4 @@ +require 'store' require 'walkdir' outputDir = 'output' @@ -5,11 +6,34 @@ outputDir = 'output' books = [] imageCount = 0 +def handleArg(arg) + if "--purge" == arg + puts 'Purging database...' + @store.dropSchema() + elsif arg.start_with?("--") + abort('ERROR: Unrecognized option "' + arg + '".') + end +end + +@store = Store.new() +@store.connect() + +for arg in ARGV + handleArg(arg) +end + +@store.init_db() + for arg in ARGV - w = WalkDir.new(arg) - books += (w.books) + if ! arg.start_with?("--") + puts 'Scanning directory "' + arg + '"...' + w = WalkDir.new(@store, arg) + books += (w.books) + end end +puts 'Creating output...' + if ! Dir.exist?(outputDir) Dir.mkdir(outputDir) end @@ -65,3 +89,4 @@ open(outputDir + '/index.html', 'w') do |fd| fd.puts "" end +@store.disconnect() diff --git a/store.rb b/store.rb new file mode 100644 index 0000000..0b7b476 --- /dev/null +++ b/store.rb @@ -0,0 +1,190 @@ + +require 'fileutils' +require 'pg' + +class Store + def initialize + @basepath = '/home/chris/prog/quanlib/efs' # TODO: FIXME: configure this in a sane way + @conn = nil + + #@dburl = 'dbi:Pg:quanlib:localhost' + @dbhost = "localhost" + @dbport = 5432 + @dbname = 'quanlib' + @dbuser = 'quanlib' + @dbpass = 'quanlib' + end + + def connect + # @conn = PGconn.connect('localhost', 5432, '', '', 'quanlib', 'quanlib', 'quanlib') + @conn = PG.connect('localhost', 5432, '', '', 'quanlib', 'quanlib', 'quanlib') + return @conn + end + + def disconnect + @conn.close() + end + + def construct_efs_path(efs_id) + id_str = sprintf('%010d', efs_id) + path = sprintf('%s/%s/%s/%s', id_str[0,2], id_str[2,2], id_str[4,2], id_str[6,2]) + name = id_str + '.dat' + return path, name + end + + def create_schema + create_authors = +< 0 + return rs[0]['id'] + end + end + return nil + end + + def store_author(author) + id = find_author(author) + if nil == id + sqlInsert = "INSERT INTO Authors(grouping, reading, sort) VALUES ($1, $2, $3);" + args = [author.grouping, author.reading_order, author.sort_order] + begin + rs = @conn.exec_params(sqlInsert, args) + rescue Exception => e + puts sqlInsert + ": " + args.inspect() + puts e.message + puts $@ + ensure + rs.clear if rs + end + end + return find_author(author) + end + + def store_book(book) + sql = "INSERT INTO Books (author, cover, description, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7);" + + author_id = store_author(book.author) + (efs_id, mime_type) = store_cover(book) + + args = [author_id, efs_id, book.description(), book.path(), book.series(), book.title(), book.volume()] + + begin + rs = @conn.exec_params(sql, args) + rescue Exception => e + puts sql + ": " + args.inspect() + puts e.message + puts $@ + ensure + rs.clear if rs + end + end + + def store_cover(book) + efs_id = nil + cover = book.cover() + + if nil == cover + return nil + end + + @conn.exec("SELECT nextval('efs_id')") do |rs| + efs_id = rs[0]['nextval'] + end + + if nil == efs_id + return nil + end + + (efspath, efsname) = construct_efs_path(efs_id) + + efspath = @basepath + '/' + efspath + + FileUtils.mkdir_p(efspath) + + (filepath, mimetype) = cover.writeImage(efspath, efsname) + + sql = "INSERT INTO efs VALUES ($1, $2)" + begin + rs = @conn.exec_params(sql, [efs_id, mimetype]) + rescue Exception => e + puts e.message + puts $@ + ensure + rs.clear if rs + end + + return efs_id, mimetype + end +end + diff --git a/store_test.rb b/store_test.rb new file mode 100644 index 0000000..301be76 --- /dev/null +++ b/store_test.rb @@ -0,0 +1,21 @@ + +require 'minitest/autorun' +require 'store' + +class StoreTest < MiniTest::Test + def test_construct_efs_path + data = [ [ 1234, '00/00/00/12', '0000001234.dat'], + [ 1, '00/00/00/00', '0000000001.dat'], + [1234567890, '12/34/56/78', '1234567890.dat'], + [ 7778123, '00/07/77/81', '0007778123.dat'], + [ 0x1b, '00/00/00/00', '0000000027.dat'] ] + + store = Store.new() + + for (input, expectedPath, expectedName) in data + (actualPath, actualName) = store.construct_efs_path(input) + assert_equal(expectedPath, actualPath) + assert_equal(expectedName, actualName) + end + end +end diff --git a/walkdir.rb b/walkdir.rb index 035752f..5d5bf84 100644 --- a/walkdir.rb +++ b/walkdir.rb @@ -19,10 +19,12 @@ # .../DorothyGilman/P06_On_the_China_Station.epub. require 'book' +require 'store' class WalkDir - def initialize(root) + def initialize(store, root) @root = root + @store = store @files = walk(@root) end @@ -31,6 +33,7 @@ class WalkDir for file in @files.sort if Book.canHandle?(file) book = Book.new(file) + @store.store_book(book) result.push(book) end end @@ -51,6 +54,7 @@ class WalkDir result.push(fullName) end end + #puts result return result end end