]> jaekl.net Git - quanlib.git/commitdiff
Switch from rspec to minitest
authorChris Jaekl <chris@jaekl.net>
Thu, 20 Jun 2024 15:41:02 +0000 (11:41 -0400)
committerChris Jaekl <chris@jaekl.net>
Thu, 20 Jun 2024 15:41:02 +0000 (11:41 -0400)
Also adds a Rakefile, and pulls in Rubocop while we're at it.

32 files changed:
Gemfile
Gemfile.lock
Rakefile [new file with mode: 0644]
app/author.rb [new file with mode: 0644]
app/book.rb [new file with mode: 0644]
app/book_loader.rb [new file with mode: 0644]
app/classification.rb [new file with mode: 0644]
app/cover.rb [new file with mode: 0644]
app/extract.rb [new file with mode: 0644]
app/main.rb [new file with mode: 0644]
app/navigator.rb [new file with mode: 0644]
app/page.rb [new file with mode: 0644]
app/series.rb [new file with mode: 0644]
app/store.rb [new file with mode: 0644]
app/tconn.rb [new file with mode: 0644]
app/walk_dir.rb [new file with mode: 0644]
author.rb [deleted file]
book.rb [deleted file]
book_loader.rb [deleted file]
classification.rb [deleted file]
cover.rb [deleted file]
extract.rb [deleted file]
main.rb [deleted file]
navigator.rb [deleted file]
page.rb [deleted file]
series.rb [deleted file]
store.rb [deleted file]
tconn.rb [deleted file]
test/book_test.rb
test/store_test.rb
test/test_helper.rb [new file with mode: 0644]
walk_dir.rb [deleted file]

diff --git a/Gemfile b/Gemfile
index e3b145ed25153117b71a8b5aecb38f6520c970e1..01814797818a10cc8fa8853589c6f2dccfbc11b5 100644 (file)
--- a/Gemfile
+++ b/Gemfile
@@ -1,7 +1,9 @@
 source 'http://rubygems.org'
 
 gem 'inifile'
+gem 'mocha'
 gem 'nokogiri'
 gem 'pg'
 gem 'rspec'
+gem 'rubocop'
 gem 'rubyzip'
index 3cc58d3a9dd19c0a8eb62f2b9b4fa57df470069d..977825132b2ac992bad3b69be8adbf9fd484692c 100644 (file)
@@ -1,36 +1,83 @@
 GEM
   remote: http://rubygems.org/
   specs:
-    diff-lcs (1.3)
+    ast (2.4.2)
+    diff-lcs (1.5.1)
     inifile (3.0.0)
-    mini_portile2 (2.4.0)
-    nokogiri (1.10.3)
-      mini_portile2 (~> 2.4.0)
-    pg (1.1.4)
-    rspec (3.8.0)
-      rspec-core (~> 3.8.0)
-      rspec-expectations (~> 3.8.0)
-      rspec-mocks (~> 3.8.0)
-    rspec-core (3.8.2)
-      rspec-support (~> 3.8.0)
-    rspec-expectations (3.8.4)
+    json (2.7.2)
+    language_server-protocol (3.17.0.3)
+    mocha (2.4.0)
+      ruby2_keywords (>= 0.0.5)
+    nokogiri (1.16.6-aarch64-linux)
+      racc (~> 1.4)
+    nokogiri (1.16.6-arm-linux)
+      racc (~> 1.4)
+    nokogiri (1.16.6-arm64-darwin)
+      racc (~> 1.4)
+    nokogiri (1.16.6-x86-linux)
+      racc (~> 1.4)
+    nokogiri (1.16.6-x86_64-darwin)
+      racc (~> 1.4)
+    nokogiri (1.16.6-x86_64-linux)
+      racc (~> 1.4)
+    parallel (1.25.1)
+    parser (3.3.3.0)
+      ast (~> 2.4.1)
+      racc
+    pg (1.5.6)
+    racc (1.8.0)
+    rainbow (3.1.1)
+    regexp_parser (2.9.2)
+    rexml (3.3.0)
+      strscan
+    rspec (3.13.0)
+      rspec-core (~> 3.13.0)
+      rspec-expectations (~> 3.13.0)
+      rspec-mocks (~> 3.13.0)
+    rspec-core (3.13.0)
+      rspec-support (~> 3.13.0)
+    rspec-expectations (3.13.1)
       diff-lcs (>= 1.2.0, < 2.0)
-      rspec-support (~> 3.8.0)
-    rspec-mocks (3.8.1)
+      rspec-support (~> 3.13.0)
+    rspec-mocks (3.13.1)
       diff-lcs (>= 1.2.0, < 2.0)
-      rspec-support (~> 3.8.0)
-    rspec-support (3.8.2)
-    rubyzip (1.2.3)
+      rspec-support (~> 3.13.0)
+    rspec-support (3.13.1)
+    rubocop (1.64.1)
+      json (~> 2.3)
+      language_server-protocol (>= 3.17.0)
+      parallel (~> 1.10)
+      parser (>= 3.3.0.2)
+      rainbow (>= 2.2.2, < 4.0)
+      regexp_parser (>= 1.8, < 3.0)
+      rexml (>= 3.2.5, < 4.0)
+      rubocop-ast (>= 1.31.1, < 2.0)
+      ruby-progressbar (~> 1.7)
+      unicode-display_width (>= 2.4.0, < 3.0)
+    rubocop-ast (1.31.3)
+      parser (>= 3.3.1.0)
+    ruby-progressbar (1.13.0)
+    ruby2_keywords (0.0.5)
+    rubyzip (2.3.2)
+    strscan (3.1.0)
+    unicode-display_width (2.5.0)
 
 PLATFORMS
-  ruby
+  aarch64-linux
+  arm-linux
+  arm64-darwin
+  x86-linux
+  x86_64-darwin
+  x86_64-linux
 
 DEPENDENCIES
   inifile
+  mocha
   nokogiri
   pg
   rspec
+  rubocop
   rubyzip
 
 BUNDLED WITH
-   2.0.2
+   2.5.13
diff --git a/Rakefile b/Rakefile
new file mode 100644 (file)
index 0000000..9cf3b56
--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+require "minitest/test_task"
+
+Minitest::TestTask.create
+
+require "rubocop/rake_task"
+
+RuboCop::RakeTask.new
+
+task default: %i[test rubocop]
diff --git a/app/author.rb b/app/author.rb
new file mode 100644 (file)
index 0000000..fb2003b
--- /dev/null
@@ -0,0 +1,57 @@
+
+class Author
+  def initialize(grouping, reading_order, sort_order)
+    @grouping = grouping
+    @reading_order = reading_order
+    @sort_order = sort_order
+
+    if (nil == sort_order) || ('Unknown' == sort_order)
+      @sort_order = reading_to_sort_order(reading_order)
+    end
+  end
+
+  def grouping
+    @grouping
+  end
+
+  def reading_order
+    @reading_order
+  end
+
+  def sort_order
+    @sort_order
+  end
+
+  def inspect
+    result = '(Author:'
+    if nil != @grouping
+      result += ' grouping="' + @grouping + '"'
+    end
+    if nil != @reading_order
+      result += ' reading_order="' + @reading_order + '"'
+    end
+    if nil != @sort_order
+      result += ' sort_order="' + @sort_order + '"'
+    end
+    result += ')'
+
+    return result
+  end
+
+  def to_s
+    inspect
+  end
+
+  protected
+  def reading_to_sort_order(reading_order)
+    sort_order = reading_order
+
+    parts = reading_order.split(' ')
+    if parts.length > 1
+      sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
+    end
+
+    return sort_order
+  end
+end
+
diff --git a/app/book.rb b/app/book.rb
new file mode 100644 (file)
index 0000000..2b93f4b
--- /dev/null
@@ -0,0 +1,421 @@
+
+require 'nokogiri'
+require 'rubygems'
+require 'zip'
+
+require_relative 'author'
+require_relative 'classification'
+require_relative 'cover'
+require_relative 'store'
+
+class Book
+  @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
+  @@SERIES_AND_VOLUME_REGEX = /^([A-Z]+)([0-9]+(\.[0-9]+)?)$/
+
+  attr_accessor :arrived
+  attr_accessor :author
+  attr_accessor :classification_id
+  attr_accessor :cover
+  attr_accessor :description
+  attr_accessor :language
+  attr_accessor :path
+  attr_accessor :series_id
+  attr_accessor :title
+  attr_accessor :volume
+
+  def initialize(store)
+    @store = store
+  end
+
+  def load_from_file!(fileName)
+    @path = fileName
+    parse_file_name!(fileName)
+  end
+
+  def self.can_handle?(fileName)
+    if nil == fileName
+      return false
+    end
+
+    #puts "Filename:  " + fileName.to_s
+    lowerName = fileName.downcase()
+
+    if lowerName.end_with?(".epub")
+      return true
+    end
+
+    if lowerName.end_with?(".pdf")
+      return true
+    end
+
+    return false
+  end
+
+  def self.grouping_for_title(title)
+    result = title
+
+    '\'",!#'.split('').each do |c|
+      result = result.gsub(c, '-')
+    end
+    result = result.gsub(/: */, '--')
+    result = result.gsub(' ', '_')
+
+    result
+  end
+
+  def heading
+    result = []
+
+    if nil != @title
+      result.push('<b>' + @title + '</b>')
+    else
+      result.push('<i>(Unknown title)</i>')
+    end
+    if nil != @author
+      result.push('<i>by ' + @author.reading_order + '</i>')
+    end
+
+    seriesInfo = []
+    series = @store.load_series(@series_id)
+    if nil != series and nil != series.descr
+      seriesInfo.push(series.descr.to_s)
+    end
+    if nil != @volume
+      seriesInfo.push(@volume.to_s)
+    end
+    if seriesInfo.length > 0
+      result.push(seriesInfo.join(' '))
+    end
+
+    classification = nil
+    if nil != @classification_id
+      classification = @store.load_classification(@classification_id)
+    end
+    if nil != classification
+      if nil != classification.ddc
+        result.push('Dewey: ' + classification.ddc.to_s)
+      end
+      if nil != classification.lcc
+        result.push('LCC: ' + classification.lcc.to_s)
+      end
+    end
+
+    return result.join('<br/>')
+  end
+
+  def inspect
+    data = []
+    if nil != @author
+      data.push('author="' + @author.inspect + '"')
+    end
+    if nil != @series_id
+      data.push('series_id="' + @series_id.to_s() + '"')
+    end
+    if nil != @volume
+      data.push('volume="' + @volume + '"')
+    end
+    if nil != @title
+      data.push('title="' + @title + '"')
+    end
+    if nil != @cover
+      data.push(@cover.inspect())
+    end
+    if nil != @path
+      data.push('path="' + @path + '"')
+    end
+    return '(Book:' + data.join(',') + ')'
+  end
+
+  def to_s
+    return inspect()
+  end
+
+  def title_grouping
+    if nil == @path
+      return nil
+    end
+
+    return File.basename(@path, '.*')
+  end
+
+  protected
+  def isUpper?(c)
+    return /[[:upper:]]/.match(c)
+  end
+
+  protected
+  def massage_author(input)
+    if nil == input
+      return nil
+    end
+
+    reading_order = ""
+    input.each_char do |c|
+      if isUpper?(c) and (reading_order.length > 0)
+        reading_order += " "
+     end
+      reading_order += c
+    end
+
+    return reading_order
+  end
+
+  # Returns (series, volumeNo, titleText)
+  protected
+  def processTitle(input)
+    if nil == input
+      return nil
+    end
+
+    arr = input.split('_')
+
+    series = nil
+    vol = nil
+
+    first = arr[0]
+    matchData = (arr[0]).match(@@SERIES_AND_VOLUME_REGEX)
+    if nil != matchData
+      capt = matchData.captures
+      series = capt[0]
+      vol = capt[1]
+      arr.shift
+    end
+
+    pos = arr[-1].rindex('.')
+    if nil != pos
+      arr[-1] = arr[-1].slice(0, pos)
+    end
+
+    title = arr.join(' ')
+
+    bare_title_grouping = title_grouping
+      .split('_')
+      .reject { |part| part.match(@@SERIES_AND_VOLUME_REGEX) }
+      .join('_')
+
+    unless bare_title_grouping == Book.grouping_for_title(title)
+      puts "WARNING:  title_grouping mismatch:  #{bare_title_grouping.inspect} vs. #{Book.grouping_for_title(title).inspect}"
+    end
+
+    return series, vol, title
+  end
+
+  protected
+  # Fills this book in from a <category>/<author>/<file> path, then logs it if unclassified.
+  def parse_file_name!(file_name)
+    category = nil   # e.g., non-fiction, fan-fiction
+    grouping = ''
+
+    parts = file_name.split('/')
+    (series_code, @volume, @title) = processTitle(parts[-1])
+    if parts.length > 1
+      grouping = parts[-2]
+      reading_order = massage_author(grouping)
+      sort_order = nil
+      @author = Author.new(grouping, reading_order, sort_order)
+      @series_id = @store.get_series(grouping, series_code)
+    end
+    category = parts[-3] if parts.length > 2
+
+    lc_file_name = file_name.downcase
+    if lc_file_name.end_with?(".epub")
+      scanEpub!(file_name)
+    elsif lc_file_name.end_with?(".pdf")
+      scan_pdf!(file_name)
+    end
+
+    @arrived = File.ctime(file_name)
+
+    @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*'))
+
+    # TODO:  Fix horrible hard-coded strings and paths
+    if ('01_nonfic' == category) && (nil == classification_id)
+      # unclassified_csv is an instance method of Store, so ask @store for it.
+      File.open(@store.unclassified_csv, 'a') do |fd|
+        fd.puts('"' + grouping.to_s + '","' + path + '"')
+      end
+    end
+  end
+
+  protected
+  def scanEpub!(fileName)
+    #puts 'Scanning "' + fileName.to_s + '"...'
+    begin
+      Zip.warn_invalid_date = false
+      Zip::File.open(fileName) do |zipfile|
+        entry = zipfile.find_entry('META-INF/container.xml')
+        if nil == entry
+          puts 'No META-INF/container.xml, skipping book ' + fileName
+          return
+        end
+        contXml = zipfile.read('META-INF/container.xml')
+        contDoc = Nokogiri::XML(contXml)
+        opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
+
+        scanOpf!(zipfile, opfPath)
+      end
+    rescue Zip::Error => exc
+      puts 'ERROR processing file "' + fileName + '":'
+      puts exc.message
+      puts exc.backtrace
+    end
+  end
+
+  protected
+  def scan_pdf!(file_name)
+    #puts 'Scanning "' + file_name.to_s + '"...'
+
+    pdf_path = File.expand_path(file_name).to_s
+    if ! pdf_path.end_with?('.pdf')
+      puts 'Unexpected internal error:  path "' + file_name.to_s + '" does not end with ".pdf".'
+      return
+    end
+
+    jpeg_path = pdf_path[0..-5] + '.jpeg'
+    if File.file?(jpeg_path)
+      File.open(jpeg_path, 'r') do |is|
+        @cover = Cover.new(is, jpeg_path, 'image/jpeg')
+      end
+    end
+  end
+
+
+  protected
+  def scanOpf!(zipfile, opfPath)
+    coverId = nil
+
+    opfXml = zipfile.read(opfPath)
+    opfDoc = Nokogiri::XML(opfXml)
+
+    #-------
+    # Author
+
+    grouping = @author.grouping
+    reading_order = @author.reading_order
+    sort_order = @author.sort_order
+
+    creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
+    if (creators.length > 0)
+      creator = creators[0]
+      if nil != creator
+        role = creator['opf:role']
+        if 'aut' == role
+          reading_order = creator.content
+
+          file_as = creator['opf:file-as']
+          if nil != file_as
+            sort_order = file_as
+          end
+        end
+
+        @author = Author.new(grouping, reading_order, sort_order)
+      end
+    end
+
+    #---------------------------------------
+    # Title
+
+    titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
+    if titles.length > 0
+      title = titles[0]
+      if nil != title
+        @title = title.content
+      end
+    end
+
+    #---------------------------------------
+    # Description
+
+    descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
+    if (descrNodes.length > 0)
+      descrNode = descrNodes[0]
+      if nil != descrNode
+        @description = descrNode.content
+      end
+    end
+
+    #---------------------------------------
+    # Language
+
+    langNodes = opfDoc.css('dc|language', 'dc' => @@DC_NS_URL)
+    if (langNodes.length > 0)
+      langNode = langNodes[0]
+      if langNode
+        @language = langNode.content
+      end
+    end
+
+    #---------------------------------------
+    # Other metadata:  series, volume, cover
+
+    metas = opfDoc.css('package metadata meta')
+    for m in metas
+      name = m['name']
+      content = m['content']
+
+      if 'calibre:series' == name
+        # TODO:  Dynamically create a new series?
+        # @series_id = content
+      elsif 'calibre:series-index' == name
+        @volume = content
+      elsif 'cover' == name
+        coverId = content
+        #puts 'File ' + @path + ' coverId ' + coverId
+      end
+    end
+
+    #---------------
+    # Load the cover
+
+    @cover = load_cover(zipfile, opfPath, opfDoc, coverId)
+  end
+
+  protected
+  def load_cover(zipfile, opfPath, opfDoc, coverId)
+    coverFile = nil
+    if nil == coverId
+      coverId = "cover-image"
+    end
+
+    items = opfDoc.css('package manifest item')
+    for i in items
+      href = i['href']
+      id = i['id']
+      mimeType = i['media-type']
+
+      if coverId == id
+        entry = zipfile.find_entry(href)
+
+        if nil == entry
+          # Although the epub standard requires the path to be relative
+          # to the base of the epub (zip), some books encountered in the
+          # wild have been found to use a path relative to the location
+          # of the opf file.
+          parts = opfPath.split('/')
+          opfBasePath = opfPath.split('/')[0..-2].join('/')
+          coverPath = opfBasePath + '/' + href
+          entry = zipfile.find_entry(coverPath)
+        end
+
+        unless entry
+          # Another case found in the wild:  cover image is at the root, but path is '../cover.jpeg'
+          if href.start_with? '../'
+            coverPath = href[3..-1]
+            entry = zipfile.find_entry(coverPath)
+          end
+        end
+
+        if nil == entry
+          puts 'WARNING!  Cover image "' + href + '" not found in file "' + @path + '".'
+          return nil
+        else
+          entry.get_input_stream() do |is|
+            return Cover.new(is, href, mimeType)
+          end
+        end
+      end
+    end
+    return nil
+  end
+end
+
diff --git a/app/book_loader.rb b/app/book_loader.rb
new file mode 100644 (file)
index 0000000..5516f04
--- /dev/null
@@ -0,0 +1,28 @@
+
+require_relative 'book'
+require_relative 'store'
+
+class BookLoader 
+  DONE_MARKER = '<END>'
+
+  def initialize(config_file, queue)
+    @config_file = config_file
+    @queue = queue
+  end
+
+  def run
+    @store = Store.new(@config_file)
+    @store.connect()
+
+    file = @queue.pop
+    until file == DONE_MARKER do
+      book = Book.new(@store)
+      book.load_from_file!(file)
+      @store.store_book(book)
+
+      file = @queue.pop
+    end
+
+    @store.disconnect()
+  end
+end
diff --git a/app/classification.rb b/app/classification.rb
new file mode 100644 (file)
index 0000000..2061e46
--- /dev/null
@@ -0,0 +1,75 @@
+
+class Classification
+  def initialize(ddc, lcc, author_grouping, author, title_grouping, title)
+    @id = nil
+    @ddc = ddc
+    @lcc = lcc
+    @author_grouping = author_grouping
+    @author = author
+    @title_grouping = title_grouping
+    @title = title
+  end
+
+  def id
+    @id
+  end
+  def id=(value)
+    @id = value
+  end
+
+  def ddc
+    @ddc
+  end
+  def lcc
+    @lcc
+  end
+  def author_grouping
+    @author_grouping
+  end
+  def author
+    @author
+  end
+  # Readers for the remaining constructor fields.
+  attr_reader :title_grouping, :title
+
+  # Summarises the classification, listing only the fields that are set.
+  def inspect
+    data = []
+    if nil != @ddc
+      data.push('Dewey=' + @ddc.to_s)
+    end
+    if nil != @lcc
+      data.push('LCC=' + @lcc.to_s)
+    end
+    if nil != @author_grouping
+      data.push('author_grouping=' + @author_grouping.to_s)
+    end
+    if nil != @author
+      data.push('author=' + @author.to_s)
+    end
+    if nil != @title_grouping
+      data.push('title_grouping=' + @title_grouping.to_s)
+    end
+    data.push('title=' + @title.to_s) if nil != @title
+
+    return '(Classification:' + data.join(',') + ')'
+  end
+
+  def to_s
+    inspect
+  end
+
+  protected
+  def reading_to_sort_order(reading_order)
+    sort_order = reading_order
+
+    parts = reading_order.split(' ')
+    if parts.length > 1
+      sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
+    end
+
+    return sort_order
+  end
+end
+
diff --git a/app/cover.rb b/app/cover.rb
new file mode 100644 (file)
index 0000000..e74c27b
--- /dev/null
@@ -0,0 +1,57 @@
+
+class Cover
+  attr_reader :path
+
+  def initialize(inputStream, path, mimeType)
+    if nil != inputStream
+      @data = inputStream.read
+    else
+      @data = nil
+    end
+    @path = path
+    @mimeType = mimeType
+  end
+
+  def inspect
+    info = []
+    if nil != @data
+      info.push('size=' + @data.length.to_s)
+    else
+      info.push('empty')
+    end
+    if nil != @path
+      info.push('path="' + @path + '"')
+    end
+    if nil != @mimeType
+      info.push('mimeType="' + @mimeType + '"')
+    end
+    return '(Cover:' + info.join(',') + ')'
+  end
+
+  def read_image(filename)
+    open(filename, 'rb') do |fd|
+      @data = fd.read()
+    end
+  end
+
+  def to_s
+    return inspect
+  end
+
+  def write_image(outputDir, filename)
+    open(outputDir + '/' + filename, 'wb') do |fd|
+      fd.write(@data)
+    end
+    return filename, @mimeType
+  end
+
+  protected
+  def getExt
+    pos = @path.rindex('.')
+    if nil == pos
+      return '.img'
+    end
+    return @path.slice(pos, @path.length)
+  end
+end
+
diff --git a/app/extract.rb b/app/extract.rb
new file mode 100644 (file)
index 0000000..c695941
--- /dev/null
@@ -0,0 +1,50 @@
+require 'find'
+require 'pathname'
+
+def exec(cmdline)
+  puts "$ #{cmdline}"
+  result = system(cmdline)
+  unless result
+    puts "FAILED:  #{cmdline}"
+  end
+  result
+end
+
+def extract_epub(source_file, source_path, dest_path)
+  relative_path = source_file[source_path.length .. source_file.length]
+  dest_file = "#{dest_path}/#{relative_path}"
+  dest_file = dest_file[0 .. (dest_file.length - 6)] + ".txt"
+
+  required_path = Pathname(dest_file).dirname
+  unless File.directory? required_path
+    unless exec("mkdir -p #{required_path}")
+      return false
+    end
+  end
+
+  if File.exist? dest_file
+    source_time = File.mtime source_file
+    dest_time = File.mtime dest_file
+    comp = dest_time <=> source_time
+    if comp > 0
+      return true # Nothing to do, extraction is already up-to-date
+    end
+  end
+    
+  exec("ebook-convert #{source_file} #{dest_file}")
+end
+
+def scan_dir(source_path, dest_path)
+  # Extract every *.epub found under source_path, skipping the _bis/_ter/_quater
+  # copies.  The dot is escaped so that only a literal ".epub" suffix matches.
+  Find.find(source_path) do |f|
+    next unless f.match?(/\.epub\z/)
+    next if f.match?(/_(bis|ter|quater)\.epub\z/)
+    extract_epub(f, source_path, dest_path)
+  end
+end
+
+dest_path = ARGV[0]
+for arg in ARGV[1 .. ARGV.length]
+  scan_dir(arg, dest_path)
+end
diff --git a/app/main.rb b/app/main.rb
new file mode 100644 (file)
index 0000000..e294b4a
--- /dev/null
@@ -0,0 +1,56 @@
+require_relative 'navigator'
+require_relative 'page'
+require_relative 'store'
+require_relative 'walk_dir'
+
+@outputDir = 'output'
+
+@config_file = 'quanlib.ini'
+@skip_class = false
+
+# Applies one command-line option; arguments without a leading "--" are
+# directories to scan and are ignored here.
+def handleArg(arg)
+  if arg.start_with?("--config=")
+    @config_file = arg[9..-1]
+    puts 'Using config file "' + @config_file + '".'
+  elsif "--purge" == arg
+    puts 'Purging database...'
+    @store.dropSchema()
+    File.delete(@store.unclassified_csv) if File.exist?(@store.unclassified_csv)
+  elsif "--skip-class" == arg
+    puts 'Skipping load of classification table.'
+    @skip_class = true
+  elsif arg.start_with?("--")
+    abort('ERROR:  Unrecognized option "' + arg + '".')
+  end
+end
+
+# Resolve --config= first so the store is opened with the requested file.
+config_arg = ARGV.reverse.find { |a| a.start_with?('--config=') }
+@config_file = config_arg[9..-1] if config_arg
+@store = Store.new(@config_file)
+@store.connect()
+ARGV.each { |arg| handleArg(arg) }
+
+@store.init_db(@skip_class)
+
+for arg in ARGV
+  if ! arg.start_with?("--")
+    puts 'Scanning directory "' + arg + '"...'
+    w = WalkDir.new(@config_file, arg)
+    w.books
+  end
+end
+
+@store.cross_reference_lists
+
+puts 'Creating output...'
+
+navigator = Navigator.new(@store)
+navigator.write_atoz_pages()
+navigator.write_series_listing()
+navigator.write_dewey()
+
+@store.disconnect()
+
diff --git a/app/navigator.rb b/app/navigator.rb
new file mode 100644 (file)
index 0000000..881b1fa
--- /dev/null
@@ -0,0 +1,157 @@
+require_relative 'page'
+require_relative 'store'
+
+class Navigator
+  def initialize(store)
+    @store = store
+  end
+
+  def write_atoz_pages
+    atoz_counts = {}
+
+    ('A'..'Z').each do |letter| 
+      atoz_counts[letter] = write_authors_starting_with(letter)
+    end
+
+    content = '<p><table><tr><th>Author</th><th>Books</th></tr>'
+    ('A'..'Z').each do |letter|
+      content += '  <tr><td><a href="../atoz/output_' + letter + '.html">Starting with ' + letter + '</a></td><td>' + atoz_counts[letter].to_s + '</td></tr>'
+    end
+    content += '</table></p>'
+    page = Page.new(@store)
+    page.output_dir = 'atoz'
+    page.special = content
+    page.up = ['../output/index.html', 'Up']
+
+    page.write_html( [] )
+  end
+
+  def write_authors_starting_with(letter)
+    book_ids = @store.query_books_by_author(letter + '%')
+    puts 'Authors starting with "' + letter + '":  ' + book_ids.length.to_s() + ' books.'
+
+    page = Page.new(@store)
+    if 'A' != letter
+      page.back = ['../atoz/output_' + (letter.ord - 1).chr + '.html', 'Prev']
+    end
+    if 'Z' != letter
+      page.forward = ['../atoz/output_' + (letter.ord + 1).chr + '.html', 'Next']
+    end
+    page.output_dir = 'atoz'
+    page.index_file = 'output_' + letter + '.html'
+    page.title = "Authors starting with '" + letter + "'"
+    page.up = ['../atoz/index.html', 'Up']
+
+    page.write_html(book_ids)
+    return book_ids.length
+  end
+
+  def write_dewey
+    book_ids = @store.query_books_by_ddc()
+    puts 'Non-fiction books arranged by Dewey Decimal Classification:  ' + book_ids.length.to_s() + ' books.'
+
+    page = Page.new(@store)
+    page.output_dir = 'ddc'
+    page.index_file = 'index.html'
+    page.title = "Non-fiction books arranged by Dewey Decimal call number"
+    page.up = ['../output/index.html', 'Up']
+    
+    page.write_html(book_ids)
+    return book_ids.length
+  end
+
+  def write_series_for_age(age)
+    series_infos = []
+
+    series_ids = @store.query_series_by_age(age)
+
+    series_ids.each do |id|
+      series = @store.load_series(id)
+      book_ids = @store.query_books_by_series_id(id)
+      if nil != book_ids and book_ids.length > 0
+        series_infos.push( [series, book_ids] )
+      end
+    end
+
+    for idx in 0 .. (series_infos.length - 1) do 
+      #puts series.descr + ': ' + book_ids.length.to_s + ' books.'
+
+      back = nil
+      fwd = nil
+
+      if idx > 0
+        back = series_infos[idx-1]
+      end
+      if (idx + 1) < series_infos.length
+        fwd = series_infos[idx+1]
+      end
+
+      cur = series_infos[idx]
+      series = cur[0]
+      book_ids = cur[1]
+
+      page = Page.new(@store)
+      if nil != back
+        page.back = [back[0].key + '.html', 'Back']
+      end
+      if nil != fwd
+        page.forward = [fwd[0].key + '.html', 'Forward']
+      end
+      page.output_dir = 'series/series_' + age
+      page.index_file = series.key + '.html'
+      page.title = 'Series &ldquo;' + series.descr + '&rdquo; (' + book_ids.length.to_s + ' books)'
+      page.up = ['index.html', 'Up']
+  
+      page.write_html(book_ids)
+    end
+
+    content =  '<h1>&ldquo;' + age + '&rdquo; Series</h1>'
+    content += '<p><table><tr><th>Author</th><th>Series</th><th>Genre</th><th>Books</th></tr>'
+    series_infos.each do |cur|
+      series = cur[0]
+      book_ids = cur[1]
+
+      author = series.grouping
+      letter = author[0]
+
+      content += '  <tr>'
+      content += '<td><a href="../../atoz/output_' + letter + '.html">' + author + '</a></td>'
+      content += '<td><a href="' + series.key + '.html">' + series.descr + '</a></td>'
+      content += '<td>' + series.genre + '</td>'
+      content += '<td>' + book_ids.length.to_s + '</td>'
+      content += '</tr>'
+    end
+    content += '</table></p>'
+    page = Page.new(@store)
+    page.output_dir = 'series/series_' + age
+    page.special = content
+    page.up = ['../index.html', 'Up']
+    page.write_html( [] )
+
+    return series_infos.length
+  end
+
+  def write_series_listing
+    ages = ['beginner', 'junior', 'ya', 'adult']
+    series_counts = {}
+
+    ages.each do |age|
+      puts 'Series for "' + age + '" readers...'
+
+      series_counts[age] = write_series_for_age(age)
+    end
+
+    content = '<h1>Browse Books By Series</h1>'
+    content += '<p>'
+    content += '<table><tr><th>Age</th><th>Number of Series</th></tr>'
+    ages.each do |age|
+      content += '<tr><td><a href="series_' + age + '/index.html">' + age + '</a></td><td>' + series_counts[age].to_s + '</td></tr>'
+    end
+    content += '</table></p>'
+    page = Page.new(@store)
+    page.output_dir = 'series'
+    page.special = content
+    page.up = ['../output/index.html', 'Up']
+    page.write_html( [] )
+  end
+end
diff --git a/app/page.rb b/app/page.rb
new file mode 100644 (file)
index 0000000..638f9ad
--- /dev/null
@@ -0,0 +1,166 @@
+require 'fileutils'
+
+require_relative 'store'
+
+class Page
+  def initialize(store)
+    @back = nil
+    @forward = nil
+    @index_file = 'index.html'
+    @output_dir = 'output'
+    @special = nil
+    @store = store
+    @title = 'Books'
+    @up = nil
+  end
+
+  def back=(value)
+    @back = value
+  end
+
+  def forward=(value)
+    @forward = value
+  end
+
+  def index_file=(value)
+    @index_file = value
+  end
+
+  def navig_link(data)
+    if (nil == data)
+      return ''
+    end
+    return '<a href="' + data[0] + '">' + data[1] + '</a>'
+  end
+
+  def output_dir=(value)
+    @output_dir = value
+  end
+
+  def special=(value)
+    @special = value
+  end
+
+  def title=(value)
+    @title = value
+  end
+
+  def up=(value)
+    @up = value
+  end
+
+  def write_books(fd, book_ids)
+    for id in book_ids
+      book = @store.load_book(id)
+      image = nil
+      if nil != book.cover
+        #@imageCount += 1
+        #(path, mimeType) = book.cover.write_image(@output_dir, 'image' + @imageCount.to_s)
+        #image = '<img class="cover-thumb" src="' + path + '"/>'
+        path = book.cover.path
+        image = '<img class="cover-thumb" src="' + path + '"/>'
+      else
+        image = '(No cover image)'
+      end
+
+      fd.puts '    <div><table>'
+      fd.puts '      <tr><td><a href="' + book.path + '">' + image + '</a></td>'
+
+      heading = book.heading()
+      description = book.description()
+      if nil != description
+        fd.puts '          <td><span class="popup">' + heading + '<span class="pop-inner"><p>' + heading + '</p><p>' + description + '</p></span></span></td></tr>'
+      else
+        fd.puts '          <td>' + heading + '</td></tr>'
+      end
+    
+      fd.puts '    </table></div>'
+    end
+  end
+
+  def write_footer(fd)
+    fd.puts '    <p class="navigator">' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '</p>'
+  end
+
+  def write_header(fd)
+    fd.puts '    <h1 class="header">' + @title + '</h1>'
+
+    fd.puts '    <p class="navigator">' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '</p>'
+  end
+
+  def write_html(book_ids)
+    @imageCount = 0
+
+    if ! Dir.exist?(@output_dir)
+      FileUtils.mkdir_p(@output_dir)
+    end
+
+    open(@output_dir + '/' + @index_file, 'w') do |fd|
+      fd.puts '<html>'
+      fd.puts '  <head>'
+      fd.puts '    <meta charset="utf-8"/>'
+      fd.puts '    <title>' + @title + '</title>'
+
+      write_style_sheet(fd)
+
+      fd.puts '  </head>'
+      fd.puts '  <body>'
+      
+      write_header(fd)
+
+      write_special(fd)
+      write_books(fd, book_ids)
+  
+      write_footer(fd)
+
+      fd.puts "  </body>"
+      fd.puts "</html>"
+    end
+  end
+
+  def write_special(fd)
+    if (nil != @special)
+      fd.puts(@special)
+    end
+  end
+
+  # Writes the inline <style> block that write_html puts in each page's <head>.
+  def write_style_sheet(fd)
+      style = <<EOS
+    <style>
+      div { 
+        display: inline-block;
+        width: 400px;
+        margin: 10px;
+        border: 3px solid #73ad21;
+      }
+      h1.header { 
+        background: #4040a0;
+        color: #ffffff;
+        text-align: center;
+      }
+      img.cover-thumb { max-height: 200px; max-width: 200px; }
+      p.navigator { }
+      span.popup { }
+      span.popup:hover { text-decoration: none; background: #cfffff; z-index: 6; }
+      span.popup span.pop-inner { 
+        border-color:black; 
+        border-style:solid; 
+        border-width:1px;
+        display: none; 
+        margin: 4px 0 0 0px; 
+        padding: 3px 3px 3px 3px;
+        position: absolute; 
+      }
+      span.popup:hover span.pop-inner { 
+        background: #ffffaf; 
+        display: block; 
+        margin: 20px 0 0 0px; 
+        z-index:6;
+      }
+    </style>
+EOS
+      fd.puts style
+  end
+end
+
diff --git a/app/series.rb b/app/series.rb
new file mode 100644 (file)
index 0000000..0621876
--- /dev/null
@@ -0,0 +1,87 @@
+
+class Series
+  def initialize(id)
+    @age = nil
+    @genre = nil
+    @grouping = nil 
+    @code = nil
+    @descr = nil
+    @id = id
+  end
+
+  def age
+    @age
+  end
+
+  def age=(value)
+    @age = value
+  end
+
+  def code
+    @code
+  end
+
+  def code=(value)
+    @code = value
+  end
+
+  def descr
+    @descr
+  end
+
+  def descr=(value)
+    @descr = value
+  end
+
+  def genre
+    @genre
+  end
+
+  def genre=(value)
+    @genre = value
+  end
+
+  def grouping
+    @grouping
+  end
+
+  def grouping=(value)
+    @grouping = value
+  end
+
+  def id
+    @id
+  end
+
+  def inspect
+    data = []
+    if nil != @age
+      data.push('age="' + @age.inspect + '"')
+    end
+    if nil != @code
+      data.push('code="' + @code.inspect + '"')
+    end
+    if nil != @descr
+      data.push('descr="' + @descr + '"')
+    end
+    if nil != @genre
+      data.push('genre="' + @genre + '"')
+    end
+    if nil != @grouping
+      data.push('grouping="' + @grouping + '"')
+    end
+    return '(Series:' + data.join(',') + ')'
+  end
+
+  def key
+    if nil != grouping and nil != code
+      return grouping.to_s + '_' + code.to_s
+    end
+    return id.to_s
+  end
+
+  def to_s
+    return inspect()
+  end
+end
+
diff --git a/app/store.rb b/app/store.rb
new file mode 100644 (file)
index 0000000..1a33ca3
--- /dev/null
@@ -0,0 +1,655 @@
+
+require 'csv'
+require 'fileutils'
+require 'inifile'
+require 'pg'
+
+require_relative 'series'
+require_relative 'tconn'
+
+class Store
+  def unclassified_csv
+    @basePath + '/csv/unclassified.csv'
+  end
+
+  def initialize(config_file)
+    @conn = nil
+
+    config = IniFile.load(config_file)
+    if nil == config
+      puts 'FATAL:  Failed to load config file "' + config_file + '".  Aborting initialization.'
+      return
+    end
+
+    section = config['database']
+    @dbhost = section['host']
+    @dbport = 5432
+    @dbname = section['name']
+    @dbuser = section['user']
+    @dbpass = section['pass']
+
+    section = config['filesystem']
+    @basePath = section['basePath']
+  end
+
+  def connect
+    @conn = TimedConn.new(PG.connect(@dbhost, @dbport, '', '', @dbname, @dbuser, @dbpass))
+    return @conn
+  end
+
+  def disconnect
+    @conn.close()
+  end
+
+  def construct_efs_path(efs_id)
+    id_str = sprintf('%010d', efs_id)
+    path = sprintf('%s/%s/%s/%s', id_str[0,2], id_str[2,2], id_str[4,2], id_str[6,2])
+    name = id_str + '.dat'
+    return path, name
+  end
+
+  def cross_reference_lists
+puts "@@@@@@@@@@@ CROSS-REF START @@@@@@@@@@@"
+    exec_update("TRUNCATE TABLE Lists CASCADE;", [])
+
+    populate_lists_table
+puts "@@@@@@@@@@@ CROSS-REF DONE @@@@@@@@@@@"
+  end
+
+  def create_schema(skip_class)
+    create_authors =
+<<EOS
+      CREATE TABLE Authors (
+        id          INTEGER PRIMARY KEY,
+        grouping    VARCHAR(64),
+        reading     VARCHAR(256),
+        sort        VARCHAR(256)
+      );
+EOS
+
+    create_books =
+<<EOS
+      CREATE TABLE Books (
+        id             INTEGER PRIMARY KEY,
+        arrived        TIMESTAMP,
+        author         INTEGER REFERENCES Authors(id),
+        classification INTEGER REFERENCES Classifications(id),
+        cover          INTEGER,
+        language       VARCHAR(64),
+        description    TEXT,
+        path           VARCHAR(256),
+        series         INTEGER REFERENCES Series(id),
+        title          VARCHAR(256),
+        volume         VARCHAR(16)
+      );
+EOS
+
+    create_classification =
+<<EOS
+      CREATE TABLE Classifications (
+        id              INTEGER PRIMARY KEY,
+        ddc             VARCHAR(32),
+        lcc             VARCHAR(32),
+        author_grouping VARCHAR(64),
+        author_sort     VARCHAR(128),
+        title_grouping  VARCHAR(256),
+        title           VARCHAR(256)
+      );
+EOS
+
+    create_efs =
+<<EOS
+      CREATE TABLE EFS (
+        id          INTEGER PRIMARY KEY,
+        mimetype    VARCHAR(64)
+      );
+EOS
+
+    create_fast =
+<<EOS
+      CREATE TABLE FAST (
+        id          VARCHAR(32) PRIMARY KEY,
+        descr       VARCHAR(128)
+      );
+EOS
+
+    # Associative entity, linking FAST and Classifications tables
+    # in a 0..n to 0..m relationship
+    create_fast_classifications =
+<<EOS
+      CREATE TABLE FAST_Classifications (
+        fast           VARCHAR(32) REFERENCES FAST(id),
+        classification INTEGER REFERENCES Classifications(id)
+      );
+EOS
+
+    create_lists =
+<<EOS
+      CREATE TABLE Lists (
+        id             INTEGER PRIMARY KEY,
+        age            VARCHAR(32),
+        category       VARCHAR(32),
+        code           VARCHAR(2),
+        year           INTEGER,
+        author         INTEGER REFERENCES Authors(id),
+        title          VARCHAR(256)
+      );
+EOS
+
+    # Associative entity, linking Lists and Books tables
+    # in a 0..n to 0..m relationship
+    create_lists_books =
+<<EOS
+      CREATE TABLE Lists_Books (
+        list           INTEGER REFERENCES Lists(id),
+        book           INTEGER REFERENCES Books(id)
+      );
+EOS
+
+    create_series =
+<<EOS
+      CREATE TABLE Series (
+        id          INTEGER PRIMARY KEY,
+        age         VARCHAR(32),
+        genre       VARCHAR(32),
+        grouping    VARCHAR(64),
+        code        VARCHAR(16),
+        descr       VARCHAR(128)
+      )
+EOS
+
+    stmts = [
+      create_authors,
+      create_classification,
+      create_efs,
+      create_fast,
+      create_series,
+      create_books,
+      create_fast_classifications,
+      create_lists,
+      create_lists_books,
+      'CREATE SEQUENCE author_id;',
+      'CREATE SEQUENCE book_id;',
+      'CREATE SEQUENCE classification_id;',
+      'CREATE SEQUENCE efs_id;',
+      'CREATE SEQUENCE list_id;',
+      'CREATE SEQUENCE series_id;'
+    ]
+
+    for stmt in stmts
+      @conn.exec(stmt)
+    end
+
+    if skip_class == false
+      populate_fast_table
+      populate_classifications_table
+    end
+
+    populate_series_table
+  end
+
+  def dropSchema
+    stmts = [
+      'DROP TABLE Lists_Books;',
+      'DROP TABLE Lists;',
+      'DROP TABLE Books;',
+      'DROP TABLE FAST_Classifications;',
+      'DROP TABLE Authors;',
+      'DROP TABLE Classifications;',
+      'DROP TABLE EFS;',
+      'DROP TABLE FAST;',
+      'DROP TABLE Series;',
+      'DROP SEQUENCE author_id;',
+      'DROP SEQUENCE book_id;',
+      'DROP SEQUENCE classification_id;',
+      'DROP SEQUENCE efs_id;',
+      'DROP SEQUENCE list_id;',
+      'DROP SEQUENCE series_id;'
+    ]
+
+    for stmt in stmts do
+      begin
+        @conn.exec(stmt)
+      rescue Exception => exc
+        puts 'WARNING:  "' + stmt + '" failed:  ' + exc.to_s
+      end
+    end
+  end
+
+  def find_all_authors(author_name)
+    result = []
+
+    sqlSelect = "SELECT id FROM Authors WHERE grouping=$1;"
+    args = [author_name]
+
+    @conn.exec_params(sqlSelect, args) do |rs|
+      rs.each do |row|
+        result << row['id']
+      end
+    end
+
+    result
+  end
+
+  def find_author(author)
+    sqlSelect = "SELECT id FROM Authors WHERE grouping=$1 AND reading=$2 AND sort=$3;"
+    args = [author.grouping, author.reading_order, author.sort_order]
+
+    @conn.exec_params(sqlSelect, args) do |rs|
+      if rs.ntuples > 0
+        return rs[0]['id']
+      end
+    end
+
+    return nil
+  end
+
+  def init_db(skip_class)
+    sql = "SELECT 1 FROM pg_tables WHERE tableowner='quanlib' AND tablename='books'"
+    found = false
+    @conn.exec(sql).each do |row|
+      found = true
+    end
+
+    if ! found
+      create_schema(skip_class)
+    end
+  end
+
+  def load_author(id)
+    sqlSelect = "SELECT grouping, reading, sort FROM Authors WHERE id=$1"
+    args = [id]
+    @conn.exec_params(sqlSelect, args) do |rs|
+      if rs.ntuples != 1
+        raise "Expected 1 row for " + id + " but got " + rs.ntuples + ":  " + sqlSelect
+      end
+      row = rs[0]
+      author = Author.new(row['grouping'], row['reading'], row['sort'])
+      return author
+    end
+    return nil
+  end
+
+  def store_author(author)
+    id = find_author(author)
+    if nil == id
+      id = next_id('author_id')
+      sqlInsert = "INSERT INTO Authors(id, grouping, reading, sort) VALUES ($1, $2, $3, $4);"
+      args = [id, author.grouping, author.reading_order, author.sort_order]
+      begin
+        rs = @conn.exec_params(sqlInsert, args)
+      rescue Exception => e
+        puts sqlInsert + ":  " + args.inspect()
+        puts e.message
+        puts $@
+      ensure
+        rs.clear if rs
+      end
+    end
+    return id
+  end
+
+  def load_book(id)
+    sql = "SELECT author, classification, cover, description, language, path, series, title, volume FROM Books WHERE id=$1;"
+    book = nil
+
+    begin
+      @conn.exec_params(sql, [id]) do |rs|
+        if 1 != rs.ntuples
+          raise 'Expected one row in Books for id ' + id + ', but found ' + rs.length + '.'
+          return nil
+        end
+        row = rs[0]
+
+        book = Book.new(self)
+        book.author = load_author(row['author'])
+        book.classification_id = row['classification']
+        book.cover = load_cover(row['cover'])
+        book.description = row['description']
+        book.language = row['language']
+        book.path = row['path']
+        book.series_id = row['series']
+        book.title = row['title']
+        book.volume = row['volume']
+      end
+    rescue Exception => e
+      puts sql + ": " + id
+      puts e.message
+      puts $@
+    end
+
+    return book
+  end
+
+  def store_book(book)
+    sql = "INSERT INTO Books (id, arrived, author, classification, cover, description, language, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);"
+
+    book_id = next_id('book_id')
+
+    author_id = store_author(book.author)
+    (efs_id, mime_type) = store_cover(book)
+
+    args = [book_id, book.arrived, author_id, book.classification_id, efs_id, book.description, book.language, book.path, book.series_id, book.title, book.volume]
+
+    begin
+      rs = @conn.exec_params(sql, args)
+    rescue Exception => e
+      puts sql + ": " + args.inspect()
+      puts e.message
+      puts $@
+    ensure
+      rs.clear if rs
+    end
+
+    return book_id
+  end
+
+  def find_classification(author_grouping, title_grouping)
+    sql = "SELECT id FROM Classifications WHERE author_grouping = $1 AND title_grouping = $2;"
+    @conn.exec_params(sql, [author_grouping, title_grouping]) do |rs|
+      if rs.ntuples > 0
+        return rs[0]['id']
+      end
+    end
+    return nil
+  end
+
+  def load_classification(id)
+    sql  = "SELECT ddc, lcc, author_grouping, author_sort, title_grouping, title "
+    sql += " FROM Classifications WHERE id=$1"
+    @conn.exec_params(sql, [id]) do |rs|
+      if rs.ntuples > 0
+        row = rs[0]
+        ddc = row['ddc']
+        lcc = row['lcc']
+        author_grouping = row['author_grouping']
+        author = row['author_sort']
+        title_grouping = row['title_grouping']
+        title = row['title']
+
+        result = Classification.new(ddc, lcc, author_grouping, author, title_grouping, title)
+        result.id = id
+        return result
+      end
+    end
+
+    return nil
+  end
+
+  def load_cover(id)
+    if nil == id
+      return nil
+    end
+
+    mime_type = 'application/octet-stream'
+
+    sql = "SELECT mimeType FROM Efs WHERE id=$1"
+    @conn.exec_params(sql, [id]) do |rs|
+      if rs.ntuples != 1
+        raise "Expected one row but got " + rs.ntuples + ": " + sql + ": " + id
+      end
+      mime_type = rs[0]['mimeType']
+    end
+
+    (efspath, efsname) = construct_efs_path(id)
+
+    fullpath = @basePath + '/efs/' + efspath + '/' + efsname
+
+    return Cover.new(nil, fullpath, mime_type)
+  end
+
+  def store_cover(book)
+    efs_id = nil
+    cover = book.cover()
+
+    if nil == cover
+      return nil
+    end
+
+    @conn.exec("SELECT nextval('efs_id')") do |rs|
+      efs_id = rs[0]['nextval']
+    end
+
+    if nil == efs_id
+      return nil
+    end
+
+    (efspath, efsname) = construct_efs_path(efs_id)
+
+    efspath = @basePath + '/efs/' + efspath
+
+    FileUtils.mkdir_p(efspath)
+
+    (filepath, mimetype) = cover.write_image(efspath, efsname)
+
+    sql = "INSERT INTO efs VALUES ($1, $2)"
+    begin
+      rs = @conn.exec_params(sql, [efs_id, mimetype])
+    rescue Exception => e
+      puts sql + ": " + efs_id + ", " + mimetype
+      puts e.message
+      puts $@
+    ensure
+      rs.clear if rs
+    end
+
+    return efs_id, mimetype
+  end
+
+  def exec_id_query(sql, args)
+    ids = []
+    @conn.exec_params(sql, args) do |rs|
+      rs.each do |row|
+        ids.push(row['id'])
+      end
+    end
+    return ids
+  end
+
+  def exec_update(sql, args)
+    begin
+      rs = @conn.exec_params(sql, args)
+    rescue Exception => e
+      puts sql + ": " + args.inspect()
+      puts e.message
+      puts $@
+    ensure
+      rs.clear if rs
+    end
+  end
+
+  def next_id(seq_name)
+    id = nil
+    @conn.exec("SELECT nextval('" + seq_name + "');") do |rs|
+      id = rs[0]['nextval']
+    end
+    return id
+  end
+
+  def get_series(grouping, code)
+    if nil == code
+      return nil
+    end
+
+    sql = "SELECT id FROM Series WHERE grouping=$1 AND code=$2;"
+    args = [grouping, code]
+    @conn.exec_params(sql, args).each do |row|
+      return row['id']
+    end
+
+    # TODO:  Create a new series object here?
+    puts 'WARNING:  series("' + grouping + '", "' + code + '") not found.'
+    return nil
+  end
+
+  def load_series(id)
+    sql = "SELECT age,genre,grouping,code,descr FROM Series WHERE id=$1;"
+    args = [id]
+    @conn.exec_params(sql, args) do |rs|
+      if rs.ntuples > 0
+        row = rs[0]
+        series = Series.new(id)
+        series.age = row['age']
+        series.genre = row['genre']
+        series.grouping = row['grouping']
+        series.code = row['code']
+        series.descr = row['descr']
+        return series
+      end
+    end
+    return nil
+  end
+
+  def populate_classifications_table
+    puts "Populating the Classifications table..."
+    first = true
+    CSV.foreach(@basePath + '/csv/class.csv') do |row|
+      if first
+        # skip the header row
+        first = false
+      else
+
+        # First, add a row to the Classifications table
+
+        id = next_id('classification_id')
+        ddc = row[0]
+        lcc = row[1]
+        author_grouping = row[2]
+        author_sort = row[3]
+        title_grouping = row[4]
+        title = row[5]
+
+        sqlInsert = "INSERT INTO Classifications (id, ddc, lcc, author_grouping, author_sort, title_grouping, title) VALUES ($1, $2, $3, $4, $5, $6, $7);"
+        args = [id, ddc, lcc, author_grouping, author_sort, title_grouping, title]
+        exec_update(sqlInsert, args)
+
+        # Second, link up with the appropriate FAST table entries
+
+        fast = []
+        input = row[6]
+        if input.length > 0
+          fast = input.split(';')
+        end
+
+        fast.each do |fast_id|
+          sqlInsert = "INSERT INTO FAST_Classifications (fast, classification) VALUES ($1, $2);"
+          args = [fast_id, id]
+          exec_update(sqlInsert, args)
+        end
+      end
+    end
+  end
+
+  def populate_fast_table
+    puts "Populating the FAST table..."
+    first = true
+    CSV.foreach(@basePath + '/csv/fast.csv') do |row|
+      if first
+        first = false  # skip the header row
+      else
+        id = row[0]
+        descr = row[1]
+        sqlInsert = "INSERT INTO FAST (id, descr) VALUES ($1, $2);"
+        exec_update(sqlInsert, [id, descr])
+      end
+    end
+  end
+
+  def populate_lists_table
+    puts "Populating the Lists table..."
+
+    CSV.foreach(@basePath + "/csv/lists.csv", headers: true) do |row|
+      author_ids = find_all_authors(row['author'])
+      if author_ids.empty?
+        specification = [row['age'], row['category'], row['code'], row['year'], row['author'], row['title']]
+          .map { |x| x.inspect }
+          .join(', ')
+
+        puts "WARNING: For list entry (#{specification}), no such author was found."
+
+        next
+      end
+
+      sqlInsert = %Q(
+        INSERT INTO Lists (id, age, category, code, year, author, title)
+        VALUES ($1, $2, $3, $4, $5, $6, $7);
+      )
+      author_ids.each do |author_id|
+        list_id = next_id('list_id')
+        args = [list_id, row['age'], row['category'], row['code'], row['year'], author_id, row['title']]
+        exec_update(sqlInsert, args)
+
+        update_lists_books_table(list_id, author_id, row['title'])
+      end
+    end
+  end
+
+  # Scan for books that match this Lists entry, and add any matches to the Lists_Books associative table
+  def update_lists_books_table(list_id, author_id, title)
+    title_pattern = Book.grouping_for_title(title).gsub('_', '%')
+    sqlSelect = "SELECT id FROM Books WHERE author = $1 AND title LIKE $2;"
+    args = [author_id, title_pattern]
+
+    @conn.exec_params(sqlSelect, args) do |rs|
+      rs.each do |row|
+        sqlInsert = "INSERT INTO Lists_Books (list, book) VALUES ($1, $2)"
+        args = [list_id, row['id']]
+        exec_update(sqlInsert, args)
+      end
+    end
+  end
+
+  def populate_series_table
+    puts "Populating the Series table..."
+    CSV.foreach(@basePath + '/csv/series.csv') do |row|
+      id = next_id('series_id')
+      sqlInsert = "INSERT INTO Series (id, age, genre, grouping, code, descr) VALUES ($1, $2, $3, $4, $5, $6);"
+      args = [id] + row
+      exec_update(sqlInsert, args)
+    end
+  end
+
+  def query_books_by_author(pattern)
+    sql =
+<<EOS
+      SELECT b.id FROM Authors a
+      INNER JOIN Books b ON b.author=a.id
+      LEFT OUTER JOIN Series s on s.id=b.series
+      WHERE upper(a.grouping) LIKE $1
+      ORDER BY a.grouping, b.series, b.volume, b.title
+EOS
+    return exec_id_query(sql, [pattern])
+  end
+
+  def query_books_by_ddc
+    sql =
+<<EOS
+      SELECT b.id FROM Classifications c
+      INNER JOIN Books b ON b.classification=c.id
+      ORDER BY c.ddc
+EOS
+    return exec_id_query(sql, [])
+  end
+
+  def query_books_by_series_id(id)
+    sql =
+<<EOS
+      SELECT b.id FROM Books b
+      WHERE b.series = $1
+      ORDER BY b.volume,b.title
+EOS
+    return exec_id_query(sql, [id])
+  end
+
+  def query_series_by_age(pattern)
+    sql =
+<<EOS
+      SELECT s.id
+      FROM Series s
+      WHERE s.age LIKE $1
+      ORDER BY s.grouping,s.descr
+EOS
+    return exec_id_query(sql, [pattern])
+  end
+end
+
diff --git a/app/tconn.rb b/app/tconn.rb
new file mode 100644 (file)
index 0000000..43fa0f5
--- /dev/null
@@ -0,0 +1,71 @@
+# tconn.rb
+#
+# Timed Connection:  
+# Wrapper around a PG Connection that provides a report on where time was spent executing SQL
+#
+
+require 'pg'
+
+class TimedConn 
+  def initialize(wrapped_conn)
+    @conn = wrapped_conn
+    @stmts = {}
+    @total_time = 0
+  end
+
+  def close
+    @conn.close()
+    puts "Connection closing.  Total SQL time:  " + @total_time.to_s + " secs"
+    @stmts.each do |sql, info| 
+      elapsed = info[2]
+      calls = info[1]
+      puts elapsed.to_s + " secs: " + calls.to_s + " times:  " + sql
+    end
+  end
+
+  def exec(*args, &block)
+    before = Time.now
+    #puts args.inspect
+    result = @conn.exec(*args)
+    #puts result.inspect
+    after = Time.now
+    elapsed = (after - before)
+    remember(args[0], elapsed)
+    @total_time += elapsed
+    if block_given?
+      yield(result)
+    else
+      return result
+    end
+  end
+
+  def exec_params(*args, &block)
+    before = Time.now
+    #puts args.inspect
+    result = @conn.exec_params(*args)
+    #puts result.inspect
+    after = Time.now
+    elapsed = (after - before)
+    remember(args[0], elapsed)
+    @total_time += elapsed
+    if block_given?
+      yield(result)
+    else
+      return result
+    end
+  end
+
+  def remember(sql, elapsed)
+    if @stmts.has_key?(sql)
+      stmt = @stmts[sql]
+    else
+      stmt = [sql, 0, 0]
+    end
+
+    stmt[1] += 1  # Number of times this statement has been invoked
+    stmt[2] += elapsed   # total elapsed time spent on this statement
+    
+    @stmts[sql] = stmt
+  end
+end
+
diff --git a/app/walk_dir.rb b/app/walk_dir.rb
new file mode 100644 (file)
index 0000000..a2c088f
--- /dev/null
@@ -0,0 +1,118 @@
+# Walk the directory (and subdirectories), identifying books.
+#
+# Expected format:
+#   .../AuthorName/Title_of_the_Awesome_Book.ext
+#
+# Author is given as FirstLast.  For example, 
+# Robert Anson Heinlein is RobertHeinlein, and 
+# J. K. Rowling is JoanneRowling.
+#
+# Book titles have spaces replaced with underscores,
+# and punctuation [,!?'] replaced with hyphens.
+#
+# If the book forms part of a series, then an all-capitals 
+# series designator, followed by a numeric volume number, 
+# followed by an underscore, is prefixed to the name.
+# For example, Hardy Boys' volume 1, The Tower Treasure, 
+# is rendered as .../FranklinDixon/HB001_The_Tower_Treasure.epub
+# and Mrs. Pollifax volume 6, On the China Station, is
+# .../DorothyGilman/P06_On_the_China_Station.epub.
+
+require_relative 'book'
+require_relative 'book_loader'
+require_relative 'store'
+
+class WalkDir
+  def initialize(config_file, root)
+    @queue = Queue.new
+    @root = root
+    @config_file = config_file
+    @threads = []
+
+    @files = walk(@root)
+  end
+
+  def books
+    @threads = []
+    num_threads.times do
+      @threads << Thread.new do
+        BookLoader.new(@config_file, @queue).run
+      end
+    end
+
+    result = []
+    @files = remove_duplicates(@files)
+    for file in @files.sort()
+      if Book.can_handle?(file) && (!is_duplicate?(file))
+        # Queue this book to be loaded and added to the DB by a BookLoader thread
+        @queue << file
+      end
+    end
+
+    @threads.count.times { @queue << BookLoader::DONE_MARKER }
+
+    @threads.each { |t| t.join }
+  end
+
+  # Duplicate versions of a text are named 
+  #   xxx_suffix.ext
+  # where suffix is one of bis, ter, quater, quinquies
+  # for the 2nd, 3rd, 4th or 5th variant respectively.
+  def is_duplicate?(file)
+    s = file.to_s
+    suffix = ['_bis.', '_ter.', '_quater.', '_quinquies.']
+    suffix.each do |pat|
+      if s.include?(pat)
+        return true
+      end
+    end
+    
+    return false
+  end
+
+  def remove_duplicates(files)
+    unique = {}
+    for file in files
+      if Book.can_handle?(file)
+        key = File.dirname(file) + '/' + File.basename(file, '.*')
+        if unique.has_key?(key)
+          new_ext = File.extname(file)
+          old_ext = File.extname(unique[key])
+          if ('.pdf' == old_ext) && ('.epub' == new_ext)
+            # Prefer EPUB over PDF
+            puts 'REPLACED ' + unique[key].to_s + ' with ' + file.to_s
+            unique[key] = file
+          else
+            puts 'DROPPED ' + file.to_s + " because it's superceded by " + unique[key].to_s
+          end
+        else
+          unique[key] = file
+        end
+      end
+    end
+
+    return unique.values
+  end
+
+  def walk(path)
+    result = []
+    children = Dir.entries(path)
+    for child in children
+      fullName = (path.chomp("/")) + "/" + child
+      if (File.directory?(fullName)) and (child != ".") and (child != "..") and (!File.symlink?(fullName))
+        sub = walk(fullName)
+        if (sub != nil) and (sub.length > 0)
+          result.concat(sub)
+        end
+      elsif (! File.directory?(fullName))
+        result.push(fullName)
+      end
+    end
+    return result
+  end
+
+  def num_threads
+    # TODO:  make this (auto?) configurable
+    12
+  end
+end
diff --git a/author.rb b/author.rb
deleted file mode 100644 (file)
index fb2003b..0000000
--- a/author.rb
+++ /dev/null
@@ -1,57 +0,0 @@
-
-class Author
-  def initialize(grouping, reading_order, sort_order)
-    @grouping = grouping
-    @reading_order = reading_order
-    @sort_order = sort_order
-
-    if (nil == sort_order) || ('Unknown' == sort_order)
-      @sort_order = reading_to_sort_order(reading_order)
-    end
-  end
-
-  def grouping
-    @grouping
-  end
-
-  def reading_order
-    @reading_order
-  end
-
-  def sort_order
-    @sort_order
-  end
-
-  def inspect
-    result = '(Author:'
-    if nil != @grouping
-      result += ' grouping="' + @grouping + '"'
-    end
-    if nil != @reading_order
-      result += ' reading_order="' + @reading_order + '"'
-    end
-    if nil != @sort_order
-      result += ' sort_order="' + @sort_order + '"'
-    end
-    result += ')'
-
-    return result
-  end
-
-  def to_s
-    inspect
-  end
-
-  protected
-  def reading_to_sort_order(reading_order)
-    sort_order = reading_order
-
-    parts = reading_order.split(' ')
-    if parts.length > 1
-      sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
-    end
-
-    return sort_order
-  end
-end
-
diff --git a/book.rb b/book.rb
deleted file mode 100644 (file)
index 2b93f4b..0000000
--- a/book.rb
+++ /dev/null
@@ -1,421 +0,0 @@
-
-require 'nokogiri'
-require 'rubygems'
-require 'zip'
-
-require_relative 'author'
-require_relative 'classification'
-require_relative 'cover'
-require_relative 'store'
-
-class Book
-  @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
-  @@SERIES_AND_VOLUME_REGEX = /^([A-Z]+)([0-9]+(\.[0-9]+)?)$/
-
-  attr_accessor :arrived
-  attr_accessor :author
-  attr_accessor :classification_id
-  attr_accessor :cover
-  attr_accessor :description
-  attr_accessor :language
-  attr_accessor :path
-  attr_accessor :series_id
-  attr_accessor :title
-  attr_accessor :volume
-
-  def initialize(store)
-    @store = store
-  end
-
-  def load_from_file!(fileName)
-    @path = fileName
-    parse_file_name!(fileName)
-  end
-
-  def self.can_handle?(fileName)
-    if nil == fileName
-      return false
-    end
-
-    #puts "Filename:  " + fileName.to_s
-    lowerName = fileName.downcase()
-
-    if lowerName.end_with?(".epub")
-      return true
-    end
-
-    if lowerName.end_with?(".pdf")
-      return true
-    end
-
-    return false
-  end
-
-  def self.grouping_for_title(title)
-    result = title
-
-    '\'",!#'.split('').each do |c|
-      result = result.gsub(c, '-')
-    end
-    result = result.gsub(/: */, '--')
-    result = result.gsub(' ', '_')
-
-    result
-  end
-
-  def heading
-    result = []
-
-    if nil != @title
-      result.push('<b>' + @title + '</b>')
-    else
-      result.push('<i>(Unknown title)</i>')
-    end
-    if nil != @author
-      result.push('<i>by ' + @author.reading_order + '</i>')
-    end
-
-    seriesInfo = []
-    series = @store.load_series(@series_id)
-    if nil != series and nil != series.descr
-      seriesInfo.push(series.descr.to_s)
-    end
-    if nil != @volume
-      seriesInfo.push(@volume.to_s)
-    end
-    if seriesInfo.length > 0
-      result.push(seriesInfo.join(' '))
-    end
-
-    classification = nil
-    if nil != @classification_id
-      classification = @store.load_classification(@classification_id)
-    end
-    if nil != classification
-      if nil != classification.ddc
-        result.push('Dewey: ' + classification.ddc.to_s)
-      end
-      if nil != classification.lcc
-        result.push('LCC: ' + classification.lcc.to_s)
-      end
-    end
-
-    return result.join('<br/>')
-  end
-
-  def inspect
-    data = []
-    if nil != @author
-      data.push('author="' + @author.inspect + '"')
-    end
-    if nil != @series_id
-      data.push('series_id="' + @series_id.to_s() + '"')
-    end
-    if nil != @volume
-      data.push('volume="' + @volume + '"')
-    end
-    if nil != @title
-      data.push('title="' + @title + '"')
-    end
-    if nil != @cover
-      data.push(@cover.inspect())
-    end
-    if nil != @path
-      data.push('path="' + @path + '"')
-    end
-    return '(Book:' + data.join(',') + ')'
-  end
-
-  def to_s
-    return inspect()
-  end
-
-  def title_grouping
-    if nil == @path
-      return nil
-    end
-
-    return File.basename(@path, '.*')
-  end
-
-  protected
-  def isUpper?(c)
-    return /[[:upper:]]/.match(c)
-  end
-
-  protected
-  def massage_author(input)
-    if nil == input
-      return nil
-    end
-
-    reading_order = ""
-    input.each_char do |c|
-      if isUpper?(c) and (reading_order.length > 0)
-        reading_order += " "
-     end
-      reading_order += c
-    end
-
-    return reading_order
-  end
-
-  # Returns (series, volumeNo, titleText)
-  protected
-  def processTitle(input)
-    if nil == input
-      return nil
-    end
-
-    arr = input.split('_')
-
-    series = nil
-    vol = nil
-
-    first = arr[0]
-    matchData = (arr[0]).match(@@SERIES_AND_VOLUME_REGEX)
-    if nil != matchData
-      capt = matchData.captures
-      series = capt[0]
-      vol = capt[1]
-      arr.shift
-    end
-
-    pos = arr[-1].rindex('.')
-    if nil != pos
-      arr[-1] = arr[-1].slice(0, pos)
-    end
-
-    title = arr.join(' ')
-
-    bare_title_grouping = title_grouping
-      .split('_')
-      .reject { |part| part.match(@@SERIES_AND_VOLUME_REGEX) }
-      .join('_')
-
-    unless bare_title_grouping == Book.grouping_for_title(title)
-      puts "WARNING:  title_grouping mismatch:  #{bare_title_grouping.inspect} vs. #{Book.grouping_for_title(title).inspect}"
-    end
-
-    return series, vol, title
-  end
-
-  protected
-  def parse_file_name!(file_name)
-    category = nil   # e.g., non-fiction, fan-fiction
-    grouping = ''
-
-    parts = file_name.split('/')
-    (series_code, @volume, @title) = processTitle(parts[-1])
-    if parts.length > 1
-      grouping = parts[-2]
-      reading_order = massage_author(grouping)
-      sort_order = nil
-      @author = Author.new(grouping, reading_order, sort_order)
-      @series_id = @store.get_series(grouping, series_code)
-    end
-    if parts.length > 2
-      category = parts[-3]
-    end
-
-    lc_file_name = file_name.downcase
-    if lc_file_name.end_with?(".epub")
-      scanEpub!(file_name)
-    elsif lc_file_name.end_with?(".pdf")
-      scan_pdf!(file_name)
-    end
-
-    @arrived = File.ctime(file_name)
-
-    @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*'))
-
-    # TODO:  Fix horrible hard-coded strings and paths
-    if ('01_nonfic' == category) && (nil == classification_id)
-      open(Store.unclassified_csv, 'a') do |fd|
-        fd.puts('"' + grouping.to_s + '","' + path + '"')
-      end
-    end
-  end
-
-  protected
-  def scanEpub!(fileName)
-    #puts 'Scanning "' + fileName.to_s + '"...'
-    begin
-      Zip.warn_invalid_date = false
-      Zip::File.open(fileName) do |zipfile|
-        entry = zipfile.find_entry('META-INF/container.xml')
-        if nil == entry
-          puts 'No META-INF/container.xml, skipping book ' + fileName
-          return
-        end
-        contXml = zipfile.read('META-INF/container.xml')
-        contDoc = Nokogiri::XML(contXml)
-        opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
-
-        scanOpf!(zipfile, opfPath)
-      end
-    rescue Zip::Error => exc
-      puts 'ERROR processing file "' + fileName + '":'
-      puts exc.message
-      puts exc.backtrace
-    end
-  end
-
-  protected
-  def scan_pdf!(file_name)
-    #puts 'Scanning "' + file_name.to_s + '"...'
-
-    pdf_path = File.expand_path(file_name).to_s
-    if ! pdf_path.end_with?('.pdf')
-      puts 'Unexpected internal error:  path "' + file_name.to_s + '" does not end with ".pdf".'
-      return
-    end
-
-    jpeg_path = pdf_path[0..-5] + '.jpeg'
-    if File.file?(jpeg_path)
-      File.open(jpeg_path, 'r') do |is|
-        @cover = Cover.new(is, jpeg_path, 'image/jpeg')
-      end
-    end
-  end
-
-
-  protected
-  def scanOpf!(zipfile, opfPath)
-    coverId = nil
-
-    opfXml = zipfile.read(opfPath)
-    opfDoc = Nokogiri::XML(opfXml)
-
-    #-------
-    # Author
-
-    grouping = @author.grouping
-    reading_order = @author.reading_order
-    sort_order = @author.sort_order
-
-    creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
-    if (creators.length > 0)
-      creator = creators[0]
-      if nil != creator
-        role = creator['opf:role']
-        if 'aut' == role
-          reading_order = creator.content
-
-          file_as = creator['opf:file-as']
-          if nil != file_as
-            sort_order = file_as
-          end
-        end
-
-        @author = Author.new(grouping, reading_order, sort_order)
-      end
-    end
-
-    #---------------------------------------
-    # Title
-
-    titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
-    if titles.length > 0
-      title = titles[0]
-      if nil != title
-        @title = title.content
-      end
-    end
-
-    #---------------------------------------
-    # Description
-
-    descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
-    if (descrNodes.length > 0)
-      descrNode = descrNodes[0]
-      if nil != descrNode
-        @description = descrNode.content
-      end
-    end
-
-    #---------------------------------------
-    # Language
-
-    langNodes = opfDoc.css('dc|language', 'dc' => @@DC_NS_URL)
-    if (langNodes.length > 0)
-      langNode = langNodes[0]
-      if langNode
-        @language = langNode.content
-      end
-    end
-
-    #---------------------------------------
-    # Other metadata:  series, volume, cover
-
-    metas = opfDoc.css('package metadata meta')
-    for m in metas
-      name = m['name']
-      content = m['content']
-
-      if 'calibre:series' == name
-        # TODO:  Dynamically create a new series?
-        # @series_id = content
-      elsif 'calibre:series-index' == name
-        @volume = content
-      elsif 'cover' == name
-        coverId = content
-        #puts 'File ' + @path + ' coverId ' + coverId
-      end
-    end
-
-    #---------------
-    # Load the cover
-
-    @cover = load_cover(zipfile, opfPath, opfDoc, coverId)
-  end
-
-  protected
-  def load_cover(zipfile, opfPath, opfDoc, coverId)
-    coverFile = nil
-    if nil == coverId
-      coverId = "cover-image"
-    end
-
-    items = opfDoc.css('package manifest item')
-    for i in items
-      href = i['href']
-      id = i['id']
-      mimeType = i['media-type']
-
-      if coverId == id
-        entry = zipfile.find_entry(href)
-
-        if nil == entry
-          # Although the epub standard requires the path to be relative
-          # to the base of the epub (zip), some books encountered in the
-          # wild have been found to use a bath relative to the location
-          # of the opf file.
-          parts = opfPath.split('/')
-          opfBasePath = opfPath.split('/')[0..-2].join('/')
-          coverPath = opfBasePath + '/' + href
-          entry = zipfile.find_entry(coverPath)
-        end
-
-        unless entry
-          # Another case found in the wild:  cover image is at the root, but path is '../cover.jpeg'
-          if href.start_with? '../'
-            coverPath = href[3..-1]
-            entry = zipfile.find_entry(coverPath)
-          end
-        end
-
-        if nil == entry
-          puts 'WARNING!  Cover image "' + href + '" not found in file "' + @path + '".'
-          return nil
-        else
-          entry.get_input_stream() do |is|
-            return Cover.new(is, href, mimeType)
-          end
-        end
-      end
-    end
-    return nil
-  end
-end
-
diff --git a/book_loader.rb b/book_loader.rb
deleted file mode 100644 (file)
index 5516f04..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-
-require_relative 'book'
-require_relative 'store'
-
-class BookLoader 
-  DONE_MARKER = '<END>'
-
-  def initialize(config_file, queue)
-    @config_file = config_file
-    @queue = queue
-  end
-
-  def run
-    @store = Store.new(@config_file)
-    @store.connect()
-
-    file = @queue.pop
-    until file == DONE_MARKER do
-      book = Book.new(@store)
-      book.load_from_file!(file)
-      @store.store_book(book)
-
-      file = @queue.pop
-    end
-
-    @store.disconnect()
-  end
-end
diff --git a/classification.rb b/classification.rb
deleted file mode 100644 (file)
index 2061e46..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-
-class Classification
-  def initialize(ddc, lcc, author_grouping, author, title_grouping, title)
-    @id = nil
-    @ddc = ddc
-    @lcc = lcc
-    @author_grouping = author_grouping
-    @author = author
-    @title_grouping = title_grouping
-    @title = title
-  end
-
-  def id
-    @id
-  end
-  def id=(value)
-    @id = value
-  end
-
-  def ddc
-    @ddc
-  end
-  def lcc
-    @lcc
-  end
-  def author_grouping
-    @author_grouping
-  end
-  def author
-    @author
-  end
-  def 
-
-  def inspect
-    data = []
-    if nil != @ddc
-      data.push('Dewey=' + @ddc.to_s)
-    end
-    if nil != @lcc
-      data.push('LCC=' + @lcc.to_s)
-    end
-    if nil != @author_grouping
-      data.push('author_grouping=' + @author_grouping.to_s)
-    end
-    if nil != @author
-      data.push('author=' + @author.to_s)
-    end
-    if nil != @title_grouping
-      data.push('title_grouping=' + @title_grouping.to_s)
-    end
-    if nil != @title
-      data.push('title=' + @title)
-    end
-
-    return '(Classification:' + data.join(',') + ')'
-  end
-
-  def to_s
-    inspect
-  end
-
-  protected
-  def reading_to_sort_order(reading_order)
-    sort_order = reading_order
-
-    parts = reading_order.split(' ')
-    if parts.length > 1
-      sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
-    end
-
-    return sort_order
-  end
-end
-
diff --git a/cover.rb b/cover.rb
deleted file mode 100644 (file)
index e74c27b..0000000
--- a/cover.rb
+++ /dev/null
@@ -1,57 +0,0 @@
-
-class Cover
-  attr_reader :path
-
-  def initialize(inputStream, path, mimeType)
-    if nil != inputStream
-      @data = inputStream.read
-    else
-      @data = nil
-    end
-    @path = path
-    @mimeType = mimeType
-  end
-
-  def inspect
-    info = []
-    if nil != @data
-      info.push('size=' + @data.length.to_s)
-    else
-      info.push('empty')
-    end
-    if nil != @path
-      info.push('path="' + @path + '"')
-    end
-    if nil != @mimeType
-      info.push('mimeType="' + @mimeType + '"')
-    end
-    return '(Cover:' + info.join(',') + ')'
-  end
-
-  def read_image(filename)
-    open(filename, 'rb') do |fd|
-      @data = fd.read()
-    end
-  end
-
-  def to_s
-    return inspect
-  end
-
-  def write_image(outputDir, filename)
-    open(outputDir + '/' + filename, 'wb') do |fd|
-      fd.write(@data)
-    end
-    return filename, @mimeType
-  end
-
-  protected
-  def getExt
-    pos = @path.rindex('.')
-    if nil == pos
-      return '.img'
-    end
-    return @path.slice(pos, @path.length)
-  end
-end
-
diff --git a/extract.rb b/extract.rb
deleted file mode 100644 (file)
index c695941..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-require 'find'
-require 'pathname'
-
-def exec(cmdline)
-  puts "$ #{cmdline}"
-  result = system(cmdline)
-  unless result
-    puts "FAILED:  #{cmdline}"
-  end
-  result
-end
-
-def extract_epub(source_file, source_path, dest_path)
-  relative_path = source_file[source_path.length .. source_file.length]
-  dest_file = "#{dest_path}/#{relative_path}"
-  dest_file = dest_file[0 .. (dest_file.length - 6)] + ".txt"
-
-  required_path = Pathname(dest_file).dirname
-  unless File.directory? required_path
-    unless exec("mkdir -p #{required_path}")
-      return false
-    end
-  end
-
-  if File.exist? dest_file
-    source_time = File.mtime source_file
-    dest_time = File.mtime dest_file
-    comp = dest_time <=> source_time
-    if comp > 0
-      return true # Nothing to do, extraction is already up-to-date
-    end
-  end
-    
-  exec("ebook-convert #{source_file} #{dest_file}")
-end
-
-def scan_dir(source_path, dest_path) 
-  Find.find(source_path) do |f|
-    if f.match(/.epub\Z/)
-      unless (f.match(/_bis.epub\Z/) || f.match(/_ter.epub\Z/) || f.match(/_quater.epub\Z/))
-        extract_epub(f, source_path, dest_path)
-      end
-    end
-  end
-end
-
-dest_path = ARGV[0]
-for arg in ARGV[1 .. ARGV.length]
-  scan_dir(arg, dest_path)
-end
diff --git a/main.rb b/main.rb
deleted file mode 100644 (file)
index e294b4a..0000000
--- a/main.rb
+++ /dev/null
@@ -1,56 +0,0 @@
-require_relative 'navigator'
-require_relative 'page'
-require_relative 'store'
-require_relative 'walk_dir'
-
-@outputDir = 'output'
-
-@config_file = 'quanlib.ini'
-@skip_class = false
-
-def handleArg(arg)
-  if arg.start_with?("--config=")
-    @config_file = arg[9..-1]
-    puts 'Using config file "' + @config_file + '".'
-  elsif "--purge" == arg
-    puts 'Purging database...'
-    @store.dropSchema()
-    if File.exists?(@store.unclassified_csv)
-      File.delete(@store.unclassified_csv)
-    end
-  elsif "--skip-class" == arg
-    puts 'Skipping load of classification table.'
-    @skip_class = true
-  elsif arg.start_with?("--")
-    abort('ERROR:  Unrecognized option "' + arg + '".')
-  end
-end
-
-@store = Store.new(@config_file)
-@store.connect()
-
-for arg in ARGV
-  handleArg(arg)
-end
-
-@store.init_db(@skip_class)
-
-for arg in ARGV
-  if ! arg.start_with?("--")
-    puts 'Scanning directory "' + arg + '"...'
-    w = WalkDir.new(@config_file, arg)
-    w.books
-  end
-end
-
-@store.cross_reference_lists
-
-puts 'Creating output...'
-
-navigator = Navigator.new(@store)
-navigator.write_atoz_pages()
-navigator.write_series_listing()
-navigator.write_dewey()
-
-@store.disconnect()
-
diff --git a/navigator.rb b/navigator.rb
deleted file mode 100644 (file)
index 881b1fa..0000000
+++ /dev/null
@@ -1,157 +0,0 @@
-require_relative 'page'
-require_relative 'store'
-
-class Navigator
-  def initialize(store)
-    @store = store
-  end
-
-  def write_atoz_pages
-    atoz_counts = {}
-
-    ('A'..'Z').each do |letter| 
-      atoz_counts[letter] = write_authors_starting_with(letter)
-    end
-
-    content = '<p><table><tr><th>Author</th><th>Books</th></tr>'
-    ('A'..'Z').each do |letter|
-      content += '  <tr><td><a href="../atoz/output_' + letter + '.html">Starting with ' + letter + '</a></td><td>' + atoz_counts[letter].to_s + '</td></tr>'
-    end
-    content += '</table></p>'
-    page = Page.new(@store)
-    page.output_dir = 'atoz'
-    page.special = content
-    page.up = ['../output/index.html', 'Up']
-
-    page.write_html( [] )
-  end
-
-  def write_authors_starting_with(letter)
-    book_ids = @store.query_books_by_author(letter + '%')
-    puts 'Authors starting with "' + letter + '":  ' + book_ids.length.to_s() + ' books.'
-
-    page = Page.new(@store)
-    if 'A' != letter
-      page.back = ['../atoz/output_' + (letter.ord - 1).chr + '.html', 'Prev']
-    end
-    if 'Z' != letter
-      page.forward = ['../atoz/output_' + (letter.ord + 1).chr + '.html', 'Next']
-    end
-    page.output_dir = 'atoz'
-    page.index_file = 'output_' + letter + '.html'
-    page.title = "Authors starting with '" + letter + "'"
-    page.up = ['../atoz/index.html', 'Up']
-
-    page.write_html(book_ids)
-    return book_ids.length
-  end
-
-  def write_dewey
-    book_ids = @store.query_books_by_ddc()
-    puts 'Non-fiction books arranged by Dewey Decimal Classification:  ' + book_ids.length.to_s() + ' books.'
-
-    page = Page.new(@store)
-    page.output_dir = 'ddc'
-    page.index_file = 'index.html'
-    page.title = "Non-fiction books arranged by Dewey Decimal call number"
-    page.up = ['../output/index.html', 'Up']
-    
-    page.write_html(book_ids)
-    return book_ids.length
-  end
-
-  def write_series_for_age(age)
-    series_infos = []
-
-    series_ids = @store.query_series_by_age(age)
-
-    series_ids.each do |id|
-      series = @store.load_series(id)
-      book_ids = @store.query_books_by_series_id(id)
-      if nil != book_ids and book_ids.length > 0
-        series_infos.push( [series, book_ids] )
-      end
-    end
-
-    for idx in 0 .. (series_infos.length - 1) do 
-      #puts series.descr + ': ' + book_ids.length.to_s + ' books.'
-
-      back = nil
-      fwd = nil
-
-      if idx > 0
-        back = series_infos[idx-1]
-      end
-      if (idx + 1) < series_infos.length
-        fwd = series_infos[idx+1]
-      end
-
-      cur = series_infos[idx]
-      series = cur[0]
-      book_ids = cur[1]
-
-      page = Page.new(@store)
-      if nil != back
-        page.back = [back[0].key + '.html', 'Back']
-      end
-      if nil != fwd
-        page.forward = [fwd[0].key + '.html', 'Forward']
-      end
-      page.output_dir = 'series/series_' + age
-      page.index_file = series.key + '.html'
-      page.title = 'Series &ldquo;' + series.descr + '&rdquo; (' + book_ids.length.to_s + ' books)'
-      page.up = ['index.html', 'Up']
-  
-      page.write_html(book_ids)
-    end
-
-    content =  '<h1>&ldquo;' + age + '&rdquo; Series</h1>'
-    content += '<p><table><tr><th>Author</th><th>Series</th><th>Genre</th><th>Books</th></tr>'
-    series_infos.each do |cur|
-      series = cur[0]
-      book_ids = cur[1]
-
-      author = series.grouping
-      letter = author[0]
-
-      content += '  <tr>'
-      content += '<td><a href="../../atoz/output_' + letter + '.html">' + author + '</a></td>'
-      content += '<td><a href="' + series.key + '.html">' + series.descr + '</a></td>'
-      content += '<td>' + series.genre + '</td>'
-      content += '<td>' + book_ids.length.to_s + '</td>'
-      content += '</tr>'
-    end
-    content += '</table></p>'
-    page = Page.new(@store)
-    page.output_dir = 'series/series_' + age
-    page.special = content
-    page.up = ['../index.html', 'Up']
-    page.write_html( [] )
-
-    return series_infos.length
-  end
-
-  def write_series_listing
-    ages = ['beginner', 'junior', 'ya', 'adult']
-    series_counts = {}
-
-    ages.each do |age|
-      puts 'Series for "' + age + '" readers...'
-
-      series_counts[age] = write_series_for_age(age)
-    end
-
-    content = '<h1>Browse Books By Series</h1>'
-    content += '<p>'
-    content += '<table><tr><th>Age</th><th>Number of Series</th></tr>'
-    ages.each do |age|
-      content += '<tr><td><a href="series_' + age + '/index.html">' + age + '</a></td><td>' + series_counts[age].to_s + '</td></tr>'
-    end
-    content += '</table></p>'
-    page = Page.new(@store)
-    page.output_dir = 'series'
-    page.special = content
-    page.up = ['../output/index.html', 'Up']
-    page.write_html( [] )
-  end
-end
diff --git a/page.rb b/page.rb
deleted file mode 100644 (file)
index 638f9ad..0000000
--- a/page.rb
+++ /dev/null
@@ -1,166 +0,0 @@
-require 'fileutils'
-
-require_relative 'store'
-
-class Page
-  def initialize(store)
-    @back = nil
-    @forward = nil
-    @index_file = 'index.html'
-    @output_dir = 'output'
-    @special = nil
-    @store = store
-    @title = 'Books'
-    @up = nil
-  end
-
-  def back=(value)
-    @back = value
-  end
-
-  def forward=(value)
-    @forward = value
-  end
-
-  def index_file=(value)
-    @index_file = value
-  end
-
-  def navig_link(data)
-    if (nil == data)
-      return ''
-    end
-    return '<a href="' + data[0] + '">' + data[1] + '</a>'
-  end
-
-  def output_dir=(value)
-    @output_dir = value
-  end
-
-  def special=(value)
-    @special = value
-  end
-
-  def title=(value)
-    @title = value
-  end
-
-  def up=(value)
-    @up = value
-  end
-
-  def write_books(fd, book_ids)
-    for id in book_ids
-      book = @store.load_book(id)
-      image = nil
-      if nil != book.cover
-        #@imageCount += 1
-        #(path, mimeType) = book.cover.write_image(@output_dir, 'image' + @imageCount.to_s)
-        #image = '<img class="cover-thumb" src="' + path + '"/>'
-        path = book.cover.path
-        image = '<img class="cover-thumb" src="' + path + '"/>'
-      else
-        image = '(No cover image)'
-      end
-
-      fd.puts '    <div><table>'
-      fd.puts '      <tr><td><a href="' + book.path + '">' + image + '</a></td>'
-
-      heading = book.heading()
-      description = book.description()
-      if nil != description
-        fd.puts '          <td><span class="popup">' + heading + '<span class="pop-inner"><p>' + heading + '</p><p>' + description + '</p></span></span></td></tr>'
-      else
-        fd.puts '          <td>' + heading + '</td></tr>'
-      end
-    
-      fd.puts '    </table></div>'
-    end
-  end
-
-  def write_footer(fd)
-    fd.puts '    <p class="navigator">' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '</p>'
-  end
-
-  def write_header(fd)
-    fd.puts '    <h1 class="header">' + @title + '</h1>'
-
-    fd.puts '    <p class="navigator">' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '</p>'
-  end
-
-  def write_html(book_ids)
-    @imageCount = 0
-
-    if ! Dir.exist?(@output_dir)
-      FileUtils.mkdir_p(@output_dir)
-    end
-
-    open(@output_dir + '/' + @index_file, 'w') do |fd|
-      fd.puts '<html>'
-      fd.puts '  <head>'
-      fd.puts '    <meta charset="utf-8"/>'
-      fd.puts '    <title>' + @title + '</title>'
-
-      write_style_sheet(fd)
-
-      fd.puts '  </head>'
-      fd.puts '  <body>'
-      
-      write_header(fd)
-
-      write_special(fd)
-      write_books(fd, book_ids)
-  
-      write_footer(fd)
-
-      fd.puts "  </body>"
-      fd.puts "</html>"
-    end
-  end
-
-  def write_special(fd)
-    if (nil != @special)
-      fd.puts(@special)
-    end
-  end
-
-  def write_style_sheet(fd)
-      style = 
-<<EOS
-    <style>
-      div { 
-        display: inline-block;
-        width: 400px;
-        margin: 10px;
-        border 3px solid #73ad21;
-      }
-      h1.header { 
-        background: #4040a0;
-        color: #ffffff;
-        text-align: center;
-      }
-      img.cover-thumb { max-height: 200px; max-width: 200px; }
-      p.navigator { }
-      span.popup { }
-      span.popup:hover { text-decoration: none; background: #cfffff; z-index: 6; }
-      span.popup span.pop-inner { 
-        border-color:black; 
-        border-style:solid; 
-        border-width:1px;
-        display: none; 
-        margin: 4px 0 0 0px; 
-        padding: 3px 3px 3px 3px;
-        position: absolute; 
-      }
-      span.popup:hover span.pop-inner { 
-        background: #ffffaf; 
-        display: block; 
-        margin: 20px 0 0 0px; 
-        z-index:6;
-      }
-    </style>
-EOS
-      fd.puts style
-  end
-end
-
diff --git a/series.rb b/series.rb
deleted file mode 100644 (file)
index 0621876..0000000
--- a/series.rb
+++ /dev/null
@@ -1,87 +0,0 @@
-
-class Series
-  def initialize(id)
-    @age = nil
-    @genre = nil
-    @grouping = nil 
-    @code = nil
-    @descr = nil
-    @id = id
-  end
-
-  def age
-    @age
-  end
-
-  def age=(value)
-    @age = value
-  end
-
-  def code
-    @code
-  end
-
-  def code=(value)
-    @code = value
-  end
-
-  def descr
-    @descr
-  end
-
-  def descr=(value)
-    @descr = value
-  end
-
-  def genre
-    @genre
-  end
-
-  def genre=(value)
-    @genre = value
-  end
-
-  def grouping
-    @grouping
-  end
-
-  def grouping=(value)
-    @grouping = value
-  end
-
-  def id
-    @id
-  end
-
-  def inspect
-    data = []
-    if nil != @age
-      data.push('age="' + @age.inspect + '"')
-    end
-    if nil != @code
-      data.push('code="' + @code.inspect + '"')
-    end
-    if nil != @descr
-      data.push('descr="' + @descr + '"')
-    end
-    if nil != @genre
-      data.push('genre="' + @genre + '"')
-    end
-    if nil != @grouping
-      data.push('grouping="' + @grouping + '"')
-    end
-    return '(Series:' + data.join(',') + ')'
-  end
-
-  def key
-    if nil != grouping and nil != code
-      return grouping.to_s + '_' + code.to_s
-    end
-    return id.to_s
-  end
-
-  def to_s
-    return inspect()
-  end
-end
-
diff --git a/store.rb b/store.rb
deleted file mode 100644 (file)
index 1a33ca3..0000000
--- a/store.rb
+++ /dev/null
@@ -1,655 +0,0 @@
-
-require 'csv'
-require 'fileutils'
-require 'inifile'
-require 'pg'
-
-require_relative 'series'
-require_relative 'tconn'
-
-class Store
-  def unclassified_csv
-    @basePath + '/csv/unclassified.csv'
-  end
-
-  def initialize(config_file)
-    @conn = nil
-
-    config = IniFile.load(config_file)
-    if nil == config
-      puts 'FATAL:  Failed to load config file "' + config_file + '".  Aborting initialization.'
-      return
-    end
-
-    section = config['database']
-    @dbhost = section['host']
-    @dbport = 5432
-    @dbname = section['name']
-    @dbuser = section['user']
-    @dbpass = section['pass']
-
-    section = config['filesystem']
-    @basePath = section['basePath']
-  end
-
-  def connect
-    @conn = TimedConn.new(PG.connect(@dbhost, @dbport, '', '', @dbname, @dbuser, @dbpass))
-    return @conn
-  end
-
-  def disconnect
-    @conn.close()
-  end
-
-  def construct_efs_path(efs_id)
-    id_str = sprintf('%010d', efs_id)
-    path = sprintf('%s/%s/%s/%s', id_str[0,2], id_str[2,2], id_str[4,2], id_str[6,2])
-    name = id_str + '.dat'
-    return path, name
-  end
-
-  def cross_reference_lists
-puts "@@@@@@@@@@@ CROSS-REF START @@@@@@@@@@@"
-    exec_update("TRUNCATE TABLE Lists CASCADE;", [])
-
-    populate_lists_table
-puts "@@@@@@@@@@@ CROSS-REF DONE @@@@@@@@@@@"
-  end
-
-  def create_schema(skip_class)
-    create_authors =
-<<EOS
-      CREATE TABLE Authors (
-        id          INTEGER PRIMARY KEY,
-        grouping    VARCHAR(64),
-        reading     VARCHAR(256),
-        sort        VARCHAR(256)
-      );
-EOS
-
-    create_books =
-<<EOS
-      CREATE TABLE Books (
-        id             INTEGER PRIMARY KEY,
-        arrived        TIMESTAMP,
-        author         INTEGER REFERENCES Authors(id),
-        classification INTEGER REFERENCES Classifications(id),
-        cover          INTEGER,
-        language       VARCHAR(64),
-        description    TEXT,
-        path           VARCHAR(256),
-        series         INTEGER REFERENCES Series(id),
-        title          VARCHAR(256),
-        volume         VARCHAR(16)
-      );
-EOS
-
-    create_classification =
-<<EOS
-      CREATE TABLE Classifications (
-        id              INTEGER PRIMARY KEY,
-        ddc             VARCHAR(32),
-        lcc             VARCHAR(32),
-        author_grouping VARCHAR(64),
-        author_sort     VARCHAR(128),
-        title_grouping  VARCHAR(256),
-        title           VARCHAR(256)
-      );
-EOS
-
-    create_efs =
-<<EOS
-      CREATE TABLE EFS (
-        id          INTEGER PRIMARY KEY,
-        mimetype    VARCHAR(64)
-      );
-EOS
-
-    create_fast =
-<<EOS
-      CREATE TABLE FAST (
-        id          VARCHAR(32) PRIMARY KEY,
-        descr       VARCHAR(128)
-      );
-EOS
-
-    # Associative entity, linking FAST and Classifications tables
-    # in a 0..n to 0..m relationship
-    create_fast_classifications =
-<<EOS
-      CREATE TABLE FAST_Classifications (
-        fast           VARCHAR(32) REFERENCES FAST(id),
-        classification INTEGER REFERENCES Classifications(id)
-      );
-EOS
-
-    create_lists =
-<<EOS
-      CREATE TABLE Lists (
-        id             INTEGER PRIMARY KEY,
-        age            VARCHAR(32),
-        category       VARCHAR(32),
-        code           VARCHAR(2),
-        year           INTEGER,
-        author         INTEGER REFERENCES Authors(id),
-        title          VARCHAR(256)
-      );
-EOS
-
-    # Associative entity, linking Lists and Books tables
-    # in a 0..n to 0..m relationship
-    create_lists_books =
-<<EOS
-      CREATE TABLE Lists_Books (
-        list           INTEGER REFERENCES Lists(id),
-        book           INTEGER REFERENCES Books(id)
-      );
-EOS
-
-    create_series =
-<<EOS
-      CREATE TABLE Series (
-        id          INTEGER PRIMARY KEY,
-        age         VARCHAR(32),
-        genre       VARCHAR(32),
-        grouping    VARCHAR(64),
-        code        VARCHAR(16),
-        descr       VARCHAR(128)
-      )
-EOS
-
-    stmts = [
-      create_authors,
-      create_classification,
-      create_efs,
-      create_fast,
-      create_series,
-      create_books,
-      create_fast_classifications,
-      create_lists,
-      create_lists_books,
-      'CREATE SEQUENCE author_id;',
-      'CREATE SEQUENCE book_id;',
-      'CREATE SEQUENCE classification_id;',
-      'CREATE SEQUENCE efs_id;',
-      'CREATE SEQUENCE list_id;',
-      'CREATE SEQUENCE series_id;'
-    ]
-
-    for stmt in stmts
-      @conn.exec(stmt)
-    end
-
-    if skip_class == false
-      populate_fast_table
-      populate_classifications_table
-    end
-
-    populate_series_table
-  end
-
-  def dropSchema
-    stmts = [
-      'DROP TABLE Lists_Books;',
-      'DROP TABLE Lists;',
-      'DROP TABLE Books;',
-      'DROP TABLE FAST_Classifications;',
-      'DROP TABLE Authors;',
-      'DROP TABLE Classifications;',
-      'DROP TABLE EFS;',
-      'DROP TABLE FAST;',
-      'DROP TABLE Series;',
-      'DROP SEQUENCE author_id;',
-      'DROP SEQUENCE book_id;',
-      'DROP SEQUENCE classification_id;',
-      'DROP SEQUENCE efs_id;',
-      'DROP SEQUENCE list_id;',
-      'DROP SEQUENCE series_id;'
-    ]
-
-    for stmt in stmts do
-      begin
-        @conn.exec(stmt)
-      rescue Exception => exc
-        puts 'WARNING:  "' + stmt + '" failed:  ' + exc.to_s
-      end
-    end
-  end
-
-  def find_all_authors(author_name)
-    result = []
-
-    sqlSelect = "SELECT id FROM Authors WHERE grouping=$1;"
-    args = [author_name]
-
-    @conn.exec_params(sqlSelect, args) do |rs|
-      rs.each do |row|
-        result << row['id']
-      end
-    end
-
-    result
-  end
-
-  def find_author(author)
-    sqlSelect = "SELECT id FROM Authors WHERE grouping=$1 AND reading=$2 AND sort=$3;"
-    args = [author.grouping, author.reading_order, author.sort_order]
-
-    @conn.exec_params(sqlSelect, args) do |rs|
-      if rs.ntuples > 0
-        return rs[0]['id']
-      end
-    end
-
-    return nil
-  end
-
-  def init_db(skip_class)
-    sql = "SELECT 1 FROM pg_tables WHERE tableowner='quanlib' AND tablename='books'"
-    found = false
-    @conn.exec(sql).each do |row|
-      found = true
-    end
-
-    if ! found
-      create_schema(skip_class)
-    end
-  end
-
-  def load_author(id)
-    sqlSelect = "SELECT grouping, reading, sort FROM Authors WHERE id=$1"
-    args = [id]
-    @conn.exec_params(sqlSelect, args) do |rs|
-      if rs.ntuples != 1
-        raise "Expected 1 row for " + id + " but got " + rs.ntuples + ":  " + sqlSelect
-      end
-      row = rs[0]
-      author = Author.new(row['grouping'], row['reading'], row['sort'])
-      return author
-    end
-    return nil
-  end
-
-  def store_author(author)
-    id = find_author(author)
-    if nil == id
-      id = next_id('author_id')
-      sqlInsert = "INSERT INTO Authors(id, grouping, reading, sort) VALUES ($1, $2, $3, $4);"
-      args = [id, author.grouping, author.reading_order, author.sort_order]
-      begin
-        rs = @conn.exec_params(sqlInsert, args)
-      rescue Exception => e
-        puts sqlInsert + ":  " + args.inspect()
-        puts e.message
-        puts $@
-      ensure
-        rs.clear if rs
-      end
-    end
-    return id
-  end
-
-  def load_book(id)
-    sql = "SELECT author, classification, cover, description, language, path, series, title, volume FROM Books WHERE id=$1;"
-    book = nil
-
-    begin
-      @conn.exec_params(sql, [id]) do |rs|
-        if 1 != rs.ntuples
-          raise 'Expected one row in Books for id ' + id + ', but found ' + rs.length + '.'
-          return nil
-        end
-        row = rs[0]
-
-        book = Book.new(self)
-        book.author = load_author(row['author'])
-        book.classification_id = row['classification']
-        book.cover = load_cover(row['cover'])
-        book.description = row['description']
-        book.language = row['language']
-        book.path = row['path']
-        book.series_id = row['series']
-        book.title = row['title']
-        book.volume = row['volume']
-      end
-    rescue Exception => e
-      puts sql + ": " + id
-      puts e.message
-      puts $@
-    end
-
-    return book
-  end
-
-  def store_book(book)
-    sql = "INSERT INTO Books (id, arrived, author, classification, cover, description, language, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);"
-
-    book_id = next_id('book_id')
-
-    author_id = store_author(book.author)
-    (efs_id, mime_type) = store_cover(book)
-
-    args = [book_id, book.arrived, author_id, book.classification_id, efs_id, book.description, book.language, book.path, book.series_id, book.title, book.volume]
-
-    begin
-      rs = @conn.exec_params(sql, args)
-    rescue Exception => e
-      puts sql + ": " + args.inspect()
-      puts e.message
-      puts $@
-    ensure
-      rs.clear if rs
-    end
-
-    return book_id
-  end
-
-  def find_classification(author_grouping, title_grouping)
-    sql = "SELECT id FROM Classifications WHERE author_grouping = $1 AND title_grouping = $2;"
-    @conn.exec_params(sql, [author_grouping, title_grouping]) do |rs|
-      if rs.ntuples > 0
-        return rs[0]['id']
-      end
-    end
-    return nil
-  end
-
-  def load_classification(id)
-    sql  = "SELECT ddc, lcc, author_grouping, author_sort, title_grouping, title "
-    sql += " FROM Classifications WHERE id=$1"
-    @conn.exec_params(sql, [id]) do |rs|
-      if rs.ntuples > 0
-        row = rs[0]
-        ddc = row['ddc']
-        lcc = row['lcc']
-        author_grouping = row['author_grouping']
-        author = row['author_sort']
-        title_grouping = row['title_grouping']
-        title = row['title']
-
-        result = Classification.new(ddc, lcc, author_grouping, author, title_grouping, title)
-        result.id = id
-        return result
-      end
-    end
-
-    return nil
-  end
-
-  def load_cover(id)
-    if nil == id
-      return nil
-    end
-
-    mime_type = 'application/octet-stream'
-
-    sql = "SELECT mimeType FROM Efs WHERE id=$1"
-    @conn.exec_params(sql, [id]) do |rs|
-      if rs.ntuples != 1
-        raise "Expected one row but got " + rs.ntuples + ": " + sql + ": " + id
-      end
-      mime_type = rs[0]['mimeType']
-    end
-
-    (efspath, efsname) = construct_efs_path(id)
-
-    fullpath = @basePath + '/efs/' + efspath + '/' + efsname
-
-    return Cover.new(nil, fullpath, mime_type)
-  end
-
-  def store_cover(book)
-    efs_id = nil
-    cover = book.cover()
-
-    if nil == cover
-      return nil
-    end
-
-    @conn.exec("SELECT nextval('efs_id')") do |rs|
-      efs_id = rs[0]['nextval']
-    end
-
-    if nil == efs_id
-      return nil
-    end
-
-    (efspath, efsname) = construct_efs_path(efs_id)
-
-    efspath = @basePath + '/efs/' + efspath
-
-    FileUtils.mkdir_p(efspath)
-
-    (filepath, mimetype) = cover.write_image(efspath, efsname)
-
-    sql = "INSERT INTO efs VALUES ($1, $2)"
-    begin
-      rs = @conn.exec_params(sql, [efs_id, mimetype])
-    rescue Exception => e
-      puts sql + ": " + efs_id + ", " + mimetype
-      puts e.message
-      puts $@
-    ensure
-      rs.clear if rs
-    end
-
-    return efs_id, mimetype
-  end
-
-  def exec_id_query(sql, args)
-    ids = []
-    @conn.exec_params(sql, args) do |rs|
-      rs.each do |row|
-        ids.push(row['id'])
-      end
-    end
-    return ids
-  end
-
-  def exec_update(sql, args)
-    begin
-      rs = @conn.exec_params(sql, args)
-    rescue Exception => e
-      puts sql + ": " + args.inspect()
-      puts e.message
-      puts $@
-    ensure
-      rs.clear if rs
-    end
-  end
-
-  def next_id(seq_name)
-    id = nil
-    @conn.exec("SELECT nextval('" + seq_name + "');") do |rs|
-      id = rs[0]['nextval']
-    end
-    return id
-  end
-
-  def get_series(grouping, code)
-    if nil == code
-      return nil
-    end
-
-    sql = "SELECT id FROM Series WHERE grouping=$1 AND code=$2;"
-    args = [grouping, code]
-    @conn.exec_params(sql, args).each do |row|
-      return row['id']
-    end
-
-    # TODO:  Create a new series object here?
-    puts 'WARNING:  series("' + grouping + '", "' + code + '") not found.'
-    return nil
-  end
-
-  def load_series(id)
-    sql = "SELECT age,genre,grouping,code,descr FROM Series WHERE id=$1;"
-    args = [id]
-    @conn.exec_params(sql, args) do |rs|
-      if rs.ntuples > 0
-        row = rs[0]
-        series = Series.new(id)
-        series.age = row['age']
-        series.genre = row['genre']
-        series.grouping = row['grouping']
-        series.code = row['code']
-        series.descr = row['descr']
-        return series
-      end
-    end
-    return nil
-  end
-
-  def populate_classifications_table
-    puts "Populating the Classifications table..."
-    first = true
-    CSV.foreach(@basePath + '/csv/class.csv') do |row|
-      if first
-        # skip the header row
-        first = false
-      else
-
-        # First, add a row to the Classifications table
-
-        id = next_id('classification_id')
-        ddc = row[0]
-        lcc = row[1]
-        author_grouping = row[2]
-        author_sort = row[3]
-        title_grouping = row[4]
-        title = row[5]
-
-        sqlInsert = "INSERT INTO Classifications (id, ddc, lcc, author_grouping, author_sort, title_grouping, title) VALUES ($1, $2, $3, $4, $5, $6, $7);"
-        args = [id, ddc, lcc, author_grouping, author_sort, title_grouping, title]
-        exec_update(sqlInsert, args)
-
-        # Second, link up with the appropriate FAST table entries
-
-        fast = []
-        input = row[6]
-        if input.length > 0
-          fast = input.split(';')
-        end
-
-        fast.each do |fast_id|
-          sqlInsert = "INSERT INTO FAST_Classifications (fast, classification) VALUES ($1, $2);"
-          args = [fast_id, id]
-          exec_update(sqlInsert, args)
-        end
-      end
-    end
-  end
-
-  def populate_fast_table
-    puts "Populating the FAST table..."
-    first = true
-    CSV.foreach(@basePath + '/csv/fast.csv') do |row|
-      if first
-        first = false  # skip the header row
-      else
-        id = row[0]
-        descr = row[1]
-        sqlInsert = "INSERT INTO FAST (id, descr) VALUES ($1, $2);"
-        exec_update(sqlInsert, [id, descr])
-      end
-    end
-  end
-
-  def populate_lists_table
-    puts "Populating the Lists table..."
-
-    CSV.foreach(@basePath + "/csv/lists.csv", headers: true) do |row|
-      author_ids = find_all_authors(row['author'])
-      if author_ids.empty?
-        specification = [row['age'], row['category'], row['code'], row['year'], row['author'], row['title']]
-          .map { |x| x.inspect }
-          .join(', ')
-
-        puts "WARNING: For list entry (#{specification}), no such author was found."
-
-        next
-      end
-
-      sqlInsert = %Q(
-        INSERT INTO Lists (id, age, category, code, year, author, title)
-        VALUES ($1, $2, $3, $4, $5, $6, $7);
-      )
-      author_ids.each do |author_id|
-        list_id = next_id('list_id')
-        args = [list_id, row['age'], row['category'], row['code'], row['year'], author_id, row['title']]
-        exec_update(sqlInsert, args)
-
-        update_lists_books_table(list_id, author_id, row['title'])
-      end
-    end
-  end
-
-  # Scan for books that match this Lists entry, and add any matches to the Lists_Books associative table
-  def update_lists_books_table(list_id, author_id, title)
-    title_pattern = Book.grouping_for_title(title).gsub('_', '%')
-    sqlSelect = "SELECT id FROM Books WHERE author = $1 AND title LIKE $2;"
-    args = [author_id, title_pattern]
-
-    @conn.exec_params(sqlSelect, args) do |rs|
-      rs.each do |row|
-        sqlInsert = "INSERT INTO Lists_Books (list, book) VALUES ($1, $2)"
-        args = [list_id, row['id']]
-        exec_update(sqlInsert, args)
-      end
-    end
-  end
-
-  def populate_series_table
-    puts "Populating the Series table..."
-    CSV.foreach(@basePath + '/csv/series.csv') do |row|
-      id = next_id('series_id')
-      sqlInsert = "INSERT INTO Series (id, age, genre, grouping, code, descr) VALUES ($1, $2, $3, $4, $5, $6);"
-      args = [id] + row
-      exec_update(sqlInsert, args)
-    end
-  end
-
-  def query_books_by_author(pattern)
-    sql =
-<<EOS
-      SELECT b.id FROM Authors a
-      INNER JOIN Books b ON b.author=a.id
-      LEFT OUTER JOIN Series s on s.id=b.series
-      WHERE upper(a.grouping) LIKE $1
-      ORDER BY a.grouping, b.series, b.volume, b.title
-EOS
-    return exec_id_query(sql, [pattern])
-  end
-
-  def query_books_by_ddc
-    sql =
-<<EOS
-      SELECT b.id FROM Classifications c
-      INNER JOIN Books b ON b.classification=c.id
-      ORDER BY c.ddc
-EOS
-    return exec_id_query(sql, [])
-  end
-
-  def query_books_by_series_id(id)
-    sql =
-<<EOS
-      SELECT b.id FROM Books b
-      WHERE b.series = $1
-      ORDER BY b.volume,b.title
-EOS
-    return exec_id_query(sql, [id])
-  end
-
-  def query_series_by_age(pattern)
-    sql =
-<<EOS
-      SELECT s.id
-      FROM Series s
-      WHERE s.age LIKE $1
-      ORDER BY s.grouping,s.descr
-EOS
-    return exec_id_query(sql, [pattern])
-  end
-end
-
diff --git a/tconn.rb b/tconn.rb
deleted file mode 100644 (file)
index 43fa0f5..0000000
--- a/tconn.rb
+++ /dev/null
@@ -1,71 +0,0 @@
-# tconn.rb
-#
-# Timed Connection:  
-# Wrapper around a PG Connection that provides a report on where time was spent executing SQL
-#
-
-require 'pg'
-
-class TimedConn 
-  def initialize(wrapped_conn)
-    @conn = wrapped_conn
-    @stmts = {}
-    @total_time = 0
-  end
-
-  def close
-    @conn.close()
-    puts "Connection closing.  Total SQL time:  " + @total_time.to_s + " secs"
-    @stmts.each do |sql, info| 
-      elapsed = info[2]
-      calls = info[1]
-      puts elapsed.to_s + " secs: " + calls.to_s + " times:  " + sql
-    end
-  end
-
-  def exec(*args, &block)
-    before = Time.now
-    #puts args.inspect
-    result = @conn.exec(*args)
-    #puts result.inspect
-    after = Time.now
-    elapsed = (after - before)
-    remember(args[0], elapsed)
-    @total_time += elapsed
-    if block_given?
-      yield(result)
-    else
-      return result
-    end
-  end
-
-  def exec_params(*args, &block)
-    before = Time.now
-    #puts args.inspect
-    result = @conn.exec_params(*args)
-    #puts result.inspect
-    after = Time.now
-    elapsed = (after - before)
-    remember(args[0], elapsed)
-    @total_time += elapsed
-    if block_given?
-      yield(result)
-    else
-      return result
-    end
-  end
-
-  def remember(sql, elapsed)
-    if @stmts.has_key?(sql)
-      stmt = @stmts[sql]
-    else
-      stmt = [sql, 0, 0]
-    end
-
-    stmt[1] += 1  # Number of times this statement has been invoked
-    stmt[2] += elapsed   # total elapsed time spent on this statement
-    
-    @stmts[sql] = stmt
-  end
-end
-
index 64a2892b0f9903926744a72182a801c63be2207f..de3227897be674ba1bf727a58404eafec7d3d173 100644 (file)
@@ -1,17 +1,18 @@
+# frozen_string_literal: true
 
-require 'rspec/autorun'
-require_relative '../book'
+require "test_helper"
+require "book"
 
-describe Book do
-  it "can handle .epub and .pdf files" do
-    ['epub', 'pdf'].each do |extension|
-      expect(Book.can_handle?("sample.#{extension}")).to be true
+class BookTest < Minitest::Test
+  def test_that_it_can_handle_epub_and_pdf_files
+    %w(epub pdf).each do |extension|
+      assert_equal true, Book.can_handle?("sample.#{extension}")
     end
   end
 
-  it "cannot handle .mobi, .html, .txt, .doc, .zip, .rtf or .rar files" do
+  def test_that_it_cannot_handle_mobi_html_txt_doc_zip_rtf_nor_rar
     %w(doc html mobi rar rtf txt zip).each do |extension|
-      expect(Book.can_handle?("sample.#{extension}")).to be false
+      assert_equal false, Book.can_handle?("sample.#{extension}")
     end
   end
 end
index 5ebeff1da57ff7a3c70e8744e5ce04e840017b40..46676b8f01a9be38f4b970233aa9256d2ba19f6f 100644 (file)
@@ -1,9 +1,11 @@
+# frozen_string_literal: true
 
-require 'rspec/autorun'
-require_relative '../store'
+require "test_helper"
 
-describe Store do
-  it "construct_efs_path produces paths and filenames as expected" do
+require "store"
+
+class StoreTest < Minitest::Test
+  def test_construct_efs_path
     data = [
       [      1234, '00/00/00/12', '0000001234.dat'],
       [         1, '00/00/00/00', '0000000001.dat'],
@@ -12,13 +14,14 @@ describe Store do
       [      0x1b, '00/00/00/00', '0000000027.dat']
     ]
 
+    IniFile.stubs(:load).returns({"database" => {}, "filesystem" => {}})
     store = Store.new('quanlib.ini')
 
-    data.each do |input, expectedPath, expectedName|
-      (actualPath, actualName) = store.construct_efs_path(input)
+    data.each do |input, expected_path, expected_name|
+      (actual_path, actual_name) = store.construct_efs_path(input)
 
-      expect(actualPath).to eq(expectedPath)
-      expect(actualName).to eq(expectedName)
+      assert_equal expected_path, actual_path
+      assert_equal expected_name, actual_name
     end
   end
 end
diff --git a/test/test_helper.rb b/test/test_helper.rb
new file mode 100644 (file)
index 0000000..25bf530
--- /dev/null
@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift File.expand_path("../app", __dir__)
+
+require "minitest/autorun"
+require "mocha/minitest"
diff --git a/walk_dir.rb b/walk_dir.rb
deleted file mode 100644 (file)
index a2c088f..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-# Walk the directory (and subdirectories), identifying books.
-#
-# Expected format:
-#   .../AuthorName/Title_of_the_Awesome_Book.ext
-#
-# Author is given as FirstLast.  For example, 
-# Robert Anson Heinlein is RobertHeinlein, and 
-# JKRowling is JoanneRowling.
-#
-# Book titles have spaces replaced with underscores,
-# and punctuation [,!?'] replaced with hyphens.
-#
-# If the book forms part of a series, then an all-capitals 
-# series designator, followed by a numeric volume number, 
-# followed by an underscore, is prefixed to the name.
-# For example, Hardy Boys' volume 1, The Tower Treasure, 
-# is rendered as .../FranklinDixon/HB001_The_Tower_Treasure.epub
-# and Mrs. Pollifax volume 6, On the China Station, is
-# .../DorothyGilman/P06_On_the_China_Station.epub.
-
-require_relative 'book'
-require_relative 'book_loader'
-require_relative 'store'
-
-class WalkDir
-  def initialize(config_file, root)
-    @queue = Queue.new
-    @root = root
-    @config_file = config_file
-    @threads = []
-
-    @files = walk(@root)
-  end
-
-  def books
-    @threads = []
-    num_threads.times do
-      @threads << Thread.new do
-        BookLoader.new(@config_file, @queue).run
-      end
-    end
-
-    result = []
-    @files = remove_duplicates(@files)
-    for file in @files.sort()
-      if Book.can_handle?(file) && (!is_duplicate?(file))
-        # Queue this book to be loaded and added to the DB by a BookLoader thread
-        @queue << file
-      end
-    end
-
-    @threads.count.times { @queue << BookLoader::DONE_MARKER }
-
-    @threads.each { |t| t.join }
-  end
-
-  # Duplicate versions of a text are named 
-  #   xxx_suffix.ext
-  # Where suffix is one of bis, ter, quater, quinquies
-  # for the 2nd, 3rd, 4th or 5th variant respectively.
-  def is_duplicate?(file)
-    s = file.to_s
-    suffix = ['_bis.', '_ter.', '_quater.', '_quinquies.']
-    suffix.each do |pat|
-      if s.include?(pat)
-        return true
-      end
-    end
-    
-    return false
-  end
-
-  def remove_duplicates(files)
-    unique = {}
-    for file in files
-      if Book.can_handle?(file)
-        key = File.dirname(file) + '/' + File.basename(file, '.*')
-        if unique.has_key?(key)
-          new_ext = File.extname(file)
-          old_ext = File.extname(unique[key])
-          if ('.pdf' == old_ext) && ('.epub' == new_ext)
-            # Prefer EPUB over PDF
-            puts 'REPLACED ' + unique[key].to_s + ' with ' + file.to_s
-            unique[key] = file
-          else
-            puts 'DROPPED ' + file.to_s + " because it's superceded by " + unique[key].to_s
-          end
-        else
-          unique[key] = file
-        end
-      end
-    end
-
-    return unique.values
-  end
-
-  def walk(path)
-    result = []
-    children = Dir.entries(path)
-    for child in children
-      fullName = (path.chomp("/")) + "/" + child
-      if (File.directory?(fullName)) and (child != ".") and (child != "..") and (!File.symlink?(fullName))
-        sub = walk(fullName)
-        if (sub != nil) and (sub.length > 0)
-          result.concat(sub)
-        end
-      elsif (! File.directory?(fullName))
-        result.push(fullName)
-      end
-    end
-    return result
-  end
-
-  def num_threads
-    # TOOD:  make this (auto?) configurable
-    12
-  end
-end