From: Chris Jaekl
Date: Thu, 20 Jun 2024 15:41:02 +0000 (-0400)
Subject: Switch from rspec to minitest
X-Git-Url: https://jaekl.net/gitweb/?a=commitdiff_plain;h=dcd50e38d56f95e2bf88413b4d56db7132dccf11;p=quanlib.git
Switch from rspec to minitest
Also adds a Rakefile, and pulls in Rubocop while we're at it.
---
diff --git a/Gemfile b/Gemfile
index e3b145e..0181479 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,7 +1,9 @@
source 'http://rubygems.org'
gem 'inifile'
+gem 'mocha'
gem 'nokogiri'
gem 'pg'
gem 'rspec'
+gem 'rubocop'
gem 'rubyzip'
diff --git a/Gemfile.lock b/Gemfile.lock
index 3cc58d3..9778251 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -1,36 +1,83 @@
GEM
remote: http://rubygems.org/
specs:
- diff-lcs (1.3)
+ ast (2.4.2)
+ diff-lcs (1.5.1)
inifile (3.0.0)
- mini_portile2 (2.4.0)
- nokogiri (1.10.3)
- mini_portile2 (~> 2.4.0)
- pg (1.1.4)
- rspec (3.8.0)
- rspec-core (~> 3.8.0)
- rspec-expectations (~> 3.8.0)
- rspec-mocks (~> 3.8.0)
- rspec-core (3.8.2)
- rspec-support (~> 3.8.0)
- rspec-expectations (3.8.4)
+ json (2.7.2)
+ language_server-protocol (3.17.0.3)
+ mocha (2.4.0)
+ ruby2_keywords (>= 0.0.5)
+ nokogiri (1.16.6-aarch64-linux)
+ racc (~> 1.4)
+ nokogiri (1.16.6-arm-linux)
+ racc (~> 1.4)
+ nokogiri (1.16.6-arm64-darwin)
+ racc (~> 1.4)
+ nokogiri (1.16.6-x86-linux)
+ racc (~> 1.4)
+ nokogiri (1.16.6-x86_64-darwin)
+ racc (~> 1.4)
+ nokogiri (1.16.6-x86_64-linux)
+ racc (~> 1.4)
+ parallel (1.25.1)
+ parser (3.3.3.0)
+ ast (~> 2.4.1)
+ racc
+ pg (1.5.6)
+ racc (1.8.0)
+ rainbow (3.1.1)
+ regexp_parser (2.9.2)
+ rexml (3.3.0)
+ strscan
+ rspec (3.13.0)
+ rspec-core (~> 3.13.0)
+ rspec-expectations (~> 3.13.0)
+ rspec-mocks (~> 3.13.0)
+ rspec-core (3.13.0)
+ rspec-support (~> 3.13.0)
+ rspec-expectations (3.13.1)
diff-lcs (>= 1.2.0, < 2.0)
- rspec-support (~> 3.8.0)
- rspec-mocks (3.8.1)
+ rspec-support (~> 3.13.0)
+ rspec-mocks (3.13.1)
diff-lcs (>= 1.2.0, < 2.0)
- rspec-support (~> 3.8.0)
- rspec-support (3.8.2)
- rubyzip (1.2.3)
+ rspec-support (~> 3.13.0)
+ rspec-support (3.13.1)
+ rubocop (1.64.1)
+ json (~> 2.3)
+ language_server-protocol (>= 3.17.0)
+ parallel (~> 1.10)
+ parser (>= 3.3.0.2)
+ rainbow (>= 2.2.2, < 4.0)
+ regexp_parser (>= 1.8, < 3.0)
+ rexml (>= 3.2.5, < 4.0)
+ rubocop-ast (>= 1.31.1, < 2.0)
+ ruby-progressbar (~> 1.7)
+ unicode-display_width (>= 2.4.0, < 3.0)
+ rubocop-ast (1.31.3)
+ parser (>= 3.3.1.0)
+ ruby-progressbar (1.13.0)
+ ruby2_keywords (0.0.5)
+ rubyzip (2.3.2)
+ strscan (3.1.0)
+ unicode-display_width (2.5.0)
PLATFORMS
- ruby
+ aarch64-linux
+ arm-linux
+ arm64-darwin
+ x86-linux
+ x86_64-darwin
+ x86_64-linux
DEPENDENCIES
inifile
+ mocha
nokogiri
pg
rspec
+ rubocop
rubyzip
BUNDLED WITH
- 2.0.2
+ 2.5.13
diff --git a/Rakefile b/Rakefile
new file mode 100644
index 0000000..9cf3b56
--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+# Build tasks for the project: the test suite and the RuboCop linter.
+
+# Minitest's Rake integration; `create` registers its test task.
+require "minitest/test_task"
+
+Minitest::TestTask.create
+
+# RuboCop's Rake integration; `new` registers its lint task.
+require "rubocop/rake_task"
+
+RuboCop::RakeTask.new
+
+# A bare `rake` depends on the `test` task and then the `rubocop` task.
+task default: %i[test rubocop]
diff --git a/app/author.rb b/app/author.rb
new file mode 100644
index 0000000..fb2003b
--- /dev/null
+++ b/app/author.rb
@@ -0,0 +1,57 @@
+
+class Author
+ def initialize(grouping, reading_order, sort_order)
+ @grouping = grouping
+ @reading_order = reading_order
+ @sort_order = sort_order
+
+ if (nil == sort_order) || ('Unknown' == sort_order)
+ @sort_order = reading_to_sort_order(reading_order)
+ end
+ end
+
+ def grouping
+ @grouping
+ end
+
+ def reading_order
+ @reading_order
+ end
+
+ def sort_order
+ @sort_order
+ end
+
+ def inspect
+ result = '(Author:'
+ if nil != @grouping
+ result += ' grouping="' + @grouping + '"'
+ end
+ if nil != @reading_order
+ result += ' reading_order="' + @reading_order + '"'
+ end
+ if nil != @sort_order
+ result += ' sort_order="' + @sort_order + '"'
+ end
+ result += ')'
+
+ return result
+ end
+
+ def to_s
+ inspect
+ end
+
+ protected
+ def reading_to_sort_order(reading_order)
+ sort_order = reading_order
+
+ parts = reading_order.split(' ')
+ if parts.length > 1
+ sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
+ end
+
+ return sort_order
+ end
+end
+
diff --git a/app/book.rb b/app/book.rb
new file mode 100644
index 0000000..2b93f4b
--- /dev/null
+++ b/app/book.rb
@@ -0,0 +1,421 @@
+
+require 'nokogiri'
+require 'rubygems'
+require 'zip'
+
+require_relative 'author'
+require_relative 'classification'
+require_relative 'cover'
+require_relative 'store'
+
+class Book
+ @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
+ @@SERIES_AND_VOLUME_REGEX = /^([A-Z]+)([0-9]+(\.[0-9]+)?)$/
+
+ attr_accessor :arrived
+ attr_accessor :author
+ attr_accessor :classification_id
+ attr_accessor :cover
+ attr_accessor :description
+ attr_accessor :language
+ attr_accessor :path
+ attr_accessor :series_id
+ attr_accessor :title
+ attr_accessor :volume
+
+ # Creates an empty book record.
+ #
+ # store - kept in @store; other methods of this class call load_series,
+ #         load_classification, get_series and find_classification on it.
+ def initialize(store)
+ @store = store
+ end
+
+ def load_from_file!(fileName)
+ @path = fileName
+ parse_file_name!(fileName)
+ end
+
+ def self.can_handle?(fileName)
+ if nil == fileName
+ return false
+ end
+
+ #puts "Filename: " + fileName.to_s
+ lowerName = fileName.downcase()
+
+ if lowerName.end_with?(".epub")
+ return true
+ end
+
+ if lowerName.end_with?(".pdf")
+ return true
+ end
+
+ return false
+ end
+
+ def self.grouping_for_title(title)
+ result = title
+
+ '\'",!#'.split('').each do |c|
+ result = result.gsub(c, '-')
+ end
+ result = result.gsub(/: */, '--')
+ result = result.gsub(' ', '_')
+
+ result
+ end
+
+ # Builds the descriptive heading for this book: title, author, series and
+ # volume, and any Dewey / LCC classification, joined into one string.
+ #
+ # NOTE(review): several literals below are empty or wrap a value in empty
+ # strings, and the final join separator is split across two lines. This
+ # looks like markup was stripped from this copy of the file - confirm
+ # against the repository before changing any of the strings.
+ def heading
+ result = []
+
+ # Title, or a placeholder when none is known.
+ if nil != @title
+ result.push('' + @title + '')
+ else
+ result.push('(Unknown title)')
+ end
+ if nil != @author
+ result.push('by ' + @author.reading_order + '')
+ end
+
+ # Series description and volume share one entry, separated by a space.
+ # load_series is called even when @series_id is nil.
+ seriesInfo = []
+ series = @store.load_series(@series_id)
+ if nil != series and nil != series.descr
+ seriesInfo.push(series.descr.to_s)
+ end
+ if nil != @volume
+ seriesInfo.push(@volume.to_s)
+ end
+ if seriesInfo.length > 0
+ result.push(seriesInfo.join(' '))
+ end
+
+ # The classification is looked up only when an id is present.
+ classification = nil
+ if nil != @classification_id
+ classification = @store.load_classification(@classification_id)
+ end
+ if nil != classification
+ if nil != classification.ddc
+ result.push('Dewey: ' + classification.ddc.to_s)
+ end
+ if nil != classification.lcc
+ result.push('LCC: ' + classification.lcc.to_s)
+ end
+ end
+
+ return result.join('
')
+ end
+
+ def inspect
+ data = []
+ if nil != @author
+ data.push('author="' + @author.inspect + '"')
+ end
+ if nil != @series_id
+ data.push('series_id="' + @series_id.to_s() + '"')
+ end
+ if nil != @volume
+ data.push('volume="' + @volume + '"')
+ end
+ if nil != @title
+ data.push('title="' + @title + '"')
+ end
+ if nil != @cover
+ data.push(@cover.inspect())
+ end
+ if nil != @path
+ data.push('path="' + @path + '"')
+ end
+ return '(Book:' + data.join(',') + ')'
+ end
+
+ def to_s
+ return inspect()
+ end
+
+ def title_grouping
+ if nil == @path
+ return nil
+ end
+
+ return File.basename(@path, '.*')
+ end
+
+ protected
+ def isUpper?(c)
+ return /[[:upper:]]/.match(c)
+ end
+
+ protected
+ def massage_author(input)
+ if nil == input
+ return nil
+ end
+
+ reading_order = ""
+ input.each_char do |c|
+ if isUpper?(c) and (reading_order.length > 0)
+ reading_order += " "
+ end
+ reading_order += c
+ end
+
+ return reading_order
+ end
+
+ # Returns (series, volumeNo, titleText)
+ protected
+ def processTitle(input)
+ if nil == input
+ return nil
+ end
+
+ arr = input.split('_')
+
+ series = nil
+ vol = nil
+
+ first = arr[0]
+ matchData = (arr[0]).match(@@SERIES_AND_VOLUME_REGEX)
+ if nil != matchData
+ capt = matchData.captures
+ series = capt[0]
+ vol = capt[1]
+ arr.shift
+ end
+
+ pos = arr[-1].rindex('.')
+ if nil != pos
+ arr[-1] = arr[-1].slice(0, pos)
+ end
+
+ title = arr.join(' ')
+
+ bare_title_grouping = title_grouping
+ .split('_')
+ .reject { |part| part.match(@@SERIES_AND_VOLUME_REGEX) }
+ .join('_')
+
+ unless bare_title_grouping == Book.grouping_for_title(title)
+ puts "WARNING: title_grouping mismatch: #{bare_title_grouping.inspect} vs. #{Book.grouping_for_title(title).inspect}"
+ end
+
+ return series, vol, title
+ end
+
+ protected
+ def parse_file_name!(file_name)
+ category = nil # e.g., non-fiction, fan-fiction
+ grouping = ''
+
+ parts = file_name.split('/')
+ (series_code, @volume, @title) = processTitle(parts[-1])
+ if parts.length > 1
+ grouping = parts[-2]
+ reading_order = massage_author(grouping)
+ sort_order = nil
+ @author = Author.new(grouping, reading_order, sort_order)
+ @series_id = @store.get_series(grouping, series_code)
+ end
+ if parts.length > 2
+ category = parts[-3]
+ end
+
+ lc_file_name = file_name.downcase
+ if lc_file_name.end_with?(".epub")
+ scanEpub!(file_name)
+ elsif lc_file_name.end_with?(".pdf")
+ scan_pdf!(file_name)
+ end
+
+ @arrived = File.ctime(file_name)
+
+ @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*'))
+
+ # TODO: Fix horrible hard-coded strings and paths
+ if ('01_nonfic' == category) && (nil == classification_id)
+ open(Store.unclassified_csv, 'a') do |fd|
+ fd.puts('"' + grouping.to_s + '","' + path + '"')
+ end
+ end
+ end
+
+ protected
+ def scanEpub!(fileName)
+ #puts 'Scanning "' + fileName.to_s + '"...'
+ begin
+ Zip.warn_invalid_date = false
+ Zip::File.open(fileName) do |zipfile|
+ entry = zipfile.find_entry('META-INF/container.xml')
+ if nil == entry
+ puts 'No META-INF/container.xml, skipping book ' + fileName
+ return
+ end
+ contXml = zipfile.read('META-INF/container.xml')
+ contDoc = Nokogiri::XML(contXml)
+ opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
+
+ scanOpf!(zipfile, opfPath)
+ end
+ rescue Zip::Error => exc
+ puts 'ERROR processing file "' + fileName + '":'
+ puts exc.message
+ puts exc.backtrace
+ end
+ end
+
+ protected
+ def scan_pdf!(file_name)
+ #puts 'Scanning "' + file_name.to_s + '"...'
+
+ pdf_path = File.expand_path(file_name).to_s
+ if ! pdf_path.end_with?('.pdf')
+ puts 'Unexpected internal error: path "' + file_name.to_s + '" does not end with ".pdf".'
+ return
+ end
+
+ jpeg_path = pdf_path[0..-5] + '.jpeg'
+ if File.file?(jpeg_path)
+ File.open(jpeg_path, 'r') do |is|
+ @cover = Cover.new(is, jpeg_path, 'image/jpeg')
+ end
+ end
+ end
+
+
+ protected
+ def scanOpf!(zipfile, opfPath)
+ coverId = nil
+
+ opfXml = zipfile.read(opfPath)
+ opfDoc = Nokogiri::XML(opfXml)
+
+ #-------
+ # Author
+
+ grouping = @author.grouping
+ reading_order = @author.reading_order
+ sort_order = @author.sort_order
+
+ creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
+ if (creators.length > 0)
+ creator = creators[0]
+ if nil != creator
+ role = creator['opf:role']
+ if 'aut' == role
+ reading_order = creator.content
+
+ file_as = creator['opf:file-as']
+ if nil != file_as
+ sort_order = file_as
+ end
+ end
+
+ @author = Author.new(grouping, reading_order, sort_order)
+ end
+ end
+
+ #---------------------------------------
+ # Title
+
+ titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
+ if titles.length > 0
+ title = titles[0]
+ if nil != title
+ @title = title.content
+ end
+ end
+
+ #---------------------------------------
+ # Description
+
+ descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
+ if (descrNodes.length > 0)
+ descrNode = descrNodes[0]
+ if nil != descrNode
+ @description = descrNode.content
+ end
+ end
+
+ #---------------------------------------
+ # Language
+
+ langNodes = opfDoc.css('dc|language', 'dc' => @@DC_NS_URL)
+ if (langNodes.length > 0)
+ langNode = langNodes[0]
+ if langNode
+ @language = langNode.content
+ end
+ end
+
+ #---------------------------------------
+ # Other metadata: series, volume, cover
+
+ metas = opfDoc.css('package metadata meta')
+ for m in metas
+ name = m['name']
+ content = m['content']
+
+ if 'calibre:series' == name
+ # TODO: Dynamically create a new series?
+ # @series_id = content
+ elsif 'calibre:series-index' == name
+ @volume = content
+ elsif 'cover' == name
+ coverId = content
+ #puts 'File ' + @path + ' coverId ' + coverId
+ end
+ end
+
+ #---------------
+ # Load the cover
+
+ @cover = load_cover(zipfile, opfPath, opfDoc, coverId)
+ end
+
+ protected
+ def load_cover(zipfile, opfPath, opfDoc, coverId)
+ coverFile = nil
+ if nil == coverId
+ coverId = "cover-image"
+ end
+
+ items = opfDoc.css('package manifest item')
+ for i in items
+ href = i['href']
+ id = i['id']
+ mimeType = i['media-type']
+
+ if coverId == id
+ entry = zipfile.find_entry(href)
+
+ if nil == entry
+ # Although the epub standard requires the path to be relative
+ # to the base of the epub (zip), some books encountered in the
+ # wild have been found to use a bath relative to the location
+ # of the opf file.
+ parts = opfPath.split('/')
+ opfBasePath = opfPath.split('/')[0..-2].join('/')
+ coverPath = opfBasePath + '/' + href
+ entry = zipfile.find_entry(coverPath)
+ end
+
+ unless entry
+ # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg'
+ if href.start_with? '../'
+ coverPath = href[3..-1]
+ entry = zipfile.find_entry(coverPath)
+ end
+ end
+
+ if nil == entry
+ puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".'
+ return nil
+ else
+ entry.get_input_stream() do |is|
+ return Cover.new(is, href, mimeType)
+ end
+ end
+ end
+ end
+ return nil
+ end
+end
+
diff --git a/app/book_loader.rb b/app/book_loader.rb
new file mode 100644
index 0000000..5516f04
--- /dev/null
+++ b/app/book_loader.rb
@@ -0,0 +1,28 @@
+
+require_relative 'book'
+require_relative 'store'
+
+class BookLoader
+ DONE_MARKER = ''
+
+ def initialize(config_file, queue)
+ @config_file = config_file
+ @queue = queue
+ end
+
+ def run
+ @store = Store.new(@config_file)
+ @store.connect()
+
+ file = @queue.pop
+ until file == DONE_MARKER do
+ book = Book.new(@store)
+ book.load_from_file!(file)
+ @store.store_book(book)
+
+ file = @queue.pop
+ end
+
+ @store.disconnect()
+ end
+end
diff --git a/app/classification.rb b/app/classification.rb
new file mode 100644
index 0000000..2061e46
--- /dev/null
+++ b/app/classification.rb
@@ -0,0 +1,75 @@
+
+class Classification
+ def initialize(ddc, lcc, author_grouping, author, title_grouping, title)
+ @id = nil
+ @ddc = ddc
+ @lcc = lcc
+ @author_grouping = author_grouping
+ @author = author
+ @title_grouping = title_grouping
+ @title = title
+ end
+
+ def id
+ @id
+ end
+ def id=(value)
+ @id = value
+ end
+
+ def ddc
+ @ddc
+ end
+ def lcc
+ @lcc
+ end
+ def author_grouping
+ @author_grouping
+ end
+ def author
+ @author
+ end
+ def
+
+ def inspect
+ data = []
+
+ if nil != @ddc
+ data.push('Dewey=' + @ddc.to_s)
+ end
+ if nil != @lcc
+ data.push('LCC=' + @lcc.to_s)
+ end
+ if nil != @author_grouping
+ data.push('author_grouping=' + @author_grouping.to_s)
+ end
+ if nil != @author
+ data.push('author=' + @author.to_s)
+ end
+ if nil != @title_grouping
+ data.push('title_grouping=' + @title_grouping.to_s)
+ end
+ if nil != @title
+ data.push('title=' + @title)
+ end
+
+ return '(Classification:' + data.join(',') + ')'
+ end
+
+ def to_s
+ inspect
+ end
+
+ protected
+ def reading_to_sort_order(reading_order)
+ sort_order = reading_order
+
+ parts = reading_order.split(' ')
+ if parts.length > 1
+ sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
+ end
+
+ return sort_order
+ end
+end
+
diff --git a/app/cover.rb b/app/cover.rb
new file mode 100644
index 0000000..e74c27b
--- /dev/null
+++ b/app/cover.rb
@@ -0,0 +1,57 @@
+
+class Cover
+ attr_reader :path
+
+ def initialize(inputStream, path, mimeType)
+ if nil != inputStream
+ @data = inputStream.read
+ else
+ @data = nil
+ end
+ @path = path
+ @mimeType = mimeType
+ end
+
+ def inspect
+ info = []
+ if nil != @data
+ info.push('size=' + @data.length.to_s)
+ else
+ info.push('empty')
+ end
+ if nil != @path
+ info.push('path="' + @path + '"')
+ end
+ if nil != @mimeType
+ info.push('mimeType="' + @mimeType + '"')
+ end
+ return '(Cover:' + info.join(',') + ')'
+ end
+
+ def read_image(filename)
+ open(filename, 'rb') do |fd|
+ @data = fd.read()
+ end
+ end
+
+ def to_s
+ return inspect
+ end
+
+ def write_image(outputDir, filename)
+ open(outputDir + '/' + filename, 'wb') do |fd|
+ fd.write(@data)
+ end
+ return filename, @mimeType
+ end
+
+ protected
+ def getExt
+ pos = @path.rindex('.')
+ if nil == pos
+ return '.img'
+ end
+ return @path.slice(pos, @path.length)
+ end
+end
+
diff --git a/app/extract.rb b/app/extract.rb
new file mode 100644
index 0000000..c695941
--- /dev/null
+++ b/app/extract.rb
@@ -0,0 +1,50 @@
+require 'find'
+require 'pathname'
+require 'shellwords'
+
+def exec(cmdline)
+ puts "$ #{cmdline}"
+ result = system(cmdline)
+ unless result
+ puts "FAILED: #{cmdline}"
+ end
+ result
+end
+
+def extract_epub(source_file, source_path, dest_path)
+ relative_path = source_file[source_path.length .. source_file.length]
+ dest_file = "#{dest_path}/#{relative_path}"
+ dest_file = dest_file[0 .. (dest_file.length - 6)] + ".txt"
+
+ required_path = Pathname(dest_file).dirname
+ unless File.directory? required_path
+ unless exec("mkdir -p #{required_path}")
+ return false
+ end
+ end
+
+ if File.exist? dest_file
+ source_time = File.mtime source_file
+ dest_time = File.mtime dest_file
+ comp = dest_time <=> source_time
+ if comp > 0
+ return true # Nothing to do, extraction is already up-to-date
+ end
+ end
+
+ exec("ebook-convert #{source_file} #{dest_file}")
+end
+
+def scan_dir(source_path, dest_path)
+ Find.find(source_path) do |f|
+ if f.match(/.epub\Z/)
+ unless (f.match(/_bis.epub\Z/) || f.match(/_ter.epub\Z/) || f.match(/_quater.epub\Z/))
+ extract_epub(f, source_path, dest_path)
+ end
+ end
+ end
+end
+
+dest_path = ARGV[0]
+for arg in ARGV[1 .. ARGV.length]
+ scan_dir(arg, dest_path)
+end
diff --git a/app/main.rb b/app/main.rb
new file mode 100644
index 0000000..e294b4a
--- /dev/null
+++ b/app/main.rb
@@ -0,0 +1,56 @@
+require_relative 'navigator'
+require_relative 'page'
+require_relative 'store'
+require_relative 'walk_dir'
+
+@outputDir = 'output'
+
+@config_file = 'quanlib.ini'
+@skip_class = false
+
+def handleArg(arg)
+ if arg.start_with?("--config=")
+ @config_file = arg[9..-1]
+ puts 'Using config file "' + @config_file + '".'
+ elsif "--purge" == arg
+ puts 'Purging database...'
+ @store.dropSchema()
+ if File.exists?(@store.unclassified_csv)
+ File.delete(@store.unclassified_csv)
+ end
+ elsif "--skip-class" == arg
+ puts 'Skipping load of classification table.'
+ @skip_class = true
+ elsif arg.start_with?("--")
+ abort('ERROR: Unrecognized option "' + arg + '".')
+ end
+end
+
+@store = Store.new(@config_file)
+@store.connect()
+
+for arg in ARGV
+ handleArg(arg)
+end
+
+@store.init_db(@skip_class)
+
+for arg in ARGV
+ if ! arg.start_with?("--")
+ puts 'Scanning directory "' + arg + '"...'
+ w = WalkDir.new(@config_file, arg)
+ w.books
+ end
+end
+
+@store.cross_reference_lists
+
+puts 'Creating output...'
+
+navigator = Navigator.new(@store)
+navigator.write_atoz_pages()
+navigator.write_series_listing()
+navigator.write_dewey()
+
+@store.disconnect()
+
diff --git a/app/navigator.rb b/app/navigator.rb
new file mode 100644
index 0000000..881b1fa
--- /dev/null
+++ b/app/navigator.rb
@@ -0,0 +1,157 @@
+require_relative 'page'
+require_relative 'store'
+
+class Navigator
+ # store - kept in @store; used for every query and passed on to each Page.
+ def initialize(store)
+ @store = store
+ end
+
+ # Writes one page per initial letter A-Z (via write_authors_starting_with)
+ # and then an index page in 'atoz' showing the book count for each letter.
+ #
+ # NOTE(review): the string literals that build `content` span several
+ # lines and contain only cell separators; this looks like table markup
+ # was stripped from this copy of the file - confirm against the repository.
+ def write_atoz_pages
+ atoz_counts = {}
+
+ # Each per-letter page reports how many books it lists.
+ ('A'..'Z').each do |letter|
+ atoz_counts[letter] = write_authors_starting_with(letter)
+ end
+
+ content = 'Author | Books |
'
+ ('A'..'Z').each do |letter|
+ content += ' Starting with ' + letter + ' | ' + atoz_counts[letter].to_s + ' |
'
+ end
+ content += '
'
+ # The index page has no book list of its own, only the special content.
+ page = Page.new(@store)
+ page.output_dir = 'atoz'
+ page.special = content
+ page.up = ['../output/index.html', 'Up']
+
+ page.write_html( [] )
+ end
+
+ def write_authors_starting_with(letter)
+ book_ids = @store.query_books_by_author(letter + '%')
+ puts 'Authors starting with "' + letter + '": ' + book_ids.length.to_s() + ' books.'
+
+ page = Page.new(@store)
+ if 'A' != letter
+ page.back = ['../atoz/output_' + (letter.ord - 1).chr + '.html', 'Prev']
+ end
+ if 'Z' != letter
+ page.forward = ['../atoz/output_' + (letter.ord + 1).chr + '.html', 'Next']
+ end
+ page.output_dir = 'atoz'
+ page.index_file = 'output_' + letter + '.html'
+ page.title = "Authors starting with '" + letter + "'"
+ page.up = ['../atoz/index.html', 'Up']
+
+ page.write_html(book_ids)
+ return book_ids.length
+ end
+
+ def write_dewey
+ book_ids = @store.query_books_by_ddc()
+ puts 'Non-fiction books arranged by Dewey Decimal Classification: ' + book_ids.length.to_s() + ' books.'
+
+ page = Page.new(@store)
+ page.output_dir = 'ddc'
+ page.index_file = 'index.html'
+ page.title = "Non-fiction books arranged by Dewey Decimal call number"
+ page.up = ['../output/index.html', 'Up']
+
+ page.write_html(book_ids)
+ return book_ids.length
+ end
+
+ # Writes the pages for all series of one reader age group: a page per
+ # series that has books, plus an index page listing those series.
+ # Output goes to 'series/series_<age>'.
+ #
+ # age - age-group name; write_series_listing passes 'beginner', 'junior',
+ #       'ya' and 'adult'.
+ # Returns the number of series written.
+ #
+ # NOTE(review): the literals that build `content` span several lines and
+ # contain only cell separators, and `letter` is computed but never used;
+ # this looks like table and link markup was stripped from this copy of the
+ # file - confirm against the repository.
+ def write_series_for_age(age)
+ series_infos = []
+
+ series_ids = @store.query_series_by_age(age)
+
+ # Keep only series that actually contain books.
+ series_ids.each do |id|
+ series = @store.load_series(id)
+ book_ids = @store.query_books_by_series_id(id)
+ if nil != book_ids and book_ids.length > 0
+ series_infos.push( [series, book_ids] )
+ end
+ end
+
+ # One page per series, linked to its neighbours in the list.
+ for idx in 0 .. (series_infos.length - 1) do
+ #puts series.descr + ': ' + book_ids.length.to_s + ' books.'
+
+ back = nil
+ fwd = nil
+
+ if idx > 0
+ back = series_infos[idx-1]
+ end
+ if (idx + 1) < series_infos.length
+ fwd = series_infos[idx+1]
+ end
+
+ cur = series_infos[idx]
+ series = cur[0]
+ book_ids = cur[1]
+
+ page = Page.new(@store)
+ if nil != back
+ page.back = [back[0].key + '.html', 'Back']
+ end
+ if nil != fwd
+ page.forward = [fwd[0].key + '.html', 'Forward']
+ end
+ page.output_dir = 'series/series_' + age
+ page.index_file = series.key + '.html'
+ page.title = 'Series “' + series.descr + '” (' + book_ids.length.to_s + ' books)'
+ page.up = ['index.html', 'Up']
+
+ page.write_html(book_ids)
+ end
+
+ # Index page for the age group: one row per series.
+ content = '“' + age + '” Series
'
+ content += 'Author | Series | Genre | Books |
'
+ series_infos.each do |cur|
+ series = cur[0]
+ book_ids = cur[1]
+
+ author = series.grouping
+ letter = author[0]
+
+ content += ' '
+ content += '' + author + ' | '
+ content += '' + series.descr + ' | '
+ content += '' + series.genre + ' | '
+ content += '' + book_ids.length.to_s + ' | '
+ content += '
'
+ end
+ content += '
'
+ page = Page.new(@store)
+ page.output_dir = 'series/series_' + age
+ page.special = content
+ page.up = ['../index.html', 'Up']
+ page.write_html( [] )
+
+ return series_infos.length
+ end
+
+ # Writes the series pages for every age group and then the top-level
+ # 'series' index page showing the number of series per group.
+ #
+ # NOTE(review): the literals that build `content` span several lines and
+ # one is empty; this looks like table and link markup was stripped from
+ # this copy of the file - confirm against the repository.
+ def write_series_listing
+ ages = ['beginner', 'junior', 'ya', 'adult']
+ series_counts = {}
+
+ ages.each do |age|
+ puts 'Series for "' + age + '" readers...'
+
+ series_counts[age] = write_series_for_age(age)
+ end
+
+ content = 'Browse Books By Series
'
+ content += ''
+ content += '
Age | Number of Series |
'
+ ages.each do |age|
+ content += '' + age + ' | ' + series_counts[age].to_s + ' |
'
+ end
+ content += '
'
+ # The index page has no book list of its own, only the special content.
+ page = Page.new(@store)
+ page.output_dir = 'series'
+ page.special = content
+ page.up = ['../output/index.html', 'Up']
+ page.write_html( [] )
+ end
+end
diff --git a/app/page.rb b/app/page.rb
new file mode 100644
index 0000000..638f9ad
--- /dev/null
+++ b/app/page.rb
@@ -0,0 +1,166 @@
+require 'fileutils'
+
+require_relative 'store'
+
+class Page
+ # Sets up a page with its defaults: no back/forward/up links, no special
+ # content, the title 'Books', written to 'output/index.html'.
+ #
+ # store - kept in @store; write_books calls load_book on it.
+ def initialize(store)
+ @back = nil
+ @forward = nil
+ @index_file = 'index.html'
+ @output_dir = 'output'
+ @special = nil
+ @store = store
+ @title = 'Books'
+ @up = nil
+ end
+
+ def back=(value)
+ @back = value
+ end
+
+ def forward=(value)
+ @forward = value
+ end
+
+ def index_file=(value)
+ @index_file = value
+ end
+
+ # Renders one navigation link.
+ #
+ # data - nil, or an array whose second element (data[1]) is the link text.
+ # Returns '' for nil, otherwise a string built from data[1].
+ #
+ # NOTE(review): only data[1] is used, wrapped in empty strings; this looks
+ # like anchor markup (and the use of data[0]) was stripped from this copy
+ # of the file - confirm against the repository.
+ def navig_link(data)
+ if (nil == data)
+ return ''
+ end
+ return '' + data[1] + ''
+ end
+
+ def output_dir=(value)
+ @output_dir = value
+ end
+
+ def special=(value)
+ @special = value
+ end
+
+ def title=(value)
+ @title = value
+ end
+
+ def up=(value)
+ @up = value
+ end
+
+ # Writes one entry per book: its cover (or a placeholder text) followed by
+ # its heading, with the description where one exists.
+ #
+ # fd       - open output stream
+ # book_ids - ids passed one by one to @store.load_book
+ #
+ # NOTE(review): `path` is assigned but never used, `image` is set to an
+ # empty string, and several literals are empty or split across lines; this
+ # looks like image and table markup was stripped from this copy of the
+ # file - confirm against the repository.
+ def write_books(fd, book_ids)
+ for id in book_ids
+ book = @store.load_book(id)
+ image = nil
+ if nil != book.cover
+ #@imageCount += 1
+ #(path, mimeType) = book.cover.write_image(@output_dir, 'image' + @imageCount.to_s)
+ #image = ''
+ path = book.cover.path
+ image = ''
+ else
+ image = '(No cover image)'
+ end
+
+ fd.puts ' '
+ fd.puts ' ' + image + ' | '
+
+ heading = book.heading()
+ description = book.description()
+ if nil != description
+ fd.puts ' |
'
+ else
+ fd.puts ' ' + heading + ' | '
+ end
+
+ fd.puts '
'
+ end
+ end
+
+ # Writes the navigation links (back, up, forward) at the bottom of the page.
+ #
+ # NOTE(review): the literal is split across two lines; surrounding markup
+ # appears to have been stripped from this copy of the file.
+ def write_footer(fd)
+ fd.puts ' ' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '
'
+ end
+
+ # Writes the top of the page body, ending with the navigation links
+ # (back, up, forward).
+ #
+ # NOTE(review): the first literal is blank and the second is split across
+ # two lines; surrounding markup appears to have been stripped from this
+ # copy of the file.
+ def write_header(fd)
+ fd.puts ' '
+
+ fd.puts ' ' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '
'
+ end
+
+ def write_html(book_ids)
+ @imageCount = 0
+
+ if ! Dir.exist?(@output_dir)
+ FileUtils.mkdir_p(@output_dir)
+ end
+
+ open(@output_dir + '/' + @index_file, 'w') do |fd|
+ fd.puts ''
+ fd.puts ' '
+ fd.puts ' '
+ fd.puts ' ' + @title + ''
+
+ write_style_sheet(fd)
+
+ fd.puts ' '
+ fd.puts ' '
+
+ write_header(fd)
+
+ write_special(fd)
+ write_books(fd, book_ids)
+
+ write_footer(fd)
+
+ fd.puts " "
+ fd.puts ""
+ end
+ end
+
+ def write_special(fd)
+ if (nil != @special)
+ fd.puts(@special)
+ end
+ end
+
+ # Writes the page's CSS rules to fd.
+ #
+ # NOTE(review): the heredoc opener after `style =` is truncated to a lone
+ # `<` although the text still ends with `EOS`, so this method does not
+ # parse as it stands; the opener and any enclosing style tags appear to
+ # have been stripped from this copy of the file - restore them from the
+ # repository.
+ # NOTE(review): in the `div` rule, `border 3px solid #73ad21;` has no colon
+ # after `border`, so that declaration is not valid CSS.
+ def write_style_sheet(fd)
+ style =
+<
+ div {
+ display: inline-block;
+ width: 400px;
+ margin: 10px;
+ border 3px solid #73ad21;
+ }
+ h1.header {
+ background: #4040a0;
+ color: #ffffff;
+ text-align: center;
+ }
+ img.cover-thumb { max-height: 200px; max-width: 200px; }
+ p.navigator { }
+ span.popup { }
+ span.popup:hover { text-decoration: none; background: #cfffff; z-index: 6; }
+ span.popup span.pop-inner {
+ border-color:black;
+ border-style:solid;
+ border-width:1px;
+ display: none;
+ margin: 4px 0 0 0px;
+ padding: 3px 3px 3px 3px;
+ position: absolute;
+ }
+ span.popup:hover span.pop-inner {
+ background: #ffffaf;
+ display: block;
+ margin: 20px 0 0 0px;
+ z-index:6;
+ }
+
+EOS
+ fd.puts style
+ end
+end
+
diff --git a/app/series.rb b/app/series.rb
new file mode 100644
index 0000000..0621876
--- /dev/null
+++ b/app/series.rb
@@ -0,0 +1,87 @@
+
+class Series
+ def initialize(id)
+ @age = nil
+ @genre = nil
+ @grouping = nil
+ @code = nil
+ @descr = nil
+ @id = id
+ end
+
+ def age
+ @age
+ end
+
+ def age=(value)
+ @age = value
+ end
+
+ def code
+ @code
+ end
+
+ def code=(value)
+ @code = value
+ end
+
+ def descr
+ @descr
+ end
+
+ def descr=(value)
+ @descr = value
+ end
+
+ def genre
+ @genre
+ end
+
+ def genre=(value)
+ @genre = value
+ end
+
+ def grouping
+ @grouping
+ end
+
+ def grouping=(value)
+ @grouping = value
+ end
+
+ def id
+ @id
+ end
+
+ def inspect
+ data = []
+ if nil != @age
+ data.push('age="' + @age.inspect + '"')
+ end
+ if nil != @code
+ data.push('code="' + @code.inspect + '"')
+ end
+ if nil != @descr
+ data.push('descr="' + @descr + '"')
+ end
+ if nil != @genre
+ data.push('genre="' + @genre + '"')
+ end
+ if nil != @grouping
+ data.push('grouping="' + @grouping + '"')
+ end
+ return '(Series:' + data.join(',') + ')'
+ end
+
+ def key
+ if nil != grouping and nil != code
+ return grouping.to_s + '_' + code.to_s
+ end
+ return id.to_s
+ end
+
+ def to_s
+ return inspect()
+ end
+end
+
diff --git a/app/store.rb b/app/store.rb
new file mode 100644
index 0000000..1a33ca3
--- /dev/null
+++ b/app/store.rb
@@ -0,0 +1,655 @@
+
+require 'csv'
+require 'fileutils'
+require 'inifile'
+require 'pg'
+
+require_relative 'series'
+require_relative 'tconn'
+
+class Store
+ def unclassified_csv
+ @basePath + '/csv/unclassified.csv'
+ end
+
+ def initialize(config_file)
+ @conn = nil
+
+ config = IniFile.load(config_file)
+ if nil == config
+ puts 'FATAL: Failed to load config file "' + config_file + '". Aborting initialization.'
+ return
+ end
+
+ section = config['database']
+ @dbhost = section['host']
+ @dbport = 5432
+ @dbname = section['name']
+ @dbuser = section['user']
+ @dbpass = section['pass']
+
+ section = config['filesystem']
+ @basePath = section['basePath']
+ end
+
+ def connect
+ @conn = TimedConn.new(PG.connect(@dbhost, @dbport, '', '', @dbname, @dbuser, @dbpass))
+ return @conn
+ end
+
+ def disconnect
+ @conn.close()
+ end
+
+ def construct_efs_path(efs_id)
+ id_str = sprintf('%010d', efs_id)
+ path = sprintf('%s/%s/%s/%s', id_str[0,2], id_str[2,2], id_str[4,2], id_str[6,2])
+ name = id_str + '.dat'
+ return path, name
+ end
+
+ def cross_reference_lists
+puts "@@@@@@@@@@@ CROSS-REF START @@@@@@@@@@@"
+ exec_update("TRUNCATE TABLE Lists CASCADE;", [])
+
+ populate_lists_table
+puts "@@@@@@@@@@@ CROSS-REF DONE @@@@@@@@@@@"
+ end
+
+ def create_schema(skip_class)
+ create_authors =
+< exc
+ puts 'WARNING: "' + stmt + '" failed: ' + exc.to_s
+ end
+ end
+ end
+
+ def find_all_authors(author_name)
+ result = []
+
+ sqlSelect = "SELECT id FROM Authors WHERE grouping=$1;"
+ args = [author_name]
+
+ @conn.exec_params(sqlSelect, args) do |rs|
+ rs.each do |row|
+ result << row['id']
+ end
+ end
+
+ result
+ end
+
+ def find_author(author)
+ sqlSelect = "SELECT id FROM Authors WHERE grouping=$1 AND reading=$2 AND sort=$3;"
+ args = [author.grouping, author.reading_order, author.sort_order]
+
+ @conn.exec_params(sqlSelect, args) do |rs|
+ if rs.ntuples > 0
+ return rs[0]['id']
+ end
+ end
+
+ return nil
+ end
+
+ def init_db(skip_class)
+ sql = "SELECT 1 FROM pg_tables WHERE tableowner='quanlib' AND tablename='books'"
+ found = false
+ @conn.exec(sql).each do |row|
+ found = true
+ end
+
+ if ! found
+ create_schema(skip_class)
+ end
+ end
+
+  # Loads the Authors row with the given id and returns it as an Author.
+  #
+  # Raises RuntimeError when the query does not yield exactly one row.
+  def load_author(id)
+    sqlSelect = "SELECT grouping, reading, sort FROM Authors WHERE id=$1"
+    args = [id]
+    @conn.exec_params(sqlSelect, args) do |rs|
+      if rs.ntuples != 1
+        # Interpolate rather than concatenate: ntuples is numeric, so
+        # String#+ raised TypeError here and hid the intended message.
+        raise "Expected 1 row for #{id} but got #{rs.ntuples}: #{sqlSelect}"
+      end
+      row = rs[0]
+      author = Author.new(row['grouping'], row['reading'], row['sort'])
+      return author
+    end
+    return nil
+  end
+
+  # Returns the id of the Authors row matching the given author, inserting a
+  # new row first when find_author finds none.
+  #
+  # An INSERT failure is reported on stdout; the freshly allocated id is
+  # still returned in that case.
+  def store_author(author)
+    id = find_author(author)
+    if nil == id
+      id = next_id('author_id')
+      sqlInsert = "INSERT INTO Authors(id, grouping, reading, sort) VALUES ($1, $2, $3, $4);"
+      args = [id, author.grouping, author.reading_order, author.sort_order]
+      begin
+        rs = @conn.exec_params(sqlInsert, args)
+      rescue StandardError => e
+        # StandardError, not Exception: Interrupt and SystemExit must be
+        # allowed to stop the program instead of being logged and ignored.
+        puts sqlInsert + ": " + args.inspect()
+        puts e.message
+        puts e.backtrace
+      ensure
+        rs.clear if rs
+      end
+    end
+    return id
+  end
+
+  # Loads the Books row with the given id, plus its author and cover, and
+  # returns it as a Book.
+  #
+  # Errors are reported on stdout instead of propagating; the method then
+  # returns whatever had been built so far (nil when the row lookup itself
+  # failed, e.g. because the id matched zero or several rows).
+  def load_book(id)
+    sql = "SELECT author, classification, cover, description, language, path, series, title, volume FROM Books WHERE id=$1;"
+    book = nil
+
+    begin
+      @conn.exec_params(sql, [id]) do |rs|
+        if 1 != rs.ntuples
+          # Interpolate rather than concatenate: the row count is numeric, so
+          # String#+ raised TypeError and replaced the intended message.
+          raise "Expected one row in Books for id #{id}, but found #{rs.ntuples}."
+        end
+        row = rs[0]
+
+        book = Book.new(self)
+        book.author = load_author(row['author'])
+        book.classification_id = row['classification']
+        book.cover = load_cover(row['cover'])
+        book.description = row['description']
+        book.language = row['language']
+        book.path = row['path']
+        book.series_id = row['series']
+        book.title = row['title']
+        book.volume = row['volume']
+      end
+    rescue StandardError => e
+      # StandardError, not Exception, so Interrupt/SystemExit still propagate.
+      # Interpolation keeps the handler itself from raising on a non-String id.
+      puts "#{sql}: #{id}"
+      puts e.message
+      puts e.backtrace
+    end
+
+    return book
+  end
+
+  # Inserts the given book into the Books table, storing its author and cover
+  # first, and returns the newly allocated book id.
+  #
+  # An INSERT failure is reported on stdout; the allocated id is still
+  # returned in that case.
+  def store_book(book)
+    sql = "INSERT INTO Books (id, arrived, author, classification, cover, description, language, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);"
+
+    book_id = next_id('book_id')
+
+    author_id = store_author(book.author)
+    # store_cover returns nil when there is no cover, leaving efs_id nil.
+    efs_id, _mime_type = store_cover(book)
+
+    args = [book_id, book.arrived, author_id, book.classification_id, efs_id, book.description, book.language, book.path, book.series_id, book.title, book.volume]
+
+    begin
+      rs = @conn.exec_params(sql, args)
+    rescue StandardError => e
+      # StandardError, not Exception: Interrupt and SystemExit must be
+      # allowed to stop the program instead of being logged and ignored.
+      puts sql + ": " + args.inspect()
+      puts e.message
+      puts e.backtrace
+    ensure
+      rs.clear if rs
+    end
+
+    return book_id
+  end
+
+ def find_classification(author_grouping, title_grouping)
+ sql = "SELECT id FROM Classifications WHERE author_grouping = $1 AND title_grouping = $2;"
+ @conn.exec_params(sql, [author_grouping, title_grouping]) do |rs|
+ if rs.ntuples > 0
+ return rs[0]['id']
+ end
+ end
+ return nil
+ end
+
+ def load_classification(id)
+ sql = "SELECT ddc, lcc, author_grouping, author_sort, title_grouping, title "
+ sql += " FROM Classifications WHERE id=$1"
+ @conn.exec_params(sql, [id]) do |rs|
+ if rs.ntuples > 0
+ row = rs[0]
+ ddc = row['ddc']
+ lcc = row['lcc']
+ author_grouping = row['author_grouping']
+ author = row['author_sort']
+ title_grouping = row['title_grouping']
+ title = row['title']
+
+ result = Classification.new(ddc, lcc, author_grouping, author, title_grouping, title)
+ result.id = id
+ return result
+ end
+ end
+
+ return nil
+ end
+
+  # Builds a Cover for the Efs entry with the given id, pointing at the image
+  # file under <basePath>/efs/. Returns nil when id is nil.
+  #
+  # Raises RuntimeError when the Efs lookup does not yield exactly one row.
+  def load_cover(id)
+    if nil == id
+      return nil
+    end
+
+    mime_type = 'application/octet-stream'
+
+    sql = "SELECT mimeType FROM Efs WHERE id=$1"
+    @conn.exec_params(sql, [id]) do |rs|
+      if rs.ntuples != 1
+        # Interpolate rather than concatenate: ntuples is numeric, so
+        # String#+ raised TypeError here and hid the intended message.
+        raise "Expected one row but got #{rs.ntuples}: #{sql}: #{id}"
+      end
+      # NOTE(review): PostgreSQL folds unquoted identifiers to lower case, so
+      # this key may need to be 'mimetype' -- confirm against the schema.
+      mime_type = rs[0]['mimeType']
+    end
+
+    (efspath, efsname) = construct_efs_path(id)
+
+    fullpath = @basePath + '/efs/' + efspath + '/' + efsname
+
+    return Cover.new(nil, fullpath, mime_type)
+  end
+
+  # Writes the book's cover image into the EFS directory tree and records it
+  # in the efs table.
+  #
+  # Returns [efs_id, mimetype], or nil when the book has no cover or no id
+  # could be allocated. An INSERT failure is reported on stdout and the pair
+  # is still returned.
+  def store_cover(book)
+    efs_id = nil
+    cover = book.cover()
+
+    if nil == cover
+      return nil
+    end
+
+    @conn.exec("SELECT nextval('efs_id')") do |rs|
+      efs_id = rs[0]['nextval']
+    end
+
+    if nil == efs_id
+      return nil
+    end
+
+    (efspath, efsname) = construct_efs_path(efs_id)
+
+    efspath = @basePath + '/efs/' + efspath
+
+    FileUtils.mkdir_p(efspath)
+
+    (_filepath, mimetype) = cover.write_image(efspath, efsname)
+
+    sql = "INSERT INTO efs VALUES ($1, $2)"
+    begin
+      rs = @conn.exec_params(sql, [efs_id, mimetype])
+    rescue StandardError => e
+      # Interpolation tolerates a nil mimetype; String#+ raised TypeError
+      # from inside this handler and masked the original failure.
+      puts "#{sql}: #{efs_id}, #{mimetype}"
+      puts e.message
+      puts e.backtrace
+    ensure
+      rs.clear if rs
+    end
+
+    return efs_id, mimetype
+  end
+
+ def exec_id_query(sql, args)
+ ids = []
+ @conn.exec_params(sql, args) do |rs|
+ rs.each do |row|
+ ids.push(row['id'])
+ end
+ end
+ return ids
+ end
+
+  # Executes a parameterised statement whose result rows are not needed.
+  #
+  # Failures are reported on stdout (statement, arguments, message and
+  # backtrace) and do not propagate to the caller.
+  def exec_update(sql, args)
+    begin
+      rs = @conn.exec_params(sql, args)
+    rescue StandardError => e
+      # StandardError, not Exception: Interrupt and SystemExit must be
+      # allowed to stop a long-running load instead of being logged.
+      puts sql + ": " + args.inspect()
+      puts e.message
+      puts e.backtrace
+    ensure
+      rs.clear if rs
+    end
+  end
+
+ def next_id(seq_name)
+ id = nil
+ @conn.exec("SELECT nextval('" + seq_name + "');") do |rs|
+ id = rs[0]['nextval']
+ end
+ return id
+ end
+
+ def get_series(grouping, code)
+ if nil == code
+ return nil
+ end
+
+ sql = "SELECT id FROM Series WHERE grouping=$1 AND code=$2;"
+ args = [grouping, code]
+ @conn.exec_params(sql, args).each do |row|
+ return row['id']
+ end
+
+ # TODO: Create a new series object here?
+ puts 'WARNING: series("' + grouping + '", "' + code + '") not found.'
+ return nil
+ end
+
+ def load_series(id)
+ sql = "SELECT age,genre,grouping,code,descr FROM Series WHERE id=$1;"
+ args = [id]
+ @conn.exec_params(sql, args) do |rs|
+ if rs.ntuples > 0
+ row = rs[0]
+ series = Series.new(id)
+ series.age = row['age']
+ series.genre = row['genre']
+ series.grouping = row['grouping']
+ series.code = row['code']
+ series.descr = row['descr']
+ return series
+ end
+ end
+ return nil
+ end
+
+  # Loads <basePath>/csv/class.csv into the Classifications table and links
+  # each classification to its FAST entries.
+  #
+  # Columns are read by position: ddc, lcc, author_grouping, author_sort,
+  # title_grouping, title, then a ';'-separated list of FAST ids (which may
+  # be empty). The first row is treated as a header and skipped.
+  def populate_classifications_table
+    puts "Populating the Classifications table..."
+    first = true
+    CSV.foreach(@basePath + '/csv/class.csv') do |row|
+      if first
+        # skip the header row
+        first = false
+      else
+
+        # First, add a row to the Classifications table
+
+        id = next_id('classification_id')
+        ddc = row[0]
+        lcc = row[1]
+        author_grouping = row[2]
+        author_sort = row[3]
+        title_grouping = row[4]
+        title = row[5]
+
+        sqlInsert = "INSERT INTO Classifications (id, ddc, lcc, author_grouping, author_sort, title_grouping, title) VALUES ($1, $2, $3, $4, $5, $6, $7);"
+        args = [id, ddc, lcc, author_grouping, author_sort, title_grouping, title]
+        exec_update(sqlInsert, args)
+
+        # Second, link up with the appropriate FAST table entries
+
+        fast = []
+        input = row[6]
+        # CSV yields nil for an empty (or missing) field, so guard before
+        # calling String methods; a row without FAST ids is simply not linked.
+        if input && input.length > 0
+          fast = input.split(';')
+        end
+
+        fast.each do |fast_id|
+          sqlInsert = "INSERT INTO FAST_Classifications (fast, classification) VALUES ($1, $2);"
+          args = [fast_id, id]
+          exec_update(sqlInsert, args)
+        end
+      end
+    end
+  end
+
+ def populate_fast_table
+ puts "Populating the FAST table..."
+ first = true
+ CSV.foreach(@basePath + '/csv/fast.csv') do |row|
+ if first
+ first = false # skip the header row
+ else
+ id = row[0]
+ descr = row[1]
+ sqlInsert = "INSERT INTO FAST (id, descr) VALUES ($1, $2);"
+ exec_update(sqlInsert, [id, descr])
+ end
+ end
+ end
+
+ def populate_lists_table
+ puts "Populating the Lists table..."
+
+ CSV.foreach(@basePath + "/csv/lists.csv", headers: true) do |row|
+ author_ids = find_all_authors(row['author'])
+ if author_ids.empty?
+ specification = [row['age'], row['category'], row['code'], row['year'], row['author'], row['title']]
+ .map { |x| x.inspect }
+ .join(', ')
+
+ puts "WARNING: For list entry (#{specification}), no such author was found."
+
+ next
+ end
+
+ sqlInsert = %Q(
+ INSERT INTO Lists (id, age, category, code, year, author, title)
+ VALUES ($1, $2, $3, $4, $5, $6, $7);
+ )
+ author_ids.each do |author_id|
+ list_id = next_id('list_id')
+ args = [list_id, row['age'], row['category'], row['code'], row['year'], author_id, row['title']]
+ exec_update(sqlInsert, args)
+
+ update_lists_books_table(list_id, author_id, row['title'])
+ end
+ end
+ end
+
+ # Scan for books that match this Lists entry, and add any matches to the Lists_Books associative table
+ def update_lists_books_table(list_id, author_id, title)
+ title_pattern = Book.grouping_for_title(title).gsub('_', '%')
+ sqlSelect = "SELECT id FROM Books WHERE author = $1 AND title LIKE $2;"
+ args = [author_id, title_pattern]
+
+ @conn.exec_params(sqlSelect, args) do |rs|
+ rs.each do |row|
+ sqlInsert = "INSERT INTO Lists_Books (list, book) VALUES ($1, $2)"
+ args = [list_id, row['id']]
+ exec_update(sqlInsert, args)
+ end
+ end
+ end
+
+ def populate_series_table
+ puts "Populating the Series table..."
+ CSV.foreach(@basePath + '/csv/series.csv') do |row|
+ id = next_id('series_id')
+ sqlInsert = "INSERT INTO Series (id, age, genre, grouping, code, descr) VALUES ($1, $2, $3, $4, $5, $6);"
+ args = [id] + row
+ exec_update(sqlInsert, args)
+ end
+ end
+
+ def query_books_by_author(pattern)
+ sql =
+< 0)
+ result.concat(sub)
+ end
+ elsif (! File.directory?(fullName))
+ result.push(fullName)
+ end
+ end
+ return result
+ end
+
+  def num_threads
+    # TODO: make this (auto?) configurable
+    12
+  end
+end
diff --git a/author.rb b/author.rb
deleted file mode 100644
index fb2003b..0000000
--- a/author.rb
+++ /dev/null
@@ -1,57 +0,0 @@
-
-class Author
- def initialize(grouping, reading_order, sort_order)
- @grouping = grouping
- @reading_order = reading_order
- @sort_order = sort_order
-
- if (nil == sort_order) || ('Unknown' == sort_order)
- @sort_order = reading_to_sort_order(reading_order)
- end
- end
-
- def grouping
- @grouping
- end
-
- def reading_order
- @reading_order
- end
-
- def sort_order
- @sort_order
- end
-
- def inspect
- result = '(Author:'
- if nil != @grouping
- result += ' grouping="' + @grouping + '"'
- end
- if nil != @reading_order
- result += ' reading_order="' + @reading_order + '"'
- end
- if nil != @sort_order
- result += ' sort_order="' + @sort_order + '"'
- end
- result += ')'
-
- return result
- end
-
- def to_s
- inspect
- end
-
- protected
- def reading_to_sort_order(reading_order)
- sort_order = reading_order
-
- parts = reading_order.split(' ')
- if parts.length > 1
- sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
- end
-
- return sort_order
- end
-end
-
diff --git a/book.rb b/book.rb
deleted file mode 100644
index 2b93f4b..0000000
--- a/book.rb
+++ /dev/null
@@ -1,421 +0,0 @@
-
-require 'nokogiri'
-require 'rubygems'
-require 'zip'
-
-require_relative 'author'
-require_relative 'classification'
-require_relative 'cover'
-require_relative 'store'
-
-class Book
- @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
- @@SERIES_AND_VOLUME_REGEX = /^([A-Z]+)([0-9]+(\.[0-9]+)?)$/
-
- attr_accessor :arrived
- attr_accessor :author
- attr_accessor :classification_id
- attr_accessor :cover
- attr_accessor :description
- attr_accessor :language
- attr_accessor :path
- attr_accessor :series_id
- attr_accessor :title
- attr_accessor :volume
-
- def initialize(store)
- @store = store
- end
-
- def load_from_file!(fileName)
- @path = fileName
- parse_file_name!(fileName)
- end
-
- def self.can_handle?(fileName)
- if nil == fileName
- return false
- end
-
- #puts "Filename: " + fileName.to_s
- lowerName = fileName.downcase()
-
- if lowerName.end_with?(".epub")
- return true
- end
-
- if lowerName.end_with?(".pdf")
- return true
- end
-
- return false
- end
-
- def self.grouping_for_title(title)
- result = title
-
- '\'",!#'.split('').each do |c|
- result = result.gsub(c, '-')
- end
- result = result.gsub(/: */, '--')
- result = result.gsub(' ', '_')
-
- result
- end
-
- def heading
- result = []
-
- if nil != @title
- result.push('' + @title + '')
- else
- result.push('(Unknown title)')
- end
- if nil != @author
- result.push('by ' + @author.reading_order + '')
- end
-
- seriesInfo = []
- series = @store.load_series(@series_id)
- if nil != series and nil != series.descr
- seriesInfo.push(series.descr.to_s)
- end
- if nil != @volume
- seriesInfo.push(@volume.to_s)
- end
- if seriesInfo.length > 0
- result.push(seriesInfo.join(' '))
- end
-
- classification = nil
- if nil != @classification_id
- classification = @store.load_classification(@classification_id)
- end
- if nil != classification
- if nil != classification.ddc
- result.push('Dewey: ' + classification.ddc.to_s)
- end
- if nil != classification.lcc
- result.push('LCC: ' + classification.lcc.to_s)
- end
- end
-
- return result.join('
')
- end
-
- def inspect
- data = []
- if nil != @author
- data.push('author="' + @author.inspect + '"')
- end
- if nil != @series_id
- data.push('series_id="' + @series_id.to_s() + '"')
- end
- if nil != @volume
- data.push('volume="' + @volume + '"')
- end
- if nil != @title
- data.push('title="' + @title + '"')
- end
- if nil != @cover
- data.push(@cover.inspect())
- end
- if nil != @path
- data.push('path="' + @path + '"')
- end
- return '(Book:' + data.join(',') + ')'
- end
-
- def to_s
- return inspect()
- end
-
- def title_grouping
- if nil == @path
- return nil
- end
-
- return File.basename(@path, '.*')
- end
-
- protected
- def isUpper?(c)
- return /[[:upper:]]/.match(c)
- end
-
- protected
- def massage_author(input)
- if nil == input
- return nil
- end
-
- reading_order = ""
- input.each_char do |c|
- if isUpper?(c) and (reading_order.length > 0)
- reading_order += " "
- end
- reading_order += c
- end
-
- return reading_order
- end
-
- # Returns (series, volumeNo, titleText)
- protected
- def processTitle(input)
- if nil == input
- return nil
- end
-
- arr = input.split('_')
-
- series = nil
- vol = nil
-
- first = arr[0]
- matchData = (arr[0]).match(@@SERIES_AND_VOLUME_REGEX)
- if nil != matchData
- capt = matchData.captures
- series = capt[0]
- vol = capt[1]
- arr.shift
- end
-
- pos = arr[-1].rindex('.')
- if nil != pos
- arr[-1] = arr[-1].slice(0, pos)
- end
-
- title = arr.join(' ')
-
- bare_title_grouping = title_grouping
- .split('_')
- .reject { |part| part.match(@@SERIES_AND_VOLUME_REGEX) }
- .join('_')
-
- unless bare_title_grouping == Book.grouping_for_title(title)
- puts "WARNING: title_grouping mismatch: #{bare_title_grouping.inspect} vs. #{Book.grouping_for_title(title).inspect}"
- end
-
- return series, vol, title
- end
-
- protected
- def parse_file_name!(file_name)
- category = nil # e.g., non-fiction, fan-fiction
- grouping = ''
-
- parts = file_name.split('/')
- (series_code, @volume, @title) = processTitle(parts[-1])
- if parts.length > 1
- grouping = parts[-2]
- reading_order = massage_author(grouping)
- sort_order = nil
- @author = Author.new(grouping, reading_order, sort_order)
- @series_id = @store.get_series(grouping, series_code)
- end
- if parts.length > 2
- category = parts[-3]
- end
-
- lc_file_name = file_name.downcase
- if lc_file_name.end_with?(".epub")
- scanEpub!(file_name)
- elsif lc_file_name.end_with?(".pdf")
- scan_pdf!(file_name)
- end
-
- @arrived = File.ctime(file_name)
-
- @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*'))
-
- # TODO: Fix horrible hard-coded strings and paths
- if ('01_nonfic' == category) && (nil == classification_id)
- open(Store.unclassified_csv, 'a') do |fd|
- fd.puts('"' + grouping.to_s + '","' + path + '"')
- end
- end
- end
-
- protected
- def scanEpub!(fileName)
- #puts 'Scanning "' + fileName.to_s + '"...'
- begin
- Zip.warn_invalid_date = false
- Zip::File.open(fileName) do |zipfile|
- entry = zipfile.find_entry('META-INF/container.xml')
- if nil == entry
- puts 'No META-INF/container.xml, skipping book ' + fileName
- return
- end
- contXml = zipfile.read('META-INF/container.xml')
- contDoc = Nokogiri::XML(contXml)
- opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
-
- scanOpf!(zipfile, opfPath)
- end
- rescue Zip::Error => exc
- puts 'ERROR processing file "' + fileName + '":'
- puts exc.message
- puts exc.backtrace
- end
- end
-
- protected
- def scan_pdf!(file_name)
- #puts 'Scanning "' + file_name.to_s + '"...'
-
- pdf_path = File.expand_path(file_name).to_s
- if ! pdf_path.end_with?('.pdf')
- puts 'Unexpected internal error: path "' + file_name.to_s + '" does not end with ".pdf".'
- return
- end
-
- jpeg_path = pdf_path[0..-5] + '.jpeg'
- if File.file?(jpeg_path)
- File.open(jpeg_path, 'r') do |is|
- @cover = Cover.new(is, jpeg_path, 'image/jpeg')
- end
- end
- end
-
-
- protected
- def scanOpf!(zipfile, opfPath)
- coverId = nil
-
- opfXml = zipfile.read(opfPath)
- opfDoc = Nokogiri::XML(opfXml)
-
- #-------
- # Author
-
- grouping = @author.grouping
- reading_order = @author.reading_order
- sort_order = @author.sort_order
-
- creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
- if (creators.length > 0)
- creator = creators[0]
- if nil != creator
- role = creator['opf:role']
- if 'aut' == role
- reading_order = creator.content
-
- file_as = creator['opf:file-as']
- if nil != file_as
- sort_order = file_as
- end
- end
-
- @author = Author.new(grouping, reading_order, sort_order)
- end
- end
-
- #---------------------------------------
- # Title
-
- titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
- if titles.length > 0
- title = titles[0]
- if nil != title
- @title = title.content
- end
- end
-
- #---------------------------------------
- # Description
-
- descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
- if (descrNodes.length > 0)
- descrNode = descrNodes[0]
- if nil != descrNode
- @description = descrNode.content
- end
- end
-
- #---------------------------------------
- # Language
-
- langNodes = opfDoc.css('dc|language', 'dc' => @@DC_NS_URL)
- if (langNodes.length > 0)
- langNode = langNodes[0]
- if langNode
- @language = langNode.content
- end
- end
-
- #---------------------------------------
- # Other metadata: series, volume, cover
-
- metas = opfDoc.css('package metadata meta')
- for m in metas
- name = m['name']
- content = m['content']
-
- if 'calibre:series' == name
- # TODO: Dynamically create a new series?
- # @series_id = content
- elsif 'calibre:series-index' == name
- @volume = content
- elsif 'cover' == name
- coverId = content
- #puts 'File ' + @path + ' coverId ' + coverId
- end
- end
-
- #---------------
- # Load the cover
-
- @cover = load_cover(zipfile, opfPath, opfDoc, coverId)
- end
-
- protected
- def load_cover(zipfile, opfPath, opfDoc, coverId)
- coverFile = nil
- if nil == coverId
- coverId = "cover-image"
- end
-
- items = opfDoc.css('package manifest item')
- for i in items
- href = i['href']
- id = i['id']
- mimeType = i['media-type']
-
- if coverId == id
- entry = zipfile.find_entry(href)
-
- if nil == entry
- # Although the epub standard requires the path to be relative
- # to the base of the epub (zip), some books encountered in the
- # wild have been found to use a bath relative to the location
- # of the opf file.
- parts = opfPath.split('/')
- opfBasePath = opfPath.split('/')[0..-2].join('/')
- coverPath = opfBasePath + '/' + href
- entry = zipfile.find_entry(coverPath)
- end
-
- unless entry
- # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg'
- if href.start_with? '../'
- coverPath = href[3..-1]
- entry = zipfile.find_entry(coverPath)
- end
- end
-
- if nil == entry
- puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".'
- return nil
- else
- entry.get_input_stream() do |is|
- return Cover.new(is, href, mimeType)
- end
- end
- end
- end
- return nil
- end
-end
-
diff --git a/book_loader.rb b/book_loader.rb
deleted file mode 100644
index 5516f04..0000000
--- a/book_loader.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-
-require_relative 'book'
-require_relative 'store'
-
-class BookLoader
- DONE_MARKER = ''
-
- def initialize(config_file, queue)
- @config_file = config_file
- @queue = queue
- end
-
- def run
- @store = Store.new(@config_file)
- @store.connect()
-
- file = @queue.pop
- until file == DONE_MARKER do
- book = Book.new(@store)
- book.load_from_file!(file)
- @store.store_book(book)
-
- file = @queue.pop
- end
-
- @store.disconnect()
- end
-end
diff --git a/classification.rb b/classification.rb
deleted file mode 100644
index 2061e46..0000000
--- a/classification.rb
+++ /dev/null
@@ -1,75 +0,0 @@
-
-class Classification
- def initialize(ddc, lcc, author_grouping, author, title_grouping, title)
- @id = nil
- @ddc = ddc
- @lcc = lcc
- @author_grouping = author_grouping
- @author = author
- @title_grouping = title_grouping
- @title = title
- end
-
- def id
- @id
- end
- def id=(value)
- @id = value
- end
-
- def ddc
- @ddc
- end
- def lcc
- @lcc
- end
- def author_grouping
- @author_grouping
- end
- def author
- @author
- end
- def
-
- def inspect
- data = []
-
- if nil != @ddc
- data.push('Dewey=' + @ddc.to_s)
- end
- if nil != @lcc
- data.push('LCC=' + @lcc.to_s)
- end
- if nil != @author_grouping
- data.push('author_grouping=' + @author_grouping.to_s)
- end
- if nil != @author
- data.push('author=' + @author.to_s)
- end
- if nil != @title_grouping
- data.push('title_grouping=' + @title_grouping.to_s)
- end
- if nil != @title
- data.push('title=' + @title)
- end
-
- return '(Classification:' + data.join(',') + ')'
- end
-
- def to_s
- inspect
- end
-
- protected
- def reading_to_sort_order(reading_order)
- sort_order = reading_order
-
- parts = reading_order.split(' ')
- if parts.length > 1
- sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
- end
-
- return sort_order
- end
-end
-
diff --git a/cover.rb b/cover.rb
deleted file mode 100644
index e74c27b..0000000
--- a/cover.rb
+++ /dev/null
@@ -1,57 +0,0 @@
-
-class Cover
- attr_reader :path
-
- def initialize(inputStream, path, mimeType)
- if nil != inputStream
- @data = inputStream.read
- else
- @data = nil
- end
- @path = path
- @mimeType = mimeType
- end
-
- def inspect
- info = []
- if nil != @data
- info.push('size=' + @data.length.to_s)
- else
- info.push('empty')
- end
- if nil != @path
- info.push('path="' + @path + '"')
- end
- if nil != @mimeType
- info.push('mimeType="' + @mimeType + '"')
- end
- return '(Cover:' + info.join(',') + ')'
- end
-
- def read_image(filename)
- open(filename, 'rb') do |fd|
- @data = fd.read()
- end
- end
-
- def to_s
- return inspect
- end
-
- def write_image(outputDir, filename)
- open(outputDir + '/' + filename, 'wb') do |fd|
- fd.write(@data)
- end
- return filename, @mimeType
- end
-
- protected
- def getExt
- pos = @path.rindex('.')
- if nil == pos
- return '.img'
- end
- return @path.slice(pos, @path.length)
- end
-end
-
diff --git a/extract.rb b/extract.rb
deleted file mode 100644
index c695941..0000000
--- a/extract.rb
+++ /dev/null
@@ -1,50 +0,0 @@
-require 'find'
-require 'pathname'
-
-def exec(cmdline)
- puts "$ #{cmdline}"
- result = system(cmdline)
- unless result
- puts "FAILED: #{cmdline}"
- end
- result
-end
-
-def extract_epub(source_file, source_path, dest_path)
- relative_path = source_file[source_path.length .. source_file.length]
- dest_file = "#{dest_path}/#{relative_path}"
- dest_file = dest_file[0 .. (dest_file.length - 6)] + ".txt"
-
- required_path = Pathname(dest_file).dirname
- unless File.directory? required_path
- unless exec("mkdir -p #{required_path}")
- return false
- end
- end
-
- if File.exist? dest_file
- source_time = File.mtime source_file
- dest_time = File.mtime dest_file
- comp = dest_time <=> source_time
- if comp > 0
- return true # Nothing to do, extraction is already up-to-date
- end
- end
-
- exec("ebook-convert #{source_file} #{dest_file}")
-end
-
-def scan_dir(source_path, dest_path)
- Find.find(source_path) do |f|
- if f.match(/.epub\Z/)
- unless (f.match(/_bis.epub\Z/) || f.match(/_ter.epub\Z/) || f.match(/_quater.epub\Z/))
- extract_epub(f, source_path, dest_path)
- end
- end
- end
-end
-
-dest_path = ARGV[0]
-for arg in ARGV[1 .. ARGV.length]
- scan_dir(arg, dest_path)
-end
diff --git a/main.rb b/main.rb
deleted file mode 100644
index e294b4a..0000000
--- a/main.rb
+++ /dev/null
@@ -1,56 +0,0 @@
-require_relative 'navigator'
-require_relative 'page'
-require_relative 'store'
-require_relative 'walk_dir'
-
-@outputDir = 'output'
-
-@config_file = 'quanlib.ini'
-@skip_class = false
-
-def handleArg(arg)
- if arg.start_with?("--config=")
- @config_file = arg[9..-1]
- puts 'Using config file "' + @config_file + '".'
- elsif "--purge" == arg
- puts 'Purging database...'
- @store.dropSchema()
- if File.exists?(@store.unclassified_csv)
- File.delete(@store.unclassified_csv)
- end
- elsif "--skip-class" == arg
- puts 'Skipping load of classification table.'
- @skip_class = true
- elsif arg.start_with?("--")
- abort('ERROR: Unrecognized option "' + arg + '".')
- end
-end
-
-@store = Store.new(@config_file)
-@store.connect()
-
-for arg in ARGV
- handleArg(arg)
-end
-
-@store.init_db(@skip_class)
-
-for arg in ARGV
- if ! arg.start_with?("--")
- puts 'Scanning directory "' + arg + '"...'
- w = WalkDir.new(@config_file, arg)
- w.books
- end
-end
-
-@store.cross_reference_lists
-
-puts 'Creating output...'
-
-navigator = Navigator.new(@store)
-navigator.write_atoz_pages()
-navigator.write_series_listing()
-navigator.write_dewey()
-
-@store.disconnect()
-
diff --git a/navigator.rb b/navigator.rb
deleted file mode 100644
index 881b1fa..0000000
--- a/navigator.rb
+++ /dev/null
@@ -1,157 +0,0 @@
-require_relative 'page'
-require_relative 'store'
-
-class Navigator
- def initialize(store)
- @store = store
- end
-
- def write_atoz_pages
- atoz_counts = {}
-
- ('A'..'Z').each do |letter|
- atoz_counts[letter] = write_authors_starting_with(letter)
- end
-
- content = 'Author | Books |
'
- ('A'..'Z').each do |letter|
- content += ' Starting with ' + letter + ' | ' + atoz_counts[letter].to_s + ' |
'
- end
- content += '
'
- page = Page.new(@store)
- page.output_dir = 'atoz'
- page.special = content
- page.up = ['../output/index.html', 'Up']
-
- page.write_html( [] )
- end
-
- def write_authors_starting_with(letter)
- book_ids = @store.query_books_by_author(letter + '%')
- puts 'Authors starting with "' + letter + '": ' + book_ids.length.to_s() + ' books.'
-
- page = Page.new(@store)
- if 'A' != letter
- page.back = ['../atoz/output_' + (letter.ord - 1).chr + '.html', 'Prev']
- end
- if 'Z' != letter
- page.forward = ['../atoz/output_' + (letter.ord + 1).chr + '.html', 'Next']
- end
- page.output_dir = 'atoz'
- page.index_file = 'output_' + letter + '.html'
- page.title = "Authors starting with '" + letter + "'"
- page.up = ['../atoz/index.html', 'Up']
-
- page.write_html(book_ids)
- return book_ids.length
- end
-
- def write_dewey
- book_ids = @store.query_books_by_ddc()
- puts 'Non-fiction books arranged by Dewey Decimal Classification: ' + book_ids.length.to_s() + ' books.'
-
- page = Page.new(@store)
- page.output_dir = 'ddc'
- page.index_file = 'index.html'
- page.title = "Non-fiction books arranged by Dewey Decimal call number"
- page.up = ['../output/index.html', 'Up']
-
- page.write_html(book_ids)
- return book_ids.length
- end
-
- def write_series_for_age(age)
- series_infos = []
-
- series_ids = @store.query_series_by_age(age)
-
- series_ids.each do |id|
- series = @store.load_series(id)
- book_ids = @store.query_books_by_series_id(id)
- if nil != book_ids and book_ids.length > 0
- series_infos.push( [series, book_ids] )
- end
- end
-
- for idx in 0 .. (series_infos.length - 1) do
- #puts series.descr + ': ' + book_ids.length.to_s + ' books.'
-
- back = nil
- fwd = nil
-
- if idx > 0
- back = series_infos[idx-1]
- end
- if (idx + 1) < series_infos.length
- fwd = series_infos[idx+1]
- end
-
- cur = series_infos[idx]
- series = cur[0]
- book_ids = cur[1]
-
- page = Page.new(@store)
- if nil != back
- page.back = [back[0].key + '.html', 'Back']
- end
- if nil != fwd
- page.forward = [fwd[0].key + '.html', 'Forward']
- end
- page.output_dir = 'series/series_' + age
- page.index_file = series.key + '.html'
- page.title = 'Series “' + series.descr + '” (' + book_ids.length.to_s + ' books)'
- page.up = ['index.html', 'Up']
-
- page.write_html(book_ids)
- end
-
- content = '“' + age + '” Series
'
- content += 'Author | Series | Genre | Books |
'
- series_infos.each do |cur|
- series = cur[0]
- book_ids = cur[1]
-
- author = series.grouping
- letter = author[0]
-
- content += ' '
- content += '' + author + ' | '
- content += '' + series.descr + ' | '
- content += '' + series.genre + ' | '
- content += '' + book_ids.length.to_s + ' | '
- content += '
'
- end
- content += '
'
- page = Page.new(@store)
- page.output_dir = 'series/series_' + age
- page.special = content
- page.up = ['../index.html', 'Up']
- page.write_html( [] )
-
- return series_infos.length
- end
-
- def write_series_listing
- ages = ['beginner', 'junior', 'ya', 'adult']
- series_counts = {}
-
- ages.each do |age|
- puts 'Series for "' + age + '" readers...'
-
- series_counts[age] = write_series_for_age(age)
- end
-
- content = 'Browse Books By Series
'
- content += ''
- content += '
Age | Number of Series |
'
- ages.each do |age|
- content += '' + age + ' | ' + series_counts[age].to_s + ' |
'
- end
- content += '
'
- page = Page.new(@store)
- page.output_dir = 'series'
- page.special = content
- page.up = ['../output/index.html', 'Up']
- page.write_html( [] )
- end
-end
diff --git a/page.rb b/page.rb
deleted file mode 100644
index 638f9ad..0000000
--- a/page.rb
+++ /dev/null
@@ -1,166 +0,0 @@
-require 'fileutils'
-
-require_relative 'store'
-
-class Page
- def initialize(store)
- @back = nil
- @forward = nil
- @index_file = 'index.html'
- @output_dir = 'output'
- @special = nil
- @store = store
- @title = 'Books'
- @up = nil
- end
-
- def back=(value)
- @back = value
- end
-
- def forward=(value)
- @forward = value
- end
-
- def index_file=(value)
- @index_file = value
- end
-
- def navig_link(data)
- if (nil == data)
- return ''
- end
- return '' + data[1] + ''
- end
-
- def output_dir=(value)
- @output_dir = value
- end
-
- def special=(value)
- @special = value
- end
-
- def title=(value)
- @title = value
- end
-
- def up=(value)
- @up = value
- end
-
- def write_books(fd, book_ids)
- for id in book_ids
- book = @store.load_book(id)
- image = nil
- if nil != book.cover
- #@imageCount += 1
- #(path, mimeType) = book.cover.write_image(@output_dir, 'image' + @imageCount.to_s)
- #image = ''
- path = book.cover.path
- image = ''
- else
- image = '(No cover image)'
- end
-
- fd.puts ' '
- fd.puts ' ' + image + ' | '
-
- heading = book.heading()
- description = book.description()
- if nil != description
- fd.puts ' |
'
- else
- fd.puts ' ' + heading + ' | '
- end
-
- fd.puts '
'
- end
- end
-
- def write_footer(fd)
- fd.puts ' ' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '
'
- end
-
- def write_header(fd)
- fd.puts ' '
-
- fd.puts ' ' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '
'
- end
-
- def write_html(book_ids)
- @imageCount = 0
-
- if ! Dir.exist?(@output_dir)
- FileUtils.mkdir_p(@output_dir)
- end
-
- open(@output_dir + '/' + @index_file, 'w') do |fd|
- fd.puts ''
- fd.puts ' '
- fd.puts ' '
- fd.puts ' ' + @title + ''
-
- write_style_sheet(fd)
-
- fd.puts ' '
- fd.puts ' '
-
- write_header(fd)
-
- write_special(fd)
- write_books(fd, book_ids)
-
- write_footer(fd)
-
- fd.puts " "
- fd.puts ""
- end
- end
-
- def write_special(fd)
- if (nil != @special)
- fd.puts(@special)
- end
- end
-
- def write_style_sheet(fd)
- style =
-<
- div {
- display: inline-block;
- width: 400px;
- margin: 10px;
- border 3px solid #73ad21;
- }
- h1.header {
- background: #4040a0;
- color: #ffffff;
- text-align: center;
- }
- img.cover-thumb { max-height: 200px; max-width: 200px; }
- p.navigator { }
- span.popup { }
- span.popup:hover { text-decoration: none; background: #cfffff; z-index: 6; }
- span.popup span.pop-inner {
- border-color:black;
- border-style:solid;
- border-width:1px;
- display: none;
- margin: 4px 0 0 0px;
- padding: 3px 3px 3px 3px;
- position: absolute;
- }
- span.popup:hover span.pop-inner {
- background: #ffffaf;
- display: block;
- margin: 20px 0 0 0px;
- z-index:6;
- }
-
-EOS
- fd.puts style
- end
-end
-
diff --git a/series.rb b/series.rb
deleted file mode 100644
index 0621876..0000000
--- a/series.rb
+++ /dev/null
@@ -1,87 +0,0 @@
-
-class Series
- def initialize(id)
- @age = nil
- @genre = nil
- @grouping = nil
- @code = nil
- @descr = nil
- @id = id
- end
-
- def age
- @age
- end
-
- def age=(value)
- @age = value
- end
-
- def code
- @code
- end
-
- def code=(value)
- @code = value
- end
-
- def descr
- @descr
- end
-
- def descr=(value)
- @descr = value
- end
-
- def genre
- @genre
- end
-
- def genre=(value)
- @genre = value
- end
-
- def grouping
- @grouping
- end
-
- def grouping=(value)
- @grouping = value
- end
-
- def id
- @id
- end
-
- def inspect
- data = []
- if nil != @age
- data.push('age="' + @age.inspect + '"')
- end
- if nil != @code
- data.push('code="' + @code.inspect + '"')
- end
- if nil != @descr
- data.push('descr="' + @descr + '"')
- end
- if nil != @genre
- data.push('genre="' + @genre + '"')
- end
- if nil != @grouping
- data.push('grouping="' + @grouping + '"')
- end
- return '(Series:' + data.join(',') + ')'
- end
-
- def key
- if nil != grouping and nil != code
- return grouping.to_s + '_' + code.to_s
- end
- return id.to_s
- end
-
- def to_s
- return inspect()
- end
-end
-
diff --git a/store.rb b/store.rb
deleted file mode 100644
index 1a33ca3..0000000
--- a/store.rb
+++ /dev/null
@@ -1,655 +0,0 @@
-
-require 'csv'
-require 'fileutils'
-require 'inifile'
-require 'pg'
-
-require_relative 'series'
-require_relative 'tconn'
-
-class Store
- def unclassified_csv
- @basePath + '/csv/unclassified.csv'
- end
-
- def initialize(config_file)
- @conn = nil
-
- config = IniFile.load(config_file)
- if nil == config
- puts 'FATAL: Failed to load config file "' + config_file + '". Aborting initialization.'
- return
- end
-
- section = config['database']
- @dbhost = section['host']
- @dbport = 5432
- @dbname = section['name']
- @dbuser = section['user']
- @dbpass = section['pass']
-
- section = config['filesystem']
- @basePath = section['basePath']
- end
-
- def connect
- @conn = TimedConn.new(PG.connect(@dbhost, @dbport, '', '', @dbname, @dbuser, @dbpass))
- return @conn
- end
-
- def disconnect
- @conn.close()
- end
-
- def construct_efs_path(efs_id)
- id_str = sprintf('%010d', efs_id)
- path = sprintf('%s/%s/%s/%s', id_str[0,2], id_str[2,2], id_str[4,2], id_str[6,2])
- name = id_str + '.dat'
- return path, name
- end
-
- def cross_reference_lists
-puts "@@@@@@@@@@@ CROSS-REF START @@@@@@@@@@@"
- exec_update("TRUNCATE TABLE Lists CASCADE;", [])
-
- populate_lists_table
-puts "@@@@@@@@@@@ CROSS-REF DONE @@@@@@@@@@@"
- end
-
- def create_schema(skip_class)
- create_authors =
-< exc
- puts 'WARNING: "' + stmt + '" failed: ' + exc.to_s
- end
- end
- end
-
- def find_all_authors(author_name)
- result = []
-
- sqlSelect = "SELECT id FROM Authors WHERE grouping=$1;"
- args = [author_name]
-
- @conn.exec_params(sqlSelect, args) do |rs|
- rs.each do |row|
- result << row['id']
- end
- end
-
- result
- end
-
- def find_author(author)
- sqlSelect = "SELECT id FROM Authors WHERE grouping=$1 AND reading=$2 AND sort=$3;"
- args = [author.grouping, author.reading_order, author.sort_order]
-
- @conn.exec_params(sqlSelect, args) do |rs|
- if rs.ntuples > 0
- return rs[0]['id']
- end
- end
-
- return nil
- end
-
- def init_db(skip_class)
- sql = "SELECT 1 FROM pg_tables WHERE tableowner='quanlib' AND tablename='books'"
- found = false
- @conn.exec(sql).each do |row|
- found = true
- end
-
- if ! found
- create_schema(skip_class)
- end
- end
-
- def load_author(id)
- sqlSelect = "SELECT grouping, reading, sort FROM Authors WHERE id=$1"
- args = [id]
- @conn.exec_params(sqlSelect, args) do |rs|
- if rs.ntuples != 1
- raise "Expected 1 row for " + id + " but got " + rs.ntuples + ": " + sqlSelect
- end
- row = rs[0]
- author = Author.new(row['grouping'], row['reading'], row['sort'])
- return author
- end
- return nil
- end
-
- def store_author(author)
- id = find_author(author)
- if nil == id
- id = next_id('author_id')
- sqlInsert = "INSERT INTO Authors(id, grouping, reading, sort) VALUES ($1, $2, $3, $4);"
- args = [id, author.grouping, author.reading_order, author.sort_order]
- begin
- rs = @conn.exec_params(sqlInsert, args)
- rescue Exception => e
- puts sqlInsert + ": " + args.inspect()
- puts e.message
- puts $@
- ensure
- rs.clear if rs
- end
- end
- return id
- end
-
- def load_book(id)
- sql = "SELECT author, classification, cover, description, language, path, series, title, volume FROM Books WHERE id=$1;"
- book = nil
-
- begin
- @conn.exec_params(sql, [id]) do |rs|
- if 1 != rs.ntuples
- raise 'Expected one row in Books for id ' + id + ', but found ' + rs.length + '.'
- return nil
- end
- row = rs[0]
-
- book = Book.new(self)
- book.author = load_author(row['author'])
- book.classification_id = row['classification']
- book.cover = load_cover(row['cover'])
- book.description = row['description']
- book.language = row['language']
- book.path = row['path']
- book.series_id = row['series']
- book.title = row['title']
- book.volume = row['volume']
- end
- rescue Exception => e
- puts sql + ": " + id
- puts e.message
- puts $@
- end
-
- return book
- end
-
- def store_book(book)
- sql = "INSERT INTO Books (id, arrived, author, classification, cover, description, language, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);"
-
- book_id = next_id('book_id')
-
- author_id = store_author(book.author)
- (efs_id, mime_type) = store_cover(book)
-
- args = [book_id, book.arrived, author_id, book.classification_id, efs_id, book.description, book.language, book.path, book.series_id, book.title, book.volume]
-
- begin
- rs = @conn.exec_params(sql, args)
- rescue Exception => e
- puts sql + ": " + args.inspect()
- puts e.message
- puts $@
- ensure
- rs.clear if rs
- end
-
- return book_id
- end
-
- def find_classification(author_grouping, title_grouping)
- sql = "SELECT id FROM Classifications WHERE author_grouping = $1 AND title_grouping = $2;"
- @conn.exec_params(sql, [author_grouping, title_grouping]) do |rs|
- if rs.ntuples > 0
- return rs[0]['id']
- end
- end
- return nil
- end
-
- def load_classification(id)
- sql = "SELECT ddc, lcc, author_grouping, author_sort, title_grouping, title "
- sql += " FROM Classifications WHERE id=$1"
- @conn.exec_params(sql, [id]) do |rs|
- if rs.ntuples > 0
- row = rs[0]
- ddc = row['ddc']
- lcc = row['lcc']
- author_grouping = row['author_grouping']
- author = row['author_sort']
- title_grouping = row['title_grouping']
- title = row['title']
-
- result = Classification.new(ddc, lcc, author_grouping, author, title_grouping, title)
- result.id = id
- return result
- end
- end
-
- return nil
- end
-
- def load_cover(id)
- if nil == id
- return nil
- end
-
- mime_type = 'application/octet-stream'
-
- sql = "SELECT mimeType FROM Efs WHERE id=$1"
- @conn.exec_params(sql, [id]) do |rs|
- if rs.ntuples != 1
- raise "Expected one row but got " + rs.ntuples + ": " + sql + ": " + id
- end
- mime_type = rs[0]['mimeType']
- end
-
- (efspath, efsname) = construct_efs_path(id)
-
- fullpath = @basePath + '/efs/' + efspath + '/' + efsname
-
- return Cover.new(nil, fullpath, mime_type)
- end
-
- def store_cover(book)
- efs_id = nil
- cover = book.cover()
-
- if nil == cover
- return nil
- end
-
- @conn.exec("SELECT nextval('efs_id')") do |rs|
- efs_id = rs[0]['nextval']
- end
-
- if nil == efs_id
- return nil
- end
-
- (efspath, efsname) = construct_efs_path(efs_id)
-
- efspath = @basePath + '/efs/' + efspath
-
- FileUtils.mkdir_p(efspath)
-
- (filepath, mimetype) = cover.write_image(efspath, efsname)
-
- sql = "INSERT INTO efs VALUES ($1, $2)"
- begin
- rs = @conn.exec_params(sql, [efs_id, mimetype])
- rescue Exception => e
- puts sql + ": " + efs_id + ", " + mimetype
- puts e.message
- puts $@
- ensure
- rs.clear if rs
- end
-
- return efs_id, mimetype
- end
-
- def exec_id_query(sql, args)
- ids = []
- @conn.exec_params(sql, args) do |rs|
- rs.each do |row|
- ids.push(row['id'])
- end
- end
- return ids
- end
-
- def exec_update(sql, args)
- begin
- rs = @conn.exec_params(sql, args)
- rescue Exception => e
- puts sql + ": " + args.inspect()
- puts e.message
- puts $@
- ensure
- rs.clear if rs
- end
- end
-
- def next_id(seq_name)
- id = nil
- @conn.exec("SELECT nextval('" + seq_name + "');") do |rs|
- id = rs[0]['nextval']
- end
- return id
- end
-
- def get_series(grouping, code)
- if nil == code
- return nil
- end
-
- sql = "SELECT id FROM Series WHERE grouping=$1 AND code=$2;"
- args = [grouping, code]
- @conn.exec_params(sql, args).each do |row|
- return row['id']
- end
-
- # TODO: Create a new series object here?
- puts 'WARNING: series("' + grouping + '", "' + code + '") not found.'
- return nil
- end
-
- def load_series(id)
- sql = "SELECT age,genre,grouping,code,descr FROM Series WHERE id=$1;"
- args = [id]
- @conn.exec_params(sql, args) do |rs|
- if rs.ntuples > 0
- row = rs[0]
- series = Series.new(id)
- series.age = row['age']
- series.genre = row['genre']
- series.grouping = row['grouping']
- series.code = row['code']
- series.descr = row['descr']
- return series
- end
- end
- return nil
- end
-
- def populate_classifications_table
- puts "Populating the Classifications table..."
- first = true
- CSV.foreach(@basePath + '/csv/class.csv') do |row|
- if first
- # skip the header row
- first = false
- else
-
- # First, add a row to the Classifications table
-
- id = next_id('classification_id')
- ddc = row[0]
- lcc = row[1]
- author_grouping = row[2]
- author_sort = row[3]
- title_grouping = row[4]
- title = row[5]
-
- sqlInsert = "INSERT INTO Classifications (id, ddc, lcc, author_grouping, author_sort, title_grouping, title) VALUES ($1, $2, $3, $4, $5, $6, $7);"
- args = [id, ddc, lcc, author_grouping, author_sort, title_grouping, title]
- exec_update(sqlInsert, args)
-
- # Second, link up with the appropriate FAST table entries
-
- fast = []
- input = row[6]
- if input.length > 0
- fast = input.split(';')
- end
-
- fast.each do |fast_id|
- sqlInsert = "INSERT INTO FAST_Classifications (fast, classification) VALUES ($1, $2);"
- args = [fast_id, id]
- exec_update(sqlInsert, args)
- end
- end
- end
- end
-
- def populate_fast_table
- puts "Populating the FAST table..."
- first = true
- CSV.foreach(@basePath + '/csv/fast.csv') do |row|
- if first
- first = false # skip the header row
- else
- id = row[0]
- descr = row[1]
- sqlInsert = "INSERT INTO FAST (id, descr) VALUES ($1, $2);"
- exec_update(sqlInsert, [id, descr])
- end
- end
- end
-
- def populate_lists_table
- puts "Populating the Lists table..."
-
- CSV.foreach(@basePath + "/csv/lists.csv", headers: true) do |row|
- author_ids = find_all_authors(row['author'])
- if author_ids.empty?
- specification = [row['age'], row['category'], row['code'], row['year'], row['author'], row['title']]
- .map { |x| x.inspect }
- .join(', ')
-
- puts "WARNING: For list entry (#{specification}), no such author was found."
-
- next
- end
-
- sqlInsert = %Q(
- INSERT INTO Lists (id, age, category, code, year, author, title)
- VALUES ($1, $2, $3, $4, $5, $6, $7);
- )
- author_ids.each do |author_id|
- list_id = next_id('list_id')
- args = [list_id, row['age'], row['category'], row['code'], row['year'], author_id, row['title']]
- exec_update(sqlInsert, args)
-
- update_lists_books_table(list_id, author_id, row['title'])
- end
- end
- end
-
- # Scan for books that match this Lists entry, and add any matches to the Lists_Books associative table
- def update_lists_books_table(list_id, author_id, title)
- title_pattern = Book.grouping_for_title(title).gsub('_', '%')
- sqlSelect = "SELECT id FROM Books WHERE author = $1 AND title LIKE $2;"
- args = [author_id, title_pattern]
-
- @conn.exec_params(sqlSelect, args) do |rs|
- rs.each do |row|
- sqlInsert = "INSERT INTO Lists_Books (list, book) VALUES ($1, $2)"
- args = [list_id, row['id']]
- exec_update(sqlInsert, args)
- end
- end
- end
-
- def populate_series_table
- puts "Populating the Series table..."
- CSV.foreach(@basePath + '/csv/series.csv') do |row|
- id = next_id('series_id')
- sqlInsert = "INSERT INTO Series (id, age, genre, grouping, code, descr) VALUES ($1, $2, $3, $4, $5, $6);"
- args = [id] + row
- exec_update(sqlInsert, args)
- end
- end
-
- def query_books_by_author(pattern)
- sql =
-< {}, "filesystem" => {}})
store = Store.new('quanlib.ini')
- data.each do |input, expectedPath, expectedName|
- (actualPath, actualName) = store.construct_efs_path(input)
+ data.each do |input, expected_path, expected_name|
+ (actual_path, actual_name) = store.construct_efs_path(input)
- expect(actualPath).to eq(expectedPath)
- expect(actualName).to eq(expectedName)
+ assert_equal expected_path, actual_path
+ assert_equal expected_name, actual_name
end
end
end
diff --git a/test/test_helper.rb b/test/test_helper.rb
new file mode 100644
index 0000000..25bf530
--- /dev/null
+++ b/test/test_helper.rb
@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift File.expand_path("../app", __dir__)
+
+require "minitest/autorun"
+require "mocha/minitest"
diff --git a/walk_dir.rb b/walk_dir.rb
deleted file mode 100644
index a2c088f..0000000
--- a/walk_dir.rb
+++ /dev/null
@@ -1,118 +0,0 @@
-# Walk the directory (and subdirectories), identifying books.
-#
-# Expected format:
-# .../AuthorName/Title_of_the_Awesome_Book.ext
-#
-# Author is given as FirstLast. For example,
-# Robert Anson Heinlein is RobertHeinlein, and
-# JKRowling is JoanneRowling.
-#
-# Book titles have spaces replaced with underscores,
-# and punctuation [,!?'] replaced with hyphens.
-#
-# If the book forms part of a series, then an all-capitals
-# series designator, followed by a numeric volume number,
-# followed by an underscore, is prefixed to the name.
-# For example, Hardy Boys' volume 1, The Tower Treasure,
-# is rendered as .../FranklinDixon/HB001_The_Tower_Treasure.epub
-# and Mrs. Pollifax volume 6, On the China Station, is
-# .../DorothyGilman/P06_On_the_China_Station.epub.
-
-require_relative 'book'
-require_relative 'book_loader'
-require_relative 'store'
-
-class WalkDir
- def initialize(config_file, root)
- @queue = Queue.new
- @root = root
- @config_file = config_file
- @threads = []
-
- @files = walk(@root)
- end
-
- def books
- @threads = []
- num_threads.times do
- @threads << Thread.new do
- BookLoader.new(@config_file, @queue).run
- end
- end
-
- result = []
- @files = remove_duplicates(@files)
- for file in @files.sort()
- if Book.can_handle?(file) && (!is_duplicate?(file))
- # Queue this book to be loaded and added to the DB by a BookLoader thread
- @queue << file
- end
- end
-
- @threads.count.times { @queue << BookLoader::DONE_MARKER }
-
- @threads.each { |t| t.join }
- end
-
- # Duplicate versions of a text are named
- # xxx_suffix.ext
- # Where suffix is one of bis, ter, quater, quinquies
- # for the 2nd, 3rd, 4th or 5th variant respectively.
- def is_duplicate?(file)
- s = file.to_s
- suffix = ['_bis.', '_ter.', '_quater.', '_quinquies.']
- suffix.each do |pat|
- if s.include?(pat)
- return true
- end
- end
-
- return false
- end
-
- def remove_duplicates(files)
- unique = {}
- for file in files
- if Book.can_handle?(file)
- key = File.dirname(file) + '/' + File.basename(file, '.*')
- if unique.has_key?(key)
- new_ext = File.extname(file)
- old_ext = File.extname(unique[key])
- if ('.pdf' == old_ext) && ('.epub' == new_ext)
- # Prefer EPUB over PDF
- puts 'REPLACED ' + unique[key].to_s + ' with ' + file.to_s
- unique[key] = file
- else
- puts 'DROPPED ' + file.to_s + " because it's superceded by " + unique[key].to_s
- end
- else
- unique[key] = file
- end
- end
- end
-
- return unique.values
- end
-
- def walk(path)
- result = []
- children = Dir.entries(path)
- for child in children
- fullName = (path.chomp("/")) + "/" + child
- if (File.directory?(fullName)) and (child != ".") and (child != "..") and (!File.symlink?(fullName))
- sub = walk(fullName)
- if (sub != nil) and (sub.length > 0)
- result.concat(sub)
- end
- elsif (! File.directory?(fullName))
- result.push(fullName)
- end
- end
- return result
- end
-
- def num_threads
- # TOOD: make this (auto?) configurable
- 12
- end
-end