Also adds a Rakefile, and pulls in RuboCop while we're at it.
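
The new Rakefile makes RuboCop part of the default build alongside the test
task, so a typical local run (assuming the gems above are managed with
Bundler) looks like:

    bundle install
    bundle exec rake            # default task: test + rubocop
    bundle exec rake rubocop    # lint only
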
source 'http://rubygems.org'
gem 'inifile'
+gem 'mocha'
gem 'nokogiri'
gem 'pg'
gem 'rspec'
+gem 'rubocop'
gem 'rubyzip'
GEM
remote: http://rubygems.org/
specs:
- diff-lcs (1.3)
+ ast (2.4.2)
+ diff-lcs (1.5.1)
inifile (3.0.0)
- mini_portile2 (2.4.0)
- nokogiri (1.10.3)
- mini_portile2 (~> 2.4.0)
- pg (1.1.4)
- rspec (3.8.0)
- rspec-core (~> 3.8.0)
- rspec-expectations (~> 3.8.0)
- rspec-mocks (~> 3.8.0)
- rspec-core (3.8.2)
- rspec-support (~> 3.8.0)
- rspec-expectations (3.8.4)
+ json (2.7.2)
+ language_server-protocol (3.17.0.3)
+ mocha (2.4.0)
+ ruby2_keywords (>= 0.0.5)
+ nokogiri (1.16.6-aarch64-linux)
+ racc (~> 1.4)
+ nokogiri (1.16.6-arm-linux)
+ racc (~> 1.4)
+ nokogiri (1.16.6-arm64-darwin)
+ racc (~> 1.4)
+ nokogiri (1.16.6-x86-linux)
+ racc (~> 1.4)
+ nokogiri (1.16.6-x86_64-darwin)
+ racc (~> 1.4)
+ nokogiri (1.16.6-x86_64-linux)
+ racc (~> 1.4)
+ parallel (1.25.1)
+ parser (3.3.3.0)
+ ast (~> 2.4.1)
+ racc
+ pg (1.5.6)
+ racc (1.8.0)
+ rainbow (3.1.1)
+ regexp_parser (2.9.2)
+ rexml (3.3.0)
+ strscan
+ rspec (3.13.0)
+ rspec-core (~> 3.13.0)
+ rspec-expectations (~> 3.13.0)
+ rspec-mocks (~> 3.13.0)
+ rspec-core (3.13.0)
+ rspec-support (~> 3.13.0)
+ rspec-expectations (3.13.1)
diff-lcs (>= 1.2.0, < 2.0)
- rspec-support (~> 3.8.0)
- rspec-mocks (3.8.1)
+ rspec-support (~> 3.13.0)
+ rspec-mocks (3.13.1)
diff-lcs (>= 1.2.0, < 2.0)
- rspec-support (~> 3.8.0)
- rspec-support (3.8.2)
- rubyzip (1.2.3)
+ rspec-support (~> 3.13.0)
+ rspec-support (3.13.1)
+ rubocop (1.64.1)
+ json (~> 2.3)
+ language_server-protocol (>= 3.17.0)
+ parallel (~> 1.10)
+ parser (>= 3.3.0.2)
+ rainbow (>= 2.2.2, < 4.0)
+ regexp_parser (>= 1.8, < 3.0)
+ rexml (>= 3.2.5, < 4.0)
+ rubocop-ast (>= 1.31.1, < 2.0)
+ ruby-progressbar (~> 1.7)
+ unicode-display_width (>= 2.4.0, < 3.0)
+ rubocop-ast (1.31.3)
+ parser (>= 3.3.1.0)
+ ruby-progressbar (1.13.0)
+ ruby2_keywords (0.0.5)
+ rubyzip (2.3.2)
+ strscan (3.1.0)
+ unicode-display_width (2.5.0)
PLATFORMS
- ruby
+ aarch64-linux
+ arm-linux
+ arm64-darwin
+ x86-linux
+ x86_64-darwin
+ x86_64-linux
DEPENDENCIES
inifile
+ mocha
nokogiri
pg
rspec
+ rubocop
rubyzip
BUNDLED WITH
- 2.0.2
+ 2.5.13
--- /dev/null
+# frozen_string_literal: true
+
+require "minitest/test_task"
+
+Minitest::TestTask.create
+
+require "rubocop/rake_task"
+
+RuboCop::RakeTask.new
+
+task default: %i[test rubocop]
--- /dev/null
+
+class Author
+ def initialize(grouping, reading_order, sort_order)
+ @grouping = grouping
+ @reading_order = reading_order
+ @sort_order = sort_order
+
+ if (nil == sort_order) || ('Unknown' == sort_order)
+ @sort_order = reading_to_sort_order(reading_order)
+ end
+ end
+
+ def grouping
+ @grouping
+ end
+
+ def reading_order
+ @reading_order
+ end
+
+ def sort_order
+ @sort_order
+ end
+
+ def inspect
+ result = '(Author:'
+ if nil != @grouping
+ result += ' grouping="' + @grouping + '"'
+ end
+ if nil != @reading_order
+ result += ' reading_order="' + @reading_order + '"'
+ end
+ if nil != @sort_order
+ result += ' sort_order="' + @sort_order + '"'
+ end
+ result += ')'
+
+ return result
+ end
+
+ def to_s
+ inspect
+ end
+
+ protected
+ def reading_to_sort_order(reading_order)
+ sort_order = reading_order
+
+ parts = reading_order.split(' ')
+ if parts.length > 1
+ sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
+ end
+
+ return sort_order
+ end
+end
+
--- /dev/null
+
+require 'nokogiri'
+require 'rubygems'
+require 'zip'
+
+require_relative 'author'
+require_relative 'classification'
+require_relative 'cover'
+require_relative 'store'
+
+class Book
+ @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
+ @@SERIES_AND_VOLUME_REGEX = /^([A-Z]+)([0-9]+(\.[0-9]+)?)$/
+
+ attr_accessor :arrived
+ attr_accessor :author
+ attr_accessor :classification_id
+ attr_accessor :cover
+ attr_accessor :description
+ attr_accessor :language
+ attr_accessor :path
+ attr_accessor :series_id
+ attr_accessor :title
+ attr_accessor :volume
+
+ def initialize(store)
+ @store = store
+ end
+
+ def load_from_file!(fileName)
+ @path = fileName
+ parse_file_name!(fileName)
+ end
+
+ def self.can_handle?(fileName)
+ if nil == fileName
+ return false
+ end
+
+ #puts "Filename: " + fileName.to_s
+ lowerName = fileName.downcase()
+
+ if lowerName.end_with?(".epub")
+ return true
+ end
+
+ if lowerName.end_with?(".pdf")
+ return true
+ end
+
+ return false
+ end
+
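+ # Normalize a display title into the underscore/hyphen form used in file
+ # names: quotes, commas, '!' and '#' become '-', colons become '--', and
+ # spaces become '_'. For example, "Don't Panic!" => "Don-t_Panic-".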
+ def self.grouping_for_title(title)
+ result = title
+
+ '\'",!#'.split('').each do |c|
+ result = result.gsub(c, '-')
+ end
+ result = result.gsub(/: */, '--')
+ result = result.gsub(' ', '_')
+
+ result
+ end
+
+ def heading
+ result = []
+
+ if nil != @title
+ result.push('<b>' + @title + '</b>')
+ else
+ result.push('<i>(Unknown title)</i>')
+ end
+ if nil != @author
+ result.push('<i>by ' + @author.reading_order + '</i>')
+ end
+
+ seriesInfo = []
+ series = @store.load_series(@series_id)
+ if nil != series and nil != series.descr
+ seriesInfo.push(series.descr.to_s)
+ end
+ if nil != @volume
+ seriesInfo.push(@volume.to_s)
+ end
+ if seriesInfo.length > 0
+ result.push(seriesInfo.join(' '))
+ end
+
+ classification = nil
+ if nil != @classification_id
+ classification = @store.load_classification(@classification_id)
+ end
+ if nil != classification
+ if nil != classification.ddc
+ result.push('Dewey: ' + classification.ddc.to_s)
+ end
+ if nil != classification.lcc
+ result.push('LCC: ' + classification.lcc.to_s)
+ end
+ end
+
+ return result.join('<br/>')
+ end
+
+ def inspect
+ data = []
+ if nil != @author
+ data.push('author="' + @author.inspect + '"')
+ end
+ if nil != @series_id
+ data.push('series_id="' + @series_id.to_s() + '"')
+ end
+ if nil != @volume
+ data.push('volume="' + @volume + '"')
+ end
+ if nil != @title
+ data.push('title="' + @title + '"')
+ end
+ if nil != @cover
+ data.push(@cover.inspect())
+ end
+ if nil != @path
+ data.push('path="' + @path + '"')
+ end
+ return '(Book:' + data.join(',') + ')'
+ end
+
+ def to_s
+ return inspect()
+ end
+
+ def title_grouping
+ if nil == @path
+ return nil
+ end
+
+ return File.basename(@path, '.*')
+ end
+
+ protected
+ def isUpper?(c)
+ return /[[:upper:]]/.match(c)
+ end
+
+ protected
+ def massage_author(input)
+ if nil == input
+ return nil
+ end
+
+ reading_order = ""
+ input.each_char do |c|
+ if isUpper?(c) and (reading_order.length > 0)
+ reading_order += " "
+ end
+ reading_order += c
+ end
+
+ return reading_order
+ end
+
+ # Returns (series, volumeNo, titleText)
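+ # e.g. 'HB001_The_Tower_Treasure.epub' => ('HB', '001', 'The Tower Treasure')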
+ protected
+ def processTitle(input)
+ if nil == input
+ return nil
+ end
+
+ arr = input.split('_')
+
+ series = nil
+ vol = nil
+
+ first = arr[0]
+ matchData = (arr[0]).match(@@SERIES_AND_VOLUME_REGEX)
+ if nil != matchData
+ capt = matchData.captures
+ series = capt[0]
+ vol = capt[1]
+ arr.shift
+ end
+
+ pos = arr[-1].rindex('.')
+ if nil != pos
+ arr[-1] = arr[-1].slice(0, pos)
+ end
+
+ title = arr.join(' ')
+
+ bare_title_grouping = title_grouping
+ .split('_')
+ .reject { |part| part.match(@@SERIES_AND_VOLUME_REGEX) }
+ .join('_')
+
+ unless bare_title_grouping == Book.grouping_for_title(title)
+ puts "WARNING: title_grouping mismatch: #{bare_title_grouping.inspect} vs. #{Book.grouping_for_title(title).inspect}"
+ end
+
+ return series, vol, title
+ end
+
+ protected
+ def parse_file_name!(file_name)
+ category = nil # e.g., non-fiction, fan-fiction
+ grouping = ''
+
+ parts = file_name.split('/')
+ (series_code, @volume, @title) = processTitle(parts[-1])
+ if parts.length > 1
+ grouping = parts[-2]
+ reading_order = massage_author(grouping)
+ sort_order = nil
+ @author = Author.new(grouping, reading_order, sort_order)
+ @series_id = @store.get_series(grouping, series_code)
+ end
+ if parts.length > 2
+ category = parts[-3]
+ end
+
+ lc_file_name = file_name.downcase
+ if lc_file_name.end_with?(".epub")
+ scanEpub!(file_name)
+ elsif lc_file_name.end_with?(".pdf")
+ scan_pdf!(file_name)
+ end
+
+ @arrived = File.ctime(file_name)
+
+ @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*'))
+
+ # TODO: Fix horrible hard-coded strings and paths
+ if ('01_nonfic' == category) && (nil == classification_id)
+ open(@store.unclassified_csv, 'a') do |fd|
+ fd.puts('"' + grouping.to_s + '","' + path + '"')
+ end
+ end
+ end
+
+ protected
+ def scanEpub!(fileName)
+ #puts 'Scanning "' + fileName.to_s + '"...'
+ begin
+ Zip.warn_invalid_date = false
+ Zip::File.open(fileName) do |zipfile|
+ entry = zipfile.find_entry('META-INF/container.xml')
+ if nil == entry
+ puts 'No META-INF/container.xml, skipping book ' + fileName
+ return
+ end
+ contXml = zipfile.read('META-INF/container.xml')
+ contDoc = Nokogiri::XML(contXml)
+ opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
+
+ scanOpf!(zipfile, opfPath)
+ end
+ rescue Zip::Error => exc
+ puts 'ERROR processing file "' + fileName + '":'
+ puts exc.message
+ puts exc.backtrace
+ end
+ end
+
+ protected
+ def scan_pdf!(file_name)
+ #puts 'Scanning "' + file_name.to_s + '"...'
+
+ pdf_path = File.expand_path(file_name).to_s
+ if ! pdf_path.end_with?('.pdf')
+ puts 'Unexpected internal error: path "' + file_name.to_s + '" does not end with ".pdf".'
+ return
+ end
+
+ jpeg_path = pdf_path[0..-5] + '.jpeg'
+ if File.file?(jpeg_path)
+ File.open(jpeg_path, 'r') do |is|
+ @cover = Cover.new(is, jpeg_path, 'image/jpeg')
+ end
+ end
+ end
+
+
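+ # Pulls author/title/description/language plus Calibre series/volume and
+ # cover hints out of the OPF <metadata> block. Illustrative (abridged) input:
+ #   <dc:creator opf:role="aut" opf:file-as="Heinlein, Robert">Robert Heinlein</dc:creator>
+ #   <meta name="calibre:series-index" content="1"/>
+ #   <meta name="cover" content="cover-image"/>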
+ protected
+ def scanOpf!(zipfile, opfPath)
+ coverId = nil
+
+ opfXml = zipfile.read(opfPath)
+ opfDoc = Nokogiri::XML(opfXml)
+
+ #-------
+ # Author
+
+ grouping = @author.grouping
+ reading_order = @author.reading_order
+ sort_order = @author.sort_order
+
+ creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
+ if (creators.length > 0)
+ creator = creators[0]
+ if nil != creator
+ role = creator['opf:role']
+ if 'aut' == role
+ reading_order = creator.content
+
+ file_as = creator['opf:file-as']
+ if nil != file_as
+ sort_order = file_as
+ end
+ end
+
+ @author = Author.new(grouping, reading_order, sort_order)
+ end
+ end
+
+ #---------------------------------------
+ # Title
+
+ titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
+ if titles.length > 0
+ title = titles[0]
+ if nil != title
+ @title = title.content
+ end
+ end
+
+ #---------------------------------------
+ # Description
+
+ descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
+ if (descrNodes.length > 0)
+ descrNode = descrNodes[0]
+ if nil != descrNode
+ @description = descrNode.content
+ end
+ end
+
+ #---------------------------------------
+ # Language
+
+ langNodes = opfDoc.css('dc|language', 'dc' => @@DC_NS_URL)
+ if (langNodes.length > 0)
+ langNode = langNodes[0]
+ if langNode
+ @language = langNode.content
+ end
+ end
+
+ #---------------------------------------
+ # Other metadata: series, volume, cover
+
+ metas = opfDoc.css('package metadata meta')
+ for m in metas
+ name = m['name']
+ content = m['content']
+
+ if 'calibre:series' == name
+ # TODO: Dynamically create a new series?
+ # @series_id = content
+ elsif 'calibre:series-index' == name
+ @volume = content
+ elsif 'cover' == name
+ coverId = content
+ #puts 'File ' + @path + ' coverId ' + coverId
+ end
+ end
+
+ #---------------
+ # Load the cover
+
+ @cover = load_cover(zipfile, opfPath, opfDoc, coverId)
+ end
+
+ protected
+ def load_cover(zipfile, opfPath, opfDoc, coverId)
+ coverFile = nil
+ if nil == coverId
+ coverId = "cover-image"
+ end
+
+ items = opfDoc.css('package manifest item')
+ for i in items
+ href = i['href']
+ id = i['id']
+ mimeType = i['media-type']
+
+ if coverId == id
+ entry = zipfile.find_entry(href)
+
+ if nil == entry
+ # Although the epub standard requires the path to be relative
+ # to the base of the epub (zip), some books encountered in the
+ # wild have been found to use a path relative to the location
+ # of the opf file.
+ parts = opfPath.split('/')
+ opfBasePath = opfPath.split('/')[0..-2].join('/')
+ coverPath = opfBasePath + '/' + href
+ entry = zipfile.find_entry(coverPath)
+ end
+
+ unless entry
+ # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg'
+ if href.start_with? '../'
+ coverPath = href[3..-1]
+ entry = zipfile.find_entry(coverPath)
+ end
+ end
+
+ if nil == entry
+ puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".'
+ return nil
+ else
+ entry.get_input_stream() do |is|
+ return Cover.new(is, href, mimeType)
+ end
+ end
+ end
+ end
+ return nil
+ end
+end
+
--- /dev/null
+
+require_relative 'book'
+require_relative 'store'
+
+class BookLoader
+ DONE_MARKER = '<END>'
+
+ def initialize(config_file, queue)
+ @config_file = config_file
+ @queue = queue
+ end
+
+ def run
+ @store = Store.new(@config_file)
+ @store.connect()
+
+ file = @queue.pop
+ until file == DONE_MARKER do
+ book = Book.new(@store)
+ book.load_from_file!(file)
+ @store.store_book(book)
+
+ file = @queue.pop
+ end
+
+ @store.disconnect()
+ end
+end
--- /dev/null
+
+class Classification
+ def initialize(ddc, lcc, author_grouping, author, title_grouping, title)
+ @id = nil
+ @ddc = ddc
+ @lcc = lcc
+ @author_grouping = author_grouping
+ @author = author
+ @title_grouping = title_grouping
+ @title = title
+ end
+
+ def id
+ @id
+ end
+ def id=(value)
+ @id = value
+ end
+
+ def ddc
+ @ddc
+ end
+ def lcc
+ @lcc
+ end
+ def author_grouping
+ @author_grouping
+ end
+ def author
+ @author
+ end
+ def title_grouping
+ @title_grouping
+ end
+ def title
+ @title
+ end
+
+ def inspect
+ data = []
+
+ if nil != @ddc
+ data.push('Dewey=' + @ddc.to_s)
+ end
+ if nil != @lcc
+ data.push('LCC=' + @lcc.to_s)
+ end
+ if nil != @author_grouping
+ data.push('author_grouping=' + @author_grouping.to_s)
+ end
+ if nil != @author
+ data.push('author=' + @author.to_s)
+ end
+ if nil != @title_grouping
+ data.push('title_grouping=' + @title_grouping.to_s)
+ end
+ if nil != @title
+ data.push('title=' + @title)
+ end
+
+ return '(Classification:' + data.join(',') + ')'
+ end
+
+ def to_s
+ inspect
+ end
+
+ protected
+ def reading_to_sort_order(reading_order)
+ sort_order = reading_order
+
+ parts = reading_order.split(' ')
+ if parts.length > 1
+ sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
+ end
+
+ return sort_order
+ end
+end
+
--- /dev/null
+
+class Cover
+ attr_reader :path
+
+ def initialize(inputStream, path, mimeType)
+ if nil != inputStream
+ @data = inputStream.read
+ else
+ @data = nil
+ end
+ @path = path
+ @mimeType = mimeType
+ end
+
+ def inspect
+ info = []
+ if nil != @data
+ info.push('size=' + @data.length.to_s)
+ else
+ info.push('empty')
+ end
+ if nil != @path
+ info.push('path="' + @path + '"')
+ end
+ if nil != @mimeType
+ info.push('mimeType="' + @mimeType + '"')
+ end
+ return '(Cover:' + info.join(',') + ')'
+ end
+
+ def read_image(filename)
+ open(filename, 'rb') do |fd|
+ @data = fd.read()
+ end
+ end
+
+ def to_s
+ return inspect
+ end
+
+ def write_image(outputDir, filename)
+ open(outputDir + '/' + filename, 'wb') do |fd|
+ fd.write(@data)
+ end
+ return filename, @mimeType
+ end
+
+ protected
+ def getExt
+ pos = @path.rindex('.')
+ if nil == pos
+ return '.img'
+ end
+ return @path.slice(pos, @path.length)
+ end
+end
+
--- /dev/null
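+# Bulk-converts EPUBs to plain text with Calibre's ebook-convert tool
+# (assumed to be on the PATH). The first argument is the destination
+# directory; remaining arguments are source directories to scan, e.g.
+# (paths illustrative):
+#   ruby <this_script> /tmp/extracted /books/fiction /books/nonfic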
+require 'find'
+require 'pathname'
+
+def exec(cmdline)
+ puts "$ #{cmdline}"
+ result = system(cmdline)
+ unless result
+ puts "FAILED: #{cmdline}"
+ end
+ result
+end
+
+def extract_epub(source_file, source_path, dest_path)
+ relative_path = source_file[source_path.length .. source_file.length]
+ dest_file = "#{dest_path}/#{relative_path}"
+ dest_file = dest_file[0 .. (dest_file.length - 6)] + ".txt"
+
+ required_path = Pathname(dest_file).dirname
+ unless File.directory? required_path
+ unless exec("mkdir -p #{required_path}")
+ return false
+ end
+ end
+
+ if File.exist? dest_file
+ source_time = File.mtime source_file
+ dest_time = File.mtime dest_file
+ comp = dest_time <=> source_time
+ if comp > 0
+ return true # Nothing to do, extraction is already up-to-date
+ end
+ end
+
+ exec("ebook-convert #{source_file} #{dest_file}")
+end
+
+def scan_dir(source_path, dest_path)
+ Find.find(source_path) do |f|
+ if f.match(/\.epub\Z/)
+ unless (f.match(/_bis\.epub\Z/) || f.match(/_ter\.epub\Z/) || f.match(/_quater\.epub\Z/))
+ extract_epub(f, source_path, dest_path)
+ end
+ end
+ end
+end
+
+dest_path = ARGV[0]
+for arg in ARGV[1 .. ARGV.length]
+ scan_dir(arg, dest_path)
+end
--- /dev/null
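+# Command-line driver: scans book directories into the database and then
+# writes the HTML browsing pages. Options handled below: --config=FILE
+# (defaults to quanlib.ini), --purge (drop the existing schema so it is
+# rebuilt), --skip-class (skip loading the classification tables); all
+# other arguments are directories to scan.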
+require_relative 'navigator'
+require_relative 'page'
+require_relative 'store'
+require_relative 'walk_dir'
+
+@outputDir = 'output'
+
+@config_file = 'quanlib.ini'
+@skip_class = false
+
+def handleArg(arg)
+ if arg.start_with?("--config=")
+ @config_file = arg[9..-1]
+ puts 'Using config file "' + @config_file + '".'
+ elsif "--purge" == arg
+ puts 'Purging database...'
+ @store.dropSchema()
+ if File.exist?(@store.unclassified_csv)
+ File.delete(@store.unclassified_csv)
+ end
+ elsif "--skip-class" == arg
+ puts 'Skipping load of classification table.'
+ @skip_class = true
+ elsif arg.start_with?("--")
+ abort('ERROR: Unrecognized option "' + arg + '".')
+ end
+end
+
+@store = Store.new(@config_file)
+@store.connect()
+
+for arg in ARGV
+ handleArg(arg)
+end
+
+@store.init_db(@skip_class)
+
+for arg in ARGV
+ if ! arg.start_with?("--")
+ puts 'Scanning directory "' + arg + '"...'
+ w = WalkDir.new(@config_file, arg)
+ w.books
+ end
+end
+
+@store.cross_reference_lists
+
+puts 'Creating output...'
+
+navigator = Navigator.new(@store)
+navigator.write_atoz_pages()
+navigator.write_series_listing()
+navigator.write_dewey()
+
+@store.disconnect()
+
--- /dev/null
+require_relative 'page'
+require_relative 'store'
+
+class Navigator
+ def initialize(store)
+ @store = store
+ end
+
+ def write_atoz_pages
+ atoz_counts = {}
+
+ ('A'..'Z').each do |letter|
+ atoz_counts[letter] = write_authors_starting_with(letter)
+ end
+
+ content = '<p><table><tr><th>Author</th><th>Books</th></tr>'
+ ('A'..'Z').each do |letter|
+ content += ' <tr><td><a href="../atoz/output_' + letter + '.html">Starting with ' + letter + '</a></td><td>' + atoz_counts[letter].to_s + '</td></tr>'
+ end
+ content += '</table></p>'
+ page = Page.new(@store)
+ page.output_dir = 'atoz'
+ page.special = content
+ page.up = ['../output/index.html', 'Up']
+
+ page.write_html( [] )
+ end
+
+ def write_authors_starting_with(letter)
+ book_ids = @store.query_books_by_author(letter + '%')
+ puts 'Authors starting with "' + letter + '": ' + book_ids.length.to_s() + ' books.'
+
+ page = Page.new(@store)
+ if 'A' != letter
+ page.back = ['../atoz/output_' + (letter.ord - 1).chr + '.html', 'Prev']
+ end
+ if 'Z' != letter
+ page.forward = ['../atoz/output_' + (letter.ord + 1).chr + '.html', 'Next']
+ end
+ page.output_dir = 'atoz'
+ page.index_file = 'output_' + letter + '.html'
+ page.title = "Authors starting with '" + letter + "'"
+ page.up = ['../atoz/index.html', 'Up']
+
+ page.write_html(book_ids)
+ return book_ids.length
+ end
+
+ def write_dewey
+ book_ids = @store.query_books_by_ddc()
+ puts 'Non-fiction books arranged by Dewey Decimal Classification: ' + book_ids.length.to_s() + ' books.'
+
+ page = Page.new(@store)
+ page.output_dir = 'ddc'
+ page.index_file = 'index.html'
+ page.title = "Non-fiction books arranged by Dewey Decimal call number"
+ page.up = ['../output/index.html', 'Up']
+
+ page.write_html(book_ids)
+ return book_ids.length
+ end
+
+ def write_series_for_age(age)
+ series_infos = []
+
+ series_ids = @store.query_series_by_age(age)
+
+ series_ids.each do |id|
+ series = @store.load_series(id)
+ book_ids = @store.query_books_by_series_id(id)
+ if nil != book_ids and book_ids.length > 0
+ series_infos.push( [series, book_ids] )
+ end
+ end
+
+ for idx in 0 .. (series_infos.length - 1) do
+ #puts series.descr + ': ' + book_ids.length.to_s + ' books.'
+
+ back = nil
+ fwd = nil
+
+ if idx > 0
+ back = series_infos[idx-1]
+ end
+ if (idx + 1) < series_infos.length
+ fwd = series_infos[idx+1]
+ end
+
+ cur = series_infos[idx]
+ series = cur[0]
+ book_ids = cur[1]
+
+ page = Page.new(@store)
+ if nil != back
+ page.back = [back[0].key + '.html', 'Back']
+ end
+ if nil != fwd
+ page.forward = [fwd[0].key + '.html', 'Forward']
+ end
+ page.output_dir = 'series/series_' + age
+ page.index_file = series.key + '.html'
+ page.title = 'Series “' + series.descr + '” (' + book_ids.length.to_s + ' books)'
+ page.up = ['index.html', 'Up']
+
+ page.write_html(book_ids)
+ end
+
+ content = '<h1>“' + age + '” Series</h1>'
+ content += '<p><table><tr><th>Author</th><th>Series</th><th>Genre</th><th>Books</th></tr>'
+ series_infos.each do |cur|
+ series = cur[0]
+ book_ids = cur[1]
+
+ author = series.grouping
+ letter = author[0]
+
+ content += ' <tr>'
+ content += '<td><a href="../../atoz/output_' + letter + '.html">' + author + '</a></td>'
+ content += '<td><a href="' + series.key + '.html">' + series.descr + '</a></td>'
+ content += '<td>' + series.genre + '</td>'
+ content += '<td>' + book_ids.length.to_s + '</td>'
+ content += '</tr>'
+ end
+ content += '</table></p>'
+ page = Page.new(@store)
+ page.output_dir = 'series/series_' + age
+ page.special = content
+ page.up = ['../index.html', 'Up']
+ page.write_html( [] )
+
+ return series_infos.length
+ end
+
+ def write_series_listing
+ ages = ['beginner', 'junior', 'ya', 'adult']
+ series_counts = {}
+
+ ages.each do |age|
+ puts 'Series for "' + age + '" readers...'
+
+ series_counts[age] = write_series_for_age(age)
+ end
+
+ content = '<h1>Browse Books By Series</h1>'
+ content += '<p>'
+ content += '<table><tr><th>Age</th><th>Number of Series</th></tr>'
+ ages.each do |age|
+ content += '<tr><td><a href="series_' + age + '/index.html">' + age + '</a></td><td>' + series_counts[age].to_s + '</td></tr>'
+ end
+ content += '</table></p>'
+ page = Page.new(@store)
+ page.output_dir = 'series'
+ page.special = content
+ page.up = ['../output/index.html', 'Up']
+ page.write_html( [] )
+ end
+end
--- /dev/null
+require 'fileutils'
+
+require_relative 'store'
+
+class Page
+ def initialize(store)
+ @back = nil
+ @forward = nil
+ @index_file = 'index.html'
+ @output_dir = 'output'
+ @special = nil
+ @store = store
+ @title = 'Books'
+ @up = nil
+ end
+
+ def back=(value)
+ @back = value
+ end
+
+ def forward=(value)
+ @forward = value
+ end
+
+ def index_file=(value)
+ @index_file = value
+ end
+
+ def navig_link(data)
+ if (nil == data)
+ return ''
+ end
+ return '<a href="' + data[0] + '">' + data[1] + '</a>'
+ end
+
+ def output_dir=(value)
+ @output_dir = value
+ end
+
+ def special=(value)
+ @special = value
+ end
+
+ def title=(value)
+ @title = value
+ end
+
+ def up=(value)
+ @up = value
+ end
+
+ def write_books(fd, book_ids)
+ for id in book_ids
+ book = @store.load_book(id)
+ image = nil
+ if nil != book.cover
+ #@imageCount += 1
+ #(path, mimeType) = book.cover.write_image(@output_dir, 'image' + @imageCount.to_s)
+ #image = '<img class="cover-thumb" src="' + path + '"/>'
+ path = book.cover.path
+ image = '<img class="cover-thumb" src="' + path + '"/>'
+ else
+ image = '(No cover image)'
+ end
+
+ fd.puts ' <div><table>'
+ fd.puts ' <tr><td><a href="' + book.path + '">' + image + '</a></td>'
+
+ heading = book.heading()
+ description = book.description()
+ if nil != description
+ fd.puts ' <td><span class="popup">' + heading + '<span class="pop-inner"><p>' + heading + '</p><p>' + description + '</p></span></span></td></tr>'
+ else
+ fd.puts ' <td>' + heading + '</td></tr>'
+ end
+
+ fd.puts ' </table></div>'
+ end
+ end
+
+ def write_footer(fd)
+ fd.puts ' <p class="navigator">' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '</p>'
+ end
+
+ def write_header(fd)
+ fd.puts ' <h1 class="header">' + @title + '</h1>'
+
+ fd.puts ' <p class="navigator">' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '</p>'
+ end
+
+ def write_html(book_ids)
+ @imageCount = 0
+
+ if ! Dir.exist?(@output_dir)
+ FileUtils.mkdir_p(@output_dir)
+ end
+
+ open(@output_dir + '/' + @index_file, 'w') do |fd|
+ fd.puts '<html>'
+ fd.puts ' <head>'
+ fd.puts ' <meta charset="utf-8"/>'
+ fd.puts ' <title>' + @title + '</title>'
+
+ write_style_sheet(fd)
+
+ fd.puts ' </head>'
+ fd.puts ' <body>'
+
+ write_header(fd)
+
+ write_special(fd)
+ write_books(fd, book_ids)
+
+ write_footer(fd)
+
+ fd.puts " </body>"
+ fd.puts "</html>"
+ end
+ end
+
+ def write_special(fd)
+ if (nil != @special)
+ fd.puts(@special)
+ end
+ end
+
+ def write_style_sheet(fd)
+ style =
+<<EOS
+ <style>
+ div {
+ display: inline-block;
+ width: 400px;
+ margin: 10px;
+ border: 3px solid #73ad21;
+ }
+ h1.header {
+ background: #4040a0;
+ color: #ffffff;
+ text-align: center;
+ }
+ img.cover-thumb { max-height: 200px; max-width: 200px; }
+ p.navigator { }
+ span.popup { }
+ span.popup:hover { text-decoration: none; background: #cfffff; z-index: 6; }
+ span.popup span.pop-inner {
+ border-color:black;
+ border-style:solid;
+ border-width:1px;
+ display: none;
+ margin: 4px 0 0 0px;
+ padding: 3px 3px 3px 3px;
+ position: absolute;
+ }
+ span.popup:hover span.pop-inner {
+ background: #ffffaf;
+ display: block;
+ margin: 20px 0 0 0px;
+ z-index:6;
+ }
+ </style>
+EOS
+ fd.puts style
+ end
+end
+
--- /dev/null
+
+class Series
+ def initialize(id)
+ @age = nil
+ @genre = nil
+ @grouping = nil
+ @code = nil
+ @descr = nil
+ @id = id
+ end
+
+ def age
+ @age
+ end
+
+ def age=(value)
+ @age = value
+ end
+
+ def code
+ @code
+ end
+
+ def code=(value)
+ @code = value
+ end
+
+ def descr
+ @descr
+ end
+
+ def descr=(value)
+ @descr = value
+ end
+
+ def genre
+ @genre
+ end
+
+ def genre=(value)
+ @genre = value
+ end
+
+ def grouping
+ @grouping
+ end
+
+ def grouping=(value)
+ @grouping = value
+ end
+
+ def id
+ @id
+ end
+
+ def inspect
+ data = []
+ if nil != @age
+ data.push('age="' + @age.inspect + '"')
+ end
+ if nil != @code
+ data.push('code="' + @code.inspect + '"')
+ end
+ if nil != @descr
+ data.push('descr="' + @descr + '"')
+ end
+ if nil != @genre
+ data.push('genre="' + @genre + '"')
+ end
+ if nil != @grouping
+ data.push('grouping="' + @grouping + '"')
+ end
+ return '(Series:' + data.join(',') + ')'
+ end
+
+ def key
+ if nil != grouping and nil != code
+ return grouping.to_s + '_' + code.to_s
+ end
+ return id.to_s
+ end
+
+ def to_s
+ return inspect()
+ end
+end
+
--- /dev/null
+
+require 'csv'
+require 'fileutils'
+require 'inifile'
+require 'pg'
+
+require_relative 'series'
+require_relative 'tconn'
+
+class Store
+ def unclassified_csv
+ @basePath + '/csv/unclassified.csv'
+ end
+
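+ # Reads connection and filesystem settings from an INI file. Expected
+ # sections and keys (values below are illustrative):
+ #
+ #   [database]
+ #   host = localhost
+ #   name = quanlib
+ #   user = quanlib
+ #   pass = secret
+ #
+ #   [filesystem]
+ #   basePath = /arc/quanlib
+ #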
+ def initialize(config_file)
+ @conn = nil
+
+ config = IniFile.load(config_file)
+ if nil == config
+ puts 'FATAL: Failed to load config file "' + config_file + '". Aborting initialization.'
+ return
+ end
+
+ section = config['database']
+ @dbhost = section['host']
+ @dbport = 5432
+ @dbname = section['name']
+ @dbuser = section['user']
+ @dbpass = section['pass']
+
+ section = config['filesystem']
+ @basePath = section['basePath']
+ end
+
+ def connect
+ @conn = TimedConn.new(PG.connect(@dbhost, @dbport, '', '', @dbname, @dbuser, @dbpass))
+ return @conn
+ end
+
+ def disconnect
+ @conn.close()
+ end
+
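+ # Fan a numeric EFS id out into a storage path: e.g. id 1234567 maps to
+ # directory '00/01/23/45' and file name '0001234567.dat'.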
+ def construct_efs_path(efs_id)
+ id_str = sprintf('%010d', efs_id)
+ path = sprintf('%s/%s/%s/%s', id_str[0,2], id_str[2,2], id_str[4,2], id_str[6,2])
+ name = id_str + '.dat'
+ return path, name
+ end
+
+ def cross_reference_lists
+ puts "@@@@@@@@@@@ CROSS-REF START @@@@@@@@@@@"
+ exec_update("TRUNCATE TABLE Lists CASCADE;", [])
+
+ populate_lists_table
+ puts "@@@@@@@@@@@ CROSS-REF DONE @@@@@@@@@@@"
+ end
+
+ def create_schema(skip_class)
+ create_authors =
+<<EOS
+ CREATE TABLE Authors (
+ id INTEGER PRIMARY KEY,
+ grouping VARCHAR(64),
+ reading VARCHAR(256),
+ sort VARCHAR(256)
+ );
+EOS
+
+ create_books =
+<<EOS
+ CREATE TABLE Books (
+ id INTEGER PRIMARY KEY,
+ arrived TIMESTAMP,
+ author INTEGER REFERENCES Authors(id),
+ classification INTEGER REFERENCES Classifications(id),
+ cover INTEGER,
+ language VARCHAR(64),
+ description TEXT,
+ path VARCHAR(256),
+ series INTEGER REFERENCES Series(id),
+ title VARCHAR(256),
+ volume VARCHAR(16)
+ );
+EOS
+
+ create_classification =
+<<EOS
+ CREATE TABLE Classifications (
+ id INTEGER PRIMARY KEY,
+ ddc VARCHAR(32),
+ lcc VARCHAR(32),
+ author_grouping VARCHAR(64),
+ author_sort VARCHAR(128),
+ title_grouping VARCHAR(256),
+ title VARCHAR(256)
+ );
+EOS
+
+ create_efs =
+<<EOS
+ CREATE TABLE EFS (
+ id INTEGER PRIMARY KEY,
+ mimetype VARCHAR(64)
+ );
+EOS
+
+ create_fast =
+<<EOS
+ CREATE TABLE FAST (
+ id VARCHAR(32) PRIMARY KEY,
+ descr VARCHAR(128)
+ );
+EOS
+
+ # Associative entity, linking FAST and Classifications tables
+ # in a 0..n to 0..m relationship
+ create_fast_classifications =
+<<EOS
+ CREATE TABLE FAST_Classifications (
+ fast VARCHAR(32) REFERENCES FAST(id),
+ classification INTEGER REFERENCES Classifications(id)
+ );
+EOS
+
+ create_lists =
+<<EOS
+ CREATE TABLE Lists (
+ id INTEGER PRIMARY KEY,
+ age VARCHAR(32),
+ category VARCHAR(32),
+ code VARCHAR(2),
+ year INTEGER,
+ author INTEGER REFERENCES Authors(id),
+ title VARCHAR(256)
+ );
+EOS
+
+ # Associative entity, linking Lists and Books tables
+ # in a 0..n to 0..m relationship
+ create_lists_books =
+<<EOS
+ CREATE TABLE Lists_Books (
+ list INTEGER REFERENCES Lists(id),
+ book INTEGER REFERENCES Books(id)
+ );
+EOS
+
+ create_series =
+<<EOS
+ CREATE TABLE Series (
+ id INTEGER PRIMARY KEY,
+ age VARCHAR(32),
+ genre VARCHAR(32),
+ grouping VARCHAR(64),
+ code VARCHAR(16),
+ descr VARCHAR(128)
+ )
+EOS
+
+ stmts = [
+ create_authors,
+ create_classification,
+ create_efs,
+ create_fast,
+ create_series,
+ create_books,
+ create_fast_classifications,
+ create_lists,
+ create_lists_books,
+ 'CREATE SEQUENCE author_id;',
+ 'CREATE SEQUENCE book_id;',
+ 'CREATE SEQUENCE classification_id;',
+ 'CREATE SEQUENCE efs_id;',
+ 'CREATE SEQUENCE list_id;',
+ 'CREATE SEQUENCE series_id;'
+ ]
+
+ for stmt in stmts
+ @conn.exec(stmt)
+ end
+
+ if skip_class == false
+ populate_fast_table
+ populate_classifications_table
+ end
+
+ populate_series_table
+ end
+
+ def dropSchema
+ stmts = [
+ 'DROP TABLE Lists_Books;',
+ 'DROP TABLE Lists;',
+ 'DROP TABLE Books;',
+ 'DROP TABLE FAST_Classifications;',
+ 'DROP TABLE Authors;',
+ 'DROP TABLE Classifications;',
+ 'DROP TABLE EFS;',
+ 'DROP TABLE FAST;',
+ 'DROP TABLE Series;',
+ 'DROP SEQUENCE author_id;',
+ 'DROP SEQUENCE book_id;',
+ 'DROP SEQUENCE classification_id;',
+ 'DROP SEQUENCE efs_id;',
+ 'DROP SEQUENCE list_id;',
+ 'DROP SEQUENCE series_id;'
+ ]
+
+ for stmt in stmts do
+ begin
+ @conn.exec(stmt)
+ rescue Exception => exc
+ puts 'WARNING: "' + stmt + '" failed: ' + exc.to_s
+ end
+ end
+ end
+
+ def find_all_authors(author_name)
+ result = []
+
+ sqlSelect = "SELECT id FROM Authors WHERE grouping=$1;"
+ args = [author_name]
+
+ @conn.exec_params(sqlSelect, args) do |rs|
+ rs.each do |row|
+ result << row['id']
+ end
+ end
+
+ result
+ end
+
+ def find_author(author)
+ sqlSelect = "SELECT id FROM Authors WHERE grouping=$1 AND reading=$2 AND sort=$3;"
+ args = [author.grouping, author.reading_order, author.sort_order]
+
+ @conn.exec_params(sqlSelect, args) do |rs|
+ if rs.ntuples > 0
+ return rs[0]['id']
+ end
+ end
+
+ return nil
+ end
+
+ def init_db(skip_class)
+ sql = "SELECT 1 FROM pg_tables WHERE tableowner='quanlib' AND tablename='books'"
+ found = false
+ @conn.exec(sql).each do |row|
+ found = true
+ end
+
+ if ! found
+ create_schema(skip_class)
+ end
+ end
+
+ def load_author(id)
+ sqlSelect = "SELECT grouping, reading, sort FROM Authors WHERE id=$1"
+ args = [id]
+ @conn.exec_params(sqlSelect, args) do |rs|
+ if rs.ntuples != 1
+ raise "Expected 1 row for " + id + " but got " + rs.ntuples + ": " + sqlSelect
+ end
+ row = rs[0]
+ author = Author.new(row['grouping'], row['reading'], row['sort'])
+ return author
+ end
+ return nil
+ end
+
+ def store_author(author)
+ id = find_author(author)
+ if nil == id
+ id = next_id('author_id')
+ sqlInsert = "INSERT INTO Authors(id, grouping, reading, sort) VALUES ($1, $2, $3, $4);"
+ args = [id, author.grouping, author.reading_order, author.sort_order]
+ begin
+ rs = @conn.exec_params(sqlInsert, args)
+ rescue Exception => e
+ puts sqlInsert + ": " + args.inspect()
+ puts e.message
+ puts $@
+ ensure
+ rs.clear if rs
+ end
+ end
+ return id
+ end
+
+ def load_book(id)
+ sql = "SELECT author, classification, cover, description, language, path, series, title, volume FROM Books WHERE id=$1;"
+ book = nil
+
+ begin
+ @conn.exec_params(sql, [id]) do |rs|
+ if 1 != rs.ntuples
+ raise "Expected one row in Books for id #{id}, but found #{rs.ntuples}."
+ end
+ row = rs[0]
+
+ book = Book.new(self)
+ book.author = load_author(row['author'])
+ book.classification_id = row['classification']
+ book.cover = load_cover(row['cover'])
+ book.description = row['description']
+ book.language = row['language']
+ book.path = row['path']
+ book.series_id = row['series']
+ book.title = row['title']
+ book.volume = row['volume']
+ end
+ rescue Exception => e
+ puts sql + ": " + id
+ puts e.message
+ puts $@
+ end
+
+ return book
+ end
+
+ def store_book(book)
+ sql = "INSERT INTO Books (id, arrived, author, classification, cover, description, language, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);"
+
+ book_id = next_id('book_id')
+
+ author_id = store_author(book.author)
+ (efs_id, mime_type) = store_cover(book)
+
+ args = [book_id, book.arrived, author_id, book.classification_id, efs_id, book.description, book.language, book.path, book.series_id, book.title, book.volume]
+
+ begin
+ rs = @conn.exec_params(sql, args)
+ rescue Exception => e
+ puts sql + ": " + args.inspect()
+ puts e.message
+ puts $@
+ ensure
+ rs.clear if rs
+ end
+
+ return book_id
+ end
+
+ def find_classification(author_grouping, title_grouping)
+ sql = "SELECT id FROM Classifications WHERE author_grouping = $1 AND title_grouping = $2;"
+ @conn.exec_params(sql, [author_grouping, title_grouping]) do |rs|
+ if rs.ntuples > 0
+ return rs[0]['id']
+ end
+ end
+ return nil
+ end
+
+ def load_classification(id)
+ sql = "SELECT ddc, lcc, author_grouping, author_sort, title_grouping, title "
+ sql += " FROM Classifications WHERE id=$1"
+ @conn.exec_params(sql, [id]) do |rs|
+ if rs.ntuples > 0
+ row = rs[0]
+ ddc = row['ddc']
+ lcc = row['lcc']
+ author_grouping = row['author_grouping']
+ author = row['author_sort']
+ title_grouping = row['title_grouping']
+ title = row['title']
+
+ result = Classification.new(ddc, lcc, author_grouping, author, title_grouping, title)
+ result.id = id
+ return result
+ end
+ end
+
+ return nil
+ end
+
+ def load_cover(id)
+ if nil == id
+ return nil
+ end
+
+ mime_type = 'application/octet-stream'
+
+ sql = "SELECT mimeType FROM Efs WHERE id=$1"
+ @conn.exec_params(sql, [id]) do |rs|
+ if rs.ntuples != 1
+ raise "Expected one row but got " + rs.ntuples + ": " + sql + ": " + id
+ end
+ mime_type = rs[0]['mimetype']
+ end
+
+ (efspath, efsname) = construct_efs_path(id)
+
+ fullpath = @basePath + '/efs/' + efspath + '/' + efsname
+
+ return Cover.new(nil, fullpath, mime_type)
+ end
+
+ def store_cover(book)
+ efs_id = nil
+ cover = book.cover()
+
+ if nil == cover
+ return nil
+ end
+
+ @conn.exec("SELECT nextval('efs_id')") do |rs|
+ efs_id = rs[0]['nextval']
+ end
+
+ if nil == efs_id
+ return nil
+ end
+
+ (efspath, efsname) = construct_efs_path(efs_id)
+
+ efspath = @basePath + '/efs/' + efspath
+
+ FileUtils.mkdir_p(efspath)
+
+ (filepath, mimetype) = cover.write_image(efspath, efsname)
+
+ sql = "INSERT INTO efs VALUES ($1, $2)"
+ begin
+ rs = @conn.exec_params(sql, [efs_id, mimetype])
+ rescue Exception => e
+ puts sql + ": " + efs_id + ", " + mimetype
+ puts e.message
+ puts $@
+ ensure
+ rs.clear if rs
+ end
+
+ return efs_id, mimetype
+ end
+
+ def exec_id_query(sql, args)
+ ids = []
+ @conn.exec_params(sql, args) do |rs|
+ rs.each do |row|
+ ids.push(row['id'])
+ end
+ end
+ return ids
+ end
+
+ def exec_update(sql, args)
+ begin
+ rs = @conn.exec_params(sql, args)
+ rescue Exception => e
+ puts sql + ": " + args.inspect()
+ puts e.message
+ puts $@
+ ensure
+ rs.clear if rs
+ end
+ end
+
+ def next_id(seq_name)
+ id = nil
+ @conn.exec("SELECT nextval('" + seq_name + "');") do |rs|
+ id = rs[0]['nextval']
+ end
+ return id
+ end
+
+ def get_series(grouping, code)
+ if nil == code
+ return nil
+ end
+
+ sql = "SELECT id FROM Series WHERE grouping=$1 AND code=$2;"
+ args = [grouping, code]
+ @conn.exec_params(sql, args).each do |row|
+ return row['id']
+ end
+
+ # TODO: Create a new series object here?
+ puts 'WARNING: series("' + grouping + '", "' + code + '") not found.'
+ return nil
+ end
+
+ def load_series(id)
+ sql = "SELECT age,genre,grouping,code,descr FROM Series WHERE id=$1;"
+ args = [id]
+ @conn.exec_params(sql, args) do |rs|
+ if rs.ntuples > 0
+ row = rs[0]
+ series = Series.new(id)
+ series.age = row['age']
+ series.genre = row['genre']
+ series.grouping = row['grouping']
+ series.code = row['code']
+ series.descr = row['descr']
+ return series
+ end
+ end
+ return nil
+ end
+
+ def populate_classifications_table
+ puts "Populating the Classifications table..."
+ first = true
+ CSV.foreach(@basePath + '/csv/class.csv') do |row|
+ if first
+ # skip the header row
+ first = false
+ else
+
+ # First, add a row to the Classifications table
+
+ id = next_id('classification_id')
+ ddc = row[0]
+ lcc = row[1]
+ author_grouping = row[2]
+ author_sort = row[3]
+ title_grouping = row[4]
+ title = row[5]
+
+ sqlInsert = "INSERT INTO Classifications (id, ddc, lcc, author_grouping, author_sort, title_grouping, title) VALUES ($1, $2, $3, $4, $5, $6, $7);"
+ args = [id, ddc, lcc, author_grouping, author_sort, title_grouping, title]
+ exec_update(sqlInsert, args)
+
+ # Second, link up with the appropriate FAST table entries
+
+ fast = []
+ input = row[6]
+ if input.length > 0
+ fast = input.split(';')
+ end
+
+ fast.each do |fast_id|
+ sqlInsert = "INSERT INTO FAST_Classifications (fast, classification) VALUES ($1, $2);"
+ args = [fast_id, id]
+ exec_update(sqlInsert, args)
+ end
+ end
+ end
+ end
+
+ def populate_fast_table
+ puts "Populating the FAST table..."
+ first = true
+ CSV.foreach(@basePath + '/csv/fast.csv') do |row|
+ if first
+ first = false # skip the header row
+ else
+ id = row[0]
+ descr = row[1]
+ sqlInsert = "INSERT INTO FAST (id, descr) VALUES ($1, $2);"
+ exec_update(sqlInsert, [id, descr])
+ end
+ end
+ end
+
+ def populate_lists_table
+ puts "Populating the Lists table..."
+
+ CSV.foreach(@basePath + "/csv/lists.csv", headers: true) do |row|
+ author_ids = find_all_authors(row['author'])
+ if author_ids.empty?
+ specification = [row['age'], row['category'], row['code'], row['year'], row['author'], row['title']]
+ .map { |x| x.inspect }
+ .join(', ')
+
+ puts "WARNING: For list entry (#{specification}), no such author was found."
+
+ next
+ end
+
+ sqlInsert = %Q(
+ INSERT INTO Lists (id, age, category, code, year, author, title)
+ VALUES ($1, $2, $3, $4, $5, $6, $7);
+ )
+ author_ids.each do |author_id|
+ list_id = next_id('list_id')
+ args = [list_id, row['age'], row['category'], row['code'], row['year'], author_id, row['title']]
+ exec_update(sqlInsert, args)
+
+ update_lists_books_table(list_id, author_id, row['title'])
+ end
+ end
+ end
+
+ # Scan for books that match this Lists entry, and add any matches to the Lists_Books associative table
+ def update_lists_books_table(list_id, author_id, title)
+ title_pattern = Book.grouping_for_title(title).gsub('_', '%')
+ sqlSelect = "SELECT id FROM Books WHERE author = $1 AND title LIKE $2;"
+ args = [author_id, title_pattern]
+
+ @conn.exec_params(sqlSelect, args) do |rs|
+ rs.each do |row|
+ sqlInsert = "INSERT INTO Lists_Books (list, book) VALUES ($1, $2)"
+ args = [list_id, row['id']]
+ exec_update(sqlInsert, args)
+ end
+ end
+ end
+
+ def populate_series_table
+ puts "Populating the Series table..."
+ CSV.foreach(@basePath + '/csv/series.csv') do |row|
+ id = next_id('series_id')
+ sqlInsert = "INSERT INTO Series (id, age, genre, grouping, code, descr) VALUES ($1, $2, $3, $4, $5, $6);"
+ args = [id] + row
+ exec_update(sqlInsert, args)
+ end
+ end
+
+ def query_books_by_author(pattern)
+ sql =
+<<EOS
+ SELECT b.id FROM Authors a
+ INNER JOIN Books b ON b.author=a.id
+ LEFT OUTER JOIN Series s on s.id=b.series
+ WHERE upper(a.grouping) LIKE $1
+ ORDER BY a.grouping, b.series, b.volume, b.title
+EOS
+ return exec_id_query(sql, [pattern])
+ end
+
+ def query_books_by_ddc
+ sql =
+<<EOS
+ SELECT b.id FROM Classifications c
+ INNER JOIN Books b ON b.classification=c.id
+ ORDER BY c.ddc
+EOS
+ return exec_id_query(sql, [])
+ end
+
+ def query_books_by_series_id(id)
+ sql =
+<<EOS
+ SELECT b.id FROM Books b
+ WHERE b.series = $1
+ ORDER BY b.volume,b.title
+EOS
+ return exec_id_query(sql, [id])
+ end
+
+ def query_series_by_age(pattern)
+ sql =
+<<EOS
+ SELECT s.id
+ FROM Series s
+ WHERE s.age LIKE $1
+ ORDER BY s.grouping,s.descr
+EOS
+ return exec_id_query(sql, [pattern])
+ end
+end
+
--- /dev/null
+# tconn.rb
+#
+# Timed Connection:
+# Wrapper around a PG Connection that provides a report on where time was spent executing SQL
+#
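+#
+# Example of the report printed when the connection closes (timings are
+# illustrative):
+#
+#   Connection closing. Total SQL time: 12.34 secs
+#   3.21 secs: 42 times: SELECT id FROM Authors WHERE grouping=$1;
+#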
+
+require 'pg'
+
+class TimedConn
+ def initialize(wrapped_conn)
+ @conn = wrapped_conn
+ @stmts = {}
+ @total_time = 0
+ end
+
+ def close
+ @conn.close()
+ puts "Connection closing. Total SQL time: " + @total_time.to_s + " secs"
+ @stmts.each do |sql, info|
+ elapsed = info[2]
+ calls = info[1]
+ puts elapsed.to_s + " secs: " + calls.to_s + " times: " + sql
+ end
+ end
+
+ def exec(*args, &block)
+ before = Time.now
+ #puts args.inspect
+ result = @conn.exec(*args)
+ #puts result.inspect
+ after = Time.now
+ elapsed = (after - before)
+ remember(args[0], elapsed)
+ @total_time += elapsed
+ if block_given?
+ yield(result)
+ else
+ return result
+ end
+ end
+
+ def exec_params(*args, &block)
+ before = Time.now
+ #puts args.inspect
+ result = @conn.exec_params(*args)
+ #puts result.inspect
+ after = Time.now
+ elapsed = (after - before)
+ remember(args[0], elapsed)
+ @total_time += elapsed
+ if block_given?
+ yield(result)
+ else
+ return result
+ end
+ end
+
+ def remember(sql, elapsed)
+ if @stmts.has_key?(sql)
+ stmt = @stmts[sql]
+ else
+ stmt = [sql, 0, 0]
+ end
+
+ stmt[1] += 1 # Number of times this statement has been invoked
+ stmt[2] += elapsed # total elapsed time spent on this statement
+
+ @stmts[sql] = stmt
+ end
+end
+
--- /dev/null
+# Walk the directory (and subdirectories), identifying books.
+#
+# Expected format:
+# .../AuthorName/Title_of_the_Awesome_Book.ext
+#
+# Author is given as FirstLast. For example,
+# Robert Anson Heinlein is RobertHeinlein, and
+# JKRowling is JoanneRowling.
+#
+# Book titles have spaces replaced with underscores,
+# and punctuation [,!?'] replaced with hyphens.
+#
+# If the book forms part of a series, then an all-capitals
+# series designator, followed by a numeric volume number,
+# followed by an underscore, is prefixed to the name.
+# For example, Hardy Boys' volume 1, The Tower Treasure,
+# is rendered as .../FranklinDixon/HB001_The_Tower_Treasure.epub
+# and Mrs. Pollifax volume 6, On the China Station, is
+# .../DorothyGilman/P06_On_the_China_Station.epub.
+
+require_relative 'book'
+require_relative 'book_loader'
+require_relative 'store'
+
+class WalkDir
+ def initialize(config_file, root)
+ @queue = Queue.new
+ @root = root
+ @config_file = config_file
+ @threads = []
+
+ @files = walk(@root)
+ end
+
+ def books
+ @threads = []
+ num_threads.times do
+ @threads << Thread.new do
+ BookLoader.new(@config_file, @queue).run
+ end
+ end
+
+ result = []
+ @files = remove_duplicates(@files)
+ for file in @files.sort()
+ if Book.can_handle?(file) && (!is_duplicate?(file))
+ # Queue this book to be loaded and added to the DB by a BookLoader thread
+ @queue << file
+ end
+ end
+
+ @threads.count.times { @queue << BookLoader::DONE_MARKER }
+
+ @threads.each { |t| t.join }
+ end
+
+ # Duplicate versions of a text are named
+ # xxx_suffix.ext
+ # Where suffix is one of bis, ter, quater, quinquies
+ # for the 2nd, 3rd, 4th or 5th variant respectively.
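+ # e.g. 'Dune_bis.epub' is the second copy of 'Dune.epub' and is skipped.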
+ def is_duplicate?(file)
+ s = file.to_s
+ suffix = ['_bis.', '_ter.', '_quater.', '_quinquies.']
+ suffix.each do |pat|
+ if s.include?(pat)
+ return true
+ end
+ end
+
+ return false
+ end
+
+ def remove_duplicates(files)
+ unique = {}
+ for file in files
+ if Book.can_handle?(file)
+ key = File.dirname(file) + '/' + File.basename(file, '.*')
+ if unique.has_key?(key)
+ new_ext = File.extname(file)
+ old_ext = File.extname(unique[key])
+ if ('.pdf' == old_ext) && ('.epub' == new_ext)
+ # Prefer EPUB over PDF
+ puts 'REPLACED ' + unique[key].to_s + ' with ' + file.to_s
+ unique[key] = file
+ else
+ puts 'DROPPED ' + file.to_s + " because it's superseded by " + unique[key].to_s
+ end
+ else
+ unique[key] = file
+ end
+ end
+ end
+
+ return unique.values
+ end
+
+ def walk(path)
+ result = []
+ children = Dir.entries(path)
+ for child in children
+ fullName = (path.chomp("/")) + "/" + child
+ if (File.directory?(fullName)) and (child != ".") and (child != "..") and (!File.symlink?(fullName))
+ sub = walk(fullName)
+ if (sub != nil) and (sub.length > 0)
+ result.concat(sub)
+ end
+ elsif (! File.directory?(fullName))
+ result.push(fullName)
+ end
+ end
+ return result
+ end
+
+ def num_threads
+ # TODO: make this (auto?) configurable
+ 12
+ end
+end
+++ /dev/null
-
-class Author
- def initialize(grouping, reading_order, sort_order)
- @grouping = grouping
- @reading_order = reading_order
- @sort_order = sort_order
-
- if (nil == sort_order) || ('Unknown' == sort_order)
- @sort_order = reading_to_sort_order(reading_order)
- end
- end
-
- def grouping
- @grouping
- end
-
- def reading_order
- @reading_order
- end
-
- def sort_order
- @sort_order
- end
-
- def inspect
- result = '(Author:'
- if nil != @grouping
- result += ' grouping="' + @grouping + '"'
- end
- if nil != @reading_order
- result += ' reading_order="' + @reading_order + '"'
- end
- if nil != @sort_order
- result += ' sort_order="' + @sort_order + '"'
- end
- result += ')'
-
- return result
- end
-
- def to_s
- inspect
- end
-
- protected
- def reading_to_sort_order(reading_order)
- sort_order = reading_order
-
- parts = reading_order.split(' ')
- if parts.length > 1
- sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
- end
-
- return sort_order
- end
-end
-
+++ /dev/null
-
-require 'nokogiri'
-require 'rubygems'
-require 'zip'
-
-require_relative 'author'
-require_relative 'classification'
-require_relative 'cover'
-require_relative 'store'
-
-class Book
- @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
- @@SERIES_AND_VOLUME_REGEX = /^([A-Z]+)([0-9]+(\.[0-9]+)?)$/
-
- attr_accessor :arrived
- attr_accessor :author
- attr_accessor :classification_id
- attr_accessor :cover
- attr_accessor :description
- attr_accessor :language
- attr_accessor :path
- attr_accessor :series_id
- attr_accessor :title
- attr_accessor :volume
-
- def initialize(store)
- @store = store
- end
-
- def load_from_file!(fileName)
- @path = fileName
- parse_file_name!(fileName)
- end
-
- def self.can_handle?(fileName)
- if nil == fileName
- return false
- end
-
- #puts "Filename: " + fileName.to_s
- lowerName = fileName.downcase()
-
- if lowerName.end_with?(".epub")
- return true
- end
-
- if lowerName.end_with?(".pdf")
- return true
- end
-
- return false
- end
-
- def self.grouping_for_title(title)
- result = title
-
- '\'",!#'.split('').each do |c|
- result = result.gsub(c, '-')
- end
- result = result.gsub(/: */, '--')
- result = result.gsub(' ', '_')
-
- result
- end
-
- def heading
- result = []
-
- if nil != @title
- result.push('<b>' + @title + '</b>')
- else
- result.push('<i>(Unknown title)</i>')
- end
- if nil != @author
- result.push('<i>by ' + @author.reading_order + '</i>')
- end
-
- seriesInfo = []
- series = @store.load_series(@series_id)
- if nil != series and nil != series.descr
- seriesInfo.push(series.descr.to_s)
- end
- if nil != @volume
- seriesInfo.push(@volume.to_s)
- end
- if seriesInfo.length > 0
- result.push(seriesInfo.join(' '))
- end
-
- classification = nil
- if nil != @classification_id
- classification = @store.load_classification(@classification_id)
- end
- if nil != classification
- if nil != classification.ddc
- result.push('Dewey: ' + classification.ddc.to_s)
- end
- if nil != classification.lcc
- result.push('LCC: ' + classification.lcc.to_s)
- end
- end
-
- return result.join('<br/>')
- end
-
- def inspect
- data = []
- if nil != @author
- data.push('author="' + @author.inspect + '"')
- end
- if nil != @series_id
- data.push('series_id="' + @series_id.to_s() + '"')
- end
- if nil != @volume
- data.push('volume="' + @volume + '"')
- end
- if nil != @title
- data.push('title="' + @title + '"')
- end
- if nil != @cover
- data.push(@cover.inspect())
- end
- if nil != @path
- data.push('path="' + @path + '"')
- end
- return '(Book:' + data.join(',') + ')'
- end
-
- def to_s
- return inspect()
- end
-
- def title_grouping
- if nil == @path
- return nil
- end
-
- return File.basename(@path, '.*')
- end
-
- protected
- def isUpper?(c)
- return /[[:upper:]]/.match(c)
- end
-
- protected
- def massage_author(input)
- if nil == input
- return nil
- end
-
- reading_order = ""
- input.each_char do |c|
- if isUpper?(c) and (reading_order.length > 0)
- reading_order += " "
- end
- reading_order += c
- end
-
- return reading_order
- end
-
- # Returns (series, volumeNo, titleText)
- protected
- def processTitle(input)
- if nil == input
- return nil
- end
-
- arr = input.split('_')
-
- series = nil
- vol = nil
-
- first = arr[0]
- matchData = (arr[0]).match(@@SERIES_AND_VOLUME_REGEX)
- if nil != matchData
- capt = matchData.captures
- series = capt[0]
- vol = capt[1]
- arr.shift
- end
-
- pos = arr[-1].rindex('.')
- if nil != pos
- arr[-1] = arr[-1].slice(0, pos)
- end
-
- title = arr.join(' ')
-
- bare_title_grouping = title_grouping
- .split('_')
- .reject { |part| part.match(@@SERIES_AND_VOLUME_REGEX) }
- .join('_')
-
- unless bare_title_grouping == Book.grouping_for_title(title)
- puts "WARNING: title_grouping mismatch: #{bare_title_grouping.inspect} vs. #{Book.grouping_for_title(title).inspect}"
- end
-
- return series, vol, title
- end
-
- protected
- def parse_file_name!(file_name)
- category = nil # e.g., non-fiction, fan-fiction
- grouping = ''
-
- parts = file_name.split('/')
- (series_code, @volume, @title) = processTitle(parts[-1])
- if parts.length > 1
- grouping = parts[-2]
- reading_order = massage_author(grouping)
- sort_order = nil
- @author = Author.new(grouping, reading_order, sort_order)
- @series_id = @store.get_series(grouping, series_code)
- end
- if parts.length > 2
- category = parts[-3]
- end
-
- lc_file_name = file_name.downcase
- if lc_file_name.end_with?(".epub")
- scanEpub!(file_name)
- elsif lc_file_name.end_with?(".pdf")
- scan_pdf!(file_name)
- end
-
- @arrived = File.ctime(file_name)
-
- @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*'))
-
- # TODO: Fix horrible hard-coded strings and paths
- if ('01_nonfic' == category) && (nil == classification_id)
- open(Store.unclassified_csv, 'a') do |fd|
- fd.puts('"' + grouping.to_s + '","' + path + '"')
- end
- end
- end
-
- protected
- def scanEpub!(fileName)
- #puts 'Scanning "' + fileName.to_s + '"...'
- begin
- Zip.warn_invalid_date = false
- Zip::File.open(fileName) do |zipfile|
- entry = zipfile.find_entry('META-INF/container.xml')
- if nil == entry
- puts 'No META-INF/container.xml, skipping book ' + fileName
- return
- end
- contXml = zipfile.read('META-INF/container.xml')
- contDoc = Nokogiri::XML(contXml)
- opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
-
- scanOpf!(zipfile, opfPath)
- end
- rescue Zip::Error => exc
- puts 'ERROR processing file "' + fileName + '":'
- puts exc.message
- puts exc.backtrace
- end
- end
-
- protected
- def scan_pdf!(file_name)
- #puts 'Scanning "' + file_name.to_s + '"...'
-
- pdf_path = File.expand_path(file_name).to_s
- if ! pdf_path.end_with?('.pdf')
- puts 'Unexpected internal error: path "' + file_name.to_s + '" does not end with ".pdf".'
- return
- end
-
- jpeg_path = pdf_path[0..-5] + '.jpeg'
- if File.file?(jpeg_path)
- File.open(jpeg_path, 'r') do |is|
- @cover = Cover.new(is, jpeg_path, 'image/jpeg')
- end
- end
- end
-
-
- protected
- def scanOpf!(zipfile, opfPath)
- coverId = nil
-
- opfXml = zipfile.read(opfPath)
- opfDoc = Nokogiri::XML(opfXml)
-
- #-------
- # Author
-
- grouping = @author.grouping
- reading_order = @author.reading_order
- sort_order = @author.sort_order
-
- creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
- if (creators.length > 0)
- creator = creators[0]
- if nil != creator
- role = creator['opf:role']
- if 'aut' == role
- reading_order = creator.content
-
- file_as = creator['opf:file-as']
- if nil != file_as
- sort_order = file_as
- end
- end
-
- @author = Author.new(grouping, reading_order, sort_order)
- end
- end
-
- #---------------------------------------
- # Title
-
- titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
- if titles.length > 0
- title = titles[0]
- if nil != title
- @title = title.content
- end
- end
-
- #---------------------------------------
- # Description
-
- descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
- if (descrNodes.length > 0)
- descrNode = descrNodes[0]
- if nil != descrNode
- @description = descrNode.content
- end
- end
-
- #---------------------------------------
- # Language
-
- langNodes = opfDoc.css('dc|language', 'dc' => @@DC_NS_URL)
- if (langNodes.length > 0)
- langNode = langNodes[0]
- if langNode
- @language = langNode.content
- end
- end
-
- #---------------------------------------
- # Other metadata: series, volume, cover
-
- metas = opfDoc.css('package metadata meta')
- for m in metas
- name = m['name']
- content = m['content']
-
- if 'calibre:series' == name
- # TODO: Dynamically create a new series?
- # @series_id = content
- elsif 'calibre:series-index' == name
- @volume = content
- elsif 'cover' == name
- coverId = content
- #puts 'File ' + @path + ' coverId ' + coverId
- end
- end
-
- #---------------
- # Load the cover
-
- @cover = load_cover(zipfile, opfPath, opfDoc, coverId)
- end
-
- protected
- def load_cover(zipfile, opfPath, opfDoc, coverId)
- coverFile = nil
- if nil == coverId
- coverId = "cover-image"
- end
-
- items = opfDoc.css('package manifest item')
- for i in items
- href = i['href']
- id = i['id']
- mimeType = i['media-type']
-
- if coverId == id
- entry = zipfile.find_entry(href)
-
- if nil == entry
- # Although the epub standard requires the path to be relative
- # to the base of the epub (zip), some books encountered in the
- # wild have been found to use a bath relative to the location
- # of the opf file.
- parts = opfPath.split('/')
- opfBasePath = opfPath.split('/')[0..-2].join('/')
- coverPath = opfBasePath + '/' + href
- entry = zipfile.find_entry(coverPath)
- end
-
- unless entry
- # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg'
- if href.start_with? '../'
- coverPath = href[3..-1]
- entry = zipfile.find_entry(coverPath)
- end
- end
-
- if nil == entry
- puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".'
- return nil
- else
- entry.get_input_stream() do |is|
- return Cover.new(is, href, mimeType)
- end
- end
- end
- end
- return nil
- end
-end
-
+++ /dev/null
-
-require_relative 'book'
-require_relative 'store'
-
-class BookLoader
- DONE_MARKER = '<END>'
-
- def initialize(config_file, queue)
- @config_file = config_file
- @queue = queue
- end
-
- def run
- @store = Store.new(@config_file)
- @store.connect()
-
- file = @queue.pop
- until file == DONE_MARKER do
- book = Book.new(@store)
- book.load_from_file!(file)
- @store.store_book(book)
-
- file = @queue.pop
- end
-
- @store.disconnect()
- end
-end
+++ /dev/null
-
-class Classification
- def initialize(ddc, lcc, author_grouping, author, title_grouping, title)
- @id = nil
- @ddc = ddc
- @lcc = lcc
- @author_grouping = author_grouping
- @author = author
- @title_grouping = title_grouping
- @title = title
- end
-
- def id
- @id
- end
- def id=(value)
- @id = value
- end
-
- def ddc
- @ddc
- end
- def lcc
- @lcc
- end
- def author_grouping
- @author_grouping
- end
- def author
- @author
- end
- def
-
- def inspect
- data = []
-
- if nil != @ddc
- data.push('Dewey=' + @ddc.to_s)
- end
- if nil != @lcc
- data.push('LCC=' + @lcc.to_s)
- end
- if nil != @author_grouping
- data.push('author_grouping=' + @author_grouping.to_s)
- end
- if nil != @author
- data.push('author=' + @author.to_s)
- end
- if nil != @title_grouping
- data.push('title_grouping=' + @title_grouping.to_s)
- end
- if nil != @title
- data.push('title=' + @title)
- end
-
- return '(Classification:' + data.join(',') + ')'
- end
-
- def to_s
- inspect
- end
-
- protected
- def reading_to_sort_order(reading_order)
- sort_order = reading_order
-
- parts = reading_order.split(' ')
- if parts.length > 1
- sort_order = parts[-1] + ', ' + parts[0..-2].join(' ')
- end
-
- return sort_order
- end
-end
-
+++ /dev/null
-
-class Cover
- attr_reader :path
-
- def initialize(inputStream, path, mimeType)
- if nil != inputStream
- @data = inputStream.read
- else
- @data = nil
- end
- @path = path
- @mimeType = mimeType
- end
-
- def inspect
- info = []
- if nil != @data
- info.push('size=' + @data.length.to_s)
- else
- info.push('empty')
- end
- if nil != @path
- info.push('path="' + @path + '"')
- end
- if nil != @mimeType
- info.push('mimeType="' + @mimeType + '"')
- end
- return '(Cover:' + info.join(',') + ')'
- end
-
- def read_image(filename)
- open(filename, 'rb') do |fd|
- @data = fd.read()
- end
- end
-
- def to_s
- return inspect
- end
-
- def write_image(outputDir, filename)
- open(outputDir + '/' + filename, 'wb') do |fd|
- fd.write(@data)
- end
- return filename, @mimeType
- end
-
- protected
- def getExt
- pos = @path.rindex('.')
- if nil == pos
- return '.img'
- end
- return @path.slice(pos, @path.length)
- end
-end
-
+++ /dev/null
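-# Batch text extraction for a library of .epub files.
-#
-# Invocation (as implied by the argument handling at the bottom of the file):
-# the first argument is the destination directory, and every remaining
-# argument is a source directory to scan.
-#
-# Each .epub found (other than the _bis/_ter/_quater duplicate variants) is
-# converted to a .txt file under the destination directory via the external
-# `ebook-convert` tool; files whose extracted text is already newer than the
-# source are skipped.
-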
-require 'find'
-require 'pathname'
-
-def exec(cmdline)
- puts "$ #{cmdline}"
- result = system(cmdline)
- unless result
- puts "FAILED: #{cmdline}"
- end
- result
-end
-
-def extract_epub(source_file, source_path, dest_path)
- relative_path = source_file[source_path.length .. source_file.length]
- dest_file = "#{dest_path}/#{relative_path}"
- dest_file = dest_file[0 .. (dest_file.length - 6)] + ".txt"
-
- required_path = Pathname(dest_file).dirname
- unless File.directory? required_path
- unless exec("mkdir -p #{required_path}")
- return false
- end
- end
-
- if File.exist? dest_file
- source_time = File.mtime source_file
- dest_time = File.mtime dest_file
- comp = dest_time <=> source_time
- if comp > 0
- return true # Nothing to do, extraction is already up-to-date
- end
- end
-
- exec("ebook-convert #{source_file} #{dest_file}")
-end
-
-def scan_dir(source_path, dest_path)
- Find.find(source_path) do |f|
-    if f.match(/\.epub\Z/)
-      unless (f.match(/_bis\.epub\Z/) || f.match(/_ter\.epub\Z/) || f.match(/_quater\.epub\Z/))
- extract_epub(f, source_path, dest_path)
- end
- end
- end
-end
-
-dest_path = ARGV[0]
-for arg in ARGV[1 .. ARGV.length]
- scan_dir(arg, dest_path)
-end
+++ /dev/null
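-# Top-level driver: builds the quanlib database from the directories named on
-# the command line, then writes the HTML navigation pages.
-#
-# Options handled by handleArg below:
-#   --config=FILE   read settings from FILE instead of quanlib.ini
-#   --purge         drop the existing database schema (and the unclassified
-#                   CSV) so that everything is rebuilt from scratch
-#   --skip-class    skip loading the classification table
-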
-require_relative 'navigator'
-require_relative 'page'
-require_relative 'store'
-require_relative 'walk_dir'
-
-@outputDir = 'output'
-
-@config_file = 'quanlib.ini'
-@skip_class = false
-
-def handleArg(arg)
- if arg.start_with?("--config=")
- @config_file = arg[9..-1]
- puts 'Using config file "' + @config_file + '".'
- elsif "--purge" == arg
- puts 'Purging database...'
- @store.dropSchema()
-    if File.exist?(@store.unclassified_csv)
- File.delete(@store.unclassified_csv)
- end
- elsif "--skip-class" == arg
- puts 'Skipping load of classification table.'
- @skip_class = true
- elsif arg.start_with?("--")
- abort('ERROR: Unrecognized option "' + arg + '".')
- end
-end
-
-@store = Store.new(@config_file)
-@store.connect()
-
-for arg in ARGV
- handleArg(arg)
-end
-
-@store.init_db(@skip_class)
-
-for arg in ARGV
- if ! arg.start_with?("--")
- puts 'Scanning directory "' + arg + '"...'
- w = WalkDir.new(@config_file, arg)
- w.books
- end
-end
-
-@store.cross_reference_lists
-
-puts 'Creating output...'
-
-navigator = Navigator.new(@store)
-navigator.write_atoz_pages()
-navigator.write_series_listing()
-navigator.write_dewey()
-
-@store.disconnect()
-
+++ /dev/null
-require_relative 'page'
-require_relative 'store'
-
-class Navigator
- def initialize(store)
- @store = store
- end
-
- def write_atoz_pages
- atoz_counts = {}
-
- ('A'..'Z').each do |letter|
- atoz_counts[letter] = write_authors_starting_with(letter)
- end
-
- content = '<p><table><tr><th>Author</th><th>Books</th></tr>'
- ('A'..'Z').each do |letter|
- content += ' <tr><td><a href="../atoz/output_' + letter + '.html">Starting with ' + letter + '</a></td><td>' + atoz_counts[letter].to_s + '</td></tr>'
- end
- content += '</table></p>'
- page = Page.new(@store)
- page.output_dir = 'atoz'
- page.special = content
- page.up = ['../output/index.html', 'Up']
-
- page.write_html( [] )
- end
-
- def write_authors_starting_with(letter)
- book_ids = @store.query_books_by_author(letter + '%')
- puts 'Authors starting with "' + letter + '": ' + book_ids.length.to_s() + ' books.'
-
- page = Page.new(@store)
- if 'A' != letter
- page.back = ['../atoz/output_' + (letter.ord - 1).chr + '.html', 'Prev']
- end
- if 'Z' != letter
- page.forward = ['../atoz/output_' + (letter.ord + 1).chr + '.html', 'Next']
- end
- page.output_dir = 'atoz'
- page.index_file = 'output_' + letter + '.html'
- page.title = "Authors starting with '" + letter + "'"
- page.up = ['../atoz/index.html', 'Up']
-
- page.write_html(book_ids)
- return book_ids.length
- end
-
- def write_dewey
- book_ids = @store.query_books_by_ddc()
- puts 'Non-fiction books arranged by Dewey Decimal Classification: ' + book_ids.length.to_s() + ' books.'
-
- page = Page.new(@store)
- page.output_dir = 'ddc'
- page.index_file = 'index.html'
- page.title = "Non-fiction books arranged by Dewey Decimal call number"
- page.up = ['../output/index.html', 'Up']
-
- page.write_html(book_ids)
- return book_ids.length
- end
-
- def write_series_for_age(age)
- series_infos = []
-
- series_ids = @store.query_series_by_age(age)
-
- series_ids.each do |id|
- series = @store.load_series(id)
- book_ids = @store.query_books_by_series_id(id)
- if nil != book_ids and book_ids.length > 0
- series_infos.push( [series, book_ids] )
- end
- end
-
- for idx in 0 .. (series_infos.length - 1) do
- #puts series.descr + ': ' + book_ids.length.to_s + ' books.'
-
- back = nil
- fwd = nil
-
- if idx > 0
- back = series_infos[idx-1]
- end
- if (idx + 1) < series_infos.length
- fwd = series_infos[idx+1]
- end
-
- cur = series_infos[idx]
- series = cur[0]
- book_ids = cur[1]
-
- page = Page.new(@store)
- if nil != back
- page.back = [back[0].key + '.html', 'Back']
- end
- if nil != fwd
- page.forward = [fwd[0].key + '.html', 'Forward']
- end
- page.output_dir = 'series/series_' + age
- page.index_file = series.key + '.html'
- page.title = 'Series “' + series.descr + '” (' + book_ids.length.to_s + ' books)'
- page.up = ['index.html', 'Up']
-
- page.write_html(book_ids)
- end
-
- content = '<h1>“' + age + '” Series</h1>'
- content += '<p><table><tr><th>Author</th><th>Series</th><th>Genre</th><th>Books</th></tr>'
- series_infos.each do |cur|
- series = cur[0]
- book_ids = cur[1]
-
- author = series.grouping
- letter = author[0]
-
- content += ' <tr>'
- content += '<td><a href="../../atoz/output_' + letter + '.html">' + author + '</a></td>'
- content += '<td><a href="' + series.key + '.html">' + series.descr + '</a></td>'
- content += '<td>' + series.genre + '</td>'
- content += '<td>' + book_ids.length.to_s + '</td>'
- content += '</tr>'
- end
- content += '</table></p>'
- page = Page.new(@store)
- page.output_dir = 'series/series_' + age
- page.special = content
- page.up = ['../index.html', 'Up']
- page.write_html( [] )
-
- return series_infos.length
- end
-
- def write_series_listing
- ages = ['beginner', 'junior', 'ya', 'adult']
- series_counts = {}
-
- ages.each do |age|
- puts 'Series for "' + age + '" readers...'
-
- series_counts[age] = write_series_for_age(age)
- end
-
- content = '<h1>Browse Books By Series</h1>'
- content += '<p>'
- content += '<table><tr><th>Age</th><th>Number of Series</th></tr>'
- ages.each do |age|
- content += '<tr><td><a href="series_' + age + '/index.html">' + age + '</a></td><td>' + series_counts[age].to_s + '</td></tr>'
- end
- content += '</table></p>'
- page = Page.new(@store)
- page.output_dir = 'series'
- page.special = content
- page.up = ['../output/index.html', 'Up']
- page.write_html( [] )
- end
-end
+++ /dev/null
-require 'fileutils'
-
-require_relative 'store'
-
-class Page
- def initialize(store)
- @back = nil
- @forward = nil
- @index_file = 'index.html'
- @output_dir = 'output'
- @special = nil
- @store = store
- @title = 'Books'
- @up = nil
- end
-
- def back=(value)
- @back = value
- end
-
- def forward=(value)
- @forward = value
- end
-
- def index_file=(value)
- @index_file = value
- end
-
- def navig_link(data)
- if (nil == data)
- return ''
- end
- return '<a href="' + data[0] + '">' + data[1] + '</a>'
- end
-
- def output_dir=(value)
- @output_dir = value
- end
-
- def special=(value)
- @special = value
- end
-
- def title=(value)
- @title = value
- end
-
- def up=(value)
- @up = value
- end
-
- def write_books(fd, book_ids)
- for id in book_ids
- book = @store.load_book(id)
- image = nil
- if nil != book.cover
- #@imageCount += 1
- #(path, mimeType) = book.cover.write_image(@output_dir, 'image' + @imageCount.to_s)
- #image = '<img class="cover-thumb" src="' + path + '"/>'
- path = book.cover.path
- image = '<img class="cover-thumb" src="' + path + '"/>'
- else
- image = '(No cover image)'
- end
-
- fd.puts ' <div><table>'
- fd.puts ' <tr><td><a href="' + book.path + '">' + image + '</a></td>'
-
- heading = book.heading()
- description = book.description()
- if nil != description
- fd.puts ' <td><span class="popup">' + heading + '<span class="pop-inner"><p>' + heading + '</p><p>' + description + '</p></span></span></td></tr>'
- else
- fd.puts ' <td>' + heading + '</td></tr>'
- end
-
- fd.puts ' </table></div>'
- end
- end
-
- def write_footer(fd)
- fd.puts ' <p class="navigator">' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '</p>'
- end
-
- def write_header(fd)
- fd.puts ' <h1 class="header">' + @title + '</h1>'
-
- fd.puts ' <p class="navigator">' + navig_link(@back) + ' ' + navig_link(@up) + ' ' + navig_link(@forward) + '</p>'
- end
-
- def write_html(book_ids)
- @imageCount = 0
-
- if ! Dir.exist?(@output_dir)
- FileUtils.mkdir_p(@output_dir)
- end
-
- open(@output_dir + '/' + @index_file, 'w') do |fd|
- fd.puts '<html>'
- fd.puts ' <head>'
- fd.puts ' <meta charset="utf-8"/>'
- fd.puts ' <title>' + @title + '</title>'
-
- write_style_sheet(fd)
-
- fd.puts ' </head>'
- fd.puts ' <body>'
-
- write_header(fd)
-
- write_special(fd)
- write_books(fd, book_ids)
-
- write_footer(fd)
-
- fd.puts " </body>"
- fd.puts "</html>"
- end
- end
-
- def write_special(fd)
- if (nil != @special)
- fd.puts(@special)
- end
- end
-
- def write_style_sheet(fd)
- style =
-<<EOS
- <style>
- div {
- display: inline-block;
- width: 400px;
- margin: 10px;
-        border: 3px solid #73ad21;
- }
- h1.header {
- background: #4040a0;
- color: #ffffff;
- text-align: center;
- }
- img.cover-thumb { max-height: 200px; max-width: 200px; }
- p.navigator { }
- span.popup { }
- span.popup:hover { text-decoration: none; background: #cfffff; z-index: 6; }
- span.popup span.pop-inner {
- border-color:black;
- border-style:solid;
- border-width:1px;
- display: none;
- margin: 4px 0 0 0px;
- padding: 3px 3px 3px 3px;
- position: absolute;
- }
- span.popup:hover span.pop-inner {
- background: #ffffaf;
- display: block;
- margin: 20px 0 0 0px;
- z-index:6;
- }
- </style>
-EOS
- fd.puts style
- end
-end
-
+++ /dev/null
-
-class Series
- def initialize(id)
- @age = nil
- @genre = nil
- @grouping = nil
- @code = nil
- @descr = nil
- @id = id
- end
-
- def age
- @age
- end
-
- def age=(value)
- @age = value
- end
-
- def code
- @code
- end
-
- def code=(value)
- @code = value
- end
-
- def descr
- @descr
- end
-
- def descr=(value)
- @descr = value
- end
-
- def genre
- @genre
- end
-
- def genre=(value)
- @genre = value
- end
-
- def grouping
- @grouping
- end
-
- def grouping=(value)
- @grouping = value
- end
-
- def id
- @id
- end
-
- def inspect
- data = []
- if nil != @age
- data.push('age="' + @age.inspect + '"')
- end
- if nil != @code
- data.push('code="' + @code.inspect + '"')
- end
- if nil != @descr
- data.push('descr="' + @descr + '"')
- end
- if nil != @genre
- data.push('genre="' + @genre + '"')
- end
- if nil != @grouping
- data.push('grouping="' + @grouping + '"')
- end
- return '(Series:' + data.join(',') + ')'
- end
-
- def key
- if nil != grouping and nil != code
- return grouping.to_s + '_' + code.to_s
- end
- return id.to_s
- end
-
- def to_s
- return inspect()
- end
-end
-
+++ /dev/null
-
-require 'csv'
-require 'fileutils'
-require 'inifile'
-require 'pg'
-
-require_relative 'series'
-require_relative 'tconn'
-
-class Store
- def unclassified_csv
- @basePath + '/csv/unclassified.csv'
- end
-
- def initialize(config_file)
- @conn = nil
-
- config = IniFile.load(config_file)
- if nil == config
- puts 'FATAL: Failed to load config file "' + config_file + '". Aborting initialization.'
- return
- end
-
- section = config['database']
- @dbhost = section['host']
- @dbport = 5432
- @dbname = section['name']
- @dbuser = section['user']
- @dbpass = section['pass']
-
- section = config['filesystem']
- @basePath = section['basePath']
- end
-
- def connect
- @conn = TimedConn.new(PG.connect(@dbhost, @dbport, '', '', @dbname, @dbuser, @dbpass))
- return @conn
- end
-
- def disconnect
- @conn.close()
- end
-
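-  # Maps an EFS id to a relative storage path plus filename: the id is
-  # zero-padded to ten digits and split into two-digit directory levels,
-  # e.g. 1234 => ['00/00/00/12', '0000001234.dat'].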
- def construct_efs_path(efs_id)
- id_str = sprintf('%010d', efs_id)
- path = sprintf('%s/%s/%s/%s', id_str[0,2], id_str[2,2], id_str[4,2], id_str[6,2])
- name = id_str + '.dat'
- return path, name
- end
-
- def cross_reference_lists
-puts "@@@@@@@@@@@ CROSS-REF START @@@@@@@@@@@"
- exec_update("TRUNCATE TABLE Lists CASCADE;", [])
-
- populate_lists_table
-puts "@@@@@@@@@@@ CROSS-REF DONE @@@@@@@@@@@"
- end
-
- def create_schema(skip_class)
- create_authors =
-<<EOS
- CREATE TABLE Authors (
- id INTEGER PRIMARY KEY,
- grouping VARCHAR(64),
- reading VARCHAR(256),
- sort VARCHAR(256)
- );
-EOS
-
- create_books =
-<<EOS
- CREATE TABLE Books (
- id INTEGER PRIMARY KEY,
- arrived TIMESTAMP,
- author INTEGER REFERENCES Authors(id),
- classification INTEGER REFERENCES Classifications(id),
- cover INTEGER,
- language VARCHAR(64),
- description TEXT,
- path VARCHAR(256),
- series INTEGER REFERENCES Series(id),
- title VARCHAR(256),
- volume VARCHAR(16)
- );
-EOS
-
- create_classification =
-<<EOS
- CREATE TABLE Classifications (
- id INTEGER PRIMARY KEY,
- ddc VARCHAR(32),
- lcc VARCHAR(32),
- author_grouping VARCHAR(64),
- author_sort VARCHAR(128),
- title_grouping VARCHAR(256),
- title VARCHAR(256)
- );
-EOS
-
- create_efs =
-<<EOS
- CREATE TABLE EFS (
- id INTEGER PRIMARY KEY,
- mimetype VARCHAR(64)
- );
-EOS
-
- create_fast =
-<<EOS
- CREATE TABLE FAST (
- id VARCHAR(32) PRIMARY KEY,
- descr VARCHAR(128)
- );
-EOS
-
- # Associative entity, linking FAST and Classifications tables
- # in a 0..n to 0..m relationship
- create_fast_classifications =
-<<EOS
- CREATE TABLE FAST_Classifications (
- fast VARCHAR(32) REFERENCES FAST(id),
- classification INTEGER REFERENCES Classifications(id)
- );
-EOS
-
- create_lists =
-<<EOS
- CREATE TABLE Lists (
- id INTEGER PRIMARY KEY,
- age VARCHAR(32),
- category VARCHAR(32),
- code VARCHAR(2),
- year INTEGER,
- author INTEGER REFERENCES Authors(id),
- title VARCHAR(256)
- );
-EOS
-
- # Associative entity, linking Lists and Books tables
- # in a 0..n to 0..m relationship
- create_lists_books =
-<<EOS
- CREATE TABLE Lists_Books (
- list INTEGER REFERENCES Lists(id),
- book INTEGER REFERENCES Books(id)
- );
-EOS
-
- create_series =
-<<EOS
- CREATE TABLE Series (
- id INTEGER PRIMARY KEY,
- age VARCHAR(32),
- genre VARCHAR(32),
- grouping VARCHAR(64),
- code VARCHAR(16),
- descr VARCHAR(128)
- )
-EOS
-
- stmts = [
- create_authors,
- create_classification,
- create_efs,
- create_fast,
- create_series,
- create_books,
- create_fast_classifications,
- create_lists,
- create_lists_books,
- 'CREATE SEQUENCE author_id;',
- 'CREATE SEQUENCE book_id;',
- 'CREATE SEQUENCE classification_id;',
- 'CREATE SEQUENCE efs_id;',
- 'CREATE SEQUENCE list_id;',
- 'CREATE SEQUENCE series_id;'
- ]
-
- for stmt in stmts
- @conn.exec(stmt)
- end
-
- if skip_class == false
- populate_fast_table
- populate_classifications_table
- end
-
- populate_series_table
- end
-
- def dropSchema
- stmts = [
- 'DROP TABLE Lists_Books;',
- 'DROP TABLE Lists;',
- 'DROP TABLE Books;',
- 'DROP TABLE FAST_Classifications;',
- 'DROP TABLE Authors;',
- 'DROP TABLE Classifications;',
- 'DROP TABLE EFS;',
- 'DROP TABLE FAST;',
- 'DROP TABLE Series;',
- 'DROP SEQUENCE author_id;',
- 'DROP SEQUENCE book_id;',
- 'DROP SEQUENCE classification_id;',
- 'DROP SEQUENCE efs_id;',
- 'DROP SEQUENCE list_id;',
- 'DROP SEQUENCE series_id;'
- ]
-
- for stmt in stmts do
- begin
- @conn.exec(stmt)
- rescue Exception => exc
- puts 'WARNING: "' + stmt + '" failed: ' + exc.to_s
- end
- end
- end
-
- def find_all_authors(author_name)
- result = []
-
- sqlSelect = "SELECT id FROM Authors WHERE grouping=$1;"
- args = [author_name]
-
- @conn.exec_params(sqlSelect, args) do |rs|
- rs.each do |row|
- result << row['id']
- end
- end
-
- result
- end
-
- def find_author(author)
- sqlSelect = "SELECT id FROM Authors WHERE grouping=$1 AND reading=$2 AND sort=$3;"
- args = [author.grouping, author.reading_order, author.sort_order]
-
- @conn.exec_params(sqlSelect, args) do |rs|
- if rs.ntuples > 0
- return rs[0]['id']
- end
- end
-
- return nil
- end
-
- def init_db(skip_class)
- sql = "SELECT 1 FROM pg_tables WHERE tableowner='quanlib' AND tablename='books'"
- found = false
- @conn.exec(sql).each do |row|
- found = true
- end
-
- if ! found
- create_schema(skip_class)
- end
- end
-
- def load_author(id)
- sqlSelect = "SELECT grouping, reading, sort FROM Authors WHERE id=$1"
- args = [id]
- @conn.exec_params(sqlSelect, args) do |rs|
- if rs.ntuples != 1
-        raise "Expected 1 row for #{id} but got #{rs.ntuples}: #{sqlSelect}"
- end
- row = rs[0]
- author = Author.new(row['grouping'], row['reading'], row['sort'])
- return author
- end
- return nil
- end
-
- def store_author(author)
- id = find_author(author)
- if nil == id
- id = next_id('author_id')
- sqlInsert = "INSERT INTO Authors(id, grouping, reading, sort) VALUES ($1, $2, $3, $4);"
- args = [id, author.grouping, author.reading_order, author.sort_order]
- begin
- rs = @conn.exec_params(sqlInsert, args)
- rescue Exception => e
- puts sqlInsert + ": " + args.inspect()
- puts e.message
- puts $@
- ensure
- rs.clear if rs
- end
- end
- return id
- end
-
- def load_book(id)
- sql = "SELECT author, classification, cover, description, language, path, series, title, volume FROM Books WHERE id=$1;"
- book = nil
-
- begin
- @conn.exec_params(sql, [id]) do |rs|
- if 1 != rs.ntuples
-          raise "Expected one row in Books for id #{id}, but found #{rs.ntuples}."
- return nil
- end
- row = rs[0]
-
- book = Book.new(self)
- book.author = load_author(row['author'])
- book.classification_id = row['classification']
- book.cover = load_cover(row['cover'])
- book.description = row['description']
- book.language = row['language']
- book.path = row['path']
- book.series_id = row['series']
- book.title = row['title']
- book.volume = row['volume']
- end
- rescue Exception => e
- puts sql + ": " + id
- puts e.message
- puts $@
- end
-
- return book
- end
-
- def store_book(book)
- sql = "INSERT INTO Books (id, arrived, author, classification, cover, description, language, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);"
-
- book_id = next_id('book_id')
-
- author_id = store_author(book.author)
- (efs_id, mime_type) = store_cover(book)
-
- args = [book_id, book.arrived, author_id, book.classification_id, efs_id, book.description, book.language, book.path, book.series_id, book.title, book.volume]
-
- begin
- rs = @conn.exec_params(sql, args)
- rescue Exception => e
- puts sql + ": " + args.inspect()
- puts e.message
- puts $@
- ensure
- rs.clear if rs
- end
-
- return book_id
- end
-
- def find_classification(author_grouping, title_grouping)
- sql = "SELECT id FROM Classifications WHERE author_grouping = $1 AND title_grouping = $2;"
- @conn.exec_params(sql, [author_grouping, title_grouping]) do |rs|
- if rs.ntuples > 0
- return rs[0]['id']
- end
- end
- return nil
- end
-
- def load_classification(id)
- sql = "SELECT ddc, lcc, author_grouping, author_sort, title_grouping, title "
- sql += " FROM Classifications WHERE id=$1"
- @conn.exec_params(sql, [id]) do |rs|
- if rs.ntuples > 0
- row = rs[0]
- ddc = row['ddc']
- lcc = row['lcc']
- author_grouping = row['author_grouping']
- author = row['author_sort']
- title_grouping = row['title_grouping']
- title = row['title']
-
- result = Classification.new(ddc, lcc, author_grouping, author, title_grouping, title)
- result.id = id
- return result
- end
- end
-
- return nil
- end
-
- def load_cover(id)
- if nil == id
- return nil
- end
-
- mime_type = 'application/octet-stream'
-
-    sql = "SELECT mimetype FROM Efs WHERE id=$1"
-    @conn.exec_params(sql, [id]) do |rs|
-      if rs.ntuples != 1
-        raise "Expected one row but got #{rs.ntuples}: #{sql}: #{id}"
-      end
-      # PostgreSQL folds unquoted column names to lower case, so the result
-      # field is named 'mimetype'.
-      mime_type = rs[0]['mimetype']
-    end
-
- (efspath, efsname) = construct_efs_path(id)
-
- fullpath = @basePath + '/efs/' + efspath + '/' + efsname
-
- return Cover.new(nil, fullpath, mime_type)
- end
-
- def store_cover(book)
- efs_id = nil
- cover = book.cover()
-
- if nil == cover
- return nil
- end
-
- @conn.exec("SELECT nextval('efs_id')") do |rs|
- efs_id = rs[0]['nextval']
- end
-
- if nil == efs_id
- return nil
- end
-
- (efspath, efsname) = construct_efs_path(efs_id)
-
- efspath = @basePath + '/efs/' + efspath
-
- FileUtils.mkdir_p(efspath)
-
- (filepath, mimetype) = cover.write_image(efspath, efsname)
-
- sql = "INSERT INTO efs VALUES ($1, $2)"
- begin
- rs = @conn.exec_params(sql, [efs_id, mimetype])
- rescue Exception => e
- puts sql + ": " + efs_id + ", " + mimetype
- puts e.message
- puts $@
- ensure
- rs.clear if rs
- end
-
- return efs_id, mimetype
- end
-
- def exec_id_query(sql, args)
- ids = []
- @conn.exec_params(sql, args) do |rs|
- rs.each do |row|
- ids.push(row['id'])
- end
- end
- return ids
- end
-
- def exec_update(sql, args)
- begin
- rs = @conn.exec_params(sql, args)
- rescue Exception => e
- puts sql + ": " + args.inspect()
- puts e.message
- puts $@
- ensure
- rs.clear if rs
- end
- end
-
- def next_id(seq_name)
- id = nil
- @conn.exec("SELECT nextval('" + seq_name + "');") do |rs|
- id = rs[0]['nextval']
- end
- return id
- end
-
- def get_series(grouping, code)
- if nil == code
- return nil
- end
-
- sql = "SELECT id FROM Series WHERE grouping=$1 AND code=$2;"
- args = [grouping, code]
- @conn.exec_params(sql, args).each do |row|
- return row['id']
- end
-
- # TODO: Create a new series object here?
- puts 'WARNING: series("' + grouping + '", "' + code + '") not found.'
- return nil
- end
-
- def load_series(id)
- sql = "SELECT age,genre,grouping,code,descr FROM Series WHERE id=$1;"
- args = [id]
- @conn.exec_params(sql, args) do |rs|
- if rs.ntuples > 0
- row = rs[0]
- series = Series.new(id)
- series.age = row['age']
- series.genre = row['genre']
- series.grouping = row['grouping']
- series.code = row['code']
- series.descr = row['descr']
- return series
- end
- end
- return nil
- end
-
- def populate_classifications_table
- puts "Populating the Classifications table..."
- first = true
- CSV.foreach(@basePath + '/csv/class.csv') do |row|
- if first
- # skip the header row
- first = false
- else
-
- # First, add a row to the Classifications table
-
- id = next_id('classification_id')
- ddc = row[0]
- lcc = row[1]
- author_grouping = row[2]
- author_sort = row[3]
- title_grouping = row[4]
- title = row[5]
-
- sqlInsert = "INSERT INTO Classifications (id, ddc, lcc, author_grouping, author_sort, title_grouping, title) VALUES ($1, $2, $3, $4, $5, $6, $7);"
- args = [id, ddc, lcc, author_grouping, author_sort, title_grouping, title]
- exec_update(sqlInsert, args)
-
- # Second, link up with the appropriate FAST table entries
-
- fast = []
- input = row[6]
- if input.length > 0
- fast = input.split(';')
- end
-
- fast.each do |fast_id|
- sqlInsert = "INSERT INTO FAST_Classifications (fast, classification) VALUES ($1, $2);"
- args = [fast_id, id]
- exec_update(sqlInsert, args)
- end
- end
- end
- end
-
- def populate_fast_table
- puts "Populating the FAST table..."
- first = true
- CSV.foreach(@basePath + '/csv/fast.csv') do |row|
- if first
- first = false # skip the header row
- else
- id = row[0]
- descr = row[1]
- sqlInsert = "INSERT INTO FAST (id, descr) VALUES ($1, $2);"
- exec_update(sqlInsert, [id, descr])
- end
- end
- end
-
- def populate_lists_table
- puts "Populating the Lists table..."
-
- CSV.foreach(@basePath + "/csv/lists.csv", headers: true) do |row|
- author_ids = find_all_authors(row['author'])
- if author_ids.empty?
- specification = [row['age'], row['category'], row['code'], row['year'], row['author'], row['title']]
- .map { |x| x.inspect }
- .join(', ')
-
- puts "WARNING: For list entry (#{specification}), no such author was found."
-
- next
- end
-
- sqlInsert = %Q(
- INSERT INTO Lists (id, age, category, code, year, author, title)
- VALUES ($1, $2, $3, $4, $5, $6, $7);
- )
- author_ids.each do |author_id|
- list_id = next_id('list_id')
- args = [list_id, row['age'], row['category'], row['code'], row['year'], author_id, row['title']]
- exec_update(sqlInsert, args)
-
- update_lists_books_table(list_id, author_id, row['title'])
- end
- end
- end
-
- # Scan for books that match this Lists entry, and add any matches to the Lists_Books associative table
- def update_lists_books_table(list_id, author_id, title)
- title_pattern = Book.grouping_for_title(title).gsub('_', '%')
- sqlSelect = "SELECT id FROM Books WHERE author = $1 AND title LIKE $2;"
- args = [author_id, title_pattern]
-
- @conn.exec_params(sqlSelect, args) do |rs|
- rs.each do |row|
- sqlInsert = "INSERT INTO Lists_Books (list, book) VALUES ($1, $2)"
- args = [list_id, row['id']]
- exec_update(sqlInsert, args)
- end
- end
- end
-
- def populate_series_table
- puts "Populating the Series table..."
- CSV.foreach(@basePath + '/csv/series.csv') do |row|
- id = next_id('series_id')
- sqlInsert = "INSERT INTO Series (id, age, genre, grouping, code, descr) VALUES ($1, $2, $3, $4, $5, $6);"
- args = [id] + row
- exec_update(sqlInsert, args)
- end
- end
-
- def query_books_by_author(pattern)
- sql =
-<<EOS
- SELECT b.id FROM Authors a
- INNER JOIN Books b ON b.author=a.id
- LEFT OUTER JOIN Series s on s.id=b.series
- WHERE upper(a.grouping) LIKE $1
- ORDER BY a.grouping, b.series, b.volume, b.title
-EOS
- return exec_id_query(sql, [pattern])
- end
-
- def query_books_by_ddc
- sql =
-<<EOS
- SELECT b.id FROM Classifications c
- INNER JOIN Books b ON b.classification=c.id
- ORDER BY c.ddc
-EOS
- return exec_id_query(sql, [])
- end
-
- def query_books_by_series_id(id)
- sql =
-<<EOS
- SELECT b.id FROM Books b
- WHERE b.series = $1
- ORDER BY b.volume,b.title
-EOS
- return exec_id_query(sql, [id])
- end
-
- def query_series_by_age(pattern)
- sql =
-<<EOS
- SELECT s.id
- FROM Series s
- WHERE s.age LIKE $1
- ORDER BY s.grouping,s.descr
-EOS
- return exec_id_query(sql, [pattern])
- end
-end
-
+++ /dev/null
-# tconn.rb
-#
-# Timed Connection:
-# Wrapper around a PG Connection that provides a report on where time was spent executing SQL
-#
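-#
-# Typical usage (a sketch; Store#connect wraps its PG connection this way):
-#
-#   conn = TimedConn.new(PG.connect(...))
-#   conn.exec_params(sql, args) { |rs| ... }
-#   conn.close   # prints the total and per-statement timing report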
-
-require 'pg'
-
-class TimedConn
- def initialize(wrapped_conn)
- @conn = wrapped_conn
- @stmts = {}
- @total_time = 0
- end
-
- def close
- @conn.close()
- puts "Connection closing. Total SQL time: " + @total_time.to_s + " secs"
- @stmts.each do |sql, info|
- elapsed = info[2]
- calls = info[1]
- puts elapsed.to_s + " secs: " + calls.to_s + " times: " + sql
- end
- end
-
- def exec(*args, &block)
- before = Time.now
- #puts args.inspect
- result = @conn.exec(*args)
- #puts result.inspect
- after = Time.now
- elapsed = (after - before)
- remember(args[0], elapsed)
- @total_time += elapsed
- if block_given?
- yield(result)
- else
- return result
- end
- end
-
- def exec_params(*args, &block)
- before = Time.now
- #puts args.inspect
- result = @conn.exec_params(*args)
- #puts result.inspect
- after = Time.now
- elapsed = (after - before)
- remember(args[0], elapsed)
- @total_time += elapsed
- if block_given?
- yield(result)
- else
- return result
- end
- end
-
- def remember(sql, elapsed)
- if @stmts.has_key?(sql)
- stmt = @stmts[sql]
- else
- stmt = [sql, 0, 0]
- end
-
- stmt[1] += 1 # Number of times this statement has been invoked
- stmt[2] += elapsed # total elapsed time spent on this statement
-
- @stmts[sql] = stmt
- end
-end
-
+# frozen_string_literal: true
-require 'rspec/autorun'
-require_relative '../book'
+require "test_helper"
+require "book"
-describe Book do
- it "can handle .epub and .pdf files" do
- ['epub', 'pdf'].each do |extension|
- expect(Book.can_handle?("sample.#{extension}")).to be true
+class BookTest < Minitest::Test
+ def test_that_it_can_handle_epub_and_pdf_files
+ %w(epub pdf).each do |extension|
+ assert_equal true, Book.can_handle?("sample.#{extension}")
end
end
- it "cannot handle .mobi, .html, .txt, .doc, .zip, .rtf or .rar files" do
+ def test_that_it_cannot_handle_mobi_html_txt_doc_zip_rtf_nor_rar
%w(doc html mobi rar rtf txt zip).each do |extension|
- expect(Book.can_handle?("sample.#{extension}")).to be false
+ assert_equal false, Book.can_handle?("sample.#{extension}")
end
end
end
+# frozen_string_literal: true
-require 'rspec/autorun'
-require_relative '../store'
+require "test_helper"
-describe Store do
- it "construct_efs_path produces paths and filenames as expected" do
+require "store"
+
+class StoreTest < Minitest::Test
+ def test_construct_efs_path
data = [
[ 1234, '00/00/00/12', '0000001234.dat'],
[ 1, '00/00/00/00', '0000000001.dat'],
[ 0x1b, '00/00/00/00', '0000000027.dat']
]
+ IniFile.stubs(:load).returns({"database" => {}, "filesystem" => {}})
store = Store.new('quanlib.ini')
- data.each do |input, expectedPath, expectedName|
- (actualPath, actualName) = store.construct_efs_path(input)
+ data.each do |input, expected_path, expected_name|
+ (actual_path, actual_name) = store.construct_efs_path(input)
- expect(actualPath).to eq(expectedPath)
- expect(actualName).to eq(expectedName)
+ assert_equal expected_path, actual_path
+ assert_equal expected_name, actual_name
end
end
end
--- /dev/null
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift File.expand_path("../app", __dir__)
+
+require "minitest/autorun"
+require "mocha/minitest"
+++ /dev/null
-# Walk the directory (and subdirectories), identifying books.
-#
-# Expected format:
-# .../AuthorName/Title_of_the_Awesome_Book.ext
-#
-# Author is given as FirstLast. For example,
-# Robert Anson Heinlein is RobertHeinlein, and
-# JKRowling is JoanneRowling.
-#
-# Book titles have spaces replaced with underscores,
-# and punctuation [,!?'] replaced with hyphens.
-#
-# If the book forms part of a series, then an all-capitals
-# series designator, followed by a numeric volume number,
-# followed by an underscore, is prefixed to the name.
-# For example, Hardy Boys' volume 1, The Tower Treasure,
-# is rendered as .../FranklinDixon/HB001_The_Tower_Treasure.epub
-# and Mrs. Pollifax volume 6, On the China Station, is
-# .../DorothyGilman/P06_On_the_China_Station.epub.
-
-require_relative 'book'
-require_relative 'book_loader'
-require_relative 'store'
-
-class WalkDir
- def initialize(config_file, root)
- @queue = Queue.new
- @root = root
- @config_file = config_file
- @threads = []
-
- @files = walk(@root)
- end
-
- def books
- @threads = []
- num_threads.times do
- @threads << Thread.new do
- BookLoader.new(@config_file, @queue).run
- end
- end
-
- result = []
- @files = remove_duplicates(@files)
- for file in @files.sort()
- if Book.can_handle?(file) && (!is_duplicate?(file))
- # Queue this book to be loaded and added to the DB by a BookLoader thread
- @queue << file
- end
- end
-
- @threads.count.times { @queue << BookLoader::DONE_MARKER }
-
- @threads.each { |t| t.join }
- end
-
- # Duplicate versions of a text are named
- # xxx_suffix.ext
- # Where suffix is one of bis, ter, quater, quinquies
- # for the 2nd, 3rd, 4th or 5th variant respectively.
- def is_duplicate?(file)
- s = file.to_s
- suffix = ['_bis.', '_ter.', '_quater.', '_quinquies.']
- suffix.each do |pat|
- if s.include?(pat)
- return true
- end
- end
-
- return false
- end
-
- def remove_duplicates(files)
- unique = {}
- for file in files
- if Book.can_handle?(file)
- key = File.dirname(file) + '/' + File.basename(file, '.*')
- if unique.has_key?(key)
- new_ext = File.extname(file)
- old_ext = File.extname(unique[key])
- if ('.pdf' == old_ext) && ('.epub' == new_ext)
- # Prefer EPUB over PDF
- puts 'REPLACED ' + unique[key].to_s + ' with ' + file.to_s
- unique[key] = file
- else
-            puts 'DROPPED ' + file.to_s + " because it's superseded by " + unique[key].to_s
- end
- else
- unique[key] = file
- end
- end
- end
-
- return unique.values
- end
-
- def walk(path)
- result = []
- children = Dir.entries(path)
- for child in children
- fullName = (path.chomp("/")) + "/" + child
- if (File.directory?(fullName)) and (child != ".") and (child != "..") and (!File.symlink?(fullName))
- sub = walk(fullName)
- if (sub != nil) and (sub.length > 0)
- result.concat(sub)
- end
- elsif (! File.directory?(fullName))
- result.push(fullName)
- end
- end
- return result
- end
-
- def num_threads
-    # TODO: make this (auto?) configurable
- 12
- end
-end