From: Chris Jaekl Date: Thu, 20 Jun 2024 15:41:02 +0000 (-0400) Subject: Switch from rspec to minitest X-Git-Url: http://jaekl.net/gitweb/?a=commitdiff_plain;h=dcd50e38d56f95e2bf88413b4d56db7132dccf11;p=quanlib.git Switch from rspec to minitest Also adds a Rakefile, and pulls in Rubocop while we're at it. --- diff --git a/Gemfile b/Gemfile index e3b145e..0181479 100644 --- a/Gemfile +++ b/Gemfile @@ -1,7 +1,9 @@ source 'http://rubygems.org' gem 'inifile' +gem 'mocha' gem 'nokogiri' gem 'pg' gem 'rspec' +gem 'rubocop' gem 'rubyzip' diff --git a/Gemfile.lock b/Gemfile.lock index 3cc58d3..9778251 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,36 +1,83 @@ GEM remote: http://rubygems.org/ specs: - diff-lcs (1.3) + ast (2.4.2) + diff-lcs (1.5.1) inifile (3.0.0) - mini_portile2 (2.4.0) - nokogiri (1.10.3) - mini_portile2 (~> 2.4.0) - pg (1.1.4) - rspec (3.8.0) - rspec-core (~> 3.8.0) - rspec-expectations (~> 3.8.0) - rspec-mocks (~> 3.8.0) - rspec-core (3.8.2) - rspec-support (~> 3.8.0) - rspec-expectations (3.8.4) + json (2.7.2) + language_server-protocol (3.17.0.3) + mocha (2.4.0) + ruby2_keywords (>= 0.0.5) + nokogiri (1.16.6-aarch64-linux) + racc (~> 1.4) + nokogiri (1.16.6-arm-linux) + racc (~> 1.4) + nokogiri (1.16.6-arm64-darwin) + racc (~> 1.4) + nokogiri (1.16.6-x86-linux) + racc (~> 1.4) + nokogiri (1.16.6-x86_64-darwin) + racc (~> 1.4) + nokogiri (1.16.6-x86_64-linux) + racc (~> 1.4) + parallel (1.25.1) + parser (3.3.3.0) + ast (~> 2.4.1) + racc + pg (1.5.6) + racc (1.8.0) + rainbow (3.1.1) + regexp_parser (2.9.2) + rexml (3.3.0) + strscan + rspec (3.13.0) + rspec-core (~> 3.13.0) + rspec-expectations (~> 3.13.0) + rspec-mocks (~> 3.13.0) + rspec-core (3.13.0) + rspec-support (~> 3.13.0) + rspec-expectations (3.13.1) diff-lcs (>= 1.2.0, < 2.0) - rspec-support (~> 3.8.0) - rspec-mocks (3.8.1) + rspec-support (~> 3.13.0) + rspec-mocks (3.13.1) diff-lcs (>= 1.2.0, < 2.0) - rspec-support (~> 3.8.0) - rspec-support (3.8.2) - rubyzip (1.2.3) + rspec-support (~> 3.13.0) + rspec-support (3.13.1) + rubocop (1.64.1) + json (~> 2.3) + language_server-protocol (>= 3.17.0) + parallel (~> 1.10) + parser (>= 3.3.0.2) + rainbow (>= 2.2.2, < 4.0) + regexp_parser (>= 1.8, < 3.0) + rexml (>= 3.2.5, < 4.0) + rubocop-ast (>= 1.31.1, < 2.0) + ruby-progressbar (~> 1.7) + unicode-display_width (>= 2.4.0, < 3.0) + rubocop-ast (1.31.3) + parser (>= 3.3.1.0) + ruby-progressbar (1.13.0) + ruby2_keywords (0.0.5) + rubyzip (2.3.2) + strscan (3.1.0) + unicode-display_width (2.5.0) PLATFORMS - ruby + aarch64-linux + arm-linux + arm64-darwin + x86-linux + x86_64-darwin + x86_64-linux DEPENDENCIES inifile + mocha nokogiri pg rspec + rubocop rubyzip BUNDLED WITH - 2.0.2 + 2.5.13 diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..9cf3b56 --- /dev/null +++ b/Rakefile @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +require "minitest/test_task" + +Minitest::TestTask.create + +require "rubocop/rake_task" + +RuboCop::RakeTask.new + +task default: %i[test rubocop] diff --git a/app/author.rb b/app/author.rb new file mode 100644 index 0000000..fb2003b --- /dev/null +++ b/app/author.rb @@ -0,0 +1,57 @@ + +class Author + def initialize(grouping, reading_order, sort_order) + @grouping = grouping + @reading_order = reading_order + @sort_order = sort_order + + if (nil == sort_order) || ('Unknown' == sort_order) + @sort_order = reading_to_sort_order(reading_order) + end + end + + def grouping + @grouping + end + + def reading_order + @reading_order + end + + def sort_order + @sort_order + end + + def inspect + result = '(Author:' + if nil != @grouping + result += ' grouping="' + @grouping + '"' + end + if nil != @reading_order + result += ' reading_order="' + @reading_order + '"' + end + if nil != @sort_order + result += ' sort_order="' + @sort_order + '"' + end + result += ')' + + return result + end + + def to_s + inspect + end + + protected + def reading_to_sort_order(reading_order) + sort_order = reading_order + + parts = reading_order.split(' ') + if parts.length > 1 + sort_order = parts[-1] + ', ' + parts[0..-2].join(' ') + end + + return sort_order + end +end + diff --git a/app/book.rb b/app/book.rb new file mode 100644 index 0000000..2b93f4b --- /dev/null +++ b/app/book.rb @@ -0,0 +1,421 @@ + +require 'nokogiri' +require 'rubygems' +require 'zip' + +require_relative 'author' +require_relative 'classification' +require_relative 'cover' +require_relative 'store' + +class Book + @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/' + @@SERIES_AND_VOLUME_REGEX = /^([A-Z]+)([0-9]+(\.[0-9]+)?)$/ + + attr_accessor :arrived + attr_accessor :author + attr_accessor :classification_id + attr_accessor :cover + attr_accessor :description + attr_accessor :language + attr_accessor :path + attr_accessor :series_id + attr_accessor :title + attr_accessor :volume + + def initialize(store) + @store = store + end + + def load_from_file!(fileName) + @path = fileName + parse_file_name!(fileName) + end + + def self.can_handle?(fileName) + if nil == fileName + return false + end + + #puts "Filename: " + fileName.to_s + lowerName = fileName.downcase() + + if lowerName.end_with?(".epub") + return true + end + + if lowerName.end_with?(".pdf") + return true + end + + return false + end + + def self.grouping_for_title(title) + result = title + + '\'",!#'.split('').each do |c| + result = result.gsub(c, '-') + end + result = result.gsub(/: */, '--') + result = result.gsub(' ', '_') + + result + end + + def heading + result = [] + + if nil != @title + result.push('' + @title + '') + else + result.push('(Unknown title)') + end + if nil != @author + result.push('by ' + @author.reading_order + '') + end + + seriesInfo = [] + series = @store.load_series(@series_id) + if nil != series and nil != series.descr + seriesInfo.push(series.descr.to_s) + end + if nil != @volume + seriesInfo.push(@volume.to_s) + end + if seriesInfo.length > 0 + result.push(seriesInfo.join(' ')) + end + + classification = nil + if nil != @classification_id + classification = @store.load_classification(@classification_id) + end + if nil != classification + if nil != classification.ddc + result.push('Dewey: ' + classification.ddc.to_s) + end + if nil != classification.lcc + result.push('LCC: ' + classification.lcc.to_s) + end + end + + return result.join('
') + end + + def inspect + data = [] + if nil != @author + data.push('author="' + @author.inspect + '"') + end + if nil != @series_id + data.push('series_id="' + @series_id.to_s() + '"') + end + if nil != @volume + data.push('volume="' + @volume + '"') + end + if nil != @title + data.push('title="' + @title + '"') + end + if nil != @cover + data.push(@cover.inspect()) + end + if nil != @path + data.push('path="' + @path + '"') + end + return '(Book:' + data.join(',') + ')' + end + + def to_s + return inspect() + end + + def title_grouping + if nil == @path + return nil + end + + return File.basename(@path, '.*') + end + + protected + def isUpper?(c) + return /[[:upper:]]/.match(c) + end + + protected + def massage_author(input) + if nil == input + return nil + end + + reading_order = "" + input.each_char do |c| + if isUpper?(c) and (reading_order.length > 0) + reading_order += " " + end + reading_order += c + end + + return reading_order + end + + # Returns (series, volumeNo, titleText) + protected + def processTitle(input) + if nil == input + return nil + end + + arr = input.split('_') + + series = nil + vol = nil + + first = arr[0] + matchData = (arr[0]).match(@@SERIES_AND_VOLUME_REGEX) + if nil != matchData + capt = matchData.captures + series = capt[0] + vol = capt[1] + arr.shift + end + + pos = arr[-1].rindex('.') + if nil != pos + arr[-1] = arr[-1].slice(0, pos) + end + + title = arr.join(' ') + + bare_title_grouping = title_grouping + .split('_') + .reject { |part| part.match(@@SERIES_AND_VOLUME_REGEX) } + .join('_') + + unless bare_title_grouping == Book.grouping_for_title(title) + puts "WARNING: title_grouping mismatch: #{bare_title_grouping.inspect} vs. #{Book.grouping_for_title(title).inspect}" + end + + return series, vol, title + end + + protected + def parse_file_name!(file_name) + category = nil # e.g., non-fiction, fan-fiction + grouping = '' + + parts = file_name.split('/') + (series_code, @volume, @title) = processTitle(parts[-1]) + if parts.length > 1 + grouping = parts[-2] + reading_order = massage_author(grouping) + sort_order = nil + @author = Author.new(grouping, reading_order, sort_order) + @series_id = @store.get_series(grouping, series_code) + end + if parts.length > 2 + category = parts[-3] + end + + lc_file_name = file_name.downcase + if lc_file_name.end_with?(".epub") + scanEpub!(file_name) + elsif lc_file_name.end_with?(".pdf") + scan_pdf!(file_name) + end + + @arrived = File.ctime(file_name) + + @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*')) + + # TODO: Fix horrible hard-coded strings and paths + if ('01_nonfic' == category) && (nil == classification_id) + open(Store.unclassified_csv, 'a') do |fd| + fd.puts('"' + grouping.to_s + '","' + path + '"') + end + end + end + + protected + def scanEpub!(fileName) + #puts 'Scanning "' + fileName.to_s + '"...' + begin + Zip.warn_invalid_date = false + Zip::File.open(fileName) do |zipfile| + entry = zipfile.find_entry('META-INF/container.xml') + if nil == entry + puts 'No META-INF/container.xml, skipping book ' + fileName + return + end + contXml = zipfile.read('META-INF/container.xml') + contDoc = Nokogiri::XML(contXml) + opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path'] + + scanOpf!(zipfile, opfPath) + end + rescue Zip::Error => exc + puts 'ERROR processing file "' + fileName + '":' + puts exc.message + puts exc.backtrace + end + end + + protected + def scan_pdf!(file_name) + #puts 'Scanning "' + file_name.to_s + '"...' + + pdf_path = File.expand_path(file_name).to_s + if ! pdf_path.end_with?('.pdf') + puts 'Unexpected internal error: path "' + file_name.to_s + '" does not end with ".pdf".' + return + end + + jpeg_path = pdf_path[0..-5] + '.jpeg' + if File.file?(jpeg_path) + File.open(jpeg_path, 'r') do |is| + @cover = Cover.new(is, jpeg_path, 'image/jpeg') + end + end + end + + + protected + def scanOpf!(zipfile, opfPath) + coverId = nil + + opfXml = zipfile.read(opfPath) + opfDoc = Nokogiri::XML(opfXml) + + #------- + # Author + + grouping = @author.grouping + reading_order = @author.reading_order + sort_order = @author.sort_order + + creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL) + if (creators.length > 0) + creator = creators[0] + if nil != creator + role = creator['opf:role'] + if 'aut' == role + reading_order = creator.content + + file_as = creator['opf:file-as'] + if nil != file_as + sort_order = file_as + end + end + + @author = Author.new(grouping, reading_order, sort_order) + end + end + + #--------------------------------------- + # Title + + titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL) + if titles.length > 0 + title = titles[0] + if nil != title + @title = title.content + end + end + + #--------------------------------------- + # Description + + descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL) + if (descrNodes.length > 0) + descrNode = descrNodes[0] + if nil != descrNode + @description = descrNode.content + end + end + + #--------------------------------------- + # Language + + langNodes = opfDoc.css('dc|language', 'dc' => @@DC_NS_URL) + if (langNodes.length > 0) + langNode = langNodes[0] + if langNode + @language = langNode.content + end + end + + #--------------------------------------- + # Other metadata: series, volume, cover + + metas = opfDoc.css('package metadata meta') + for m in metas + name = m['name'] + content = m['content'] + + if 'calibre:series' == name + # TODO: Dynamically create a new series? + # @series_id = content + elsif 'calibre:series-index' == name + @volume = content + elsif 'cover' == name + coverId = content + #puts 'File ' + @path + ' coverId ' + coverId + end + end + + #--------------- + # Load the cover + + @cover = load_cover(zipfile, opfPath, opfDoc, coverId) + end + + protected + def load_cover(zipfile, opfPath, opfDoc, coverId) + coverFile = nil + if nil == coverId + coverId = "cover-image" + end + + items = opfDoc.css('package manifest item') + for i in items + href = i['href'] + id = i['id'] + mimeType = i['media-type'] + + if coverId == id + entry = zipfile.find_entry(href) + + if nil == entry + # Although the epub standard requires the path to be relative + # to the base of the epub (zip), some books encountered in the + # wild have been found to use a bath relative to the location + # of the opf file. + parts = opfPath.split('/') + opfBasePath = opfPath.split('/')[0..-2].join('/') + coverPath = opfBasePath + '/' + href + entry = zipfile.find_entry(coverPath) + end + + unless entry + # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg' + if href.start_with? '../' + coverPath = href[3..-1] + entry = zipfile.find_entry(coverPath) + end + end + + if nil == entry + puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".' + return nil + else + entry.get_input_stream() do |is| + return Cover.new(is, href, mimeType) + end + end + end + end + return nil + end +end + diff --git a/app/book_loader.rb b/app/book_loader.rb new file mode 100644 index 0000000..5516f04 --- /dev/null +++ b/app/book_loader.rb @@ -0,0 +1,28 @@ + +require_relative 'book' +require_relative 'store' + +class BookLoader + DONE_MARKER = '' + + def initialize(config_file, queue) + @config_file = config_file + @queue = queue + end + + def run + @store = Store.new(@config_file) + @store.connect() + + file = @queue.pop + until file == DONE_MARKER do + book = Book.new(@store) + book.load_from_file!(file) + @store.store_book(book) + + file = @queue.pop + end + + @store.disconnect() + end +end diff --git a/app/classification.rb b/app/classification.rb new file mode 100644 index 0000000..2061e46 --- /dev/null +++ b/app/classification.rb @@ -0,0 +1,75 @@ + +class Classification + def initialize(ddc, lcc, author_grouping, author, title_grouping, title) + @id = nil + @ddc = ddc + @lcc = lcc + @author_grouping = author_grouping + @author = author + @title_grouping = title_grouping + @title = title + end + + def id + @id + end + def id=(value) + @id = value + end + + def ddc + @ddc + end + def lcc + @lcc + end + def author_grouping + @author_grouping + end + def author + @author + end + def + + def inspect + data = [] + + if nil != @ddc + data.push('Dewey=' + @ddc.to_s) + end + if nil != @lcc + data.push('LCC=' + @lcc.to_s) + end + if nil != @author_grouping + data.push('author_grouping=' + @author_grouping.to_s) + end + if nil != @author + data.push('author=' + @author.to_s) + end + if nil != @title_grouping + data.push('title_grouping=' + @title_grouping.to_s) + end + if nil != @title + data.push('title=' + @title) + end + + return '(Classification:' + data.join(',') + ')' + end + + def to_s + inspect + end + + protected + def reading_to_sort_order(reading_order) + sort_order = reading_order + + parts = reading_order.split(' ') + if parts.length > 1 + sort_order = parts[-1] + ', ' + parts[0..-2].join(' ') + end + + return sort_order + end +end + diff --git a/app/cover.rb b/app/cover.rb new file mode 100644 index 0000000..e74c27b --- /dev/null +++ b/app/cover.rb @@ -0,0 +1,57 @@ + +class Cover + attr_reader :path + + def initialize(inputStream, path, mimeType) + if nil != inputStream + @data = inputStream.read + else + @data = nil + end + @path = path + @mimeType = mimeType + end + + def inspect + info = [] + if nil != @data + info.push('size=' + @data.length.to_s) + else + info.push('empty') + end + if nil != @path + info.push('path="' + @path + '"') + end + if nil != @mimeType + info.push('mimeType="' + @mimeType + '"') + end + return '(Cover:' + info.join(',') + ')' + end + + def read_image(filename) + open(filename, 'rb') do |fd| + @data = fd.read() + end + end + + def to_s + return inspect + end + + def write_image(outputDir, filename) + open(outputDir + '/' + filename, 'wb') do |fd| + fd.write(@data) + end + return filename, @mimeType + end + + protected + def getExt + pos = @path.rindex('.') + if nil == pos + return '.img' + end + return @path.slice(pos, @path.length) + end +end + diff --git a/app/extract.rb b/app/extract.rb new file mode 100644 index 0000000..c695941 --- /dev/null +++ b/app/extract.rb @@ -0,0 +1,50 @@ +require 'find' +require 'pathname' + +def exec(cmdline) + puts "$ #{cmdline}" + result = system(cmdline) + unless result + puts "FAILED: #{cmdline}" + end + result +end + +def extract_epub(source_file, source_path, dest_path) + relative_path = source_file[source_path.length .. source_file.length] + dest_file = "#{dest_path}/#{relative_path}" + dest_file = dest_file[0 .. (dest_file.length - 6)] + ".txt" + + required_path = Pathname(dest_file).dirname + unless File.directory? required_path + unless exec("mkdir -p #{required_path}") + return false + end + end + + if File.exist? dest_file + source_time = File.mtime source_file + dest_time = File.mtime dest_file + comp = dest_time <=> source_time + if comp > 0 + return true # Nothing to do, extraction is already up-to-date + end + end + + exec("ebook-convert #{source_file} #{dest_file}") +end + +def scan_dir(source_path, dest_path) + Find.find(source_path) do |f| + if f.match(/.epub\Z/) + unless (f.match(/_bis.epub\Z/) || f.match(/_ter.epub\Z/) || f.match(/_quater.epub\Z/)) + extract_epub(f, source_path, dest_path) + end + end + end +end + +dest_path = ARGV[0] +for arg in ARGV[1 .. ARGV.length] + scan_dir(arg, dest_path) +end diff --git a/app/main.rb b/app/main.rb new file mode 100644 index 0000000..e294b4a --- /dev/null +++ b/app/main.rb @@ -0,0 +1,56 @@ +require_relative 'navigator' +require_relative 'page' +require_relative 'store' +require_relative 'walk_dir' + +@outputDir = 'output' + +@config_file = 'quanlib.ini' +@skip_class = false + +def handleArg(arg) + if arg.start_with?("--config=") + @config_file = arg[9..-1] + puts 'Using config file "' + @config_file + '".' + elsif "--purge" == arg + puts 'Purging database...' + @store.dropSchema() + if File.exists?(@store.unclassified_csv) + File.delete(@store.unclassified_csv) + end + elsif "--skip-class" == arg + puts 'Skipping load of classification table.' + @skip_class = true + elsif arg.start_with?("--") + abort('ERROR: Unrecognized option "' + arg + '".') + end +end + +@store = Store.new(@config_file) +@store.connect() + +for arg in ARGV + handleArg(arg) +end + +@store.init_db(@skip_class) + +for arg in ARGV + if ! arg.start_with?("--") + puts 'Scanning directory "' + arg + '"...' + w = WalkDir.new(@config_file, arg) + w.books + end +end + +@store.cross_reference_lists + +puts 'Creating output...' + +navigator = Navigator.new(@store) +navigator.write_atoz_pages() +navigator.write_series_listing() +navigator.write_dewey() + +@store.disconnect() + diff --git a/app/navigator.rb b/app/navigator.rb new file mode 100644 index 0000000..881b1fa --- /dev/null +++ b/app/navigator.rb @@ -0,0 +1,157 @@ +require_relative 'page' +require_relative 'store' + +class Navigator + def initialize(store) + @store = store + end + + def write_atoz_pages + atoz_counts = {} + + ('A'..'Z').each do |letter| + atoz_counts[letter] = write_authors_starting_with(letter) + end + + content = '

' + ('A'..'Z').each do |letter| + content += ' ' + end + content += '
AuthorBooks
Starting with ' + letter + '' + atoz_counts[letter].to_s + '

' + page = Page.new(@store) + page.output_dir = 'atoz' + page.special = content + page.up = ['../output/index.html', 'Up'] + + page.write_html( [] ) + end + + def write_authors_starting_with(letter) + book_ids = @store.query_books_by_author(letter + '%') + puts 'Authors starting with "' + letter + '": ' + book_ids.length.to_s() + ' books.' + + page = Page.new(@store) + if 'A' != letter + page.back = ['../atoz/output_' + (letter.ord - 1).chr + '.html', 'Prev'] + end + if 'Z' != letter + page.forward = ['../atoz/output_' + (letter.ord + 1).chr + '.html', 'Next'] + end + page.output_dir = 'atoz' + page.index_file = 'output_' + letter + '.html' + page.title = "Authors starting with '" + letter + "'" + page.up = ['../atoz/index.html', 'Up'] + + page.write_html(book_ids) + return book_ids.length + end + + def write_dewey + book_ids = @store.query_books_by_ddc() + puts 'Non-fiction books arranged by Dewey Decimal Classification: ' + book_ids.length.to_s() + ' books.' + + page = Page.new(@store) + page.output_dir = 'ddc' + page.index_file = 'index.html' + page.title = "Non-fiction books arranged by Dewey Decimal call number" + page.up = ['../output/index.html', 'Up'] + + page.write_html(book_ids) + return book_ids.length + end + + def write_series_for_age(age) + series_infos = [] + + series_ids = @store.query_series_by_age(age) + + series_ids.each do |id| + series = @store.load_series(id) + book_ids = @store.query_books_by_series_id(id) + if nil != book_ids and book_ids.length > 0 + series_infos.push( [series, book_ids] ) + end + end + + for idx in 0 .. (series_infos.length - 1) do + #puts series.descr + ': ' + book_ids.length.to_s + ' books.' + + back = nil + fwd = nil + + if idx > 0 + back = series_infos[idx-1] + end + if (idx + 1) < series_infos.length + fwd = series_infos[idx+1] + end + + cur = series_infos[idx] + series = cur[0] + book_ids = cur[1] + + page = Page.new(@store) + if nil != back + page.back = [back[0].key + '.html', 'Back'] + end + if nil != fwd + page.forward = [fwd[0].key + '.html', 'Forward'] + end + page.output_dir = 'series/series_' + age + page.index_file = series.key + '.html' + page.title = 'Series “' + series.descr + '” (' + book_ids.length.to_s + ' books)' + page.up = ['index.html', 'Up'] + + page.write_html(book_ids) + end + + content = '

“' + age + '” Series

' + content += '

' + series_infos.each do |cur| + series = cur[0] + book_ids = cur[1] + + author = series.grouping + letter = author[0] + + content += ' ' + content += '' + content += '' + content += '' + content += '' + content += '' + end + content += '
AuthorSeriesGenreBooks
' + author + '' + series.descr + '' + series.genre + '' + book_ids.length.to_s + '

' + page = Page.new(@store) + page.output_dir = 'series/series_' + age + page.special = content + page.up = ['../index.html', 'Up'] + page.write_html( [] ) + + return series_infos.length + end + + def write_series_listing + ages = ['beginner', 'junior', 'ya', 'adult'] + series_counts = {} + + ages.each do |age| + puts 'Series for "' + age + '" readers...' + + series_counts[age] = write_series_for_age(age) + end + + content = '

Browse Books By Series

' + content += '

' + content += '' + ages.each do |age| + content += '' + end + content += '
AgeNumber of Series
' + age + '' + series_counts[age].to_s + '

' + page = Page.new(@store) + page.output_dir = 'series' + page.special = content + page.up = ['../output/index.html', 'Up'] + page.write_html( [] ) + end +end diff --git a/app/page.rb b/app/page.rb new file mode 100644 index 0000000..638f9ad --- /dev/null +++ b/app/page.rb @@ -0,0 +1,166 @@ +require 'fileutils' + +require_relative 'store' + +class Page + def initialize(store) + @back = nil + @forward = nil + @index_file = 'index.html' + @output_dir = 'output' + @special = nil + @store = store + @title = 'Books' + @up = nil + end + + def back=(value) + @back = value + end + + def forward=(value) + @forward = value + end + + def index_file=(value) + @index_file = value + end + + def navig_link(data) + if (nil == data) + return '' + end + return '' + data[1] + '' + end + + def output_dir=(value) + @output_dir = value + end + + def special=(value) + @special = value + end + + def title=(value) + @title = value + end + + def up=(value) + @up = value + end + + def write_books(fd, book_ids) + for id in book_ids + book = @store.load_book(id) + image = nil + if nil != book.cover + #@imageCount += 1 + #(path, mimeType) = book.cover.write_image(@output_dir, 'image' + @imageCount.to_s) + #image = '' + path = book.cover.path + image = '' + else + image = '(No cover image)' + end + + fd.puts '
' + fd.puts ' ' + + heading = book.heading() + description = book.description() + if nil != description + fd.puts ' ' + else + fd.puts ' ' + end + + fd.puts '
' + image + '' + heading + '

' + heading + '

' + description + '

' + heading + '
' + end + end + + def write_footer(fd) + fd.puts ' ' + end + + def write_header(fd) + fd.puts '

' + @title + '

' + + fd.puts ' ' + end + + def write_html(book_ids) + @imageCount = 0 + + if ! Dir.exist?(@output_dir) + FileUtils.mkdir_p(@output_dir) + end + + open(@output_dir + '/' + @index_file, 'w') do |fd| + fd.puts '' + fd.puts ' ' + fd.puts ' ' + fd.puts ' ' + @title + '' + + write_style_sheet(fd) + + fd.puts ' ' + fd.puts ' ' + + write_header(fd) + + write_special(fd) + write_books(fd, book_ids) + + write_footer(fd) + + fd.puts " " + fd.puts "" + end + end + + def write_special(fd) + if (nil != @special) + fd.puts(@special) + end + end + + def write_style_sheet(fd) + style = +< + div { + display: inline-block; + width: 400px; + margin: 10px; + border 3px solid #73ad21; + } + h1.header { + background: #4040a0; + color: #ffffff; + text-align: center; + } + img.cover-thumb { max-height: 200px; max-width: 200px; } + p.navigator { } + span.popup { } + span.popup:hover { text-decoration: none; background: #cfffff; z-index: 6; } + span.popup span.pop-inner { + border-color:black; + border-style:solid; + border-width:1px; + display: none; + margin: 4px 0 0 0px; + padding: 3px 3px 3px 3px; + position: absolute; + } + span.popup:hover span.pop-inner { + background: #ffffaf; + display: block; + margin: 20px 0 0 0px; + z-index:6; + } + +EOS + fd.puts style + end +end + diff --git a/app/series.rb b/app/series.rb new file mode 100644 index 0000000..0621876 --- /dev/null +++ b/app/series.rb @@ -0,0 +1,87 @@ + +class Series + def initialize(id) + @age = nil + @genre = nil + @grouping = nil + @code = nil + @descr = nil + @id = id + end + + def age + @age + end + + def age=(value) + @age = value + end + + def code + @code + end + + def code=(value) + @code = value + end + + def descr + @descr + end + + def descr=(value) + @descr = value + end + + def genre + @genre + end + + def genre=(value) + @genre = value + end + + def grouping + @grouping + end + + def grouping=(value) + @grouping = value + end + + def id + @id + end + + def inspect + data = [] + if nil != @age + data.push('age="' + @age.inspect + '"') + end + if nil != @code + data.push('code="' + @code.inspect + '"') + end + if nil != @descr + data.push('descr="' + @descr + '"') + end + if nil != @genre + data.push('genre="' + @genre + '"') + end + if nil != @grouping + data.push('grouping="' + @grouping + '"') + end + return '(Series:' + data.join(',') + ')' + end + + def key + if nil != grouping and nil != code + return grouping.to_s + '_' + code.to_s + end + return id.to_s + end + + def to_s + return inspect() + end +end + diff --git a/app/store.rb b/app/store.rb new file mode 100644 index 0000000..1a33ca3 --- /dev/null +++ b/app/store.rb @@ -0,0 +1,655 @@ + +require 'csv' +require 'fileutils' +require 'inifile' +require 'pg' + +require_relative 'series' +require_relative 'tconn' + +class Store + def unclassified_csv + @basePath + '/csv/unclassified.csv' + end + + def initialize(config_file) + @conn = nil + + config = IniFile.load(config_file) + if nil == config + puts 'FATAL: Failed to load config file "' + config_file + '". Aborting initialization.' + return + end + + section = config['database'] + @dbhost = section['host'] + @dbport = 5432 + @dbname = section['name'] + @dbuser = section['user'] + @dbpass = section['pass'] + + section = config['filesystem'] + @basePath = section['basePath'] + end + + def connect + @conn = TimedConn.new(PG.connect(@dbhost, @dbport, '', '', @dbname, @dbuser, @dbpass)) + return @conn + end + + def disconnect + @conn.close() + end + + def construct_efs_path(efs_id) + id_str = sprintf('%010d', efs_id) + path = sprintf('%s/%s/%s/%s', id_str[0,2], id_str[2,2], id_str[4,2], id_str[6,2]) + name = id_str + '.dat' + return path, name + end + + def cross_reference_lists +puts "@@@@@@@@@@@ CROSS-REF START @@@@@@@@@@@" + exec_update("TRUNCATE TABLE Lists CASCADE;", []) + + populate_lists_table +puts "@@@@@@@@@@@ CROSS-REF DONE @@@@@@@@@@@" + end + + def create_schema(skip_class) + create_authors = +< exc + puts 'WARNING: "' + stmt + '" failed: ' + exc.to_s + end + end + end + + def find_all_authors(author_name) + result = [] + + sqlSelect = "SELECT id FROM Authors WHERE grouping=$1;" + args = [author_name] + + @conn.exec_params(sqlSelect, args) do |rs| + rs.each do |row| + result << row['id'] + end + end + + result + end + + def find_author(author) + sqlSelect = "SELECT id FROM Authors WHERE grouping=$1 AND reading=$2 AND sort=$3;" + args = [author.grouping, author.reading_order, author.sort_order] + + @conn.exec_params(sqlSelect, args) do |rs| + if rs.ntuples > 0 + return rs[0]['id'] + end + end + + return nil + end + + def init_db(skip_class) + sql = "SELECT 1 FROM pg_tables WHERE tableowner='quanlib' AND tablename='books'" + found = false + @conn.exec(sql).each do |row| + found = true + end + + if ! found + create_schema(skip_class) + end + end + + def load_author(id) + sqlSelect = "SELECT grouping, reading, sort FROM Authors WHERE id=$1" + args = [id] + @conn.exec_params(sqlSelect, args) do |rs| + if rs.ntuples != 1 + raise "Expected 1 row for " + id + " but got " + rs.ntuples + ": " + sqlSelect + end + row = rs[0] + author = Author.new(row['grouping'], row['reading'], row['sort']) + return author + end + return nil + end + + def store_author(author) + id = find_author(author) + if nil == id + id = next_id('author_id') + sqlInsert = "INSERT INTO Authors(id, grouping, reading, sort) VALUES ($1, $2, $3, $4);" + args = [id, author.grouping, author.reading_order, author.sort_order] + begin + rs = @conn.exec_params(sqlInsert, args) + rescue Exception => e + puts sqlInsert + ": " + args.inspect() + puts e.message + puts $@ + ensure + rs.clear if rs + end + end + return id + end + + def load_book(id) + sql = "SELECT author, classification, cover, description, language, path, series, title, volume FROM Books WHERE id=$1;" + book = nil + + begin + @conn.exec_params(sql, [id]) do |rs| + if 1 != rs.ntuples + raise 'Expected one row in Books for id ' + id + ', but found ' + rs.length + '.' + return nil + end + row = rs[0] + + book = Book.new(self) + book.author = load_author(row['author']) + book.classification_id = row['classification'] + book.cover = load_cover(row['cover']) + book.description = row['description'] + book.language = row['language'] + book.path = row['path'] + book.series_id = row['series'] + book.title = row['title'] + book.volume = row['volume'] + end + rescue Exception => e + puts sql + ": " + id + puts e.message + puts $@ + end + + return book + end + + def store_book(book) + sql = "INSERT INTO Books (id, arrived, author, classification, cover, description, language, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);" + + book_id = next_id('book_id') + + author_id = store_author(book.author) + (efs_id, mime_type) = store_cover(book) + + args = [book_id, book.arrived, author_id, book.classification_id, efs_id, book.description, book.language, book.path, book.series_id, book.title, book.volume] + + begin + rs = @conn.exec_params(sql, args) + rescue Exception => e + puts sql + ": " + args.inspect() + puts e.message + puts $@ + ensure + rs.clear if rs + end + + return book_id + end + + def find_classification(author_grouping, title_grouping) + sql = "SELECT id FROM Classifications WHERE author_grouping = $1 AND title_grouping = $2;" + @conn.exec_params(sql, [author_grouping, title_grouping]) do |rs| + if rs.ntuples > 0 + return rs[0]['id'] + end + end + return nil + end + + def load_classification(id) + sql = "SELECT ddc, lcc, author_grouping, author_sort, title_grouping, title " + sql += " FROM Classifications WHERE id=$1" + @conn.exec_params(sql, [id]) do |rs| + if rs.ntuples > 0 + row = rs[0] + ddc = row['ddc'] + lcc = row['lcc'] + author_grouping = row['author_grouping'] + author = row['author_sort'] + title_grouping = row['title_grouping'] + title = row['title'] + + result = Classification.new(ddc, lcc, author_grouping, author, title_grouping, title) + result.id = id + return result + end + end + + return nil + end + + def load_cover(id) + if nil == id + return nil + end + + mime_type = 'application/octet-stream' + + sql = "SELECT mimeType FROM Efs WHERE id=$1" + @conn.exec_params(sql, [id]) do |rs| + if rs.ntuples != 1 + raise "Expected one row but got " + rs.ntuples + ": " + sql + ": " + id + end + mime_type = rs[0]['mimeType'] + end + + (efspath, efsname) = construct_efs_path(id) + + fullpath = @basePath + '/efs/' + efspath + '/' + efsname + + return Cover.new(nil, fullpath, mime_type) + end + + def store_cover(book) + efs_id = nil + cover = book.cover() + + if nil == cover + return nil + end + + @conn.exec("SELECT nextval('efs_id')") do |rs| + efs_id = rs[0]['nextval'] + end + + if nil == efs_id + return nil + end + + (efspath, efsname) = construct_efs_path(efs_id) + + efspath = @basePath + '/efs/' + efspath + + FileUtils.mkdir_p(efspath) + + (filepath, mimetype) = cover.write_image(efspath, efsname) + + sql = "INSERT INTO efs VALUES ($1, $2)" + begin + rs = @conn.exec_params(sql, [efs_id, mimetype]) + rescue Exception => e + puts sql + ": " + efs_id + ", " + mimetype + puts e.message + puts $@ + ensure + rs.clear if rs + end + + return efs_id, mimetype + end + + def exec_id_query(sql, args) + ids = [] + @conn.exec_params(sql, args) do |rs| + rs.each do |row| + ids.push(row['id']) + end + end + return ids + end + + def exec_update(sql, args) + begin + rs = @conn.exec_params(sql, args) + rescue Exception => e + puts sql + ": " + args.inspect() + puts e.message + puts $@ + ensure + rs.clear if rs + end + end + + def next_id(seq_name) + id = nil + @conn.exec("SELECT nextval('" + seq_name + "');") do |rs| + id = rs[0]['nextval'] + end + return id + end + + def get_series(grouping, code) + if nil == code + return nil + end + + sql = "SELECT id FROM Series WHERE grouping=$1 AND code=$2;" + args = [grouping, code] + @conn.exec_params(sql, args).each do |row| + return row['id'] + end + + # TODO: Create a new series object here? + puts 'WARNING: series("' + grouping + '", "' + code + '") not found.' + return nil + end + + def load_series(id) + sql = "SELECT age,genre,grouping,code,descr FROM Series WHERE id=$1;" + args = [id] + @conn.exec_params(sql, args) do |rs| + if rs.ntuples > 0 + row = rs[0] + series = Series.new(id) + series.age = row['age'] + series.genre = row['genre'] + series.grouping = row['grouping'] + series.code = row['code'] + series.descr = row['descr'] + return series + end + end + return nil + end + + def populate_classifications_table + puts "Populating the Classifications table..." + first = true + CSV.foreach(@basePath + '/csv/class.csv') do |row| + if first + # skip the header row + first = false + else + + # First, add a row to the Classifications table + + id = next_id('classification_id') + ddc = row[0] + lcc = row[1] + author_grouping = row[2] + author_sort = row[3] + title_grouping = row[4] + title = row[5] + + sqlInsert = "INSERT INTO Classifications (id, ddc, lcc, author_grouping, author_sort, title_grouping, title) VALUES ($1, $2, $3, $4, $5, $6, $7);" + args = [id, ddc, lcc, author_grouping, author_sort, title_grouping, title] + exec_update(sqlInsert, args) + + # Second, link up with the appropriate FAST table entries + + fast = [] + input = row[6] + if input.length > 0 + fast = input.split(';') + end + + fast.each do |fast_id| + sqlInsert = "INSERT INTO FAST_Classifications (fast, classification) VALUES ($1, $2);" + args = [fast_id, id] + exec_update(sqlInsert, args) + end + end + end + end + + def populate_fast_table + puts "Populating the FAST table..." + first = true + CSV.foreach(@basePath + '/csv/fast.csv') do |row| + if first + first = false # skip the header row + else + id = row[0] + descr = row[1] + sqlInsert = "INSERT INTO FAST (id, descr) VALUES ($1, $2);" + exec_update(sqlInsert, [id, descr]) + end + end + end + + def populate_lists_table + puts "Populating the Lists table..." + + CSV.foreach(@basePath + "/csv/lists.csv", headers: true) do |row| + author_ids = find_all_authors(row['author']) + if author_ids.empty? + specification = [row['age'], row['category'], row['code'], row['year'], row['author'], row['title']] + .map { |x| x.inspect } + .join(', ') + + puts "WARNING: For list entry (#{specification}), no such author was found." + + next + end + + sqlInsert = %Q( + INSERT INTO Lists (id, age, category, code, year, author, title) + VALUES ($1, $2, $3, $4, $5, $6, $7); + ) + author_ids.each do |author_id| + list_id = next_id('list_id') + args = [list_id, row['age'], row['category'], row['code'], row['year'], author_id, row['title']] + exec_update(sqlInsert, args) + + update_lists_books_table(list_id, author_id, row['title']) + end + end + end + + # Scan for books that match this Lists entry, and add any matches to the Lists_Books associative table + def update_lists_books_table(list_id, author_id, title) + title_pattern = Book.grouping_for_title(title).gsub('_', '%') + sqlSelect = "SELECT id FROM Books WHERE author = $1 AND title LIKE $2;" + args = [author_id, title_pattern] + + @conn.exec_params(sqlSelect, args) do |rs| + rs.each do |row| + sqlInsert = "INSERT INTO Lists_Books (list, book) VALUES ($1, $2)" + args = [list_id, row['id']] + exec_update(sqlInsert, args) + end + end + end + + def populate_series_table + puts "Populating the Series table..." + CSV.foreach(@basePath + '/csv/series.csv') do |row| + id = next_id('series_id') + sqlInsert = "INSERT INTO Series (id, age, genre, grouping, code, descr) VALUES ($1, $2, $3, $4, $5, $6);" + args = [id] + row + exec_update(sqlInsert, args) + end + end + + def query_books_by_author(pattern) + sql = +< 0) + result.concat(sub) + end + elsif (! File.directory?(fullName)) + result.push(fullName) + end + end + return result + end + + def num_threads + # TOOD: make this (auto?) configurable + 12 + end +end diff --git a/author.rb b/author.rb deleted file mode 100644 index fb2003b..0000000 --- a/author.rb +++ /dev/null @@ -1,57 +0,0 @@ - -class Author - def initialize(grouping, reading_order, sort_order) - @grouping = grouping - @reading_order = reading_order - @sort_order = sort_order - - if (nil == sort_order) || ('Unknown' == sort_order) - @sort_order = reading_to_sort_order(reading_order) - end - end - - def grouping - @grouping - end - - def reading_order - @reading_order - end - - def sort_order - @sort_order - end - - def inspect - result = '(Author:' - if nil != @grouping - result += ' grouping="' + @grouping + '"' - end - if nil != @reading_order - result += ' reading_order="' + @reading_order + '"' - end - if nil != @sort_order - result += ' sort_order="' + @sort_order + '"' - end - result += ')' - - return result - end - - def to_s - inspect - end - - protected - def reading_to_sort_order(reading_order) - sort_order = reading_order - - parts = reading_order.split(' ') - if parts.length > 1 - sort_order = parts[-1] + ', ' + parts[0..-2].join(' ') - end - - return sort_order - end -end - diff --git a/book.rb b/book.rb deleted file mode 100644 index 2b93f4b..0000000 --- a/book.rb +++ /dev/null @@ -1,421 +0,0 @@ - -require 'nokogiri' -require 'rubygems' -require 'zip' - -require_relative 'author' -require_relative 'classification' -require_relative 'cover' -require_relative 'store' - -class Book - @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/' - @@SERIES_AND_VOLUME_REGEX = /^([A-Z]+)([0-9]+(\.[0-9]+)?)$/ - - attr_accessor :arrived - attr_accessor :author - attr_accessor :classification_id - attr_accessor :cover - attr_accessor :description - attr_accessor :language - attr_accessor :path - attr_accessor :series_id - attr_accessor :title - attr_accessor :volume - - def initialize(store) - @store = store - end - - def load_from_file!(fileName) - @path = fileName - parse_file_name!(fileName) - end - - def self.can_handle?(fileName) - if nil == fileName - return false - end - - #puts "Filename: " + fileName.to_s - lowerName = fileName.downcase() - - if lowerName.end_with?(".epub") - return true - end - - if lowerName.end_with?(".pdf") - return true - end - - return false - end - - def self.grouping_for_title(title) - result = title - - '\'",!#'.split('').each do |c| - result = result.gsub(c, '-') - end - result = result.gsub(/: */, '--') - result = result.gsub(' ', '_') - - result - end - - def heading - result = [] - - if nil != @title - result.push('' + @title + '') - else - result.push('(Unknown title)') - end - if nil != @author - result.push('by ' + @author.reading_order + '') - end - - seriesInfo = [] - series = @store.load_series(@series_id) - if nil != series and nil != series.descr - seriesInfo.push(series.descr.to_s) - end - if nil != @volume - seriesInfo.push(@volume.to_s) - end - if seriesInfo.length > 0 - result.push(seriesInfo.join(' ')) - end - - classification = nil - if nil != @classification_id - classification = @store.load_classification(@classification_id) - end - if nil != classification - if nil != classification.ddc - result.push('Dewey: ' + classification.ddc.to_s) - end - if nil != classification.lcc - result.push('LCC: ' + classification.lcc.to_s) - end - end - - return result.join('
') - end - - def inspect - data = [] - if nil != @author - data.push('author="' + @author.inspect + '"') - end - if nil != @series_id - data.push('series_id="' + @series_id.to_s() + '"') - end - if nil != @volume - data.push('volume="' + @volume + '"') - end - if nil != @title - data.push('title="' + @title + '"') - end - if nil != @cover - data.push(@cover.inspect()) - end - if nil != @path - data.push('path="' + @path + '"') - end - return '(Book:' + data.join(',') + ')' - end - - def to_s - return inspect() - end - - def title_grouping - if nil == @path - return nil - end - - return File.basename(@path, '.*') - end - - protected - def isUpper?(c) - return /[[:upper:]]/.match(c) - end - - protected - def massage_author(input) - if nil == input - return nil - end - - reading_order = "" - input.each_char do |c| - if isUpper?(c) and (reading_order.length > 0) - reading_order += " " - end - reading_order += c - end - - return reading_order - end - - # Returns (series, volumeNo, titleText) - protected - def processTitle(input) - if nil == input - return nil - end - - arr = input.split('_') - - series = nil - vol = nil - - first = arr[0] - matchData = (arr[0]).match(@@SERIES_AND_VOLUME_REGEX) - if nil != matchData - capt = matchData.captures - series = capt[0] - vol = capt[1] - arr.shift - end - - pos = arr[-1].rindex('.') - if nil != pos - arr[-1] = arr[-1].slice(0, pos) - end - - title = arr.join(' ') - - bare_title_grouping = title_grouping - .split('_') - .reject { |part| part.match(@@SERIES_AND_VOLUME_REGEX) } - .join('_') - - unless bare_title_grouping == Book.grouping_for_title(title) - puts "WARNING: title_grouping mismatch: #{bare_title_grouping.inspect} vs. #{Book.grouping_for_title(title).inspect}" - end - - return series, vol, title - end - - protected - def parse_file_name!(file_name) - category = nil # e.g., non-fiction, fan-fiction - grouping = '' - - parts = file_name.split('/') - (series_code, @volume, @title) = processTitle(parts[-1]) - if parts.length > 1 - grouping = parts[-2] - reading_order = massage_author(grouping) - sort_order = nil - @author = Author.new(grouping, reading_order, sort_order) - @series_id = @store.get_series(grouping, series_code) - end - if parts.length > 2 - category = parts[-3] - end - - lc_file_name = file_name.downcase - if lc_file_name.end_with?(".epub") - scanEpub!(file_name) - elsif lc_file_name.end_with?(".pdf") - scan_pdf!(file_name) - end - - @arrived = File.ctime(file_name) - - @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*')) - - # TODO: Fix horrible hard-coded strings and paths - if ('01_nonfic' == category) && (nil == classification_id) - open(Store.unclassified_csv, 'a') do |fd| - fd.puts('"' + grouping.to_s + '","' + path + '"') - end - end - end - - protected - def scanEpub!(fileName) - #puts 'Scanning "' + fileName.to_s + '"...' - begin - Zip.warn_invalid_date = false - Zip::File.open(fileName) do |zipfile| - entry = zipfile.find_entry('META-INF/container.xml') - if nil == entry - puts 'No META-INF/container.xml, skipping book ' + fileName - return - end - contXml = zipfile.read('META-INF/container.xml') - contDoc = Nokogiri::XML(contXml) - opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path'] - - scanOpf!(zipfile, opfPath) - end - rescue Zip::Error => exc - puts 'ERROR processing file "' + fileName + '":' - puts exc.message - puts exc.backtrace - end - end - - protected - def scan_pdf!(file_name) - #puts 'Scanning "' + file_name.to_s + '"...' - - pdf_path = File.expand_path(file_name).to_s - if ! pdf_path.end_with?('.pdf') - puts 'Unexpected internal error: path "' + file_name.to_s + '" does not end with ".pdf".' - return - end - - jpeg_path = pdf_path[0..-5] + '.jpeg' - if File.file?(jpeg_path) - File.open(jpeg_path, 'r') do |is| - @cover = Cover.new(is, jpeg_path, 'image/jpeg') - end - end - end - - - protected - def scanOpf!(zipfile, opfPath) - coverId = nil - - opfXml = zipfile.read(opfPath) - opfDoc = Nokogiri::XML(opfXml) - - #------- - # Author - - grouping = @author.grouping - reading_order = @author.reading_order - sort_order = @author.sort_order - - creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL) - if (creators.length > 0) - creator = creators[0] - if nil != creator - role = creator['opf:role'] - if 'aut' == role - reading_order = creator.content - - file_as = creator['opf:file-as'] - if nil != file_as - sort_order = file_as - end - end - - @author = Author.new(grouping, reading_order, sort_order) - end - end - - #--------------------------------------- - # Title - - titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL) - if titles.length > 0 - title = titles[0] - if nil != title - @title = title.content - end - end - - #--------------------------------------- - # Description - - descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL) - if (descrNodes.length > 0) - descrNode = descrNodes[0] - if nil != descrNode - @description = descrNode.content - end - end - - #--------------------------------------- - # Language - - langNodes = opfDoc.css('dc|language', 'dc' => @@DC_NS_URL) - if (langNodes.length > 0) - langNode = langNodes[0] - if langNode - @language = langNode.content - end - end - - #--------------------------------------- - # Other metadata: series, volume, cover - - metas = opfDoc.css('package metadata meta') - for m in metas - name = m['name'] - content = m['content'] - - if 'calibre:series' == name - # TODO: Dynamically create a new series? - # @series_id = content - elsif 'calibre:series-index' == name - @volume = content - elsif 'cover' == name - coverId = content - #puts 'File ' + @path + ' coverId ' + coverId - end - end - - #--------------- - # Load the cover - - @cover = load_cover(zipfile, opfPath, opfDoc, coverId) - end - - protected - def load_cover(zipfile, opfPath, opfDoc, coverId) - coverFile = nil - if nil == coverId - coverId = "cover-image" - end - - items = opfDoc.css('package manifest item') - for i in items - href = i['href'] - id = i['id'] - mimeType = i['media-type'] - - if coverId == id - entry = zipfile.find_entry(href) - - if nil == entry - # Although the epub standard requires the path to be relative - # to the base of the epub (zip), some books encountered in the - # wild have been found to use a bath relative to the location - # of the opf file. - parts = opfPath.split('/') - opfBasePath = opfPath.split('/')[0..-2].join('/') - coverPath = opfBasePath + '/' + href - entry = zipfile.find_entry(coverPath) - end - - unless entry - # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg' - if href.start_with? '../' - coverPath = href[3..-1] - entry = zipfile.find_entry(coverPath) - end - end - - if nil == entry - puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".' - return nil - else - entry.get_input_stream() do |is| - return Cover.new(is, href, mimeType) - end - end - end - end - return nil - end -end - diff --git a/book_loader.rb b/book_loader.rb deleted file mode 100644 index 5516f04..0000000 --- a/book_loader.rb +++ /dev/null @@ -1,28 +0,0 @@ - -require_relative 'book' -require_relative 'store' - -class BookLoader - DONE_MARKER = '' - - def initialize(config_file, queue) - @config_file = config_file - @queue = queue - end - - def run - @store = Store.new(@config_file) - @store.connect() - - file = @queue.pop - until file == DONE_MARKER do - book = Book.new(@store) - book.load_from_file!(file) - @store.store_book(book) - - file = @queue.pop - end - - @store.disconnect() - end -end diff --git a/classification.rb b/classification.rb deleted file mode 100644 index 2061e46..0000000 --- a/classification.rb +++ /dev/null @@ -1,75 +0,0 @@ - -class Classification - def initialize(ddc, lcc, author_grouping, author, title_grouping, title) - @id = nil - @ddc = ddc - @lcc = lcc - @author_grouping = author_grouping - @author = author - @title_grouping = title_grouping - @title = title - end - - def id - @id - end - def id=(value) - @id = value - end - - def ddc - @ddc - end - def lcc - @lcc - end - def author_grouping - @author_grouping - end - def author - @author - end - def - - def inspect - data = [] - - if nil != @ddc - data.push('Dewey=' + @ddc.to_s) - end - if nil != @lcc - data.push('LCC=' + @lcc.to_s) - end - if nil != @author_grouping - data.push('author_grouping=' + @author_grouping.to_s) - end - if nil != @author - data.push('author=' + @author.to_s) - end - if nil != @title_grouping - data.push('title_grouping=' + @title_grouping.to_s) - end - if nil != @title - data.push('title=' + @title) - end - - return '(Classification:' + data.join(',') + ')' - end - - def to_s - inspect - end - - protected - def reading_to_sort_order(reading_order) - sort_order = reading_order - - parts = reading_order.split(' ') - if parts.length > 1 - sort_order = parts[-1] + ', ' + parts[0..-2].join(' ') - end - - return sort_order - end -end - diff --git a/cover.rb b/cover.rb deleted file mode 100644 index e74c27b..0000000 --- a/cover.rb +++ /dev/null @@ -1,57 +0,0 @@ - -class Cover - attr_reader :path - - def initialize(inputStream, path, mimeType) - if nil != inputStream - @data = inputStream.read - else - @data = nil - end - @path = path - @mimeType = mimeType - end - - def inspect - info = [] - if nil != @data - info.push('size=' + @data.length.to_s) - else - info.push('empty') - end - if nil != @path - info.push('path="' + @path + '"') - end - if nil != @mimeType - info.push('mimeType="' + @mimeType + '"') - end - return '(Cover:' + info.join(',') + ')' - end - - def read_image(filename) - open(filename, 'rb') do |fd| - @data = fd.read() - end - end - - def to_s - return inspect - end - - def write_image(outputDir, filename) - open(outputDir + '/' + filename, 'wb') do |fd| - fd.write(@data) - end - return filename, @mimeType - end - - protected - def getExt - pos = @path.rindex('.') - if nil == pos - return '.img' - end - return @path.slice(pos, @path.length) - end -end - diff --git a/extract.rb b/extract.rb deleted file mode 100644 index c695941..0000000 --- a/extract.rb +++ /dev/null @@ -1,50 +0,0 @@ -require 'find' -require 'pathname' - -def exec(cmdline) - puts "$ #{cmdline}" - result = system(cmdline) - unless result - puts "FAILED: #{cmdline}" - end - result -end - -def extract_epub(source_file, source_path, dest_path) - relative_path = source_file[source_path.length .. source_file.length] - dest_file = "#{dest_path}/#{relative_path}" - dest_file = dest_file[0 .. (dest_file.length - 6)] + ".txt" - - required_path = Pathname(dest_file).dirname - unless File.directory? required_path - unless exec("mkdir -p #{required_path}") - return false - end - end - - if File.exist? dest_file - source_time = File.mtime source_file - dest_time = File.mtime dest_file - comp = dest_time <=> source_time - if comp > 0 - return true # Nothing to do, extraction is already up-to-date - end - end - - exec("ebook-convert #{source_file} #{dest_file}") -end - -def scan_dir(source_path, dest_path) - Find.find(source_path) do |f| - if f.match(/.epub\Z/) - unless (f.match(/_bis.epub\Z/) || f.match(/_ter.epub\Z/) || f.match(/_quater.epub\Z/)) - extract_epub(f, source_path, dest_path) - end - end - end -end - -dest_path = ARGV[0] -for arg in ARGV[1 .. ARGV.length] - scan_dir(arg, dest_path) -end diff --git a/main.rb b/main.rb deleted file mode 100644 index e294b4a..0000000 --- a/main.rb +++ /dev/null @@ -1,56 +0,0 @@ -require_relative 'navigator' -require_relative 'page' -require_relative 'store' -require_relative 'walk_dir' - -@outputDir = 'output' - -@config_file = 'quanlib.ini' -@skip_class = false - -def handleArg(arg) - if arg.start_with?("--config=") - @config_file = arg[9..-1] - puts 'Using config file "' + @config_file + '".' - elsif "--purge" == arg - puts 'Purging database...' - @store.dropSchema() - if File.exists?(@store.unclassified_csv) - File.delete(@store.unclassified_csv) - end - elsif "--skip-class" == arg - puts 'Skipping load of classification table.' - @skip_class = true - elsif arg.start_with?("--") - abort('ERROR: Unrecognized option "' + arg + '".') - end -end - -@store = Store.new(@config_file) -@store.connect() - -for arg in ARGV - handleArg(arg) -end - -@store.init_db(@skip_class) - -for arg in ARGV - if ! arg.start_with?("--") - puts 'Scanning directory "' + arg + '"...' - w = WalkDir.new(@config_file, arg) - w.books - end -end - -@store.cross_reference_lists - -puts 'Creating output...' - -navigator = Navigator.new(@store) -navigator.write_atoz_pages() -navigator.write_series_listing() -navigator.write_dewey() - -@store.disconnect() - diff --git a/navigator.rb b/navigator.rb deleted file mode 100644 index 881b1fa..0000000 --- a/navigator.rb +++ /dev/null @@ -1,157 +0,0 @@ -require_relative 'page' -require_relative 'store' - -class Navigator - def initialize(store) - @store = store - end - - def write_atoz_pages - atoz_counts = {} - - ('A'..'Z').each do |letter| - atoz_counts[letter] = write_authors_starting_with(letter) - end - - content = '

' - ('A'..'Z').each do |letter| - content += ' ' - end - content += '
AuthorBooks
Starting with ' + letter + '' + atoz_counts[letter].to_s + '

' - page = Page.new(@store) - page.output_dir = 'atoz' - page.special = content - page.up = ['../output/index.html', 'Up'] - - page.write_html( [] ) - end - - def write_authors_starting_with(letter) - book_ids = @store.query_books_by_author(letter + '%') - puts 'Authors starting with "' + letter + '": ' + book_ids.length.to_s() + ' books.' - - page = Page.new(@store) - if 'A' != letter - page.back = ['../atoz/output_' + (letter.ord - 1).chr + '.html', 'Prev'] - end - if 'Z' != letter - page.forward = ['../atoz/output_' + (letter.ord + 1).chr + '.html', 'Next'] - end - page.output_dir = 'atoz' - page.index_file = 'output_' + letter + '.html' - page.title = "Authors starting with '" + letter + "'" - page.up = ['../atoz/index.html', 'Up'] - - page.write_html(book_ids) - return book_ids.length - end - - def write_dewey - book_ids = @store.query_books_by_ddc() - puts 'Non-fiction books arranged by Dewey Decimal Classification: ' + book_ids.length.to_s() + ' books.' - - page = Page.new(@store) - page.output_dir = 'ddc' - page.index_file = 'index.html' - page.title = "Non-fiction books arranged by Dewey Decimal call number" - page.up = ['../output/index.html', 'Up'] - - page.write_html(book_ids) - return book_ids.length - end - - def write_series_for_age(age) - series_infos = [] - - series_ids = @store.query_series_by_age(age) - - series_ids.each do |id| - series = @store.load_series(id) - book_ids = @store.query_books_by_series_id(id) - if nil != book_ids and book_ids.length > 0 - series_infos.push( [series, book_ids] ) - end - end - - for idx in 0 .. (series_infos.length - 1) do - #puts series.descr + ': ' + book_ids.length.to_s + ' books.' - - back = nil - fwd = nil - - if idx > 0 - back = series_infos[idx-1] - end - if (idx + 1) < series_infos.length - fwd = series_infos[idx+1] - end - - cur = series_infos[idx] - series = cur[0] - book_ids = cur[1] - - page = Page.new(@store) - if nil != back - page.back = [back[0].key + '.html', 'Back'] - end - if nil != fwd - page.forward = [fwd[0].key + '.html', 'Forward'] - end - page.output_dir = 'series/series_' + age - page.index_file = series.key + '.html' - page.title = 'Series “' + series.descr + '” (' + book_ids.length.to_s + ' books)' - page.up = ['index.html', 'Up'] - - page.write_html(book_ids) - end - - content = '

“' + age + '” Series

' - content += '

' - series_infos.each do |cur| - series = cur[0] - book_ids = cur[1] - - author = series.grouping - letter = author[0] - - content += ' ' - content += '' - content += '' - content += '' - content += '' - content += '' - end - content += '
AuthorSeriesGenreBooks
' + author + '' + series.descr + '' + series.genre + '' + book_ids.length.to_s + '

' - page = Page.new(@store) - page.output_dir = 'series/series_' + age - page.special = content - page.up = ['../index.html', 'Up'] - page.write_html( [] ) - - return series_infos.length - end - - def write_series_listing - ages = ['beginner', 'junior', 'ya', 'adult'] - series_counts = {} - - ages.each do |age| - puts 'Series for "' + age + '" readers...' - - series_counts[age] = write_series_for_age(age) - end - - content = '

Browse Books By Series

' - content += '

' - content += '' - ages.each do |age| - content += '' - end - content += '
AgeNumber of Series
' + age + '' + series_counts[age].to_s + '

' - page = Page.new(@store) - page.output_dir = 'series' - page.special = content - page.up = ['../output/index.html', 'Up'] - page.write_html( [] ) - end -end diff --git a/page.rb b/page.rb deleted file mode 100644 index 638f9ad..0000000 --- a/page.rb +++ /dev/null @@ -1,166 +0,0 @@ -require 'fileutils' - -require_relative 'store' - -class Page - def initialize(store) - @back = nil - @forward = nil - @index_file = 'index.html' - @output_dir = 'output' - @special = nil - @store = store - @title = 'Books' - @up = nil - end - - def back=(value) - @back = value - end - - def forward=(value) - @forward = value - end - - def index_file=(value) - @index_file = value - end - - def navig_link(data) - if (nil == data) - return '' - end - return '' + data[1] + '' - end - - def output_dir=(value) - @output_dir = value - end - - def special=(value) - @special = value - end - - def title=(value) - @title = value - end - - def up=(value) - @up = value - end - - def write_books(fd, book_ids) - for id in book_ids - book = @store.load_book(id) - image = nil - if nil != book.cover - #@imageCount += 1 - #(path, mimeType) = book.cover.write_image(@output_dir, 'image' + @imageCount.to_s) - #image = '' - path = book.cover.path - image = '' - else - image = '(No cover image)' - end - - fd.puts '
' - fd.puts ' ' - - heading = book.heading() - description = book.description() - if nil != description - fd.puts ' ' - else - fd.puts ' ' - end - - fd.puts '
' + image + '' + heading + '

' + heading + '

' + description + '

' + heading + '
' - end - end - - def write_footer(fd) - fd.puts ' ' - end - - def write_header(fd) - fd.puts '

' + @title + '

' - - fd.puts ' ' - end - - def write_html(book_ids) - @imageCount = 0 - - if ! Dir.exist?(@output_dir) - FileUtils.mkdir_p(@output_dir) - end - - open(@output_dir + '/' + @index_file, 'w') do |fd| - fd.puts '' - fd.puts ' ' - fd.puts ' ' - fd.puts ' ' + @title + '' - - write_style_sheet(fd) - - fd.puts ' ' - fd.puts ' ' - - write_header(fd) - - write_special(fd) - write_books(fd, book_ids) - - write_footer(fd) - - fd.puts " " - fd.puts "" - end - end - - def write_special(fd) - if (nil != @special) - fd.puts(@special) - end - end - - def write_style_sheet(fd) - style = -< - div { - display: inline-block; - width: 400px; - margin: 10px; - border 3px solid #73ad21; - } - h1.header { - background: #4040a0; - color: #ffffff; - text-align: center; - } - img.cover-thumb { max-height: 200px; max-width: 200px; } - p.navigator { } - span.popup { } - span.popup:hover { text-decoration: none; background: #cfffff; z-index: 6; } - span.popup span.pop-inner { - border-color:black; - border-style:solid; - border-width:1px; - display: none; - margin: 4px 0 0 0px; - padding: 3px 3px 3px 3px; - position: absolute; - } - span.popup:hover span.pop-inner { - background: #ffffaf; - display: block; - margin: 20px 0 0 0px; - z-index:6; - } - -EOS - fd.puts style - end -end - diff --git a/series.rb b/series.rb deleted file mode 100644 index 0621876..0000000 --- a/series.rb +++ /dev/null @@ -1,87 +0,0 @@ - -class Series - def initialize(id) - @age = nil - @genre = nil - @grouping = nil - @code = nil - @descr = nil - @id = id - end - - def age - @age - end - - def age=(value) - @age = value - end - - def code - @code - end - - def code=(value) - @code = value - end - - def descr - @descr - end - - def descr=(value) - @descr = value - end - - def genre - @genre - end - - def genre=(value) - @genre = value - end - - def grouping - @grouping - end - - def grouping=(value) - @grouping = value - end - - def id - @id - end - - def inspect - data = [] - if nil != @age - data.push('age="' + @age.inspect + '"') - end - if nil != @code - data.push('code="' + @code.inspect + '"') - end - if nil != @descr - data.push('descr="' + @descr + '"') - end - if nil != @genre - data.push('genre="' + @genre + '"') - end - if nil != @grouping - data.push('grouping="' + @grouping + '"') - end - return '(Series:' + data.join(',') + ')' - end - - def key - if nil != grouping and nil != code - return grouping.to_s + '_' + code.to_s - end - return id.to_s - end - - def to_s - return inspect() - end -end - diff --git a/store.rb b/store.rb deleted file mode 100644 index 1a33ca3..0000000 --- a/store.rb +++ /dev/null @@ -1,655 +0,0 @@ - -require 'csv' -require 'fileutils' -require 'inifile' -require 'pg' - -require_relative 'series' -require_relative 'tconn' - -class Store - def unclassified_csv - @basePath + '/csv/unclassified.csv' - end - - def initialize(config_file) - @conn = nil - - config = IniFile.load(config_file) - if nil == config - puts 'FATAL: Failed to load config file "' + config_file + '". Aborting initialization.' - return - end - - section = config['database'] - @dbhost = section['host'] - @dbport = 5432 - @dbname = section['name'] - @dbuser = section['user'] - @dbpass = section['pass'] - - section = config['filesystem'] - @basePath = section['basePath'] - end - - def connect - @conn = TimedConn.new(PG.connect(@dbhost, @dbport, '', '', @dbname, @dbuser, @dbpass)) - return @conn - end - - def disconnect - @conn.close() - end - - def construct_efs_path(efs_id) - id_str = sprintf('%010d', efs_id) - path = sprintf('%s/%s/%s/%s', id_str[0,2], id_str[2,2], id_str[4,2], id_str[6,2]) - name = id_str + '.dat' - return path, name - end - - def cross_reference_lists -puts "@@@@@@@@@@@ CROSS-REF START @@@@@@@@@@@" - exec_update("TRUNCATE TABLE Lists CASCADE;", []) - - populate_lists_table -puts "@@@@@@@@@@@ CROSS-REF DONE @@@@@@@@@@@" - end - - def create_schema(skip_class) - create_authors = -< exc - puts 'WARNING: "' + stmt + '" failed: ' + exc.to_s - end - end - end - - def find_all_authors(author_name) - result = [] - - sqlSelect = "SELECT id FROM Authors WHERE grouping=$1;" - args = [author_name] - - @conn.exec_params(sqlSelect, args) do |rs| - rs.each do |row| - result << row['id'] - end - end - - result - end - - def find_author(author) - sqlSelect = "SELECT id FROM Authors WHERE grouping=$1 AND reading=$2 AND sort=$3;" - args = [author.grouping, author.reading_order, author.sort_order] - - @conn.exec_params(sqlSelect, args) do |rs| - if rs.ntuples > 0 - return rs[0]['id'] - end - end - - return nil - end - - def init_db(skip_class) - sql = "SELECT 1 FROM pg_tables WHERE tableowner='quanlib' AND tablename='books'" - found = false - @conn.exec(sql).each do |row| - found = true - end - - if ! found - create_schema(skip_class) - end - end - - def load_author(id) - sqlSelect = "SELECT grouping, reading, sort FROM Authors WHERE id=$1" - args = [id] - @conn.exec_params(sqlSelect, args) do |rs| - if rs.ntuples != 1 - raise "Expected 1 row for " + id + " but got " + rs.ntuples + ": " + sqlSelect - end - row = rs[0] - author = Author.new(row['grouping'], row['reading'], row['sort']) - return author - end - return nil - end - - def store_author(author) - id = find_author(author) - if nil == id - id = next_id('author_id') - sqlInsert = "INSERT INTO Authors(id, grouping, reading, sort) VALUES ($1, $2, $3, $4);" - args = [id, author.grouping, author.reading_order, author.sort_order] - begin - rs = @conn.exec_params(sqlInsert, args) - rescue Exception => e - puts sqlInsert + ": " + args.inspect() - puts e.message - puts $@ - ensure - rs.clear if rs - end - end - return id - end - - def load_book(id) - sql = "SELECT author, classification, cover, description, language, path, series, title, volume FROM Books WHERE id=$1;" - book = nil - - begin - @conn.exec_params(sql, [id]) do |rs| - if 1 != rs.ntuples - raise 'Expected one row in Books for id ' + id + ', but found ' + rs.length + '.' - return nil - end - row = rs[0] - - book = Book.new(self) - book.author = load_author(row['author']) - book.classification_id = row['classification'] - book.cover = load_cover(row['cover']) - book.description = row['description'] - book.language = row['language'] - book.path = row['path'] - book.series_id = row['series'] - book.title = row['title'] - book.volume = row['volume'] - end - rescue Exception => e - puts sql + ": " + id - puts e.message - puts $@ - end - - return book - end - - def store_book(book) - sql = "INSERT INTO Books (id, arrived, author, classification, cover, description, language, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);" - - book_id = next_id('book_id') - - author_id = store_author(book.author) - (efs_id, mime_type) = store_cover(book) - - args = [book_id, book.arrived, author_id, book.classification_id, efs_id, book.description, book.language, book.path, book.series_id, book.title, book.volume] - - begin - rs = @conn.exec_params(sql, args) - rescue Exception => e - puts sql + ": " + args.inspect() - puts e.message - puts $@ - ensure - rs.clear if rs - end - - return book_id - end - - def find_classification(author_grouping, title_grouping) - sql = "SELECT id FROM Classifications WHERE author_grouping = $1 AND title_grouping = $2;" - @conn.exec_params(sql, [author_grouping, title_grouping]) do |rs| - if rs.ntuples > 0 - return rs[0]['id'] - end - end - return nil - end - - def load_classification(id) - sql = "SELECT ddc, lcc, author_grouping, author_sort, title_grouping, title " - sql += " FROM Classifications WHERE id=$1" - @conn.exec_params(sql, [id]) do |rs| - if rs.ntuples > 0 - row = rs[0] - ddc = row['ddc'] - lcc = row['lcc'] - author_grouping = row['author_grouping'] - author = row['author_sort'] - title_grouping = row['title_grouping'] - title = row['title'] - - result = Classification.new(ddc, lcc, author_grouping, author, title_grouping, title) - result.id = id - return result - end - end - - return nil - end - - def load_cover(id) - if nil == id - return nil - end - - mime_type = 'application/octet-stream' - - sql = "SELECT mimeType FROM Efs WHERE id=$1" - @conn.exec_params(sql, [id]) do |rs| - if rs.ntuples != 1 - raise "Expected one row but got " + rs.ntuples + ": " + sql + ": " + id - end - mime_type = rs[0]['mimeType'] - end - - (efspath, efsname) = construct_efs_path(id) - - fullpath = @basePath + '/efs/' + efspath + '/' + efsname - - return Cover.new(nil, fullpath, mime_type) - end - - def store_cover(book) - efs_id = nil - cover = book.cover() - - if nil == cover - return nil - end - - @conn.exec("SELECT nextval('efs_id')") do |rs| - efs_id = rs[0]['nextval'] - end - - if nil == efs_id - return nil - end - - (efspath, efsname) = construct_efs_path(efs_id) - - efspath = @basePath + '/efs/' + efspath - - FileUtils.mkdir_p(efspath) - - (filepath, mimetype) = cover.write_image(efspath, efsname) - - sql = "INSERT INTO efs VALUES ($1, $2)" - begin - rs = @conn.exec_params(sql, [efs_id, mimetype]) - rescue Exception => e - puts sql + ": " + efs_id + ", " + mimetype - puts e.message - puts $@ - ensure - rs.clear if rs - end - - return efs_id, mimetype - end - - def exec_id_query(sql, args) - ids = [] - @conn.exec_params(sql, args) do |rs| - rs.each do |row| - ids.push(row['id']) - end - end - return ids - end - - def exec_update(sql, args) - begin - rs = @conn.exec_params(sql, args) - rescue Exception => e - puts sql + ": " + args.inspect() - puts e.message - puts $@ - ensure - rs.clear if rs - end - end - - def next_id(seq_name) - id = nil - @conn.exec("SELECT nextval('" + seq_name + "');") do |rs| - id = rs[0]['nextval'] - end - return id - end - - def get_series(grouping, code) - if nil == code - return nil - end - - sql = "SELECT id FROM Series WHERE grouping=$1 AND code=$2;" - args = [grouping, code] - @conn.exec_params(sql, args).each do |row| - return row['id'] - end - - # TODO: Create a new series object here? - puts 'WARNING: series("' + grouping + '", "' + code + '") not found.' - return nil - end - - def load_series(id) - sql = "SELECT age,genre,grouping,code,descr FROM Series WHERE id=$1;" - args = [id] - @conn.exec_params(sql, args) do |rs| - if rs.ntuples > 0 - row = rs[0] - series = Series.new(id) - series.age = row['age'] - series.genre = row['genre'] - series.grouping = row['grouping'] - series.code = row['code'] - series.descr = row['descr'] - return series - end - end - return nil - end - - def populate_classifications_table - puts "Populating the Classifications table..." - first = true - CSV.foreach(@basePath + '/csv/class.csv') do |row| - if first - # skip the header row - first = false - else - - # First, add a row to the Classifications table - - id = next_id('classification_id') - ddc = row[0] - lcc = row[1] - author_grouping = row[2] - author_sort = row[3] - title_grouping = row[4] - title = row[5] - - sqlInsert = "INSERT INTO Classifications (id, ddc, lcc, author_grouping, author_sort, title_grouping, title) VALUES ($1, $2, $3, $4, $5, $6, $7);" - args = [id, ddc, lcc, author_grouping, author_sort, title_grouping, title] - exec_update(sqlInsert, args) - - # Second, link up with the appropriate FAST table entries - - fast = [] - input = row[6] - if input.length > 0 - fast = input.split(';') - end - - fast.each do |fast_id| - sqlInsert = "INSERT INTO FAST_Classifications (fast, classification) VALUES ($1, $2);" - args = [fast_id, id] - exec_update(sqlInsert, args) - end - end - end - end - - def populate_fast_table - puts "Populating the FAST table..." - first = true - CSV.foreach(@basePath + '/csv/fast.csv') do |row| - if first - first = false # skip the header row - else - id = row[0] - descr = row[1] - sqlInsert = "INSERT INTO FAST (id, descr) VALUES ($1, $2);" - exec_update(sqlInsert, [id, descr]) - end - end - end - - def populate_lists_table - puts "Populating the Lists table..." - - CSV.foreach(@basePath + "/csv/lists.csv", headers: true) do |row| - author_ids = find_all_authors(row['author']) - if author_ids.empty? - specification = [row['age'], row['category'], row['code'], row['year'], row['author'], row['title']] - .map { |x| x.inspect } - .join(', ') - - puts "WARNING: For list entry (#{specification}), no such author was found." - - next - end - - sqlInsert = %Q( - INSERT INTO Lists (id, age, category, code, year, author, title) - VALUES ($1, $2, $3, $4, $5, $6, $7); - ) - author_ids.each do |author_id| - list_id = next_id('list_id') - args = [list_id, row['age'], row['category'], row['code'], row['year'], author_id, row['title']] - exec_update(sqlInsert, args) - - update_lists_books_table(list_id, author_id, row['title']) - end - end - end - - # Scan for books that match this Lists entry, and add any matches to the Lists_Books associative table - def update_lists_books_table(list_id, author_id, title) - title_pattern = Book.grouping_for_title(title).gsub('_', '%') - sqlSelect = "SELECT id FROM Books WHERE author = $1 AND title LIKE $2;" - args = [author_id, title_pattern] - - @conn.exec_params(sqlSelect, args) do |rs| - rs.each do |row| - sqlInsert = "INSERT INTO Lists_Books (list, book) VALUES ($1, $2)" - args = [list_id, row['id']] - exec_update(sqlInsert, args) - end - end - end - - def populate_series_table - puts "Populating the Series table..." - CSV.foreach(@basePath + '/csv/series.csv') do |row| - id = next_id('series_id') - sqlInsert = "INSERT INTO Series (id, age, genre, grouping, code, descr) VALUES ($1, $2, $3, $4, $5, $6);" - args = [id] + row - exec_update(sqlInsert, args) - end - end - - def query_books_by_author(pattern) - sql = -< {}, "filesystem" => {}}) store = Store.new('quanlib.ini') - data.each do |input, expectedPath, expectedName| - (actualPath, actualName) = store.construct_efs_path(input) + data.each do |input, expected_path, expected_name| + (actual_path, actual_name) = store.construct_efs_path(input) - expect(actualPath).to eq(expectedPath) - expect(actualName).to eq(expectedName) + assert_equal expected_path, actual_path + assert_equal expected_name, actual_name end end end diff --git a/test/test_helper.rb b/test/test_helper.rb new file mode 100644 index 0000000..25bf530 --- /dev/null +++ b/test/test_helper.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +$LOAD_PATH.unshift File.expand_path("../app", __dir__) + +require "minitest/autorun" +require "mocha/minitest" diff --git a/walk_dir.rb b/walk_dir.rb deleted file mode 100644 index a2c088f..0000000 --- a/walk_dir.rb +++ /dev/null @@ -1,118 +0,0 @@ -# Walk the directory (and subdirectories), identifying books. -# -# Expected format: -# .../AuthorName/Title_of_the_Awesome_Book.ext -# -# Author is given as FirstLast. For example, -# Robert Anson Heinlein is RobertHeinlein, and -# JKRowling is JoanneRowling. -# -# Book titles have spaces replaced with underscores, -# and punctuation [,!?'] replaced with hyphens. -# -# If the book forms part of a series, then an all-capitals -# series designator, followed by a numeric volume number, -# followed by an underscore, is prefixed to the name. -# For example, Hardy Boys' volume 1, The Tower Treasure, -# is rendered as .../FranklinDixon/HB001_The_Tower_Treasure.epub -# and Mrs. Pollifax volume 6, On the China Station, is -# .../DorothyGilman/P06_On_the_China_Station.epub. - -require_relative 'book' -require_relative 'book_loader' -require_relative 'store' - -class WalkDir - def initialize(config_file, root) - @queue = Queue.new - @root = root - @config_file = config_file - @threads = [] - - @files = walk(@root) - end - - def books - @threads = [] - num_threads.times do - @threads << Thread.new do - BookLoader.new(@config_file, @queue).run - end - end - - result = [] - @files = remove_duplicates(@files) - for file in @files.sort() - if Book.can_handle?(file) && (!is_duplicate?(file)) - # Queue this book to be loaded and added to the DB by a BookLoader thread - @queue << file - end - end - - @threads.count.times { @queue << BookLoader::DONE_MARKER } - - @threads.each { |t| t.join } - end - - # Duplicate versions of a text are named - # xxx_suffix.ext - # Where suffix is one of bis, ter, quater, quinquies - # for the 2nd, 3rd, 4th or 5th variant respectively. - def is_duplicate?(file) - s = file.to_s - suffix = ['_bis.', '_ter.', '_quater.', '_quinquies.'] - suffix.each do |pat| - if s.include?(pat) - return true - end - end - - return false - end - - def remove_duplicates(files) - unique = {} - for file in files - if Book.can_handle?(file) - key = File.dirname(file) + '/' + File.basename(file, '.*') - if unique.has_key?(key) - new_ext = File.extname(file) - old_ext = File.extname(unique[key]) - if ('.pdf' == old_ext) && ('.epub' == new_ext) - # Prefer EPUB over PDF - puts 'REPLACED ' + unique[key].to_s + ' with ' + file.to_s - unique[key] = file - else - puts 'DROPPED ' + file.to_s + " because it's superceded by " + unique[key].to_s - end - else - unique[key] = file - end - end - end - - return unique.values - end - - def walk(path) - result = [] - children = Dir.entries(path) - for child in children - fullName = (path.chomp("/")) + "/" + child - if (File.directory?(fullName)) and (child != ".") and (child != "..") and (!File.symlink?(fullName)) - sub = walk(fullName) - if (sub != nil) and (sub.length > 0) - result.concat(sub) - end - elsif (! File.directory?(fullName)) - result.push(fullName) - end - end - return result - end - - def num_threads - # TOOD: make this (auto?) configurable - 12 - end -end