From: Chris Jaekl Date: Sat, 4 Apr 2020 20:36:15 +0000 (-0400) Subject: Add support to cross-reference books against list of award-winners. X-Git-Url: https://jaekl.net/gitweb/?a=commitdiff_plain;h=5c1cd0b8778c301090bfdda6f9cb2399e70b792d;p=quanlib.git Add support to cross-reference books against list of award-winners. --- diff --git a/book.rb b/book.rb index d713569..ea65024 100644 --- a/book.rb +++ b/book.rb @@ -10,6 +10,7 @@ require_relative 'store' class Book @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/' + @@SERIES_AND_VOLUME_REGEX = /^([A-Z]+)([0-9]+(\.[0-9]+)?)$/ attr_accessor :author attr_accessor :classification_id @@ -49,6 +50,18 @@ class Book return false end + def self.grouping_for_title(title) + result = title + + '\'",!#'.split('').each do |c| + result = result.gsub(c, '-') + end + result = result.gsub(/: */, '--') + result = result.gsub(' ', '_') + + result + end + def heading result = [] @@ -159,7 +172,7 @@ class Book vol = nil first = arr[0] - matchData = (arr[0]).match(/^([A-Z]+)([0-9]+)$/) + matchData = (arr[0]).match(@@SERIES_AND_VOLUME_REGEX) if nil != matchData capt = matchData.captures series = capt[0] @@ -174,6 +187,15 @@ class Book title = arr.join(' ') + bare_title_grouping = title_grouping + .split('_') + .reject { |part| part.match(@@SERIES_AND_VOLUME_REGEX) } + .join('_') + + unless bare_title_grouping == Book.grouping_for_title(title) + puts "WARNING: title_grouping mismatch: #{bare_title_grouping.inspect} vs. #{Book.grouping_for_title(title).inspect}" + end + return series, vol, title end diff --git a/main.rb b/main.rb index 564ba8b..b0c4b79 100644 --- a/main.rb +++ b/main.rb @@ -44,6 +44,8 @@ for arg in ARGV end end +@store.cross_reference_lists + puts 'Creating output...' navigator = Navigator.new(@store) diff --git a/store.rb b/store.rb index 5faf7ce..f1b7fba 100644 --- a/store.rb +++ b/store.rb @@ -48,8 +48,16 @@ class Store return path, name end + def cross_reference_lists +puts "@@@@@@@@@@@ CROSS-REF START @@@@@@@@@@@" + exec_update("TRUNCATE TABLE Lists CASCADE;", []) + + populate_lists_table +puts "@@@@@@@@@@@ CROSS-REF DONE @@@@@@@@@@@" + end + def create_schema(skip_class) - create_authors = + create_authors = < 0 return rs[0]['id'] end end + return nil end @@ -222,7 +275,7 @@ EOS id = next_id('author_id') sqlInsert = "INSERT INTO Authors(id, grouping, reading, sort) VALUES ($1, $2, $3, $4);" args = [id, author.grouping, author.reading_order, author.sort_order] - begin + begin rs = @conn.exec_params(sqlInsert, args) rescue Exception => e puts sqlInsert + ": " + args.inspect() @@ -257,7 +310,7 @@ EOS book.series_id = row['series'] book.title = row['title'] book.volume = row['volume'] - end + end rescue Exception => e puts sql + ": " + id puts e.message @@ -281,7 +334,7 @@ EOS rs = @conn.exec_params(sql, args) rescue Exception => e puts sql + ": " + args.inspect() - puts e.message + puts e.message puts $@ ensure rs.clear if rs @@ -378,7 +431,7 @@ EOS ensure rs.clear if rs end - + return efs_id, mimetype end @@ -408,7 +461,7 @@ EOS id = nil @conn.exec("SELECT nextval('" + seq_name + "');") do |rs| id = rs[0]['nextval'] - end + end return id end @@ -464,7 +517,7 @@ EOS author_sort = row[3] title_grouping = row[4] title = row[5] - + sqlInsert = "INSERT INTO Classifications (id, ddc, lcc, author_grouping, author_sort, title_grouping, title) VALUES ($1, $2, $3, $4, $5, $6, $7);" args = [id, ddc, lcc, author_grouping, author_sort, title_grouping, title] exec_update(sqlInsert, args) @@ -475,7 +528,7 @@ EOS input = row[6] if input.length > 0 fast = input.split(';') - end + end fast.each do |fast_id| sqlInsert = "INSERT INTO FAST_Classifications (fast, classification) VALUES ($1, $2);" @@ -501,6 +554,50 @@ EOS end end + def populate_lists_table + puts "Populating the Lists table..." + + CSV.foreach(@basePath + "/csv/lists.csv", headers: true) do |row| + author_ids = find_all_authors(row['author']) + if author_ids.empty? + specification = [row['age'], row['category'], row['code'], row['year'], row['author'], row['title']] + .map { |x| x.inspect } + .join(', ') + + puts "WARNING: For list entry (#{specification}), no such author was found." + + next + end + + sqlInsert = %Q( + INSERT INTO Lists (id, age, category, code, year, author, title) + VALUES ($1, $2, $3, $4, $5, $6, $7); + ) + author_ids.each do |author_id| + list_id = next_id('list_id') + args = [list_id, row['age'], row['category'], row['code'], row['year'], author_id, row['title']] + exec_update(sqlInsert, args) + + update_lists_books_table(list_id, author_id, row['title']) + end + end + end + + # Scan for books that match this Lists entry, and add any matches to the Lists_Books associative table + def update_lists_books_table(list_id, author_id, title) + title_pattern = Book.grouping_for_title(title).gsub('_', '%') + sqlSelect = "SELECT id FROM Books WHERE author = $1 AND title LIKE $2;" + args = [author_id, title_pattern] + + @conn.exec_params(sqlSelect, args) do |rs| + rs.each do |row| + sqlInsert = "INSERT INTO Lists_Books (list, book) VALUES ($1, $2)" + args = [list_id, row['id']] + exec_update(sqlInsert, args) + end + end + end + def populate_series_table puts "Populating the Series table..." CSV.foreach(@basePath + '/csv/series.csv') do |row| @@ -512,21 +609,21 @@ EOS end def query_books_by_author(pattern) - sql = + sql = <