--- /dev/null
+require 'erb'
+require 'net/http'
+require 'nokogiri'
+
+require 'bookclass'
+require 'classset'
+require 'fastset'
+
+class Lookup
+ def initialize
+ @class_set = ClassSet.new()
+ @fast_set = FastSet.new()
+ end
+
+ def construct_url(params)
+ first = true
+ cmd = 'http://classify.oclc.org/classify2/Classify'
+
+ params += [ ['summary', 'false' ] ]
+
+ params.each do |tuple|
+ name, value = tuple
+ if (first)
+ cmd += '?'
+ first = false
+ else
+ cmd += '&'
+ end
+ cmd += name + '='
+ cmd += ERB::Util.url_encode(value)
+ end
+
+ return cmd
+ end
+
+ def isUpper?(c)
+ return /[[:upper:]]/.match(c)
+ end
+
+ def lookup(author_grouping, title)
+ params = [
+ ['author', massage_author(author_grouping)],
+ ['title', title]
+ ]
+
+ cmd = construct_url(params)
+ res = submit_request(cmd)
+
+ doc = Nokogiri::XML(res.body)
+
+ if "4" == response_code(doc)
+ # Multiple matches; pick the first one and re-query
+ owi = doc.css("works work")[0]["owi"]
+
+ params = [
+ ['owi', owi]
+ ]
+ cmd = construct_url(params)
+ res = submit_request(cmd)
+
+ #puts res.body
+
+ doc = Nokogiri::XML(res.body)
+ end
+
+ if "2" != response_code(doc)
+ puts "Lookup failed"
+ return nil
+ end
+
+ title = doc.css("classify editions edition")[0]["title"]
+
+ info = BookClass.new(author_grouping, title)
+
+ author = doc.css("classify editions edition")[0]["author"]
+ info.author = author
+
+ nodes = doc.css("classify recommendations ddc mostPopular")
+ if nil != nodes && nodes.length > 0
+ ddc = nodes[0]["sfa"]
+ info.ddc = ddc
+ end
+
+ nodes = doc.css("classify recommendations lcc mostPopular")
+ if nil != nodes && nodes.length > 0
+ lcc = nodes[0]["sfa"]
+ end
+ info.lcc = lcc
+
+ headings = doc.css("classify recommendations fast headings heading")
+ headings.each do |heading|
+ #puts heading.inspect
+ id = heading['ident']
+ #puts 'ID: ' + id
+ descr = heading.content
+ #puts 'DESCR: ' + descr
+ info.add_fast(id)
+ @fast_set.add(id, descr)
+ end
+
+ @class_set.ensure_contains!(info)
+
+ return info
+ end
+
+ def massage_author(input)
+ if nil == input
+ return nil
+ end
+
+ reading_order = ""
+ input.each_char do |c|
+ if isUpper?(c) and (reading_order.length > 0)
+ reading_order += " "
+ end
+ reading_order += c
+ end
+
+ return reading_order
+ end
+
+ def response_code(doc)
+ return doc.css("classify response")[0]["code"]
+ end
+
+ def save_state
+ @class_set.save_state()
+ @fast_set.save_state()
+ end
+
+ def submit_request(cmd)
+ puts ('GET ' + cmd)
+
+ url = URI.parse(cmd)
+ req = Net::HTTP::Get.new(url.to_s)
+ res = Net::HTTP.start(url.host, url.port) {|http|
+ http.request(req)
+ }
+ return res
+ end
+end
+
--- /dev/null
+require 'csv'
+
+require 'bookclass'
+
+class ClassSet
+ @@class_csv_file = 'class.csv'
+
+ def initialize
+ @entries = {}
+ load!(@@class_csv_file)
+ end
+
+ def add!(info)
+ key = construct_key(info.grouping, info.title)
+ @entries[key] = info
+ end
+
+ def construct_key(author_grouping, title)
+ author_grouping.to_s + '|' + title.to_s
+ end
+
+ def get(author_grouping, title)
+ key = construct_key(author_grouping, title)
+ if @entries.has_key?(key)
+ return @entries[key]
+ else
+ return nil
+ end
+ end
+
+ def has_key?(author_grouping, title)
+ @entries.has_key?(construct_key(author_grouping, title))
+ end
+
+ def ensure_contains!(info)
+ if ! has_key?(info.grouping, info.title)
+ add!(info)
+ end
+ end
+
+ def inspect
+ data = []
+
+ if nil != @entries
+ data.push('entries=' + @entries.inspect + '')
+ end
+
+ return '(ClassSet:' + data.join(',') + ')'
+ end
+
+ def load!(file_name)
+ first = true
+ @entries = {}
+
+ if ! File.exist?(file_name)
+ puts 'WARNING: file "' + file_name + '" not found.'
+ return
+ end
+
+ File.open(file_name, 'r:UTF-8') do |fd|
+ csv = CSV.new(fd)
+ csv.to_a.each do |row|
+ if first
+ first = false
+ elsif row.length >= 6
+ ddc = row[0]
+ lcc = row[1]
+ grouping = row[2]
+ author = row[3]
+ title = row[4]
+ fast = []
+ if nil != row[5]
+ fast = row[5].split(';')
+ end
+
+ bookclass = BookClass.new(grouping, title)
+ bookclass.ddc = ddc
+ bookclass.lcc = lcc
+ bookclass.author = author
+
+ fast.each do |id|
+ bookclass.add_fast(id)
+ end
+
+ key = construct_key(grouping, title)
+ @entries[key] = bookclass
+ end
+ end
+ end
+ end
+
+ def save(file_name)
+ CSV.open(file_name, 'w:UTF-8') do |csv|
+ csv << ['Dewey', 'LCC', 'Grouping', 'Author', 'Title', 'FAST']
+
+ @entries.keys.sort.each do |key|
+ info = @entries[key]
+
+ ddc = info.ddc
+ lcc = info.lcc
+ grouping = info.grouping
+ author = info.author
+ title = info.title
+ fast_list = info.fast
+ fast_ids = []
+ fast_list.each do |tuple|
+ fast_ids.push(tuple[0])
+ end
+ fast = fast_ids.join(';')
+
+ csv << [ ddc, lcc, grouping, author, title, fast ]
+ end
+ end
+ end
+
+ def save_state
+ save(@@class_csv_file)
+ end
+end
+
create_books =
<<EOS
CREATE TABLE Books (
- id INTEGER PRIMARY KEY,
- author INTEGER REFERENCES Authors(id),
- cover INTEGER,
- description TEXT,
- path VARCHAR(256),
- series INTEGER REFERENCES Series(id),
- title VARCHAR(196),
- volume VARCHAR(16)
+ id INTEGER PRIMARY KEY,
+ author INTEGER REFERENCES Authors(id),
+ classification INTEGER REFERENCES Classifications(id),
+ cover INTEGER,
+ description TEXT,
+ path VARCHAR(256),
+ series INTEGER REFERENCES Series(id),
+ title VARCHAR(256),
+ volume VARCHAR(16)
+ );
+EOS
+
+ create_classification =
+<<EOS
+ CREATE TABLE Classifications (
+ id INTEGER PRIMARY KEY,
+ ddc VARCHAR(32),
+ lcc VARCHAR(32),
+ author_grouping VARCHAR(64),
+ author_sort VARCHAR(128),
+ title_grouping VARCHAR(256),
+ title VARCHAR(256)
);
EOS
);
EOS
+ create_fast =
+<<EOS
+ CREATE TABLE FAST (
+ id VARCHAR(32) PRIMARY KEY,
+ descr VARCHAR(128)
+ );
+EOS
+
+ # Associative entity, linking FAST and Classifications tables
+ # in a 0..n to 0..m relationship
+ create_fast_classifications =
+<<EOS
+ CREATE TABLE FAST_Classifications (
+ fast VARCHAR(32) REFERENCES FAST(id),
+ classification INTEGER REFERENCES Classifications(id)
+ );
+EOS
+
create_series =
<<EOS
CREATE TABLE Series (
stmts = [
create_authors,
+ create_classification,
create_efs,
+ create_fast,
create_series,
create_books,
+ create_fast_classifications,
'CREATE SEQUENCE author_id;',
'CREATE SEQUENCE book_id;',
+ 'CREATE SEQUENCE classification_id;',
'CREATE SEQUENCE efs_id;',
'CREATE SEQUENCE series_id;'
]
@conn.exec(stmt)
end
+ populate_fast_table()
+ populate_classifications_table()
populate_series_table()
end
def dropSchema
stmts = [
'DROP TABLE Books;',
+ 'DROP TABLE FAST_Classifications;',
'DROP TABLE Authors;',
+ 'DROP TABLE Classifications;',
'DROP TABLE EFS;',
+ 'DROP TABLE FAST;',
'DROP TABLE Series;',
'DROP SEQUENCE author_id;',
'DROP SEQUENCE book_id;',
+ 'DROP SEQUENCE classification_id;',
'DROP SEQUENCE efs_id;',
'DROP SEQUENCE series_id;'
]
for stmt in stmts do
- @conn.exec(stmt)
+ begin
+ @conn.exec(stmt)
+ rescue Exception => exc
+ puts 'WARNING: "' + stmt + '" failed: ' + exc.to_s
+ end
end
end
def load_book(id)
#puts 'DEBUG: load_book(' + id + ')'
- sql = "SELECT author, cover, description, path, series, title, volume FROM Books WHERE id=$1;"
+ sql = "SELECT author, classification, cover, description, path, series, title, volume FROM Books WHERE id=$1;"
book = nil
begin
book = Book.new(self)
book.author = load_author(row['author'])
+ book.classification_id = row['classification']
book.cover = load_cover(row['cover'])
book.description = row['description']
book.path = row['path']
end
def store_book(book)
- sql = "INSERT INTO Books (id, author, cover, description, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7, $8);"
+ sql = "INSERT INTO Books (id, author, classification, cover, description, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9);"
book_id = next_id('book_id')
author_id = store_author(book.author)
(efs_id, mime_type) = store_cover(book)
- args = [book_id, author_id, efs_id, book.description(), book.path(), book.series_id(), book.title(), book.volume()]
+ args = [book_id, author_id, book.classification_id, efs_id, book.description(), book.path(), book.series_id(), book.title(), book.volume()]
begin
rs = @conn.exec_params(sql, args)
return book_id
end
+ def find_classification(author_grouping, title_grouping)
+ #puts 'find_classification("' + author_grouping.inspect + '", "' + title_grouping.inspect + '")...'
+ sql = "SELECT id FROM Classifications WHERE author_grouping = $1 AND title_grouping = $2;"
+ @conn.exec_params(sql, [author_grouping, title_grouping]) do |rs|
+ if rs.ntuples > 0
+ #puts ' --> ' + rs[0]['id'].inspect
+ return rs[0]['id']
+ end
+ end
+ #puts ' --> NIL'
+ return nil
+ end
+
def load_cover(id)
if nil == id
return nil
return efs_id, mimetype
end
+ def exec_id_query(sql, args)
+ ids = []
+ @conn.exec_params(sql, args) do |rs|
+ rs.each do |row|
+ ids.push(row['id'])
+ end
+ end
+ return ids
+ end
+
+ def exec_update(sql, args)
+ begin
+ rs = @conn.exec_params(sql, args)
+ rescue Exception => e
+ puts sql + ": " + args.inspect()
+ puts e.message
+ puts $@
+ ensure
+ rs.clear if rs
+ end
+ end
+
def next_id(seq_name)
id = nil
@conn.exec("SELECT nextval('" + seq_name + "');") do |rs|
return nil
end
+ def populate_classifications_table
+ puts "Populating the Classifications table..."
+ first = true
+ CSV.foreach(@basepath + '/csv/class.csv') do |row|
+ if first
+ # skip the header row
+ first = false
+ else
+
+ # First, add a row to the Classifications table
+
+ id = next_id('classification_id')
+ ddc = row[0]
+ lcc = row[1]
+ author_grouping = row[2]
+ author_sort = row[3]
+ title_grouping = row[4]
+ title = row[5]
+
+ sqlInsert = "INSERT INTO Classifications (id, ddc, lcc, author_grouping, author_sort, title_grouping, title) VALUES ($1, $2, $3, $4, $5, $6, $7);"
+ args = [id, ddc, lcc, author_grouping, author_sort, title_grouping, title]
+ exec_update(sqlInsert, args)
+
+ # Second, link up with the appropriate FAST table entries
+
+ fast = []
+ input = row[6]
+ if input.length > 0
+ fast = input.split(';')
+ end
+
+ fast.each do |fast_id|
+ sqlInsert = "INSERT INTO FAST_Classifications (fast, classification) VALUES ($1, $2);"
+ args = [fast_id, id]
+ exec_update(sqlInsert, args)
+ end
+ end
+ end
+ end
+
+ def populate_fast_table
+ puts "Populating the FAST table..."
+ first = true
+ CSV.foreach(@basepath + '/csv/fast.csv') do |row|
+ if first
+ first = false # skip the header row
+ else
+ id = row[0]
+ descr = row[1]
+ sqlInsert = "INSERT INTO FAST (id, descr) VALUES ($1, $2);"
+ exec_update(sqlInsert, [id, descr])
+ end
+ end
+ end
+
def populate_series_table
puts "Populating the Series table..."
CSV.foreach(@basepath + '/csv/series.csv') do |row|
id = next_id('series_id')
sqlInsert = "INSERT INTO Series (id, age, genre, grouping, code, descr) VALUES ($1, $2, $3, $4, $5, $6);"
args = [id] + row
- begin
- # DEBUG: puts 'SQL> ' + sqlInsert + ': ' + args.inspect()
- rs = @conn.exec_params(sqlInsert, args)
- rescue Exception => e
- puts sqlInsert + ": " + args.inspect()
- puts e.message
- puts $@
- ensure
- rs.clear if rs
- end
+ exec_update(sqlInsert, args) # shared helper: runs the insert, prints any error to stdout, clears the result
end
end
WHERE upper(a.grouping) LIKE $1
ORDER BY a.grouping, b.series, b.volume, b.title
EOS
- book_ids = []
- @conn.exec_params(sql, [pattern]) do |rs|
- rs.each do |row|
- book_ids.push(row['id'])
- end
- end
- return book_ids
+ return exec_id_query(sql, [pattern])
+ end
+
+ def query_books_by_ddc
+ sql =
+<<EOS
+ SELECT b.id FROM Classifications c
+ INNER JOIN Books b ON b.classification=c.id
+ ORDER BY c.ddc
+EOS
+ return exec_id_query(sql, [])
end
def query_books_by_series_id(id)
WHERE b.series = $1
ORDER BY b.volume,b.title
EOS
- book_ids = []
- @conn.exec_params(sql, [id]) do |rs|
- rs.each do |row|
- book_ids.push(row['id'])
- end
- end
- return book_ids
+ return exec_id_query(sql, [id])
end
def query_series_by_age(pattern)
WHERE s.age LIKE $1
ORDER BY s.grouping,s.descr
EOS
- series_ids = []
- @conn.exec_params(sql, [pattern]) do |rs|
- rs.each do |row|
- series_ids.push(row['id'])
- end
- end
- return series_ids
+ return exec_id_query(sql, [pattern])
end
end