protected
def parse_file_name!(file_name)
+ category = nil # e.g., non-fiction, fan-fiction
+ grouping = ''
+
parts = file_name.split('/')
(series_code, @volume, @title) = processTitle(parts[-1])
if parts.length > 1
@author = Author.new(grouping, reading_order, sort_order)
@series_id = @store.get_series(grouping, series_code)
end
+ if parts.length > 2
+ category = parts[-3]
+ end
lc_file_name = file_name.downcase
if lc_file_name.end_with?(".epub")
end
@classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*'))
+
+ # TODO: Fix horrible hard-coded strings and paths
+ if ('01_nonfic' == category) && (nil == classification_id)
+ open(Store.unclassified_csv, 'a') do |fd|
+ fd.puts('"' + grouping.to_s + '","' + path + '"')
+ end
+ end
end
protected
@ddc = nil
@grouping = grouping
@fast = []
+ @filename = nil # a single file name (or nil when unknown), not a list like @fast
@lcc = nil
@title = title
end
def fast
@fast
end
+ def filename
+ @filename
+ end
+ def filename=(value)
+ @filename = value
+ end
def grouping
@grouping
end
if nil != @fast
data.push('fast=' + @fast.inspect)
end
+ if nil != @filename
+ data.push('filename=' + @filename.to_s + '"')
+ end
if nil != @lcc
data.push('lcc="' + @lcc + '"')
end
+++ /dev/null
-require 'erb'
-require 'net/http'
-require 'nokogiri'
-
-require 'bookclass'
-require 'classset'
-require 'fastset'
-
-class Lookup
- def initialize
- @class_set = ClassSet.new()
- @fast_set = FastSet.new()
- end
-
- def construct_url(params)
- first = true
- cmd = 'http://classify.oclc.org/classify2/Classify'
-
- params += [ ['summary', 'false' ] ]
-
- params.each do |tuple|
- name, value = tuple
- if (first)
- cmd += '?'
- first = false
- else
- cmd += '&'
- end
- cmd += name + '='
- cmd += ERB::Util.url_encode(value)
- end
-
- return cmd
- end
-
- def isUpper?(c)
- return /[[:upper:]]/.match(c)
- end
-
- def lookup(author_grouping, title)
- params = [
- ['author', massage_author(author_grouping)],
- ['title', title]
- ]
-
- cmd = construct_url(params)
- res = submit_request(cmd)
-
- doc = Nokogiri::XML(res.body)
-
- if "4" == response_code(doc)
- # Multiple matches; pick the first one and re-query
- owi = doc.css("works work")[0]["owi"]
-
- params = [
- ['owi', owi]
- ]
- cmd = construct_url(params)
- res = submit_request(cmd)
-
- #puts res.body
-
- doc = Nokogiri::XML(res.body)
- end
-
- if "2" != response_code(doc)
- puts "Lookup failed"
- return nil
- end
-
- title = doc.css("classify editions edition")[0]["title"]
-
- info = BookClass.new(author_grouping, title)
-
- author = doc.css("classify editions edition")[0]["author"]
- info.author = author
-
- nodes = doc.css("classify recommendations ddc mostPopular")
- if nil != nodes && nodes.length > 0
- ddc = nodes[0]["sfa"]
- info.ddc = ddc
- end
-
- nodes = doc.css("classify recommendations lcc mostPopular")
- if nil != nodes && nodes.length > 0
- lcc = nodes[0]["sfa"]
- end
- info.lcc = lcc
-
- headings = doc.css("classify recommendations fast headings heading")
- headings.each do |heading|
- #puts heading.inspect
- id = heading['ident']
- #puts 'ID: ' + id
- descr = heading.content
- #puts 'DESCR: ' + descr
- info.add_fast(id)
- @fast_set.add(id, descr)
- end
-
- @class_set.ensure_contains!(info)
-
- return info
- end
-
- def massage_author(input)
- if nil == input
- return nil
- end
-
- reading_order = ""
- input.each_char do |c|
- if isUpper?(c) and (reading_order.length > 0)
- reading_order += " "
- end
- reading_order += c
- end
-
- return reading_order
- end
-
- def response_code(doc)
- return doc.css("classify response")[0]["code"]
- end
-
- def save_state
- @class_set.save_state()
- @fast_set.save_state()
- end
-
- def submit_request(cmd)
- puts ('GET ' + cmd)
-
- url = URI.parse(cmd)
- req = Net::HTTP::Get.new(url.to_s)
- res = Net::HTTP.start(url.host, url.port) {|http|
- http.request(req)
- }
- return res
- end
-end
-
lcc = row[1]
grouping = row[2]
author = row[3]
- title = row[4]
+ filename = row[4]
+ title = row[5]
fast = []
- if nil != row[5]
- fast = row[5].split(';')
+ if nil != row[6]
+ fast = row[6].split(';')
end
bookclass = BookClass.new(grouping, title)
bookclass.ddc = ddc
bookclass.lcc = lcc
bookclass.author = author
+ bookclass.filename = filename
fast.each do |id|
bookclass.add_fast(id)
key = construct_key(grouping, title)
@entries[key] = bookclass
+
+ #puts 'LOADED[' + key.inspect + ']: ' + bookclass.inspect
end
end
end
def save(file_name)
CSV.open(file_name, 'w:UTF-8') do |csv|
- csv << ['Dewey', 'LCC', 'Grouping', 'Author', 'Title', 'FAST']
+ csv << ['Dewey', 'LCC', 'Grouping', 'Author', 'Filename', 'Title', 'FAST']
@entries.keys.sort.each do |key|
info = @entries[key]
+ #puts 'SAVING[' + key.inspect + ']: ' + info.inspect
+
ddc = info.ddc
lcc = info.lcc
grouping = info.grouping
author = info.author
+ filename = info.filename
title = info.title
- fast_list = info.fast
- fast_ids = []
- fast_list.each do |tuple|
- fast_ids.push(tuple[0])
- end
- fast = fast_ids.join(';')
+ fast = info.fast.join(';')
- csv << [ ddc, lcc, grouping, author, title, fast ]
+ csv << [ ddc, lcc, grouping, author, filename, title, fast ]
end
end
end
return
end
- File.open(file_name, 'r:UTF-8').each_line do |line|
- cols = line.split(/,/)
- if first
- first = false
- elsif cols.length > 1
- id = cols[0]
- descr = cols[1]
- @entries[id] = descr
+ first = true
+ CSV.open(file_name, 'r:UTF-8') do |csv|
+ csv.to_a.each do |row|
+ if first
+ first = false
+ elsif row.length >= 2
+ id = row[0]
+ descr = row[1]
+ @entries[id] = descr
+ end
end
end
end
def save(file_name)
- File.open(file_name, 'w:UTF-8') do |fd|
- fd.puts('Code,Description')
+ CSV.open(file_name, 'w:UTF-8') do |csv|
+ csv << [ 'Code', 'Description' ]
@entries.keys.sort.each do |key|
- fd.puts(key.to_s + ',' + @entries[key].to_s)
+ csv << [ key.to_s, @entries[key].to_s ]
end
end
end
--- /dev/null
+require 'erb'
+require 'net/http'
+require 'nokogiri'
+
+require 'bookclass'
+require 'classset'
+require 'fastset'
+
+class Lookup
+ def initialize
+ @class_set = ClassSet.new()
+ @fast_set = FastSet.new()
+ end
+
+ def construct_url(params)
+ first = true
+ cmd = 'http://classify.oclc.org/classify2/Classify'
+
+ params += [ ['summary', 'false' ] ]
+
+ params.each do |tuple|
+ name, value = tuple
+ if (first)
+ cmd += '?'
+ first = false
+ else
+ cmd += '&'
+ end
+ cmd += name + '='
+ cmd += ERB::Util.url_encode(value)
+ end
+
+ return cmd
+ end
+
+ def isUpper?(c)
+ return /[[:upper:]]/.match(c)
+ end
+
+ def lookup(author_grouping, title)
+ params = [
+ ['author', massage_author(author_grouping)],
+ ['title', massage_title(title)]
+ ]
+
+ cmd = construct_url(params)
+ res = submit_request(cmd)
+
+ doc = Nokogiri::XML(res.body)
+
+ if "4" == response_code(doc)
+ # Multiple matches; pick the first one and re-query
+ owi = doc.css("works work")[0]["owi"]
+
+ params = [
+ ['owi', owi]
+ ]
+ cmd = construct_url(params)
+ res = submit_request(cmd)
+
+ #puts res.body
+
+ doc = Nokogiri::XML(res.body)
+ end
+
+ if "2" != response_code(doc)
+ puts "Lookup failed"
+ return nil
+ end
+
+ title = doc.css("classify editions edition")[0]["title"]
+
+ info = BookClass.new(author_grouping, title)
+
+ author = doc.css("classify editions edition")[0]["author"]
+ info.author = author
+
+ nodes = doc.css("classify recommendations ddc mostPopular")
+ if nil != nodes && nodes.length > 0
+ ddc = nodes[0]["sfa"]
+ info.ddc = ddc
+ end
+
+ nodes = doc.css("classify recommendations lcc mostPopular")
+ if nil != nodes && nodes.length > 0
+ lcc = nodes[0]["sfa"]
+ end
+ info.lcc = lcc
+
+ headings = doc.css("classify recommendations fast headings heading")
+ headings.each do |heading|
+ #puts heading.inspect
+ id = heading['ident']
+ #puts 'ID: ' + id
+ descr = heading.content
+ #puts 'DESCR: ' + descr
+ info.add_fast(id)
+ @fast_set.add(id, descr)
+ end
+
+ info.filename = title
+
+ @class_set.ensure_contains!(info)
+
+ return info
+ end
+
+ def massage_author(input)
+ if nil == input
+ return nil
+ end
+
+ reading_order = ""
+ input.each_char do |c|
+ if isUpper?(c) and (reading_order.length > 0)
+ reading_order += " "
+ end
+ reading_order += c
+ end
+
+ return reading_order
+ end
+
+ def massage_title(pathname)
+ basename = File.basename(pathname, '.*')
+
+ result = ""
+ basename.each_char do |c|
+ if '_' == c
+ result += ' '
+ elsif '-' == c
+ result += "'"
+ else
+ result += c
+ end
+ end
+
+ return result
+ end
+
+ def response_code(doc)
+ return doc.css("classify response")[0]["code"]
+ end
+
+ def save_state
+ @class_set.save_state()
+ @fast_set.save_state()
+ end
+
+ def submit_request(cmd)
+ puts ('GET ' + cmd)
+
+ url = URI.parse(cmd)
+ req = Net::HTTP::Get.new(url.to_s)
+ res = Net::HTTP.start(url.host, url.port) {|http|
+ http.request(req)
+ }
+ return res
+ end
+end
+
if "--purge" == arg
puts 'Purging database...'
@store.dropSchema()
+ if File.exists?(Store.unclassified_csv)
+ File.delete(Store.unclassified_csv)
+ end
elsif arg.start_with?("--")
abort('ERROR: Unrecognized option "' + arg + '".')
end
require 'series'
class Store
+ @@BASEPATH = '/arc/quanlib' # TODO: FIXME: configure this in a sane way
+ @@UNCLASSIFIED_CSV = @@BASEPATH + '/unclassified.csv'
+
+ def self.unclassified_csv
+ @@UNCLASSIFIED_CSV
+ end
+
def initialize
- @basepath = '/arc/quanlib' # TODO: FIXME: configure this in a sane way
@conn = nil
#@dburl = 'dbi:Pg:quanlib:localhost'
(efspath, efsname) = construct_efs_path(id)
- fullpath = @basepath + '/efs/' + efspath + '/' + efsname
+ fullpath = @@BASEPATH + '/efs/' + efspath + '/' + efsname
return Cover.new(nil, fullpath, mime_type)
(efspath, efsname) = construct_efs_path(efs_id)
- efspath = @basepath + '/efs/' + efspath
+ efspath = @@BASEPATH + '/efs/' + efspath
FileUtils.mkdir_p(efspath)
def populate_classifications_table
puts "Populating the Classifications table..."
first = true
- CSV.foreach(@basepath + '/csv/class.csv') do |row|
+ CSV.foreach(@@BASEPATH + '/csv/class.csv') do |row|
if first
# skip the header row
first = false
def populate_fast_table
puts "Populating the FAST table..."
first = true
- CSV.foreach(@basepath + '/csv/fast.csv') do |row|
+ CSV.foreach(@@BASEPATH + '/csv/fast.csv') do |row|
if first
first = false # skip the header row
else
def populate_series_table
puts "Populating the Series table..."
- CSV.foreach(@basepath + '/csv/series.csv') do |row|
+ CSV.foreach(@@BASEPATH + '/csv/series.csv') do |row|
id = next_id('series_id')
sqlInsert = "INSERT INTO Series (id, age, genre, grouping, code, descr) VALUES ($1, $2, $3, $4, $5, $6);"
args = [id] + row
# .../AuthorName/Title_of_the_Awesome_Book.ext
#
# Author is given as FirstLast. For example,
-# Robert Anson Heinlein is RoberHeinlein, and
+# Robert Anson Heinlein is RobertHeinlein, and
# JKRowling is JoanneRowling.
#
# Book titles have spaces replaced with underscores,