OCLC discontinued its open-access Classify service in January 2024.
This code relied on that service for its lookups, so there's no point in maintaining it anymore.
+++ /dev/null
-# Classification information for a single book
-
-class BookClass
- def initialize(grouping, title)
- @author = nil
- @ddc = nil
- @grouping = grouping
- @fast = []
- @filename = []
- @lcc = nil
- @title = title
- end
-
- def author
- @author
- end
- def author=(value)
- @author = value
- end
- def ddc
- @ddc
- end
- def ddc=(value)
- @ddc = value
- end
- def fast
- @fast
- end
- def filename
- @filename
- end
- def filename=(value)
- @filename = value
- end
- def grouping
- @grouping
- end
- def lcc
- @lcc
- end
- def lcc=(value)
- @lcc = value
- end
- def title
- @title
- end
-
- def add_fast(id)
- @fast.push(id)
- end
-
- def inspect
- data = []
-
- if nil != @author_name
- data.push('author_name="' + @author_name + '"')
- end
- if nil != @ddc
- data.push('ddc="' + @ddc + '"')
- end
- if nil != @grouping
- data.push('grouping="' + @grouping + '"')
- end
- if nil != @fast
- data.push('fast=' + @fast.inspect)
- end
- if nil != @filename
- data.push('filename=' + @filename.to_s + '"')
- end
- if nil != @lcc
- data.push('lcc="' + @lcc + '"')
- end
- if nil != @title
- data.push('title="' + @title + '"')
- end
-
- return '(BookClass:' + data.join(',') + ')'
- end
-end
-
+++ /dev/null
-require 'csv'
-
-require 'bookclass'
-
-class ClassSet
- @@class_csv_file = 'class.csv'
-
- def initialize
- @entries = {}
- load!(@@class_csv_file)
- end
-
- def add!(info)
- key = construct_key(info.grouping, info.title)
- @entries[key] = info
- end
-
- def construct_key(author_grouping, title)
- author_grouping.to_s + '|' + title.to_s
- end
-
- def get(author_grouping, title)
- key = construct_key(author_grouping, title)
- if @entries.has_key?(key)
- return @entries[key]
- else
- return nil
- end
- end
-
- def has_key?(author_grouping, title)
- @entries.has_key?(construct_key(author_grouping, title))
- end
-
- def ensure_contains!(info)
- if ! has_key?(info.grouping, info.title)
- add!(info)
- end
- end
-
- def inspect
- data = []
-
- if nil != @entries
- data.push('entries=' + @entries.inspect + '')
- end
-
- return '(ClassSet:' + data.join(',') + ')'
- end
-
- def load!(file_name)
- first = true
- @entries = {}
-
- if ! File.exist?(file_name)
- puts 'WARNING: file "' + file_name + '" not found.'
- return
- end
-
- File.open(file_name, 'r:UTF-8') do |fd|
- csv = CSV.new(fd)
- csv.to_a.each do |row|
- if first
- first = false
- elsif row.length >= 6
- ddc = row[0]
- lcc = row[1]
- grouping = row[2]
- author = row[3]
- filename = row[4]
- title = row[5]
- fast = []
- if nil != row[6]
- fast = row[6].split(';')
- end
-
- bookclass = BookClass.new(grouping, title)
- bookclass.ddc = ddc
- bookclass.lcc = lcc
- bookclass.author = author
- bookclass.filename = filename
-
- fast.each do |id|
- bookclass.add_fast(id)
- end
-
- key = construct_key(grouping, title)
- @entries[key] = bookclass
-
- #puts 'LOADED[' + key.inspect + ']: ' + bookclass.inspect
- end
- end
- end
- end
-
- def save(file_name)
- CSV.open(file_name, 'w:UTF-8') do |csv|
- csv << ['Dewey', 'LCC', 'Grouping', 'Author', 'Filename', 'Title', 'FAST']
-
- @entries.keys.sort.each do |key|
- info = @entries[key]
-
- #puts 'SAVING[' + key.inspect + ']: ' + info.inspect
-
- ddc = info.ddc
- lcc = info.lcc
- grouping = info.grouping
- author = info.author
- filename = info.filename
- title = info.title
- fast = info.fast.join(';')
-
- csv << [ ddc, lcc, grouping, author, filename, title, fast ]
- end
- end
- end
-
- def save_state
- save(@@class_csv_file)
- end
-end
-
+++ /dev/null
-
-class FastSet
- @@fast_csv_file = 'fast.csv'
-
- def initialize
- @entries = {}
- load!(@@fast_csv_file)
- end
-
- def add(id, descr)
- @entries[id] = descr
- end
-
- def has_key?(value)
- @entries.has_key?(value)
- end
-
- def inspect
- data = []
-
- if nil != @entries
- data.push('entries=' + @entries.inspect + '')
- end
-
- return '(FastSet:' + data.join(',') + ')'
- end
-
- def load!(file_name)
- first = true
- @entries = {}
-
- if ! File.exist?(file_name)
- puts 'WARNING: file "' + file_name + '" not found.'
- return
- end
-
- first = true
- CSV.open(file_name, 'r:UTF-8') do |csv|
- csv.to_a.each do |row|
- if first
- first = false
- elsif row.length >= 2
- id = row[0]
- descr = row[1]
- @entries[id] = descr
- end
- end
- end
- end
-
- def save(file_name)
- CSV.open(file_name, 'w:UTF-8') do |csv|
- csv << [ 'Code', 'Description' ]
-
- @entries.keys.sort.each do |key|
- csv << [ key.to_s, @entries[key].to_s ]
- end
- end
- end
-
- def save_state
- save(@@fast_csv_file)
- end
-end
-
+++ /dev/null
-require 'erb'
-require 'net/http'
-require 'nokogiri'
-
-require 'bookclass'
-require 'classset'
-require 'fastset'
-
-class Lookup
- def initialize
- @class_set = ClassSet.new()
- @fast_set = FastSet.new()
- end
-
- def construct_url(params)
- first = true
- cmd = 'http://classify.oclc.org/classify2/Classify'
-
- params += [ ['summary', 'false' ] ]
-
- params.each do |tuple|
- name, value = tuple
- if (first)
- cmd += '?'
- first = false
- else
- cmd += '&'
- end
- cmd += name + '='
- cmd += ERB::Util.url_encode(value)
- end
-
- return cmd
- end
-
- def isUpper?(c)
- return /[[:upper:]]/.match(c)
- end
-
- def lookup(author_grouping, pathname)
- params = [
- ['author', massage_author(author_grouping)],
- ['title', massage_title(pathname)]
- ]
-
- cmd = construct_url(params)
- res = submit_request(cmd)
-
- doc = Nokogiri::XML(res.body)
-
- if "4" == response_code(doc)
- # Multiple matches; pick the first one and re-query
- owi = doc.css("works work")[0]["owi"]
-
- params = [
- ['owi', owi]
- ]
- cmd = construct_url(params)
- res = submit_request(cmd)
-
- #puts res.body
-
- doc = Nokogiri::XML(res.body)
- end
-
- if "2" != response_code(doc)
- # Lookup failed. Let's try shortening the title, if it's in multiple parts
-
- #TODO TODO
- puts "Lookup failed"
- return nil
- end
-
- title = doc.css("classify editions edition")[0]["title"]
-
- info = BookClass.new(author_grouping, title)
-
- author = doc.css("classify editions edition")[0]["author"]
- info.author = author
-
- nodes = doc.css("classify recommendations ddc mostPopular")
- if nil != nodes && nodes.length > 0
- ddc = nodes[0]["sfa"]
- info.ddc = ddc
- end
-
- nodes = doc.css("classify recommendations lcc mostPopular")
- if nil != nodes && nodes.length > 0
- lcc = nodes[0]["sfa"]
- end
- info.lcc = lcc
-
- headings = doc.css("classify recommendations fast headings heading")
- headings.each do |heading|
- #puts heading.inspect
- id = heading['ident']
- #puts 'ID: ' + id
- descr = heading.content
- #puts 'DESCR: ' + descr
- info.add_fast(id)
- @fast_set.add(id, descr)
- end
-
- info.filename = title
-
- @class_set.ensure_contains!(info)
-
- return info
- end
-
- def massage_author(input)
- if nil == input
- return nil
- end
-
- reading_order = ""
- input.each_char do |c|
- if isUpper?(c) and (reading_order.length > 0)
- reading_order += " "
- end
- reading_order += c
- end
-
- return reading_order
- end
-
- def massage_title(pathname)
- basename = File.basename(pathname, '.*')
-
- basename.gsub!('_', ' ')
- basename.gsub!('--', ': ')
- basename.gsub!('-s ', "'s ")
- basename.gsub!('s- ', "s' ")
-
- return basename
- end
-
- def response_code(doc)
- return doc.css("classify response")[0]["code"]
- end
-
- def save_state
- @class_set.save_state()
- @fast_set.save_state()
- end
-
- def submit_request(cmd)
- puts ('GET ' + cmd)
-
- url = URI.parse(cmd)
- req = Net::HTTP::Get.new(url.to_s)
- res = Net::HTTP.start(url.host, url.port) {|http|
- http.request(req)
- }
- return res
- end
-end
-
+++ /dev/null
-require 'classset'
-require 'lookup'
-
-#if ARGV.length != 2
-# puts 'Usage: ruby classify.rb author title'
-# exit 1
-#end
-#
-#author_grouping = ARGV[0]
-#title = ARGV[1]
-
-classset = ClassSet.new()
-lookup = Lookup.new()
-
-CSV.open('unclassified.csv', 'r:UTF-8') do |csv|
- query_count = 0
- first = true
- csv.to_a.each do |row|
- if first
- first = false
- elsif row.length >= 2
- author_grouping = row[0]
- pathname = row[1]
-
- info = classset.get(author_grouping, pathname)
- if nil == info
- query_count += 1
- info = lookup.lookup(author_grouping, pathname)
-
- puts info.inspect()
-
- puts 'Saving state...'
- classset.save_state()
- lookup.save_state()
-
- sleep_time = 10 + rand(10)
- puts 'Pausing for ' + sleep_time.to_s + ' seconds...'
- sleep(sleep_time) # Pause between lookup requests, to be polite to the server
- end
-
- if nil != info
- classset.ensure_contains!(info)
- else
- puts 'WARNING: lookup of ' + author_grouping + ', "' + pathname + '" failed.'
- File.open('failed.log', 'a:UTF-8') do |fd|
- fd.puts(author_grouping.to_s + ',' + pathname.to_s)
- end
- end
-
-# if query_count > 5
-# break
-# end
- end
- end
-end
-
-classset.save_state()
-lookup.save_state()