# classify/lookup.rb
#
# Recovered from a gitweb "blobdiff_plain" dump (new file, 161 lines):
# http://jaekl.net/gitweb/?p=quanlib.git;a=blobdiff_plain;f=classify%2Flookup.rb;fp=classify%2Flookup.rb;h=1fa5faaa9dc6f948610960644f194af496b4fc0c;hp=0000000000000000000000000000000000000000;hb=0aeb88ddc91aa8f9fd8d93a8004d5df5094a4613;hpb=b8fe580f73b094e366643388ecd2d184b643616a

require 'erb'
require 'net/http'
require 'nokogiri'

require 'bookclass'
require 'classset'
require 'fastset'

# Queries the Classify web service at classify.oclc.org for a book and turns
# the XML answer into a BookClass record (author, title, ddc, lcc and fast
# heading ids). Every record is registered in a ClassSet and every fast
# heading in a FastSet; both can be persisted with #save_state.
class Lookup
  # Endpoint every query is sent to.
  CLASSIFY_URL = 'http://classify.oclc.org/classify2/Classify'.freeze

  # Response code that #lookup accepts as a usable single-work answer.
  RESPONSE_SINGLE_WORK = '2'.freeze

  # Response code returned when the query matched several works.
  RESPONSE_MULTIPLE_WORKS = '4'.freeze

  def initialize
    @class_set = ClassSet.new
    @fast_set = FastSet.new
  end

  # Builds the request URL for a query.
  #
  # @param params [Array<Array>] [name, value] pairs; values are URL-encoded
  #   with ERB::Util.url_encode (a nil value is sent as an empty string)
  # @return [String] CLASSIFY_URL followed by the query string; a
  #   "summary=false" parameter is always appended after the given ones
  def construct_url(params)
    pairs = (params + [%w[summary false]]).map do |name, value|
      "#{name}=#{ERB::Util.url_encode(value)}"
    end

    "#{CLASSIFY_URL}?#{pairs.join('&')}"
  end

  # @param c [String] text to test (callers pass a single character)
  # @return [MatchData, nil] truthy when c contains an uppercase character
  def isUpper?(c)
    /[[:upper:]]/.match(c)
  end

  # Looks a book up by author and title and records the result.
  #
  # When the service reports multiple matching works, the first one is
  # re-queried by its "owi" identifier.
  #
  # @param author_grouping [String, nil] author name in run-together form
  #   (see #massage_author)
  # @param title [String] pathname of the book file (see #massage_title)
  # @return [BookClass, nil] the populated record, or nil (after printing
  #   "Lookup failed") when the service gives no usable single-work answer
  def lookup(author_grouping, title)
    doc = fetch_document(
      [
        ['author', massage_author(author_grouping)],
        ['title', massage_title(title)]
      ]
    )

    if RESPONSE_MULTIPLE_WORKS == response_code(doc)
      # Multiple matches; pick the first one and re-query.
      first_work = doc.css('works work').first
      doc = fetch_document([['owi', first_work['owi']]]) if first_work
    end

    edition = doc.css('classify editions edition').first
    if RESPONSE_SINGLE_WORK != response_code(doc) || edition.nil?
      puts 'Lookup failed'
      return nil
    end

    edition_title = edition['title']
    info = BookClass.new(author_grouping, edition_title)
    info.author = edition['author']

    ddc_node = doc.css('classify recommendations ddc mostPopular').first
    info.ddc = ddc_node['sfa'] if ddc_node

    # Unlike ddc, lcc is always assigned, and is nil when the response has no
    # lcc recommendation.
    lcc_node = doc.css('classify recommendations lcc mostPopular').first
    info.lcc = lcc_node ? lcc_node['sfa'] : nil

    doc.css('classify recommendations fast headings heading').each do |heading|
      ident = heading['ident']
      info.add_fast(ident)
      @fast_set.add(ident, heading.content)
    end

    # NOTE(review): filename receives the edition title reported by the
    # service, not the pathname passed in as `title` -- confirm this is the
    # intended value.
    info.filename = edition_title

    @class_set.ensure_contains!(info)

    info
  end

  # Inserts a space before every uppercase character except the first
  # character of the string, e.g. "SmithJohn" -> "Smith John".
  #
  # @param input [String, nil]
  # @return [String, nil] nil when input is nil
  def massage_author(input)
    return nil if input.nil?

    pieces = input.each_char.with_index.map do |c, i|
      (i > 0 && isUpper?(c)) ? " #{c}" : c
    end

    pieces.join
  end

  # Derives a title from a file path: drops the directory and extension,
  # then turns every "_" into a space and every "-" into an apostrophe.
  #
  # @param pathname [String]
  # @return [String]
  def massage_title(pathname)
    File.basename(pathname, '.*').tr('-_', "' ")
  end

  # @param doc [Nokogiri::XML::Document] parsed service response
  # @return [String, nil] the "code" attribute of the response element, or
  #   nil when the document has no such element
  def response_code(doc)
    response = doc.css('classify response').first
    response && response['code']
  end

  # Persists both the class set and the fast set.
  def save_state
    @class_set.save_state
    @fast_set.save_state
  end

  # Prints the request to stdout and performs an HTTP GET.
  #
  # @param cmd [String] absolute http(s) URL
  # @return [Net::HTTPResponse]
  def submit_request(cmd)
    puts "GET #{cmd}"

    url = URI.parse(cmd)
    request = Net::HTTP::Get.new(url.request_uri)

    Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http|
      http.request(request)
    end
  end

  private

  # Sends one query and parses the response body as XML.
  def fetch_document(params)
    Nokogiri::XML(submit_request(construct_url(params)).body)
  end
end