require 'erb'
require 'net/http'
require 'nokogiri'

require 'bookclass'
require 'classset'
require 'fastset'

# Queries the OCLC Classify web service for a book, identified by author and
# title, and records the classification data found in the response (DDC, LCC
# and FAST subject headings) in a ClassSet and a FastSet.
class Lookup
  # Endpoint every query is sent to.
  CLASSIFY_URL = 'http://classify.oclc.org/classify2/Classify'.freeze

  # Response code handled by re-querying with the first listed work's "owi".
  MULTIPLE_MATCHES_CODE = '4'.freeze

  # The only response code #lookup accepts as a usable result.
  # NOTE(review): presumably "single work, detailed" in OCLC's terms -- confirm
  # against the Classify API documentation.
  SUCCESS_CODE = '2'.freeze

  def initialize
    @class_set = ClassSet.new
    @fast_set = FastSet.new
  end

  # Builds the request URL for the given query parameters.
  #
  # params - Array of [name, value] pairs. The array is not modified; a
  #          ['summary', 'false'] pair is always appended to the query.
  #
  # Returns the URL as a String, with every value URL-encoded.
  def construct_url(params)
    query = (params + [['summary', 'false']]).map do |name, value|
      "#{name}=#{ERB::Util.url_encode(value)}"
    end

    "#{CLASSIFY_URL}?#{query.join('&')}"
  end

  # Returns a truthy value (MatchData) when c contains an upper-case
  # character, nil otherwise.
  def isUpper?(c)
    /[[:upper:]]/.match(c)
  end

  # Looks up a book and records its classification.
  #
  # author_grouping - author name in the run-together form accepted by
  #                   #massage_author (may be nil).
  # title           - title to search for.
  #
  # When the service reports multiple matches, the first listed work is
  # re-queried by its "owi" identifier.
  #
  # Returns the populated BookClass, or nil (after printing "Lookup failed")
  # when the service does not return a usable result: an unexpected response
  # code, a response without a code, a multiple-match response listing no
  # works, or a result without any edition.
  def lookup(author_grouping, title)
    doc = fetch_document([
      ['author', massage_author(author_grouping)],
      ['title', title]
    ])

    # Multiple matches; pick the first one and re-query
    doc = requery_first_work(doc) if MULTIPLE_MATCHES_CODE == response_code(doc)

    edition = first_edition(doc)
    if edition.nil?
      puts "Lookup failed"
      return nil
    end

    # The title reported by the service replaces the one that was searched for.
    info = BookClass.new(author_grouping, edition["title"])
    info.author = edition["author"]

    record_recommendations(info, doc)
    record_fast_headings(info, doc)

    @class_set.ensure_contains!(info)

    info
  end

  # Converts a run-together author name into reading order by inserting a
  # space before every upper-case character except the first character.
  #
  # Returns the spaced String, or nil when input is nil.
  def massage_author(input)
    return nil if input.nil?

    reading_order = ''.dup
    input.each_char do |c|
      reading_order << ' ' if isUpper?(c) && !reading_order.empty?
      reading_order << c
    end

    reading_order
  end

  # Returns the "code" attribute of the document's "classify response"
  # element, or nil when the document has no such element (for example when
  # the body was not a Classify XML response).
  def response_code(doc)
    response = doc.css("classify response").first
    response && response["code"]
  end

  # Persists both the class set and the FAST heading set.
  def save_state
    @class_set.save_state
    @fast_set.save_state
  end

  # Logs the request to stdout and performs an HTTP GET on cmd.
  #
  # Returns the Net::HTTPResponse.
  def submit_request(cmd)
    puts 'GET ' + cmd

    url = URI.parse(cmd)
    # Send the origin-form target (path + query), not the absolute URL.
    req = Net::HTTP::Get.new(url.request_uri)
    Net::HTTP.start(url.host, url.port, :use_ssl => url.scheme == 'https') do |http|
      http.request(req)
    end
  end

  private

  # Issues a query built from params and parses the response body as XML.
  def fetch_document(params)
    Nokogiri::XML(submit_request(construct_url(params)).body)
  end

  # Re-queries using the "owi" of the first work listed in a multiple-match
  # response. Returns the new document, or nil when no work is listed.
  def requery_first_work(doc)
    work = doc.css("works work").first
    return nil if work.nil?

    fetch_document([['owi', work["owi"]]])
  end

  # Returns the first edition element of a successful response, or nil when
  # there is no document, the response code is not SUCCESS_CODE, or the
  # response lists no edition.
  def first_edition(doc)
    return nil if doc.nil? || SUCCESS_CODE != response_code(doc)

    doc.css("classify editions edition").first
  end

  # Copies the most popular DDC and LCC recommendations onto info.
  def record_recommendations(info, doc)
    ddc = doc.css("classify recommendations ddc mostPopular").first
    info.ddc = ddc["sfa"] if ddc

    # Unlike ddc, lcc is always assigned (nil when the response has no LCC
    # recommendation); this mirrors the long-standing behaviour of this class.
    lcc = doc.css("classify recommendations lcc mostPopular").first
    info.lcc = lcc && lcc["sfa"]
  end

  # Registers every FAST heading on info and in the shared FAST set.
  def record_fast_headings(info, doc)
    doc.css("classify recommendations fast headings heading").each do |heading|
      id = heading['ident']
      info.add_fast(id)
      @fast_set.add(id, heading.content)
    end
  end
end