# classify/queryoclc.rb
#
# Batch-classifies the works listed in unclassified.csv.
#
# For every data row (author grouping, title) the script first asks the
# ClassSet for an existing entry. When there is none it queries Lookup, saves
# the state of both objects, and pauses before continuing. Rows for which no
# result is available are reported on stdout and appended to failed.log.

require 'csv'
require 'classset'
require 'lookup'

# Disabled single-title command-line mode, kept for reference:
#if ARGV.length != 2
#  puts 'Usage: ruby classify.rb author title'
#  exit 1
#end
#
#author_grouping = ARGV[0]
#title = ARGV[1]

classset = ClassSet.new
lookup = Lookup.new

CSV.open('unclassified.csv', 'r:UTF-8') do |csv|
  # Number of lookups issued so far; only read by the disabled limit at the
  # bottom of the loop.
  query_count = 0

  # The first row is never classified (presumably a header row -- confirm
  # against the file that produces unclassified.csv).
  csv.shift

  # Stream the remaining rows rather than loading the whole file with to_a.
  csv.each do |row|
    # Rows with fewer than two fields cannot name a work; skip them.
    next if row.length < 2

    author_grouping, title = row

    info = classset.get(author_grouping, title)
    if info.nil?
      query_count += 1
      info = lookup.lookup(author_grouping, title)
      puts info.inspect

      # Persist after every lookup so an interrupted run keeps its results.
      puts 'Saving state...'
      classset.save_state
      lookup.save_state

      sleep_time = 10 + rand(10)
      puts "Pausing for #{sleep_time} seconds..."
      sleep(sleep_time) # Pause between lookup requests, to be polite to the server
    end

    if info.nil?
      # Interpolation (rather than String#+) keeps this from raising TypeError
      # when a CSV field is empty and therefore nil.
      puts "WARNING: lookup of #{author_grouping}, \"#{title}\" failed."
      File.open('failed.log', 'a:UTF-8') do |fd|
        fd.puts("#{author_grouping},#{title}")
      end
    else
      classset.ensure_contains!(info)
    end

    # Disabled debugging limit on the number of lookups per run:
    # break if query_count > 5
  end
end

classset.save_state
lookup.save_state