Add support for classification of non-fiction books.
[quanlib.git] / classify / queryoclc.rb
diff --git a/classify/queryoclc.rb b/classify/queryoclc.rb
new file mode 100644 (file)
index 0000000..b336804
--- /dev/null
@@ -0,0 +1,67 @@
+# Reads author-grouping/title rows from unclassified.csv and, for each one,
+# fetches its info from classset, falling back to a lookup query when
+# classset has none. Every query is followed by a state save and a 10-19
+# second pause. Rows whose lookup also yields nil are reported on stdout and
+# appended to failed.log.
+
+require 'csv'  # CSV is used below; load it here rather than relying on another file
+require 'classset'
+require 'lookup'
+
+#if ARGV.length != 2
+#  puts 'Usage:  ruby classify.rb author title'
+#  exit 1
+#end
+#
+#author_grouping = ARGV[0]
+#title = ARGV[1]
+
+classset = ClassSet.new
+lookup = Lookup.new
+
+CSV.open('unclassified.csv', 'r:UTF-8') do |csv|
+  # Number of lookup queries issued; only the disabled limiter at the end of the loop reads it.
+  query_count = 0
+
+  csv.each_with_index do |row, index|
+    # The first row is always skipped (presumably a header line), as is any
+    # row with fewer than two fields.
+    next if index.zero? || row.length < 2
+
+    author_grouping, title = row
+
+    info = classset.get(author_grouping, title)
+    if info.nil?
+      query_count += 1
+      info = lookup.lookup(author_grouping, title)
+      puts info.inspect
+
+      # State is saved after every query, not just at exit.
+      puts 'Saving state...'
+      classset.save_state
+      lookup.save_state
+
+      sleep_time = 10 + rand(10)
+      puts "Pausing for #{sleep_time} seconds..."
+      sleep(sleep_time)  # Pause between lookup requests, to be polite to the server
+    end
+
+    if info.nil?
+      # Interpolation (rather than String#+) so a nil field, which CSV yields
+      # for an empty unquoted cell, cannot raise while reporting the failure.
+      puts "WARNING:  lookup of #{author_grouping}, \"#{title}\" failed."
+      File.open('failed.log', 'a:UTF-8') do |fd|
+        fd.puts "#{author_grouping},#{title}"
+      end
+    else
+      classset.ensure_contains!(info)
+    end
+
+#    if query_count > 5
+#      break
+#    end
+  end
+end
+
+classset.save_state
+lookup.save_state