Minor change to oclc lookup heuristic.
[quanlib.git] / store.rb
index 0b7b476bbd860ae9e55ff1fc2e887f88066dc84f..69e1278d40393ea9d3d7e75d34cdf452645e8ed1 100644 (file)
--- a/store.rb
+++ b/store.rb
@@ -1,23 +1,39 @@
 
+require 'csv'
 require 'fileutils'
+require 'inifile'
 require 'pg'
 
+require 'series'
+
 class Store
-  def initialize
-    @basepath = '/home/chris/prog/quanlib/efs' # TODO: FIXME: configure this in a sane way
+  def unclassified_csv
+    @basePath + '/csv/unclassified.csv'
+  end
+
+  def initialize(config_file)
     @conn = nil
 
-    #@dburl = 'dbi:Pg:quanlib:localhost'
-    @dbhost = "localhost"
+    config = IniFile.load(config_file)
+    if nil == config
+      puts 'FATAL:  Failed to load config file "' + config_file + '".  Aborting initialization.'
+      return
+    end
+
+    section = config['database']
+    @dbhost = section['host']
     @dbport = 5432
-    @dbname = 'quanlib'
-    @dbuser = 'quanlib'
-    @dbpass = 'quanlib'
+    @dbname = section['name']
+    @dbuser = section['user']
+    @dbpass = section['pass']
+
+    section = config['filesystem']
+    @basePath = section['basePath']
   end
 
   def connect
     # @conn = PGconn.connect('localhost', 5432, '', '', 'quanlib', 'quanlib', 'quanlib')
-    @conn = PG.connect('localhost', 5432, '', '', 'quanlib', 'quanlib', 'quanlib')
+    @conn = PG.connect(@dbhost, @dbport, '', '', @dbname, @dbuser, @dbpass)
     return @conn
   end
 
@@ -32,28 +48,42 @@ class Store
     return path, name
   end
 
-  def create_schema
+  def create_schema(skip_class)
     create_authors = 
 <<EOS
       CREATE TABLE Authors (
-        id          SERIAL PRIMARY KEY,
+        id          INTEGER PRIMARY KEY,
         grouping    VARCHAR(64),
-        reading     VARCHAR(128),
-        sort        VARCHAR(128)
+        reading     VARCHAR(256),
+        sort        VARCHAR(256)
       );
 EOS
 
     create_books = 
 <<EOS
       CREATE TABLE Books (
-        id          SERIAL PRIMARY KEY,
-        author      INTEGER REFERENCES Authors(id),
-        cover       INTEGER,
-        description TEXT,
-        path        VARCHAR(256),
-        series      VARCHAR(128),
-        title       VARCHAR(196),
-        volume      VARCHAR(16)
+        id             INTEGER PRIMARY KEY,
+        author         INTEGER REFERENCES Authors(id),
+        classification INTEGER REFERENCES Classifications(id),
+        cover          INTEGER,
+        description    TEXT,
+        path           VARCHAR(256),
+        series         INTEGER REFERENCES Series(id),
+        title          VARCHAR(256),
+        volume         VARCHAR(16)
+      );
+EOS
+
+    create_classification =
+<<EOS
+      CREATE TABLE Classifications (
+        id              INTEGER PRIMARY KEY,
+        ddc             VARCHAR(32),
+        lcc             VARCHAR(32),
+        author_grouping VARCHAR(64),
+        author_sort     VARCHAR(128),
+        title_grouping  VARCHAR(256),
+        title           VARCHAR(256)
       );
 EOS
 
@@ -65,32 +95,101 @@ EOS
       );
 EOS
 
+    create_fast = 
+<<EOS
+      CREATE TABLE FAST (
+        id          VARCHAR(32) PRIMARY KEY,
+        descr       VARCHAR(128)
+      );
+EOS
+
+    # Associative entity, linking FAST and Classifications tables
+    # in a 0..n to 0..m relationship
+    create_fast_classifications =
+<<EOS
+      CREATE TABLE FAST_Classifications (
+        fast           VARCHAR(32) REFERENCES FAST(id),
+        classification INTEGER REFERENCES Classifications(id)
+      );
+EOS
+
+    create_series = 
+<<EOS
+      CREATE TABLE Series (
+        id          INTEGER PRIMARY KEY,
+        age         VARCHAR(32),
+        genre       VARCHAR(32),
+        grouping    VARCHAR(64),
+        code        VARCHAR(16),
+        descr       VARCHAR(128)
+      )
+EOS
+
     stmts = [
       create_authors,
-      create_books,
+      create_classification,
       create_efs,
-      'CREATE SEQUENCE efs_id;'
+      create_fast,
+      create_series,
+      create_books,
+      create_fast_classifications,
+      'CREATE SEQUENCE author_id;',
+      'CREATE SEQUENCE book_id;',
+      'CREATE SEQUENCE classification_id;',
+      'CREATE SEQUENCE efs_id;',
+      'CREATE SEQUENCE series_id;'
     ]
 
     for stmt in stmts
       @conn.exec(stmt)
     end
+
+    if skip_class == false
+      populate_fast_table()
+      populate_classifications_table()
+    end
+
+    populate_series_table()
+
   end
 
   def dropSchema
     stmts = [
       'DROP TABLE Books;',
+      'DROP TABLE FAST_Classifications;',
       'DROP TABLE Authors;',
+      'DROP TABLE Classifications;',
       'DROP TABLE EFS;',
-      'DROP SEQUENCE efs_id;'
+      'DROP TABLE FAST;',
+      'DROP TABLE Series;',
+      'DROP SEQUENCE author_id;',
+      'DROP SEQUENCE book_id;',
+      'DROP SEQUENCE classification_id;',
+      'DROP SEQUENCE efs_id;',
+      'DROP SEQUENCE series_id;'
     ]
 
     for stmt in stmts do
-      @conn.exec(stmt)
+      begin
+        @conn.exec(stmt)
+      rescue Exception => exc
+        puts 'WARNING:  "' + stmt + '" failed:  ' + exc.to_s
+      end
     end
   end
 
-  def init_db
+  def find_author(author)
+    sqlSelect = "SELECT id FROM Authors WHERE grouping=$1 AND reading=$2 AND sort=$3;"
+    args = [author.grouping, author.reading_order, author.sort_order]
+    @conn.exec_params(sqlSelect, args) do |rs|
+      if rs.ntuples > 0
+        return rs[0]['id']
+      end
+    end
+    return nil
+  end
+
+  def init_db(skip_class)
     sql = "SELECT 1 FROM pg_tables WHERE tableowner='quanlib' AND tablename='books'"
     found = false
     @conn.exec(sql).each do |row|
@@ -98,17 +197,20 @@ EOS
     end
 
     if ! found
-      create_schema()
+      create_schema(skip_class)
     end
   end
 
-  def find_author(author)
-    sqlSelect = "SELECT id FROM Authors WHERE grouping=$1 AND reading=$2 AND sort=$3;"
-    args = [author.grouping, author.reading_order, author.sort_order]
+  def load_author(id)
+    sqlSelect = "SELECT grouping, reading, sort FROM Authors WHERE id=$1"
+    args = [id]
     @conn.exec_params(sqlSelect, args) do |rs|
-      if rs.ntuples > 0
-        return rs[0]['id']
+      if rs.ntuples != 1
+        raise "Expected 1 row for " + id + " but got " + rs.ntuples + ":  " + sqlSelect
       end
+      row = rs[0]
+      author = Author.new(row['grouping'], row['reading'], row['sort'])
+      return author
     end
     return nil
   end
@@ -116,8 +218,9 @@ EOS
   def store_author(author)
     id = find_author(author)
     if nil == id
-      sqlInsert = "INSERT INTO Authors(grouping, reading, sort) VALUES ($1, $2, $3);"
-      args = [author.grouping, author.reading_order, author.sort_order]
+      id = next_id('author_id')
+      sqlInsert = "INSERT INTO Authors(id, grouping, reading, sort) VALUES ($1, $2, $3, $4);"
+      args = [id, author.grouping, author.reading_order, author.sort_order]
       begin 
         rs = @conn.exec_params(sqlInsert, args)
       rescue Exception => e
@@ -128,16 +231,49 @@ EOS
         rs.clear if rs
       end
     end
-    return find_author(author)
+    return id
+  end
+
+  def load_book(id)
+    sql = "SELECT author, classification, cover, description, path, series, title, volume FROM Books WHERE id=$1;"
+    book = nil
+
+    begin
+      @conn.exec_params(sql, [id]) do |rs|
+        if 1 != rs.ntuples
+          raise 'Expected one row in Books for id ' + id + ', but found ' + rs.length + '.'
+          return nil
+        end
+        row = rs[0]
+
+        book = Book.new(self)
+        book.author = load_author(row['author'])
+        book.classification_id = row['classification']
+        book.cover = load_cover(row['cover'])
+        book.description = row['description']
+        book.path = row['path']
+        book.series_id = row['series']
+        book.title = row['title']
+        book.volume = row['volume']
+      end    
+    rescue Exception => e
+      puts sql + ": " + id
+      puts e.message
+      puts $@
+    end
+
+    return book
   end
 
   def store_book(book)
-    sql = "INSERT INTO Books (author, cover, description, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7);"
+    sql = "INSERT INTO Books (id, author, classification, cover, description, path, series, title, volume) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9);"
+
+    book_id = next_id('book_id')
 
     author_id = store_author(book.author)
     (efs_id, mime_type) = store_cover(book)
 
-    args = [author_id, efs_id, book.description(), book.path(), book.series(), book.title(), book.volume()]
+    args = [book_id, author_id, book.classification_id, efs_id, book.description(), book.path(), book.series_id(), book.title(), book.volume()]
 
     begin
       rs = @conn.exec_params(sql, args)
@@ -148,6 +284,71 @@ EOS
     ensure
       rs.clear if rs
     end
+
+    return book_id
+  end
+
+  def find_classification(author_grouping, title_grouping)
+    #puts 'find_classification("' + author_grouping.inspect + '", "' + title_grouping.inspect + '")...'
+    sql = "SELECT id FROM Classifications WHERE author_grouping = $1 AND title_grouping = $2;"
+    @conn.exec_params(sql, [author_grouping, title_grouping]) do |rs|
+      if rs.ntuples > 0
+        #puts '  --> ' + rs[0]['id'].inspect
+        return rs[0]['id']
+      end
+    end
+    #puts '  --> NIL'
+    return nil
+  end
+
+  def load_classification(id)
+    sql  = "SELECT ddc, lcc, author_grouping, author_sort, title_grouping, title "
+    sql += " FROM Classifications WHERE id=$1"
+    @conn.exec_params(sql, [id]) do |rs|
+      if rs.ntuples > 0
+        row = rs[0]
+        ddc = row['ddc']
+        lcc = row['lcc']
+        author_grouping = row['author_grouping']
+        author = row['author_sort']
+        title_grouping = row['title_grouping']
+        title = row['title']
+
+        result = Classification.new(ddc, lcc, author_grouping, author, title_grouping, title)
+        result.id = id
+        return result
+      end
+    end
+
+    return nil
+  end
+
+  def load_cover(id)
+    if nil == id
+      return nil
+    end
+
+    mime_type = 'application/octet-stream'
+
+    sql = "SELECT mimeType FROM Efs WHERE id=$1"
+    @conn.exec_params(sql, [id]) do |rs|
+      if rs.ntuples != 1
+        raise "Expected one row but got " + rs.ntuples + ": " + sql + ": " + id
+      end
+      mime_type = rs[0]['mimeType']
+    end
+
+    (efspath, efsname) = construct_efs_path(id)
+
+    fullpath = @basePath + '/efs/' + efspath + '/' + efsname
+
+    return Cover.new(nil, fullpath, mime_type)
+
+    #File.open(fullpath, 'rb') do |is|
+    #  return Cover.new(is, fullpath, mime_type)
+    #end
+    #
+    #return nil
   end
 
   def store_cover(book)
@@ -168,16 +369,17 @@ EOS
 
     (efspath, efsname) = construct_efs_path(efs_id)
 
-    efspath = @basepath + '/' + efspath
+    efspath = @basePath + '/efs/' + efspath
 
     FileUtils.mkdir_p(efspath)
 
-    (filepath, mimetype) = cover.writeImage(efspath, efsname)
+    (filepath, mimetype) = cover.write_image(efspath, efsname)
 
     sql = "INSERT INTO efs VALUES ($1, $2)"
     begin
       rs = @conn.exec_params(sql, [efs_id, mimetype])
     rescue Exception => e
+      puts sql + ": " + efs_id + ", " + mimetype
       puts e.message
       puts $@
     ensure
@@ -186,5 +388,177 @@ EOS
     
     return efs_id, mimetype
   end
+
+  def exec_id_query(sql, args)
+    ids = []
+    @conn.exec_params(sql, args) do |rs|
+      rs.each do |row|
+        ids.push(row['id'])
+      end
+    end
+    return ids
+  end
+
+  def exec_update(sql, args)
+    begin
+      rs = @conn.exec_params(sql, args)
+    rescue Exception => e
+      puts sql + ": " + args.inspect()
+      puts e.message
+      puts $@
+    ensure
+      rs.clear if rs
+    end
+  end
+
+  def next_id(seq_name)
+    id = nil
+    @conn.exec("SELECT nextval('" + seq_name + "');") do |rs|
+      id = rs[0]['nextval']
+    end 
+    return id
+  end
+
+  def get_series(grouping, code)
+    if nil == code
+      return nil
+    end
+
+    sql = "SELECT id FROM Series WHERE grouping=$1 AND code=$2;"
+    args = [grouping, code]
+    @conn.exec_params(sql, args).each do |row|
+      return row['id']
+    end
+
+    # TODO:  Create a new series object here?
+    puts 'WARNING:  series("' + grouping + '", "' + code + '") not found.'
+    return nil
+  end
+
+  def load_series(id)
+    sql = "SELECT age,genre,grouping,code,descr FROM Series WHERE id=$1;"
+    args = [id]
+    @conn.exec_params(sql, args) do |rs|
+      if rs.ntuples > 0
+        row = rs[0]
+        series = Series.new(id)
+        series.age = row['age']
+        series.genre = row['genre']
+        series.grouping = row['grouping']
+        series.code = row['code']
+        series.descr = row['descr']
+        return series
+      end
+    end
+    return nil
+  end
+
+  def populate_classifications_table
+    puts "Populating the Classifications table..."
+    first = true
+    CSV.foreach(@basePath + '/csv/class.csv') do |row|
+      if first
+        # skip the header row
+        first = false
+      else
+
+        # First, add a row to the Classifications table
+
+        id = next_id('classification_id')
+        ddc = row[0]
+        lcc = row[1]
+        author_grouping = row[2]
+        author_sort = row[3]
+        title_grouping = row[4]
+        title = row[5]
+        
+        sqlInsert = "INSERT INTO Classifications (id, ddc, lcc, author_grouping, author_sort, title_grouping, title) VALUES ($1, $2, $3, $4, $5, $6, $7);"
+        args = [id, ddc, lcc, author_grouping, author_sort, title_grouping, title]
+        exec_update(sqlInsert, args)
+
+        # Second, link up with the appropriate FAST table entries
+
+        fast = []
+        input = row[6]
+        if input.length > 0
+          fast = input.split(';')
+        end 
+
+        fast.each do |fast_id|
+          sqlInsert = "INSERT INTO FAST_Classifications (fast, classification) VALUES ($1, $2);"
+          args = [fast_id, id]
+          exec_update(sqlInsert, args)
+        end
+      end
+    end
+  end
+
+  def populate_fast_table
+    puts "Populating the FAST table..."
+    first = true
+    CSV.foreach(@basePath + '/csv/fast.csv') do |row|
+      if first
+        first = false  # skip the header row
+      else
+        id = row[0]
+        descr = row[1]
+        sqlInsert = "INSERT INTO FAST (id, descr) VALUES ($1, $2);"
+        exec_update(sqlInsert, [id, descr])
+      end
+    end
+  end
+
+  def populate_series_table
+    puts "Populating the Series table..."
+    CSV.foreach(@basePath + '/csv/series.csv') do |row|
+      id = next_id('series_id')
+      sqlInsert = "INSERT INTO Series (id, age, genre, grouping, code, descr) VALUES ($1, $2, $3, $4, $5, $6);"
+      args = [id] + row
+      exec_update(sqlInsert, args)
+    end
+  end
+
+  def query_books_by_author(pattern)
+    sql = 
+<<EOS
+      SELECT b.id FROM Authors a 
+      INNER JOIN Books b ON b.author=a.id 
+      LEFT OUTER JOIN Series s on s.id=b.series
+      WHERE upper(a.grouping) LIKE $1 
+      ORDER BY a.grouping, b.series, b.volume, b.title
+EOS
+    return exec_id_query(sql, [pattern])
+  end
+
+  def query_books_by_ddc
+    sql = 
+<<EOS
+      SELECT b.id FROM Classifications c 
+      INNER JOIN Books b ON b.classification=c.id
+      ORDER BY c.ddc
+EOS
+    return exec_id_query(sql, [])
+  end
+
+  def query_books_by_series_id(id)
+    sql = 
+<<EOS
+      SELECT b.id FROM Books b
+      WHERE b.series = $1
+      ORDER BY b.volume,b.title
+EOS
+    return exec_id_query(sql, [id])
+  end
+
+  def query_series_by_age(pattern)
+    sql = 
+<<EOS
+      SELECT s.id 
+      FROM Series s
+      WHERE s.age LIKE $1
+      ORDER BY s.grouping,s.descr
+EOS
+    return exec_id_query(sql, [pattern])
+  end
 end