Add support for classification of non-fiction books.
[quanlib.git] / book.rb
diff --git a/book.rb b/book.rb
index f59b61a66ea44d17a86e7d999e34496b84fbea99..5b698e7a474e2909433c0b6c893dc13af802c193 100644 (file)
--- a/book.rb
+++ b/book.rb
@@ -4,20 +4,29 @@ require 'zip'
 
 require 'author'
 require 'cover'
+require 'store'
 
 class Book
-  def initialize(fileName)
+  @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
+
+  def initialize(store)  # creates an empty book; fields are filled later (see load_from_file! and the writers)
     @author = nil
+    @classification_id = nil  # assigned in parse_file_name! from @store.find_classification
     @cover = nil
-    @path = fileName
-    @series = nil
+    @description = nil  # assigned in scanOpf! from the first dc:description node, if any
+    @path = nil  # assigned by load_from_file! or the path= writer
+    @series_id = nil  # assigned in parse_file_name! from @store.get_series
+    @store = store  # kept for the series and classification lookups made by this class
     @title = nil
     @volume = nil
+  end
 
-    parseFileName!(fileName)
+  def load_from_file!(fileName)  # records fileName as @path, then populates the book from it
+    @path = fileName
+    parse_file_name!(fileName)  # derives author/series/volume/title and scans .epub or .pdf files
   end
 
-  def self.canHandle?(fileName)
+  def self.can_handle?(fileName)
     if nil == fileName
       return false
     end
@@ -29,14 +38,46 @@ class Book
       return true
     end
 
+    if lowerName.end_with?(".pdf")
+      return true
+    end
+
     return false
   end
 
+  def author
+    return @author
+  end
+
+  def author=(value)
+    @author = value
+  end
+
+  def classification_id
+    @classification_id
+  end
+
+  def classification_id=(value)
+    @classification_id = value
+  end
+
   def cover
     return @cover
   end
 
-  def describe
+  def cover=(value)
+    @cover = value
+  end
+
+  def description
+    @description
+  end
+
+  def description=(value)
+    @description = value
+  end
+
+  def heading
     result = []
 
     if nil != @title
@@ -45,12 +86,13 @@ class Book
       result.push('<i>(Unknown title)</i>')
     end
     if nil != @author
-      result.push(@author.to_s())
+      result.push('<i>by ' + @author.reading_order + '</i>')
     end
     
     seriesInfo = []
-    if nil != @series
-      seriesInfo.push(@series.to_s)
+    series = @store.load_series(@series_id)
+    if nil != series and nil != series.descr
+      seriesInfo.push(series.descr.to_s)
     end
     if nil != @volume
       seriesInfo.push(@volume.to_s)
@@ -65,10 +107,10 @@ class Book
   def inspect
     data = []
     if nil != @author
-      data.push('author="' + @author.to_s + '"')
+      data.push('author="' + @author.inspect + '"')
     end
-    if nil != @series
-      data.push('series="' + @series + '"')
+    if nil != @series_id
+      data.push('series_id="' + @series_id.to_s() + '"')
     end
     if nil != @volume
       data.push('volume="' + @volume + '"')
@@ -89,30 +131,66 @@ class Book
     @path
   end
 
+  def path=(value)
+    @path = value
+  end
+
+  def series_id
+    @series_id
+  end
+  
+  def series_id=(value)
+    @series_id = value
+  end
+
   def to_s
     return inspect()
   end
 
+  def title
+    @title
+  end
+
+  def title=(value)
+    @title = value
+  end
+
+  # Returns the base name of @path with its directory and extension removed,
+  # or nil while no path has been set.
+  def title_grouping
+    return nil if nil == @path
+
+    File.basename(@path, '.*')
+  end
+
+  def volume
+    @volume
+  end
+
+  def volume=(value)
+    @volume = value
+  end
+
   protected
   def isUpper?(c)
     return /[[:upper:]]/.match(c)
   end
 
   protected
-  def massageAuthor(input)
+  def massage_author(input)  # "JaneDoe" -> "Jane Doe"; returns nil when input is nil
     if nil == input
       return nil
     end
 
-    result = ""
+    reading_order = ""
     input.each_char do |c|
-      if isUpper?(c) and (result.length > 0)
-        result += " "
-      end
-      result += c
+      if isUpper?(c) and (reading_order.length > 0)  # no space before the very first character
+        reading_order += " "
+      end
+      reading_order += c
     end
-    
-    return result
+
+    return reading_order
   end
 
   # Returns (series, volumeNo, titleText)
@@ -147,16 +225,25 @@ class Book
   end
 
   protected
-  def parseFileName!(fileName)
-    parts = fileName.split('/')
-    (@series, @volume, @title) = processTitle(parts[-1])
+  def parse_file_name!(file_name)  # fills this book's fields from the path and, for .epub/.pdf, the file itself
+    parts = file_name.split('/')
+    (series_code, @volume, @title) = processTitle(parts[-1])
     if parts.length > 1
-      @author = massageAuthor(parts[-2])
+      grouping = parts[-2]  # the parent directory name doubles as the author grouping key
+      reading_order = massage_author(grouping)
+      sort_order = nil
+      @author = Author.new(grouping, reading_order, sort_order)
+      @series_id = @store.get_series(grouping, series_code)
     end
 
-    if fileName.downcase.end_with?(".epub")
-      scanEpub!(fileName)
+    lc_file_name = file_name.downcase
+    if lc_file_name.end_with?(".epub")
+      scanEpub!(file_name)
+    elsif lc_file_name.end_with?(".pdf")
+      scan_pdf!(file_name)
     end
+    # @author is only assigned above when the path has a parent directory; skip the lookup while it is nil.
+    @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*')) if nil != @author
   end
 
   protected 
@@ -166,6 +253,7 @@ class Book
       Zip::File.open(fileName) do |zipfile|
         entry = zipfile.find_entry('META-INF/container.xml')
         if nil == entry
+          puts 'No META-INF/container.xml, skipping book ' + fileName
           return
         end
         contXml = zipfile.read('META-INF/container.xml')
@@ -181,6 +269,25 @@ class Book
     end
   end
 
+  protected
+  def scan_pdf!(file_name)
+    # Uses a sibling "<name>.jpeg" next to the PDF, when present, as the cover.
+    # parse_file_name! matches ".pdf" on the lowercased name, so compare likewise.
+    pdf_path = File.expand_path(file_name).to_s
+    unless pdf_path.downcase.end_with?('.pdf')
+      puts 'Unexpected internal error:  path "' + file_name.to_s + '" does not end with ".pdf".'
+      return
+    end
+
+    jpeg_path = pdf_path[0..-5] + '.jpeg'  # drop the 4-character ".pdf" suffix, whatever its case
+    if File.file?(jpeg_path)
+      File.open(jpeg_path, 'rb') do |is|  # binary mode: JPEG bytes are not text
+        @cover = Cover.new(is, jpeg_path, 'image/jpeg')
+      end
+    end
+  end
+
+
   protected
   def scanOpf!(zipfile, opfPath)
     coverId = nil
@@ -191,22 +298,47 @@ class Book
     #-------
     # Author
 
-    creator = opfDoc.css('dc|creator', 'dc' => 'http://purl.org/dc/elements/1.1/')
-    if (nil != creator) and (creator.length > 0)
-      roleNode = creator.attr('role')
-      if nil != roleNode
-        role = roleNode.value
-        if ('aut' == role) and (creator.children.length > 0) and (nil != creator.children[0])
-          name = creator.children[0].content
-          parts = name.split(' ')
-          if parts.length > 1
-            surname = parts[-1]
-            givenNames = parts[0..-2].join(' ')
-            @author = Author.new(surname, givenNames)
-          else
-            @author = Author.new(name, '')
+    grouping = @author.grouping
+    reading_order = @author.reading_order
+    sort_order = @author.sort_order
+
+    creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
+    if (creators.length > 0)
+      creator = creators[0]
+      if nil != creator
+        role = creator['opf:role']
+        if 'aut' == role
+          reading_order = creator.content
+
+          file_as = creator['opf:file-as']
+          if nil != file_as
+            sort_order = file_as
           end
         end
+
+        @author = Author.new(grouping, reading_order, sort_order)
+      end
+    end
+
+    #---------------------------------------
+    # Title
+
+    titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
+    if titles.length > 0
+      title = titles[0]
+      if nil != title
+        @title = title.content
+      end
+    end
+
+    #---------------------------------------
+    # Description
+    
+    descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
+    if (descrNodes.length > 0)
+      descrNode = descrNodes[0]
+      if nil != descrNode
+        @description = descrNode.content
       end
     end
 
@@ -219,22 +351,24 @@ class Book
       content = m['content']
 
       if 'calibre:series' == name
-        @series = content
+        # TODO:  Dynamically create a new series?
+        # @series_id = content
       elsif 'calibre:series-index' == name
         @volume = content
       elsif 'cover' == name
         coverId = content
+        #puts 'File ' + @path + ' coverId ' + coverId
       end
     end
 
     #---------------
     # Load the cover
 
-    @cover = loadCover(zipfile, opfPath, opfDoc, coverId)
+    @cover = load_cover(zipfile, opfPath, opfDoc, coverId)
   end
 
   protected
-  def loadCover(zipfile, opfPath, opfDoc, coverId)
+  def load_cover(zipfile, opfPath, opfDoc, coverId)
     coverFile = nil
     if nil == coverId
       coverId = "cover-image"