X-Git-Url: http://jaekl.net/gitweb/?a=blobdiff_plain;f=book.rb;h=cd14ab690b6164c14c7f842b4755e85bb87d231e;hb=2122d68f0408b8471f9dedd9998ecf890a90f703;hp=aa61930bc357fb89dcc861085eedd38921b1f654;hpb=fffa1ed35ce07fdec65e1aa14a8f637fbdde9b10;p=quanlib.git
diff --git a/book.rb b/book.rb
index aa61930..cd14ab6 100644
--- a/book.rb
+++ b/book.rb
@@ -1,8 +1,10 @@
require 'nokogiri'
+require 'rubygems'
require 'zip'
require 'author'
+require 'classification'
require 'cover'
require 'store'
@@ -11,6 +13,7 @@ class Book
def initialize(store)
@author = nil
+ @classification_id = nil
@cover = nil
@description = nil
@path = nil
@@ -20,12 +23,12 @@ class Book
@volume = nil
end
- def loadFromFile(fileName)
+ def load_from_file!(fileName)
@path = fileName
- parseFileName!(fileName)
+ parse_file_name!(fileName)
end
- def self.canHandle?(fileName)
+ def self.can_handle?(fileName)
if nil == fileName
return false
end
@@ -37,6 +40,10 @@ class Book
return true
end
+ if lowerName.end_with?(".pdf")
+ return true
+ end
+
return false
end
@@ -48,6 +55,14 @@ class Book
@author = value
end
+ def classification_id
+ @classification_id
+ end
+
+ def classification_id=(value)
+ @classification_id = value
+ end
+
def cover
return @cover
end
@@ -75,7 +90,7 @@ class Book
if nil != @author
result.push('by ' + @author.reading_order + '')
end
-
+
seriesInfo = []
series = @store.load_series(@series_id)
if nil != series and nil != series.descr
@@ -88,6 +103,19 @@ class Book
result.push(seriesInfo.join(' '))
end
+ classification = nil
+ if nil != @classification_id
+ classification = @store.load_classification(@classification_id)
+ end
+ if nil != classification
+ if nil != classification.ddc
+ result.push('Dewey: ' + classification.ddc.to_s)
+ end
+ if nil != classification.lcc
+ result.push('LCC: ' + classification.lcc.to_s)
+ end
+ end
+
return result.join('
')
end
@@ -125,7 +153,7 @@ class Book
def series_id
@series_id
end
-
+
def series_id=(value)
@series_id = value
end
@@ -142,6 +170,14 @@ class Book
@title = value
end
+ def title_grouping
+ if nil == @path
+ return nil
+ end
+
+ return File.basename(@path, '.*')
+ end
+
def volume
@volume
end
@@ -204,8 +240,11 @@ class Book
end
protected
- def parseFileName!(fileName)
- parts = fileName.split('/')
+ def parse_file_name!(file_name)
+ category = nil # e.g., non-fiction, fan-fiction
+ grouping = ''
+
+ parts = file_name.split('/')
(series_code, @volume, @title) = processTitle(parts[-1])
if parts.length > 1
grouping = parts[-2]
@@ -214,16 +253,32 @@ class Book
@author = Author.new(grouping, reading_order, sort_order)
@series_id = @store.get_series(grouping, series_code)
end
+ if parts.length > 2
+ category = parts[-3]
+ end
+
+ lc_file_name = file_name.downcase
+ if lc_file_name.end_with?(".epub")
+ scanEpub!(file_name)
+ elsif lc_file_name.end_with?(".pdf")
+ scan_pdf!(file_name)
+ end
- if fileName.downcase.end_with?(".epub")
- scanEpub!(fileName)
+ @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*'))
+
+ # TODO: Fix horrible hard-coded strings and paths
+ if ('01_nonfic' == category) && (nil == classification_id)
+ open(Store.unclassified_csv, 'a') do |fd|
+ fd.puts('"' + grouping.to_s + '","' + path + '"')
+ end
end
end
- protected
+ protected
def scanEpub!(fileName)
#puts 'Scanning "' + fileName.to_s + '"...'
begin
+ Zip.warn_invalid_date = false
Zip::File.open(fileName) do |zipfile|
entry = zipfile.find_entry('META-INF/container.xml')
if nil == entry
@@ -243,6 +298,25 @@ class Book
end
end
+ protected
+ def scan_pdf!(file_name)
+ #puts 'Scanning "' + file_name.to_s + '"...'
+
+ pdf_path = File.expand_path(file_name).to_s
+ if ! pdf_path.end_with?('.pdf')
+ puts 'Unexpected internal error: path "' + file_name.to_s + '" does not end with ".pdf".'
+ return
+ end
+
+ jpeg_path = pdf_path[0..-5] + '.jpeg'
+ if File.file?(jpeg_path)
+ File.open(jpeg_path, 'r') do |is|
+ @cover = Cover.new(is, jpeg_path, 'image/jpeg')
+ end
+ end
+ end
+
+
protected
def scanOpf!(zipfile, opfPath)
coverId = nil
@@ -288,7 +362,7 @@ class Book
#---------------------------------------
# Description
-
+
descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
if (descrNodes.length > 0)
descrNode = descrNodes[0]
@@ -339,9 +413,9 @@ class Book
entry = zipfile.find_entry(href)
if nil == entry
- # Although the epub standard requires the path to be relative
+ # Although the epub standard requires the path to be relative
# to the base of the epub (zip), some books encountered in the
- # wild have been found to use a bath relative to the location
+ # wild have been found to use a path relative to the location
# of the opf file.
parts = opfPath.split('/')
opfBasePath = opfPath.split('/')[0..-2].join('/')
@@ -349,6 +423,14 @@ class Book
entry = zipfile.find_entry(coverPath)
end
+ unless entry
+ # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg'
+ if href.start_with? '../'
+ coverPath = href[3..-1]
+ entry = zipfile.find_entry(coverPath)
+ end
+ end
+
if nil == entry
puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".'
return nil