X-Git-Url: http://jaekl.net/gitweb/?a=blobdiff_plain;f=book.rb;h=cd14ab690b6164c14c7f842b4755e85bb87d231e;hb=2122d68f0408b8471f9dedd9998ecf890a90f703;hp=aa61930bc357fb89dcc861085eedd38921b1f654;hpb=fffa1ed35ce07fdec65e1aa14a8f637fbdde9b10;p=quanlib.git diff --git a/book.rb b/book.rb index aa61930..cd14ab6 100644 --- a/book.rb +++ b/book.rb @@ -1,8 +1,10 @@ require 'nokogiri' +require 'rubygems' require 'zip' require 'author' +require 'classification' require 'cover' require 'store' @@ -11,6 +13,7 @@ class Book def initialize(store) @author = nil + @classification_id = nil @cover = nil @description = nil @path = nil @@ -20,12 +23,12 @@ class Book @volume = nil end - def loadFromFile(fileName) + def load_from_file!(fileName) @path = fileName - parseFileName!(fileName) + parse_file_name!(fileName) end - def self.canHandle?(fileName) + def self.can_handle?(fileName) if nil == fileName return false end @@ -37,6 +40,10 @@ class Book return true end + if lowerName.end_with?(".pdf") + return true + end + return false end @@ -48,6 +55,14 @@ class Book @author = value end + def classification_id + @classification_id + end + + def classification_id=(value) + @classification_id = value + end + def cover return @cover end @@ -75,7 +90,7 @@ class Book if nil != @author result.push('by ' + @author.reading_order + '') end - + seriesInfo = [] series = @store.load_series(@series_id) if nil != series and nil != series.descr @@ -88,6 +103,19 @@ class Book result.push(seriesInfo.join(' ')) end + classification = nil + if nil != @classification_id + classification = @store.load_classification(@classification_id) + end + if nil != classification + if nil != classification.ddc + result.push('Dewey: ' + classification.ddc.to_s) + end + if nil != classification.lcc + result.push('LCC: ' + classification.lcc.to_s) + end + end + return result.join('
') end @@ -125,7 +153,7 @@ class Book def series_id @series_id end - + def series_id=(value) @series_id = value end @@ -142,6 +170,14 @@ class Book @title = value end + def title_grouping + if nil == @path + return nil + end + + return File.basename(@path, '.*') + end + def volume @volume end @@ -204,8 +240,11 @@ class Book end protected - def parseFileName!(fileName) - parts = fileName.split('/') + def parse_file_name!(file_name) + category = nil # e.g., non-fiction, fan-fiction + grouping = '' + + parts = file_name.split('/') (series_code, @volume, @title) = processTitle(parts[-1]) if parts.length > 1 grouping = parts[-2] @@ -214,16 +253,32 @@ class Book @author = Author.new(grouping, reading_order, sort_order) @series_id = @store.get_series(grouping, series_code) end + if parts.length > 2 + category = parts[-3] + end + + lc_file_name = file_name.downcase + if lc_file_name.end_with?(".epub") + scanEpub!(file_name) + elsif lc_file_name.end_with?(".pdf") + scan_pdf!(file_name) + end - if fileName.downcase.end_with?(".epub") - scanEpub!(fileName) + @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*')) + + # TODO: Fix horrible hard-coded strings and paths + if ('01_nonfic' == category) && (nil == classification_id) + open(Store.unclassified_csv, 'a') do |fd| + fd.puts('"' + grouping.to_s + '","' + path + '"') + end end end - protected + protected def scanEpub!(fileName) #puts 'Scanning "' + fileName.to_s + '"...' begin + Zip.warn_invalid_date = false Zip::File.open(fileName) do |zipfile| entry = zipfile.find_entry('META-INF/container.xml') if nil == entry @@ -243,6 +298,25 @@ class Book end end + protected + def scan_pdf!(file_name) + #puts 'Scanning "' + file_name.to_s + '"...' + + pdf_path = File.expand_path(file_name).to_s + if ! pdf_path.end_with?('.pdf') + puts 'Unexpected internal error: path "' + file_name.to_s + '" does not end with ".pdf".' + return + end + + jpeg_path = pdf_path[0..-5] + '.jpeg' + if File.file?(jpeg_path) + File.open(jpeg_path, 'r') do |is| + @cover = Cover.new(is, jpeg_path, 'image/jpeg') + end + end + end + + protected def scanOpf!(zipfile, opfPath) coverId = nil @@ -288,7 +362,7 @@ class Book #--------------------------------------- # Description - + descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL) if (descrNodes.length > 0) descrNode = descrNodes[0] @@ -339,9 +413,9 @@ class Book entry = zipfile.find_entry(href) if nil == entry - # Although the epub standard requires the path to be relative + # Although the epub standard requires the path to be relative # to the base of the epub (zip), some books encountered in the - # wild have been found to use a bath relative to the location + # wild have been found to use a bath relative to the location # of the opf file. parts = opfPath.split('/') opfBasePath = opfPath.split('/')[0..-2].join('/') @@ -349,6 +423,14 @@ class Book entry = zipfile.find_entry(coverPath) end + unless entry + # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg' + if href.start_with? '../' + coverPath = href[3..-1] + entry = zipfile.find_entry(coverPath) + end + end + if nil == entry puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".' return nil