X-Git-Url: http://jaekl.net/gitweb/?p=quanlib.git;a=blobdiff_plain;f=book.rb;h=6d90c0e2947a0a243b3480758518159e075d8d85;hp=e0ccc841eed4c86600d56e18b0373fc14a138ab2;hb=HEAD;hpb=fcaeedd4d1c128ff84371c0a7db5d0af6751492a diff --git a/book.rb b/book.rb index e0ccc84..2b93f4b 100644 --- a/book.rb +++ b/book.rb @@ -1,26 +1,33 @@ require 'nokogiri' +require 'rubygems' require 'zip' -require 'author' -require 'cover' -require 'store' +require_relative 'author' +require_relative 'classification' +require_relative 'cover' +require_relative 'store' class Book @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/' + @@SERIES_AND_VOLUME_REGEX = /^([A-Z]+)([0-9]+(\.[0-9]+)?)$/ + + attr_accessor :arrived + attr_accessor :author + attr_accessor :classification_id + attr_accessor :cover + attr_accessor :description + attr_accessor :language + attr_accessor :path + attr_accessor :series_id + attr_accessor :title + attr_accessor :volume def initialize(store) - @author = nil - @cover = nil - @description = nil - @path = nil - @series_id = nil @store = store - @title = nil - @volume = nil end - def load_from_file(fileName) + def load_from_file!(fileName) @path = fileName parse_file_name!(fileName) end @@ -44,28 +51,16 @@ class Book return false end - def author - return @author - end - - def author=(value) - @author = value - end - - def cover - return @cover - end - - def cover=(value) - @cover = value - end + def self.grouping_for_title(title) + result = title - def description - @description - end + '\'",!#'.split('').each do |c| + result = result.gsub(c, '-') + end + result = result.gsub(/: */, '--') + result = result.gsub(' ', '_') - def description=(value) - @description = value + result end def heading @@ -79,7 +74,7 @@ class Book if nil != @author result.push('by ' + @author.reading_order + '') end - + seriesInfo = [] series = @store.load_series(@series_id) if nil != series and nil != series.descr @@ -92,6 +87,19 @@ class Book result.push(seriesInfo.join(' ')) end + classification = nil + if nil != @classification_id + classification = @store.load_classification(@classification_id) + end + if nil != classification + if nil != classification.ddc + result.push('Dewey: ' + classification.ddc.to_s) + end + if nil != classification.lcc + result.push('LCC: ' + classification.lcc.to_s) + end + end + return result.join('
') end @@ -118,40 +126,16 @@ class Book return '(Book:' + data.join(',') + ')' end - def path - @path - end - - def path=(value) - @path = value - end - - def series_id - @series_id - end - - def series_id=(value) - @series_id = value - end - def to_s return inspect() end - def title - @title - end - - def title=(value) - @title = value - end - - def volume - @volume - end + def title_grouping + if nil == @path + return nil + end - def volume=(value) - @volume = value + return File.basename(@path, '.*') end protected @@ -189,7 +173,7 @@ class Book vol = nil first = arr[0] - matchData = (arr[0]).match(/^([A-Z]+)([0-9]+)$/) + matchData = (arr[0]).match(@@SERIES_AND_VOLUME_REGEX) if nil != matchData capt = matchData.captures series = capt[0] @@ -204,11 +188,23 @@ class Book title = arr.join(' ') + bare_title_grouping = title_grouping + .split('_') + .reject { |part| part.match(@@SERIES_AND_VOLUME_REGEX) } + .join('_') + + unless bare_title_grouping == Book.grouping_for_title(title) + puts "WARNING: title_grouping mismatch: #{bare_title_grouping.inspect} vs. #{Book.grouping_for_title(title).inspect}" + end + return series, vol, title end protected def parse_file_name!(file_name) + category = nil # e.g., non-fiction, fan-fiction + grouping = '' + parts = file_name.split('/') (series_code, @volume, @title) = processTitle(parts[-1]) if parts.length > 1 @@ -218,6 +214,9 @@ class Book @author = Author.new(grouping, reading_order, sort_order) @series_id = @store.get_series(grouping, series_code) end + if parts.length > 2 + category = parts[-3] + end lc_file_name = file_name.downcase if lc_file_name.end_with?(".epub") @@ -225,12 +224,24 @@ class Book elsif lc_file_name.end_with?(".pdf") scan_pdf!(file_name) end + + @arrived = File.ctime(file_name) + + @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*')) + + # TODO: Fix horrible hard-coded strings and paths + if ('01_nonfic' == category) && (nil == classification_id) + open(Store.unclassified_csv, 'a') do |fd| + fd.puts('"' + grouping.to_s + '","' + path + '"') + end + end end - protected + protected def scanEpub!(fileName) #puts 'Scanning "' + fileName.to_s + '"...' begin + Zip.warn_invalid_date = false Zip::File.open(fileName) do |zipfile| entry = zipfile.find_entry('META-INF/container.xml') if nil == entry @@ -314,7 +325,7 @@ class Book #--------------------------------------- # Description - + descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL) if (descrNodes.length > 0) descrNode = descrNodes[0] @@ -323,6 +334,17 @@ class Book end end + #--------------------------------------- + # Language + + langNodes = opfDoc.css('dc|language', 'dc' => @@DC_NS_URL) + if (langNodes.length > 0) + langNode = langNodes[0] + if langNode + @language = langNode.content + end + end + #--------------------------------------- # Other metadata: series, volume, cover @@ -365,9 +387,9 @@ class Book entry = zipfile.find_entry(href) if nil == entry - # Although the epub standard requires the path to be relative + # Although the epub standard requires the path to be relative # to the base of the epub (zip), some books encountered in the - # wild have been found to use a bath relative to the location + # wild have been found to use a bath relative to the location # of the opf file. parts = opfPath.split('/') opfBasePath = opfPath.split('/')[0..-2].join('/') @@ -375,6 +397,14 @@ class Book entry = zipfile.find_entry(coverPath) end + unless entry + # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg' + if href.start_with? '../' + coverPath = href[3..-1] + entry = zipfile.find_entry(coverPath) + end + end + if nil == entry puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".' return nil