projects
/
quanlib.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Add `arrived` attribute (file creation timestamp) to books table.
[quanlib.git]
/
book.rb
diff --git
a/book.rb
b/book.rb
index b0a1bbff66de2851b9bab47b8ad6eb0d16cc65f3..2b93f4b574c9c45738a3a95a2c0b4f1697083d1c 100644
(file)
--- a/
book.rb
+++ b/
book.rb
@@
-1,25
+1,30
@@
require 'nokogiri'
require 'nokogiri'
+require 'rubygems'
require 'zip'
require 'zip'
-require 'author'
-require 'classification'
-require 'cover'
-require 'store'
+require
_relative
'author'
+require
_relative
'classification'
+require
_relative
'cover'
+require
_relative
'store'
class Book
@@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
class Book
@@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
+ @@SERIES_AND_VOLUME_REGEX = /^([A-Z]+)([0-9]+(\.[0-9]+)?)$/
+
+ attr_accessor :arrived
+ attr_accessor :author
+ attr_accessor :classification_id
+ attr_accessor :cover
+ attr_accessor :description
+ attr_accessor :language
+ attr_accessor :path
+ attr_accessor :series_id
+ attr_accessor :title
+ attr_accessor :volume
def initialize(store)
def initialize(store)
- @author = nil
- @classification_id = nil
- @cover = nil
- @description = nil
- @path = nil
- @series_id = nil
@store = store
@store = store
- @title = nil
- @volume = nil
end
def load_from_file!(fileName)
end
def load_from_file!(fileName)
@@
-46,36
+51,16
@@
class Book
return false
end
return false
end
- def author
- return @author
- end
-
- def author=(value)
- @author = value
- end
-
- def classification_id
- @classification_id
- end
-
- def classification_id=(value)
- @classification_id = value
- end
-
- def cover
- return @cover
- end
+ def self.grouping_for_title(title)
+ result = title
- def cover=(value)
- @cover = value
- end
-
- def description
- @description
- end
+ '\'",!#'.split('').each do |c|
+ result = result.gsub(c, '-')
+ end
+ result = result.gsub(/: */, '--')
+ result = result.gsub(' ', '_')
- def description=(value)
- @description = value
+ result
end
def heading
end
def heading
@@
-89,7
+74,7
@@
class Book
if nil != @author
result.push('<i>by ' + @author.reading_order + '</i>')
end
if nil != @author
result.push('<i>by ' + @author.reading_order + '</i>')
end
-
+
seriesInfo = []
series = @store.load_series(@series_id)
if nil != series and nil != series.descr
seriesInfo = []
series = @store.load_series(@series_id)
if nil != series and nil != series.descr
@@
-141,34
+126,10
@@
class Book
return '(Book:' + data.join(',') + ')'
end
return '(Book:' + data.join(',') + ')'
end
- def path
- @path
- end
-
- def path=(value)
- @path = value
- end
-
- def series_id
- @series_id
- end
-
- def series_id=(value)
- @series_id = value
- end
-
def to_s
return inspect()
end
def to_s
return inspect()
end
- def title
- @title
- end
-
- def title=(value)
- @title = value
- end
-
def title_grouping
if nil == @path
return nil
def title_grouping
if nil == @path
return nil
@@
-177,14
+138,6
@@
class Book
return File.basename(@path, '.*')
end
return File.basename(@path, '.*')
end
- def volume
- @volume
- end
-
- def volume=(value)
- @volume = value
- end
-
protected
def isUpper?(c)
return /[[:upper:]]/.match(c)
protected
def isUpper?(c)
return /[[:upper:]]/.match(c)
@@
-220,7
+173,7
@@
class Book
vol = nil
first = arr[0]
vol = nil
first = arr[0]
- matchData = (arr[0]).match(
/^([A-Z]+)([0-9]+)$/
)
+ matchData = (arr[0]).match(
@@SERIES_AND_VOLUME_REGEX
)
if nil != matchData
capt = matchData.captures
series = capt[0]
if nil != matchData
capt = matchData.captures
series = capt[0]
@@
-235,11
+188,23
@@
class Book
title = arr.join(' ')
title = arr.join(' ')
+ bare_title_grouping = title_grouping
+ .split('_')
+ .reject { |part| part.match(@@SERIES_AND_VOLUME_REGEX) }
+ .join('_')
+
+ unless bare_title_grouping == Book.grouping_for_title(title)
+ puts "WARNING: title_grouping mismatch: #{bare_title_grouping.inspect} vs. #{Book.grouping_for_title(title).inspect}"
+ end
+
return series, vol, title
end
protected
def parse_file_name!(file_name)
return series, vol, title
end
protected
def parse_file_name!(file_name)
+ category = nil # e.g., non-fiction, fan-fiction
+ grouping = ''
+
parts = file_name.split('/')
(series_code, @volume, @title) = processTitle(parts[-1])
if parts.length > 1
parts = file_name.split('/')
(series_code, @volume, @title) = processTitle(parts[-1])
if parts.length > 1
@@
-249,6
+214,9
@@
class Book
@author = Author.new(grouping, reading_order, sort_order)
@series_id = @store.get_series(grouping, series_code)
end
@author = Author.new(grouping, reading_order, sort_order)
@series_id = @store.get_series(grouping, series_code)
end
+ if parts.length > 2
+ category = parts[-3]
+ end
lc_file_name = file_name.downcase
if lc_file_name.end_with?(".epub")
lc_file_name = file_name.downcase
if lc_file_name.end_with?(".epub")
@@
-257,13
+225,23
@@
class Book
scan_pdf!(file_name)
end
scan_pdf!(file_name)
end
+ @arrived = File.ctime(file_name)
+
@classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*'))
@classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*'))
+
+ # TODO: Fix horrible hard-coded strings and paths
+ if ('01_nonfic' == category) && (nil == classification_id)
+ open(Store.unclassified_csv, 'a') do |fd|
+ fd.puts('"' + grouping.to_s + '","' + path + '"')
+ end
+ end
end
end
- protected
+ protected
def scanEpub!(fileName)
#puts 'Scanning "' + fileName.to_s + '"...'
begin
def scanEpub!(fileName)
#puts 'Scanning "' + fileName.to_s + '"...'
begin
+ Zip.warn_invalid_date = false
Zip::File.open(fileName) do |zipfile|
entry = zipfile.find_entry('META-INF/container.xml')
if nil == entry
Zip::File.open(fileName) do |zipfile|
entry = zipfile.find_entry('META-INF/container.xml')
if nil == entry
@@
-347,7
+325,7
@@
class Book
#---------------------------------------
# Description
#---------------------------------------
# Description
-
+
descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
if (descrNodes.length > 0)
descrNode = descrNodes[0]
descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
if (descrNodes.length > 0)
descrNode = descrNodes[0]
@@
-356,6
+334,17
@@
class Book
end
end
end
end
+ #---------------------------------------
+ # Language
+
+ langNodes = opfDoc.css('dc|language', 'dc' => @@DC_NS_URL)
+ if (langNodes.length > 0)
+ langNode = langNodes[0]
+ if langNode
+ @language = langNode.content
+ end
+ end
+
#---------------------------------------
# Other metadata: series, volume, cover
#---------------------------------------
# Other metadata: series, volume, cover
@@
-398,9
+387,9
@@
class Book
entry = zipfile.find_entry(href)
if nil == entry
entry = zipfile.find_entry(href)
if nil == entry
- # Although the epub standard requires the path to be relative
+ # Although the epub standard requires the path to be relative
# to the base of the epub (zip), some books encountered in the
# to the base of the epub (zip), some books encountered in the
- # wild have been found to use a path relative to the location
+ # wild have been found to use a path relative to the location
# of the opf file.
parts = opfPath.split('/')
opfBasePath = opfPath.split('/')[0..-2].join('/')
# of the opf file.
parts = opfPath.split('/')
opfBasePath = opfPath.split('/')[0..-2].join('/')
@@
-408,6
+397,14
@@
class Book
entry = zipfile.find_entry(coverPath)
end
entry = zipfile.find_entry(coverPath)
end
+ unless entry
+ # Another case found in the wild: cover image is at the root, but path is '../cover.jpeg'
+ if href.start_with? '../'
+ coverPath = href[3..-1]
+ entry = zipfile.find_entry(coverPath)
+ end
+ end
+
if nil == entry
puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".'
return nil
if nil == entry
puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".'
return nil