7 require 'classification'
# Namespace URI bound to the `dc` prefix in the CSS queries that scanOpf!
# runs against the OPF document (dc|creator, dc|title, dc|description).
12 @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
16 @classification_id = nil
# Begins loading from +fileName+ by handing the name to parse_file_name!.
# Any further steps of this method are not visible in this excerpt.
26 def load_from_file!(fileName)
28 parse_file_name!(fileName)
# Class-level predicate over a file name. The name is lower-cased first, so
# the ".epub" and ".pdf" suffix tests below are case-insensitive.
# NOTE(review): the value returned by each branch is not visible in this
# excerpt - presumably true for both suffixes and false otherwise; confirm.
31 def self.can_handle?(fileName)
36 #puts "Filename: " + fileName.to_s
37 lowerName = fileName.downcase()
39 if lowerName.end_with?(".epub")
43 if lowerName.end_with?(".pdf")
# Writer for @classification_id: stores +value+ as given.
62 def classification_id=(value)
63 @classification_id = value
78 def description=(value)
86 result.push('<b>' + @title + '</b>')
88 result.push('<i>(Unknown title)</i>')
91 result.push('<i>by ' + @author.reading_order + '</i>')
95 series = @store.load_series(@series_id)
96 if nil != series and nil != series.descr
97 seriesInfo.push(series.descr.to_s)
100 seriesInfo.push(@volume.to_s)
102 if seriesInfo.length > 0
103 result.push(seriesInfo.join(' '))
107 if nil != @classification_id
108 classification = @store.load_classification(@classification_id)
110 if nil != classification
111 if nil != classification.ddc
112 result.push('Dewey: ' + classification.ddc.to_s)
114 if nil != classification.lcc
115 result.push('LCC: ' + classification.lcc.to_s)
119 return result.join('<br/>')
125 data.push('author="' + @author.inspect + '"')
128 data.push('series_id="' + @series_id.to_s() + '"')
131 data.push('volume="' + @volume + '"')
134 data.push('title="' + @title + '"')
137 data.push(@cover.inspect())
140 data.push('path="' + @path + '"')
142 return '(Book:' + data.join(',') + ')'
157 def series_id=(value)
178 return File.basename(@path, '.*')
191 return /[[:upper:]]/.match(c)
# Walks +input+ one character at a time. A branch is taken whenever the
# character is upper-case (per isUpper?) and reading_order is already
# non-empty, i.e. for every capital except a leading one.
# NOTE(review): reading_order's initialisation and the branch body are not
# visible in this excerpt - presumably a separator is inserted before each
# interior capital to turn a run-together name into reading order; confirm.
195 def massage_author(input)
201 input.each_char do |c|
202 if isUpper?(c) and (reading_order.length > 0)
211 # Returns (series, volumeNo, titleText)
# +input+ is split on underscores; the first segment is tested for the shape
# "capital letters followed by digits" and the last segment has everything
# from its final '.' onwards removed before the segments are re-joined with
# spaces to form the title.
213 def processTitle(input)
218 arr = input.split('_')
# Two capture groups: a run of A-Z, then a run of digits.
# NOTE(review): ^ and $ anchor at line boundaries in Ruby, not at the ends of
# the string; \A and \z would be stricter. How series/vol are derived from the
# captures is not visible in this excerpt.
224 matchData = (arr[0]).match(/^([A-Z]+)([0-9]+)$/)
# NOTE(review): matchData is nil when the first segment does not match -
# presumably guarded by a check that is not visible here; confirm.
226 capt = matchData.captures
# Cut the last segment at its final '.' (the file extension, presumably).
# NOTE(review): rindex returns nil when there is no '.'; presumably guarded by
# a check that is not visible here.
232 pos = arr[-1].rindex('.')
234 arr[-1] = arr[-1].slice(0, pos)
237 title = arr.join(' ')
239 return series, vol, title
# Derives book fields from +file_name+: sets @volume and @title from the last
# path component, builds @author, asks the store for @series_id and
# @classification_id, and may append a line to the "unclassified" CSV.
# NOTE(review): the assignments of grouping, sort_order and category are not
# visible in this excerpt - presumably taken from the directory components in
# +parts+; confirm.
243 def parse_file_name!(file_name)
244 category = nil # e.g., non-fiction, fan-fiction
247 parts = file_name.split('/')
# Only the last path component (the file's own name) is parsed for the title.
248 (series_code, @volume, @title) = processTitle(parts[-1])
251 reading_order = massage_author(grouping)
253 @author = Author.new(grouping, reading_order, sort_order)
254 @series_id = @store.get_series(grouping, series_code)
# Dispatch on the lower-cased extension; the branch bodies are not visible
# here (presumably the EPUB and PDF scanners).
260 lc_file_name = file_name.downcase
261 if lc_file_name.end_with?(".epub")
263 elsif lc_file_name.end_with?(".pdf")
# Classification is looked up by author grouping plus the file's base name
# with its extension removed.
267 @classification_id = @store.find_classification(@author.grouping, File.basename(file_name, '.*'))
269 # TODO: Fix horrible hard-coded strings and paths
# When the category is '01_nonfic' and no classification was found, append
# a "grouping","path" line to the file named by Store.unclassified_csv.
# NOTE(review): the fields are concatenated without escaping embedded double
# quotes, and Kernel#open is used rather than File.open - confirm both are
# acceptable for the values that can reach this point.
270 if ('01_nonfic' == category) && (nil == classification_id)
271 open(Store.unclassified_csv, 'a') do |fd|
272 fd.puts('"' + grouping.to_s + '","' + path + '"')
# Opens +fileName+ as a zip archive, locates the OPF package document through
# META-INF/container.xml and passes it to scanOpf!. Zip::Error is rescued and
# reported on standard output.
278 def scanEpub!(fileName)
279 #puts 'Scanning "' + fileName.to_s + '"...'
281 Zip::File.open(fileName) do |zipfile|
282 entry = zipfile.find_entry('META-INF/container.xml')
# Message printed for archives that have no container.xml (the condition
# guarding it is not visible in this excerpt).
284 puts 'No META-INF/container.xml, skipping book ' + fileName
287 contXml = zipfile.read('META-INF/container.xml')
288 contDoc = Nokogiri::XML(contXml)
# The first <rootfile>'s full-path attribute names the OPF file in the zip.
# NOTE(review): [0] is nil when no rootfile element matches, and the resulting
# NoMethodError is not a Zip::Error, so the rescue below would not catch it.
289 opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
291 scanOpf!(zipfile, opfPath)
293 rescue Zip::Error => exc
294 puts 'ERROR processing file "' + fileName + '":'
# Looks for a cover image stored beside the PDF: the same absolute path with
# ".pdf" replaced by ".jpeg". When that file exists it becomes @cover.
301 def scan_pdf!(file_name)
302 #puts 'Scanning "' + file_name.to_s + '"...'
304 pdf_path = File.expand_path(file_name).to_s
# NOTE(review): this suffix test is case-sensitive, while can_handle? and
# parse_file_name! compare against a lower-cased name; a file ending in
# ".PDF" would presumably reach this message. Confirm the intended behaviour.
305 if ! pdf_path.end_with?('.pdf')
306 puts 'Unexpected internal error: path "' + file_name.to_s + '" does not end with ".pdf".'
# [0..-5] drops the last four characters (".pdf") before ".jpeg" is appended.
310 jpeg_path = pdf_path[0..-5] + '.jpeg'
311 if File.file?(jpeg_path)
# NOTE(review): the image is opened in text mode ('r'); 'rb' may be needed if
# Cover reads raw bytes. The stream is closed when this block exits, so Cover
# presumably consumes it inside its constructor - confirm both.
312 File.open(jpeg_path, 'r') do |is|
313 @cover = Cover.new(is, jpeg_path, 'image/jpeg')
# Reads the OPF package document at +opfPath+ inside +zipfile+ and updates
# @author, @title, @description and @cover from its metadata.
# Several conditions and assignments of this method are not visible in this
# excerpt; the comments below describe only the lines that are.
320 def scanOpf!(zipfile, opfPath)
323 opfXml = zipfile.read(opfPath)
324 opfDoc = Nokogiri::XML(opfXml)
# Start from the author values already set on this object; values found in
# the OPF replace them below.
329 grouping = @author.grouping
330 reading_order = @author.reading_order
331 sort_order = @author.sort_order
# Only the first dc:creator element is considered.
333 creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
334 if (creators.length > 0)
335 creator = creators[0]
337 role = creator['opf:role']
# The element's text content becomes the reading-order name.
339 reading_order = creator.content
# NOTE(review): how role and file_as are used is not visible here -
# presumably file_as feeds sort_order; confirm.
341 file_as = creator['opf:file-as']
347 @author = Author.new(grouping, reading_order, sort_order)
351 #---------------------------------------
# Title: taken from a dc:title element's text content.
354 titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
358 @title = title.content
362 #---------------------------------------
# Description: text content of the first dc:description element, if any.
365 descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
366 if (descrNodes.length > 0)
367 descrNode = descrNodes[0]
369 @description = descrNode.content
373 #---------------------------------------
374 # Other metadata: series, volume, cover
# <meta> elements are dispatched on their name; three names are recognised.
376 metas = opfDoc.css('package metadata meta')
379 content = m['content']
381 if 'calibre:series' == name
382 # TODO: Dynamically create a new series?
383 # @series_id = content
384 elsif 'calibre:series-index' == name
386 elsif 'cover' == name
388 #puts 'File ' + @path + ' coverId ' + coverId
# The cover is loaded from the archive through load_cover, using coverId.
395 @cover = load_cover(zipfile, opfPath, opfDoc, coverId)
399 def load_cover(zipfile, opfPath, opfDoc, coverId)
402 coverId = "cover-image"
405 items = opfDoc.css('package manifest item')
409 mimeType = i['media-type']
412 entry = zipfile.find_entry(href)
415 # Although the epub standard requires the path to be relative
416 # to the base of the epub (zip), some books encountered in the
417 # wild have been found to use a path relative to the location
419 parts = opfPath.split('/')
420 opfBasePath = opfPath.split('/')[0..-2].join('/')
421 coverPath = opfBasePath + '/' + href
422 entry = zipfile.find_entry(coverPath)
426 puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".'
429 entry.get_input_stream() do |is|
430 return Cover.new(is, href, mimeType)