# XML namespace URI of the Dublin Core element set. It is supplied as the
# 'dc' prefix binding when selecting dc|creator, dc|title and
# dc|description nodes from the OPF document.
10 @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
# Fills in this book from the file at fileName.
# fileName:: path of the book file; it is passed on to parse_file_name!,
#            which derives volume, title, author and series from it.
23 def load_from_file(fileName)
# Metadata is taken from the file name/path itself.
25 parse_file_name!(fileName)
# Class-level predicate that inspects the extension of fileName.
# The name is lower-cased before the ".epub" / ".pdf" suffix tests, so
# the tests are case-insensitive.
# NOTE(review): presumably answers true for these two extensions and
# false otherwise -- confirm against the branch bodies.
28 def self.can_handle?(fileName)
33 #puts "Filename: " + fileName.to_s
34 lowerName = fileName.downcase()
36 if lowerName.end_with?(".epub")
40 if lowerName.end_with?(".pdf")
67 def description=(value)
75 result.push('<b>' + @title + '</b>')
77 result.push('<i>(Unknown title)</i>')
80 result.push('<i>by ' + @author.reading_order + '</i>')
84 series = @store.load_series(@series_id)
85 if nil != series and nil != series.descr
86 seriesInfo.push(series.descr.to_s)
89 seriesInfo.push(@volume.to_s)
91 if seriesInfo.length > 0
92 result.push(seriesInfo.join(' '))
95 return result.join('<br/>')
101 data.push('author="' + @author.inspect + '"')
104 data.push('series_id="' + @series_id.to_s() + '"')
107 data.push('volume="' + @volume + '"')
110 data.push('title="' + @title + '"')
113 data.push(@cover.inspect())
116 data.push('path="' + @path + '"')
118 return '(Book:' + data.join(',') + ')'
133 def series_id=(value)
159 return /[[:upper:]]/.match(c)
# Derives a reading-order author string from input; parse_file_name!
# calls this with the author grouping string.
# input:: string that is scanned one character at a time.
163 def massage_author(input)
169 input.each_char do |c|
# Special-cases an upper-case character once reading_order already holds
# some text.
# NOTE(review): presumably this marks a word boundary in a run-together
# name -- confirm what the branch does with it.
170 if isUpper?(c) and (reading_order.length > 0)
# Splits a book file name into series, volume and title parts.
# input:: file name (parse_file_name! passes the last path component);
#         its fields are separated by '_'.
179 # Returns (series, volumeNo, titleText)
181 def processTitle(input)
186 arr = input.split('_')
# The first field is tested against "upper-case letters followed by
# digits" (for example "ABC12"); the two groups are captured separately.
# NOTE(review): presumably letters -> series and digits -> vol -- confirm.
192 matchData = (arr[0]).match(/^([A-Z]+)([0-9]+)$/)
194 capt = matchData.captures
# Cut the last field at its final '.', which drops the file extension.
200 pos = arr[-1].rindex('.')
202 arr[-1] = arr[-1].slice(0, pos)
# The fields, joined with single spaces, form the title text.
205 title = arr.join(' ')
207 return series, vol, title
# Derives book metadata from a file path and then branches on the file
# extension.
# file_name:: path that uses '/' as the separator.
# Assigns @volume, @title, @author and @series_id.
211 def parse_file_name!(file_name)
212 parts = file_name.split('/')
# The last path component encodes series code, volume and title.
213 (series_code, @volume, @title) = processTitle(parts[-1])
# NOTE(review): grouping and sort_order are assigned on lines not shown
# next to these statements -- presumably from other path components;
# confirm.
216 reading_order = massage_author(grouping)
218 @author = Author.new(grouping, reading_order, sort_order)
# The series is looked up in the store by author grouping and series code.
219 @series_id = @store.get_series(grouping, series_code)
# The extension is tested on a lower-cased copy, so the test is
# case-insensitive.
222 lc_file_name = file_name.downcase
223 if lc_file_name.end_with?(".epub")
225 elsif lc_file_name.end_with?(".pdf")
# Opens the EPUB at fileName as a zip archive, finds the OPF package
# document through META-INF/container.xml and hands it to scanOpf!.
# fileName:: path of the EPUB file.
# A Zip::Error is rescued and reported with puts.
231 def scanEpub!(fileName)
232 #puts 'Scanning "' + fileName.to_s + '"...'
234 Zip::File.open(fileName) do |zipfile|
# container.xml is where the location of the OPF document is read from;
# a message is printed when the entry is missing.
235 entry = zipfile.find_entry('META-INF/container.xml')
237 puts 'No META-INF/container.xml, skipping book ' + fileName
240 contXml = zipfile.read('META-INF/container.xml')
241 contDoc = Nokogiri::XML(contXml)
# Only the first rootfile element is used; its full-path attribute is the
# location of the OPF document inside the archive.
242 opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
244 scanOpf!(zipfile, opfPath)
246 rescue Zip::Error => exc
247 puts 'ERROR processing file "' + fileName + '":'
# Looks for a cover image stored next to a PDF: the same absolute path
# with the ".pdf" suffix replaced by ".jpeg". When that file exists,
# @cover is set from it.
# file_name:: path of the PDF file.
254 def scan_pdf!(file_name)
255 #puts 'Scanning "' + file_name.to_s + '"...'
257 pdf_path = File.expand_path(file_name).to_s
# NOTE(review): this suffix test is case-sensitive, whereas the extension
# tests in parse_file_name! and can_handle? work on a lower-cased name;
# a path ending in ".PDF" would be reported as an internal error here.
258 if ! pdf_path.end_with?('.pdf')
259 puts 'Unexpected internal error: path "' + file_name.to_s + '" does not end with ".pdf".'
# [0..-5] removes the last four characters (".pdf") before ".jpeg" is
# appended.
263 jpeg_path = pdf_path[0..-5] + '.jpeg'
264 if File.file?(jpeg_path)
265 File.open(jpeg_path, 'r') do |is|
266 @cover = Cover.new(is, jpeg_path, 'image/jpeg')
# Reads the OPF package document at opfPath from zipfile and updates
# @author, @title, @description and @cover from its metadata.
# zipfile:: open zip archive; read is called on it here.
# opfPath:: path of the OPF document inside the archive.
273 def scanOpf!(zipfile, opfPath)
276 opfXml = zipfile.read(opfPath)
277 opfDoc = Nokogiri::XML(opfXml)
# Start from the author values already held in @author; values found in
# the OPF document replace them below.
282 grouping = @author.grouping
283 reading_order = @author.reading_order
284 sort_order = @author.sort_order
# Only the first dc:creator element is considered.
286 creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
287 if (creators.length > 0)
288 creator = creators[0]
290 role = creator['opf:role']
# The element text becomes the reading-order name.
292 reading_order = creator.content
# NOTE(review): opf:file-as presumably feeds sort_order -- confirm.
294 file_as = creator['opf:file-as']
300 @author = Author.new(grouping, reading_order, sort_order)
304 #---------------------------------------
# Title: taken from a dc:title element.
307 titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
311 @title = title.content
315 #---------------------------------------
# Description: text of the first dc:description element, if any.
318 descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
319 if (descrNodes.length > 0)
320 descrNode = descrNodes[0]
322 @description = descrNode.content
326 #---------------------------------------
327 # Other metadata: series, volume, cover
# These come from meta elements, distinguished by the value held in name
# and read through their content attribute.
329 metas = opfDoc.css('package metadata meta')
332 content = m['content']
# The calibre:series branch contains only comments, so a series name
# found in the OPF document is currently not applied.
334 if 'calibre:series' == name
335 # TODO: Dynamically create a new series?
336 # @series_id = content
337 elsif 'calibre:series-index' == name
339 elsif 'cover' == name
341 #puts 'File ' + @path + ' coverId ' + coverId
# The cover image is resolved through the manifest by load_cover.
348 @cover = load_cover(zipfile, opfPath, opfDoc, coverId)
# Finds the cover image inside the EPUB archive and returns it as a
# Cover built from the entry's input stream, the manifest href and the
# manifest media type.
# zipfile:: open zip archive; find_entry is called on it here.
# opfPath:: path of the OPF document inside the archive.
# opfDoc::  parsed OPF document.
# coverId:: manifest item id of the cover image.
352 def load_cover(zipfile, opfPath, opfDoc, coverId)
# NOTE(review): "cover-image" looks like a fallback id for when none was
# supplied -- confirm the guarding condition.
355 coverId = "cover-image"
358 items = opfDoc.css('package manifest item')
362 mimeType = i['media-type']
# First attempt: treat href as a path from the root of the archive.
365 entry = zipfile.find_entry(href)
368 # Although the epub standard requires the path to be relative
369 # to the base of the epub (zip), some books encountered in the
370 # wild have been found to use a path relative to the location
# Second attempt: prefix href with the directory part of opfPath.
# NOTE(review): parts looks unused; the next line repeats the split.
372 parts = opfPath.split('/')
373 opfBasePath = opfPath.split('/')[0..-2].join('/')
374 coverPath = opfBasePath + '/' + href
375 entry = zipfile.find_entry(coverPath)
379 puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".'
# The Cover is created while the entry's stream is open and returned
# straight from inside the block.
382 entry.get_input_stream() do |is|
383 return Cover.new(is, href, mimeType)