9 @@DC_NS_URL = 'http://purl.org/dc/elements/1.1/'
11 def initialize(fileName)
20 parseFileName!(fileName)
23 def self.canHandle?(fileName)
28 #puts "Filename: " + fileName.to_s
29 lowerName = fileName.downcase()
31 if lowerName.end_with?(".epub")
50 result.push('<b>' + @title + '</b>')
52 result.push('<i>(Unknown title)</i>')
55 result.push('<i>by ' + @author.to_s() + '</i>')
60 seriesInfo.push(@series.to_s)
63 seriesInfo.push(@volume.to_s)
65 if seriesInfo.length > 0
66 result.push(seriesInfo.join(' '))
69 return result.join('<br/>')
75 data.push('author="' + @author.to_s + '"')
78 data.push('series="' + @series + '"')
81 data.push('volume="' + @volume + '"')
84 data.push('title="' + @title + '"')
87 data.push(@cover.inspect())
90 data.push('path="' + @path + '"')
92 return '(Book:' + data.join(',') + ')'
105 return /[[:upper:]]/.match(c)
109 def massageAuthor(input)
115 input.each_char do |c|
116 if isUpper?(c) and (result.length > 0)
125 # Returns (series, volumeNo, titleText)
127 def processTitle(input)
132 arr = input.split('_')
138 matchData = (arr[0]).match(/^([A-Z]+)([0-9]+)$/)
140 capt = matchData.captures
146 pos = arr[-1].rindex('.')
148 arr[-1] = arr[-1].slice(0, pos)
151 title = arr.join(' ')
153 return series, vol, title
157 def parseFileName!(fileName)
158 parts = fileName.split('/')
159 (@series, @volume, @title) = processTitle(parts[-1])
161 @author = massageAuthor(parts[-2])
164 if fileName.downcase.end_with?(".epub")
170 def scanEpub!(fileName)
171 #puts 'Scanning "' + fileName.to_s + '"...'
173 Zip::File.open(fileName) do |zipfile|
174 entry = zipfile.find_entry('META-INF/container.xml')
178 contXml = zipfile.read('META-INF/container.xml')
179 contDoc = Nokogiri::XML(contXml)
180 opfPath = contDoc.css("container rootfiles rootfile")[0]['full-path']
182 scanOpf!(zipfile, opfPath)
184 rescue Zip::Error => exc
185 puts 'ERROR processing file "' + fileName + '":'
192 def scanOpf!(zipfile, opfPath)
195 opfXml = zipfile.read(opfPath)
196 opfDoc = Nokogiri::XML(opfXml)
201 creators = opfDoc.css('dc|creator', 'dc' => @@DC_NS_URL)
202 if (creators.length > 0)
203 creator = creators[0]
205 role = creator['opf:role']
207 name = creator.content
208 parts = name.split(' ')
211 givenNames = parts[0..-2].join(' ')
212 @author = Author.new(surname, givenNames)
214 @author = Author.new(name, '')
220 #---------------------------------------
223 titles = opfDoc.css('dc|title', 'dc' => @@DC_NS_URL)
227 @title = title.content
231 #---------------------------------------
234 descrNodes = opfDoc.css('dc|description', 'dc' => @@DC_NS_URL)
235 if (descrNodes.length > 0)
236 descrNode = descrNodes[0]
238 @description = descrNode.content
242 #---------------------------------------
243 # Other metadata: series, volume, cover
245 metas = opfDoc.css('package metadata meta')
248 content = m['content']
250 if 'calibre:series' == name
252 elsif 'calibre:series-index' == name
254 elsif 'cover' == name
262 @cover = loadCover(zipfile, opfPath, opfDoc, coverId)
266 def loadCover(zipfile, opfPath, opfDoc, coverId)
269 coverId = "cover-image"
272 items = opfDoc.css('package manifest item')
276 mimeType = i['media-type']
279 entry = zipfile.find_entry(href)
282 # Although the epub standard requires the path to be relative
283 # to the base of the epub (zip), some books encountered in the
284 # wild have been found to use a path relative to the location
286 parts = opfPath.split('/')
287 opfBasePath = opfPath.split('/')[0..-2].join('/')
288 coverPath = opfBasePath + '/' + href
289 entry = zipfile.find_entry(coverPath)
293 puts 'WARNING! Cover image "' + href + '" not found in file "' + @path + '".'
296 entry.get_input_stream() do |is|
297 return Cover.new(is, href, mimeType)