--- /dev/null
+require_relative 'book'
+require_relative 'store'
+class BookLoader
+ def initialize(config_file, queue)
+ @config_file = config_file
+ @queue = queue
+ end
+ def run
+ @store = Store.new(@config_file)
+ @store.connect()
+ file = @queue.pop
+ until file == DONE_MARKER do
+ book = Book.new(@store)
+ book.load_from_file!(file)
+ @store.store_book(book)
+ file = @queue.pop
+ end
+ @store.disconnect()
+ end
require_relative 'navigator'
require_relative 'page'
require_relative 'store'
-require_relative 'walkdir'
+require_relative 'walk_dir'
@outputDir = 'output'
-book_ids = []
@config_file = 'quanlib.ini'
@skip_class = false
for arg in ARGV
if ! arg.start_with?("--")
puts 'Scanning directory "' + arg + '"...'
- w = WalkDir.new(@store, arg)
- book_ids += (w.books)
+ w = WalkDir.new(@config_file, arg)
+ w.books
--- /dev/null
+# Walk the directory (and subdirectories), identifying books.
+# Expected format:
+# .../AuthorName/Title_of_the_Awesome_Book.ext
+# Author is given as FirstLast. For example,
+# Robert Anson Heinlein is RobertHeinlein, and
+# JKRowling is JoanneRowling.
+# Book titles have spaces replaced with underscores,
+# and punctuation [,!?'] replaced with hyphens.
+# If the book forms part of a series, then an all-capitals
+# series designator, followed by a numeric volume number,
+# followed by an underscore, is prefixed to the name.
+# For example, Hardy Boys' volume 1, The Tower Treasure,
+# is rendered as .../FranklinDixon/HB001_The_Tower_Treasure.epub
+# and Mrs. Pollifax volume 6, On the China Station, is
+# .../DorothyGilman/P06_On_the_China_Station.epub.
+require_relative 'book'
+require_relative 'book_loader'
+require_relative 'store'
+class WalkDir
+ def initialize(config_file, root)
+ @queue = Queue.new
+ @root = root
+ @config_file = config_file
+ @threads = []
+ @files = walk(@root)
+ end
+ def books
+ @threads = []
+ num_threads.times do
+ @threads << Thread.new do
+ BookLoader.new(@config_file, @queue).run
+ end
+ end
+ result = []
+ @files = remove_duplicates(@files)
+ for file in @files.sort()
+ if Book.can_handle?(file) && (!is_duplicate?(file))
+ # Queue this book to be loaded and added to the DB by a BookLoader thread
+ @queue << file
+ end
+ end
+ @threads.count.times { @queue << BookLoader::DONE_MARKER }
+ @threads.each { |t| t.join }
+ end
+ # Duplicate versions of a text are named
+ # xxx_suffix.ext
+ # Where suffix is one of bis, ter, quater, quinquies
+ # for the 2nd, 3rd, 4th or 5th variant respectively.
+ def is_duplicate?(file)
+ s = file.to_s
+ suffix = ['_bis.', '_ter.', '_quater.', '_quinquies.']
+ suffix.each do |pat|
+ if s.include?(pat)
+ return true
+ end
+ end
+ return false
+ end
+ def remove_duplicates(files)
+ unique = {}
+ for file in files
+ if Book.can_handle?(file)
+ key = File.dirname(file) + '/' + File.basename(file, '.*')
+ if unique.has_key?(key)
+ new_ext = File.extname(file)
+ old_ext = File.extname(unique[key])
+ if ('.pdf' == old_ext) && ('.epub' == new_ext)
+ # Prefer EPUB over PDF
+ puts 'REPLACED ' + unique[key].to_s + ' with ' + file.to_s
+ unique[key] = file
+ else
+ puts 'DROPPED ' + file.to_s + " because it's superceded by " + unique[key].to_s
+ end
+ else
+ unique[key] = file
+ end
+ end
+ end
+ return unique.values
+ end
+ def walk(path)
+ result = []
+ children = Dir.entries(path)
+ for child in children
+ fullName = (path.chomp("/")) + "/" + child
+ if (File.directory?(fullName)) and (child != ".") and (child != "..") and (!File.symlink?(fullName))
+ sub = walk(fullName)
+ if (sub != nil) and (sub.length > 0)
+ result.concat(sub)
+ end
+ elsif (! File.directory?(fullName))
+ result.push(fullName)
+ end
+ end
+ return result
+ end
+ def num_threads
+ # TODO: make this (auto?) configurable; for now #books always starts this fixed number of BookLoader threads.
+ 12
+ end
+++ /dev/null
-# Walk the directory (and subdirectories), identifying books.
-# Expected format:
-# .../AuthorName/Title_of_the_Awesome_Book.ext
-# Author is given as FirstLast. For example,
-# Robert Anson Heinlein is RobertHeinlein, and
-# JKRowling is JoanneRowling.
-# Book titles have spaces replaced with underscores,
-# and punctuation [,!?'] replaced with hyphens.
-# If the book forms part of a series, then an all-capitals
-# series designator, followed by a numeric volume number,
-# followed by an underscore, is prefixed to the name.
-# For example, Hardy Boys' volume 1, The Tower Treasure,
-# is rendered as .../FranklinDixon/HB001_The_Tower_Treasure.epub
-# and Mrs. Pollifax volume 6, On the China Station, is
-# .../DorothyGilman/P06_On_the_China_Station.epub.
-require_relative 'book'
-require_relative 'store'
-class WalkDir
- def initialize(store, root)
- @root = root
- @store = store
- @files = walk(@root)
- end
- def books
- result = []
- @files = remove_duplicates(@files)
- for file in @files.sort()
- if Book.can_handle?(file) && (!is_duplicate?(file))
- book = Book.new(@store)
- book.load_from_file!(file)
- id = @store.store_book(book)
- result.push(id)
- end
- end
- return result
- end
- # Duplicate versions of a text are named
- # xxx_suffix.ext
- # Where suffix is one of bis, ter, quater, quinquies
- # for the 2nd, 3rd, 4th or 5th variant respectively.
- def is_duplicate?(file)
- s = file.to_s
- suffix = ['_bis.', '_ter.', '_quater.', '_quinquies.']
- suffix.each do |pat|
- if s.include?(pat)
- return true
- end
- end
- return false
- end
- def remove_duplicates(files)
- unique = {}
- for file in files
- if Book.can_handle?(file)
- key = File.dirname(file) + '/' + File.basename(file, '.*')
- if unique.has_key?(key)
- new_ext = File.extname(file)
- old_ext = File.extname(unique[key])
- if ('.pdf' == old_ext) && ('.epub' == new_ext)
- # Prefer EPUB over PDF
- puts 'REPLACED ' + unique[key].to_s + ' with ' + file.to_s
- unique[key] = file
- else
- puts 'DROPPED ' + file.to_s + " because it's superceded by " + unique[key].to_s
- end
- else
- unique[key] = file
- end
- end
- end
- return unique.values
- end
- def walk(path)
- result = []
- children = Dir.entries(path)
- for child in children
- fullName = (path.chomp("/")) + "/" + child
- if (File.directory?(fullName)) and (child != ".") and (child != "..") and (!File.symlink?(fullName))
- sub = walk(fullName)
- if (sub != nil) and (sub.length > 0)
- result.concat(sub)
- end
- elsif (! File.directory?(fullName))
- result.push(fullName)
- end
- end
- return result
- end