--- /dev/null
+# Walk the directory (and subdirectories), identifying books.
+#
+# Expected format:
+# .../AuthorName/Title_of_the_Awesome_Book.ext
+#
+# Author is given as FirstLast. For example,
+# Robert Anson Heinlein is RobertHeinlein, and
+# J. K. Rowling is JoanneRowling.
+#
+# Book titles have spaces replaced with underscores,
+# and punctuation [,!?'] replaced with hyphens.
+#
+# If the book forms part of a series, then an all-capitals
+# series designator, followed by a numeric volume number,
+# followed by an underscore, is prefixed to the name.
+# For example, Hardy Boys' volume 1, The Tower Treasure,
+# is rendered as .../FranklinDixon/HB001_The_Tower_Treasure.epub
+# and Mrs. Pollifax volume 6, On the China Station, is
+# .../DorothyGilman/P06_On_the_China_Station.epub.
+
+require_relative 'book'
+require_relative 'book_loader'
+require_relative 'store'
+
+class WalkDir
+ def initialize(config_file, root)
+ @queue = Queue.new
+ @root = root
+ @config_file = config_file
+ @threads = []
+
+ @files = walk(@root)
+ end
+
+ def books
+ @threads = []
+ num_threads.times do
+ @threads << Thread.new do
+ BookLoader.new(@config_file, @queue).run
+ end
+ end
+
+ result = []
+ @files = remove_duplicates(@files)
+ for file in @files.sort()
+ if Book.can_handle?(file) && (!is_duplicate?(file))
+ # Queue this book to be loaded and added to the DB by a BookLoader thread
+ @queue << file
+ end
+ end
+
+ @threads.count.times { @queue << BookLoader::DONE_MARKER }
+
+ @threads.each { |t| t.join }
+ end
+
+ # Duplicate versions of a text are named
+ # xxx_suffix.ext
+ # Where suffix is one of bis, ter, quater, quinquies
+ # for the 2nd, 3rd, 4th or 5th variant respectively.
+ def is_duplicate?(file)
+ s = file.to_s
+ suffix = ['_bis.', '_ter.', '_quater.', '_quinquies.']
+ suffix.each do |pat|
+ if s.include?(pat)
+ return true
+ end
+ end
+
+ return false
+ end
+
+ def remove_duplicates(files)
+ unique = {}
+ for file in files
+ if Book.can_handle?(file)
+ key = File.dirname(file) + '/' + File.basename(file, '.*')
+ if unique.has_key?(key)
+ new_ext = File.extname(file)
+ old_ext = File.extname(unique[key])
+ if ('.pdf' == old_ext) && ('.epub' == new_ext)
+ # Prefer EPUB over PDF
+ puts 'REPLACED ' + unique[key].to_s + ' with ' + file.to_s
+ unique[key] = file
+ else
+ puts 'DROPPED ' + file.to_s + " because it's superceded by " + unique[key].to_s
+ end
+ else
+ unique[key] = file
+ end
+ end
+ end
+
+ return unique.values
+ end
+
+ def walk(path)
+ result = []
+ children = Dir.entries(path)
+ for child in children
+ fullName = (path.chomp("/")) + "/" + child
+ if (File.directory?(fullName)) and (child != ".") and (child != "..") and (!File.symlink?(fullName))
+ sub = walk(fullName)
+ if (sub != nil) and (sub.length > 0)
+ result.concat(sub)
+ end
+ elsif (! File.directory?(fullName))
+ result.push(fullName)
+ end
+ end
+ return result
+ end
+
+ def num_threads
+ # TOOD: make this (auto?) configurable
+ 12
+ end
+end