From 56ed007c7918ec7ea34b431b1f0f4ef9bbb02c56 Mon Sep 17 00:00:00 2001 From: Chris Jaekl Date: Wed, 30 Dec 2020 18:46:15 -0500 Subject: [PATCH] Multi-thread the parsing of books to speed up loading. --- book_loader.rb | 28 ++++++++++++++++++++++++++++ main.rb | 7 +++---- walkdir.rb => walk_dir.rb | 31 ++++++++++++++++++++++++------- 3 files changed, 55 insertions(+), 11 deletions(-) create mode 100644 book_loader.rb rename walkdir.rb => walk_dir.rb (82%) diff --git a/book_loader.rb b/book_loader.rb new file mode 100644 index 0000000..5516f04 --- /dev/null +++ b/book_loader.rb @@ -0,0 +1,28 @@ + +require_relative 'book' +require_relative 'store' + +class BookLoader + DONE_MARKER = '' + + def initialize(config_file, queue) + @config_file = config_file + @queue = queue + end + + def run + @store = Store.new(@config_file) + @store.connect() + + file = @queue.pop + until file == DONE_MARKER do + book = Book.new(@store) + book.load_from_file!(file) + @store.store_book(book) + + file = @queue.pop + end + + @store.disconnect() + end +end diff --git a/main.rb b/main.rb index b0c4b79..e294b4a 100644 --- a/main.rb +++ b/main.rb @@ -1,11 +1,10 @@ require_relative 'navigator' require_relative 'page' require_relative 'store' -require_relative 'walkdir' +require_relative 'walk_dir' @outputDir = 'output' -book_ids = [] @config_file = 'quanlib.ini' @skip_class = false @@ -39,8 +38,8 @@ end for arg in ARGV if ! arg.start_with?("--") puts 'Scanning directory "' + arg + '"...' - w = WalkDir.new(@store, arg) - book_ids += (w.books) + w = WalkDir.new(@config_file, arg) + w.books end end diff --git a/walkdir.rb b/walk_dir.rb similarity index 82% rename from walkdir.rb rename to walk_dir.rb index bbb56a2..a2c088f 100644 --- a/walkdir.rb +++ b/walk_dir.rb @@ -19,27 +19,39 @@ # .../DorothyGilman/P06_On_the_China_Station.epub. 
require_relative 'book' +require_relative 'book_loader' require_relative 'store' class WalkDir - def initialize(store, root) + def initialize(config_file, root) + @queue = Queue.new @root = root - @store = store + @config_file = config_file + @threads = [] + @files = walk(@root) end def books + @threads = [] + num_threads.times do + @threads << Thread.new do + BookLoader.new(@config_file, @queue).run + end + end + result = [] @files = remove_duplicates(@files) for file in @files.sort() if Book.can_handle?(file) && (!is_duplicate?(file)) - book = Book.new(@store) - book.load_from_file!(file) - id = @store.store_book(book) - result.push(id) + # Queue this book to be loaded and added to the DB by a BookLoader thread + @queue << file end end - return result + + @threads.count.times { @queue << BookLoader::DONE_MARKER } + + @threads.each { |t| t.join } end # Duplicate versions of a text are named @@ -98,4 +110,9 @@ class WalkDir end return result end + + def num_threads + # TODO: make this (auto?) configurable + 12 + end end -- 2.30.2