X-Git-Url: http://jaekl.net/gitweb/?a=blobdiff_plain;ds=sidebyside;f=walkdir.rb;h=bbb56a263fbb70381875182d88bcf21fcb05c1b3;hb=9c227ac21d693fa2903a7c07e643cba99cd69031;hp=ce1954ca7a11d27417afecf739af99fdbd98c83e;hpb=4b53af822cda819dd82d0d3e7ed066c2966ae4bf;p=quanlib.git diff --git a/walkdir.rb b/walkdir.rb index ce1954c..bbb56a2 100644 --- a/walkdir.rb +++ b/walkdir.rb @@ -4,7 +4,7 @@ # .../AuthorName/Title_of_the_Awesome_Book.ext # # Author is given as FirstLast. For example, -# Robert Anson Heinlein is RoberHeinlein, and +# Robert Anson Heinlein is RobertHeinlein, and # JKRowling is JoanneRowling. # # Book titles have spaces replaced with underscores, @@ -18,8 +18,8 @@ # and Mrs. Pollifax volume 6, On the China Station, is # .../DorothyGilman/P06_On_the_China_Station.epub. -require 'book' -require 'store' +require_relative 'book' +require_relative 'store' class WalkDir def initialize(store, root) @@ -30,17 +30,58 @@ class WalkDir def books result = [] - for file in @files.sort - if Book.canHandle?(file) - book = Book.new() - book.loadFromFile(file) - @store.store_book(book) - result.push(book) + @files = remove_duplicates(@files) + for file in @files.sort() + if Book.can_handle?(file) && (!is_duplicate?(file)) + book = Book.new(@store) + book.load_from_file!(file) + id = @store.store_book(book) + result.push(id) end end return result end + # Duplicate versions of a text are named + # xxx_suffix.ext + # Where suffix is one of bis, ter, quater, quinquies + # for the 2nd, 3rd, 4th or 5th variant respectively. + def is_duplicate?(file) + s = file.to_s + suffix = ['_bis.', '_ter.', '_quater.', '_quinquies.'] + suffix.each do |pat| + if s.include?(pat) + return true + end + end + + return false + end + + def remove_duplicates(files) + unique = {} + for file in files + if Book.can_handle?(file) + key = File.dirname(file) + '/' + File.basename(file, '.*') + if unique.has_key?(key) + new_ext = File.extname(file) + old_ext = File.extname(unique[key]) + if ('.pdf' == old_ext) && ('.epub' == new_ext) + # Prefer EPUB over PDF + puts 'REPLACED ' + unique[key].to_s + ' with ' + file.to_s + unique[key] = file + else + puts 'DROPPED ' + file.to_s + " because it's superceded by " + unique[key].to_s + end + else + unique[key] = file + end + end + end + + return unique.values + end + def walk(path) result = [] children = Dir.entries(path) @@ -55,7 +96,6 @@ class WalkDir result.push(fullName) end end - #puts result return result end end