X-Git-Url: http://jaekl.net/gitweb/?a=blobdiff_plain;ds=sidebyside;f=walkdir.rb;h=b00645545fa984b5fbef17b09b45513957ab6f69;hb=872d620121706ad345b7e667521be1c7326c2e00;hp=236e3feaf7244852b4b58e780e9fdce456da6fc2;hpb=fcaeedd4d1c128ff84371c0a7db5d0af6751492a;p=quanlib.git diff --git a/walkdir.rb b/walkdir.rb index 236e3fe..b006455 100644 --- a/walkdir.rb +++ b/walkdir.rb @@ -4,7 +4,7 @@ # .../AuthorName/Title_of_the_Awesome_Book.ext # # Author is given as FirstLast. For example, -# Robert Anson Heinlein is RoberHeinlein, and +# Robert Anson Heinlein is RobertHeinlein, and # JKRowling is JoanneRowling. # # Book titles have spaces replaced with underscores, @@ -30,10 +30,11 @@ class WalkDir def books result = [] - for file in @files.sort + @files = remove_duplicates(@files) + for file in @files.sort() if Book.can_handle?(file) && (!is_duplicate?(file)) book = Book.new(@store) - book.load_from_file(file) + book.load_from_file!(file) id = @store.store_book(book) result.push(id) end @@ -57,6 +58,30 @@ class WalkDir return false end + def remove_duplicates(files) + unique = {} + for file in files + if Book.can_handle?(file) + key = File.dirname(file) + '/' + File.basename(file, '.*') + if unique.has_key?(key) + new_ext = File.extname(file) + old_ext = File.extname(unique[key]) + if ('.pdf' == old_ext) && ('.epub' == new_ext) + # Prefer EPUB over PDF + puts 'REPLACED ' + unique[key].to_s + ' with ' + file.to_s + unique[key] = file + else + puts 'DROPPED ' + file.to_s + " because it's superceded by " + unique[key].to_s + end + else + unique[key] = file + end + end + end + + return unique.values + end + def walk(path) result = [] children = Dir.entries(path) @@ -71,7 +96,6 @@ class WalkDir result.push(fullName) end end - #puts result return result end end