jaekl.net Git - quanlib.git/commitdiff
Add extract.rb: extract library books to .txt (for indexing).
author Chris Jaekl <cejaekl@yahoo.com>
Mon, 8 Jul 2019 23:48:07 +0000 (19:48 -0400)
committer Chris Jaekl <cejaekl@yahoo.com>
Mon, 8 Jul 2019 23:48:07 +0000 (19:48 -0400)
extract.rb [new file with mode: 0644]

diff --git a/extract.rb b/extract.rb
new file mode 100644 (file)
index 0000000..c695941
--- /dev/null
@@ -0,0 +1,50 @@
+require 'find'
+require 'pathname'
+require 'shellwords'
+
+def exec(cmdline)
+  puts "$ #{cmdline}"
+  result = system(cmdline)
+  unless result
+    puts "FAILED:  #{cmdline}"
+  end
+  result
+end
+
+def extract_epub(source_file, source_path, dest_path)
+  relative_path = source_file[source_path.length .. source_file.length]
+  dest_file = "#{dest_path}/#{relative_path}"
+  dest_file = dest_file[0 .. (dest_file.length - 6)] + ".txt"
+
+  required_path = Pathname(dest_file).dirname
+  unless File.directory? required_path
+    unless exec("mkdir -p #{required_path}")
+      return false
+    end
+  end
+
+  if File.exist? dest_file
+    source_time = File.mtime source_file
+    dest_time = File.mtime dest_file
+    comp = dest_time <=> source_time
+    if comp > 0
+      return true # Nothing to do, extraction is already up-to-date
+    end
+  end
+    
+  exec("ebook-convert #{source_file} #{dest_file}")
+end
+
+def scan_dir(source_path, dest_path) 
+  Find.find(source_path) do |f|
+    if f.match(/.epub\Z/)
+      unless (f.match(/_bis.epub\Z/) || f.match(/_ter.epub\Z/) || f.match(/_quater.epub\Z/))
+        extract_epub(f, source_path, dest_path)
+      end
+    end
+  end
+end
+
+dest_path = ARGV[0]
+for arg in ARGV[1 .. ARGV.length]
+  scan_dir(arg, dest_path)
+end