Add cancellations_loader, and rework gtfs to handle a stop_code with multiple stop_ids.
authorChris Jaekl <chris@localhost>
Mon, 13 Feb 2023 04:17:41 +0000 (23:17 -0500)
committerChris Jaekl <chris@localhost>
Mon, 13 Feb 2023 04:17:41 +0000 (23:17 -0500)
.gitignore
lib/cancellations_loader.rb [new file with mode: 0644]
lib/gtfs.rb
lib/gtfs_loader.rb

index e8f8fd10b0b5ef977fa9b73fb2e873b9ed7563ec..79ab934b99a35633f7fa777c5060c191458169c2 100644 (file)
@@ -1,3 +1,3 @@
 gtfs.db
-cancellations
+csv
 gtfs
diff --git a/lib/cancellations_loader.rb b/lib/cancellations_loader.rb
new file mode 100644 (file)
index 0000000..beade92
--- /dev/null
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+require "csv"
+require "pry"
+require "sqlite3"
+
+class CancellationsLoader
+  def initialize
+  end
+
+  def create_table
+    cancellations_sql = <<~EOS
+      CREATE TABLE cancellations (
+        source_time VARCHAR(5),
+        id CHAR(1),
+        next_trip INTEGER,
+        is_statusnet CHAR(1),
+        source VARCHAR(48),
+        message VARCHAR(140),
+        route VARCHAR(3),
+        destination VARCHAR(48),
+        affected_route VARCHAR(48),
+        is_tweet CHAR(1),
+        destination_time VARCHAR(5)
+      );
+    EOS
+
+    db.execute cancellations_sql
+  end
+
+  def load_file(base_path:, filename:)
+    print "Loading #{filename}..."
+    $stdout.flush
+    db.execute("BEGIN TRANSACTION;")
+
+    filepath = File.join(base_path, filename)
+
+    cols = %w(source_time id next_trip is_statusnet source message route destination affected_route is_tweet destination_time)
+    count = 0
+
+    CSV.foreach(filepath, headers: true, col_sep: ",") do |row|
+      count += 1
+      values = cols.map{|col| row[col.to_s]}
+      placeholders = (["?"] * values.count).join(",")
+
+      sql = "INSERT INTO cancellations (#{cols.join(",")}) VALUES (#{placeholders})"
+
+      db.execute(sql, values)
+
+      if (0 == count % 10_000)
+        db.execute("COMMIT;")
+        db.execute("BEGIN TRANSACTION;")
+        print "."
+        $stdout.flush
+      end
+    end
+
+    db.execute("COMMIT;")
+    puts " done."
+  end
+
+  private
+
+  def db
+    @db ||= SQLite3::Database.new("gtfs.db")
+  end
+end
index 05c50b0d9e3060db3af7c08e382a99f89496b492..4923f4565343bd5c9636771efe9cafe6415335b4 100644 (file)
@@ -21,32 +21,38 @@ class Gtfs
     result = {}
 
     schedules = active_schedules(date_hash: date_hash)
-    placeholders = (["?"] * schedules.count).join(",")
-    stop_id = db.query("SELECT stop_id FROM stops WHERE stop_code=?", stop_code).first.first
+    schedule_placeholders = (["?"] * schedules.count).join(",")
+    stop_ids = db.query("SELECT stop_id FROM stops WHERE stop_code=?", stop_code).to_a.map do |row|
+      row.first
+    end
+    stop_id_placeholders = (["?"] * stop_ids.count).join(",")
 
     sql = <<~EOS
-      SELECT r.route_short_name,st.departure_time
+      SELECT r.route_short_name,t.trip_headsign,t.direction_id,st.departure_time
       FROM routes r
       INNER JOIN trips t ON t.route_id=r.route_id
       INNER JOIN stop_times st ON st.trip_id=t.trip_id
-      WHERE st.stop_id=?
-      AND service_id IN (#{placeholders})
+      WHERE st.stop_id IN (#{stop_id_placeholders})
+      AND service_id IN (#{schedule_placeholders})
       ORDER BY r.route_short_name,st.departure_time;
     EOS
 
-    db.query(sql, stop_id, schedules).each do |row|
-      stop_id = row[0]
-      departure_time = row[1]
+    db.query(sql, stop_ids, schedules).each do |row|
+      route = row[0..2] # [route_id, hedsign, direction_id]
+      departure_time = row[3]
+
+      if result[route].nil?
+        result[route] = []
+      end
 
-      result[stop_id] = [] if result[stop_id].nil?
-      result[stop_id] << departure_time
+      result[route] << departure_time
     end
 
     # Sometimes OC Transpo lists the same departure under two separate schedules, *both* of which are
     # simulteneously in effect.  (?)
     # Here we compensate for that by eliminating duplicates of the same departure time.
-    result.map do |route, departure_times|
-      [route, departure_times.uniq]
+    result.map do |route, departures|
+      [route, departures.uniq]
     end.to_h
   end
 
index 53e248a25cd8d06d572f8d2fbb85ca4a85fb5923..d0a5be503c6bc0930efd7348a418d6e2e900de9b 100644 (file)
@@ -102,7 +102,7 @@ class GtfsLoader
     sql_cols = csv_cols.map{|name| "date" == name ? "calendar_date" : name}
     count = 0
 
-    CSV.foreach("#{base_path}/#{table_name}.txt", headers: true, col_sep: ",") do |row|
+    CSV.foreach(File.join(base_path, "#{table_name}.txt"), headers: true, col_sep: ",") do |row|
       count += 1
       values = csv_cols.map{|col| row[col.to_s]}
       placeholders = (["?"] * values.count).join(",")