From c1c3b7569e6bc072d94d3c3007351f8edf2f60ec Mon Sep 17 00:00:00 2001 From: Chris Jaekl Date: Sun, 12 Feb 2023 23:17:41 -0500 Subject: [PATCH] Add cancellations_loader, and rework gtfs to handle a stop_code with multiple stop_ids. --- .gitignore | 2 +- lib/cancellations_loader.rb | 67 +++++++++++++++++++++++++++++++++++++ lib/gtfs.rb | 30 ++++++++++------- lib/gtfs_loader.rb | 2 +- 4 files changed, 87 insertions(+), 14 deletions(-) create mode 100644 lib/cancellations_loader.rb diff --git a/.gitignore b/.gitignore index e8f8fd1..79ab934 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ gtfs.db -cancellations +csv gtfs diff --git a/lib/cancellations_loader.rb b/lib/cancellations_loader.rb new file mode 100644 index 0000000..beade92 --- /dev/null +++ b/lib/cancellations_loader.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +require "csv" +require "pry" +require "sqlite3" + +class CancellationsLoader + def initialize + end + + def create_table + cancellations_sql = <<~EOS + CREATE TABLE cancellations ( + source_time VARCHAR(5), + id CHAR(1), + next_trip INTEGER, + is_statusnet CHAR(1), + source VARCHAR(48), + message VARCHAR(140), + route VARCHAR(3), + destination VARCHAR(48), + affected_route VARCHAR(48), + is_tweet CHAR(1), + destination_time VARCHAR(5) + ); + EOS + + db.execute cancellations_sql + end + + def load_file(base_path:, filename:) + print "Loading #{filename}..." + $stdout.flush + db.execute("BEGIN TRANSACTION;") + + filepath = File.join(base_path, filename) + + cols = %w(source_time id next_trip is_statusnet source message route destination affected_route is_tweet destination_time) + count = 0 + + CSV.foreach(filepath, headers: true, col_sep: ",") do |row| + count += 1 + values = cols.map{|col| row[col.to_s]} + placeholders = (["?"] * values.count).join(",") + + sql = "INSERT INTO cancellations (#{cols.join(",")}) VALUES (#{placeholders})" + + db.execute(sql, values) + + if (0 == count % 10_000) + db.execute("COMMIT;") + db.execute("BEGIN TRANSACTION;") + print "." + $stdout.flush + end + end + + db.execute("COMMIT;") + puts " done." + end + + private + + def db + @db ||= SQLite3::Database.new("gtfs.db") + end +end diff --git a/lib/gtfs.rb b/lib/gtfs.rb index 05c50b0..4923f45 100644 --- a/lib/gtfs.rb +++ b/lib/gtfs.rb @@ -21,32 +21,38 @@ class Gtfs result = {} schedules = active_schedules(date_hash: date_hash) - placeholders = (["?"] * schedules.count).join(",") - stop_id = db.query("SELECT stop_id FROM stops WHERE stop_code=?", stop_code).first.first + schedule_placeholders = (["?"] * schedules.count).join(",") + stop_ids = db.query("SELECT stop_id FROM stops WHERE stop_code=?", stop_code).to_a.map do |row| + row.first + end + stop_id_placeholders = (["?"] * stop_ids.count).join(",") sql = <<~EOS - SELECT r.route_short_name,st.departure_time + SELECT r.route_short_name,t.trip_headsign,t.direction_id,st.departure_time FROM routes r INNER JOIN trips t ON t.route_id=r.route_id INNER JOIN stop_times st ON st.trip_id=t.trip_id - WHERE st.stop_id=? - AND service_id IN (#{placeholders}) + WHERE st.stop_id IN (#{stop_id_placeholders}) + AND service_id IN (#{schedule_placeholders}) ORDER BY r.route_short_name,st.departure_time; EOS - db.query(sql, stop_id, schedules).each do |row| - stop_id = row[0] - departure_time = row[1] + db.query(sql, stop_ids, schedules).each do |row| + route = row[0..2] # [route_id, hedsign, direction_id] + departure_time = row[3] + + if result[route].nil? + result[route] = [] + end - result[stop_id] = [] if result[stop_id].nil? - result[stop_id] << departure_time + result[route] << departure_time end # Sometimes OC Transpo lists the same departure under two separate schedules, *both* of which are # simulteneously in effect. (?) # Here we compensate for that by eliminating duplicates of the same departure time. - result.map do |route, departure_times| - [route, departure_times.uniq] + result.map do |route, departures| + [route, departures.uniq] end.to_h end diff --git a/lib/gtfs_loader.rb b/lib/gtfs_loader.rb index 53e248a..d0a5be5 100644 --- a/lib/gtfs_loader.rb +++ b/lib/gtfs_loader.rb @@ -102,7 +102,7 @@ class GtfsLoader sql_cols = csv_cols.map{|name| "date" == name ? "calendar_date" : name} count = 0 - CSV.foreach("#{base_path}/#{table_name}.txt", headers: true, col_sep: ",") do |row| + CSV.foreach(File.join(base_path, "#{table_name}.txt"), headers: true, col_sep: ",") do |row| count += 1 values = csv_cols.map{|col| row[col.to_s]} placeholders = (["?"] * values.count).join(",") -- 2.39.2