From 16524c10423fe6573e24452359b34abc63077536 Mon Sep 17 00:00:00 2001 From: Chris Jaekl Date: Mon, 13 Feb 2023 22:43:36 -0500 Subject: [PATCH] Add basic reporting on cancellations and annual (scheduled) trip totals. --- lib/cancellations.rb | 38 +++++++++ lib/cancellations_loader.rb | 24 +++--- lib/database.rb | 12 +++ lib/date_utils.rb | 31 +++++++ lib/gtfs.rb | 164 +++++++++++++++++++++++++++--------- lib/gtfs_loader.rb | 35 ++++---- lib/stats.rb | 86 +++++++++++++++++++ 7 files changed, 322 insertions(+), 68 deletions(-) create mode 100644 lib/cancellations.rb create mode 100644 lib/database.rb create mode 100644 lib/date_utils.rb create mode 100644 lib/stats.rb diff --git a/lib/cancellations.rb b/lib/cancellations.rb new file mode 100644 index 0000000..0d1fb69 --- /dev/null +++ b/lib/cancellations.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require "date" +require "pry" +require "sqlite3" + +require "database" +require "date_utils" + +class Cancellations + def initialize + @date_utils = DateUtils.new + @db = Database.new.db + end + + def cancelled_trips_for_route(route:, headsign:, date_hash: nil) + date_spec = date_hash || @date_utils.today + + sql = "SELECT route, source, source_time, destination, destination_time "\ + "FROM cancellations "\ + "WHERE create_time LIKE '#{@date_utils.hyphenated_date_for(date_spec)} %' "\ + "AND route=? "\ + "AND destination=? "\ + "ORDER BY source_time;" + + rs = @db.query(sql, route, headsign) + + rs.to_a.map do |row| + { + route: row[0], + source: row[1], + source_time: row[2], + destination: row[3], + destination_time: row[4], + } + end + end +end diff --git a/lib/cancellations_loader.rb b/lib/cancellations_loader.rb index beade92..c1882e5 100644 --- a/lib/cancellations_loader.rb +++ b/lib/cancellations_loader.rb @@ -4,8 +4,11 @@ require "csv" require "pry" require "sqlite3" +require "database" + class CancellationsLoader def initialize + @db = Database.new end def create_table @@ -14,6 +17,7 @@ class CancellationsLoader source_time VARCHAR(5), id CHAR(1), next_trip INTEGER, + create_time VARCHAR(16), is_statusnet CHAR(1), source VARCHAR(48), message VARCHAR(140), @@ -25,17 +29,17 @@ class CancellationsLoader ); EOS - db.execute cancellations_sql + @db.execute cancellations_sql end def load_file(base_path:, filename:) print "Loading #{filename}..." $stdout.flush - db.execute("BEGIN TRANSACTION;") + @db.execute("BEGIN TRANSACTION;") filepath = File.join(base_path, filename) - cols = %w(source_time id next_trip is_statusnet source message route destination affected_route is_tweet destination_time) + cols = %w(source_time id next_trip create_time is_statusnet source message route destination affected_route is_tweet destination_time) count = 0 CSV.foreach(filepath, headers: true, col_sep: ",") do |row| @@ -45,23 +49,17 @@ class CancellationsLoader sql = "INSERT INTO cancellations (#{cols.join(",")}) VALUES (#{placeholders})" - db.execute(sql, values) + @db.execute(sql, values) if (0 == count % 10_000) - db.execute("COMMIT;") - db.execute("BEGIN TRANSACTION;") + @db.execute("COMMIT;") + @db.execute("BEGIN TRANSACTION;") print "." $stdout.flush end end - db.execute("COMMIT;") + @db.execute("COMMIT;") puts " done." end - - private - - def db - @db ||= SQLite3::Database.new("gtfs.db") - end end diff --git a/lib/database.rb b/lib/database.rb new file mode 100644 index 0000000..ceb4b97 --- /dev/null +++ b/lib/database.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +require "pry" +require "sqlite3" + +class Database + attr_reader :db + + def initialize + @db = SQLite3::Database.new("oct_sched.db") + end +end diff --git a/lib/date_utils.rb b/lib/date_utils.rb new file mode 100644 index 0000000..72ee6be --- /dev/null +++ b/lib/date_utils.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +require "date" +require "pry" + +class DateUtils + def initialize + end + + def date_for(date_hash) + "#{date_hash[:year]}#{date_hash[:month].to_s.rjust(2, "0")}#{date_hash[:day].to_s.rjust(2, "0")}" + end + + def hyphenated_date_for(date_hash) + "#{date_hash[:year]}-#{date_hash[:month].to_s.rjust(2, "0")}-#{date_hash[:day].to_s.rjust(2, "0")}" + end + + def today + # Beware: This assumes your system is in America/Toronto time. + now = Date.today + { + year: now.year, + month: now.month, + day: now.day, + } + end + + def weekday_for(date_hash) + Date.new(date_hash[:year], date_hash[:month], date_hash[:day]).wday + end +end diff --git a/lib/gtfs.rb b/lib/gtfs.rb index 4923f45..053f482 100644 --- a/lib/gtfs.rb +++ b/lib/gtfs.rb @@ -4,59 +4,167 @@ require "date" require "pry" require "sqlite3" +require "date_utils" +require "database" + class Gtfs + MULTI_PLATFORM_STOPS = { + "BARRHAVEN CENTRE" => ["1A", "2A"], + "BASELINE" => ["1A", "1B", "1C", "1D", "2A", "2B", "STN OFF ONLY"], + "BAYSHORE" => ["1A", "2A", "3A", "4A", "4B", "A", "B", "C"], + "BAYVIEW" => ["A", "B", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"], + "BEATRICE" => ["1A", "2A"], + "BILLINGS BRIDGE" => ["1A", "2A", "3A", "3B", "3C", "4B", "4C", "4D"], + "BLAIR" => ["A", "B", "C", "D", "E", "H", "I", "O-TRAIN", "O-TRAIN WEST / OUEST"], + "CHAPEL HILL" => ["A", "B"], + "CYRVILLE" => ["A", "B", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"], + "DOMINION" => ["1A", "2A", "STN OFF ONLY"], + "FALLOWFIELD" => ["1A", "2A"], + "GREENBORO" => ["1A", "1B", "2A"], + "HERON" => ["1A", "2A", "3A", "4A"], + "HURDMAN" => ["A", "B", "C", "D", "E", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"], + "INNOVATION" => ["A", "B"], + "IRIS" => ["1A", "2A", "STN OFF ONLY"], + "JEANNE D'ARC" => ["1A", "1B", "2A", "3A", "3B", "4A", "4B"], + "LEES" => ["A", "B", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"], + "LEITRIM" => ["1A", "2A"], + "LINCOLN FIELDS" => ["1A", "1B", "2A", "3A", "4A", "4B", "5A", "OFF ONLY"], + "LONGFIELDS" => ["1A", "2A"], + "LYCÉE CLAUDEL" => ["1A", "2A"], + "LYON" => ["A", "B", "C", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"], + "MACKENZIE KING" => ["1A", "2A"], + "MARKETPLACE" => ["1A", "2A"], + "MILLENNIUM" => ["1A", "2A"], + "NEPEAN WOODS" => ["1A", "2A"], + "PARLIAMENT / PARLEMENT" => ["A", "B", "C", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"], + "PIMISI" => ["A", "B", "C", "D", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"], + "PINECREST" => ["1A", "2A", "A", "B", "C"], + "PLEASANT PARK" => ["1A", "2A", "3A", "4A"], + "QUEENSWAY" => ["1A", "2A", "3A", "4A"], + "RIDEAU" => ["A", "B", "C", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"], + "RIVERSIDE" => ["1A", "2A"], + "RIVERVIEW" => ["1A", "2A"], + "SMYTH" => ["1A", "2A"], + "SOUTH KEYS" => ["1A", "1B", "1C", "1D", "2A"], + "STRANDHERD" => ["1A", "2A"], + "ST-LAURENT" => ["A", "B", "C", "D", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"], + "TERON" => ["1A", "2A"], + "TREMBLAY" => ["O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"], + "TRIM" => ["1A", "2A"], + "TUNNEY'S PASTURE" => ["A", "B", "C", "D", "E", "F", "G", "O-TRAIN", "O-TRAIN EAST / EST"], + "WALKLEY" => ["1A", "2A", "3A", "4A", "4B", "4C"], + "WESTBORO" => ["1A", "1B", "2A", "3A", "3A (A)", "4A", "A", "B", "OFF ONLY", "STN OFF ONLY"], + } + def initialize @calendar = nil + @date_utils = DateUtils.new + @db = Database.new.db end def active_schedules(date_hash: nil) - target_date = date_hash || today + target_date = date_hash || @date_utils.today base_schedules = calendar_schedules(target_date) overrides = calendar_overrides(target_date) base_schedules + overrides[:add] - overrides[:remove] end - def trips_for_stop(stop_code:, date_hash: nil) + def trips(stop_code: nil, stop_id: nil, stop_name: nil, date_hash: nil, originating_only: false) + args = [stop_code, stop_id, stop_name].compact + raise "Must specify exactly one of stop_code, stop_id, or stop_name" if args.count != 1 + + if !stop_code.nil? + trips_for_stop_code(stop_code: stop_code, date_hash: date_hash, originating_only: originating_only) + elsif !stop_id.nil? + trips_for_stop_ids(stop_ids: [stop_id], date_hash: date_hash, originating_only: originating_only) + else + trips_for_stop_name(stop_name: stop_name, date_hash: date_hash, originating_only: originating_only) + end + end + + private + + def trips_for_stop_code(stop_code:, date_hash:) + stop_ids = @db.query("SELECT stop_id FROM stops WHERE stop_code=?", stop_code).to_a.map do |row| + row.first + end + + trips_for_stop_ids(stop_ids: stop_ids, date_hash: date_hash) + end + + def trips_for_stop_ids(stop_ids:, date_hash:, originating_only: false) result = {} schedules = active_schedules(date_hash: date_hash) schedule_placeholders = (["?"] * schedules.count).join(",") - stop_ids = db.query("SELECT stop_id FROM stops WHERE stop_code=?", stop_code).to_a.map do |row| - row.first - end stop_id_placeholders = (["?"] * stop_ids.count).join(",") - sql = <<~EOS - SELECT r.route_short_name,t.trip_headsign,t.direction_id,st.departure_time + sql_head = <<~EOS + SELECT r.route_short_name,t.trip_headsign,t.direction_id,st.departure_time,t.trip_id FROM routes r INNER JOIN trips t ON t.route_id=r.route_id INNER JOIN stop_times st ON st.trip_id=t.trip_id WHERE st.stop_id IN (#{stop_id_placeholders}) AND service_id IN (#{schedule_placeholders}) + EOS + + sql_filter = if originating_only + "AND st.stop_sequence=1\n" + else + "" + end + + sql_tail = <<~EOS ORDER BY r.route_short_name,st.departure_time; EOS - db.query(sql, stop_ids, schedules).each do |row| + sql = "#{sql_head}#{sql_filter}#{sql_tail}" + + @db.query(sql, stop_ids, schedules).each do |row| route = row[0..2] # [route_id, hedsign, direction_id] departure_time = row[3] + trip_id = row[4] if result[route].nil? result[route] = [] end - result[route] << departure_time + result[route] << [departure_time, trip_id] end # Sometimes OC Transpo lists the same departure under two separate schedules, *both* of which are # simulteneously in effect. (?) # Here we compensate for that by eliminating duplicates of the same departure time. - result.map do |route, departures| - [route, departures.uniq] + result.map do |route, departure_tuples| + [ + route, + departure_tuples.group_by do |tuple| + tuple.first # departure time + end.map do |group_key, group_members| + group_members.first + end + ] end.to_h end - private + def trips_for_stop_name(stop_name:, date_hash:) + result = {} + + base_name = stop_name.upcase + suffixes = MULTI_PLATFORM_STOPS[base_name] || [nil] + names = suffixes.map { |suffix| [base_name, suffix].compact.join(" ") } + placeholders = (["?"] * names.count).join(",") + + stop_ids = @db.query( + "SELECT stop_id FROM stops WHERE stop_name IN (#{placeholders})", + names, + ).to_a.map do |row| + row.first + end + + trips_for_stop_ids(stop_ids: stop_ids, date_hash: date_hash) + end def calendar @calendar ||= begin @@ -65,20 +173,20 @@ class Gtfs FROM calendar ORDER BY start_date, end_date EOS - db.query(sql).to_a + @db.query(sql).to_a end end def calendar_overrides(target_date) result = {add: [], remove: []} - datestamp = date_for(target_date) + datestamp = @date_utils.date_for(target_date) sql = <<~EOS SELECT service_id, exception_type FROM calendar_dates WHERE calendar_date = ? EOS - db.query(sql).each do |row| + @db.query(sql).each do |row| case row[1] when "1" result[:add] << row[0] @@ -94,8 +202,8 @@ class Gtfs def calendar_schedules(target_date) result = [] - datestamp = date_for(target_date) - weekday = weekday_for(target_date) + datestamp = @date_utils.date_for(target_date) + weekday = @date_utils.weekday_for(target_date) calendar.each do |service_id, mon, tue, wed, thu, fri, sat, sun, start_date, end_date| if datestamp >= start_date && datestamp <= end_date if (0 == weekday && "1" == sun) || @@ -111,26 +219,4 @@ class Gtfs end result end - - def date_for(date_hash) - "#{date_hash[:year]}#{date_hash[:month].to_s.rjust(2, "0")}#{date_hash[:day].to_s.rjust(2, "0")}" - end - - def db - @db ||= SQLite3::Database.new("gtfs.db") - end - - def today - # Beware: This assumes your system is in America/Toronto time. - now = Date.today - { - year: now.year, - month: now.month, - day: now.day, - } - end - - def weekday_for(date_hash) - Date.new(date_hash[:year], date_hash[:month], date_hash[:day]).wday - end end diff --git a/lib/gtfs_loader.rb b/lib/gtfs_loader.rb index d0a5be5..4029436 100644 --- a/lib/gtfs_loader.rb +++ b/lib/gtfs_loader.rb @@ -4,9 +4,11 @@ require "csv" require "pry" require "sqlite3" +require "database" + class GtfsLoader def initialize - @db = nil + @db = Database.new.db end def create_tables @@ -80,7 +82,7 @@ class GtfsLoader stops_sql, trips_sql, ].each do |sql| - db.execute sql + @db.execute sql end end @@ -93,10 +95,17 @@ class GtfsLoader trips: %w(route_id service_id trip_id trip_headsign direction_id block_id shape_id), } - def load_table(base_path, table_name) + def load_all(root_path:) + Dir["#{root_path}/*"].each do |base_path| + puts "== Loading GTFS files from #{base_path} ==" + load_tables(base_path: base_path) + end + end + + def load_table(base_path:, table_name:) print "Loading #{table_name}..." $stdout.flush - db.execute("BEGIN TRANSACTION;") + @db.execute("BEGIN TRANSACTION;") csv_cols = TABLE_COLUMNS[table_name.to_sym] sql_cols = csv_cols.map{|name| "date" == name ? "calendar_date" : name} @@ -108,29 +117,23 @@ class GtfsLoader placeholders = (["?"] * values.count).join(",") sql = "INSERT INTO #{table_name} (#{sql_cols.join(",")}) VALUES (#{placeholders});" - db.execute(sql, values) + @db.execute(sql, values) if (0 == count % 10_000) - db.execute("COMMIT;") - db.execute("BEGIN TRANSACTION;") + @db.execute("COMMIT;") + @db.execute("BEGIN TRANSACTION;") print "." $stdout.flush end end - db.execute("COMMIT;") + @db.execute("COMMIT;") puts " done." end - def load_tables + def load_tables(base_path:) %w(calendar_dates calendar routes stops stop_times trips).each do |table| - load_table("gtfs/2022-12-14/", table) + load_table(base_path: base_path, table_name: table) end end - - private - - def db - @db ||= SQLite3::Database.new("gtfs.db") - end end diff --git a/lib/stats.rb b/lib/stats.rb new file mode 100644 index 0000000..5811fe7 --- /dev/null +++ b/lib/stats.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +require "sqlite3" + +require "database" +require "gtfs" +require "pry" + +class Stats + # Warning: doesn't account for leap years + # J F M A M J J A S O N D + DAYS_IN_MONTH = [nil, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + + def initialize + @db = Database.new.db + @gtfs = Gtfs.new + end + + def report(year:) + route_totals = {} + + (1..12).each do |month| + last_day = DAYS_IN_MONTH[month] + last_day += 1 if (month == 2) && (0 == year % 4) # leap year (doesn't account for 2000) + + (1..last_day).each do |day| + date_hash = {year: year, month: month, day: day} + date_string = "#{year}-#{month.to_s.rjust(2, "0")}-#{day.to_s.rjust(2, "0")}" + + counts = trip_counts(date_hash: date_hash) + + daily_total = 0 + counts.each do |route, count| + route_total = route_totals[route] || 0 + route_total += count + route_totals[route] = route_total + + daily_total += count + end + + puts "#{date_string};#{daily_total};#{counts.inspect}" + end + end + + annual_total = 0 + route_totals.each do |_route, count| + annual_total += count + end + + puts "ANNUAL_TOTAL;#{annual_total};#{route_totals.inspect}" + end + + def route_origin_stop_ids + @route_origin_stop_ids ||= @db.query( + "SELECT stop_id FROM stop_times WHERE stop_sequence=1 GROUP BY 1 ORDER BY 1;" + ).to_a.flatten + end + + # Returns a hash of route_num => trip_count for the given date + def trip_counts(date_hash: nil) + result = {} + + print "Checking for trips on #{date_hash[:year]}-#{date_hash[:month]}-#{date_hash[:day]}" + + route_origin_stop_ids.each do |stop_id| + print "." + $stdout.flush + + route_trips = @gtfs.trips(stop_id: stop_id, date_hash: date_hash).map do |k, v| + [ + k.first, + v.map do |x| + x.first + end, + ] + end.to_h.each do |route, trips| + count = result[route] || 0 + count += trips.count + result[route] = count + end + end + puts " done." + + result + end +end -- 2.39.2