From d0e7d5074a25b4e9b3e67137c9dc0fba5e202a3c Mon Sep 17 00:00:00 2001 From: Chris Jaekl Date: Sun, 12 Feb 2023 18:56:20 -0500 Subject: [PATCH] Initial commit: create gtfs tables and load them --- Gemfile | 4 ++ Gemfile.lock | 25 +++++++++ lib/gtfs.rb | 130 +++++++++++++++++++++++++++++++++++++++++++ lib/gtfs_loader.rb | 136 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 295 insertions(+) create mode 100644 Gemfile create mode 100644 Gemfile.lock create mode 100644 lib/gtfs.rb create mode 100644 lib/gtfs_loader.rb diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..977e379 --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source "https://rubygems.org" + +gem "pry-byebug" +gem "sqlite3" diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..f223f0b --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,25 @@ +GEM + remote: https://rubygems.org/ + specs: + byebug (11.1.3) + coderay (1.1.3) + method_source (1.0.0) + mini_portile2 (2.8.1) + pry (0.14.2) + coderay (~> 1.1) + method_source (~> 1.0) + pry-byebug (3.10.1) + byebug (~> 11.0) + pry (>= 0.13, < 0.15) + sqlite3 (1.6.0) + mini_portile2 (~> 2.8.0) + +PLATFORMS + ruby + +DEPENDENCIES + pry-byebug + sqlite3 + +BUNDLED WITH + 2.2.3 diff --git a/lib/gtfs.rb b/lib/gtfs.rb new file mode 100644 index 0000000..05c50b0 --- /dev/null +++ b/lib/gtfs.rb @@ -0,0 +1,130 @@ +# frozen_string_literal: true + +require "date" +require "pry" +require "sqlite3" + +class Gtfs + def initialize + @calendar = nil + end + + def active_schedules(date_hash: nil) + target_date = date_hash || today + base_schedules = calendar_schedules(target_date) + overrides = calendar_overrides(target_date) + + base_schedules + overrides[:add] - overrides[:remove] + end + + def trips_for_stop(stop_code:, date_hash: nil) + result = {} + + schedules = active_schedules(date_hash: date_hash) + placeholders = (["?"] * schedules.count).join(",") + stop_id = db.query("SELECT stop_id FROM stops WHERE stop_code=?", stop_code).first.first + + sql = <<~EOS + SELECT r.route_short_name,st.departure_time + FROM routes r + INNER JOIN trips t ON t.route_id=r.route_id + INNER JOIN stop_times st ON st.trip_id=t.trip_id + WHERE st.stop_id=? + AND service_id IN (#{placeholders}) + ORDER BY r.route_short_name,st.departure_time; + EOS + + db.query(sql, stop_id, schedules).each do |row| + stop_id = row[0] + departure_time = row[1] + + result[stop_id] = [] if result[stop_id].nil? + result[stop_id] << departure_time + end + + # Sometimes OC Transpo lists the same departure under two separate schedules, *both* of which are + # simulteneously in effect. (?) + # Here we compensate for that by eliminating duplicates of the same departure time. + result.map do |route, departure_times| + [route, departure_times.uniq] + end.to_h + end + + private + + def calendar + @calendar ||= begin + sql = <<~EOS + SELECT service_id, monday, tuesday, wednesday, thursday, friday, saturday, sunday, start_date, end_date + FROM calendar + ORDER BY start_date, end_date + EOS + db.query(sql).to_a + end + end + + def calendar_overrides(target_date) + result = {add: [], remove: []} + datestamp = date_for(target_date) + sql = <<~EOS + SELECT service_id, exception_type + FROM calendar_dates + WHERE calendar_date = ? + EOS + + db.query(sql).each do |row| + case row[1] + when "1" + result[:add] << row[0] + when "2" + result[:remove] << row[0] + else + raise "Unexpected exception_type #{row[1].inspect} found in calendar_dates" + end + end + + result + end + + def calendar_schedules(target_date) + result = [] + datestamp = date_for(target_date) + weekday = weekday_for(target_date) + calendar.each do |service_id, mon, tue, wed, thu, fri, sat, sun, start_date, end_date| + if datestamp >= start_date && datestamp <= end_date + if (0 == weekday && "1" == sun) || + (1 == weekday && "1" == mon) || + (2 == weekday && "1" == tue) || + (3 == weekday && "1" == wed) || + (4 == weekday && "1" == thu) || + (5 == weekday && "1" == fri) || + (6 == weekday && "1" == sat) + result << service_id + end + end + end + result + end + + def date_for(date_hash) + "#{date_hash[:year]}#{date_hash[:month].to_s.rjust(2, "0")}#{date_hash[:day].to_s.rjust(2, "0")}" + end + + def db + @db ||= SQLite3::Database.new("gtfs.db") + end + + def today + # Beware: This assumes your system is in America/Toronto time. + now = Date.today + { + year: now.year, + month: now.month, + day: now.day, + } + end + + def weekday_for(date_hash) + Date.new(date_hash[:year], date_hash[:month], date_hash[:day]).wday + end +end diff --git a/lib/gtfs_loader.rb b/lib/gtfs_loader.rb new file mode 100644 index 0000000..53e248a --- /dev/null +++ b/lib/gtfs_loader.rb @@ -0,0 +1,136 @@ +# frozen_string_literal: true + +require "csv" +require "pry" +require "sqlite3" + +class GtfsLoader + def initialize + @db = nil + end + + def create_tables + calendar_sql = <<~EOS + CREATE TABLE calendar ( + service_id VARCHAR(26), + monday CHAR(1), + tuesday CHAR(1), + wednesday CHAR(1), + thursday CHAR(1), + friday CHAR(1), + saturday CHAR(1), + sunday CHAR(1), + start_date CHAR(8), + end_date CHAR(8) + ); + EOS + calendar_dates_sql = <<~EOS + CREATE TABLE calendar_dates ( + service_id VARCHAR(26), + calendar_date CHAR(8), + exception_type CHAR(1) + ); + EOS + routes_sql = <<~EOS + CREATE TABLE routes ( + route_id VARCHAR(8), + route_short_name VARCHAR(16), + route_type CHAR(1), + route_color VARCHAR(6), + route_text_color VARCHAR(6) + ); + EOS + stop_times_sql = <<~EOS + CREATE TABLE stop_times ( + trip_id VARCHAR(35), + arrival_time VARCHAR(8), + departure_time VARCHAR(8), + stop_id VARCHAR(5), + stop_sequence VARCHAR(2), + pickup_type CHAR(1), + drop_off_type CHAR(1) + ); + EOS + stops_sql = <<~EOS + CREATE TABLE stops ( + stop_id VARCHAR(5), + stop_code VARCHAR(4), + stop_name VARCHAR(48), + stop_lat FLOAT, + stop_lon FLOAT + ); + EOS + trips_sql = <<~EOS + CREATE TABLE trips ( + route_id VARCHAR(8), + service_id VARCHAR(26), + trip_id VARCHAR(35), + trip_headsign VARCHAR(48), + direction_id CHAR(1), + block_id VARCHAR(8), + shape_id VARCHAR(7) + ); + EOS + + [ + calendar_sql, + calendar_dates_sql, + routes_sql, + stop_times_sql, + stops_sql, + trips_sql, + ].each do |sql| + db.execute sql + end + end + + TABLE_COLUMNS = { + calendar: %w(service_id monday tuesday wednesday thursday friday saturday sunday start_date end_date), + calendar_dates: %w(service_id date exception_type), + routes: %w(route_id route_short_name route_type route_color route_text_color), + stop_times: %w(trip_id arrival_time departure_time stop_id stop_sequence pickup_type drop_off_type), + stops: %w(stop_id stop_code stop_name stop_lat stop_lon), + trips: %w(route_id service_id trip_id trip_headsign direction_id block_id shape_id), + } + + def load_table(base_path, table_name) + print "Loading #{table_name}..." + $stdout.flush + db.execute("BEGIN TRANSACTION;") + + csv_cols = TABLE_COLUMNS[table_name.to_sym] + sql_cols = csv_cols.map{|name| "date" == name ? "calendar_date" : name} + count = 0 + + CSV.foreach("#{base_path}/#{table_name}.txt", headers: true, col_sep: ",") do |row| + count += 1 + values = csv_cols.map{|col| row[col.to_s]} + placeholders = (["?"] * values.count).join(",") + sql = "INSERT INTO #{table_name} (#{sql_cols.join(",")}) VALUES (#{placeholders});" + + db.execute(sql, values) + + if (0 == count % 10_000) + db.execute("COMMIT;") + db.execute("BEGIN TRANSACTION;") + print "." + $stdout.flush + end + end + + db.execute("COMMIT;") + puts " done." + end + + def load_tables + %w(calendar_dates calendar routes stops stop_times trips).each do |table| + load_table("gtfs/2022-12-14/", table) + end + end + + private + + def db + @db ||= SQLite3::Database.new("gtfs.db") + end +end -- 2.39.2