Initial commit: create gtfs tables and load them
authorChris Jaekl <chris@localhost>
Sun, 12 Feb 2023 23:56:20 +0000 (18:56 -0500)
committerChris Jaekl <chris@localhost>
Sun, 12 Feb 2023 23:56:20 +0000 (18:56 -0500)
Gemfile [new file with mode: 0644]
Gemfile.lock [new file with mode: 0644]
lib/gtfs.rb [new file with mode: 0644]
lib/gtfs_loader.rb [new file with mode: 0644]

diff --git a/Gemfile b/Gemfile
new file mode 100644 (file)
index 0000000..977e379
--- /dev/null
+++ b/Gemfile
@@ -0,0 +1,4 @@
+source "https://rubygems.org"
+
+gem "pry-byebug"
+gem "sqlite3"
diff --git a/Gemfile.lock b/Gemfile.lock
new file mode 100644 (file)
index 0000000..f223f0b
--- /dev/null
@@ -0,0 +1,25 @@
+GEM
+  remote: https://rubygems.org/
+  specs:
+    byebug (11.1.3)
+    coderay (1.1.3)
+    method_source (1.0.0)
+    mini_portile2 (2.8.1)
+    pry (0.14.2)
+      coderay (~> 1.1)
+      method_source (~> 1.0)
+    pry-byebug (3.10.1)
+      byebug (~> 11.0)
+      pry (>= 0.13, < 0.15)
+    sqlite3 (1.6.0)
+      mini_portile2 (~> 2.8.0)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  pry-byebug
+  sqlite3
+
+BUNDLED WITH
+   2.2.3
diff --git a/lib/gtfs.rb b/lib/gtfs.rb
new file mode 100644 (file)
index 0000000..05c50b0
--- /dev/null
@@ -0,0 +1,130 @@
+# frozen_string_literal: true
+
+require "date"
+require "pry"
+require "sqlite3"
+
+class Gtfs
+  def initialize
+    @calendar = nil
+  end
+
+  def active_schedules(date_hash: nil)
+    target_date = date_hash || today
+    base_schedules = calendar_schedules(target_date)
+    overrides = calendar_overrides(target_date)
+
+    base_schedules + overrides[:add] - overrides[:remove]
+  end
+
+  def trips_for_stop(stop_code:, date_hash: nil)
+    result = {}
+
+    schedules = active_schedules(date_hash: date_hash)
+    placeholders = (["?"] * schedules.count).join(",")
+    stop_id = db.query("SELECT stop_id FROM stops WHERE stop_code=?", stop_code).first.first
+
+    sql = <<~EOS
+      SELECT r.route_short_name,st.departure_time
+      FROM routes r
+      INNER JOIN trips t ON t.route_id=r.route_id
+      INNER JOIN stop_times st ON st.trip_id=t.trip_id
+      WHERE st.stop_id=?
+      AND service_id IN (#{placeholders})
+      ORDER BY r.route_short_name,st.departure_time;
+    EOS
+
+    db.query(sql, stop_id, schedules).each do |row|
+      stop_id = row[0]
+      departure_time = row[1]
+
+      result[stop_id] = [] if result[stop_id].nil?
+      result[stop_id] << departure_time
+    end
+
+    # Sometimes OC Transpo lists the same departure under two separate schedules, *both* of which are
+    # simulteneously in effect.  (?)
+    # Here we compensate for that by eliminating duplicates of the same departure time.
+    result.map do |route, departure_times|
+      [route, departure_times.uniq]
+    end.to_h
+  end
+
+  private
+
+  def calendar
+    @calendar ||= begin
+      sql = <<~EOS
+        SELECT service_id, monday, tuesday, wednesday, thursday, friday, saturday, sunday, start_date, end_date
+        FROM calendar
+        ORDER BY start_date, end_date
+      EOS
+      db.query(sql).to_a
+    end
+  end
+
+  def calendar_overrides(target_date)
+    result = {add: [], remove: []}
+    datestamp = date_for(target_date)
+    sql = <<~EOS
+      SELECT service_id, exception_type
+      FROM calendar_dates
+      WHERE calendar_date = ?
+    EOS
+
+    db.query(sql).each do |row|
+      case row[1]
+      when "1"
+        result[:add] << row[0]
+      when "2"
+        result[:remove] << row[0]
+      else
+        raise "Unexpected exception_type #{row[1].inspect} found in calendar_dates"
+      end
+    end
+
+    result
+  end
+
+  def calendar_schedules(target_date)
+    result = []
+    datestamp = date_for(target_date)
+    weekday = weekday_for(target_date)
+    calendar.each do |service_id, mon, tue, wed, thu, fri, sat, sun, start_date, end_date|
+      if datestamp >= start_date && datestamp <= end_date
+        if (0 == weekday && "1" == sun) ||
+            (1 == weekday && "1" == mon) ||
+            (2 == weekday && "1" == tue) ||
+            (3 == weekday && "1" == wed) ||
+            (4 == weekday && "1" == thu) ||
+            (5 == weekday && "1" == fri) ||
+            (6 == weekday && "1" == sat)
+          result << service_id
+        end
+      end
+    end
+    result
+  end
+
+  def date_for(date_hash)
+    "#{date_hash[:year]}#{date_hash[:month].to_s.rjust(2, "0")}#{date_hash[:day].to_s.rjust(2, "0")}"
+  end
+
+  def db
+    @db ||= SQLite3::Database.new("gtfs.db")
+  end
+
+  def today
+    # Beware:  This assumes your system is in America/Toronto time.
+    now = Date.today
+    {
+      year: now.year,
+      month: now.month,
+      day: now.day,
+    }
+  end
+
+  def weekday_for(date_hash)
+    Date.new(date_hash[:year], date_hash[:month], date_hash[:day]).wday
+  end
+end
diff --git a/lib/gtfs_loader.rb b/lib/gtfs_loader.rb
new file mode 100644 (file)
index 0000000..53e248a
--- /dev/null
@@ -0,0 +1,136 @@
+# frozen_string_literal: true
+
+require "csv"
+require "pry"
+require "sqlite3"
+
+class GtfsLoader
+  def initialize
+    @db = nil
+  end
+
+  def create_tables
+    calendar_sql = <<~EOS
+      CREATE TABLE calendar (
+        service_id VARCHAR(26),
+        monday CHAR(1),
+        tuesday CHAR(1),
+        wednesday CHAR(1),
+        thursday CHAR(1),
+        friday CHAR(1),
+        saturday CHAR(1),
+        sunday CHAR(1),
+        start_date CHAR(8),
+        end_date CHAR(8)
+      );
+    EOS
+    calendar_dates_sql = <<~EOS
+      CREATE TABLE calendar_dates (
+        service_id VARCHAR(26),
+        calendar_date CHAR(8),
+        exception_type CHAR(1)
+      );
+    EOS
+    routes_sql = <<~EOS
+      CREATE TABLE routes (
+        route_id VARCHAR(8),
+        route_short_name VARCHAR(16),
+        route_type CHAR(1),
+        route_color VARCHAR(6),
+        route_text_color VARCHAR(6)
+      );
+    EOS
+    stop_times_sql = <<~EOS
+      CREATE TABLE stop_times (
+        trip_id VARCHAR(35),
+        arrival_time VARCHAR(8),
+        departure_time VARCHAR(8),
+        stop_id VARCHAR(5),
+        stop_sequence VARCHAR(2),
+        pickup_type CHAR(1),
+        drop_off_type CHAR(1)
+      );
+    EOS
+    stops_sql = <<~EOS
+      CREATE TABLE stops (
+        stop_id VARCHAR(5),
+        stop_code VARCHAR(4),
+        stop_name VARCHAR(48),
+        stop_lat FLOAT,
+        stop_lon FLOAT
+      );
+    EOS
+    trips_sql = <<~EOS
+      CREATE TABLE trips (
+        route_id VARCHAR(8),
+        service_id VARCHAR(26),
+        trip_id VARCHAR(35),
+        trip_headsign VARCHAR(48),
+        direction_id CHAR(1),
+        block_id VARCHAR(8),
+        shape_id VARCHAR(7)
+      );
+    EOS
+
+    [
+      calendar_sql,
+      calendar_dates_sql,
+      routes_sql,
+      stop_times_sql,
+      stops_sql,
+      trips_sql,
+    ].each do |sql|
+      db.execute sql
+    end
+  end
+
+  TABLE_COLUMNS = {
+    calendar: %w(service_id monday tuesday wednesday thursday friday saturday sunday start_date end_date),
+    calendar_dates: %w(service_id date exception_type),
+    routes: %w(route_id route_short_name route_type route_color route_text_color),
+    stop_times: %w(trip_id arrival_time departure_time stop_id stop_sequence pickup_type drop_off_type),
+    stops: %w(stop_id stop_code stop_name stop_lat stop_lon),
+    trips: %w(route_id service_id trip_id trip_headsign direction_id block_id shape_id),
+  }
+
+  def load_table(base_path, table_name)
+    print "Loading #{table_name}..."
+    $stdout.flush
+    db.execute("BEGIN TRANSACTION;")
+
+    csv_cols = TABLE_COLUMNS[table_name.to_sym]
+    sql_cols = csv_cols.map{|name| "date" == name ? "calendar_date" : name}
+    count = 0
+
+    CSV.foreach("#{base_path}/#{table_name}.txt", headers: true, col_sep: ",") do |row|
+      count += 1
+      values = csv_cols.map{|col| row[col.to_s]}
+      placeholders = (["?"] * values.count).join(",")
+      sql = "INSERT INTO #{table_name} (#{sql_cols.join(",")}) VALUES (#{placeholders});"
+
+      db.execute(sql, values)
+
+      if (0 == count % 10_000)
+        db.execute("COMMIT;")
+        db.execute("BEGIN TRANSACTION;")
+        print "."
+        $stdout.flush
+      end
+    end
+
+    db.execute("COMMIT;")
+    puts " done."
+  end
+
+  def load_tables
+    %w(calendar_dates calendar routes stops stop_times trips).each do |table|
+      load_table("gtfs/2022-12-14/", table)
+    end
+  end
+
+  private
+
+  def db
+    @db ||= SQLite3::Database.new("gtfs.db")
+  end
+end