Add basic reporting on cancellations and annual (scheduled) trip totals.
authorChris Jaekl <chris@localhost>
Tue, 14 Feb 2023 03:43:36 +0000 (22:43 -0500)
committerChris Jaekl <chris@localhost>
Tue, 14 Feb 2023 03:43:36 +0000 (22:43 -0500)
lib/cancellations.rb [new file with mode: 0644]
lib/cancellations_loader.rb
lib/database.rb [new file with mode: 0644]
lib/date_utils.rb [new file with mode: 0644]
lib/gtfs.rb
lib/gtfs_loader.rb
lib/stats.rb [new file with mode: 0644]

diff --git a/lib/cancellations.rb b/lib/cancellations.rb
new file mode 100644 (file)
index 0000000..0d1fb69
--- /dev/null
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+require "date"
+require "pry"
+require "sqlite3"
+
+require "database"
+require "date_utils"
+
+class Cancellations
+  def initialize
+    @date_utils = DateUtils.new
+    @db = Database.new.db
+  end
+
+  def cancelled_trips_for_route(route:, headsign:, date_hash: nil)
+    date_spec = date_hash || @date_utils.today
+
+    sql = "SELECT route, source, source_time, destination, destination_time "\
+      "FROM cancellations "\
+      "WHERE create_time LIKE '#{@date_utils.hyphenated_date_for(date_spec)} %' "\
+      "AND route=? "\
+      "AND destination=? "\
+      "ORDER BY source_time;"
+
+    rs = @db.query(sql, route, headsign)
+
+    rs.to_a.map do |row|
+      {
+        route: row[0],
+        source: row[1],
+        source_time: row[2],
+        destination: row[3],
+        destination_time: row[4],
+      }
+    end
+  end
+end
index beade928cc84c129c1df9f4a9da3d39cc54d7568..c1882e5544044e4d6273bdf777f8e3f5ff4486ee 100644 (file)
@@ -4,8 +4,11 @@ require "csv"
 require "pry"
 require "sqlite3"
 
+require "database"
+
 class CancellationsLoader
   def initialize
+    @db = Database.new
   end
 
   def create_table
@@ -14,6 +17,7 @@ class CancellationsLoader
         source_time VARCHAR(5),
         id CHAR(1),
         next_trip INTEGER,
+        create_time VARCHAR(16),
         is_statusnet CHAR(1),
         source VARCHAR(48),
         message VARCHAR(140),
@@ -25,17 +29,17 @@ class CancellationsLoader
       );
     EOS
 
-    db.execute cancellations_sql
+    @db.execute cancellations_sql
   end
 
   def load_file(base_path:, filename:)
     print "Loading #{filename}..."
     $stdout.flush
-    db.execute("BEGIN TRANSACTION;")
+    @db.execute("BEGIN TRANSACTION;")
 
     filepath = File.join(base_path, filename)
 
-    cols = %w(source_time id next_trip is_statusnet source message route destination affected_route is_tweet destination_time)
+    cols = %w(source_time id next_trip create_time is_statusnet source message route destination affected_route is_tweet destination_time)
     count = 0
 
     CSV.foreach(filepath, headers: true, col_sep: ",") do |row|
@@ -45,23 +49,17 @@ class CancellationsLoader
 
       sql = "INSERT INTO cancellations (#{cols.join(",")}) VALUES (#{placeholders})"
 
-      db.execute(sql, values)
+      @db.execute(sql, values)
 
       if (0 == count % 10_000)
-        db.execute("COMMIT;")
-        db.execute("BEGIN TRANSACTION;")
+        @db.execute("COMMIT;")
+        @db.execute("BEGIN TRANSACTION;")
         print "."
         $stdout.flush
       end
     end
 
-    db.execute("COMMIT;")
+    @db.execute("COMMIT;")
     puts " done."
   end
-
-  private
-
-  def db
-    @db ||= SQLite3::Database.new("gtfs.db")
-  end
 end
diff --git a/lib/database.rb b/lib/database.rb
new file mode 100644 (file)
index 0000000..ceb4b97
--- /dev/null
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+require "pry"
+require "sqlite3"
+
+class Database
+  attr_reader :db
+
+  def initialize
+    @db = SQLite3::Database.new("oct_sched.db")
+  end
+end
diff --git a/lib/date_utils.rb b/lib/date_utils.rb
new file mode 100644 (file)
index 0000000..72ee6be
--- /dev/null
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+require "date"
+require "pry"
+
+class DateUtils
+  def initialize
+  end
+
+  def date_for(date_hash)
+    "#{date_hash[:year]}#{date_hash[:month].to_s.rjust(2, "0")}#{date_hash[:day].to_s.rjust(2, "0")}"
+  end
+
+  def hyphenated_date_for(date_hash)
+    "#{date_hash[:year]}-#{date_hash[:month].to_s.rjust(2, "0")}-#{date_hash[:day].to_s.rjust(2, "0")}"
+  end
+
+  def today
+    # Beware:  This assumes your system is in America/Toronto time.
+    now = Date.today
+    {
+      year: now.year,
+      month: now.month,
+      day: now.day,
+    }
+  end
+
+  def weekday_for(date_hash)
+    Date.new(date_hash[:year], date_hash[:month], date_hash[:day]).wday
+  end
+end
index 4923f4565343bd5c9636771efe9cafe6415335b4..053f4828f9f5c5964e5a160679bbd5db09cd1659 100644 (file)
@@ -4,59 +4,167 @@ require "date"
 require "pry"
 require "sqlite3"
 
+require "date_utils"
+require "database"
+
 class Gtfs
+  MULTI_PLATFORM_STOPS = {
+    "BARRHAVEN CENTRE" => ["1A", "2A"],
+    "BASELINE" => ["1A", "1B", "1C", "1D", "2A", "2B", "STN OFF ONLY"],
+    "BAYSHORE" => ["1A", "2A", "3A", "4A", "4B", "A", "B", "C"],
+    "BAYVIEW" => ["A", "B", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"],
+    "BEATRICE" => ["1A", "2A"],
+    "BILLINGS BRIDGE" => ["1A", "2A", "3A", "3B", "3C", "4B", "4C", "4D"],
+    "BLAIR" => ["A", "B", "C", "D", "E", "H", "I", "O-TRAIN", "O-TRAIN WEST / OUEST"],
+    "CHAPEL HILL" => ["A", "B"],
+    "CYRVILLE" => ["A", "B", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"],
+    "DOMINION" => ["1A", "2A", "STN OFF ONLY"],
+    "FALLOWFIELD" => ["1A", "2A"],
+    "GREENBORO" => ["1A", "1B", "2A"],
+    "HERON" => ["1A", "2A", "3A", "4A"],
+    "HURDMAN" => ["A", "B", "C", "D", "E", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"],
+    "INNOVATION" => ["A", "B"],
+    "IRIS" => ["1A", "2A", "STN OFF ONLY"],
+    "JEANNE D'ARC" => ["1A", "1B", "2A", "3A", "3B", "4A", "4B"],
+    "LEES" => ["A", "B", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"],
+    "LEITRIM" => ["1A", "2A"],
+    "LINCOLN FIELDS" => ["1A", "1B", "2A", "3A", "4A", "4B", "5A", "OFF ONLY"],
+    "LONGFIELDS" => ["1A", "2A"],
+    "LYCÉE CLAUDEL" => ["1A", "2A"],
+    "LYON" => ["A", "B", "C", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"],
+    "MACKENZIE KING" => ["1A", "2A"],
+    "MARKETPLACE" => ["1A", "2A"],
+    "MILLENNIUM" => ["1A", "2A"],
+    "NEPEAN WOODS" => ["1A", "2A"],
+    "PARLIAMENT / PARLEMENT" => ["A", "B", "C", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"],
+    "PIMISI" => ["A", "B", "C", "D", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"],
+    "PINECREST" => ["1A", "2A", "A", "B", "C"],
+    "PLEASANT PARK" => ["1A", "2A", "3A", "4A"],
+    "QUEENSWAY" => ["1A", "2A", "3A", "4A"],
+    "RIDEAU" => ["A", "B", "C", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"],
+    "RIVERSIDE" => ["1A", "2A"],
+    "RIVERVIEW" => ["1A", "2A"],
+    "SMYTH" => ["1A", "2A"],
+    "SOUTH KEYS" => ["1A", "1B", "1C", "1D", "2A"],
+    "STRANDHERD" => ["1A", "2A"],
+    "ST-LAURENT" => ["A", "B", "C", "D", "O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"],
+    "TERON" => ["1A", "2A"],
+    "TREMBLAY" => ["O-TRAIN EAST / EST", "O-TRAIN WEST / OUEST"],
+    "TRIM" => ["1A", "2A"],
+    "TUNNEY'S PASTURE" => ["A", "B", "C", "D", "E", "F", "G", "O-TRAIN", "O-TRAIN EAST / EST"],
+    "WALKLEY" => ["1A", "2A", "3A", "4A", "4B", "4C"],
+    "WESTBORO" => ["1A", "1B", "2A", "3A", "3A (A)", "4A", "A", "B", "OFF ONLY", "STN OFF ONLY"],
+  }
+
   def initialize
     @calendar = nil
+    @date_utils = DateUtils.new
+    @db = Database.new.db
   end
 
   def active_schedules(date_hash: nil)
-    target_date = date_hash || today
+    target_date = date_hash || @date_utils.today
     base_schedules = calendar_schedules(target_date)
     overrides = calendar_overrides(target_date)
 
     base_schedules + overrides[:add] - overrides[:remove]
   end
 
-  def trips_for_stop(stop_code:, date_hash: nil)
+  def trips(stop_code: nil, stop_id: nil, stop_name: nil, date_hash: nil, originating_only: false)
+    args = [stop_code, stop_id, stop_name].compact
+    raise "Must specify exactly one of stop_code, stop_id, or stop_name" if args.count != 1
+
+    if !stop_code.nil?
+      trips_for_stop_code(stop_code: stop_code, date_hash: date_hash, originating_only: originating_only)
+    elsif !stop_id.nil?
+      trips_for_stop_ids(stop_ids: [stop_id], date_hash: date_hash, originating_only: originating_only)
+    else
+      trips_for_stop_name(stop_name: stop_name, date_hash: date_hash, originating_only: originating_only)
+    end
+  end
+
+  private
+
+  def trips_for_stop_code(stop_code:, date_hash:)
+    stop_ids = @db.query("SELECT stop_id FROM stops WHERE stop_code=?", stop_code).to_a.map do |row|
+      row.first
+    end
+
+    trips_for_stop_ids(stop_ids: stop_ids, date_hash: date_hash)
+  end
+
+  def trips_for_stop_ids(stop_ids:, date_hash:, originating_only: false)
     result = {}
 
     schedules = active_schedules(date_hash: date_hash)
     schedule_placeholders = (["?"] * schedules.count).join(",")
-    stop_ids = db.query("SELECT stop_id FROM stops WHERE stop_code=?", stop_code).to_a.map do |row|
-      row.first
-    end
     stop_id_placeholders = (["?"] * stop_ids.count).join(",")
 
-    sql = <<~EOS
-      SELECT r.route_short_name,t.trip_headsign,t.direction_id,st.departure_time
+    sql_head = <<~EOS
+      SELECT r.route_short_name,t.trip_headsign,t.direction_id,st.departure_time,t.trip_id
       FROM routes r
       INNER JOIN trips t ON t.route_id=r.route_id
       INNER JOIN stop_times st ON st.trip_id=t.trip_id
       WHERE st.stop_id IN (#{stop_id_placeholders})
       AND service_id IN (#{schedule_placeholders})
+    EOS
+
+    sql_filter = if originating_only
+      "AND st.stop_sequence=1\n"
+    else
+      ""
+    end
+
+    sql_tail = <<~EOS
       ORDER BY r.route_short_name,st.departure_time;
     EOS
 
-    db.query(sql, stop_ids, schedules).each do |row|
+    sql = "#{sql_head}#{sql_filter}#{sql_tail}"
+
+    @db.query(sql, stop_ids, schedules).each do |row|
       route = row[0..2] # [route_id, hedsign, direction_id]
       departure_time = row[3]
+      trip_id = row[4]
 
       if result[route].nil?
         result[route] = []
       end
 
-      result[route] << departure_time
+      result[route] << [departure_time, trip_id]
     end
 
     # Sometimes OC Transpo lists the same departure under two separate schedules, *both* of which are
     # simulteneously in effect.  (?)
     # Here we compensate for that by eliminating duplicates of the same departure time.
-    result.map do |route, departures|
-      [route, departures.uniq]
+    result.map do |route, departure_tuples|
+      [
+        route,
+        departure_tuples.group_by do |tuple|
+          tuple.first # departure time
+        end.map do |group_key, group_members|
+          group_members.first
+        end
+      ]
     end.to_h
   end
 
-  private
+  def trips_for_stop_name(stop_name:, date_hash:)
+    result = {}
+
+    base_name = stop_name.upcase
+    suffixes = MULTI_PLATFORM_STOPS[base_name] || [nil]
+    names = suffixes.map { |suffix| [base_name, suffix].compact.join(" ") }
+    placeholders = (["?"] * names.count).join(",")
+
+    stop_ids = @db.query(
+      "SELECT stop_id FROM stops WHERE stop_name IN (#{placeholders})",
+      names,
+    ).to_a.map do |row|
+      row.first
+    end
+
+    trips_for_stop_ids(stop_ids: stop_ids, date_hash: date_hash)
+  end
 
   def calendar
     @calendar ||= begin
@@ -65,20 +173,20 @@ class Gtfs
         FROM calendar
         ORDER BY start_date, end_date
       EOS
-      db.query(sql).to_a
+      @db.query(sql).to_a
     end
   end
 
   def calendar_overrides(target_date)
     result = {add: [], remove: []}
-    datestamp = date_for(target_date)
+    datestamp = @date_utils.date_for(target_date)
     sql = <<~EOS
       SELECT service_id, exception_type
       FROM calendar_dates
       WHERE calendar_date = ?
     EOS
 
-    db.query(sql).each do |row|
+    @db.query(sql).each do |row|
       case row[1]
       when "1"
         result[:add] << row[0]
@@ -94,8 +202,8 @@ class Gtfs
 
   def calendar_schedules(target_date)
     result = []
-    datestamp = date_for(target_date)
-    weekday = weekday_for(target_date)
+    datestamp = @date_utils.date_for(target_date)
+    weekday = @date_utils.weekday_for(target_date)
     calendar.each do |service_id, mon, tue, wed, thu, fri, sat, sun, start_date, end_date|
       if datestamp >= start_date && datestamp <= end_date
         if (0 == weekday && "1" == sun) ||
@@ -111,26 +219,4 @@ class Gtfs
     end
     result
   end
-
-  def date_for(date_hash)
-    "#{date_hash[:year]}#{date_hash[:month].to_s.rjust(2, "0")}#{date_hash[:day].to_s.rjust(2, "0")}"
-  end
-
-  def db
-    @db ||= SQLite3::Database.new("gtfs.db")
-  end
-
-  def today
-    # Beware:  This assumes your system is in America/Toronto time.
-    now = Date.today
-    {
-      year: now.year,
-      month: now.month,
-      day: now.day,
-    }
-  end
-
-  def weekday_for(date_hash)
-    Date.new(date_hash[:year], date_hash[:month], date_hash[:day]).wday
-  end
 end
index d0a5be503c6bc0930efd7348a418d6e2e900de9b..40294364bb3c3f5cea7948ee0334c429bff8dcad 100644 (file)
@@ -4,9 +4,11 @@ require "csv"
 require "pry"
 require "sqlite3"
 
+require "database"
+
 class GtfsLoader
   def initialize
-    @db = nil
+    @db = Database.new.db
   end
 
   def create_tables
@@ -80,7 +82,7 @@ class GtfsLoader
       stops_sql,
       trips_sql,
     ].each do |sql|
-      db.execute sql
+      @db.execute sql
     end
   end
 
@@ -93,10 +95,17 @@ class GtfsLoader
     trips: %w(route_id service_id trip_id trip_headsign direction_id block_id shape_id),
   }
 
-  def load_table(base_path, table_name)
+  def load_all(root_path:)
+    Dir["#{root_path}/*"].each do |base_path|
+      puts "== Loading GTFS files from #{base_path} =="
+      load_tables(base_path: base_path)
+    end
+  end
+
+  def load_table(base_path:, table_name:)
     print "Loading #{table_name}..."
     $stdout.flush
-    db.execute("BEGIN TRANSACTION;")
+    @db.execute("BEGIN TRANSACTION;")
 
     csv_cols = TABLE_COLUMNS[table_name.to_sym]
     sql_cols = csv_cols.map{|name| "date" == name ? "calendar_date" : name}
@@ -108,29 +117,23 @@ class GtfsLoader
       placeholders = (["?"] * values.count).join(",")
       sql = "INSERT INTO #{table_name} (#{sql_cols.join(",")}) VALUES (#{placeholders});"
 
-      db.execute(sql, values)
+      @db.execute(sql, values)
 
       if (0 == count % 10_000)
-        db.execute("COMMIT;")
-        db.execute("BEGIN TRANSACTION;")
+        @db.execute("COMMIT;")
+        @db.execute("BEGIN TRANSACTION;")
         print "."
         $stdout.flush
       end
     end
 
-    db.execute("COMMIT;")
+    @db.execute("COMMIT;")
     puts " done."
   end
 
-  def load_tables
+  def load_tables(base_path:)
     %w(calendar_dates calendar routes stops stop_times trips).each do |table|
-      load_table("gtfs/2022-12-14/", table)
+      load_table(base_path: base_path, table_name: table)
     end
   end
-
-  private
-
-  def db
-    @db ||= SQLite3::Database.new("gtfs.db")
-  end
 end
diff --git a/lib/stats.rb b/lib/stats.rb
new file mode 100644 (file)
index 0000000..5811fe7
--- /dev/null
@@ -0,0 +1,86 @@
+# frozen_string_literal: true
+
+require "sqlite3"
+
+require "database"
+require "gtfs"
+require "pry"
+
+class Stats
+  # Warning:  doesn't account for leap years
+  #                     J   F   M   A   M   J   J   A   S   O   N   D
+  DAYS_IN_MONTH = [nil, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
+
+  def initialize
+    @db = Database.new.db
+    @gtfs = Gtfs.new
+  end
+
+  def report(year:)
+    route_totals = {}
+
+    (1..12).each do |month|
+      last_day = DAYS_IN_MONTH[month]
+      last_day += 1 if (month == 2) && (0 == year % 4) # leap year (doesn't account for 2000)
+
+      (1..last_day).each do |day|
+        date_hash = {year: year, month: month, day: day}
+        date_string = "#{year}-#{month.to_s.rjust(2, "0")}-#{day.to_s.rjust(2, "0")}"
+
+        counts = trip_counts(date_hash: date_hash)
+
+        daily_total = 0
+        counts.each do |route, count|
+          route_total = route_totals[route] || 0
+          route_total += count
+          route_totals[route] = route_total
+
+          daily_total += count
+        end
+
+        puts "#{date_string};#{daily_total};#{counts.inspect}"
+      end
+    end
+
+    annual_total = 0
+    route_totals.each do |_route, count|
+      annual_total += count
+    end
+
+    puts "ANNUAL_TOTAL;#{annual_total};#{route_totals.inspect}"
+  end
+
+  def route_origin_stop_ids
+    @route_origin_stop_ids ||= @db.query(
+      "SELECT stop_id FROM stop_times WHERE stop_sequence=1 GROUP BY 1 ORDER BY 1;"
+    ).to_a.flatten
+  end
+
+  # Returns a hash of route_num => trip_count for the given date
+  def trip_counts(date_hash: nil)
+    result = {}
+
+    print "Checking for trips on #{date_hash[:year]}-#{date_hash[:month]}-#{date_hash[:day]}"
+
+    route_origin_stop_ids.each do |stop_id|
+      print "."
+      $stdout.flush
+
+      route_trips = @gtfs.trips(stop_id: stop_id, date_hash: date_hash).map do |k, v|
+        [
+          k.first,
+          v.map do |x|
+            x.first
+          end,
+        ]
+      end.to_h.each do |route, trips|
+        count = result[route] || 0
+        count += trips.count
+        result[route] = count
+      end
+    end
+    puts " done."
+
+    result
+  end
+end