Another round of speed improvements

This commit is contained in:
Marcel Schwarz 2020-12-20 19:15:25 +01:00
parent 100eb416af
commit 840d170501
2 changed files with 28 additions and 5 deletions

View File

@ -31,10 +31,11 @@ def get_dashboard(station_id):
b.common_name AS commonName, b.common_name AS commonName,
b.lat, b.lat,
b.lon, b.lon,
max(date(u.start_date, 'unixepoch')) AS maxEndDate, d.max_end_date AS maxEndDate,
min(date(u.start_date, 'unixepoch')) AS maxStartDate d.max_start_date AS maxStartDate
FROM usage_stats u FROM usage_stats u
JOIN bike_points b ON u.start_station_id = b.id_num JOIN bike_points b ON u.start_station_id = b.id_num
JOIN dashboard d ON u.start_station_id = d.id
WHERE u.start_station_id = ?""" WHERE u.start_station_id = ?"""
return get_db_connection().execute(query, (station_id,)).fetchall() return get_db_connection().execute(query, (station_id,)).fetchall()

View File

@ -97,23 +97,38 @@ def create_indexes():
conn = sqlite3.connect(DB_NAME, timeout=300) conn = sqlite3.connect(DB_NAME, timeout=300)
LOG.info("Starting to build index: idx_date_of_start_date") LOG.info("Starting to build index: idx_date_of_start_date")
conn.execute("""CREATE INDEX IF NOT EXISTS idx_date_of_start_date conn.execute("""CREATE INDEX IF NOT EXISTS idx_date_of_start_date
ON usage_stats (date(start_date, "unixepoch"))""") ON usage_stats (date(start_date, 'unixepoch'))""")
conn.commit() conn.commit()
LOG.info("Created index: idx_date_of_start_date") LOG.info("Created index: idx_date_of_start_date")
LOG.info("Starting to build index: idx_end_station_id_date_of_start_date") LOG.info("Starting to build index: idx_end_station_id_date_of_start_date")
conn.execute("""CREATE INDEX IF NOT EXISTS "idx_end_station_id_date_of_start_date" conn.execute("""CREATE INDEX IF NOT EXISTS "idx_end_station_id_date_of_start_date"
ON "usage_stats" ("end_station_id" ASC, date(start_date, "unixepoch"))""") ON "usage_stats" ("end_station_id" ASC, date(start_date, 'unixepoch'))""")
conn.commit() conn.commit()
LOG.info("Created index: idx_end_station_id_date_of_start_date") LOG.info("Created index: idx_end_station_id_date_of_start_date")
LOG.info("Starting to build index: idx_start_station_id_date_of_start_date") LOG.info("Starting to build index: idx_start_station_id_date_of_start_date")
conn.execute("""CREATE INDEX IF NOT EXISTS "idx_start_station_id_date_of_start_date" conn.execute("""CREATE INDEX IF NOT EXISTS "idx_start_station_id_date_of_start_date"
ON "usage_stats" ("start_station_id" ASC, date("start_date", "unixepoch"))""") ON "usage_stats" ("start_station_id" ASC, date("start_date", 'unixepoch'))""")
conn.commit() conn.commit()
LOG.info("Created index: idx_start_station_id_date_of_start_date") LOG.info("Created index: idx_start_station_id_date_of_start_date")
conn.close() conn.close()
LOG.info("Indexes created") LOG.info("Indexes created")
def create_dashboard_table():
    """Rebuild the ``dashboard`` table of per-station start-date bounds.

    Any existing ``dashboard`` table is dropped and recreated from
    ``usage_stats`` joined to ``bike_points`` on the start station, with one
    row per ``bike_points.id_num``:

    * ``id``             -- the station's ``id_num``
    * ``max_end_date``   -- latest ``date(start_date, 'unixepoch')``
    * ``max_start_date`` -- earliest ``date(start_date, 'unixepoch')``

    NOTE: despite their names, both date columns are derived from
    ``start_date`` and ``max_start_date`` holds the *minimum*. The column
    names are kept unchanged because the table is read by column name.

    The connection is always closed, even if one of the statements fails.
    """
    LOG.info("Creating dashboard table")
    conn = sqlite3.connect(DB_NAME, timeout=300)
    try:
        conn.execute("DROP TABLE IF EXISTS dashboard")
        conn.execute("""CREATE TABLE dashboard AS SELECT
            b.id_num as id,
            max(date(u.start_date, 'unixepoch')) AS max_end_date,
            min(date(u.start_date, 'unixepoch')) AS max_start_date
        FROM usage_stats u
        JOIN bike_points b ON u.start_station_id = b.id_num
        GROUP BY b.id_num""")
        conn.commit()
    finally:
        # Release the database handle on success and on failure alike; the
        # original never closed it.
        conn.close()
    LOG.info("Created dashboard table")
def import_bikepoints(): def import_bikepoints():
LOG.info("Importing bikepoints") LOG.info("Importing bikepoints")
conn = sqlite3.connect(DB_NAME, timeout=300) conn = sqlite3.connect(DB_NAME, timeout=300)
@ -203,11 +218,15 @@ def main():
# General DB init # General DB init
init_database() init_database()
count_pre = sqlite3.connect(DB_NAME, timeout=300).execute("SELECT count(*) FROM usage_stats").fetchone()[0]
# Download and import opendata from S3 bucket # Download and import opendata from S3 bucket
all_files = get_online_files_list(subdir_filter="usage-stats", file_extension_filter=".csv") all_files = get_online_files_list(subdir_filter="usage-stats", file_extension_filter=".csv")
for file in all_files: for file in all_files:
import_usage_stats_file(file) import_usage_stats_file(file)
count_after = sqlite3.connect(DB_NAME, timeout=300).execute("SELECT count(*) FROM usage_stats").fetchone()[0]
# Create search-index for faster querying # Create search-index for faster querying
create_indexes() create_indexes()
# Import Bikepoints # Import Bikepoints
@ -215,6 +234,9 @@ def main():
# Import bike accidents # Import bike accidents
import_accidents(2019) import_accidents(2019)
if count_after - count_pre > 0:
create_dashboard_table()
if __name__ == "__main__": if __name__ == "__main__":
main() main()