Fix accident dataset import (remove duplicate records)

This commit is contained in:
Marcel Schwarz 2020-12-22 02:25:56 +01:00
parent 04c45e0b7a
commit 9cdae30f7e
2 changed files with 18 additions and 7 deletions

View File

@ -85,7 +85,8 @@ def init_database():
lon REAL, lon REAL,
location TEXT, location TEXT,
date TEXT, date TEXT,
severity TEXT severity TEXT,
UNIQUE (lat, lon, date)
)""") )""")
conn.commit() conn.commit()
conn.close() conn.close()
@ -143,7 +144,7 @@ def import_bikepoints():
def import_accidents(year): def import_accidents(year):
LOG.info("Importing accidents") LOG.info(f"Importing accidents for year {year}")
conn = sqlite3.connect(DB_NAME, timeout=300) conn = sqlite3.connect(DB_NAME, timeout=300)
def filter_pedal_cycles(accident): def filter_pedal_cycles(accident):
@ -155,13 +156,15 @@ def import_accidents(year):
accidents = requests.get(f"https://api.tfl.gov.uk/AccidentStats/{year}").text accidents = requests.get(f"https://api.tfl.gov.uk/AccidentStats/{year}").text
accidents = json.loads(accidents) accidents = json.loads(accidents)
accidents = list(filter(filter_pedal_cycles, accidents)) accidents = list(filter(filter_pedal_cycles, accidents))
accidents = list(map(lambda a: (a['id'], a['lat'], a['lon'], a['location'], a['date'], a['severity']), accidents)) accidents = list(map(lambda a: (a['lat'], a['lon'], a['location'], a['date'], a['severity']), accidents))
LOG.info(f"Writing {len(accidents)} bike accidents to DB") LOG.info(f"Writing {len(accidents)} bike accidents to DB")
conn.executemany("INSERT OR IGNORE INTO accidents VALUES (?, ?, ?, ?, ?, ?)", accidents) conn.executemany("""INSERT OR IGNORE INTO
accidents(lat, lon, location, date, severity)
VALUES (?, ?, ?, ?, ?)""", accidents)
conn.commit() conn.commit()
conn.close() conn.close()
LOG.info("Accidents importet") LOG.info(f"Accidents imported for year {year}")
def import_usage_stats_file(export_file: ApiExportFile): def import_usage_stats_file(export_file: ApiExportFile):
@ -232,7 +235,8 @@ def main():
# Import Bikepoints # Import Bikepoints
import_bikepoints() import_bikepoints()
# Import bike accidents # Import bike accidents
import_accidents(2019) for year in range(2005, 2020):
import_accidents(year)
if count_after - count_pre > 0: if count_after - count_pre > 0:
create_dashboard_table() create_dashboard_table()

View File

@ -1,4 +1,5 @@
import logging import logging
from enum import Enum
from typing import List from typing import List
from fastapi import APIRouter from fastapi import APIRouter
@ -10,10 +11,16 @@ router = APIRouter(prefix="/accidents", tags=["accidents", "local"])
LOG = logging.getLogger() LOG = logging.getLogger()
class Severity(str, Enum):
slight = "Slight"
serious = "Serious"
fatal = "Fatal"
class Accident(BaseModel): class Accident(BaseModel):
lat: float lat: float
lon: float lon: float
severity: str severity: Severity
@router.get( @router.get(