Merge branch 'project-3-api-implementation'

This commit is contained in:
Marcel Schwarz 2020-12-21 19:15:52 +01:00
commit 11fa967d22
13 changed files with 413 additions and 7 deletions

View File

@ -0,0 +1,2 @@
bike-data.db
*.log

View File

@ -1,2 +1,3 @@
bike-data.db
*.log
__pycache__

View File

@ -0,0 +1,9 @@
# Backend image: Python 3.9 with the packages from requirements.txt installed.
FROM python:3.9
WORKDIR /app
# Only requirements.txt is copied; no application source is added to the image here.
# NOTE(review): the README runs the container with `-v $(pwd):/app`, so the code and
# bike-data.db are presumably expected to be bind-mounted at /app - confirm.
COPY requirements.txt /app
RUN pip install -r requirements.txt
# Serve the `app` object of module `api` on all interfaces, port 80.
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "80"]

View File

@ -18,4 +18,16 @@ Note: The importer outputs logs to the terminal, and the db_init.log file.
## 3. Start the backend
To start the backend, you first have to initialize the database!
### Run with docker
Switch into the project directory, then run:
```shell
docker build -t geovis-backend .
```
After the build, make sure you are in the directory where "bike-data.db" resides, then run:
```shell
docker run -v $(pwd):/app -p 8080:80 --restart always -d geovis-backend
```
Note: `$(pwd)` inserts the current directory into the command. If you are on Windows, you can use WSL or type out the full path instead.
To stop the backend, just shut down the container.

View File

@ -0,0 +1,38 @@
import uvicorn
from fastapi import FastAPI, APIRouter
from fastapi.middleware.cors import CORSMiddleware
from routers import accidents, bikepoints, dashboard
# Browser origins that are allowed to call this API (CORS).
origins = [
    "http://it-schwarz.net",
    "https://it-schwarz.net",
    "http://localhost",
    "http://localhost:4200",
]

# Interactive docs and the OpenAPI schema are all served below /api.
app = FastAPI(
    title="London Bikestations Dashboard API",
    docs_url="/api/docs",
    redoc_url="/api/redoc",
    openapi_url="/api/openapi.json",
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Collect the feature routers on one router and mount it under /api/latest.
v1_router = APIRouter()
for _feature_module in (accidents, bikepoints, dashboard):
    v1_router.include_router(_feature_module.router)
app.include_router(v1_router, prefix="/api/latest")

if __name__ == "__main__":
    # Direct execution: start uvicorn with auto-reload on port 8080.
    uvicorn.run("api:app", host="0.0.0.0", port=8080, reload=True)

View File

@ -0,0 +1,102 @@
import sqlite3
UPSTREAM_BASE_URL = "https://api.tfl.gov.uk"
DB_NAME = "bike-data.db"


def get_db_connection():
    """Open a new connection to the SQLite file ``DB_NAME``.

    Rows are produced as ``sqlite3.Row`` so columns can be read by name.
    The caller owns the returned connection and has to close it.
    """
    conn = sqlite3.connect(DB_NAME, timeout=300)
    conn.row_factory = sqlite3.Row
    return conn


def _fetch_all(query, params=()):
    """Run ``query`` with ``params`` on a fresh connection and return all rows.

    The connection is closed afterwards in every case, so request handlers do
    not leave open database handles behind, even when the query raises.
    """
    conn = get_db_connection()
    try:
        return conn.execute(query, params).fetchall()
    finally:
        conn.close()


# ACCIDENTS
def get_all_accidents():
    """Return every row of the ``accidents`` table."""
    query = """SELECT id, lat, lon, location, date, severity FROM accidents"""
    return _fetch_all(query)


def get_accidents(year: str):
    """Return the accidents whose ``date`` lies in ``year``.

    ``year`` is compared against ``STRFTIME('%Y', date)``, i.e. it has to be
    the four-digit year as a string (for example ``"2019"``).
    """
    query = """
        SELECT id, lat, lon, location, date, severity
        FROM accidents WHERE STRFTIME('%Y', date) = ?"""
    return _fetch_all(query, (year,))


# DASHBOARD
def get_dashboard(station_id):
    """Return the general dashboard data of one station.

    The result is a list with at most one row (id, commonName, lat, lon,
    maxEndDate, maxStartDate); it is empty when no trip starts at the station.
    """
    # The join yields one identical row per trip in usage_stats, so LIMIT 1
    # avoids materialising the same row once for every trip of the station.
    query = """
        SELECT
            b.id_num as id,
            b.common_name AS commonName,
            b.lat,
            b.lon,
            d.max_end_date AS maxEndDate,
            d.max_start_date AS maxStartDate
        FROM usage_stats u
            JOIN bike_points b ON u.start_station_id = b.id_num
            JOIN dashboard d ON u.start_station_id = d.id
        WHERE u.start_station_id = ?
        LIMIT 1"""
    return _fetch_all(query, (station_id,))


def get_dashboard_to(station_id, start_date, end_date):
    """Return the three most frequent destinations of trips starting at a station.

    Only trips whose start day lies between ``start_date`` and ``end_date``
    (both inclusive) are counted. Each row holds startStationName,
    endStationName, number (trip count) and avgDuration (rounded average).
    """
    query = """
        SELECT
            u.start_station_name AS startStationName,
            u.end_station_name AS endStationName,
            count(*) AS number,
            round(avg(u.duration)) AS avgDuration
        FROM usage_stats u
        WHERE u.start_station_id = ? AND date(u.start_date, 'unixepoch') BETWEEN ? AND ?
        GROUP BY u.end_station_name
        ORDER BY number DESC
        LIMIT 3"""
    return _fetch_all(query, (station_id, start_date, end_date))


def get_dashboard_from(station_id, start_date, end_date):
    """Return the three most frequent origins of trips ending at a station.

    Mirror of ``get_dashboard_to``: trips are filtered by ``end_station_id``
    and grouped by the start station name. The date filter still uses the
    trip's start day.
    """
    query = """
        SELECT
            u.start_station_name AS startStationName,
            u.end_station_name AS endStationName,
            count(*) AS number,
            round(avg(u.duration)) AS avgDuration
        FROM usage_stats u
        WHERE u.end_station_id = ? AND date(u.start_date, 'unixepoch') BETWEEN ? AND ?
        GROUP BY u.start_station_name
        ORDER BY number DESC
        LIMIT 3"""
    return _fetch_all(query, (station_id, start_date, end_date))


def get_dashboard_duration(station_id, start_date, end_date):
    """Count the trips starting at a station per duration bucket.

    Buckets are labelled in minutes ('0-5', '5-15', '15-30', '30-45', '45+')
    with upper bounds of 300, 900, 1800 and 2700 on the ``duration`` column.
    Buckets without any trip are not part of the result.
    """
    query = """
        SELECT
            count(*) AS number,
            CASE WHEN duration <= 300 THEN '0-5'
                 WHEN duration <= 900 THEN '5-15'
                 WHEN duration <= 1800 THEN '15-30'
                 WHEN duration <= 2700 THEN '30-45'
                 ELSE '45+'
            END AS minutesGroup
        FROM usage_stats
        WHERE
            start_station_id = ? AND
            date(start_date, 'unixepoch') BETWEEN ? AND ?
        GROUP BY minutesGroup"""
    return _fetch_all(query, (station_id, start_date, end_date))


def get_dashboard_time(station_id, start_date, end_date):
    """Count the trips starting at a station per ten-minute slot of the day.

    The slot label is the start time 'HH:M' with the last minute digit
    replaced by '0' (for example '10:05' becomes '10:00'). Each row holds
    timeFrame, number (trip count) and avgDuration (rounded average).
    """
    query = """
        SELECT
            substr(strftime('%H:%M', start_date, 'unixepoch'), 1, 4) || '0' as timeFrame,
            count(*) AS number,
            round(avg(duration)) AS avgDuration
        FROM usage_stats
        WHERE
            start_station_id = ?
            AND date(start_date, 'unixepoch') BETWEEN ? AND ?
        GROUP BY substr(strftime('%H:%M', start_date, 'unixepoch'), 1, 4)"""
    return _fetch_all(query, (station_id, start_date, end_date))

View File

@ -77,7 +77,7 @@ def init_database():
common_name TEXT,
lat REAL,
lon REAL,
id_num INTEGER AS (CAST(SUBSTR(id, 12) as INTEGER)) STORED
id_num INTEGER
)""")
conn.execute("""CREATE TABLE IF NOT EXISTS accidents(
id INTEGER PRIMARY KEY,
@ -95,21 +95,48 @@ def init_database():
def create_indexes():
    """Create the expression indexes on ``usage_stats`` if they do not exist.

    Three indexes are built, each committed on its own: the start day alone,
    and the start day combined with the end station id or the start station
    id. The 'unixepoch' modifier is written as a single-quoted string literal;
    double quotes denote identifiers in SQL.
    """
    index_statements = (
        (
            "idx_date_of_start_date",
            """CREATE INDEX IF NOT EXISTS idx_date_of_start_date
               ON usage_stats (date(start_date, 'unixepoch'))""",
        ),
        (
            "idx_end_station_id_date_of_start_date",
            """CREATE INDEX IF NOT EXISTS "idx_end_station_id_date_of_start_date"
               ON "usage_stats" ("end_station_id" ASC, date(start_date, 'unixepoch'))""",
        ),
        (
            "idx_start_station_id_date_of_start_date",
            """CREATE INDEX IF NOT EXISTS "idx_start_station_id_date_of_start_date"
               ON "usage_stats" ("start_station_id" ASC, date("start_date", 'unixepoch'))""",
        ),
    )

    LOG.info("Try to create indexes")
    conn = sqlite3.connect(DB_NAME, timeout=300)
    try:
        for index_name, statement in index_statements:
            LOG.info(f"Starting to build index: {index_name}")
            conn.execute(statement)
            conn.commit()
            LOG.info(f"Created index: {index_name}")
    finally:
        # Release the database handle even when building an index fails.
        conn.close()
    LOG.info("Indexes created")
def create_dashboard_table():
    """Rebuild the ``dashboard`` table from ``usage_stats`` and ``bike_points``.

    The table is dropped and recreated with one row per bike point that has
    at least one trip starting there. Both date columns are derived from the
    trip start day: ``max_end_date`` is the latest one and ``max_start_date``
    is, despite its name, the earliest one.
    """
    LOG.info("Creating dashboard table")
    conn = sqlite3.connect(DB_NAME, timeout=300)
    try:
        conn.execute("DROP TABLE IF EXISTS dashboard")
        conn.execute("""CREATE TABLE dashboard AS SELECT
                b.id_num as id,
                max(date(u.start_date, 'unixepoch')) AS max_end_date,
                min(date(u.start_date, 'unixepoch')) AS max_start_date
            FROM usage_stats u
                JOIN bike_points b ON u.start_station_id = b.id_num
            GROUP BY b.id_num""")
        conn.commit()
    finally:
        # The connection was previously left open; close it in every case.
        conn.close()
    LOG.info("Created dashboard table")
def import_bikepoints():
    """Download all bike points from the TfL API and store them in ``bike_points``.

    Each row consists of id, common name, lat, lon and ``id_num``, the numeric
    part of the id that follows the 11-character prefix (``p['id'][11:]``).
    Rows that already exist are left untouched (``INSERT OR IGNORE``).
    """
    LOG.info("Importing bikepoints")
    points = json.loads(requests.get("https://api.tfl.gov.uk/BikePoint").text)
    rows = [
        (p['id'], p['commonName'], p['lat'], p['lon'], int(p['id'][11:]))
        for p in points
    ]

    LOG.info(f"Writing {len(rows)} bikepoints to DB")
    conn = sqlite3.connect(DB_NAME, timeout=300)
    try:
        # Five placeholders: the table carries id_num as a regular fifth column.
        conn.executemany("INSERT OR IGNORE INTO bike_points VALUES (?, ?, ?, ?, ?)", rows)
        conn.commit()
    finally:
        conn.close()
    LOG.info("Bikepoints imported")
@ -191,11 +218,15 @@ def main():
# General DB init
init_database()
count_pre = sqlite3.connect(DB_NAME, timeout=300).execute("SELECT count(*) FROM usage_stats").fetchone()[0]
# Download and import opendata from S3 bucket
all_files = get_online_files_list(subdir_filter="usage-stats", file_extension_filter=".csv")
for file in all_files:
import_usage_stats_file(file)
count_after = sqlite3.connect(DB_NAME, timeout=300).execute("SELECT count(*) FROM usage_stats").fetchone()[0]
# Create search-index for faster querying
create_indexes()
# Import Bikepoints
@ -203,6 +234,9 @@ def main():
# Import bike accidents
import_accidents(2019)
if count_after - count_pre > 0:
create_dashboard_table()
if __name__ == "__main__":
main()

View File

@ -1 +1,4 @@
requests
fastapi[all]
uvicorn
pydantic

View File

@ -0,0 +1,36 @@
import logging
from typing import List
from fastapi import APIRouter
from pydantic.main import BaseModel
import api_database
# Every route declared on this router is served below the /accidents prefix.
router = APIRouter(prefix="/accidents", tags=["accidents", "local"])
# Root logger (getLogger() without a name); not used by the routes visible in this module.
LOG = logging.getLogger()
class Accident(BaseModel):
    """Response schema of one bike accident: its position and severity."""

    # Geographic position of the accident.
    lat: float
    lon: float
    # Severity label as stored in the database.
    severity: str
@router.get(
    "/",
    name="Get all accidents",
    description="Get all bike accidents in London.",
    response_model=List[Accident]
)
def get_all_accidents():
    """Return every accident stored in the database.

    The function carries its own name so that it is no longer overwritten by
    the by-year handler, which is also called ``get_accidents``. The route
    path and the explicit route name are unchanged.
    """
    return api_database.get_all_accidents()
@router.get(
    "/{year}",
    name="Get accidents by year",
    description="Get bike accidents in London for a specific year.",
    response_model=List[Accident]
)
def get_accidents(year: str):
    """Return the accidents recorded for the year given in the path."""
    accidents_of_year = api_database.get_accidents(year)
    return accidents_of_year

View File

@ -0,0 +1,61 @@
import json
from typing import List
import requests
from fastapi import APIRouter
from pydantic import BaseModel
from api_database import UPSTREAM_BASE_URL
# Every route declared on this router is served below the /bikepoints prefix.
router = APIRouter(prefix="/bikepoints", tags=["bikepoints"])
class BikepointStatus(BaseModel):
    """Dock counters of a bike point, named like the upstream property keys."""

    NbBikes: int
    NbEmptyDocks: int
    NbDocks: int
class Bikepoint(BaseModel):
    """Response schema of one bike point including its current status."""

    # Id of the bike point (a string in the API response).
    id: str
    commonName: str
    lat: float
    lon: float
    status: BikepointStatus
def bikepoint_mapper(bikepoint):
    """Convert one upstream bike point dict into the shape returned by this API.

    The id loses its first 11 characters, url/commonName/lat/lon are copied,
    and the 'NbBikes', 'NbEmptyDocks' and 'NbDocks' entries of
    ``additionalProperties`` become integer values under ``status``.
    """
    status_keys = ["NbBikes", "NbEmptyDocks", "NbDocks"]

    mapped_point = dict(
        id=bikepoint['id'][11:],
        url=bikepoint['url'],
        commonName=bikepoint['commonName'],
        lat=bikepoint['lat'],
        lon=bikepoint['lon'],
    )

    # Select the relevant properties first, then convert their values.
    status_props = [
        prop for prop in bikepoint['additionalProperties']
        if prop['key'] in status_keys
    ]
    status = {}
    for prop in status_props:
        status[prop['key']] = int(prop['value'])

    mapped_point['status'] = status
    return mapped_point
@router.get(
    "/",
    tags=["upstream"],
    response_model=List[Bikepoint]
)
def get_all():
    """Fetch all bike points from the upstream API and map each one."""
    response = requests.get(UPSTREAM_BASE_URL + "/BikePoint")
    upstream_points = json.loads(response.text)
    return [bikepoint_mapper(point) for point in upstream_points]
@router.get(
    "/{id}",
    tags=["upstream"],
    response_model=Bikepoint
)
def get_single(id: int):
    """Fetch one bike point from the upstream API and map it.

    Raises:
        HTTPException: 404 when the upstream API does not know the bike
            point, 502 when it answers with any other error status.
    """
    # Imported locally so this handler does not depend on the module's import list.
    from fastapi import HTTPException

    response = requests.get(UPSTREAM_BASE_URL + f"/BikePoint/BikePoints_{id}")
    # An upstream error body has no bike point fields; passing it to the
    # mapper would fail and surface as an opaque 500.
    if response.status_code == 404:
        raise HTTPException(status_code=404, detail=f"Bikepoint {id} not found")
    if not response.ok:
        raise HTTPException(status_code=502, detail="Upstream bikepoint API returned an error")

    bikepoint = json.loads(response.text)
    return bikepoint_mapper(bikepoint)

View File

@ -0,0 +1,71 @@
import datetime
from typing import Optional, List
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
import api_database
# Every route of this router is served below /dashboard/{station_id}, so each handler receives station_id.
router = APIRouter(prefix="/dashboard/{station_id}", tags=["dashboard", "local"])
def validate_daterange(start_date: datetime.date, end_date: datetime.date):
    """Reject date ranges whose end lies before their start.

    Raises:
        HTTPException: status 400 when ``end_date`` is earlier than
            ``start_date``. A range of zero days is accepted.
    """
    span = end_date - start_date
    if span.days >= 0:
        return
    raise HTTPException(status_code=400, detail="Requested date-range is negative")
class StationDashboard(BaseModel):
    """Response schema of the general station dashboard; every field is optional."""

    id: Optional[int]
    commonName: Optional[str]
    lat: Optional[float]
    lon: Optional[float]
    # Bounds of the period for which the station has data.
    maxEndDate: Optional[datetime.date]
    maxStartDate: Optional[datetime.date]
@router.get("/", response_model=StationDashboard)
def get_general_dashboard(station_id: int):
    """Return the general dashboard data of one station.

    Raises:
        HTTPException: status 404 when the database holds no dashboard data
            for ``station_id``.
    """
    rows = api_database.get_dashboard(station_id)
    # Indexing an empty result would raise IndexError and end in a 500.
    if not rows:
        raise HTTPException(status_code=404, detail="No dashboard data found for the requested station")
    return rows[0]
class StationDashboardTopStationsEntry(BaseModel):
    """One station pair of the 'to' / 'from' rankings."""

    startStationName: str
    endStationName: str
    # Number of trips between the two stations.
    number: int
    # Average trip duration for this pair.
    avgDuration: int
@router.get("/to", response_model=List[StationDashboardTopStationsEntry])
def get_to_dashboard_for_station(station_id: int, start_date: datetime.date, end_date: datetime.date):
    """Return the 'to' ranking of a station for the given date range.

    The range is validated first; a negative range is answered with 400.
    """
    validate_daterange(start_date, end_date)
    top_destinations = api_database.get_dashboard_to(station_id, start_date, end_date)
    return top_destinations
@router.get("/from", response_model=List[StationDashboardTopStationsEntry])
def get_from_dashboard_for_station(station_id: int, start_date: datetime.date, end_date: datetime.date):
    """Return the 'from' ranking of a station for the given date range.

    The range is validated first; a negative range is answered with 400.
    """
    validate_daterange(start_date, end_date)
    top_origins = api_database.get_dashboard_from(station_id, start_date, end_date)
    return top_origins
class StationDashboardDurationGroup(BaseModel):
    """Number of trips that fall into one duration bucket."""

    number: int
    # Label of the bucket, e.g. '0-5'.
    minutesGroup: str
@router.get("/duration", response_model=List[StationDashboardDurationGroup])
def get_duration_dashboard_for_station(station_id: int, start_date: datetime.date, end_date: datetime.date):
    """Return the trip counts per duration bucket for a station and date range.

    The range is validated first; a negative range is answered with 400.
    """
    validate_daterange(start_date, end_date)
    duration_groups = api_database.get_dashboard_duration(station_id, start_date, end_date)
    return duration_groups
class StationDashboardTimeGroup(BaseModel):
    """Trip statistics of one time-of-day slot."""

    # Label of the slot, e.g. '10:00'.
    timeFrame: str
    number: int
    avgDuration: int
@router.get("/time", response_model=List[StationDashboardTimeGroup])
def get_time_dashboard_for_station(station_id: int, start_date: datetime.date, end_date: datetime.date):
    """Return the trip statistics per time-of-day slot for a station and date range.

    The range is validated first; a negative range is answered with 400.
    """
    validate_daterange(start_date, end_date)
    time_groups = api_database.get_dashboard_time(station_id, start_date, end_date)
    return time_groups

View File

@ -0,0 +1,37 @@
# generated 2020-12-20, Mozilla Guideline v5.6, nginx 1.17.7, OpenSSL 1.1.1d, modern configuration, no OCSP
# https://ssl-config.mozilla.org/#server=nginx&version=1.17.7&config=modern&openssl=1.1.1d&ocsp=false&guideline=5.6
# Plain-HTTP listener (IPv4 and IPv6): answers every request with a permanent
# redirect to the same host and URI over HTTPS.
server {
listen 80;
listen [::]:80;
server_name it-schwarz.net www.it-schwarz.net;
return 301 https://$host$request_uri;
}
# HTTPS listener: serves static files from the frontend build directory and
# forwards everything below /api/ to the service on localhost:8080.
server {
listen 443 ssl http2;
listen [::]:443 ssl http2;
server_name it-schwarz.net www.it-schwarz.net;
# Static files are served from the built frontend.
root /home/geovis/geovis-frontend/projects/project-3/frontend/dist/frontend;
ssl_certificate /etc/letsencrypt/live/it-schwarz.net/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/it-schwarz.net/privkey.pem;
ssl_session_timeout 1d;
ssl_session_cache shared:MozSSL:10m; # about 40000 sessions
ssl_session_tickets off;
# modern configuration
ssl_protocols TLSv1.3;
ssl_prefer_server_ciphers off;
# HSTS (ngx_http_headers_module is required) (63072000 seconds)
add_header Strict-Transport-Security "max-age=63072000" always;
# Requests below /api/ are proxied to localhost:8080 with the /api/ prefix kept.
# NOTE(review): 8080 matches the host port published in the README's docker run command - confirm.
location /api/ {
proxy_pass "http://localhost:8080/api/";
}
# Any other path: try the file, then the directory, then fall back to /index.html.
# NOTE(review): presumably so that client-side routes of the frontend resolve - confirm.
location / {
try_files $uri $uri/ /index.html;
}
}