diff --git a/projects/project-3/backend/.dockerignore b/projects/project-3/backend/.dockerignore
new file mode 100644
index 0000000..9adde20
--- /dev/null
+++ b/projects/project-3/backend/.dockerignore
@@ -0,0 +1,2 @@
+bike-data.db
+*.log
\ No newline at end of file
diff --git a/projects/project-3/backend/.gitignore b/projects/project-3/backend/.gitignore
index 9adde20..551bc55 100644
--- a/projects/project-3/backend/.gitignore
+++ b/projects/project-3/backend/.gitignore
@@ -1,2 +1,3 @@
 bike-data.db
-*.log
\ No newline at end of file
+*.log
+__pycache__
\ No newline at end of file
diff --git a/projects/project-3/backend/Dockerfile b/projects/project-3/backend/Dockerfile
new file mode 100644
index 0000000..1bb3617
--- /dev/null
+++ b/projects/project-3/backend/Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3.9
+
+WORKDIR /app
+
+# Only the requirements are baked into the image; the application code and
+# bike-data.db are bind-mounted into /app at runtime (see README).
+COPY requirements.txt /app
+
+RUN pip install -r requirements.txt
+
+CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "80"]
\ No newline at end of file
diff --git a/projects/project-3/backend/README.md b/projects/project-3/backend/README.md
index d145ee1..23cb7b3 100644
--- a/projects/project-3/backend/README.md
+++ b/projects/project-3/backend/README.md
@@ -18,4 +18,23 @@ Note: The importer outputs logs to the terminal, and the db_init.log file.
 ## 3. Start the backend
 
 To start the backend, you first have to initialize the database!
- 
\ No newline at end of file
+### Run with Docker
+Switch into the project directory, then run:
+```shell
+docker build -t geovis-backend .
+```
+After the build completes, make sure you are in the directory where `bike-data.db` resides, then run:
+```shell
+docker run -v $(pwd):/app -p 8080:80 --restart always -d geovis-backend
+```
+
+Note: `$(pwd)` inserts the current working directory into the command. On Windows, use WSL or type out the full path instead.
+
+To stop the backend, simply stop the container.
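+
+### Check the API
+Once the container is running, the API should respond on the mapped port (8080 above). For example:
+```shell
+curl http://localhost:8080/api/docs
+curl http://localhost:8080/api/latest/accidents/2019
+```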
\ No newline at end of file
diff --git a/projects/project-3/backend/api.py b/projects/project-3/backend/api.py
new file mode 100644
index 0000000..c948ca8
--- /dev/null
+++ b/projects/project-3/backend/api.py
@@ -0,0 +1,38 @@
+import uvicorn
+from fastapi import FastAPI, APIRouter
+from fastapi.middleware.cors import CORSMiddleware
+
+from routers import accidents, bikepoints, dashboard
+
+app = FastAPI(
+    title="London Bikestations Dashboard API",
+    docs_url="/api/docs",
+    redoc_url="/api/redoc",
+    openapi_url="/api/openapi.json"
+)
+
+origins = [
+    "http://it-schwarz.net",
+    "https://it-schwarz.net",
+    "http://localhost",
+    "http://localhost:4200",
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+v1_router = APIRouter()
+v1_router.include_router(accidents.router)
+v1_router.include_router(bikepoints.router)
+v1_router.include_router(dashboard.router)
+
+app.include_router(v1_router, prefix="/api/latest")
+
+
+if __name__ == "__main__":
+    uvicorn.run("api:app", host="0.0.0.0", port=8080, reload=True)
diff --git a/projects/project-3/backend/api_database.py b/projects/project-3/backend/api_database.py
new file mode 100644
index 0000000..bb6e5ed
--- /dev/null
+++ b/projects/project-3/backend/api_database.py
@@ -0,0 +1,102 @@
+import sqlite3
+
+UPSTREAM_BASE_URL = "https://api.tfl.gov.uk"
+DB_NAME = "bike-data.db"
+
+
+def get_db_connection():
+    conn = sqlite3.connect(DB_NAME, timeout=300)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+
+# ACCIDENTS
+def get_all_accidents():
+    query = """SELECT id, lat, lon, location, date, severity FROM accidents"""
+    return get_db_connection().execute(query).fetchall()
+
+
+def get_accidents(year: str):
+    query = """
+        SELECT id, lat, lon, location, date, severity
+        FROM accidents WHERE STRFTIME('%Y', date) = ?"""
+    return get_db_connection().execute(query, (year,)).fetchall()
+
+
+# DASHBOARD
+def get_dashboard(station_id):
+    query = """
+        SELECT
+            b.id_num as id,
+            b.common_name AS commonName,
+            b.lat,
+            b.lon,
+            d.max_end_date AS maxEndDate,
+            d.max_start_date AS maxStartDate
+        FROM usage_stats u
+        JOIN bike_points b ON u.start_station_id = b.id_num
+        JOIN dashboard d ON u.start_station_id = d.id
+        WHERE u.start_station_id = ?"""
+    return get_db_connection().execute(query, (station_id,)).fetchall()
+
+
+def get_dashboard_to(station_id, start_date, end_date):
+    query = """
+        SELECT
+            u.start_station_name AS startStationName,
+            u.end_station_name AS endStationName,
+            count(*) AS number,
+            round(avg(u.duration)) AS avgDuration
+        FROM usage_stats u
+        WHERE u.start_station_id = ? AND date(u.start_date, 'unixepoch') BETWEEN ? AND ?
+        GROUP BY u.end_station_name
+        ORDER BY number DESC
+        LIMIT 3"""
+    return get_db_connection().execute(query, (station_id, start_date, end_date)).fetchall()
+
+
+def get_dashboard_from(station_id, start_date, end_date):
+    query = """
+        SELECT
+            u.start_station_name AS startStationName,
+            u.end_station_name AS endStationName,
+            count(*) AS number,
+            round(avg(u.duration)) AS avgDuration
+        FROM usage_stats u
+        WHERE u.end_station_id = ? AND date(u.start_date, 'unixepoch') BETWEEN ? AND ?
+        GROUP BY u.start_station_name
+        ORDER BY number DESC
+        LIMIT 3"""
+    return get_db_connection().execute(query, (station_id, start_date, end_date)).fetchall()
+
+
+def get_dashboard_duration(station_id, start_date, end_date):
+    query = """
+        SELECT
+            count(*) AS number,
+            CASE WHEN duration <= 300 THEN '0-5'
+            WHEN duration <= 900 THEN '5-15'
+            WHEN duration <= 1800 THEN '15-30'
+            WHEN duration <= 2700 THEN '30-45'
+            ELSE '45+'
+            END AS minutesGroup
+        FROM usage_stats
+        WHERE
+            start_station_id = ? AND
+            date(start_date, 'unixepoch') BETWEEN ? AND ?
+        GROUP BY minutesGroup"""
+    return get_db_connection().execute(query, (station_id, start_date, end_date)).fetchall()
+
+
+def get_dashboard_time(station_id, start_date, end_date):
+    query = """
+        SELECT
+            substr(strftime('%H:%M', start_date, 'unixepoch'), 1, 4) || '0' as timeFrame,
+            count(*) AS number,
+            round(avg(duration)) AS avgDuration
+        FROM usage_stats
+        WHERE
+            start_station_id = ?
+            AND date(start_date, 'unixepoch') BETWEEN ? AND ?
+        GROUP BY substr(strftime('%H:%M', start_date, 'unixepoch'), 1, 4)"""
+    return get_db_connection().execute(query, (station_id, start_date, end_date)).fetchall()
diff --git a/projects/project-3/backend/db_init.py b/projects/project-3/backend/db_init.py
index 9ad85d0..51eae02 100644
--- a/projects/project-3/backend/db_init.py
+++ b/projects/project-3/backend/db_init.py
@@ -77,7 +77,7 @@ def init_database():
         common_name TEXT,
         lat REAL,
         lon REAL,
-        id_num INTEGER AS (CAST(SUBSTR(id, 12) as INTEGER)) STORED
+        id_num INTEGER
     )""")
     conn.execute("""CREATE TABLE IF NOT EXISTS accidents(
         id INTEGER PRIMARY KEY,
@@ -95,21 +95,50 @@ def init_database():
 def create_indexes():
     LOG.info("Try to create indexes")
     conn = sqlite3.connect(DB_NAME, timeout=300)
+    LOG.info("Starting to build index: idx_date_of_start_date")
     conn.execute("""CREATE INDEX IF NOT EXISTS idx_date_of_start_date
-                    ON usage_stats (date(start_date, "unixepoch"))""")
+                    ON usage_stats (date(start_date, 'unixepoch'))""")
     conn.commit()
+    LOG.info("Created index: idx_date_of_start_date")
+    LOG.info("Starting to build index: idx_end_station_id_date_of_start_date")
+    conn.execute("""CREATE INDEX IF NOT EXISTS "idx_end_station_id_date_of_start_date"
+                    ON "usage_stats" ("end_station_id" ASC, date(start_date, 'unixepoch'))""")
+    conn.commit()
+    LOG.info("Created index: idx_end_station_id_date_of_start_date")
+    LOG.info("Starting to build index: idx_start_station_id_date_of_start_date")
+    conn.execute("""CREATE INDEX IF NOT EXISTS "idx_start_station_id_date_of_start_date"
+                    ON "usage_stats" ("start_station_id" ASC, date("start_date", 'unixepoch'))""")
+    conn.commit()
+    LOG.info("Created index: idx_start_station_id_date_of_start_date")
     conn.close()
     LOG.info("Indexes created")
 
 
+def create_dashboard_table():
+    LOG.info("Creating dashboard table")
+    conn = sqlite3.connect(DB_NAME, timeout=300)
+    conn.execute("DROP TABLE IF EXISTS dashboard")
+    conn.execute("""CREATE TABLE dashboard AS SELECT
+        b.id_num as id,
+        max(date(u.start_date, 'unixepoch')) AS max_end_date,
+        min(date(u.start_date, 'unixepoch')) AS max_start_date
+        FROM usage_stats u
+        JOIN bike_points b ON u.start_station_id = b.id_num
+        GROUP BY b.id_num""")
+    conn.commit()
+    LOG.info("Created dashboard table")
+
+
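+# Note: TfL bike point ids look like "BikePoints_123"; import_bikepoints() below
+# stores the numeric suffix (id[11:]) in id_num so usage_stats rows can join on it.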
 def import_bikepoints():
     LOG.info("Importing bikepoints")
     conn = sqlite3.connect(DB_NAME, timeout=300)
     points = json.loads(requests.get("https://api.tfl.gov.uk/BikePoint").text)
-    points = list(map(lambda p: (p['id'], p['commonName'], p['lat'], p['lon']), points))
+    points = list(map(lambda p: (p['id'], p['commonName'], p['lat'], p['lon'], int(p['id'][11:])), points))
     LOG.info(f"Writing {len(points)} bikepoints to DB")
-    conn.executemany("INSERT OR IGNORE INTO bike_points VALUES (?, ?, ?, ?)", points)
+    conn.executemany("INSERT OR IGNORE INTO bike_points VALUES (?, ?, ?, ?, ?)", points)
     conn.commit()
     conn.close()
     LOG.info("Bikepoints imported")
 
@@ -191,11 +218,15 @@ def main():
     # General DB init
     init_database()
 
+    count_pre = sqlite3.connect(DB_NAME, timeout=300).execute("SELECT count(*) FROM usage_stats").fetchone()[0]
+
     # Download and import opendata from S3 bucket
     all_files = get_online_files_list(subdir_filter="usage-stats", file_extension_filter=".csv")
     for file in all_files:
         import_usage_stats_file(file)
 
+    count_after = sqlite3.connect(DB_NAME, timeout=300).execute("SELECT count(*) FROM usage_stats").fetchone()[0]
+
     # Create search-index for faster querying
     create_indexes()
     # Import Bikepoints
@@ -203,6 +234,9 @@ def main():
     # Import bike accidents
     import_accidents(2019)
 
+    if count_after - count_pre > 0:
+        create_dashboard_table()
+
 
 if __name__ == "__main__":
     main()
diff --git a/projects/project-3/backend/requirements.txt b/projects/project-3/backend/requirements.txt
index 663bd1f..3fd40cb 100644
--- a/projects/project-3/backend/requirements.txt
+++ b/projects/project-3/backend/requirements.txt
@@ -1 +1,4 @@
-requests
\ No newline at end of file
+requests
+fastapi[all]
+uvicorn
+pydantic
\ No newline at end of file
diff --git a/projects/project-3/backend/routers/__init__.py b/projects/project-3/backend/routers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/projects/project-3/backend/routers/accidents.py b/projects/project-3/backend/routers/accidents.py
new file mode 100644
index 0000000..e9fe942
--- /dev/null
+++ b/projects/project-3/backend/routers/accidents.py
@@ -0,0 +1,36 @@
+import logging
+from typing import List
+
+from fastapi import APIRouter
+from pydantic import BaseModel
+
+import api_database
+
+router = APIRouter(prefix="/accidents", tags=["accidents", "local"])
+LOG = logging.getLogger()
+
+
+class Accident(BaseModel):
+    lat: float
+    lon: float
+    severity: str
+
+
+@router.get(
+    "/",
+    name="Get all accidents",
+    description="Get all bike accidents in London.",
+    response_model=List[Accident]
+)
+def get_accidents():
+    return api_database.get_all_accidents()
+
+
+@router.get(
+    "/{year}",
+    name="Get accidents by year",
+    description="Get bike accidents in London for a specific year.",
+    response_model=List[Accident]
+)
+def get_accidents_by_year(year: str):
+    return api_database.get_accidents(year)
"NbEmptyDocks", "NbDocks"], + bikepoint['additionalProperties'] + )) + mapped_point['status'] = {prop['key']: int(prop['value']) for prop in props} + return mapped_point + + +@router.get( + "/", + tags=["upstream"], + response_model=List[Bikepoint] +) +def get_all(): + bikepoints = json.loads(requests.get(UPSTREAM_BASE_URL + "/BikePoint").text) + mapped_points = list(map(bikepoint_mapper, bikepoints)) + return mapped_points + + +@router.get( + "/{id}", + tags=["upstream"], + response_model=Bikepoint +) +def get_single(id: int): + bikepoint = json.loads(requests.get(UPSTREAM_BASE_URL + f"/BikePoint/BikePoints_{id}").text) + return bikepoint_mapper(bikepoint) diff --git a/projects/project-3/backend/routers/dashboard.py b/projects/project-3/backend/routers/dashboard.py new file mode 100644 index 0000000..dba1edb --- /dev/null +++ b/projects/project-3/backend/routers/dashboard.py @@ -0,0 +1,71 @@ +import datetime +from typing import Optional, List + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel + +import api_database + +router = APIRouter(prefix="/dashboard/{station_id}", tags=["dashboard", "local"]) + + +def validate_daterange(start_date: datetime.date, end_date: datetime.date): + days_requested = (end_date - start_date).days + if days_requested < 0: + raise HTTPException(status_code=400, detail="Requested date-range is negative") + + +class StationDashboard(BaseModel): + id: Optional[int] + commonName: Optional[str] + lat: Optional[float] + lon: Optional[float] + maxEndDate: Optional[datetime.date] + maxStartDate: Optional[datetime.date] + + +@router.get("/", response_model=StationDashboard) +def get_general_dashboard(station_id: int): + return api_database.get_dashboard(station_id)[0] + + +class StationDashboardTopStationsEntry(BaseModel): + startStationName: str + endStationName: str + number: int + avgDuration: int + + +@router.get("/to", response_model=List[StationDashboardTopStationsEntry]) +def get_to_dashboard_for_station(station_id: int, start_date: datetime.date, end_date: datetime.date): + validate_daterange(start_date, end_date) + return api_database.get_dashboard_to(station_id, start_date, end_date) + + +@router.get("/from", response_model=List[StationDashboardTopStationsEntry]) +def get_from_dashboard_for_station(station_id: int, start_date: datetime.date, end_date: datetime.date): + validate_daterange(start_date, end_date) + return api_database.get_dashboard_from(station_id, start_date, end_date) + + +class StationDashboardDurationGroup(BaseModel): + number: int + minutesGroup: str + + +@router.get("/duration", response_model=List[StationDashboardDurationGroup]) +def get_duration_dashboard_for_station(station_id: int, start_date: datetime.date, end_date: datetime.date): + validate_daterange(start_date, end_date) + return api_database.get_dashboard_duration(station_id, start_date, end_date) + + +class StationDashboardTimeGroup(BaseModel): + timeFrame: str + number: int + avgDuration: int + + +@router.get("/time", response_model=List[StationDashboardTimeGroup]) +def get_time_dashboard_for_station(station_id: int, start_date: datetime.date, end_date: datetime.date): + validate_daterange(start_date, end_date) + return api_database.get_dashboard_time(station_id, start_date, end_date) diff --git a/projects/project-3/server-config/nginx-server.conf b/projects/project-3/server-config/nginx-server.conf new file mode 100644 index 0000000..c4af9b5 --- /dev/null +++ b/projects/project-3/server-config/nginx-server.conf @@ -0,0 +1,37 @@ +# generated 
diff --git a/projects/project-3/server-config/nginx-server.conf b/projects/project-3/server-config/nginx-server.conf
new file mode 100644
index 0000000..c4af9b5
--- /dev/null
+++ b/projects/project-3/server-config/nginx-server.conf
@@ -0,0 +1,37 @@
+# generated 2020-12-20, Mozilla Guideline v5.6, nginx 1.17.7, OpenSSL 1.1.1d, modern configuration, no OCSP
+# https://ssl-config.mozilla.org/#server=nginx&version=1.17.7&config=modern&openssl=1.1.1d&ocsp=false&guideline=5.6
+server {
+    listen 80;
+    listen [::]:80;
+    server_name it-schwarz.net www.it-schwarz.net;
+    return 301 https://$host$request_uri;
+}
+
+server {
+    listen 443 ssl http2;
+    listen [::]:443 ssl http2;
+    server_name it-schwarz.net www.it-schwarz.net;
+    root /home/geovis/geovis-frontend/projects/project-3/frontend/dist/frontend;
+
+
+    ssl_certificate /etc/letsencrypt/live/it-schwarz.net/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/it-schwarz.net/privkey.pem;
+    ssl_session_timeout 1d;
+    ssl_session_cache shared:MozSSL:10m;  # about 40000 sessions
+    ssl_session_tickets off;
+
+    # modern configuration
+    ssl_protocols TLSv1.3;
+    ssl_prefer_server_ciphers off;
+
+    # HSTS (ngx_http_headers_module is required) (63072000 seconds)
+    add_header Strict-Transport-Security "max-age=63072000" always;
+
+    location /api/ {
+        proxy_pass "http://localhost:8080/api/";
+    }
+
+    location / {
+        try_files $uri $uri/ /index.html;
+    }
+}
\ No newline at end of file
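For reference, a quick end-to-end check of the combined setup. This is a minimal sketch assuming the container is running with the port mapping from the README and the nginx site above is deployed; the station id and date range are placeholders:

```shell
# Directly against the container port
curl "http://localhost:8080/api/latest/dashboard/42/to?start_date=2019-01-01&end_date=2019-12-31"

# The same route through the nginx /api/ proxy
curl "https://it-schwarz.net/api/latest/dashboard/42/to?start_date=2019-01-01&end_date=2019-12-31"
```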