geovisualisierung/projects/project-3/openapi/downloader.py

35 lines
1.0 KiB
Python

import urllib.parse
import urllib.request
import xml.etree.ElementTree
import os.path
import requests
BASE_URI = "https://s3-eu-west-1.amazonaws.com/cycling.data.tfl.gov.uk/"
BASE_DIR = "data/"
# XML namespace that qualifies every element looked up in the bucket listing.
S3_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}"

# Fetch the bucket listing.  Fail loudly on an HTTP error instead of parsing
# an error page, and hand the raw bytes to the parser in a single call
# (``fromstringlist`` on a plain string would feed it one character at a time).
listing = requests.get(BASE_URI, timeout=60)
listing.raise_for_status()
xml_data = xml.etree.ElementTree.fromstring(listing.content)

# NOTE(review): only the keys contained in this single listing response are
# processed; if the bucket paginates its listing, later pages are not
# fetched -- confirm whether that matters for this bucket.
for child in xml_data.findall(f"{S3_NS}Contents"):
    key = child.findtext(f"{S3_NS}Key")
    # Entries without a key, or whose key ends in "/", are not files.
    if not key or key.endswith("/"):
        continue

    # create download url: only the file name (last path segment) is escaped,
    # the directory part of the key is used verbatim
    folder, sep, filename = key.rpartition("/")
    download_url = BASE_URI + folder + sep + urllib.parse.quote_plus(filename)

    # create folders and files: the local path mirrors the unescaped key
    save_path = BASE_DIR + key
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    # skip already downloaded files
    if os.path.exists(save_path):
        continue

    # do the download
    print(f"DOWNLOADING... {download_url} to {save_path}")
    # Download into a temporary sibling file and rename it only on success,
    # so an interrupted transfer never leaves a truncated file that the
    # "already downloaded" check above would skip on the next run.
    temp_path = save_path + ".part"
    try:
        urllib.request.urlretrieve(download_url, temp_path)
        os.replace(temp_path, save_path)
    finally:
        # Only present here if the download or the rename failed.
        if os.path.exists(temp_path):
            os.remove(temp_path)