35 lines
1.0 KiB
Python
35 lines
1.0 KiB
Python
|
import os
import os.path
import urllib.parse
import urllib.request
import xml.etree.ElementTree

import requests
|
||
|
|
||
|
BASE_URI = "https://s3-eu-west-1.amazonaws.com/cycling.data.tfl.gov.uk/"
BASE_DIR = "data/"

# XML namespace that qualifies every element of the S3 bucket listing.
S3_NAMESPACE = "{http://s3.amazonaws.com/doc/2006-03-01/}"
# Seconds to wait for a listing response before giving up.
LISTING_TIMEOUT = 30


def _iter_keys():
    """Yield every object key reported by the bucket listing at BASE_URI.

    A single listing response only carries a limited number of keys, so the
    request is repeated with ``marker`` set to the last key seen for as long
    as the response reports ``IsTruncated`` as ``true``.

    Raises:
        requests.HTTPError: if a listing request returns an error status.
        xml.etree.ElementTree.ParseError: if a response is not valid XML.
    """
    marker = None
    while True:
        params = {"marker": marker} if marker else None
        response = requests.get(BASE_URI, params=params, timeout=LISTING_TIMEOUT)
        response.raise_for_status()
        # Parse the raw bytes so the parser honours the encoding declared in
        # the XML prolog instead of working on already-decoded text.
        listing = xml.etree.ElementTree.fromstring(response.content)

        last_key = None
        for entry in listing.findall(S3_NAMESPACE + "Contents"):
            key = entry.findtext(S3_NAMESPACE + "Key")
            if key:
                last_key = key
                yield key

        truncated = listing.findtext(S3_NAMESPACE + "IsTruncated") == "true"
        if not truncated or last_key is None:
            return
        next_marker = listing.findtext(S3_NAMESPACE + "NextMarker") or last_key
        if next_marker == marker:
            # The listing did not advance; stop rather than loop forever.
            return
        marker = next_marker


def _download(url, destination):
    """Fetch *url* into *destination* without leaving a partial file there.

    The data is first written to ``destination + ".part"`` and renamed once
    the transfer has finished, so an interrupted run never produces a
    truncated file that a later run would mistake for a finished download.
    """
    partial_path = destination + ".part"
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, destination)


for key in _iter_keys():
    # Keys ending in "/" are folder placeholders, not downloadable objects.
    if key.endswith('/'):
        continue

    # Percent-encode every path segment of the key ("/" is kept as-is).
    download_url = BASE_URI + urllib.parse.quote(key)
    # The local layout mirrors the bucket layout using the unencoded key.
    save_path = BASE_DIR + key

    # skip already downloaded files
    if os.path.exists(save_path):
        continue

    # create folders, then do the download
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    print(f"DOWNLOADING... {download_url} to {save_path}")
    _download(download_url, save_path)