"""Mirror every object from the public TfL cycling-data S3 bucket into data/."""
import os
import os.path
import urllib.parse
import urllib.request
import xml.etree.ElementTree

# Public S3 bucket listing endpoint (returns an XML ListBucketResult document).
BASE_URI = "https://s3-eu-west-1.amazonaws.com/cycling.data.tfl.gov.uk/"
# Local root under which the bucket's key hierarchy is mirrored.
BASE_DIR = "data/"
# XML namespace used by the S3 ListBucketResult schema.
_S3_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}"


def plan_transfer(key):
    """Return (download_url, save_path) for one S3 object key.

    Only the final (filename) component of the key is percent-encoded for
    the URL; the local save path keeps the original, decoded key verbatim.
    (The original code quoted then unquoted the filename — an identity
    round-trip — so save_path is simply BASE_DIR + key.)

    NOTE(review): quote_plus encodes spaces as '+', which S3 treats
    literally in a URL *path* segment; urllib.parse.quote may be the more
    correct choice — preserved as-is to keep behavior identical. TODO confirm.
    """
    parts = key.split("/")
    quoted = parts[:-1] + [urllib.parse.quote_plus(parts[-1])]
    return BASE_URI + "/".join(quoted), BASE_DIR + key


def iter_bucket_keys(listing_xml):
    """Yield object keys from an S3 ListBucketResult document.

    Keys ending in '/' are folder placeholders, not downloadable objects,
    and are skipped.
    """
    root = xml.etree.ElementTree.fromstring(listing_xml)
    for contents in root.findall(_S3_NS + "Contents"):
        key = contents.find(_S3_NS + "Key").text
        if not key.endswith("/"):
            yield key


def main():
    """Fetch the bucket listing and download every object not already present."""
    # Fix vs original: parse the whole document with fromstring rather than
    # feeding a str to fromstringlist (which iterates it char by char), and
    # use the already-imported urllib.request instead of a requests dependency.
    listing = urllib.request.urlopen(BASE_URI).read()
    for key in iter_bucket_keys(listing):
        download_url, save_path = plan_transfer(key)
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        # Skip files already mirrored on a previous run.
        if os.path.exists(save_path):
            continue
        # Fix vs original: announce the download before starting it, not after
        # it has already completed.
        print(f"DOWNLOADING... {download_url} to {save_path}")
        urllib.request.urlretrieve(download_url, save_path)


if __name__ == "__main__":
    main()