Add downloader and csv reader
This commit is contained in:
parent
50f28d3125
commit
3604c6423a
34
projects/project-3/openapi/downloader.py
Normal file
34
projects/project-3/openapi/downloader.py
Normal file
@ -0,0 +1,34 @@
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree
|
||||
import os.path
|
||||
|
||||
import requests
|
||||
|
||||
# Root URL of the bucket; the script parses the response to a GET on it as an
# XML listing with `Contents`/`Key` elements.
BASE_URI = "https://s3-eu-west-1.amazonaws.com/cycling.data.tfl.gov.uk/"
# Local directory under which the bucket's key hierarchy is mirrored.
BASE_DIR = "data/"
# XML namespace that qualifies the elements of the listing document.
S3_NAMESPACE = "{http://s3.amazonaws.com/doc/2006-03-01/}"

# Fetch the listing once. A timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() stops us from trying to parse an
# HTTP error page as a listing.
# NOTE(review): only this single response is processed; if the service
# truncates/paginates the listing, later keys are never seen -- confirm.
listing_response = requests.get(BASE_URI, timeout=60)
listing_response.raise_for_status()

# Parse the raw bytes so the XML parser applies the document's own encoding
# declaration. (fromstringlist() expects a sequence of fragments; handing it
# a single str feeds the parser one character at a time.)
xml_data = xml.etree.ElementTree.fromstring(listing_response.content)

for child in xml_data.findall(f"{S3_NAMESPACE}Contents"):
    key = child.find(f"{S3_NAMESPACE}Key").text
    # Keys ending in "/" are presumably folder placeholders; there is no file
    # to download for them.
    if key.endswith("/"):
        continue

    *folders, filename = key.split("/")

    # create download url: only the final path component is escaped, exactly
    # as before (quote_plus turns spaces into "+").
    download_url = BASE_URI + "/".join(
        [*folders, urllib.parse.quote_plus(filename)]
    )

    # The local path mirrors the unescaped key below BASE_DIR.
    save_path = BASE_DIR + key

    # skip already downloaded files
    if os.path.exists(save_path):
        continue

    # create folders
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    # do the download: write to a temporary name and rename on success, so an
    # interrupted transfer never leaves a truncated file that a later run
    # would mistake for a finished download.
    print(f"DOWNLOADING... {download_url} to {save_path}")
    partial_path = save_path + ".part"
    urllib.request.urlretrieve(download_url, partial_path)
    os.replace(partial_path, save_path)
|
39
projects/project-3/openapi/read_csv.py
Normal file
39
projects/project-3/openapi/read_csv.py
Normal file
@ -0,0 +1,39 @@
|
||||
import csv
|
||||
import os
|
||||
|
||||
|
||||
def read_files(base_dir):
    """Read every CSV file located directly inside *base_dir*.

    Each regular file in the directory is parsed with ``csv.DictReader`` and
    its rows are appended to a single list. Files are visited in sorted name
    order so the result is deterministic. Sub-directories are skipped.

    Args:
        base_dir: Path of the directory whose files should be read.

    Returns:
        A list with one mapping (column name -> string value) per data row
        of every file read, in file order.

    Raises:
        OSError: If *base_dir* cannot be listed or a file cannot be opened.
    """
    data = []
    for file in sorted(os.listdir(base_dir)):
        # Build the path from base_dir (not a hard-coded 'data') so the
        # function reads from the same directory it lists.
        path = os.path.join(base_dir, file)
        if not os.path.isfile(path):
            # Directories cannot be opened as CSV files.
            continue
        print(f'reading file \'{file}\'')
        # newline='' lets the csv module handle line endings itself, which is
        # required for quoted fields that contain embedded newlines.
        with open(path, 'r', newline='') as input_file:
            data.extend(csv.DictReader(input_file))
    return data
|
||||
|
||||
|
||||
def main():
    """Load all rows from ``data``, sort them and write them to ``test.csv``.

    Rows are read with ``read_files('data')``, ordered by the integer value
    of their ``SERIAL_NUMBER`` column and written to ``test.csv`` using the
    column names of the first sorted row as the header. Progress information
    is printed along the way. If no rows were read, a message is printed and
    no output file is written.

    Raises:
        KeyError: If a row has no ``SERIAL_NUMBER`` column.
        ValueError: If a ``SERIAL_NUMBER`` value is not an integer, or a row
            contains a column missing from the first row's header.
    """
    data = read_files('data')
    if not data:
        # Without rows there is no first entry to show and no header to
        # derive, so stop instead of failing on data[0].
        print("no rows found in 'data'; nothing to write")
        return

    print(data[0])
    print("Sorting")
    data.sort(key=lambda entry: int(entry['SERIAL_NUMBER']))
    print(data[0])
    print(f'final length of data {len(data)}')

    # newline='' stops the text layer from translating the csv module's own
    # "\r\n" row terminator, which would otherwise produce blank lines
    # between rows on platforms that rewrite "\n".
    with open('test.csv', 'w', newline='') as out_file:
        writer = csv.DictWriter(out_file, data[0].keys())
        writer.writeheader()
        writer.writerows(data)


if __name__ == '__main__':
    main()
|
||||
|
||||
|
||||
#
|
||||
# with open('test.csv') as again_in:
|
||||
# reader2 = csv.DictReader(again_in)
|
||||
# data2 = list(reader2)
|
||||
#
|
||||
# print(data2[0])
|
||||
#
|
||||
# print(f"Is same? {data[0] == data2[0]}")
|
Loading…
Reference in New Issue
Block a user