import csv
import os
from collections import Counter


def read_files(base_dir, max_num=2):
    """Read up to *max_num* CSV files from *base_dir* into one list of rows.

    Files are visited in sorted name order so that, when the limit is in
    force, the same files are picked on every run (``os.listdir`` itself
    returns entries in arbitrary order). Entries that are not regular files
    are skipped and do not count against the limit.

    Args:
        base_dir: Directory containing the CSV files.
        max_num: Maximum number of files to read. ``None`` or a negative
            value means "no limit"; ``0`` reads nothing.

    Returns:
        A list with one dict per CSV row (as produced by
        ``csv.DictReader``), concatenated across the files that were read.
    """
    unlimited = max_num is None or max_num < 0
    data = []
    files_read = 0
    for file in sorted(os.listdir(base_dir)):
        if not unlimited and files_read >= max_num:
            break
        path = os.path.join(base_dir, file)
        if not os.path.isfile(path):
            # Sub-directories (or other non-files) cannot be opened as CSV.
            continue
        print(f'reading file \'{file}\'')
        # newline='' lets the csv module handle embedded line breaks itself.
        with open(path, 'r', newline='') as input_file:
            data.extend(csv.DictReader(input_file))
        files_read += 1
    return data


def get_key_counts(data):
    """Count in how many rows each column name appears.

    Args:
        data: Iterable of mappings (e.g. the rows returned by
            ``read_files``).

    Returns:
        A plain dict mapping each key to the number of rows containing it,
        in first-seen order.
    """
    return dict(Counter(key for row in data for key in row))


def main():
    """Merge the usage-stats CSVs, sort by 'Rental Id', write 'test.csv'."""
    data = read_files('data/usage-stats', max_num=10)
    print("Sorting")
    data = sorted(data, key=lambda entry: int(entry['Rental Id']))
    print(f'final length of data {len(data)}')
    if not data:
        # Without rows there is no header to derive; leave any existing
        # output file untouched instead of failing on data[0].
        print('no rows read, nothing written')
        return
    # The header comes from the first (lowest 'Rental Id') row. Rows may carry
    # columns that row lacks (get_key_counts(data) shows which keys are not
    # present everywhere); extrasaction='ignore' drops those extra columns
    # instead of raising ValueError.
    with open('test.csv', 'w', newline='') as out_file:
        writer = csv.DictWriter(out_file, data[0].keys(), extrasaction='ignore')
        writer.writeheader()
        writer.writerows(data)