"""Merge CSV files from a directory into one CSV sorted by 'Rental Id'."""
import csv
import os
from collections import Counter


def read_files(base_dir, max_num=2):
    """Read up to *max_num* CSV files from *base_dir* into one list of rows.

    Files are visited in sorted name order so that the subset selected by
    *max_num* is deterministic (``os.listdir`` itself returns entries in
    arbitrary order). Entries that are not regular files are skipped and
    do not count against *max_num*.

    Args:
        base_dir: Directory whose files are parsed with ``csv.DictReader``.
        max_num: Maximum number of files to read. ``0`` reads nothing;
            ``None`` or a negative value reads every file.

    Returns:
        A list with one dict per data row, in file order.
    """
    # A negative counter never reaches zero, which means "no limit".
    remaining = -1 if max_num is None else max_num
    data = []
    for name in sorted(os.listdir(base_dir)):
        if remaining == 0:
            break
        path = os.path.join(base_dir, name)
        if not os.path.isfile(path):
            # open() would fail on a directory; ignore it instead.
            continue
        print(f"reading file '{name}'")
        # newline='' lets the csv module handle line endings itself.
        with open(path, 'r', newline='') as input_file:
            data.extend(csv.DictReader(input_file))
        remaining -= 1
    return data


def get_key_counts(data):
    """Count in how many rows each key appears.

    Diagnostic helper: a key whose count differs from ``len(data)`` is
    missing from at least one row.

    Args:
        data: Iterable of dict rows.

    Returns:
        A plain dict mapping each key to the number of rows containing it,
        in first-seen order.
    """
    return dict(Counter(key for entry in data for key in entry))


def main(input_dir='data/usage-stats', output_path='test.csv', max_num=10):
    """Combine the CSV files in *input_dir* into *output_path*.

    Rows are sorted by the integer value of their 'Rental Id' column. The
    output header is taken from the first sorted row; keys that appear only
    in other rows are dropped (``extrasaction='ignore'``).

    Args:
        input_dir: Directory containing the source CSV files.
        output_path: Path of the CSV file to write.
        max_num: Maximum number of input files to read (see ``read_files``).
    """
    data = read_files(input_dir, max_num=max_num)
    print("Sorting")
    data = sorted(data, key=lambda entry: int(entry['Rental Id']))
    print(f'final length of data {len(data)}')

    if not data:
        # Without a first row there is no header to write; return before
        # the output file is opened (and truncated).
        print('no data read, nothing written')
        return

    with open(output_path, 'w', newline='') as out_file:
        writer = csv.DictWriter(out_file, data[0].keys(),
                                extrasaction='ignore')
        writer.writeheader()
        writer.writerows(data)


if __name__ == '__main__':
    main()