Data Engineer & DataOps
My LinkedIn
My GitHub
import json
import csv
import pandas as pd
json_file_path = "data.json"
with open(json_file_path, 'r') as j:
content = json.loads(j.read())
content
{'John Peterson': {'age': 20, 'address': 'Turku', 'occupation': 'Doctor'},
'John Munoz': {'age': 26, 'address': 'Oulu', 'occupation': 'Nurse'},
'Samantha Kennedy': {'age': 27,
'address': 'Turku',
'occupation': 'Researcher'},
'Tiffany Munoz': {'age': 21,
'address': 'Tampere',
'occupation': 'Programmer'},
'Thea Peterson': {'age': 55,
'address': 'Helsinki',
'occupation': 'Programmer'},
'Evie Kennedy': {'age': 32, 'address': 'Lapland', 'occupation': 'Programmer'},
'Beth Wagner': {'age': 37, 'address': 'Tampere', 'occupation': 'Programmer'},
'Marie Reid': {'age': 42, 'address': 'Helsinki', 'occupation': 'Programmer'},
'John Reid': {'age': 46, 'address': 'Oulu', 'occupation': 'Doctor'},
'Adele Wagner': {'age': 37, 'address': 'Oulu', 'occupation': 'Doctor'},
'Lola Reid': {'age': 32, 'address': 'Oulu', 'occupation': 'Analyst'},
'Charlie Peterson': {'age': 41, 'address': 'Tampere', 'occupation': 'Doctor'},
'Tiffany Kennedy': {'age': 37, 'address': 'Tampere', 'occupation': 'Doctor'},
'Evie Wagner': {'age': 51, 'address': 'Lapland', 'occupation': 'Analyst'},
'Annie Reid': {'age': 21, 'address': 'Tampere', 'occupation': 'Doctor'},
'Aeysha Kennedy': {'age': 35, 'address': 'Turku', 'occupation': 'Analyst'},
'Jacqueline Peterson': {'age': 26,
'address': 'Helsinki',
'occupation': 'Researcher'},
'Amirah Wagner': {'age': 46, 'address': 'Oulu', 'occupation': 'Programmer'},
'Beth Peterson': {'age': 34, 'address': 'Oulu', 'occupation': 'Analyst'},
'Gabrielle Munoz': {'age': 47, 'address': 'Helsinki', 'occupation': 'Doctor'},
'Gabrielle Wagner': {'age': 29, 'address': 'Lapland', 'occupation': 'Doctor'},
'Marie Munoz': {'age': 24, 'address': 'Oulu', 'occupation': 'Nurse'},
'Beth Kennedy': {'age': 44, 'address': 'Turku', 'occupation': 'Doctor'},
'Gabrielle Reid': {'age': 20, 'address': 'Turku', 'occupation': 'Nurse'},
'Aimee Reid': {'age': 47, 'address': 'Turku', 'occupation': 'Programmer'},
'Thea Wagner': {'age': 55, 'address': 'Tampere', 'occupation': 'Programmer'},
'Aeysha Reid': {'age': 50, 'address': 'Oulu', 'occupation': 'Analyst'},
'Adele Peterson': {'age': 42,
'address': 'Tampere',
'occupation': 'Programmer'},
'Gabrielle Peterson': {'age': 32,
'address': 'Helsinki',
'occupation': 'Doctor'},
'Amirah Reid': {'age': 36, 'address': 'Helsinki', 'occupation': 'Nurse'},
'Aeysha Munoz': {'age': 42, 'address': 'Oulu', 'occupation': 'Doctor'},
'Annie Wagner': {'age': 50, 'address': 'Oulu', 'occupation': 'Doctor'},
'Samantha Peterson': {'age': 48,
'address': 'Tampere',
'occupation': 'Doctor'},
'Thea Kennedy': {'age': 43, 'address': 'Tampere', 'occupation': 'Researcher'},
'Lola Munoz': {'age': 28, 'address': 'Tampere', 'occupation': 'Researcher'},
'Kimberly Reid': {'age': 44, 'address': 'Lapland', 'occupation': 'Doctor'},
'Adele Kennedy': {'age': 32, 'address': 'Oulu', 'occupation': 'Analyst'},
'Kimberly Kennedy': {'age': 41,
'address': 'Lapland',
'occupation': 'Programmer'},
'Amirah Munoz': {'age': 45, 'address': 'Lapland', 'occupation': 'Doctor'},
'Aimee Munoz': {'age': 24, 'address': 'Turku', 'occupation': 'Programmer'},
'Samantha Reid': {'age': 21, 'address': 'Oulu', 'occupation': 'Researcher'},
'Taylor Munoz': {'age': 53, 'address': 'Oulu', 'occupation': 'Researcher'},
'John Wagner': {'age': 51, 'address': 'Turku', 'occupation': 'Nurse'},
'Aimee Peterson': {'age': 55, 'address': 'Tampere', 'occupation': 'Doctor'},
'Evie Peterson': {'age': 46, 'address': 'Oulu', 'occupation': 'Doctor'},
'Aimee Kennedy': {'age': 29,
'address': 'Lapland',
'occupation': 'Researcher'},
'Adele Reid': {'age': 46, 'address': 'Lapland', 'occupation': 'Researcher'},
'Taylor Peterson': {'age': 55, 'address': 'Helsinki', 'occupation': 'Nurse'},
'Gabrielle Kennedy': {'age': 30,
'address': 'Lapland',
'occupation': 'Analyst'},
'Lola Peterson': {'age': 31, 'address': 'Helsinki', 'occupation': 'Doctor'},
'Thea Reid': {'age': 33, 'address': 'Oulu', 'occupation': 'Nurse'},
'Amirah Kennedy': {'age': 42, 'address': 'Oulu', 'occupation': 'Nurse'},
'Taylor Wagner': {'age': 53, 'address': 'Turku', 'occupation': 'Doctor'},
'Lola Wagner': {'age': 49, 'address': 'Oulu', 'occupation': 'Analyst'},
'Jacqueline Munoz': {'age': 42,
'address': 'Helsinki',
'occupation': 'Programmer'},
'Amirah Peterson': {'age': 22, 'address': 'Lapland', 'occupation': 'Nurse'},
'Kimberly Peterson': {'age': 28, 'address': 'Tampere', 'occupation': 'Nurse'},
'Charlie Reid': {'age': 45,
'address': 'Helsinki',
'occupation': 'Researcher'},
'Beth Munoz': {'age': 21, 'address': 'Lapland', 'occupation': 'Programmer'},
'Mariam Kennedy': {'age': 45, 'address': 'Tampere', 'occupation': 'Analyst'},
'Marie Peterson': {'age': 26, 'address': 'Tampere', 'occupation': 'Analyst'},
'Tiffany Reid': {'age': 25, 'address': 'Oulu', 'occupation': 'Analyst'},
'Kimberly Wagner': {'age': 20,
'address': 'Tampere',
'occupation': 'Programmer'},
'Evie Reid': {'age': 47, 'address': 'Lapland', 'occupation': 'Nurse'},
'Adele Munoz': {'age': 34, 'address': 'Helsinki', 'occupation': 'Researcher'},
'Evie Munoz': {'age': 52, 'address': 'Oulu', 'occupation': 'Researcher'},
'Annie Peterson': {'age': 22, 'address': 'Oulu', 'occupation': 'Programmer'},
'Annie Kennedy': {'age': 47, 'address': 'Lapland', 'occupation': 'Doctor'},
'Thea Munoz': {'age': 42, 'address': 'Helsinki', 'occupation': 'Analyst'},
'Jacqueline Reid': {'age': 45, 'address': 'Oulu', 'occupation': 'Programmer'},
'Mariam Peterson': {'age': 33, 'address': 'Tampere', 'occupation': 'Analyst'},
'Aimee Wagner': {'age': 50,
'address': 'Helsinki',
'occupation': 'Researcher'},
'Kimberly Munoz': {'age': 45, 'address': 'Oulu', 'occupation': 'Researcher'},
'Tiffany Wagner': {'age': 19,
'address': 'Lapland',
'occupation': 'Programmer'},
'Marie Kennedy': {'age': 50, 'address': 'Oulu', 'occupation': 'Researcher'},
'Aeysha Peterson': {'age': 21, 'address': 'Turku', 'occupation': 'Nurse'},
'Taylor Reid': {'age': 20, 'address': 'Tampere', 'occupation': 'Doctor'},
'Jacqueline Wagner': {'age': 40,
'address': 'Tampere',
'occupation': 'Researcher'},
'Charlie Kennedy': {'age': 54, 'address': 'Lapland', 'occupation': 'Doctor'},
'Samantha Wagner': {'age': 38, 'address': 'Lapland', 'occupation': 'Nurse'},
'Charlie Munoz': {'age': 42, 'address': 'Oulu', 'occupation': 'Doctor'},
'Aeysha Wagner': {'age': 23, 'address': 'Helsinki', 'occupation': 'Analyst'},
'Marie Wagner': {'age': 45, 'address': 'Tampere', 'occupation': 'Programmer'}}
header = ['fullname', 'age', 'address', 'occupation']
with open('data.csv', 'w', newline='') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
writer.writerow(header)
for i in content:
try:
writer.writerow([i, content[i]['age'], content[i]['address'], content[i]['occupation']])
except:
print('unable to write data for ' + i)
csv = pd.read_csv('data.csv')
csv
fullname | age | address | occupation | |
---|---|---|---|---|
0 | John Peterson | 20 | Turku | Doctor |
1 | John Munoz | 26 | Oulu | Nurse |
2 | Samantha Kennedy | 27 | Turku | Researcher |
3 | Tiffany Munoz | 21 | Tampere | Programmer |
4 | Thea Peterson | 55 | Helsinki | Programmer |
... | ... | ... | ... | ... |
78 | Charlie Kennedy | 54 | Lapland | Doctor |
79 | Samantha Wagner | 38 | Lapland | Nurse |
80 | Charlie Munoz | 42 | Oulu | Doctor |
81 | Aeysha Wagner | 23 | Helsinki | Analyst |
82 | Marie Wagner | 45 | Tampere | Programmer |
83 rows × 4 columns
csv['lastname'] = csv['fullname'].apply(lambda x: x.split()[-1])
output = dict()
def count(df):
n_of_people = df['lastname'].value_counts()
for i in n_of_people.index:
output[i] = {'count': int(n_of_people.loc[i])}
for i in ['age', 'address', 'occupation']:
df_pivot = pd.pivot_table(csv, index='lastname', columns=[i], aggfunc='count')
for n in df_pivot.index:
output[n][i] = df_pivot.loc[n].dropna().reset_index(level=0, drop=True).astype('int32').to_dict()
count(csv)
output
{'Peterson': {'count': 18,
'age': {20: 1,
21: 1,
22: 2,
26: 2,
28: 1,
31: 1,
32: 1,
33: 1,
34: 1,
41: 1,
42: 1,
46: 1,
48: 1,
55: 3},
'address': {'Helsinki': 5,
'Lapland': 1,
'Oulu': 3,
'Tampere': 7,
'Turku': 2},
'occupation': {'Analyst': 3,
'Doctor': 7,
'Nurse': 4,
'Programmer': 3,
'Researcher': 1}},
'Wagner': {'count': 17,
'age': {19: 1,
20: 1,
23: 1,
29: 1,
37: 2,
38: 1,
40: 1,
45: 1,
46: 1,
49: 1,
50: 2,
51: 2,
53: 1,
55: 1},
'address': {'Helsinki': 2,
'Lapland': 4,
'Oulu': 4,
'Tampere': 5,
'Turku': 2},
'occupation': {'Analyst': 3,
'Doctor': 4,
'Nurse': 2,
'Programmer': 6,
'Researcher': 2}},
'Reid': {'count': 17,
'age': {20: 2,
21: 2,
25: 1,
32: 1,
33: 1,
36: 1,
42: 1,
44: 1,
45: 2,
46: 2,
47: 2,
50: 1},
'address': {'Helsinki': 3,
'Lapland': 3,
'Oulu': 7,
'Tampere': 2,
'Turku': 2},
'occupation': {'Analyst': 3,
'Doctor': 4,
'Nurse': 4,
'Programmer': 3,
'Researcher': 3}},
'Munoz': {'count': 16,
'age': {21: 2,
24: 2,
26: 1,
28: 1,
34: 1,
42: 4,
45: 2,
47: 1,
52: 1,
53: 1},
'address': {'Helsinki': 4,
'Lapland': 2,
'Oulu': 7,
'Tampere': 2,
'Turku': 1},
'occupation': {'Analyst': 1,
'Doctor': 4,
'Nurse': 2,
'Programmer': 4,
'Researcher': 5}},
'Kennedy': {'count': 15,
'age': {27: 1,
29: 1,
30: 1,
32: 2,
35: 1,
37: 1,
41: 1,
42: 1,
43: 1,
44: 1,
45: 1,
47: 1,
50: 1,
54: 1},
'address': {'Lapland': 6, 'Oulu': 3, 'Tampere': 3, 'Turku': 3},
'occupation': {'Analyst': 4,
'Doctor': 4,
'Nurse': 1,
'Programmer': 2,
'Researcher': 4}}}
print(json.dumps(output, indent=2))
{
"Peterson": {
"count": 18,
"age": {
"20": 1,
"21": 1,
"22": 2,
"26": 2,
"28": 1,
"31": 1,
"32": 1,
"33": 1,
"34": 1,
"41": 1,
"42": 1,
"46": 1,
"48": 1,
"55": 3
},
"address": {
"Helsinki": 5,
"Lapland": 1,
"Oulu": 3,
"Tampere": 7,
"Turku": 2
},
"occupation": {
"Analyst": 3,
"Doctor": 7,
"Nurse": 4,
"Programmer": 3,
"Researcher": 1
}
},
"Wagner": {
"count": 17,
"age": {
"19": 1,
"20": 1,
"23": 1,
"29": 1,
"37": 2,
"38": 1,
"40": 1,
"45": 1,
"46": 1,
"49": 1,
"50": 2,
"51": 2,
"53": 1,
"55": 1
},
"address": {
"Helsinki": 2,
"Lapland": 4,
"Oulu": 4,
"Tampere": 5,
"Turku": 2
},
"occupation": {
"Analyst": 3,
"Doctor": 4,
"Nurse": 2,
"Programmer": 6,
"Researcher": 2
}
},
"Reid": {
"count": 17,
"age": {
"20": 2,
"21": 2,
"25": 1,
"32": 1,
"33": 1,
"36": 1,
"42": 1,
"44": 1,
"45": 2,
"46": 2,
"47": 2,
"50": 1
},
"address": {
"Helsinki": 3,
"Lapland": 3,
"Oulu": 7,
"Tampere": 2,
"Turku": 2
},
"occupation": {
"Analyst": 3,
"Doctor": 4,
"Nurse": 4,
"Programmer": 3,
"Researcher": 3
}
},
"Munoz": {
"count": 16,
"age": {
"21": 2,
"24": 2,
"26": 1,
"28": 1,
"34": 1,
"42": 4,
"45": 2,
"47": 1,
"52": 1,
"53": 1
},
"address": {
"Helsinki": 4,
"Lapland": 2,
"Oulu": 7,
"Tampere": 2,
"Turku": 1
},
"occupation": {
"Analyst": 1,
"Doctor": 4,
"Nurse": 2,
"Programmer": 4,
"Researcher": 5
}
},
"Kennedy": {
"count": 15,
"age": {
"27": 1,
"29": 1,
"30": 1,
"32": 2,
"35": 1,
"37": 1,
"41": 1,
"42": 1,
"43": 1,
"44": 1,
"45": 1,
"47": 1,
"50": 1,
"54": 1
},
"address": {
"Lapland": 6,
"Oulu": 3,
"Tampere": 3,
"Turku": 3
},
"occupation": {
"Analyst": 4,
"Doctor": 4,
"Nurse": 1,
"Programmer": 2,
"Researcher": 4
}
}
}