library(tidyverse)
library(googlesheets4)
gs4_deauth()
<- read_sheet("https://docs.google.com/spreadsheets/d/1RctTQmKB0hzbm1E8rGcufYdMshRdhmYdeL29nXqmvsc/pub?gid=0") gdp_data
Class activity solutions
import pandas as pd
= r.gdp_data
gdp_data
={gdp_data.columns[0]: 'Country'})
(gdp_data.rename(columns= 'Country',
.melt(id_vars = 'gdp',
value_name = 'year')
var_name .dropna())
Country year gdp
4 Algeria 1960 1280.384828
10 Argentina 1960 5251.876754
13 Australia 1960 9407.685082
14 Austria 1960 7434.183717
16 Bahamas 1960 11926.460965
... ... ... ...
14271 Venezuela 2011 5671.912202
14275 Vietnam 2011 757.400929
14279 Yemen 2011 528.398673
14281 Zambia 2011 443.920631
14282 Zimbabwe 2011 347.745661
[7988 rows x 3 columns]
= r.who2
who2
= who2.melt(id_vars = ['country', 'year'], value_name = 'count', var_name = 'group')
new_who 'diagnosis', 'sex', 'age_group']] = new_who['group'].str.split('_', expand=True)
new_who[['group', axis = 'columns').dropna() new_who.drop(
country year count diagnosis sex age_group
17 Afghanistan 1997.0 0.0 sp m 014
18 Afghanistan 1998.0 30.0 sp m 014
19 Afghanistan 1999.0 8.0 sp m 014
20 Afghanistan 2000.0 52.0 sp m 014
21 Afghanistan 2001.0 129.0 sp m 014
... ... ... ... ... .. ...
405269 Viet Nam 2013.0 3110.0 rel f 65
405303 Wallis and Futuna Islands 2013.0 2.0 rel f 65
405371 Yemen 2013.0 360.0 rel f 65
405405 Zambia 2013.0 669.0 rel f 65
405439 Zimbabwe 2013.0 725.0 rel f 65
[76046 rows x 6 columns]
= pd.DataFrame({
household 'family': [1, 2, 3, 4, 5],
'dob_child1': ["1998-11-26", "1996-06-22", "2002-07-11", "2004-10-10", "2000-12-05"],
'dob_child2': ["2000-01-29", pd.NA, "2004-04-05", "2009-08-27", "2005-02-28"],
'name_child1': ["Susan", "Mark", "Sam", "Craig", "Parker"],
'name_child2': ["Jose", pd.NA, "Seth", "Khai", "Gracie"]
})
= household.melt(id_vars = 'family')
new_household 'var', 'child']] = new_household['variable'].str.split('_', expand=True)
new_household[['variable', axis='columns')
(new_household.drop(= ['family', 'child'],
.pivot(index = 'var', values = 'value')
columns .dropna().reset_index())
var family child dob name
0 1 child1 1998-11-26 Susan
1 1 child2 2000-01-29 Jose
2 2 child1 1996-06-22 Mark
3 3 child1 2002-07-11 Sam
4 3 child2 2004-04-05 Seth
5 4 child1 2004-10-10 Craig
6 4 child2 2009-08-27 Khai
7 5 child1 2000-12-05 Parker
8 5 child2 2005-02-28 Gracie