Class activity solutions

Author

Ciaran Evans

library(tidyverse)
library(googlesheets4)
gs4_deauth()
gdp_data <- read_sheet("https://docs.google.com/spreadsheets/d/1RctTQmKB0hzbm1E8rGcufYdMshRdhmYdeL29nXqmvsc/pub?gid=0")
import pandas as pd

gdp_data = r.gdp_data

(gdp_data.rename(columns={gdp_data.columns[0]: 'Country'})
         .melt(id_vars = 'Country',
               value_name = 'gdp',
               var_name = 'year')
         .dropna())
         Country  year           gdp
4        Algeria  1960   1280.384828
10     Argentina  1960   5251.876754
13     Australia  1960   9407.685082
14       Austria  1960   7434.183717
16       Bahamas  1960  11926.460965
...          ...   ...           ...
14271  Venezuela  2011   5671.912202
14275    Vietnam  2011    757.400929
14279      Yemen  2011    528.398673
14281     Zambia  2011    443.920631
14282   Zimbabwe  2011    347.745661

[7988 rows x 3 columns]
who2 = r.who2

new_who = who2.melt(id_vars = ['country', 'year'], value_name = 'count', var_name = 'group')
new_who[['diagnosis', 'sex', 'age_group']] = new_who['group'].str.split('_', expand=True)
new_who.drop('group', axis = 'columns').dropna()
                          country    year   count diagnosis sex age_group
17                    Afghanistan  1997.0     0.0        sp   m       014
18                    Afghanistan  1998.0    30.0        sp   m       014
19                    Afghanistan  1999.0     8.0        sp   m       014
20                    Afghanistan  2000.0    52.0        sp   m       014
21                    Afghanistan  2001.0   129.0        sp   m       014
...                           ...     ...     ...       ...  ..       ...
405269                   Viet Nam  2013.0  3110.0       rel   f        65
405303  Wallis and Futuna Islands  2013.0     2.0       rel   f        65
405371                      Yemen  2013.0   360.0       rel   f        65
405405                     Zambia  2013.0   669.0       rel   f        65
405439                   Zimbabwe  2013.0   725.0       rel   f        65

[76046 rows x 6 columns]
household = pd.DataFrame({
  'family': [1, 2, 3, 4, 5],
  'dob_child1': ["1998-11-26", "1996-06-22", "2002-07-11", "2004-10-10", "2000-12-05"],
  'dob_child2': ["2000-01-29", pd.NA, "2004-04-05", "2009-08-27", "2005-02-28"],
  'name_child1': ["Susan", "Mark", "Sam", "Craig", "Parker"],
  'name_child2': ["Jose", pd.NA, "Seth", "Khai", "Gracie"]
})

new_household = household.melt(id_vars = 'family')
new_household[['var', 'child']] = new_household['variable'].str.split('_', expand=True)
(new_household.drop('variable', axis='columns')
              .pivot(index = ['family', 'child'], 
                     columns = 'var', values = 'value')
              .dropna().reset_index())
var  family   child         dob    name
0         1  child1  1998-11-26   Susan
1         1  child2  2000-01-29    Jose
2         2  child1  1996-06-22    Mark
3         3  child1  2002-07-11     Sam
4         3  child2  2004-04-05    Seth
5         4  child1  2004-10-10   Craig
6         4  child2  2009-08-27    Khai
7         5  child1  2000-12-05  Parker
8         5  child2  2005-02-28  Gracie