library(gapminder)
Class activity solutions
import pandas as pd
import numpy as np
gapminder = r.gapminder
gapminder.query('continent == "Asia" & year == 2002')
country continent year lifeExp pop gdpPercap
10 Afghanistan Asia 2002 42.129 25268405 726.734055
94 Bahrain Asia 2002 74.795 656397 23403.559270
106 Bangladesh Asia 2002 62.013 135656790 1136.390430
226 Cambodia Asia 2002 56.752 12926707 896.226015
298 China Asia 2002 72.028 1280400000 3119.280896
670 Hong Kong, China Asia 2002 81.495 6762476 30209.015160
706 India Asia 2002 62.879 1034172547 1746.769454
718 Indonesia Asia 2002 68.588 211060000 2873.912870
730 Iran Asia 2002 69.451 66907826 9240.761975
742 Iraq Asia 2002 57.046 24001816 4390.717312
766 Israel Asia 2002 79.696 6029529 21905.595140
802 Japan Asia 2002 82.000 127065841 28604.591900
814 Jordan Asia 2002 71.263 5307470 3844.917194
838 Korea, Dem. Rep. Asia 2002 66.662 22215365 1646.758151
850 Korea, Rep. Asia 2002 77.045 47969150 19233.988180
862 Kuwait Asia 2002 76.904 2111561 35110.105660
874 Lebanon Asia 2002 71.028 3677780 9313.938830
946 Malaysia Asia 2002 73.044 22662365 10206.977940
1006 Mongolia Asia 2002 65.033 2674234 2140.739323
1054 Myanmar Asia 2002 59.908 45598081 611.000000
1078 Nepal Asia 2002 61.340 25873917 1057.206311
1162 Oman Asia 2002 74.193 2713462 19774.836870
1174 Pakistan Asia 2002 63.610 153403524 2092.712441
1222 Philippines Asia 2002 70.303 82995088 2650.921068
1318 Saudi Arabia Asia 2002 71.626 24501530 19014.541180
1366 Singapore Asia 2002 78.770 4197776 36023.105400
1438 Sri Lanka Asia 2002 70.815 19576783 3015.378833
1498 Syria Asia 2002 73.053 17155814 4090.925331
1510 Taiwan Asia 2002 76.990 22454239 23235.423290
1534 Thailand Asia 2002 68.564 62806748 5913.187529
1654 Vietnam Asia 2002 73.017 80908147 1764.456677
1666 West Bank and Gaza Asia 2002 72.370 3389578 4515.487575
1678 Yemen, Rep. Asia 2002 60.308 18701257 2234.820827
(gapminder.query('year == 2002')
    .groupby(by = 'continent')
    .agg(num_countries = ('country', 'count'))
)
num_countries
continent
Africa 52
Americas 25
Asia 33
Europe 30
Oceania 2
gapminder.assign(log_gdp = np.log(gapminder['gdpPercap']))
country continent year lifeExp pop gdpPercap log_gdp
0 Afghanistan Asia 1952 28.801 8425333 779.445314 6.658583
1 Afghanistan Asia 1957 30.332 9240934 820.853030 6.710344
2 Afghanistan Asia 1962 31.997 10267083 853.100710 6.748878
3 Afghanistan Asia 1967 34.020 11537966 836.197138 6.728864
4 Afghanistan Asia 1972 36.088 13079460 739.981106 6.606625
... ... ... ... ... ... ... ...
1699 Zimbabwe Africa 1987 62.351 9216418 706.157306 6.559838
1700 Zimbabwe Africa 1992 60.377 10704340 693.420786 6.541637
1701 Zimbabwe Africa 1997 46.809 11404948 792.449960 6.675129
1702 Zimbabwe Africa 2002 39.989 11926563 672.038623 6.510316
1703 Zimbabwe Africa 2007 43.487 12311143 469.709298 6.152114
[1704 rows x 7 columns]
- Here are two ways to create the table. The first way uses `assign` to first create a new column:
(gapminder.assign(log_gdp = np.log(gapminder['gdpPercap']))
    .query('year == 2002')
    .groupby(by = 'continent')
    .agg(num_countries = ('country', 'count'), mean_log_gdp = ('log_gdp', 'mean'))
)
num_countries mean_log_gdp
continent
Africa 52 7.367332
Americas 25 8.847365
Asia 33 8.542181
Europe 30 9.808402
Oceania 2 10.191543
The second way uses an anonymous function inside `agg`:
(gapminder.query('year == 2002')
    .groupby(by = 'continent')
    .agg(num_countries = ('country', 'count'),
         mean_log_gdp = ('gdpPercap', lambda x: np.mean(np.log(x))))
)
num_countries mean_log_gdp
continent
Africa 52 7.367332
Americas 25 8.847365
Asia 33 8.542181
Europe 30 9.808402
Oceania 2 10.191543