Class activity solutions

Author

Ciaran Evans

library(gapminder)
import pandas as pd
import numpy as np

# Bring the gapminder data frame into Python from the `r` object
# (presumably reticulate's handle on the R session -- confirm).
gapminder = r.gapminder

# Show only the rows whose continent is Asia and whose year is 2002.
asia_2002 = 'continent == "Asia" & year == 2002'
gapminder.query(asia_2002)
                 country continent  year  lifeExp         pop     gdpPercap
10           Afghanistan      Asia  2002   42.129    25268405    726.734055
94               Bahrain      Asia  2002   74.795      656397  23403.559270
106           Bangladesh      Asia  2002   62.013   135656790   1136.390430
226             Cambodia      Asia  2002   56.752    12926707    896.226015
298                China      Asia  2002   72.028  1280400000   3119.280896
670     Hong Kong, China      Asia  2002   81.495     6762476  30209.015160
706                India      Asia  2002   62.879  1034172547   1746.769454
718            Indonesia      Asia  2002   68.588   211060000   2873.912870
730                 Iran      Asia  2002   69.451    66907826   9240.761975
742                 Iraq      Asia  2002   57.046    24001816   4390.717312
766               Israel      Asia  2002   79.696     6029529  21905.595140
802                Japan      Asia  2002   82.000   127065841  28604.591900
814               Jordan      Asia  2002   71.263     5307470   3844.917194
838     Korea, Dem. Rep.      Asia  2002   66.662    22215365   1646.758151
850          Korea, Rep.      Asia  2002   77.045    47969150  19233.988180
862               Kuwait      Asia  2002   76.904     2111561  35110.105660
874              Lebanon      Asia  2002   71.028     3677780   9313.938830
946             Malaysia      Asia  2002   73.044    22662365  10206.977940
1006            Mongolia      Asia  2002   65.033     2674234   2140.739323
1054             Myanmar      Asia  2002   59.908    45598081    611.000000
1078               Nepal      Asia  2002   61.340    25873917   1057.206311
1162                Oman      Asia  2002   74.193     2713462  19774.836870
1174            Pakistan      Asia  2002   63.610   153403524   2092.712441
1222         Philippines      Asia  2002   70.303    82995088   2650.921068
1318        Saudi Arabia      Asia  2002   71.626    24501530  19014.541180
1366           Singapore      Asia  2002   78.770     4197776  36023.105400
1438           Sri Lanka      Asia  2002   70.815    19576783   3015.378833
1498               Syria      Asia  2002   73.053    17155814   4090.925331
1510              Taiwan      Asia  2002   76.990    22454239  23235.423290
1534            Thailand      Asia  2002   68.564    62806748   5913.187529
1654             Vietnam      Asia  2002   73.017    80908147   1764.456677
1666  West Bank and Gaza      Asia  2002   72.370     3389578   4515.487575
1678         Yemen, Rep.      Asia  2002   60.308    18701257   2234.820827
# Restrict to the 2002 rows, then count the non-missing `country`
# entries within each continent.
rows_2002 = gapminder.query('year == 2002')
rows_2002.groupby('continent').agg(num_countries=('country', 'count'))
           num_countries
continent               
Africa                52
Americas              25
Asia                  33
Europe                30
Oceania                2
# Return a copy of the data with a new `log_gdp` column holding the
# natural log of `gdpPercap`; `gapminder` itself is not modified.
gapminder.assign(log_gdp=lambda df: np.log(df['gdpPercap']))
          country continent  year  lifeExp       pop   gdpPercap   log_gdp
0     Afghanistan      Asia  1952   28.801   8425333  779.445314  6.658583
1     Afghanistan      Asia  1957   30.332   9240934  820.853030  6.710344
2     Afghanistan      Asia  1962   31.997  10267083  853.100710  6.748878
3     Afghanistan      Asia  1967   34.020  11537966  836.197138  6.728864
4     Afghanistan      Asia  1972   36.088  13079460  739.981106  6.606625
...           ...       ...   ...      ...       ...         ...       ...
1699     Zimbabwe    Africa  1987   62.351   9216418  706.157306  6.559838
1700     Zimbabwe    Africa  1992   60.377  10704340  693.420786  6.541637
1701     Zimbabwe    Africa  1997   46.809  11404948  792.449960  6.675129
1702     Zimbabwe    Africa  2002   39.989  11926563  672.038623  6.510316
1703     Zimbabwe    Africa  2007   43.487  12311143  469.709298  6.152114

[1704 rows x 7 columns]
  1. Here are two ways to create the table. The first uses assign to create a new log_gdp column before summarizing:
# Step 1: add the log-GDP column up front so it can be aggregated by name.
with_log_gdp = gapminder.assign(log_gdp=np.log(gapminder['gdpPercap']))

# Step 2: keep the 2002 rows and summarize each continent.
(
    with_log_gdp
    .query('year == 2002')
    .groupby('continent')
    .agg(
        num_countries=('country', 'count'),
        mean_log_gdp=('log_gdp', 'mean'),
    )
)
           num_countries  mean_log_gdp
continent                             
Africa                52      7.367332
Americas              25      8.847365
Asia                  33      8.542181
Europe                30      9.808402
Oceania                2     10.191543

The second way uses an anonymous function inside agg:

# Keep the 2002 rows; the log transform happens inside the aggregation,
# so no extra column is needed.
rows_2002 = gapminder.query('year == 2002')

(
    rows_2002
    .groupby('continent')
    .agg(
        num_countries=('country', 'count'),
        # Take the log of each continent's gdpPercap values, then average.
        mean_log_gdp=('gdpPercap', lambda gdp: np.mean(np.log(gdp))),
    )
)
           num_countries  mean_log_gdp
continent                             
Africa                52      7.367332
Americas              25      8.847365
Asia                  33      8.542181
Europe                30      9.808402
Oceania                2     10.191543