library(tidyverse)
library(googlesheets4)
gs4_deauth()
<- read_sheet("https://docs.google.com/spreadsheets/d/1RctTQmKB0hzbm1E8rGcufYdMshRdhmYdeL29nXqmvsc/pub?gid=0")
gdp_data = read_sheet("https://docs.google.com/spreadsheets/d/1hDinTIRHQIaZg1RUn6Z_6mo12PtKwEPFIz_mJVF6P5I/pub?gid=0") lit_data
Class activity solutions
<- gdp_data |>
gdp_data rename(country = starts_with("Income")) |>
pivot_longer(-country, names_to = "year", values_to = "gdp",
values_drop_na = TRUE)
<- lit_data |>
lit_data rename(country = starts_with("Adult")) |>
pivot_longer(-country, names_to = "year", values_to = "literacy",
values_drop_na = TRUE)
<- lit_data |>
lit_gdp inner_join(gdp_data, join_by(country, year))
In R:
<- read.csv("https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/air_quality_long.csv")
air_quality <- read.csv("https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/air_quality_parameters.csv")
air_parameters
<- air_quality |>
air_quality left_join(air_parameters, join_by(parameter == id))
head(air_quality)
city country date.utc location parameter value unit
1 Antwerpen BE 2019-06-18 06:00:00+00:00 BETR801 pm25 18.0 µg/m³
2 Antwerpen BE 2019-06-17 08:00:00+00:00 BETR801 pm25 6.5 µg/m³
3 Antwerpen BE 2019-06-17 07:00:00+00:00 BETR801 pm25 18.5 µg/m³
4 Antwerpen BE 2019-06-17 06:00:00+00:00 BETR801 pm25 16.0 µg/m³
5 Antwerpen BE 2019-06-17 05:00:00+00:00 BETR801 pm25 7.5 µg/m³
6 Antwerpen BE 2019-06-17 04:00:00+00:00 BETR801 pm25 7.5 µg/m³
description name
1 Particulate matter less than 2.5 micrometers in diameter PM2.5
2 Particulate matter less than 2.5 micrometers in diameter PM2.5
3 Particulate matter less than 2.5 micrometers in diameter PM2.5
4 Particulate matter less than 2.5 micrometers in diameter PM2.5
5 Particulate matter less than 2.5 micrometers in diameter PM2.5
6 Particulate matter less than 2.5 micrometers in diameter PM2.5
In Python:
import pandas as pd
= pd.read_csv("https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/air_quality_long.csv")
air_quality = pd.read_csv("https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/air_quality_parameters.csv")
air_parameters
= pd.merge(air_quality, air_parameters, how = 'left',
air_quality = 'parameter', right_on = 'id')
left_on air_quality.head()
city country ... description name
0 Antwerpen BE ... Particulate matter less than 2.5 micrometers i... PM2.5
1 Antwerpen BE ... Particulate matter less than 2.5 micrometers i... PM2.5
2 Antwerpen BE ... Particulate matter less than 2.5 micrometers i... PM2.5
3 Antwerpen BE ... Particulate matter less than 2.5 micrometers i... PM2.5
4 Antwerpen BE ... Particulate matter less than 2.5 micrometers i... PM2.5
[5 rows x 10 columns]
library(nycflights13)
|>
flights left_join(planes, join_by(tailnum)) |>
head()
# A tibble: 6 × 27
year.x month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
<int> <int> <int> <int> <int> <dbl> <int> <int>
1 2013 1 1 517 515 2 830 819
2 2013 1 1 533 529 4 850 830
3 2013 1 1 542 540 2 923 850
4 2013 1 1 544 545 -1 1004 1022
5 2013 1 1 554 600 -6 812 837
6 2013 1 1 554 558 -4 740 728
# ℹ 19 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
# tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
# hour <dbl>, minute <dbl>, time_hour <dttm>, year.y <int>, type <chr>,
# manufacturer <chr>, model <chr>, engines <int>, seats <int>, speed <int>,
# engine <chr>
|>
flights left_join(planes, join_by(tailnum)) |>
rename(manufacture_date = year.y) |>
select(tailnum, manufacture_date) |>
distinct() |>
slice_min(manufacture_date, n=3)
# A tibble: 3 × 2
tailnum manufacture_date
<chr> <int>
1 N381AA 1956
2 N201AA 1959
3 N567AA 1959
The oldest plane has tail number N381AA.