# load libraries
library(sf)
library(dplyr)
# Import data
german_districts <-
sf::read_sf("./data/VG250_KRS.shp") |>
sf::st_transform(3035) |>
dplyr::select(AGS, GEN)
attributes_districts <-
readr::read_delim("./data/attributes_districts.csv", delim = ";")
# Join data and transform
german_districts_enhanced <-
german_districts |>
dplyr::left_join(attributes_districts, by = "AGS")
# Check
sf::st_crs(german_districts_enhanced)
head(german_districts_enhanced, 2)Exercise 4_2: Subsetting and Linking
Introduction to Geospatial Techniques for Social Scientists in R
Stefan Jünger & Dennis Abel
Exercises
Import the ESRI shapefile of German districts and the district attribute table. Join the two data frames, transform the CRS to EPSG:3035, and check your output.
You can use base R’s merge() function or dplyr’s dplyr::left_join(). Plus, you need the function sf::st_transform() to change the CRS.
We want a first descriptive visual of the distribution of charging stations in Cologne (or any other district of your choice) and the surrounding districts. Filter the district of Cologne (AGS == "05315") and find the surrounding districts. Calculate the number of chargers per district (charger_count) and the number of chargers per 1,000 inhabitants in each district (charger_dens). Plot the two columns for Cologne and its surrounding districts.
You can use the dplyr function dplyr::bind_rows() to combine the two spatial objects, “Cologne” and “Cologne Surroundings”.
To wrap up, let’s do some stuff with other data formats. Sample 50 points that fall within the boundaries of the city of Cologne.
Sampling is straightforward: Apply the sf::st_sample to the Cologne vector data, but make sure to apply the sf::st_as_sf() function afterward to receive a full-fledged data table (with a geometry column only).
Create a new raster layer comprising Cologne’s immigrant rates based on the raster layers from the previous exercises. Extract the immigrant rate value at each position of the previously sampled points as a vector. What is your observation?
You need the citizens_cologne.tif and inhabitants_cologne.tif files in the ./data folder. Due to severe data protection measures, the German Census 2012 data could be quite sparse.
Use an adequate method of raster extraction to gather information in the geographic surroundings of a point. What is your observation now?
Assume that people move in a 1,000-meter radius around their location. Thus, extracting information on buffers of 1,000 meters around the points might be interesting using the option sf::st_buffer() function. In that case, you should also set a descriptive statistics function, e.g., with the option fun = mean and its helpful companion option to consider missing values na.rm = TRUE.
Solutions
Warning: Paket 'sf' wurde unter R Version 4.4.3 erstellt
Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
Warning: Paket 'dplyr' wurde unter R Version 4.4.3 erstellt
Attache Paket: 'dplyr'
Die folgenden Objekte sind maskiert von 'package:stats':
filter, lag
Die folgenden Objekte sind maskiert von 'package:base':
intersect, setdiff, setequal, union
Rows: 400 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (1): AGS
dbl (6): car_density, ecar_share, publictransport_meandist, population, gree...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Coordinate Reference System:
User input: EPSG:3035
wkt:
PROJCRS["ETRS89-extended / LAEA Europe",
BASEGEOGCRS["ETRS89",
ENSEMBLE["European Terrestrial Reference System 1989 ensemble",
MEMBER["European Terrestrial Reference Frame 1989"],
MEMBER["European Terrestrial Reference Frame 1990"],
MEMBER["European Terrestrial Reference Frame 1991"],
MEMBER["European Terrestrial Reference Frame 1992"],
MEMBER["European Terrestrial Reference Frame 1993"],
MEMBER["European Terrestrial Reference Frame 1994"],
MEMBER["European Terrestrial Reference Frame 1996"],
MEMBER["European Terrestrial Reference Frame 1997"],
MEMBER["European Terrestrial Reference Frame 2000"],
MEMBER["European Terrestrial Reference Frame 2005"],
MEMBER["European Terrestrial Reference Frame 2014"],
MEMBER["European Terrestrial Reference Frame 2020"],
ELLIPSOID["GRS 1980",6378137,298.257222101,
LENGTHUNIT["metre",1]],
ENSEMBLEACCURACY[0.1]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4258]],
CONVERSION["Europe Equal Area 2001",
METHOD["Lambert Azimuthal Equal Area",
ID["EPSG",9820]],
PARAMETER["Latitude of natural origin",52,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",10,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["False easting",4321000,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",3210000,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["northing (Y)",north,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["easting (X)",east,
ORDER[2],
LENGTHUNIT["metre",1]],
USAGE[
SCOPE["Statistical analysis."],
AREA["Europe - European Union (EU) countries and candidates. Europe - onshore and offshore: Albania; Andorra; Austria; Belgium; Bosnia and Herzegovina; Bulgaria; Croatia; Cyprus; Czechia; Denmark; Estonia; Faroe Islands; Finland; France; Germany; Gibraltar; Greece; Hungary; Iceland; Ireland; Italy; Kosovo; Latvia; Liechtenstein; Lithuania; Luxembourg; Malta; Monaco; Montenegro; Netherlands; North Macedonia; Norway including Svalbard and Jan Mayen; Poland; Portugal including Madeira and Azores; Romania; San Marino; Serbia; Slovakia; Slovenia; Spain including Canary Islands; Sweden; Switzerland; Türkiye (Turkey); United Kingdom (UK) including Channel Islands and Isle of Man; Vatican City State."],
BBOX[24.6,-35.58,84.73,44.83]],
ID["EPSG",3035]]
Simple feature collection with 2 features and 8 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 4279627 ymin: 3460480 xmax: 4335232 ymax: 3524426
Projected CRS: ETRS89-extended / LAEA Europe
# A tibble: 2 × 9
AGS GEN geometry car_density ecar_share
<chr> <chr> <MULTIPOLYGON [m]> <dbl> <dbl>
1 01001 Flensburg (((4283235 3524256, 4283267 3524100, 4… 4915 2.4
2 01002 Kiel (((4331981 3480575, 4332008 3480496, 4… 4528 2.2
# ℹ 4 more variables: publictransport_meandist <dbl>, population <dbl>,
# green_voteshare <dbl>, afd_voteshare <dbl>
# filter Cologne
cologne <-
german_districts |>
dplyr::filter(AGS == "05315")
# filter surrounding districts, append with Cologne data and select the charger column
cologne_sur <-
german_districts |>
dplyr::filter(lengths(sf::st_touches(german_districts, cologne)) > 0) |>
dplyr::bind_rows(cologne)
# one pipe to rule them all
cologne_sur_enhanced <-
readr::read_delim("./data/charging_points_ger.csv", delim =";") |>
dplyr::filter(!is.na(longitude) & !is.na(latitude)) |>
sf::st_as_sf(coords = c("longitude", "latitude"), crs = 4326) |>
sf::st_transform(crs = 3035) |>
sf::st_join(cologne_sur, join = sf::st_within) |>
dplyr::group_by(AGS) |>
dplyr::summarise(charger_count = dplyr::n()) |>
sf::st_drop_geometry() |>
dplyr::left_join(cologne_sur, y = _, by = "AGS") |>
dplyr::left_join(
attributes_districts |>
dplyr::select(AGS, population),
by = "AGS"
) |>
dplyr::mutate(charger_dens = (charger_count*1000) / population)
# plot
cologne_sur_enhanced |>
dplyr::select(charger_count, charger_dens) |>
plot()Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 60560 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (3): operator, federal_state, type
dbl (4): latitude, longitude, power_kw, num_plugs
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(terra)
# load all layers
citizens_cologne <- terra::rast("./data/citizens_cologne.tif")
inhabitants_cologne <- terra::rast("./data/inhabitants_cologne.tif")
# define missing values
citizens_cologne[citizens_cologne == -9] <- NA
inhabitants_cologne[inhabitants_cologne == -9] <- NA
# create immigrants layer
immigrants_cologne <- inhabitants_cologne - citizens_cologne
# something's off - let's get rid of the wierd negative numbers
immigrants_cologne[immigrants_cologne < 0] <- NA
immigrant_rates <-
immigrants_cologne * 100 / inhabitants_cologne
immigrant_rates_at_point <- terra::extract(immigrant_rates, cologne_50_points)
immigrant_rates_at_point
# There are a lot of missing values.Warning: Paket 'terra' wurde unter R Version 4.4.3 erstellt
terra 1.9.11
ID cat_0
1 1 57.142857
2 2 20.869565
3 3 43.589744
4 4 NA
5 5 NA
6 6 NA
7 7 NA
8 8 NA
9 9 20.512821
10 10 NA
11 11 48.666667
12 12 NA
13 13 NA
14 14 NA
15 15 63.513514
16 16 15.384615
17 17 NA
18 18 NA
19 19 NA
20 20 NA
21 21 21.072797
22 22 NA
23 23 NA
24 24 47.619048
25 25 NA
26 26 14.545455
27 27 NA
28 28 NA
29 29 NA
30 30 NA
31 31 NA
32 32 18.439716
33 33 23.529412
34 34 NA
35 35 20.754717
36 36 NA
37 37 12.068966
38 38 NA
39 39 NA
40 40 9.756098
41 41 19.444444
42 42 51.020408
43 43 NA
44 44 NA
45 45 NA
46 46 NA
47 47 NA
48 48 22.222222
49 49 NA
50 50 NA
ID cat_0
1 1 23.89633
2 2 26.99151
3 3 24.74833
4 4 23.93386
5 5 29.13051
6 6 17.04614
7 7 20.35059
8 8 34.61046
9 9 28.30252
10 10 20.68858
11 11 26.36429
12 12 22.10383
13 13 33.33333
14 14 29.70576
15 15 31.79816
16 16 29.09181
17 17 28.00210
18 18 12.51894
19 19 22.56886
20 20 23.98581
21 21 22.86830
22 22 32.56351
23 23 23.39827
24 24 26.43435
25 25 22.81005
26 26 22.45830
27 27 31.10686
28 28 25.40024
29 29 19.27193
30 30 29.33389
31 31 0.00000
32 32 22.85417
33 33 24.00638
34 34 23.62727
35 35 31.72377
36 36 19.91084
37 37 24.96999
38 38 30.36571
39 39 28.74568
40 40 22.41471
41 41 27.09108
42 42 42.21840
43 43 28.23864
44 44 27.05159
45 45 25.47683
46 46 34.39369
47 47 28.31016
48 48 24.73430
49 49 38.12571
50 50 35.44514