Exercise 7: Neighborhood Matrices and Autocorrelation

Introduction to Geospatial Techniques for Social Scientists in R

Author

Stefan Jünger, Anne Stroppe & Dennis Abel

Thus far, we have only used Queen neighborhood matrices with our data. Let’s use this exercise to try out different variations. First of all, run the code below to compile the data that were also used in the lecture.

# Load relevant packages
library(dplyr)
library(ggplot2)
library(sf)
library(terra)
library(tmap)

# Load voting districts and results
voting_districts <-
  sf::st_read("./data/Stimmbezirk.shp") |> 
  dplyr::mutate(
    district_id = as.numeric(nummer)
  ) |> 
  dplyr::select(district_id, Shape_Area, geometry) 

btw21_votes <-
  glue::glue(
    "https://www.stadt-koeln.de/wahlen/bundestagswahl/09-2021/praesentation/\\
    Open-Data-Bundestagswahl476.csv"
  ) |> 
  readr::read_csv2() |>
  dplyr::mutate(
    district_id = as.numeric(`gebiet-nr`),
    valid_votes = `F`,
    cdu_share = (F1 / valid_votes) * 100,
    spd_share = (F2 / valid_votes) * 100,
    fdp_share = (F3 / valid_votes) * 100,
    afd_share = (F4 / valid_votes) * 100,
    greens_share = (F5 / valid_votes) * 100,
    linke_share = (F6 / valid_votes) * 100,
    .keep = "none"
  )

btw21_votes <- btw21_votes |>
  dplyr::rowwise() |>
  dplyr::mutate(
    highest_vote = factor(
      sub(
        "_share$",
        "",
        names(dplyr::pick(dplyr::ends_with("_share")))[
          order(
            dplyr::c_across(dplyr::ends_with("_share")),
            decreasing = TRUE
          )[1]
        ]
      )
    ),
    second_highest_vote = factor(
      sub(
        "_share$",
        "",
        names(dplyr::pick(dplyr::ends_with("_share")))[
          order(
            dplyr::c_across(dplyr::ends_with("_share")),
            decreasing = TRUE
          )[2]
        ]
      )
    )
  ) |>
  dplyr::ungroup()

election_results <-
  dplyr::left_join(
    voting_districts,
    btw21_votes,
    by = "district_id"
  )

# Load additional raster data
immigrants_cologne <- terra::rast("./data/immigrants_cologne.tif")
inhabitants_cologne <- terra::rast("./data/inhabitants_cologne.tif")

immigrants_cologne  <- terra::subst(immigrants_cologne,  from = -9, to = NA)
inhabitants_cologne <- terra::subst(inhabitants_cologne, from = -9, to = NA)

immigrant_share_cologne <- (immigrants_cologne / inhabitants_cologne)*100

age_rast <- terra::rast("./data/census22_age_avg.tif")
rent_rast <- terra::rast("./data/census22_rent_avg.tif")

# Extract
election_results <-
  election_results |>
  dplyr::mutate(
    immigrant_share = 
      exactextractr::exact_extract(
        immigrant_share_cologne, election_results, 'mean', progress = FALSE
      ),
    inhabitants = 
      exactextractr::exact_extract(
        inhabitants_cologne, election_results, 'mean', progress = FALSE
      ),
    age_avg = 
      exactextractr::exact_extract(
        age_rast, election_results, 'mean', progress = FALSE
      ),
    rent_avg = 
      exactextractr::exact_extract(
        rent_rast, election_results, 'mean', progress = FALSE
      )
  )
Reading layer `Stimmbezirk' from data source 
  `C:\Users\abelds\Desktop\gesis-workshop-geospatial-techniques-R-2026\data\Stimmbezirk.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 543 features and 14 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 343914.7 ymin: 5632759 xmax: 370674.3 ymax: 5661475
Projected CRS: ETRS89 / UTM zone 32N

Exercises

Note🏋 Exercise 1

As in the lecture, create a neighborhood (weight) matrix, but this time, do it for Queen and Rook neighborhoods. Also, apply a row normalization.

You could either use the sdep package with its function spdep::poly2nb() or the more modern approach of the sfdep package using the function sfdep::st_contiguity(). In both cases, you must set the option queen = FALSE for Rook neighborhoods.

Note🏋 Exercise 2

We have not used them, but you can also create distance-based weight matrices. Use the package of your choice again and create weights for a distance between 0 and 5000 meters. Use again row-normalization.

You must also convert the polygon data to point coordinates for this exercise. We’d propose to use the centroids for this task:

election_results_centroids <- sf::st_centroid(election_results)

Use a map to corroborate this conversion was successful.

If you use spdep, use the function spdep::dnearneigh(); if you use sfdep, use the function sfdep::st_dist_band().

Note🏋 Exercise 3

Now, let’s see how these different spatial weights perform in an analysis. Calculate Moran’s I and Geary’s C for each one of the weights and report their results for the variable spd_share.

It is essential to know which path you have taken before – using spdep and sfdep – as it determines how you solve this exercise.

Note🏋 Exercise 4

To wrap up, plot two Moran scatterplots to visualize the relationship between the actual values and the lagged values. Plot one plot with the neighbour-based Queen or Rook weights matrix and another one with the inverse-distance based. Compare the results.

Solutions

# spdep
queen_neighborhood <-
  spdep::poly2nb(
    election_results,
    queen = TRUE
  )

queen_W <- spdep::nb2listw(queen_neighborhood, style = "W")

rook_neighborhood <-
  spdep::poly2nb(
    election_results,
    queen = FALSE
  )

rook_W <- spdep::nb2listw(rook_neighborhood, style = "W")

# sfdep
election_results <-
  election_results |> 
  dplyr::mutate(
    queen_neighborhood = sfdep::st_contiguity(election_results, queen = TRUE),
    queen_W = sfdep::st_weights(queen_neighborhood),
    rook_neighborhood = sfdep::st_contiguity(election_results, queen = FALSE),
    rook_W = sfdep::st_weights(rook_neighborhood)
  )
# convert to centroids
election_results_centroids <- sf::st_centroid(election_results)
Warning: st_centroid assumes attributes are constant over geometries
ggplot() +
  geom_sf(data = election_results_centroids, color = "black")

# spdep
distance_neighborhood_5000 <-
  spdep::dnearneigh(election_results_centroids, 0, 5000)

distance_neighborhood_5000_W <- 
  spdep::nb2listw(distance_neighborhood_5000, style = "W")

# sfdep
election_results_centroids <-
  election_results_centroids |> 
  dplyr::mutate(
    neighbors_5000 = sfdep::st_dist_band(election_results_centroids, 0, 5000),
    weights_5000 = sfdep::st_weights(neighbors_5000)
  )
spdep::moran.test(election_results$spd_share, listw = queen_W)

    Moran I test under randomisation

data:  election_results$spd_share  
weights: queen_W    

Moran I statistic standard deviate = 20.869, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     0.5404799889     -0.0018450185      0.0006753055 
spdep::moran.test(election_results$spd_share, listw = rook_W)

    Moran I test under randomisation

data:  election_results$spd_share  
weights: rook_W    

Moran I statistic standard deviate = 20.332, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     0.5444447649     -0.0018450185      0.0007219029 
spdep::moran.test(
  election_results_centroids$spd_share, 
  listw = distance_neighborhood_5000_W
)

    Moran I test under randomisation

data:  election_results_centroids$spd_share  
weights: distance_neighborhood_5000_W    

Moran I statistic standard deviate = 45.601, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     2.494222e-01     -1.845018e-03      3.036107e-05 
spdep::geary.test(election_results$spd_share, listw = queen_W)

    Geary C test under randomisation

data:  election_results$spd_share 
weights: queen_W   

Geary C statistic standard deviate = 19.232, p-value < 2.2e-16
alternative hypothesis: Expectation greater than statistic
sample estimates:
Geary C statistic       Expectation          Variance 
     0.4556471765      1.0000000000      0.0008011317 
spdep::geary.test(election_results$spd_share, listw = rook_W)

    Geary C test under randomisation

data:  election_results$spd_share 
weights: rook_W   

Geary C statistic standard deviate = 18.947, p-value < 2.2e-16
alternative hypothesis: Expectation greater than statistic
sample estimates:
Geary C statistic       Expectation          Variance 
     0.4477206733      1.0000000000      0.0008496821 
spdep::geary.test(
  election_results_centroids$spd_share, 
  listw = distance_neighborhood_5000_W
)

    Geary C test under randomisation

data:  election_results_centroids$spd_share 
weights: distance_neighborhood_5000_W   

Geary C statistic standard deviate = 31.847, p-value < 2.2e-16
alternative hypothesis: Expectation greater than statistic
sample estimates:
Geary C statistic       Expectation          Variance 
     7.535200e-01      1.000000e+00      5.990103e-05 
# sfdep
library(magrittr)

Attache Paket: 'magrittr'
Die folgenden Objekte sind maskiert von 'package:terra':

    extract, inset
election_results %$% 
  sfdep::global_moran_test(spd_share, queen_neighborhood, queen_W)

    Moran I test under randomisation

data:  x  
weights: listw    

Moran I statistic standard deviate = 20.869, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     0.5404799889     -0.0018450185      0.0006753055 
election_results %$% 
  sfdep::global_moran_test(spd_share, rook_neighborhood, rook_W)

    Moran I test under randomisation

data:  x  
weights: listw    

Moran I statistic standard deviate = 20.332, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     0.5444447649     -0.0018450185      0.0007219029 
election_results_centroids %$% 
  sfdep::global_moran_test(spd_share, neighbors_5000, weights_5000)

    Moran I test under randomisation

data:  x  
weights: listw    

Moran I statistic standard deviate = 45.601, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     2.494222e-01     -1.845018e-03      3.036107e-05 
election_results %$% 
  sfdep::global_c_test(spd_share, queen_neighborhood, queen_W)

    Geary C test under randomisation

data:  x 
weights: listw   

Geary C statistic standard deviate = 19.232, p-value < 2.2e-16
alternative hypothesis: Expectation greater than statistic
sample estimates:
Geary C statistic       Expectation          Variance 
     0.4556471765      1.0000000000      0.0008011317 
election_results %$% 
  sfdep::global_c_test(spd_share, rook_neighborhood, rook_W)

    Geary C test under randomisation

data:  x 
weights: listw   

Geary C statistic standard deviate = 18.947, p-value < 2.2e-16
alternative hypothesis: Expectation greater than statistic
sample estimates:
Geary C statistic       Expectation          Variance 
     0.4477206733      1.0000000000      0.0008496821 
election_results_centroids %$% 
  sfdep::global_c_test(spd_share, neighbors_5000, weights_5000)

    Geary C test under randomisation

data:  x 
weights: listw   

Geary C statistic standard deviate = 31.847, p-value < 2.2e-16
alternative hypothesis: Expectation greater than statistic
sample estimates:
Geary C statistic       Expectation          Variance 
     7.535200e-01      1.000000e+00      5.990103e-05 
spdep::moran.plot(
  x = election_results$spd_share,
  listw = queen_W,
  labels = election_results$district_id,
  xlab = "SPD share (Queen W)",
  ylab = "Spatial lag of SPD share (Queen W)"
)

spdep::moran.plot(
  x = election_results_centroids$spd_share,
  listw = distance_neighborhood_5000_W,
  labels = election_results_centroids$district_id,
  xlab = "SPD share (5km)",
  ylab = "Spatial lag of SPD share (5km)"
)