Plot multiple countries on the world map

Data Visualization
Spatial Data
How to plot administrative areas from several countries in R, and how to avoid stray connecting lines caused by non-unique polygon group IDs.
Author

Yang Liu

Published

May 29, 2023

This post shows how to download and plot administrative areas from multiple countries on a world map. The main lesson is a subtle plotting bug: if region names are not unique across countries, ggplot2::geom_polygon() can draw strange connecting lines between unrelated areas.

Maintenance note: this example preserves the original sp/raster workflow. For new spatial work in R, sf, terra, and geodata are usually easier to maintain, but the polygon-grouping issue shown here is still relevant.

The basic map

(You may download the world map from GitHub)

suppressPackageStartupMessages({
library("data.table")
library("ggplot2")
library("rgdal")
library("raster")
library("rgeos")
library("here")
library("ggthemes")
})

# download data from GADM directly and row-bind the spatial polygons data frame
cnames <- c("Haiti", "Togo", "Uganda", "Ghana", "South Africa", "Angola")
download.GADM <- function(cname0) raster::getData("GADM", country = cname0, level = 1)
dt_geos <- raster::bind(lapply(cnames, download.GADM))
dt_geos <- sp::spTransform(dt_geos, CRS("+proj=robin")) # robin transformation
dt_geos_df <- broom::tidy(dt_geos, region = "GID_1")

  x_min <- min(dt_geos_df$long)*1.2
  x_max <- max(dt_geos_df$long)*1.2
  y_min <- min(dt_geos_df$lat)* 1.2
  y_max <- max(dt_geos_df$lat)* 1.2
  
  map_theme <- ggthemes::theme_map() +
    theme(legend.position = "bottom", legend.direction = "horizontal", legend.justification = c("center"))
  
  shp_world_robin <- readRDS(here::here("../Data/World.shp/sp.world.robin.rds")) # this has to be sourced locally
  ggplot() +
    geom_polygon(data = shp_world_robin, aes(x = long, y = lat, group = group), fill="lightgray", 
                 colour = "white", size=0.05) +
    geom_polygon(data = dt_geos, aes(x = long, y = lat, group = group), 
                 color = "red", size=0.05, fill="#ffc069")  +
    # if want to crop map:
    coord_fixed(xlim=c(x_min, x_max), ylim=c(y_min, y_max)) +
    map_theme +
    guides(fill = guide_legend(nrow = 1, title.position = "top"))

Mapping values by administrative area

Next, we add values to the map. These values can be merged directly to the spatial polygons object or to a data frame derived from it. Here I show the second approach: transform the sp object into a data frame first, then supply that data frame to ggplot2::geom_polygon().

In GADM admin 1 files, GID_1 is a unique identifier even after binding multiple countries. To make the bug visible, this example intentionally uses region names from NAME_1 as the region identifier. In my applied work, I sometimes had to merge estimates using those names.

# `region` will become the id/group used to identify each area 
# Either broom::tidy or ggplot2::fortify can work:
# dt_geos_data <- broom::tidy(dt_geos, region = "NAME_1")
# dt_geos_data <- ggplot2::fortify(dt_geos, region = "NAME_1")

download.GADM.df <- function(cname0){
  dfsp <- raster::getData("GADM", country = cname0, level = 1) # download data from GADM
  dfsp <- sp::spTransform(dfsp, CRS("+proj=robin")) # robin transformation
  dfsf <- broom::tidy(dfsp, region = "NAME_1") # get df
  dfsf$country <- cname0 # add a country identifier 
  return(setDT(dfsf))
}

dt_geos_data <- rbindlist(lapply(cnames, download.GADM.df))

# imagine we want to plot estimates, not only the map
dt_admin1 <- unique(setDT(dt_geos_data)[,.(country, id)])
set.seed(1234)
dt_admin1$value <- rgamma(nrow(dt_admin1), shape = 4, scale = 15) # some random values

setkey(dt_geos_data, country, id)
setkey(dt_admin1, country, id)
dt_geos_data_value <- dt_geos_data[dt_admin1]

legend_break <- c(0, 25, 50, 75,  100, 150, 200, 300, 500)
legend_label <- c("≤25", "25 to 50", "50 to 75", "75 to 100", "100 to 150","150 to 200","200 to 300",">300")
legend_color <- c("#80BD41", "#CFF4FF","#feec9f","#ffc069","#fa8c16","#d46b08","#ad4e00","#612500")
dt_geos_data_value$col <- cut(dt_geos_data_value$value, breaks = legend_break, labels = legend_label)

The bug: strange connecting lines

ggplot() +
  geom_polygon(data = shp_world_robin, aes(x = long, y = lat, group = group), fill="lightgray", 
               colour = "white", size=0.05) +
  geom_polygon(data = dt_geos_data_value, aes(x = long, y = lat, group = group, fill= col), 
               color = "red", size=0.05)  +
  coord_fixed(xlim=c(x_min, x_max), ylim=c(y_min, y_max)) + # crop map
  scale_fill_manual("Some random values for example", values = legend_color, drop = FALSE) +# Keep all legend item
  map_theme + 
  guides(fill = guide_legend(nrow = 1, title.position = "top"))

The problem is that some countries share admin 1 names from NAME_1 in the GADM file during the broom::tidy() or ggplot2::fortify() step.
In this example, Haiti and Togo share names such as Centre.1 and Centre.2 (see the table below).
The fix is to set group to an identifier that is unique across countries in ggplot2::geom_polygon():

dt_geos_data_value[, county_group := paste(country, group, sep = "_")]
unique(dt_geos_data_value[group %in% c("Centre.1"),.(country, id, group, county_group, value)])
##    country     id    group   county_group    value
## 1:   Haiti Centre Centre.1 Haiti_Centre.1 24.09058
## 2:    Togo Centre Centre.1  Togo_Centre.1 54.32445
ggplot() +
    geom_polygon(data = shp_world_robin, aes(x = long, y = lat, group = group), fill="lightgray", 
                 colour = "white", size=0.05) +
    geom_polygon(data = dt_geos_data_value, aes(x = long, y = lat, group = county_group, fill= col), 
                 # it's important to set `group = county_group` instead of `group = group`
                 color = "red", size=0.05)  +
    coord_fixed(xlim=c(x_min, x_max), ylim=c(y_min, y_max))  +
    scale_fill_manual("Some random values for example", values = legend_color, drop = FALSE) +# Keep all legend item
    map_theme + 
    guides(fill = guide_legend(nrow = 1, title.position = "top"))

The key habit is to check whether the polygon grouping variable is unique after combining countries. If it is not, create a country-specific group ID before plotting.