ggplot US state and China province heatmap

Data Visualization
R examples for drawing US state and China province heatmaps with ggplot2, usmap, shapefiles, labels, scale bars, and leaflet-based alternatives.
Author

Yang Liu

Published

June 12, 2020

Updated on 2020-06-12; first posted on 2019-04-17.

This post collects a few R approaches for drawing state- and province-level thematic maps. The goal is to keep the examples practical: labels, scale bars, color fills, and enough flexibility to adjust the final plot.

I will show:

  1. 50-state (including Alaska and Hawaii) United States thematic map, with map scale and with state abbreviations

  2. China thematic map, with map scale and province names in either English or Chinese

  1. US Map by state

The dt1 below can be used by both methods. With usmap, only two variables are needed: state names and the value to plot.

suppressPackageStartupMessages({ library(ggplot2) library(maps) library(usmap) library(rgeos) # for rgeos::gCentroid library(data.table) library(ggsn) # for adding map scale bar ggsn::scalebar library(shadowtext) # for adding label library(ggrepel) # if need to repel labels }) # A sample dataset of 77 states: dt1 <- as.data.table(copy(state.x77)) dt1$state <- tolower(rownames(state.x77)) dt1 <- dt1[,.(state, Population)] setkey(dt1, state)

states <- setDT(ggplot2::map_data("state")) setkey(states, region) # join data to map: left join states to dt1 dt1 <- dt1[states] # rmarkdown::paged_table(dt1[1:50,])

Method 1. Use usmap

The easiest way is to use the usmap package. State abbreviations can be added directly with usmap::plot_usmap. This is easy to start with, though harder to fine-tune in detail.

dt1_simple <- unique(dt1[,.(state, Population)])
usmap::plot_usmap(data = dt1_simple, values = "Population", labels = TRUE) +
  labs(fill = 'State Population (1975)') + 
  scale_fill_gradientn(colours = RColorBrewer::brewer.pal(9, "GnBu"),
                       na.value = "grey90",
                       guide = guide_colourbar(barwidth = 25, 
                                               barheight = 0.4,
                                               #put legend title on top of legend 
                                               title.position = "top")) +
  # put legend at the bottom, adjust legend title and text font sizes
  theme(legend.position = "bottom",
        legend.title=element_text(size=12), 
        legend.text=element_text(size=10))

The plot_usmap returns a ggplot object so it is possible to further revise by just adding more layers.

Method 2. Use map_data and build from shape files

This approach makes it possible to fine-tune label locations, as in the China map later. Here the label is placed near the center of each state: mean(lon), min(lat).

# create labels
# incl `Population` (the value to plot) in the label dataset, if want to fill with color. 
#' get centroids from a shp file dataset
get.centroids <- function(
  data = dt1, 
  long = "long", lat = "lat", 
  by_var = "state",  # the grouping variable, e.g. state: get centroid by state
  fill_var = NULL # the variable to plot 
  ){
  data <- data[!is.na(data[[by_var]]),]
  data[[by_var]] <- as.character(data[[by_var]]) # sometimes there is empty factor level
  dt1_df <- sp::SpatialPointsDataFrame(coords = data[, c(long, lat), with = FALSE], data = data)
  dt1_geo <- by(dt1_df, dt1_df[[by_var]], function(x) {sp::Polygon(x[c(long, lat)])@labpt})
  centroids <- stats::setNames(do.call("rbind.data.frame", dt1_geo), c(long, lat))
  centroids$name <- names(dt1_geo) 
  if(!is.null(fill_var)){ # if need to join fill value 
    setkeyv(setDT(centroids), "name")
    dt_var <- unique(data[,c(by_var, fill_var), with = FALSE])
    setkeyv(dt_var, by_var)
    centroids <- dt_var[centroids]
  }
  return(centroids)
}
centroids <- get.centroids(data = dt1, long = "long", lat = "lat", by_var = "state", fill_var = "Population")
# join short state names (e.g. VA, SC)
abb_names <- data.table(state = tolower(state.name), abb = state.abb) # these are R dataset
setkey(setDT(centroids), state)
setkey(abb_names, state)
centroids <- abb_names[centroids]
head(centroids, 3)
##       state abb Population       long      lat
## 1:  alabama  AL       3615  -86.83042 32.80316
## 2:  arizona  AZ       2212 -111.66786 34.30060
## 3: arkansas  AR       2110  -92.44013 34.90418
x_boundary = -77
ggplot(data = dt1, aes(x = long, y = lat, group = group))+
  geom_polygon(aes(fill = Population))+
  geom_path()+ 
  scale_fill_gradientn(colours = RColorBrewer::brewer.pal(9, "GnBu"), 
                       na.value = "grey90",
                       guide = guide_colourbar(barwidth = 25, barheight = 0.4,
                                               #put legend title on top of legend
                                               title.position = "top")) +
  # shadowtext::geom_shadowtext(data = centroids,
  #                             aes(x = long, y = lat, label = abb),
  #                             size = 3,
  #                             inherit.aes = FALSE) + 
  # if need to repel labels... could further fine-tune
  ggrepel::geom_label_repel(data = centroids[long >= x_boundary,],
                   aes(x = long, y = lat, label = abb, fill = Population),
    force = 1, size = 3,
    inherit.aes = F
  ) +
  # the normal labels:
  geom_text(data=centroids[long < x_boundary,],
            aes(x = long, y = lat, label = abb),
            size = 3, inherit.aes=F) +
  labs(fill = "Population", x = "Longitude", y = "Latitude") + 
  coord_map() + 
  # map scale
  ggsn::scalebar(data = dt1, dist = 500, dist_unit = "km",
           border.size = 0.4, st.size = 4,
           box.fill = c('black','white'),
           transform = TRUE, model = "WGS84") + 
  theme_void() + 
  # put legend at the bottom, adjust legend title and text font sizes
  theme(legend.position = "bottom",
        legend.title=element_text(size=12),  # font size of the legend 
        legend.text=element_text(size=10))

# 
# ggsave("us_example.png", width = 10, height = 6)

  1. China map by province

Method 1. China map by province using downloaded shapefiles

The China map is more complicated. This blog by Zhang Zhen refers to several useful resources, especially the older Capital of Statistics post on drawing China maps in R. I have not found a China equivalent of usmap, but the workflow is manageable once the shapefiles are available.

The shapefiles can be downloaded from the Capital of Statistics website. The zip file contains three files: bou2_4p.dbf, bou2_4p.shp, and bou2_4p.shx. We only need the bou2_4p.shp file later.
For a quick download: you can also download from my GitHub repo of blogdown: Blogdown/blogData/China.shp

Update 2020/06 There is a chinamap package created by Guangchuang Yu (and his team), who is also the author of the package nCov2019.

Another useful source is the GADM website, which has admin1 and admin2 shapefiles for every country. Note that China and Taiwan are downloaded separately.

On character encoding Showing Chinese is indeed sometimes a problem. The key is to make sure in the datafile all the province names are coded in UTF-8. In the markdown file below you can see province names shown as UTF-8 code. But if in the map (plot) they are rendered as Chinese characters. And if your system can show Chinese, they read as Chinese in the console too.

In Chinese (一点额外说明): 在Windows系统上,只要RStudio中显示中文没有问题好像Markdown的时候就没有问题。关键是要统一数据中的中文都是UTF-8编码的,不要从网站上拷贝中文进Rstudio,会产生各种问题。不需要特别调整Rstudio或者Markdown的设置。 所以虽然我下面的数据中显示UTF-8,在Console和最后在地图上都是显示为中文。

# China -------------------------------------------------------------------
china_map <- rgdal::readOGR(here::here("../Data/China.shp/bou2_4p.shp")) # read it locally
## OGR data source with driver: ESRI Shapefile 
## Source: "D:\liuyanguu\Blogdown\Data\China.shp\bou2_4p.shp", layer: "bou2_4p"
## with 925 features
## It has 7 fields
## Integer64 fields read as strings:  BOU2_4M_ BOU2_4M_ID

# extract province information from shapefile china_map_data = data.table::setDT(china_map@data) data.table::setnames(china_map_data, "NAME", "province")

transform to UTF-8 coding format

china_map_data[, province:=iconv(province, from = "GBK", to = "UTF-8")] # create id to join province back to lat and long, id = 0 ~ 924 china_map_data[, id:= .I-1] # id = 0, 1, 2, … , used to match to dt_china # there are more shapes for one province due to small islands china_map_data[, province:= as.factor(province)] china_map_data <- china_map_data[!is.na(province)] china_map_data <- china_map_data[AREA > 0.1] head(china_map_data, 3)
##       AREA PERIMETER BOU2_4M_ BOU2_4M_ID ADCODE93 ADCODE99         province id
## 1:  54.447    68.489        2         23   230000   230000         黑龙江省  0
## 2: 129.113   129.933        3         15   150000   150000     内蒙古自治区  1
## 3: 175.591    84.905        4         65   650000   650000 新疆维吾尔自治区  2
# 
dt_china = setDT(fortify(china_map))
head(dt_china, 3)
##        long      lat order  hole piece id group
## 1: 121.4884 53.33265     1 FALSE     1  0   0.1
## 2: 121.4995 53.33601     2 FALSE     1  0   0.1
## 3: 121.5184 53.33919     3 FALSE     1  0   0.1
dt_china[, id:= as.numeric(id)]
setkey(china_map_data, id); setkey(dt_china, id)
dt_china <- china_map_data[dt_china]

# add the province EN, CH label file
province_CH <- china_map_data[, levels(province)] # the CH are in UTF-8 code
province_EN <- c("Shanghai", "Yunnan", "Inner Mongolia", "Beijing", "Taiwan",
"Jilin", "Sichuan", "Tianjin", "Ningxia", "Anhui",
"Shandong", "Shanxi", "Guangdong", "Guangxi", "Xinjiang",
"Jiangsu", "Jiangxi", "Hebei", "Henan", "Zhejiang",
"Hainan", "Hubei", "Hunan", "Gansu", "Fujian",
"Tibet", "Guizhou", "Liaoning", "Chongqing", "Shaanxi",
"Qinghai", "Hong Kong", "Heilongjiang"
)
# some value to plot (from years ago too)
value <- c(8893483, 12695396,  8470472,  7355291, 23193638,  9162183, 26383458,  3963604,  1945064, 19322432, 30794664, 10654162, 32222752, 13467663,  6902850, 25635291, 11847841, 20813492, 26404973, 20060115, 2451819, 17253385, 19029894,  7113833, 11971873, 689521, 10745630, 15334912, 10272559, 11084516, 1586635,  7026400, 13192935)
input_data <- data.table(province_CH, province_EN, value)
setkey(input_data, province_CH)
setkey(dt_china, province)

dt_china <- input_data[dt_china, nomatch = 0]
# saveRDS(dt_china, file = "D:/liuyanguu/Blogdown/Data/dt_china.rds")
# create label file of province `label_dt`
centroids_cn <- get.centroids(data = dt_china, by_var = "province_CH")
centroids_en <- get.centroids(data = dt_china, by_var = "province_EN")

# plot
gg_cn <- ggplot(dt_china, aes(x = long, y = lat, group = group, fill = value)) +
  labs(fill = "Population")+
  geom_polygon()+
  scale_fill_gradientn(colours = RColorBrewer::brewer.pal(9, "GnBu"), 
                       na.value = "grey90",
                       guide = guide_colourbar(barwidth = 25, barheight = 0.4,
                                               #put legend title on top of legend
                                               title.position = "top")) + 
  labs(fill = "Population", x = "Longitude", y = "Latitude") + 
  coord_map() + 
  # map scale
  ggsn::scalebar(data = dt_china, dist = 500, dist_unit = "km",
           border.size = 0.4, st.size = 4,
           box.fill = c('black','white'),
           transform = TRUE, model = "WGS84") + 
  theme_void() +
  theme(legend.position = "bottom",
        legend.title=element_text(size=12),  # font size of the legend 
        legend.text=element_text(size=10))

# In Chinese
gg_cn +  shadowtext::geom_shadowtext(data = centroids_cn, aes(x = long, y = lat, label = name), inherit.aes = FALSE)

# In English
gg_cn +  shadowtext::geom_shadowtext(data = centroids_en, aes(x = long, y = lat, label = name), inherit.aes = FALSE)

Method 2. Using geojsonMap (leaflet)

Just saw it on cosx.org blog. It is an interactive map built on leaflet.

library(leafletCN)
dt_china <- as.data.frame(dt_china)
geojsonMap(dat = dt_china, mapName = "china",
           # namevar = ~ province_EN, 
           valuevar = ~ value, 
           popup =  paste0(dt_china$province_EN),
           palette = RColorBrewer::brewer.pal(9, "GnBu"), legendTitle = "Population")