

This script updates the CSV files created in the ‘Acquire Data’ section to include the state and Census Bureau region of origin for each tweet.


Within the “updated_plurals <- left_join(join_tweets_to_states(plurals),census_regions)” line of code, replace “updated_plurals” and “plurals” with the name of your updated CSV and the original CSV, respectively. Do this for each group of terms analyzed.


# Script-specific options or packages

join_tweets_to_states <- function(tweets) {
  # Map geotagged tweets to the US state/territory they fall in.
  #
  # Args:
  #   tweets: data frame of tweets (e.g. the result of an rtweet search
  #     query) with `lat` and `lng` columns.
  #
  # Returns:
  #   A tibble of the tweets that have coordinates and that matched a census
  #   state geometry, with a `state_name` column added by the spatial join.
  #
  # Requires a US Census API Key: https://api.census.gov/data/key_signup.html
  # that is added to the .Renviron file with tidycensus::census_api_key()
  library(tidyverse, quietly = TRUE) # data manipulation
  library(tidycensus)                # us state geometries

  # Get the spatial geometries for each US state/territory
  states_sf <-
    get_acs(geography = "state",      # states
            variables = "B01003_001", # total population
            geometry = TRUE) %>%      # spatial geometry
    select(state_name = NAME, geometry) # only keep state name and geometry

  # Build point geometries from the tweet coordinates.
  # sf functions are namespace-qualified so the function does not depend on
  # sf having been attached elsewhere in the session.
  # NOTE(review): `crs =` labels the points with the census CRS rather than
  # transforming them -- confirm the tweet coordinates are compatible.
  tweets_sf <-
    tweets %>%
    filter(!is.na(lat), !is.na(lng), lat != "") %>% # need both coordinates
    sf::st_as_sf(coords = c("lng", "lat"),      # x = lng, y = lat
                 crs = sf::st_crs(states_sf))   # align coordinate reference system

  # Map tweet coords to census state geometries
  tweets_states_sf <- sf::st_join(tweets_sf, states_sf)

  # Convert to a tibble and keep only tweets that matched a state/territory
  # (unmatched tweets have a missing state_name after the join)
  tweets_states_sf %>%
    as_tibble() %>%
    filter(!is.na(state_name), state_name != "")
}


# Lookup table pairing each state name with its census region
census_regions <- read_csv(
  file = "../data/original/twitter/census_regions.csv"
)
## Rows: 51 Columns: 2
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): state_name, census_region
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Second Person Plurals

# Tag each second-person-plural tweet with its state and census region
updated_plurals <- left_join(
  join_tweets_to_states(plurals),
  census_regions
)
## Joining, by = "state_name"
# Save the augmented tweets to disk
write_csv(
  updated_plurals,
  file = "../data/derived/updated_plurals.csv"
)

Outdoor Sales

# Tag each outdoor-sale tweet with its state and census region
updated_sales <- left_join(
  join_tweets_to_states(sale),
  census_regions
)
## Joining, by = "state_name"
# Save the augmented tweets to disk
write_csv(
  updated_sales,
  file = "../data/derived/updated_sales.csv"
)

Gym Shoes

# Tag each gym-shoe tweet with its state and census region
updated_shoes <- left_join(
  join_tweets_to_states(shoes),
  census_regions
)
## Joining, by = "state_name"
# Save the augmented tweets to disk
write_csv(
  updated_shoes,
  file = "../data/derived/updated_shoes.csv"
)

Soft Drinks

# Tag each soft-drink tweet with its state and census region
updated_tonix <- left_join(
  join_tweets_to_states(tonix),
  census_regions
)
## Joining, by = "state_name"
# Save the augmented tweets to disk
write_csv(
  updated_tonix,
  file = "../data/derived/updated_tonix.csv"
)

Major Roads

# Tag each major-road tweet with its state and census region
updated_roads <- left_join(
  join_tweets_to_states(roads),
  census_regions
)
## Joining, by = "state_name"
# Save the augmented tweets to disk
write_csv(
  updated_roads,
  file = "../data/derived/updated_roads.csv"
)



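If everything runs as it should, this code should produce no output other than the informational messages shown above (the column specification from `read_csv()` and the `Joining, by = "state_name"` notices); the results are written to the CSV files in `../data/derived/`.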


Vaux, Bert. “Harvard Dialect Survey.” Harvard Dialect Survey, 2003, http://dialect.redlog.net.