Acquire data

About

Description

This script acquires the Twitter data on which the whole project is based. After loading the necessary libraries and defining the term_search() function, tweets containing any search term can be acquired. As seen below, I used the same code for each of the five groups of search terms in this project; only three things change between groups: the “terms to search” line of code, the name of the CSV created, and the value of “n = …”. To adapt the script, replace the “terms to search” line with the name given to your own group of terms, change the name of the CSV to whatever makes the most sense for you, and set “n = …” to the number of tweets you want to acquire for each term.

Usage

API keys for both Twitter and the Census are necessary for this project. Once these have been acquired, load the requisite packages, make sure the saved Twitter token read in the Setup section points to your own token file, and substitute the terms and quantities seen here with your own.

Setup

# Script-specific options or packages
library(tidyverse)  # data manipulation
library(rtweet)  # collecting tweets from Twitter API
library(rvest)  # webscraping
library(purrr)  # iteration
library(sf)
library(tidycensus)

# Personal Twitter API token, loaded from a saved .rds file one level above the script
token <- readRDS("../golsen_token.rds")

Run

term_search <-
  function(search_term, n = 1000) {
    # Function:
    # Search recent tweets for a specific term, restricted to the US.
    #
    # Arguments:
    #   search_term  a single character string; the term to search for
    #   n            a single number; how many tweets to request
    #
    # Value:
    #   The result of search_tweets(), passed through lat_lng(), with an
    #   extra `search_term` column recording which term produced each row.
    #
    # Note: `token` is not an argument. It is looked up outside the function,
    # so the token created in the Setup section must exist before calling.

    # Fail early with a clear error rather than sending a malformed query
    stopifnot(
      is.character(search_term), length(search_term) == 1, !is.na(search_term),
      is.numeric(n), length(n) == 1, !is.na(n)
    )

    # rtweet functions are namespace-qualified instead of calling library()
    # here, so the function no longer alters the search path on every call.
    # NOTE(review): sQuote() output depends on the `useFancyQuotes` option and
    # the locale (it may emit curly quotes) -- confirm that this is the query
    # form the Twitter API is meant to receive.
    tweets <-
      rtweet::search_tweets(q = sQuote(search_term), # query term (from search_term)
                            n = n, # number of desired tweets (from n)
                            include_rts = FALSE, # no retweets
                            geocode = rtweet::lookup_coords("usa"), # only from US
                            token = token) %>% # token for authentication
      rtweet::lat_lng() %>% # extract the geocoordinates where available
      mutate(search_term = search_term) # record the term that produced each row

    tweets # last expression is the return value
  }

Second Person Plurals

# Second-person plural variants; the escaped double quotes keep "you guys" together as a phrase
second_person_plurals <- c("y'all", "yall", "\"you guys\"")

# Make sure the output folder '../data/original/twitter/' exists
fs::dir_create(path = "../data/original/twitter/")

# Switch: TRUE runs a fresh search, FALSE leaves the CSV already on disk untouched
conduct_search <- FALSE

if (!conduct_search) {
  cat("Keeping previous search results. \n")
} else {
  cat("Conducting new search. \n")

  # Run term_search() on every term (up to 1000 tweets each, if available),
  # stack the per-term results by rows, and write them to disk
  save_as_csv(
    do_call_rbind(
      map(second_person_plurals, term_search, n = 1000)
    ),
    file_name = "../data/original/twitter/plurals.csv"
  )

  cat("Search results saved to disk. \n")
}

## Keeping previous search results.

Outdoor Sales

# Outdoor-sale variants; the escaped double quotes keep each two-word phrase together
sales <- c("\"garage sale\"","\"yard sale\"")

# Make sure the output folder '../data/original/twitter/' exists
fs::dir_create(path = "../data/original/twitter/")

# Switch: TRUE runs a fresh search, FALSE leaves the CSV already on disk untouched
conduct_search <- FALSE

if (!conduct_search) {
  cat("Keeping previous search results. \n")
} else {
  cat("Conducting new search. \n")

  # Run term_search() on every term (up to 500 tweets each, if available),
  # stack the per-term results by rows, and write them to disk
  save_as_csv(
    do_call_rbind(
      map(sales, term_search, n = 500)
    ),
    file_name = "../data/original/twitter/sale.csv"
  )

  cat("Search results saved to disk. \n")
}

## Keeping previous search results.

Gym Shoes

# Gym-shoe variants; the escaped double quotes keep "tennis shoes" together as a phrase
shoes <- c("sneakers","\"tennis shoes\"")

# Make sure the output folder '../data/original/twitter/' exists
fs::dir_create(path = "../data/original/twitter/")

# Switch: TRUE runs a fresh search, FALSE leaves the CSV already on disk untouched
conduct_search <- FALSE

if (!conduct_search) {
  cat("Keeping previous search results. \n")
} else {
  cat("Conducting new search. \n")

  # Run term_search() on every term (up to 1000 tweets each, if available),
  # stack the per-term results by rows, and write them to disk
  save_as_csv(
    do_call_rbind(
      map(shoes, term_search, n = 1000)
    ),
    file_name = "../data/original/twitter/shoes.csv"
  )

  cat("Search results saved to disk. \n")
}

## Keeping previous search results.

Soft Drinks

# Soft-drink variants
tonic <- c("soda","coke","pop")

# Make sure the output folder '../data/original/twitter/' exists
fs::dir_create(path = "../data/original/twitter/")

# Switch: TRUE runs a fresh search, FALSE leaves the CSV already on disk untouched
conduct_search <- FALSE

if (!conduct_search) {
  cat("Keeping previous search results. \n")
} else {
  cat("Conducting new search. \n")

  # Run term_search() on every term (up to 2000 tweets each, if available),
  # stack the per-term results by rows, and write them to disk.
  # NOTE(review): the file is named 'tonix.csv' although the vector is 'tonic' --
  # confirm what any code reading this file expects before renaming it.
  save_as_csv(
    do_call_rbind(
      map(tonic, term_search, n = 2000)
    ),
    file_name = "../data/original/twitter/tonix.csv"
  )

  cat("Search results saved to disk. \n")
}

## Keeping previous search results.

Major Roads

# Major-road variants
roads <- c("highway","freeway")

# Make sure the output folder '../data/original/twitter/' exists
fs::dir_create(path = "../data/original/twitter/")

# Switch: TRUE runs a fresh search, FALSE leaves the CSV already on disk untouched
conduct_search <- FALSE

if (!conduct_search) {
  cat("Keeping previous search results. \n")
} else {
  cat("Conducting new search. \n")

  # Run term_search() on every term (up to 2000 tweets each, if available),
  # stack the per-term results by rows, and write them to disk
  save_as_csv(
    do_call_rbind(
      map(roads, term_search, n = 2000)
    ),
    file_name = "../data/original/twitter/roads.csv"
  )

  cat("Search results saved to disk. \n")
}

## Keeping previous search results.

Finalize

Log

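When conduct_search is set to TRUE in each section, this script creates five CSV files in the ../data/original/twitter/ directory of your project (plurals.csv, sale.csv, shoes.csv, tonix.csv, and roads.csv), bearing the names given to them in the file_name argument of the save_as_csv() function. With conduct_search left as FALSE, no new files are written and any previous search results are kept.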

References

Vaux, Bert. “Harvard Dialect Survey.” Harvard Dialect Survey, 2003, http://dialect.redlog.net.