This script is crucial to acquiring the Twitter data around which the whole project is based. After loading the necessary libraries and defining the term_search function, tweets containing any search term can be acquired. As seen below, I used the same code for each of the five groups of search terms used in this project; only three things change between groups: the “terms to search” line of code, the name of the CSV created, and the “n = …” value. To adapt the code, replace the “terms to search” line with the name given to your own group of terms, change the name of the CSV to whatever makes the most sense for you, and set “n = …” to the number of tweets containing each term that you want to acquire.
API keys for both Twitter and the Census are necessary for this project. Once these have been acquired, simply load the requisite packages and substitute the terms and quantities seen here with your own.
# Script-specific options or packages
library(tidyverse) # data manipulation
library(rtweet) # collecting tweets from Twitter API
library(rvest) # webscraping
library(purrr) # iteration
library(sf)
library(tidycensus)
token <- readRDS("../golsen_token.rds") # read personal Twitter API token
# Search recent tweets for a single term through the Twitter API.
#
# Arguments:
#   search_term  the term to query; it is also stored in a `search_term`
#                column of the result so rows can be traced back to the query
#   n            number of desired tweets (default 1000)
#
# Returns the search results after lat_lng() has been applied, with the extra
# `search_term` column added.
#
# Relies on a `token` object being available in the calling environment (the
# script reads it from disk with readRDS() before this function is used).
term_search <- function(search_term, n = 1000) {
  # NOTE(review): sQuote() can emit typographic quotes depending on
  # getOption("useFancyQuotes") -- confirm the query is matched as intended.
  tweets <-
    rtweet::search_tweets(
      q = sQuote(search_term), # query term (from search_term)
      n = n, # number of desired tweets (from n)
      include_rts = FALSE, # no retweets
      geocode = rtweet::lookup_coords("usa"), # only from US
      token = token # token for authentication
    ) %>%
    rtweet::lat_lng() %>% # extract the geocoordinates where available
    dplyr::mutate(search_term = search_term) # add search_term value to the data frame

  tweets # return the results
}
# Second-person plural terms to search for
second_person_plurals <- c("y'all", "yall", "\"you guys\"")

fs::dir_create(path = "../data/original/twitter/") # create 'twitter' sub-directory in '../data/original/'

conduct_search <- FALSE # set to TRUE to conduct a new search

if (conduct_search) {
  cat("Conducting new search. \n")
  second_person_plurals %>% # terms to search
    map(term_search, n = 1000) %>% # apply the function to each term, retrieve 1000 tweets (if available)
    do_call_rbind() %>% # join the results by rows
    save_as_csv(file_name = "../data/original/twitter/plurals.csv") # write results to disk
  cat("Search results saved to disk. \n")
} else {
  cat("Keeping previous search results. \n")
}
## Keeping previous search results.
# Sale-related terms to search for
sales <- c("\"garage sale\"", "\"yard sale\"")

fs::dir_create(path = "../data/original/twitter/") # create 'twitter' sub-directory in '../data/original/'

conduct_search <- FALSE # set to TRUE to conduct a new search

if (conduct_search) {
  cat("Conducting new search. \n")
  sales %>% # terms to search
    map(term_search, n = 500) %>% # apply the function to each term, retrieve 500 tweets (if available)
    do_call_rbind() %>% # join the results by rows
    save_as_csv(file_name = "../data/original/twitter/sale.csv") # write results to disk
  cat("Search results saved to disk. \n")
} else {
  cat("Keeping previous search results. \n")
}
## Keeping previous search results.
# Footwear terms to search for
shoes <- c("sneakers", "\"tennis shoes\"")

fs::dir_create(path = "../data/original/twitter/") # create 'twitter' sub-directory in '../data/original/'

conduct_search <- FALSE # set to TRUE to conduct a new search

if (conduct_search) {
  cat("Conducting new search. \n")
  shoes %>% # terms to search
    map(term_search, n = 1000) %>% # apply the function to each term, retrieve 1000 tweets (if available)
    do_call_rbind() %>% # join the results by rows
    save_as_csv(file_name = "../data/original/twitter/shoes.csv") # write results to disk
  cat("Search results saved to disk. \n")
} else {
  cat("Keeping previous search results. \n")
}
## Keeping previous search results.
# Soft-drink terms to search for
tonic <- c("soda", "coke", "pop")

fs::dir_create(path = "../data/original/twitter/") # create 'twitter' sub-directory in '../data/original/'

conduct_search <- FALSE # set to TRUE to conduct a new search

if (conduct_search) {
  cat("Conducting new search. \n")
  # NOTE(review): the output file is named 'tonix.csv' although the term group
  # is `tonic` -- confirm what downstream code reads before renaming.
  tonic %>% # terms to search
    map(term_search, n = 2000) %>% # apply the function to each term, retrieve 2000 tweets (if available)
    do_call_rbind() %>% # join the results by rows
    save_as_csv(file_name = "../data/original/twitter/tonix.csv") # write results to disk
  cat("Search results saved to disk. \n")
} else {
  cat("Keeping previous search results. \n")
}
## Keeping previous search results.
# Road terms to search for
roads <- c("highway", "freeway")

fs::dir_create(path = "../data/original/twitter/") # create 'twitter' sub-directory in '../data/original/'

conduct_search <- FALSE # set to TRUE to conduct a new search

if (conduct_search) {
  cat("Conducting new search. \n")
  roads %>% # terms to search
    map(term_search, n = 2000) %>% # apply the function to each term, retrieve 2000 tweets (if available)
    do_call_rbind() %>% # join the results by rows
    save_as_csv(file_name = "../data/original/twitter/roads.csv") # write results to disk
  cat("Search results saved to disk. \n")
} else {
  cat("Keeping previous search results. \n")
}
## Keeping previous search results.
This script should create five CSV files that appear within your project, bearing the names given to them by the save_as_csv() function.
Vaux, Bert. “Harvard Dialect Survey.” Harvard Dialect Survey, 2003, http://dialect.redlog.net.