## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, eval = FALSE------------------------------------------------------
# # install package --------------------------------------------------------------
# # current cran version
# install.packages("globaltrends")
# # current dev version
# devtools::install_github("ha-pu/globaltrends", build_vignettes = TRUE)
# 
# # load package -----------------------------------------------------------------
# library(globaltrends)
# 
# # package version --------------------------------------------------------------
# packageVersion("globaltrends")
# #> [1] '0.0.14'

## ---- eval = FALSE------------------------------------------------------------
# # initialize_db ----------------------------------------------------------------
# setwd("your/globaltrends/folder")
# initialize_db()
# #> Database has been created.
# #> Table 'batch_keywords' has been created.
# #> ...
# #> Table 'data_global' has been created.
# #> Successfully disconnected.

## ---- eval = FALSE------------------------------------------------------------
# # start_db ---------------------------------------------------------------------
# setwd("your/globaltrends/folder")
# start_db()
# #> Successfully connected to database.
# #> Successfully exported all objects to .GlobalEnv.
# print(ls())
# #>  [1] "batch_keywords"   "batch_time"       "countries"        "data_control"
# #>  [5] "data_doi"         "data_global"      "data_locations"   "data_mapping"
# #>  [9] "data_object"      "data_score"       "dir_current"      "dir_wd"
# #> [13] "globaltrends_db"  "keyword_synonyms" "keywords_control" "keywords_object"
# #> [17] "time_control"     "time_object"      "us_states"

## ---- eval = FALSE------------------------------------------------------------
# # disconnect_db ----------------------------------------------------------------
# disconnect_db()
# #> Successfully disconnected.

## ---- eval = FALSE------------------------------------------------------------
# # add_control_keyword -----------------------------------------------------------
# new_control <- add_control_keyword(
#   keyword = c("gmail", "maps", "translate", "wikipedia", "youtube"),
#   time = "2010-01-01 2020-12-31"
# )
# #> Successfully created new control batch 1 (gmail ... youtube, 2010-01-01 2020-12-31).

## ---- eval = FALSE------------------------------------------------------------
# # keywords_control and dplyr interaction ----------------------------------------
# dplyr::filter(keywords_control, keyword == "gmail")
# #> # A tibble: 1 x 2
# #>   batch keyword
# #>
# #> 1     1 gmail

## ---- eval = FALSE------------------------------------------------------------
# # download_control --------------------------------------------------------------
# download_control(control = new_control, locations = countries)
# #> Successfully downloaded control data | control: 1 | location: US [1/66]
# #> ...
# #> Successfully downloaded control data | control: 1 | location: DO [66/66]
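## (added sketch) inspect downloaded control data --------------------------------
## Not part of the original vignette: a minimal sketch of how the raw
## `data_control` table (exported to .GlobalEnv by start_db()) could be queried
## after downloading. Column names are assumptions based on the export_control()
## output shown later in this script; adjust to the actual table schema if needed.
# library(dplyr)
# data_control %>%
#   filter(location == "US" & keyword == "gmail") %>%
#   collect()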
## ---- eval = FALSE------------------------------------------------------------
# # download_control_global --------------------------------------------------------
# download_control_global(control = new_control)
# #> Successfully downloaded control data | control: 1 | location: world [1/1]

## ---- eval = FALSE------------------------------------------------------------
# # add_object_keyword -------------------------------------------------------------
# new_object <- add_object_keyword(
#   keyword = list(
#     c("coca cola", "facebook", "microsoft"),
#     c("alaska air group", "illinois tool works", "jm smucker")
#   ),
#   time = "2010-01-01 2020-12-31"
# )
# #> Successfully created new object batch 1 (coca cola ... microsoft, 2010-01-01 2020-12-31).
# #> Successfully created new object batch 2 (alaska air group ... jm smucker, 2010-01-01 2020-12-31).

## ---- eval = FALSE------------------------------------------------------------
# # keywords_object and dplyr interaction -------------------------------------------
# dplyr::filter(keywords_object, keyword == "coca cola")
# #> # A tibble: 1 x 2
# #>   batch keyword
# #>
# #> 1     1 coca cola

## ---- eval = FALSE------------------------------------------------------------
# # download_object -----------------------------------------------------------------
# download_object(object = new_object, locations = countries)
# #> Successfully downloaded object data | object: 1 | location: US [1/66]
# #> ...
# #> Successfully downloaded object data | object: 2 | location: DO [66/66]

## ---- eval = FALSE------------------------------------------------------------
# # download_object_global -----------------------------------------------------------
# download_object_global(object = new_object)
# #> Successfully downloaded object data | object: 1 | location: world [1/1]
# #> Successfully downloaded object data | object: 2 | location: world [1/1]

## ---- eval = FALSE------------------------------------------------------------
# # compute_score --------------------------------------------------------------------
# compute_score(control = new_control[[1]], object = new_object, locations = countries)
# #> Successfully computed search score | control: 1 | object: 1 | location: US [1/66]
# #> ...
# #> Successfully computed search score | control: 1 | object: 2 | location: DO [66/66]
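## (added sketch) iterate over batches ---------------------------------------------
## Not part of the original vignette: compute_score() also accepts a single batch
## number (as in the "change locations" chunk further below), so an explicit loop
## over object batches, here with purrr::walk, gives the same result as passing
## the full list in one call.
# purrr::walk(
#   new_object,
#   ~ compute_score(control = new_control[[1]], object = .x, locations = countries)
# )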
## ---- eval = FALSE------------------------------------------------------------
# # compute_voi ---------------------------------------------------------------------
# compute_voi(control = new_control[[1]], object = new_object)
# #> Successfully computed search score | control: 1 | object: 1 | location: world [1/1]
# #> Successfully computed search score | control: 1 | object: 2 | location: world [1/1]

## ---- eval = FALSE------------------------------------------------------------
# # compute_doi ---------------------------------------------------------------------
# compute_doi(control = new_control[[1]], object = new_object, locations = "countries")
# #> Successfully computed DOI | control: 1 | object: 1 [1/2]
# #> Successfully computed DOI | control: 1 | object: 2 [2/2]

## ---- eval = FALSE------------------------------------------------------------
# # manual exports --------------------------------------------------------------------
# library(dplyr)
# data_score %>%
#   filter(keyword == "coca cola") %>%
#   collect()
# #> # A tibble: 8,040 x 8
# #>    location keyword    date score_obs score_sad score_trd batch_c batch_o
# #>
# #>  1 US       coca cola 14610   0.00362   0.00381   0.00548       1       1
# #> ...
# #> 10 US       coca cola 14883   0.00347   0.00365   0.00389       1       1
# #> # ... with 8,030 more rows

## ---- eval = FALSE------------------------------------------------------------
# # export_control --------------------------------------------------------------------
# export_control(control = 1)
# #> # A tibble: 39,600 x 5
# #>    location keyword date        hits control
# #>
# #>  1 US       gmail   2010-01-01    22       1
# #> ...
# #> 10 US       gmail   2010-10-01    27       1
# #> # ... with 39,590 more rows
# 
# # export_score ----------------------------------------------------------------------
# export_score(object = 1, control = 1)
# #> # A tibble: 23,760 x 8
# #>    location keyword   date       score_obs score_sad score_trd control object
# #>
# #>  1 US       coca cola 2010-01-01   0.00362   0.00381   0.00548       1      1
# #> ...
# #> 10 US       coca cola 2010-10-01   0.00347   0.00365   0.00389       1      1
# #> # ... with 23,750 more rows
# 
# # export_doi and purrr interaction ---------------------------------------------------
# purrr::map_dfr(c("coca cola", "microsoft"), export_doi, control = 1, type = "obs")
# #> # A tibble: 240 x 9
# #>    keyword   date       type       gini   hhi entropy control object locations
# #>
# #>  1 coca cola 2010-01-01 score_obs 0.397 0.874  -0.938       1      1 countries
# #> ...
# #> 10 coca cola 2010-10-01 score_obs 0.574 0.968  -0.303       1      1 countries
# #> # ... with 230 more rows
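## (added sketch) save exported data -------------------------------------------------
## Not part of the original vignette: every export_* function returns a tibble, so
## results can be written to disk with standard tooling (readr::write_csv is used
## here as an example; any writer works).
# out <- export_doi(object = 1, control = 1, type = "obs")
# readr::write_csv(out, "doi_object1.csv")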
## ---- eval = FALSE------------------------------------------------------------
# # export and dplyr interaction ------------------------------------------------------
# library(dplyr)
# export_doi(object = 1, control = 1, type = "obs") %>%
#   filter(lubridate::year(date) == 2019) %>%
#   group_by(keyword) %>%
#   summarise(gini = mean(gini), .groups = "drop")
# #> # A tibble: 3 x 2
# #>   keyword    gini
# #>
# #> 1 coca cola 0.615
# #> 2 facebook  0.707
# #> 3 microsoft 0.682

## ---- eval = FALSE------------------------------------------------------------
# # plot_score ------------------------------------------------------------------------
# library(dplyr)
# export_score(keyword = "coca cola", control = 1) %>%
#   filter(lubridate::year(date) == 2019) %>%
#   plot_bar()

## ---- eval = FALSE------------------------------------------------------------
# # plot_doi_ts and plot_doi_box -------------------------------------------------------
# data <- purrr::map_dfr(1:2, export_doi, keyword = NULL, control = 1, type = "obs")
# plot_ts(data)
# plot_box(data)

## ---- eval = FALSE------------------------------------------------------------
# # plot_voi_doi ----------------------------------------------------------------------
# out_voi <- export_voi(keyword = "facebook", type = "obs")
# out_doi <- export_doi(keyword = "facebook", object = 1, type = "obs")
# plot_voi_doi(data_voi = out_voi, data_doi = out_doi)

## ---- eval = FALSE------------------------------------------------------------
# data <- export_score(keyword = "facebook", locations = countries)
# out <- get_abnorm_hist(data)
# na.omit(out) # to drop baseline NA values
# #> # A tibble: 7,590 x 8
# #>    keyword  location date       control object score score_abnorm quantile
# #>
# #>  1 facebook US       2011-01-01       1      1  1.19       0.0220    0.728
# #> ...
# #> 10 facebook US       2011-10-01       1      1  1.32      -0.0669    0.456
# #> # ... with 7,580 more rows
# 
# data <- export_voi(object = 1)
# out <- get_abnorm_hist(data)
# na.omit(out) # to drop baseline NA values
# #> # A tibble: 345 x 7
# #>    keyword   date       control object     voi voi_abnorm quantile
# #>
# #>  1 coca cola 2011-01-01       1      1 0.00320  -0.000299    0.316
# #> ...
# #> 10 coca cola 2011-10-01       1      1 0.00274  -0.000458    0.193
# #> # ... with 335 more rows
# 
# data <- export_doi(keyword = "microsoft", locations = "us_states")
# out <- get_abnorm_hist(data)
# na.omit(out) # to drop baseline NA values
# #> # A tibble: 345 x 9
# #>    keyword   date       type      control object locations   doi doi_abnorm quantile
# #>
# #>  1 microsoft 2011-01-01 score_obs       1      1 us_states 0.919     0.0330    0.991
# #> ...
# #> 10 microsoft 2011-04-01 score_obs       1      1 us_states 0.909     0.0171    0.886
# #> # ... with 335 more rows
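## (added sketch) flag unusual changes -----------------------------------------------
## Not part of the original vignette: the quantile column returned by
## get_abnorm_hist() locates each abnormal change within its historic distribution,
## so a simple filter keeps only changes in the tails.
# library(dplyr)
# data <- export_voi(object = 1)
# out <- get_abnorm_hist(data)
# filter(out, quantile >= 0.95 | quantile <= 0.05)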
## ---- eval = FALSE------------------------------------------------------------
# data <- export_score(object = 1, locations = countries)
# data <- dplyr::filter(data, keyword == "facebook" & lubridate::year(date) >= 2018)
# # use 2018 as baseline to compute abnormal changes in 2019
# out <- get_abnorm_hist(data)
# plot_bar(out)

## ---- eval = FALSE------------------------------------------------------------
# data <- export_score(keyword = "facebook", locations = "DE")
# out <- get_abnorm_hist(data)
# plot_ts(out)
# 
# data <- export_doi(keyword = "coca cola", locations = "countries")
# out <- get_abnorm_hist(data)
# plot_box(out)

## ---- eval = FALSE------------------------------------------------------------
# # `data` is assumed to hold monthly search hits, e.g. from export_control()
# # computation seasonally adjusted ----------------------------------------------------
# search_score <- ts(data$hits, frequency = 12)
# fit <- stl(search_score, s.window = "period")
# seasad <- forecast::seasadj(fit)
# 
# # computation trend only --------------------------------------------------------------
# search_score <- ts(data$hits, frequency = 12)
# fit <- stl(search_score, s.window = "period")
# trend <- fit$time.series[, "trend"]

## ---- eval = FALSE------------------------------------------------------------
# # adapt export and plot options --------------------------------------------------------
# data_score <- export_score(keyword)
# data_voi <- export_voi(keyword)
# data_doi <- export_doi(keyword, type = "obs")
# 
# plot_bar(data_score, type = "obs")
# plot_ts(data_voi, type = "sad")
# plot_box(data_doi, type = "trd")
# plot_voi_doi(data_voi, data_doi, type = "obs")
# 
# get_abnorm_hist(data_voi, type = "obs")

## ---- eval = FALSE------------------------------------------------------------
# # adapt export and plot options --------------------------------------------------------
# data_voi <- export_voi(keyword)
# data_doi <- export_doi(keyword, measure = "gini")
# 
# plot_ts(data_doi, measure = "gini")
# plot_box(data_doi, measure = "hhi")
# plot_voi_doi(data_voi, data_doi, measure = "entropy")
# 
# get_abnorm_hist(data_doi, measure = "hhi")

## ---- eval = FALSE------------------------------------------------------------
# # change locations ----------------------------------------------------------------------
# download_control(control = 1, locations = us_states)
# download_object(object = list(1, 2), locations = us_states)
# download_mapping(control = 1, object = 2, locations = us_states)
# compute_score(control = 1, object = 2, locations = us_states)
# compute_doi(control = 1, object = list(1, 2), locations = "us_states")

## ---- eval = FALSE------------------------------------------------------------
# add_locations(c("AT", "CH", "DE"), type = "dach")
# #> Successfully created new location set dach (AT, CH, DE).
# data <- export_score(keyword = "coca cola", locations = dach)
# dplyr::count(data, location)
# #> # A tibble: 3 x 2
# #>   location     n
# #>
# #> 1 AT         127
# #> 2 CH         127
# #> 3 DE         127
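## (added sketch) use a custom location set ------------------------------------------
## Not part of the original vignette: after add_locations() the new set can be used
## like countries or us_states in the download and compute steps, mirroring the
## "change locations" chunk above.
# download_control(control = 1, locations = dach)
# download_object(object = 1, locations = dach)
# download_mapping(control = 1, object = 1, locations = dach)
# compute_score(control = 1, object = 1, locations = dach)
# compute_doi(control = 1, object = 1, locations = "dach")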