Annotated Cleaning Documentation.Rmd

---
title: "Psycorona - Data Cleaning Documentation"
subtitle: "Step by step description" 
author: "PsyCorona: Max, Jannis & Ben"
date: "3/30/2020"
output:
  html_document: 
    code_folding: hide
    mathjax: default
    theme: yeti
    toc: yes
    toc_float: yes
editor_options:
  chunk_output_type: console
---


<style type="text/css">
.main-container {
  max-width: 1300px;
  margin-left: auto;
  margin-right: auto;
}
.table {
  margin-left:auto; 
  margin-right:auto;
}
</style>


```{r setup, include=FALSE}
# R Studio Clean-Up
  cat("\014") # clear console
  rm(list=ls()) # clear workspace
  invisible(gc()) # run the garbage collector (a bare `gc` without parentheses never calls it)
  
# Install and Load Packages
  # if(!require(pacman)) install.packages("pacman")
  # require(pacman)
  # pacman::p_load(psych, ggplot2, ggthemes, haven, data.table, dplyr, tidyr, Hmisc, mada,
  #                knitr, kableExtra, naniar, stats, readxl, matrixStats, ISOcodes, pander,
  #                Scale, haven, lubridate, naniar, stats)
lib <- c("psych", "ggplot2", "ggthemes", "haven", "data.table", "dplyr", "tidyr", "Hmisc", "mada", 
         "knitr", "kableExtra", "naniar", "stats", "readxl", "matrixStats", "ISOcodes", "pander", "lubridate")

# "Scale" (listed in the pacman call above) is not attached here

# Attach every package exactly once; invisible() hides the list that lapply() returns
invisible(lapply(lib, library, character.only = TRUE))
rm(lib)

# Load Custom Packages (project-local helper scripts)
  source("./scripts/functions/fun.panel.R")
  source("./scripts/functions/themes.R")
  source("./scripts/functions/dictionary_functions.R")
  source("./scripts/functions/recode_if.R")

# Markdown Options
  knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file()) # set working directory
  knitr::opts_knit$get("root.dir") # check working directory
  options(scipen = 999, digits = 4, width = 400) # removes scientific notation, 4 significant digits, wide output
  #knitr::opts_chunk$set(echo = TRUE, cache = F, cache.path = rprojroot::find_rstudio_root_file('cache/')) # cache settings
  # Output hooks: wrap errors, warnings and messages emitted by chunks in
  # "alert" <div> boxes and strip the leading "##" output markers.
  knitr::knit_hooks$set(
   error = function(x, options) {
     paste('\n\n<div class="alert alert-danger">',
           gsub('##', '\n', gsub('^##\ Error', '**Error**', x)),
           '</div>', sep = '\n')
   },
   warning = function(x, options) {
     paste('\n\n<div class="alert alert-warning">',
           gsub('##', '\n', gsub('^##\ Warning:', '**Warning**', x)),
           '</div>', sep = '\n')
   },
   message = function(x, options) {
     paste('\n\n<div class="alert alert-info">',
           gsub('##', '\n', x),
           '</div>', sep = '\n')
   }
  )
  htmltools::tagList(rmarkdown::html_dependency_font_awesome())

# Global Chunk Options
  knitr::opts_chunk$set(echo = TRUE)
```

Note. Boxplots display the interquartile range (IQR, center box), and the whiskers extend 1.5*IQR from the lower and upper hinge. The white point indicates the mean and the white center line indicates the median.   

<br/>

## **Import Data**
In a first step we import the raw Qualtrics data, which was downloaded as an SPSS file.   

### Baseline
```{r LoadRawBase, echo=T, warning=F, message=F}
# Reset working directory to folder current file is saved in
#setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

# Folder that holds all raw SPSS exports
rawDir <- "data/collab data/Shared/Data/raw data"

# Read the raw SPSS file whose name matches `pattern` (case-insensitive regex)
# and tag every row with the sample label `source`.
importRaw <- function(pattern, source) {
  rawFile <- dir(rawDir, pattern = pattern, full.names = TRUE, ignore.case = TRUE)
  dat <- haven::read_spss(rawFile)
  dat$source <- source
  dat
}

# RuG snowball sample
dt0RawRUG <- importRaw("Baseline.\\-.RUG.General", "RUG")

# RuG representative sample
dt0RawRepRUG <- importRaw("Baseline.\\-.RUG.Representative", "Rep RUG")
dt0RawRepRUG$country <- dt0RawRepRUG$country_new #fixed here but later in MTurk

# NYU-AD snowball sample
dt0RawNYUAD <- importRaw("Baseline.\\-.NYUAD.General", "NYU-AD")

# NYU-AD representative sample
dt0RawRepNYUAD <- importRaw("Baseline.\\-.NYUAD.Representative", "Rep NYU-AD")

# Iranian data
dt0RawIran <- importRaw("Baseline.\\-.Iran", "Iran")

# How Nuts are the Dutch data
dt0RawHNATD <- importRaw("HNDPsyC19", "HNATD")

# Flycatcher data
dt0RawFly <- importRaw("Flycatcher", "Fly")

# Chinese data
dt0RawChina <- importRaw("China", "Rep China")

# Check variable names before the merge. The NYU-AD snowball sample is the
# reference; each call prints the variable names present in one data set but
# absent from the other.
refNames <- names(dt0RawNYUAD)

setdiff(names(dt0RawRUG), refNames) # in RUG but not in NYU-AD
setdiff(refNames, names(dt0RawRUG)) # in NYU-AD but not in RUG
cat("Looks good as non-overlapping variables pertain to the political items")

setdiff(names(dt0RawRepRUG), refNames)
setdiff(names(dt0RawRepNYUAD), refNames)
cat("Looks good as country_new is calculated by Qualtrics")

# check dataset from Iran
setdiff(names(dt0RawIran), refNames)
cat("All variables have theo correct names in the Iranian dataset")

# check dataset from HNATD
setdiff(names(dt0RawHNATD), refNames)
cat("Variables that do not match are no problem")

# check dataset from Flycatcher
setdiff(names(dt0RawFly), refNames)
cat("Variables that do not match are no problem")

# check dataset from China
setdiff(names(dt0RawChina), refNames)
cat("Variables that do not match are no problem")

# Stack all samples; columns missing in a sample are filled with NA
dt0Raw <- plyr::rbind.fill(dt0RawRUG, dt0RawNYUAD, dt0RawRepNYUAD, dt0RawRepRUG, dt0RawIran, dt0RawHNATD, dt0RawFly, dt0RawChina)

# Drop the per-sample copies and the helpers of this chunk
# (dt0RawFly and dt0RawChina are kept; they are removed in a later chunk)
rm(dt0RawRUG, dt0RawNYUAD, dt0RawRepNYUAD, dt0RawRepRUG, dt0RawIran, dt0RawHNATD,
   rawDir, importRaw, refNames)
```

The raw data set includes `r length(dt0Raw)` variables for `r nrow(dt0Raw)` cases.   

### Recontacts
```{r LoadRawRec, echo=T, warning=F, message=F}
# Import the 22 recontact waves as dt0w1 ... dt0w22
# all studies were redownloaded on 28/10/2021
rawDir <- "data/collab data/Shared/Data/raw data"

# File-name patterns (regex, case-insensitive), one per wave in wave order.
# Waves 1 and 2 carry a longer pattern; a plain "Wave.1" / "Wave.2" would also
# match the file names of waves 10-19 / 20-22.
wavePatterns <- c("Wave.1...M", "Wave.2...A", paste0("Wave.", 3:22))

for (w in seq_along(wavePatterns)) {
  waveFile <- dir(rawDir, pattern = wavePatterns[w], full.names = TRUE, ignore.case = TRUE)
  # Fail early with a readable message instead of an opaque reader error
  if (length(waveFile) != 1) {
    stop("Expected exactly one raw file for wave ", w, " (pattern '", wavePatterns[w],
         "') but found ", length(waveFile), call. = FALSE)
  }
  # Later chunks address the waves by name (dt0w1, dt0w2, ...), so keep those objects
  assign(paste0("dt0w", w), haven::read_spss(waveFile))
}

# prepare dfs before merge (basically naming variables correct)
# Reviewer verdict on the name mismatches of each wave (index = wave number)
waveVerdict <- c(
  rep("Checked and all mismatches were added later!", 4), # waves 1-4
  rep("Still needs checking", 6),                         # waves 5-10
  "fail01 is twice in there",                             # wave 11
  rep("Looks good", 11)                                   # waves 12-22
)

baseNames <- names(dt0Raw)
for (w in seq_along(wavePatterns)) {
  # Variables that exist in the wave but not in the merged baseline data
  mismatch <- setdiff(names(get(paste0("dt0w", w))), baseNames)
  # Hide Qualtrics bookkeeping variables: timers, display order, click counts, page submits
  mismatch <- mismatch[!grepl("t_|_DO_|_Count|_Submit", mismatch)]

  cat("Missmatch between wave ", w, " and baseline:\n", sep = "")
  print(data.frame(variable = mismatch)) # explicit print(): no auto-printing inside a loop
  cat(waveVerdict[w], "\n")
}

rm(rawDir, wavePatterns, waveFile, waveVerdict, baseNames, mismatch, w)
```

## **Data Quality**   
### Baseline
#### Filter: Preview Responses
Filter the Preview responses.
```{r preview, echo=T, warning=F, message=F}
# Flag preview responses: FilterPreview is 0 when Status equals 0 and 1 otherwise
# (1 = preview, following the label used in the labelled variant below).
# Labelled variant (slow), kept for reference:
  # dt1Preview <- dt0Raw %>%
  #   mutate(FilterPreview = labelled(ifelse(Status == 0,0,1),
  #                                   labels = c(preview = 1), label="Filter: survey preview response"))
# Unlabelled variant (fast): copy the raw data and append the flag column
  dt1Preview <- dt0Raw
  dt1Preview$FilterPreview <- ifelse(dt0Raw$Status == 0, 0, 1)
```


#### Filter: Survey Progress (drop out)  
<!-- https://cran.r-project.org/web/packages/naniar/vignettes/naniar-visualisation.html -->

Inspecting missing data in the items.

```{r Missing, echo=T, warning=F, message=F}
# Items with at least one missing value. Computed once and reused by the table
# and both plots below instead of re-running the selection three times.
missItems <- dt1Preview %>%
  dplyr::select(-starts_with("t_"), -starts_with("Pol")) %>% #drop timers and Political orientation (because of translation missingness)
  dplyr::select_if(~sum(is.na(.)) > 0) # remove all variables that have no missingess

# Table: Missing Data per item
missItems %>%
  naniar::miss_var_summary(.) %>% # by variable summary of missingness proportion
  DT::datatable(.,
                colnames = c("Variable", "Number Missing", "Percentage Missing"),
                filter = 'top',
                extensions = 'Buttons',
                options = list(
                  columnDefs = list(list(className = 'dt-center')),
                  #autoWidth = TRUE,
                  dom = 'Bfrtlip',
                  buttons = c('copy', 'csv', 'excel', 'pdf', 'print'))) %>%
  DT::formatRound('pct_miss', digits = 2)
  
# Plot: Missing Data per item
naniar::gg_miss_var(missItems) # visualize by variable summary of missingness proportion

# Plot: Missing Data cumulative
naniar::gg_miss_var_cumsum(missItems) # missingness development over survey

# Co-occurences of missingess - too many variables
#dt0Raw %>%
#  dplyr::select(-starts_with("t_"), -starts_with("Pol")) %>% #drop timers and Political orientation (because of translation missingness)
#  dplyr::select_if(~sum(is.na(.)) > 0) %>% # remove all variables that have no missingess
#  naniar::gg_miss_upset(., nsets = n_var_miss(.)) # visualize missingess co-occurences
rm(missItems)
rm(dt0Raw, dt0RawChina, dt0RawFly)

# set progress cut-off criterion:
progressCutOff <- 97 #cut-off criterion in percent
progressFilter <- dt1Preview %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)

# share of responses (in percent) below the cut-off; only used by the commented plot below
progressCutOffPerc <- round(sum(progressFilter$out)/nrow(progressFilter)*100,2)

# plot histogram and missing (COMMENTED FOR TIME)
# ggplot(data=progressFilter, aes(x=Progress, fill=out)) +
#   geom_histogram(bins=50,
#                  alpha=.6) +
#   geom_vline(xintercept = progressCutOff, 
#              color = "darkred",
#              linetype = "longdash") +
#   geom_text(aes(x=progressCutOff, label=paste0("Progress cut-off: ",progressCutOff,"%\n"), y=Inf), 
#             hjust = 1,
#             colour="darkred", 
#             angle=90) +
#   geom_text(aes(x=progressCutOff, label=paste0("\ndata loss: ",progressCutOffPerc,"%"), y=Inf), 
#             hjust = 1,
#             colour="darkred", 
#             angle=90) +
#   #scale_x_continuous(breaks = seq(0, 100,3)) +
#   scale_fill_manual(values=c("darkgrey","darkred")) +
#   labs(title = "Histogram: Survey Progress", 
#        x = "Survey Progress [Percent completed]",
#        y = "Frequency Count") +
#   theme_Publication() +
#   theme(legend.position = "none")

# flag anyone whose survey progress is below the cut-off (FilterProgress = 1)
# labelled slow
  # dt2Progress <- dt1Preview %>%
  #   mutate(FilterProgress = labelled(ifelse(Progress < progressCutOff,1,0),
  #                                labels = c(`consent` = 1), label="Filter: Did not see debriefing"))
# not labelled fast
  dt2Progress <- dt1Preview %>%
    mutate(FilterProgress = ifelse(Progress < progressCutOff,1,0))

rm(progressFilter, progressCutOff, progressCutOffPerc)
```


#### Filter: Short Duration on Survey   
Filter survey responses that were shorter than 5 minutes.   
```{r Duration, echo=T, warning=F, message=F}
# Time cut-off criterion in minutes
# CJ: This might be a bit strict, I suspect that I completed it in <10 minutes. 
tCutOff <- 5

# Truncated duration data: keep responses no longer than Median + 3.5 * MAD,
# convert to minutes and mark those below the cut-off (`out`).
tOutlierHigh <- dt2Progress %>%
  dplyr::select(Duration__in_seconds_) %>%
  filter(
    Duration__in_seconds_ <=
      stats::median(Duration__in_seconds_) + 3.5 * stats::mad(Duration__in_seconds_)
  ) %>%
  mutate(Minutes = Duration__in_seconds_ / 60,
         out = Minutes < tCutOff)

# Percent of all responses that fall below the cut-off (among the truncated data)
tCutOffPerc <- round(sum(tOutlierHigh$Minutes < tCutOff) / nrow(dt2Progress) * 100, 2)

# plot histogram and missing (COMMENTED FOR TIME)
# ggplot(data=tOutlierHigh, aes(x=Minutes, fill=out)) +
#   geom_histogram(bins=round(max(tOutlierHigh$Minutes),0),
#                  alpha=.6) +
#   geom_vline(xintercept = tCutOff, 
#              color = "darkred",
#              linetype = "longdash") +
#   geom_text(aes(x=tCutOff, label=paste0("time cut-off: ",tCutOff," Minutes\n"), y=Inf), 
#             hjust = 1,
#             colour="darkred", 
#             angle=90) +
#   geom_text(aes(x=tCutOff, label=paste0("\ndata loss: ",tCutOffPerc,"%"), y=Inf), 
#             hjust = 1,
#             colour="darkred", 
#             angle=90) +
#   scale_x_continuous(breaks = seq(0, round(max(tOutlierHigh$Minutes),0), 5)) +
#   scale_fill_manual(values=c("darkgrey","darkred")) +
#   labs(title = "Truncated Histogram: Survey Duration", 
#        x = "Duration [Mintues]",
#        y = "Frequency Count",
#        caption = "Notes:
#        (1) Truncated: all participants who took less time than Median+3.5*MAD
#        (2) Each bin represents one Minute") +
#   theme_Publication() +
#   theme(legend.position = "none")

# Flag short responses (1 = flagged):
#   FilterTime          - duration not above tCutOff minutes
#   FilterTimeQualtrics - duration not above 365 seconds
#                         (NOTE(review): presumably a Qualtrics-derived threshold; confirm)
# Labelled variant (slow), kept for reference:
  # dt3Time <- dt2Progress %>%
  #   mutate(FilterTime = labelled(ifelse(Duration__in_seconds_ > tCutOff*60,0,1),
  #                                labels = c(`extremely quick` = 1), label="Filter: Took less than 5 minutes on survey"))
# Unlabelled variant (fast):
  dt3Time <- mutate(
    dt2Progress,
    FilterTime = ifelse(Duration__in_seconds_ > tCutOff * 60, 0, 1),
    FilterTimeQualtrics = ifelse(Duration__in_seconds_ > 365, 0, 1)
  )

# Earlier variants that flagged directly on dt1Preview, kept for reference:
  # dt2Time <- dt1Preview %>%
  #   mutate(FilterTime = labelled(ifelse(Duration__in_seconds_ > 300,0,1),
  #                                labels = c(`extremely quick` = 1), label="Filter: Took less than 5 minutes on survey"))
  # dt2Time <- dt1Preview %>%
  #   mutate(ifelse(Duration__in_seconds_ > 300,0,1))

# Clean up intermediates of this and the previous filter steps
rm(tOutlierHigh, tCutOff, tCutOffPerc, dt2Progress)
rm(dt1Preview)
```


#### Filter: Straightliners   
Filter participants who have straightlined on the job insecurity scale, which includes a reverse-coded item. We only flag people who straightlined outside the median category, because answering "neither agree nor disagree" on all items might be a meaningful response.   
```{r Straightliner, echo=T, warning=F, message=F}
# Job insecurity items used for the straightlining check
jbItems <- c("jbInsec01", "jbInsec02", "jbInsec03")

# Respondent id plus the three items, with -99 recoded to <NA> on the items.
# The recode is done per numeric item: calling na_if() on a whole data frame
# is rejected by dplyr >= 1.1.0. The id column never held -99, so leaving it
# untouched does not change the result.
jobinsec <- dt3Time[, c("ResponseId", jbItems)]
jobinsec[jbItems] <- lapply(jobinsec[jbItems], function(item) {
  item <- as.numeric(item) # drop SPSS labelling so the columns are plain numbers
  item[item %in% -99] <- NA
  item
})

# CheckMissingness pattern
naniar::gg_miss_upset(jobinsec)

# isolate respondents who have straightlined outside a the median categories (b/c all "neither agree nor disagree" might be meaningful response) 
jobinsecRed <- na.omit(jobinsec) # remove people who have missing data on one of the three items
jobinsecRed$mean <- rowMeans(jobinsecRed[jbItems])
jobinsecRed$sd <- matrixStats::rowSds(as.matrix(jobinsecRed[jbItems]))
# identical answers on all items (sd == 0) that are not all 0 (mean != 0)
jobinsecRed <- jobinsecRed[jobinsecRed$sd == 0 & jobinsecRed$mean != 0, ]

# flag anyone who straightlined on job insecurity (1 = straightliner)
# labelled slow
  # dt4Straightliner <- dt3Time %>%
  #   mutate(FilterStraightliner = labelled(ifelse(!ResponseId %in% jobinsecRed$ResponseId,0,1),
  #                                         labels = c(straightliner = 1), label="Filter: straightliner on Job Insecurity"))
# not labelled fast
  dt4Straightliner <- dt3Time %>%
    mutate(FilterStraightliner = ifelse(ResponseId %in% jobinsecRed$ResponseId, 1, 0))

rm(jobinsec, jobinsecRed, jbItems, dt3Time)
```

### Recontacts
#### Filter w1: Survey Progress
```{r recSurvProgw1, echo=T, warning=F, message=F}
# Progress cut-off criterion in percent (check each wave for correct number)
progressCutOff <- 97

# Progress of every wave-1 response plus a marker `out` for those below the cut-off
progressFilter <- dplyr::transmute(dt0w1, Progress, out = Progress < progressCutOff)
table(progressFilter$out)

# Percent of responses lost with the current cut-off criterion (printed)
progressCutOffPerc <- round(sum(progressFilter$out) / nrow(progressFilter) * 100, 2)
progressCutOffPerc

# Drop the responses below the cut-off before the merge
  dt0w1 <- dplyr::filter(dt0w1, !progressFilter$out)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w2: Survey Progress
```{r recSurvProgw2, echo=TRUE, warning=FALSE, message=FALSE}
# Chunk label is wave-specific: knitr stops on duplicate chunk labels by default.
# set progress cut-off criterion:
progressCutOff <- 95 #cut-off criterion in percent (check each wave for correct number)
# Progress of every wave-2 response plus a marker `out` for those below the cut-off
progressFilter <- dt0w2 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
(progressCutOffPerc <- round(sum(progressFilter$out)/nrow(progressFilter)*100,2)) # percent of missing data with current cut-off criterion

# throw them out before the merge
  dt0w2 <- dt0w2 %>%
  filter(!progressFilter$out)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w3: Survey Progress
```{r recSurvProgw3, echo=TRUE, warning=FALSE, message=FALSE}
# Chunk label is wave-specific: knitr stops on duplicate chunk labels by default.
# set progress cut-off criterion:
progressCutOff <- 95 #cut-off criterion in percent (NEEDS CHANGING; NO DROPOUTS YET)
# Progress of every wave-3 response plus a marker `out` for those below the cut-off
progressFilter <- dt0w3 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
(progressCutOffPerc <- round(sum(progressFilter$out)/nrow(progressFilter)*100,2)) # percent of missing data with current cut-off criterion

# throw them out before the merge
  dt0w3 <- dt0w3 %>%
  filter(!progressFilter$out)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w4: Survey Progress
```{r recSurvProgw4, echo=TRUE, warning=FALSE, message=FALSE}
# Chunk label is wave-specific: knitr stops on duplicate chunk labels by default.
# set progress cut-off criterion:
progressCutOff <- 95 #cut-off criterion in percent (NEEDS CHANGING; NO DROPOUTS YET)
# Progress of every wave-4 response plus a marker `out` for those below the cut-off
progressFilter <- dt0w4 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
(progressCutOffPerc <- round(sum(progressFilter$out)/nrow(progressFilter)*100,2)) # percent of missing data with current cut-off criterion

# throw them out before the merge
  dt0w4 <- dt0w4 %>%
  filter(!progressFilter$out)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w5: Survey Progress
```{r recSurvProgw5, echo=TRUE, warning=FALSE, message=FALSE}
# Chunk label is wave-specific: knitr stops on duplicate chunk labels by default.
# set progress cut-off criterion:
progressCutOff <- 95 #cut-off criterion in percent (NEEDS CHANGING; NO DROPOUTS YET)
# Progress of every wave-5 response plus a marker `out` for those below the cut-off
progressFilter <- dt0w5 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
(progressCutOffPerc <- round(sum(progressFilter$out)/nrow(progressFilter)*100,2)) # percent of missing data with current cut-off criterion

# throw them out before the merge
  dt0w5 <- dt0w5 %>%
  filter(!progressFilter$out)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w6: Survey Progress
```{r recSurvProgw6, echo=TRUE, warning=FALSE, message=FALSE}
# Chunk label is wave-specific: knitr stops on duplicate chunk labels by default.
# set progress cut-off criterion:
progressCutOff <- 95 #cut-off criterion in percent (NEEDS CHANGING; NO DROPOUTS YET)
# Progress of every wave-6 response plus a marker `out` for those below the cut-off
progressFilter <- dt0w6 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
(progressCutOffPerc <- round(sum(progressFilter$out)/nrow(progressFilter)*100,2)) # percent of missing data with current cut-off criterion

# throw them out before the merge
  dt0w6 <- dt0w6 %>%
  filter(!progressFilter$out)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w7: Survey Progress
```{r recSurvProgw7, echo=TRUE, warning=FALSE, message=FALSE}
# Chunk label is wave-specific: knitr stops on duplicate chunk labels by default.
# set progress cut-off criterion:
progressCutOff <- 97 #cut-off criterion in percent
# Progress of every wave-7 response plus a marker `out` for those below the cut-off
progressFilter <- dt0w7 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
(progressCutOffPerc <- round(sum(progressFilter$out)/nrow(progressFilter)*100,2)) # percent of missing data with current cut-off criterion

# throw them out before the merge
  dt0w7 <- dt0w7 %>%
  filter(!progressFilter$out)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w8: Survey Progress
```{r recSurvProgW8, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 8 response whose recorded progress is below the cut-off
progressFilter <- dt0w8 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w8 <- dt0w8 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w9: Survey Progress
```{r recSurvProgW9, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 9 response whose recorded progress is below the cut-off
progressFilter <- dt0w9 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w9 <- dt0w9 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w10: Survey Progress
```{r recSurvProgW10, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 10 response whose recorded progress is below the cut-off
progressFilter <- dt0w10 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w10 <- dt0w10 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w11: Survey Progress
```{r recSurvProgW11, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 11 response whose recorded progress is below the cut-off
progressFilter <- dt0w11 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w11 <- dt0w11 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w12: Survey Progress
```{r recSurvProgW12, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 12 response whose recorded progress is below the cut-off
progressFilter <- dt0w12 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w12 <- dt0w12 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w13: Survey Progress
```{r recSurvProgW13, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 13 response whose recorded progress is below the cut-off
progressFilter <- dt0w13 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w13 <- dt0w13 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w14: Survey Progress
```{r recSurvProgW14, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 14 response whose recorded progress is below the cut-off
progressFilter <- dt0w14 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w14 <- dt0w14 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w15: Survey Progress
```{r recSurvProgW15, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 15 response whose recorded progress is below the cut-off
progressFilter <- dt0w15 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w15 <- dt0w15 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w16: Survey Progress
```{r recSurvProgW16, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 16 response whose recorded progress is below the cut-off
progressFilter <- dt0w16 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w16 <- dt0w16 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w17: Survey Progress
```{r recSurvProgW17, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 17 response whose recorded progress is below the cut-off
progressFilter <- dt0w17 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w17 <- dt0w17 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w18: Survey Progress
```{r recSurvProgW18, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 18 response whose recorded progress is below the cut-off
progressFilter <- dt0w18 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w18 <- dt0w18 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w19: Survey Progress
```{r recSurvProgW19, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 19 response whose recorded progress is below the cut-off
progressFilter <- dt0w19 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w19 <- dt0w19 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w20: Survey Progress
```{r recSurvProgW20, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 20 response whose recorded progress is below the cut-off
progressFilter <- dt0w20 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w20 <- dt0w20 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w21: Survey Progress
```{r recSurvProgW21, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 21 response whose recorded progress is below the cut-off
progressFilter <- dt0w21 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w21 <- dt0w21 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

#### Filter w22: Survey Progress
```{r recSurvProgW22, echo=TRUE, warning=FALSE, message=FALSE}
# set progress cut-off criterion:
progressCutOff <- 97 # cut-off criterion in percent
# flag every wave 22 response whose recorded progress is below the cut-off
progressFilter <- dt0w22 %>%
  dplyr::select(Progress) %>%
  mutate(out = Progress < progressCutOff)
table(progressFilter$out)
# percent of responses flagged by the current cut-off criterion
# (na.rm = TRUE keeps the figure defined if Progress has missing values)
(progressCutOffPerc <- round(sum(progressFilter$out, na.rm = TRUE) / nrow(progressFilter) * 100, 2))

# throw them out before the merge; the condition is evaluated on the data
# frame's own Progress column, so it cannot get out of step with the rows being
# filtered (rows with a missing Progress are dropped, as before)
dt0w22 <- dt0w22 %>%
  filter(Progress >= progressCutOff)

rm(progressFilter, progressCutOff, progressCutOffPerc)
```

### Recoded Items   
Re-coding reverse coded items and the Qualtrics language codes.
```{r recode, echo=T, warning=F, message=F}
# Recoded Items
# Local helper: flips the sign of the -2..2 response codes. The -99 code is
# listed explicitly and mapped onto itself.
reverseScore <- function(item) {
  recode(as.numeric(item),
         `-2` = 2, `-1` = 1, `0` = 0, `1` = -1, `2` = -2, `-99` = -99)
}

# Add the three reverse-scored items as labelled "_R" variables.
dt5newVars <- dt4Straightliner %>%
  mutate(
    jbInsec02_R = labelled(reverseScore(jbInsec02),
                           labels = NULL, label = "Job Insecurity 02 (re-coded)"),
    disc03_R = labelled(reverseScore(disc03),
                        labels = NULL, label = "Discontent 03 (re-coded)"),
    bor03_R = labelled(reverseScore(bor03),
                       labels = NULL, label = "Boredom 03 (re-coded)")
  )
rm(reverseScore)

# Language
# Import the Qualtrics language codes and attach them via Q_Language, keeping
# every row of dt5newVars (all.x = TRUE).
qualtricsLanguage <- read_excel("data/collab data/Shared/Data/raw data/qualtricsLanguageCodes.xlsx")
dt5newVars <- merge(x = dt5newVars, y = qualtricsLanguage,
                    by = "Q_Language", all.x = TRUE)
rm(qualtricsLanguage, dt4Straightliner)
```

### Filter Participants
#### "Bad" ResponseIds
```{r filtBefCalc, echo=T, warning=F, message=F}
# RUG Representative
# Qualtrics filter: responses with gc == 1 from the "Rep RUG" source
  tmp <- filter(dt5newVars, gc == 1, source == "Rep RUG")

# which people do we exclude? (at least one of the four filter flags is set)
  exclRepRUG <- filter(
    tmp,
    FilterPreview == 1 | FilterProgress == 1 | FilterTime == 1 | FilterStraightliner == 1
  )
  # export the excluded ResponseIds for qualtrics; the file name carries the export time
    exclRepRUGRespId <- exclRepRUG$ResponseId
    namExcl <- paste0("data/cleaned data/Exclude RUG RespId ", format(Sys.time(), format = "%F %H-%M %Z"),".csv")
    write.csv(exclRepRUGRespId, file = namExcl)
    rm(namExcl)

# which people do we include? (none of the four filter flags is set)
  inclRepRUG <- filter(
    tmp,
    FilterPreview == 0,
    FilterProgress == 0,
    FilterTime == 0,
    FilterStraightliner == 0
  )
  # which of the included people repeat a non-empty e-mail address (ICRec_1_TEXT)
  # that already occurred in an earlier row
    duplEmailRepRug <- inclRepRUG %>%
      filter(duplicated(ICRec_1_TEXT), ICRec_1_TEXT != "") %>%
      transmute(ResponseId, E_mail = ICRec_1_TEXT)

# which people do we include? (no straightlining exclusion)
  inclRepRUG2 <- filter(
    tmp,
    FilterPreview == 0,
    FilterProgress == 0,
    FilterTime == 0
  )

# drop the helper objects again (duplEmailRepRug is kept)
rm(tmp, exclRepRUG, exclRepRUGRespId, inclRepRUG, inclRepRUG2)

# NYU Representative
# Qualtrics filter: responses with gc == 1 from the "Rep NYU-AD" source
  tmp <- filter(dt5newVars, gc == 1, source == "Rep NYU-AD")

# which people do we exclude? (at least one of the four filter flags is set)
  exclRepNYU <- filter(
    tmp,
    FilterPreview == 1 | FilterProgress == 1 | FilterTime == 1 | FilterStraightliner == 1
  )
  # export the excluded ResponseIds for qualtrics; the file name carries the export time
    exclRepNYURespId <- exclRepNYU$ResponseId
    namExcl <- paste0("data/cleaned data/Exclude NYU RespId ", format(Sys.time(), format = "%F %H-%M %Z"),".csv")
    write.csv(exclRepNYURespId, file = namExcl)
    rm(namExcl)

# which people do we include? (none of the four filter flags is set)
  inclRepNYU <- filter(
    tmp,
    FilterPreview == 0,
    FilterProgress == 0,
    FilterTime == 0,
    FilterStraightliner == 0
  )
  # which of the included people repeat a non-empty e-mail address (ICRec_1_TEXT)
  # that already occurred in an earlier row
    duplEmailRepNYU <- inclRepNYU %>%
      filter(duplicated(ICRec_1_TEXT), ICRec_1_TEXT != "") %>%
      transmute(ResponseId, E_mail = ICRec_1_TEXT)

# which people do we include? (no straightlining exclusion)
  inclRepNYU2 <- filter(
    tmp,
    FilterPreview == 0,
    FilterProgress == 0,
    FilterTime == 0
  )

# drop the helper objects again (duplEmailRepNYU is kept)
rm(tmp, exclRepNYU, exclRepNYURespId, inclRepNYU, inclRepNYU2)
```

#### Filter Out
```{r filtOut, echo=TRUE, warning=FALSE, message=FALSE}
# TEMPORARY (the commented code should only be run after creating the coded_country; it serves to identify exclusion by country)
# throw out the ones that need to be excluded
  # tmp <- dt5newVars %>%
  # filter(FilterPreview == 0,
  #        FilterProgress == 0)
  # # how many people do we lose per country
  #   tmp <- tmp %>%
  #     group_by(coded_country) %>%
  #     summarise(outTime = sum(FilterTime),
  #               outStraight = sum(FilterStraightliner),
  #               total = n(),
  #               percOut = (outTime + outStraight)/ total)
  #   # save it
  #     write.csv(tmp,'data/cleaned data/Exclusion by Country.csv'); rm(tmp)
  # 

# keep only the responses for which none of the four filter flags is set
dt5newVars <- dt5newVars %>%
  filter(FilterPreview == 0,
         FilterProgress == 0,
         FilterTime == 0,
         FilterStraightliner == 0)

# prepare country before checking for test
  dt5newVars$countryAdj <- dt5newVars$country %>%
    tolower() %>% # make them lower case
    gsub(" ", "", ., fixed = TRUE) %>% # remove white space
    as.character() # make sure everything is characters

# prepare whatRel the same way (stored as relAdj)
  dt5newVars$relAdj <- dt5newVars$whatRel %>%
    tolower() %>% # make them lower case
    gsub(" ", "", ., fixed = TRUE) %>% # remove white space
    as.character() # make sure everything is characters

# remove test sessions
  strings <- c("^test", "test$", "^yasin", '^check') # add strings that need removing
  # Only the three free-text columns are searched. ResponseId is used solely to
  # identify the matching rows; it is not pattern-matched itself, so an ID that
  # happens to start or end with one of the strings is not treated as a test.
  tmp <- dt5newVars %>%
    dplyr::filter_at(dplyr::vars(countryAdj, ICRec_1_TEXT, deb),
                     dplyr::any_vars(stringr::str_detect(., paste(strings, collapse = "|")))) %>% # loop over cols
    dplyr::pull(ResponseId)
  dt5newVars <- dt5newVars[!(dt5newVars$ResponseId %in% tmp), ]; rm(tmp) # remove test Response IDs
```

## **Merge Dataframes**
```{r merge}
# change column names for waves: every column of dt0w1 ... dt0w22 is prefixed
# with "w<wave number>_" (e.g. "w1_", "w22_")
  for (waveNr in 1:22) {
    waveName <- paste0("dt0w", waveNr)
    waveData <- get(waveName)
    colnames(waveData) <- paste(paste0("w", waveNr), colnames(waveData), sep = "_")
    assign(waveName, waveData)
  }
  rm(waveNr, waveName, waveData) # loop helpers only

# create correct ID for merge across waves from the cleaned datasets
  # baseline: the participant's own ResponseId
  dt5newVars$mergeID <- as.character(dt5newVars$ResponseId)
  # waves 1-22: the wave's (already prefixed) ExternalReference column
  # (add MTurk IDs but puh...)
  for (waveNr in 1:22) {
    waveName <- paste0("dt0w", waveNr)
    waveData <- get(waveName)
    waveData$mergeID <- as.character(waveData[[paste0("w", waveNr, "_ExternalReference")]])
    assign(waveName, waveData)
  }
  rm(waveNr, waveName, waveData) # loop helpers only
  
# How many people do not have a merge ID?
# For the baseline and each of the 22 waves: report how many rows lack a merge
# ID and drop them. A merge ID counts as missing when it is an empty string OR
# NA. Testing only `== ""` would yield NA for NA IDs, so the count would print
# as NA and the logical subsetting would keep all-NA rows instead of removing
# the affected responses.
# (wave 1: missing IDs are due to the link being sent out and translator
#  testing; wave 2: translator testing)
  mergeIdTargets <- c(dt5newVars = "the baseline",
                      setNames(paste("wave", 1:22), paste0("dt0w", 1:22)))
  for (mergeIdName in names(mergeIdTargets)) {
    mergeIdData <- get(mergeIdName)
    mergeIdMissing <- is.na(mergeIdData$mergeID) | mergeIdData$mergeID == ""
    cat(paste0("How many people do we loose in ", mergeIdTargets[[mergeIdName]],
               " due to missing merge ID:"))
    print(sum(mergeIdMissing))
    assign(mergeIdName, mergeIdData[!mergeIdMissing, ])
  }
  rm(mergeIdTargets, mergeIdName, mergeIdData, mergeIdMissing) # loop helpers only

# merge (can also be dt0Raw but for now separate)
# The waves are left-joined onto the baseline one after the other by mergeID
# (duplicates if full_join).
  joinOnMergeId <- function(dtf1, dtf2) {
    dplyr::left_join(dtf1, dtf2, by = "mergeID")
  }
  dt0All <- Reduce(
    joinOnMergeId,
    list(dt5newVars,
         dt0w1, dt0w2, dt0w3, dt0w4, dt0w5, dt0w6, dt0w7, dt0w8, dt0w9, dt0w10, dt0w11,
         dt0w12, dt0w13, dt0w14, dt0w15, dt0w16, dt0w17, dt0w18, dt0w19, dt0w20, dt0w21, dt0w22)
  ) # merge multiple dfs at once
  rm(joinOnMergeId)

# problems (weirdly there are some people in wave 1 without a matching response ID (no idea why...))
# wave-1 rows without a baseline match are computed and discarded right away
  tmp <- dplyr::anti_join(dt0w1, dt5newVars, by = "mergeID")
  rm(tmp)

# find mistakes if not the same length
# For every wave: number of rows in the wave data minus the number of merged
# rows whose baseline e-mail (ICRec_1_TEXT) equals the wave's RecipientEmail,
# and the same for ResponseId vs. the wave's ExternalReference. Each pair of
# figures is followed by the fixed remark recorded for that wave.
  for (waveNr in 1:22) {
    waveRows <- nrow(get(paste0("dt0w", waveNr)))
    waveEmail <- dt0All[[paste0("w", waveNr, "_RecipientEmail")]]
    waveExtRef <- dt0All[[paste0("w", waveNr, "_ExternalReference")]]

    cat(paste0("How many people do not have fitting emails or response IDs in wave ", waveNr, ":"))
    print(waveRows - sum(dt0All$ICRec_1_TEXT == waveEmail, na.rm = TRUE)) # Emails
    print(waveRows - sum(dt0All$ResponseId == waveExtRef, na.rm = TRUE)) # Response IDs
    if (waveNr == 1) {
      cat("We loose them because the weirdness above")
    } else {
      cat("Nobody lost; emails are probably string problems")
    }
  }
  rm(waveNr, waveRows, waveEmail, waveExtRef) # loop helpers only

# remove every object whose name contains "dt0w" (the per-wave data frames)
rm(list = ls(pattern = "dt0w"))

# make correct NAs: the value -99 is turned into NA in all columns; the merged
# data then continues under the name dt5newVars
dt5newVars <- dplyr::mutate_all(dt0All, na_if, -99)
rm(dt0All)

```

## **Data Preparation**   
Note: For each scale we run an item analysis and combine the items into mean scores (.m) and factor scores (.fa). We also center (.c) and standardize (.z) the mean scores. Most of these items are not yet labelled for SPSS. For centering and standardizing to be accurate, we first apply the relevant exclusion filters.

### Country
We currently have `r length(table(dt5newVars$country))` different free text country responses. Here we aim to consolidate them into one variable.
```{r Country, echo=T, warning=F, message=F}
# CJ: Just as a benchmark, geolocate the IP address. This can always be used
# CJ: if a free-text country response does not resolve.
# MA: Should only be used for respondents who agree; VPNs are also problematic.

# CJ: This is a slightly cleaner, faster, and more informative regex-based approach.
# CJ: The function and dictionary (based on the code below, some errors corrected)
# CJ: are in the file sourced below (cat_words() is presumably defined there -- confirm).

# saveRDS(dt5newVars$country, file = "country.RDS")

source("./scripts/functions/dictionary_functions.R")

# Original note: "sourcing seems to destroy everything".
# NOTE(review): presumably the reason the dictionary is written out inline below
# instead of being read from a sourced file -- confirm.
country_dict <- list('Afghanistan' = c('Afghanistan'),
                     'Albania' = c('albania', 'shkoder', '(?i)Albania'),
                     # Single 'Algeria' entry: the list previously carried this name twice, which is
                     # ambiguous for any name-based lookup; all patterns of both entries are kept.
                     'Algeria' = c('(?i)Algrie', 'الجزاء و', 'ALGERIE',  'جزائر',   'Alger',  'الجزاء و ', 'الجزاير',  '🇩🇿', 'ALGÉRIE',  'البليدة',  'algeri', 'الجزاىر',  'الجوائر',    'Algeria', 'الحزائر',  'الجزائر', 'algérie', 'الجزاءر', 'Algérie', 'Algerie',
                                   'الجزاير (حمادي بومرداس)', 'الحزاءر'),
                     'Andorra' = c('(?i)andorra'),
                     'Argentina' = c('Αργεντινή', '(?i)αργεντινή', 'argentin.', 'Argentine', 'ARGENTINA', 'Argentina', '^arge$'),
                     'Armenia' = c('(?i)армения'),
                     # 'austrija' is the South-Slavic/Baltic spelling of Austria (Australia is
                     # "Australija"), so the pattern belongs to Austria rather than Australia.
                     'Australia' = c('tasmania', 'a.strali.', 'australi..', 'a.strlaia', '199'),
                     'Austria' = c('النمسا', 'áo', 'oostenrijk',  '(?i)Австрія', '(?i)autriche', 'Asturies', '(?i)austria', 'Austrija', 'austrija', 'Austria', 'sterreich'),
                     'Azerbaijan' = c('azerba', 'Azarbaycan',  'Azerbaycan', '(?i)Azerbaijan'),
                     'Bahrain' = c('бахре.н', 'bahrain', 'Bahrain'),
                     'Bangladesh' = c('bangladesh', 'Bangladesh', 'বাংলাদেশ'),
                     'Belarus' = c('(?i)Belarus', '(?i)беларусь'),
                     'Belgium' = c('βελγιο', 'belgien', 'Бельгия', 'b.lgica', 'belgi.', 'belg', 'belgique', 'бельгия', 'belgium'),
                     'Benin' = c('(?i)bénin'),
                     'Bosnia and Herzegovina' = c('Bosnia and Herzegovina', 'bosniaandherzegovina', 'bosnaihercegovina', 'Bosna i Hercegovina', "bih"),
                     'Botswana' = c('(?i)botswana'),
                     'Brazil' = c('brazil', 'riodejaneiro', 'Rio de Janeiro', 'Esp santo', 'Brassil', 'BRASIL', 'Brazil', 'brasil', 'Brasil'),
                     'Brunei' = c('bruneidarussalam', '^brunei'),
                     'Bulgaria' = c('(?i)bulgaria', 'Bulgaria', 'българия'),
                     'Cambodia' = c('(?i)Cambodia'),
                     'Cameroon' = c('camer', 'Cameroun'),
                     'Canada' = c('แคนาดา', 'canad', 'کانادا', '(?i)Windsor', '(?i)Québec', 'Ontario', 'Vancouver', 'vancouver', 'КАНАДА', 'CANADA', 'Canadá', '(?i)Канада',  'канада', 'Canada', '^ca$'),
                     'Chad' = c('(?i)Tchad'),
                     'Chile' = c('^chil.$', '^chile$', 'CHile',  'Chile', 'CHILE'),
                     'China' = c('中国china', '中国泰安', '中国山东', '中国全年在中国', '中国、中国', '中国，一年四季都在国内', '中国，一年都生活在国内。', '^hkg$', '^china$','Macao', 'kina',  'macau',  'hui', 'chine', '山东泰安东平县', '中华人民共和国', '^chin.$','(?i)китай','^中国$', '^中國$',   '^中華民國$', '^中国大陆$'),
                     'Colombia' = c('.olumbien', '.ombia', '.olombi..'),
                     'Costa Rica' = c('costa', 'Costa rica', 	'Costa Rica'), 
                     'Croatia' = c('croa', '(?i)Kroatien', '(?i)Horvátország',  '(?i)Croácia', '(?i)Croazia',  '(?i)HRVATSKA', 'Hrvatskoj', '(?i)Hrv', 	'(?i)Hrvarska', '(?i)Hrvatska', "hrvatsk", '(?i)Croatia'),
                     'Cyprus' = c('kktc', 'Κύπρος', 'Κυπρο', 'Κύπρο', 'Cypris', 'cyp', 'Κυπρο','Kuzey Kıbrıs Türk Cumhuriyeti',  'Kuzey Kıbrıs Türk Cumhuriyeti', 'Κυπρος', 'Κύπρος', 'Κύπρος', '^κ...ο.', '^κ...ο', 'Cyprus', 'k.br.s'), 
                     'Czech Republic' = c('kamenice', 'czech',  'tschechien', 'Τσεχία', '(?i)Republica Checa', 'češka republika', 'Tschechien',  'República Checa',  '(?i)Czechia',  '(?i)Czech Republic', 'checa', 'чех.я'),
                     'Denmark' = c('mark', 'danemarca'), 
                     'Dominican Republic' = c('domin', 'Dominican Republic', 'República Dominicana'),
                     'Ecuador' = c('(?i)ecuador', 'Quito', 'Ecuador'),
                     'Egypt' = c('ميتسراج', 'ميتابوشيخه', 'مليج', 'مثر', 'كفرهلال', 'كفرنفره', 'كفرمناوهله', 'كفرشبرا', 'كفردنشواي', 'كفرالسكرية', 'كفرالخضرة', 'كفرابومحمود', 'كفرأبومحمود', 'قريهميتالكرام', 'قريةطنبشا', 'قريةصراوه', 'قريةدنشواي', 'قريةبوهةشطانوف', '	طملاي', 'ساقيهابوشعره', 'سنتريس', 'شيبينالكوم', 'شنتناالحجر', 'سنجرج', 'ستتريس', '	سنتريس', 'زاويهبمم', 'زاويهبمم', 'دروه', 'حصةمليج', 'جزيرهالحجر', 'تتا', 'بنها', 'الواديالجديد', 'القليوبيه', 'القااهرة', 'البرانيةمركزأشمون', 'البحيرهكفرالدوار', 'الاسكندرية',   'الإسكندرية', 'اسوان', 'ابوكلس', 'أشمونشنشور', 'أبشيش', 'mesir', 'منشأةالسلام',  'كفرطنبدي', 'كفرشكر',  'طنطا', 'شبين', 'ساحلالجوابر', 'بركةالسبع',  'القاهرة', 'العاشرمنرمضان', 'شنوان', 'قويسنا', 'اشمون', 'بركهالسبع','البتانون', 'الباجور', 'الباجو'  ,'مصر', '(?i)Egypt', 'المنوفيه', 'شبينالكوم', 'المنوفية', 'القاهره', 'منوف', 'تلا'),
                     'El Salvador' = c('salvador', '(?i)elsalvado', 'El salvado', 'El Salvaodr', 'El Salvador'),
                     'Estonia' = c('(?i)estonia', '(?i)эстония', 'Estonia'),
                     'Ethiopia' = c('ethiopia'),
                     'Finland' = c('(?i)finland', '(?i)ф.нлянд.я', '(?i)Finnland', 'Finland'),
                     'France' = c('(?i)france', 'régionpaca', 'francr', 'francuska', 'frankreich', 'fransa', '(?i)франц.я', '(?i)franța', 'Vendée', 'Valmontone (RM)', 'vouziers','guyanefrançaise',  'Région PACA', '(?i)Paris', '(?i)París', 'Oise', 'nouvelle-calédonie', 'LYON', 'Guyane Française', '(?i)Guadeloupe', 'Grand est', 'Fransa', 'Frznce', 'Réunion', 'Franche', 'frankreich', '^fr$', 'La Réunion', 'laréunion', 'La réunion', 'fran.e', 'francia',  'frankrijk'),
                     'Gabon' = c('Gabon'),
                     'Georgia' = c('(?i)грузия', 'georgien'),
                     # '^de$' previously carried a stray leading backtick, so it required a character
                     # before the start of the string and could never match.
                     'Germany' = c('^德国$', 'niemcy', '^de$', 'Duitsland', 'n\\w{0,2}ema.ka', 'brd', '(?i)Németország', 'ΓΕΡΜΑΝΙΑ', 'آلمان', '(?i)Германия', 'ألمانيا', 'Berlin', 'Γε..magne', 'Almanya', 'Alemania', '^.erman\\w{1,3}', '.eutschlan.', 'alemania',  'germania',  '^almanya$', 'berlin',  'duitsland', 'γ.ρ.....', 'gj.rman.', 'all.magn.'),
                     'Greece' = c('اليونان', 'ΕΛΛΑΣΑ', '(?i)athens', 'αθήνα', 'θεσσαλονίκη', 'ιωάννινα', '(?i)Ελλα', 'Ιωάννινα', 'greece', 'Θεσσαλονίκη', '(?i)Hellas', 'GRecia', 'Griechenland', 'Griekenland',  'Grèce',  '(?i)Ellada', '^希腊$', '(?i)GRECE', '(?i)ellada', 'Grecia', 'grèce', 'greee', 'griechenland', 'griekenland', '(?i)GREECE', 'Ελλα', 'ΕΛΛΑΣ', 'ΕΛΛΑΣΑ', 'ΕΛΛΆΔΑ', 'Greece', 'ΕΛΛΑΔΑ', 'grecia', '.λλ...', '.λλ..', 'ελλάδα', '200'),
                     'Guatemala' = c('guat', 'Guatemala'),
                     'Hong Kong S.A.R.' = c('Hongkong', '^hk$', 'hong.{0,}kong', '香港'),
                     'Hungary' = c('(?i)hungary', 'ungheria', 'magxarország', 'Ungheria', 'Ungarn', 'ma.arska', '(?i)Magyar', '(?i)Magyarorszag', 'magyarosz.g', '^magyarorsz', '(?i)magyar', 'ungarn'),
                     'Iceland' = c('iceland'),
                     'India' = c('i.dia', 'هند', 'Infis'),
                     'Indonesia' = c('^ina$', 'Jakarta', 'bali,indonesia', 'denpasar,bali,indonesia', 'Indomesia', 'indobesia', 'indonesia.', 'indonesia,bali', 'indonesia-kotatarakan', 'indonesià', 'Insonesia', 'republikindonesia', 'Indònesia', 'Yogyakarta, indonesia', 'Indonesià', 'Indonedia', 'Indomesia	', 'Indoesia', 'Indobesia', 'Imdonesia', 'Indoasia', '^indo$', 'i.d.nesi.', 'i.d.nesi..',  'kota, tarakan',  'jakarta', '205'),
                     'Iran' = c('لات', 'iran', 'abd',  'ایران', 'Iran'),
                     'Iraq' = c('عراق',  'iraq', 'الغراق', 'Iraq', 'العراق'),
                     'Ireland' = c('republicofireland', 'Ιρλανδία', 'irska', '^nireland$', 'Ιρλανδια', '^ireland$', 'Irska', 'Ireland', 'Irlanda', 'irlanda', 'ιρλ.....'),
                     'Israel' = c('جتالمثلث',  'كفرقاسم', '(?i)израиль',  'عارة',   'سخنين',   'رمله',   'القدس','Isreal', 'الطيبة',  'Израиль', 'الطيبه',  '(?i)Jerusalem', 'كفر قاسم',  'الطيرة', 'ISRAEL', 'جت المثلث', '^Israel$', 'جلجولية',  'israel', 'اللد',  'اسرائيل',  'كفر, قاسم',  'isreal',  'רמלה', 'الرمله'), 
                     'Italy' = c('ss.cosmaedamiano',  'tezzed.......tv.ita', 'santicosmaed..........', 'itali.', 'piemonte', 'marche', 'dro-tn', 'ital.', 'itali..', 'fiuggi', 'gragnanolucca',  'calagonone', 'ιταλία',  '(?i)Італ.я', 'Ιταλία', 'Terralba', 'Tezze di Piave (Tv) ITA', 'Vicenza', 'Vergato', 'Vittorio Veneto',  'SETTIMO VITTONE', 'SS. Cosma e Damiano', 'Segni (RM)', 'Scalea', 'Scauri', 'ROMA', 'Predazzo (Tn)', 'Pellizzano', 'Padova', 'Olevano romano',  'Orosei', 'Ital.a', 'Itaia', 'Imola', 'Ladispoli', 'Guidonia', 'Guidonia Montecelio', 'Ferentino', 'Fiano romano', 'Chieti', 'Cala gonone',  'Brescia',  'Bari',  'marenodipiave', '.тал..я','.тал.я', '.тали.а', 'Mareno di Piave', 'Frosinone', 'Milano', '.tal.a', 'Dorgali', 'olevanoromano', 'Castelforte', 'Bisceglie',  'Sardegna',  'salerno',  'sancosmaedamiano', 'santicosmaedamiano(lt)', '^ital', 'Roma', 'sardegna',  'bisceglie',  'ladispoli',  'castelforte',  'milano',  '^roma$', 'dorgali',  'bari',  'bologna',  'brescia', 'vicenza', 'cala, gonone', 'tezzedipiave(tv)ita', 'chieti',  'ferentino',  'frosinone',  'gragnano, lucca, ',  'guidonia',  'mareno, di, piave',  'modena',  'pellizzano',  'predazzo',  'refrontolo',  'cosma, e, damiano',  'scalea',  'scauri',  'segni',  'settimo, vittone',  'susegana',  'terralba',  'trento',  'treviso',  'tezze, di, piave',  'valmontone',  'vergato',  'veneto',  'gragnano, lucca', 'arezzo', 'soresina', 'avellino'),
                     'Jamaica' = c('jamaica', 'Jamaica'),
                     'Japan' = c('^日本　日本$', '^愛知$', '^日本　愛知$', '(?i)jap.n', 'japao', '(?i)Japão', '(?i)Japonya', 'にほん', 'Giappone',  'giappone', 'japonya', '^日本$', '^일본$', '^愛知県$'),
                     'Jordan' = c(  'الاردن', 'Jordanie',  'jordan', 'الأردن'),
                     'Kazakhstan' = c('(?i)каза..тан', 'рк', 'казах', 'кз',	'^kz$',  '(?i)алматы', 'a.akh..an', 'kа.aza.stan', 'захстан', 'қазақстан', '.aza.stan', 'kaza..stan'),
                     'Kenya' = c('kenia', 'Kenia'),
                     'Kosovo' = c('shtime', 'vshtrri', 'vraniq,suharekë', 's.harek.', 'sk.nderaj', 'sfe..l', 'prizren', 'rahovec',  'raçak-shtime', 'klin.', 'pej.', 'mitrovi..',  'kosov', 'gjilan', 'drenas', '^de.an$', 'vushtrri', 'obiliq', 'ferizaj', 'ske...raj', 'istog', '.osov.', 'pri..tin.','poduj.v.', 'mill.sh.v.', 'mil.sh.v.', 'l.pj.n', 'bellan.c.', 'gjak.v.'),
                     'Kuwait' = c('الكويت'),
                     'Kyrgyzstan' = c('(?i)кыргызстан'),
                     'Laos' = c('(?i)laos', 'Laos'),
                     'Latvia' = c('latvia', 'Latvia'),
                     'Lebanon' = c('(?i)lebanon', 'LIBAN', '(?i)Ливан',  'لبنان',  'Lebanon', '(?i)liban'),
                     'Libya' = c('ليبيا'),
                     'Lithuania' = c('litouwen', 'litvan.a', '(?i)lithuania', 'lituania'),
                     'Luxembourg' = c('lux','(?i)Luksemburg',  '(?i)Luxembourg', '(?i)Luxemburg'),
                     'Malaysia' = c('pulaupinang', 'm.laysia', 'mlalaysia', 'malaudia5', 'malaysi.....s', 'kajang.kualalumpur', '....aysia', 'malay', 'Malaysia', 'ماليزيا', '马来西亚', '馬來西亞'),
                     'Mali' = c('(?i)Mali'),
                     'Malta' = c('(?i)malta', 'Malta'),
                     'Mauritius' = c('maurice', 'İle Maurice', 'Mauritius'),
                     'Mexico' = c('mexico', 'Μεξικό', 'MÉXICO', 'México', 'méxico', 'Mexico'),
                     'Moldova' = c('re..........dova', 'модова', 'moldova', 'мо..ова'),
                     'Mongolia' = c('(?i)Mongolia'),
                     'Montenegro' = c('crnagora', '(?i)Montenegro', 'Crna Gora'),
                     'Morocco' = c('المغرب',  'rabat', 'النغرب',  'MAROC', 'مغرب', 'morocco', '(?i)Morocco', 'Morroco', 'Maroko', 'Marruecos', 'maroc', 'Maroc'),
                     'Myanmar' = c('(?i)myanmar'),
                     'Namibia' = c('(?i)Namibia'),
                     'Nepal' = c('nepal', 'Nepal'),
                     # 'New Zealand' was listed as a Netherlands pattern; it now sits with New Zealand.
                     'Netherlands' = c('thenl', 'nederlans', '(?i)Netherland', 'holandija',  '..riledejos', '(?i)Nizozemska',  'Holandija',  'Gelderland', '荷兰', '(?i)Н..ерланд.',  '(?i)Голанд.я', '(?i)Paises Bajos', '(?i)Curacao',  '(?i)Hollanda', 'Países Bajos', 'Paesi Bassi', '(?i)netherland', 'Nl', 'Belanda', 'Olanda', 'NL', 'Niederlande', 'The Netherlands', 'Netherlands', 'Nederland',  'nederland',  'niederlande',  'belanda',  '^nl$',  'olanda',  'paesi, bassi',  'paesi bassi',  'bajos',  'gelderland',  '(?i)hollanda', 'paes.+bass.', 'nerderland', 'aruba', '204'),
                     'New Zealand' = c('นิวซีแลนด์', 'new.+zeal',  'newzealand',  'neuseeland', 'New Zealand'),
                     'Nigeria' = c('nigeria', 'Nigeria'),
                     'North Macedonia' = c('North Macedonia'),
                     'Norway' = c('noruega', 'norway', 'norv.ge', 'nor..g..'),
                     'Oman' = c('^oman$', 'Oman'),
                     'Pakistan' = c('^pakistan', 'pakistan$', 'پاکستان', 'Peshawar',  'Abbottabad', 'Pak',	'Pakistan',  'abbottabad',  'peshawar'),
                     'Palestine' = c('جنين-فلسطين', 'بيتلحم', 'نابلس',     'كفر عقب',     'فلسطين،غزة',     'فلسطين غزة',   'فلستين',    'غزة',  'رامالله',     '	جنين - فلسطين',   'بيت لحم', 'palestine', 'رام الله',  'Palestine', 'فلسطين, ',  '^فلسطين$',  'الرملة'),
                     'Panama' = c('panam'), 
                     'Peru' = c('PERÚ', '^(?i)Peru$', '(?i)Perú', '^(?i)peru$', '^perú$'),
                     'Philippines' = c('phillipines',  'pllipines', 'phillippines', 'hils.', '^thephilippines$', 'philippinea', 'phillippines', '^philippines$', 'republicofthephilippines', 'Ph', 'Pilipinas', '^Phil$', 'Philippinea', '^phil$',  'filipinas', 'Philippines', 'PHILIPPINES', '206'),
                     # 'Portogallo' is Italian for Portugal and was filed under Poland; moved to Portugal.
                     # NOTE(review): 'spore' resembles the Singapore shorthand "S'pore" -- confirm it
                     # really belongs under Poland.
                     'Poland' = c('pols..',  'spore', 'polen', 'polaska', 'p.ock', 'pl', '(?i)poland', 'POLAND', 'POLONIA', 'Poland',  'polonia', '(?i)Польша', '(?i)поль.а', '(?i)polska'),
                     'Portugal' = c('(?i)portugal', '葡萄牙', 'portogallo', 'Portogallo', 'Portugal'),
                     'Qatar' = c('qatar', 'Qatar', 'Katar'),
                     'Romania' = c('r.mania', 'r..mania', '^ro$', 'iasi', 'bucuresti', '(?i)rom.ni.', '^rom.n.$', 'suceava', 'romanis', 'piatrasoimuluijudețuneamț', '(?i)Roumanie', '208'),
                     'Russia' = c('.россии', 'россия', 'rusia', '(?i)Рос..я', '^рф$',  'Rusia', 'российскаяфедерация', '202'),
                     'Republic of Serbia' = c('zllatarë', '(?i)srbija', 'svrbija', 'srija', 'srbijs', 'sebia',  '(?i)Serbis', '(?i)Servië', 'SEBIA', 'У Србији',  'у Србији',  '(?i)Србијс', '(?i)усрбији', 'Szerbia', '(?i)Srpski', '(?i)Srbiji', 'SRBIJA',  'serbia',  'србија', 'srbiji', 'Србија', 's.erbia', 'Srbija', '201', 'pr.sh.v.'),
                     'Rwanda' = c('Rwanda'),
                     'Saudi Arabia' = c('المدينهالمنورة', 'اريا',  'المدينة المنورة', 'المملكة', 'جازان',  'جده',  '(?i)SaudiArabia', 'جدة',  'Riyadh',  'Sa',    'سعوديه',  '(?i)Riyad', '(?i)Saudi', 'K.S.A',  '(?i)Arabia Saudita', 'Arabie saoudite', 'Saidi Arabia', '	Saudi arbia', 'Ksa', 'Saudi Arabia', 'SA', 'riyad',  'saud.+arab',  'arabia, saudita',  '^saudi$', '209',  'kingdom, of, saudia, arabia', '(?i)arabiasaudita', '(?i)arabiesaoudite',   'ksa',  'k\\.s\\.a',  'arabie, saoudite',  'الرياض',  'السعودية',  'السعوديه', 'emira+arab', 'رياض',  'خرخير',  'حفر الباطن',  'القويعيه',   'المدينه المنورة', 'مدينة ضرماء',      'عنيزة',    'سكاكا الجوف',     'ضرماء',      '	سكاكا الجوف',  'سعودية',   'الدمام', 'الرباض', 'السعرديه', 'السعوديّه'),
                     'Singapore' = c('สิงคโปร์', '^sg$', '(?i)singapore', 'SINGAPORE', '(?i)Singapur', '(?i)Singapore', '新加坡'),
                     'Slovakia' = c('slova', '(?i)Словаччина',   '(?i)Szlovákia',  'Slovakia', 'Slovak Republic'),
                     'Slovenia' = c('(?i)Slovenia', 'Slovenija'),
                     'South Africa' = c('gauteng', 'جنوب أفريقيا', 'Garankuwa','Western Cape',  'Limpopo', 'Western cape', 'Eastern Cape', 'Cape Town', 'Gauteng', 'limpopo', 'SOUTH AFRICA', 'southafrica🇿���america', 'South aftica', 'South Afica', 'South AFRICA', 'South Africa', 'africa',  '^sa$',  'sudáfrica',  'western, cape', '(?i)westerncape', 'southaf...', 'afriquedusud'),
                     'South Korea' = c('한국', 'h.nqu.c', '韩国', 'korea', 'güneykore', '한국', '대한민국', 'Korea, Republic of', 'Güney Kore', 'Korea Selatan, Busan',  'South Korea', 'güney, kore', '203'),
                     'Spain' = c('galicia', '(?i).спан.я', 'saipan', 'espanya',  '^es$', 'euskadi', 'espagne', 'espsña', 'ισπανία', 'spania', 'Ισπανία', 'Pais vasco', 'Montalbán de Córdoba', 'Madrid', 'Málaga', '(?i)Fuerteventura', 'Espña', '^Es$', '(?i)Esapaña', 'El Rocío', 'Eapaña', 'Cataluña', 'Andalucia', 'Catalunya', 'Espsña', 'Euskal Herria', 'Іспанія', '(?i)Spanyolország', 'SPAIN', 'Estado Español', 'estadoespañol', 'españs', 'español', 'españ', 'espana', 'Españistan', 'ESpaña', 'Espanha', 'paisvasco', 'Espana', 'montalbándecórdoba', 'Spanje', '(?i)E.pañ.', 'Españ', 'Espagne', 'Spanien', 'Español', 'Espanya', 'spain', 'Spagna', '(?i)Spain', '^esp$', '(?i)ESPAÑA', 'España', 'spagna',  'spanien',  'catal',  'euskal, herria',  'basque',  'eapaña',  'esapaña',  'madrid',  'montalbán, de, córdoba',  'pais, vasco',  'spanje', 'euskalherria'),
                     'Sweden' = c('^σουηδια$', 's.edia', 'weden', 'Svezia', 'σουηδια', '(?i)SWEDEN',  'Isvec', '(?i)Suède'),
                     'Switzerland' = c('elvetia', '(?i)Ελβετία', '.svi.re',  'switzerl', 'Zwitserland', 'Switzerlannd', 'Svizzera','İsviçre',  'Швейцарія', 'Suiza', 'SUIZA', 'schweiz', 'Ελβετία', 'Schweiz', 'Switzerland', 'suiza', 'Suisse', 'svizzera',  'zwitserland',  'switzerland', 'suisse'),
                     'Taiwan' = c('taiwan', 'Tayvan', 'Taiwan', '.灣'),
                     'Thailand' = c('^thailand$', 'Thailand', 'ไทย', '^thai'),
                     "Trinidad and Tobago" = c("trinid", 'Trinidad', 'Trinidad and Tobago'),
                     'Tunisia' = c('Tunise', 'جندوبة',  'Tunis', 'Tunis', 'Tunisia', 'tuni', 'Tunisie', 'تونس'),
                     'Turkey' = c('eski.ehir,turqi',  'تركيا - اسطنبول', 'turkey', 'تركيا', 'Tür', 't.rk..', 'Турция',  'Türkıye', '^Tr$', '^TR$', 'tr$', 'turki', 'turquie', 'TÜRKİYE', 'tur.iye', 'Türkiye', 't.rk.y.', 't.rk.{0,2}', "t[a-z]rkiye", 'турция'), #"'\u00fcrkiye'"
                     'Ukraine' = c('(?i)ukraine', 'укр', 'кр.м', '(?i)Ураїна', 'укра.н.', 'Киев', '(?i)Укрїна', '(?i)Ук...на', 'Українська', 'Украине', 'УКРАЇНА', 'Украіна', 'у.ра..а', 'україна', '(?i)украина', 'Україна', 'Украина', '207'),
                     'United Arab Emirates' = c('^الإمارات$', '^u.a.e.$', 'khali.a', 'оаэ', 'u\\.a\\.\\.', 'emiradosarabesunidos', 'emiratelearabeunite', 'emiratosarabesunidos', 'emiratsarabesunis', '(?i)abudhabi',    'الإمارات العربية المتحدة' ,  'Uae' , 'Emirats Arabes Unis', 'Emiratos Arabes Unidos' ,'(?i)Abu Dhabi',  'Emiratele Arabe Unite' ,   'Dubai',   'Emirados Arabes Unidos', 'الخرج','United Arab emirates',  'United Arab Emirates',  'UAE', 'united, arab, emirates', 'الامارات', '^uae$', 'arabemirates', 'لعربيةالمتحدة'),
                     'United Kingdom' = c('isoleverginibritanniche', 'inglaterra', 'anh', '.νωμ....ασίλειο', '.γγλ..', 'regatulunit', '(?i)ecosse',  	'ηνωμένοβασίλειο', 'ηνωμένοβασίλειο(αγγλία)', 'theuk', '(?i)regnounito', 'mareabritanie',  '(?i)Αγγλια', 'اسكوتلندا', 'Egland', 'royaume uni', 'Liverpool', 'Grand Cayman, Cayman Islands', '^GB$', 'Anglia', 'Ecosse', '(?i)Angleterre', '(?i)Великобритания', 'Αγγλια', 'U.K','SCOTLAN', '(?i)Schotland',  'Scozia',  'Ηνωμένο Βασίλειο', '(?i)London', 'londra', 'Verenigd Koninkrijk', 'Marea Britanie', 'Britain', 'Isole Vergini Britanniche',  'Great Britain',  'United Kingdon', '^Northern ireland$', 'united Kingdom', 'Regno Unito', 'United kingdom', 'United Kingdon', 'uni.+kin', 'Wales','verenigdkoninkrijk', 'Reino Unido', '^Uk$', 'Scotland', 'United Kingdom', 'england', '^UK$',  '^uk$',  '(?i)reino, unido',  'britain',  'regno, unito',  'u\\.k\\.',  'بريطانيا',  'the, uk',  'u, k',  'verenigd, koninkrijk', 'scotland',  'england',  'wales',  'ingiltere',  'northern, ireland',  'egland',  '^gb$',  'n, ireland',  'schotland', 'grandcayman,caymanislands', 'scozia', '^英国$', 'ngiltere', 'n.{4,9}ireland', 'reinounido', 'anglia'),
                     'United Republic of Tanzania' = c('tanzania', 'Tanzania'),
                     'United States of America' = c('الولاياتالمتحدةالامريكية', '美國鳳凰城', '^アメリカ合衆国$', 'unitedstates....america', 'tempe', 'penssylvania', 'loseeuu', 'فلوريدا',     'الولايات المتحدة الامريكية'  , 'الولايات المتحدة الأمريكية',   '(?i)Αμερική' , 'ηπα',   '^U.s.a.$', 'Tempe' , 'alameda',  'TEXAS', 'untied states', 'TX', 'Untied STates', 'Wisconsin', 'virginia', 'illinois', 'Puerto Rico', 'puertorico', 'idaho', 'Penssylvania', 'NY', 'new yark', 'Munford', 'michigan', 'midwest', 'Minnesota', 'EEUU', 'IDAHO', 'georgia', 'concord,ca', 'Amerika Serikat', 'amerikaserikat', '^ameri.a', '(?i)ALASKA', '^us$', '미국', 'آمریکا', 'أمريكا', '(?i)Pennsylvania', '(?i)Pennssylvania', 'united sta.e.', 'Estados unidos', 'mcdonaughcounty', 'southcarolina', 'minnesota', 'NEWYORK', 'Arizona',  '(?i)New York', 'new york', 'New york', 'NEW YORK', 'США', 'newjersey', 'newyark', 'new jersey',  'Unites stats', 'California', 'Uniteed States', 'United. States', 'United Staes', 'Unite States', 'ESTADOS UNIDOS', 'Unites states', 'United Ststes', 'United statea', 'United Sates', 'Estados Unidos', 'northcarolina', '^ny$',  'Estados unidos', 'munford', 'wisconsin', 'ESTADOS UNIDOS', 'Untied statest', '\\d{0,30}merica$', 'United State', 'U.S', 'unite. sta//w{1,3}es', '^usa', 'u.s.a',  'uni.+stat', 'uni.+merica',  'unitedststes', 'unitedstaes', '^u.{0,1}s.{0,1}$',  'esta.+unid',  'colorado',  'xas',  'sates',  'amerika, serikat',  'california',  'corlifornia',  'états-unis', 'puerto, rico',  '^tx$',  '^tn$', '^美国$', 'new+york', 'сша', 'un..edsta.es', 'arizona', '^\\w{1,3}us$', 'merica$', 'theu.s.', 'امریکا', '^美國$'), 
                     'Uruguay' = c('urug', 'Uruguay'),
                     'Uzbekistan' = c('u.{2,5}istan', 'u\\w{2,6}istan', '(?i)Uzbekistan', '(?i)узбекистан'),
                     'Venezuela' = c('venezuela'),
                     'Vietnam' = c('vietna.', '^vn$', 'tâyninh', '...tnam', 'vi..̣tnam'),
                     'Yemen' = c('العند')
)


# Match every adjusted free-text country entry against the dictionary.
country_matches <- cat_words(dt5newVars$countryAdj, country_dict)
#country_matches$words[country_matches$words %in% names(country_matches$unmatched)] <- NA
# Check unmatched strings, fix common ones
  # View(country_matches$unmatched)
# Check duplicates, and which regular expressions triggered them
  # View(country_matches$dup)

# Store the coded country; entries listed among the unmatched strings become NA.
dt5newVars$coded_country <- country_matches$words
dt5newVars$coded_country[dt5newVars$coded_country %in% names(country_matches$unmatched)] <- NA

# save for Solomiia (disabled)
# A stray token `nam` used to precede the comment below; it was evaluated as an
# object name and stopped the chunk with "object 'nam' not found".
  # namCtrySol <- paste0("data/cleaned data/Solomiia Country List ", format(Sys.time(), format = "%F %H-%M %Z"),".csv")
  # write.csv(country_matches$unmatched, file = namCtrySol)

# CODE FOR EXPORT:
# Save the raw country entries together with the rnaturalearth country names
# in a time-stamped .Rdata file.
country_entry <- dt5newVars$country
country_names <- rnaturalearth::ne_countries(scale = "medium")$admin
namTR <- paste0("data/cleaned data/countries ", format(Sys.time(), format = "%F %H-%M %Z"),".Rdata")
save(country_entry, country_names, file = namTR)

rm(country_dict, country_matches, country_names, country_entry)
```
<br>
<div class="alert alert-warning">
<strong><i class="fa fa-exclamation-triangle"></i> Action needed:</strong> <br> 
We currently have `r length(unique(dt5newVars$country))` different free-text country responses. The most recent coding leaves `r sum(country_counts$n[country_counts$country != ""])` responses that are still not consolidated.
</div>

### Religion
We currently have `r length(table(dt5newVars$whatRel))` different free-text religion responses. Here we aim to consolidate them into one variable.
```{r Religion, echo=T, warning=F, message=F}
#saveRDS(dt5newVars$country, file = "country.RDS")
# clean the religion variable

# sourcing seems to destroy everything
# Religion dictionary: each list name is a target category and each character
# vector holds the patterns assigned to it. It is passed to cat_words() together
# with dt5newVars$relAdj further down (patterns are presumably applied as
# regular expressions, as in the country dictionary -- confirm in cat_words()).
# NOTE(review): some patterns are very broad (e.g. '^ch' and '^kr.' under
# Christianity, '-3' under Missing Data, 'unite' under Other) -- check the
# duplicate report for unintended matches.
rel_dict <- list('Hinduism' = c('^.indu'),
                 'Buddhism' = c('buddist', '佛', '^budi', 'buddism','^.uddh', '^.udh', '불교', '仏教', '佛教', 'буддизм', 'ph.tgi.o', 'พุทธ'),
                 'Islam' = c('^isla', '.sl.m', 'ইসলাম', 'musul', 'musl', 'ISLAM', 'ислам', '..sl.m', '^.slam', 'الاسلام', 'مسلم', 'الإسلام', 'اسلام', 'الأسلام'),
                 'Confucianism' = c('^.onfucia'),
                 'Christianity' = c('^sda','^pkn', 'episcopal','^cath', 'advent', 'hri..anin','c.nggi.o', '.hristian', '^cat', '^.attolic', '천주교', '.athol', '^.hristi', '^.hrist', '^.risti', '.lic.$', 'olic$', '^.rtodo', '^.rthodo', '.rtodo', '^.rotest', '^.ravoslav', '.ристианство', 'Ορθόδοξος', '기독교', '.uther', 'katoli', '.равославие', '.aptist', '^.evangel', 'Ορθόδοξος', 'χ............', 'χ.........', 'християнство', '.ervormd', '^.orm.n', '^.uter', 'jesus', 'evangel', 'kristen', 'crist.', 'правосла', 'methodist', 'anglican', 'христианин', '^rk$', 'chrze.cija.stwo', 'правос', 'orthodox', '^χ.', '개신교', 'presbyterian', '基督教', 'crestin', 'hriscanin', 'churchofengland', 'pentecostal', 'cofe', 'ο....ο..', 'reform', 'episcopalian', 'христианско.', 'lds', 'bornagain', 'chr.tie', '^kr.', 'kereszt', '^rc', 'cat.lic', 'християнин', 'христианка', '가톨릭', 'христьянство', 'adventist', '^unitarianism$', '^unitarian$', 'христи', 'キリスト教', '^ev$', 'christ', 'nondenominational', 'penticostal', '^cre.ti', '^ch', '^hri', '天主教', 'tant$', 'evang.l'),
                 'Taoism' = c('^.aois', '道教'),
                 'Judaism' = c('^.udais', '^jew', 'jew$', '^jud'),
                 'Missing Data' = c('-3'),
                 'Non-Religious' = c('agnostic', 'none', 'keine', 'nothing', '^no$'),
                 'Other' = c('^.agan', 'jehova', 'sikh', '^xo$', 'white', 'unitarianuniversal', '35', 'никакой', '神道', '^wic', '^sp.r.tual', '^.sp.r.t', '^god$', '^sp.r.tu..l', 'unite')

                 )


# Match every adjusted free-text religion entry against the dictionary.
rel_matches <- cat_words(dt5newVars$relAdj, rel_dict)
#rel_matches$words[rel_matches$words %in% names(rel_matches$unmatched)] <- NA
# Check unmatched strings, fix common ones
  # View(rel_matches$unmatched)
# Check duplicates, and which regular expressions triggered them
  # View(rel_matches$dup)

# Sum of the unmatched counts, leaving out the first entry
  sum(rel_matches$unmatched)-rel_matches$unmatched[[1]]

# Store the coded religion; entries listed among the unmatched strings become NA.
dt5newVars$coded_rel <- rel_matches$words
dt5newVars$coded_rel[dt5newVars$coded_rel %in% names(rel_matches$unmatched)] <- NA

# save for Solomiia (disabled)
  # namRelSol <- paste0("data/cleaned data/Solomiia rel List ", format(Sys.time(), format = "%F %H-%M %Z"),".csv")
  # write.csv(rel_matches$unmatched, file = namRelSol)

# CODE FOR EXPORT:
# This part was copied from the country chunk and still (a) wrote to the same
# "countries <timestamp>.Rdata" path, overwriting the country export when both
# chunks run within the same minute, and (b) stored the rnaturalearth country
# names as `rel_names`. The export now gets its own file name and stores the
# dictionary's category names.
# The raw answers are read from `whatRel`, the column this section's intro
# counts (NOTE(review): the original read `dt5newVars$rel` -- confirm no such
# column is intended).
rel_entry <- dt5newVars$whatRel
rel_names <- names(rel_dict)
namTR <- paste0("data/cleaned data/religions ", format(Sys.time(), format = "%F %H-%M %Z"),".Rdata")
save(rel_entry, rel_names, file = namTR)

rm(rel_dict, rel_matches, rel_names, rel_entry)
```

### Filter Continued
#### Chinese trust responses made NA
```{r filtCh , echo=T, warning=F, message=F}
# Set the columns ending in GovCtry, GovState or Business to NA for respondents
# whose coded country is China.
# `%in%` replaces `==` in the test: with `==`, a missing coded_country makes the
# ifelse() test NA, so the answers of every respondent without a coded country
# were turned into NA as well. `%in%` yields FALSE for NA and keeps their data.
dt5newVars <- dt5newVars %>%
  dplyr::mutate_at(
    # columns to overwrite
    .vars = vars(ends_with("GovCtry"), ends_with("GovState"), ends_with("Business")),
    # replacement rule applied to each of them
    .funs = list(~ ifelse(coded_country %in% "China", NA, .))
  )

```

### Visualize Response Rates
#### For the baseline
We visualize how many responses we have per day.
```{r RespVis, echo=T, warning=F, message=F}
# Helper: line chart of the number of responses per 12-hour window.
# `timestamps` is a date-time vector; it is rounded to 12 hours and tabulated.
plotResponseCounts <- function(timestamps) {
  counts <- setNames(
    data.frame(table(lubridate::round_date(timestamps, unit = "12 hours"))),
    c("Date", "Count")
  )
  ggplot(counts, aes(x = Date, y = Count)) +
    geom_line(group = 1) +
    theme_bw() +
    # scale_y_continuous(limits = c(0, 158),
    #                    breaks = seq(0,160,20))+
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 8))
}

# All data
plotResponseCounts(dt5newVars$EndDate)

# only snowball (rows whose source does not contain "Rep")
tmpdt <- dt5newVars[!grepl("Rep", dt5newVars$source), ]
plotResponseCounts(tmpdt$EndDate)
rm(tmpdt)

# Check Chinese data
# (a stray `tmp` token used to precede this comment; it was evaluated after
#  `tmp` had been removed and stopped the chunk with "object 'tmp' not found")
tmp <- dt5newVars %>%
  filter(source != "Rep China",
         coded_country == "China" | coded_country == "Hong Kong S.A.R.")
plotResponseCounts(tmp$StartDate)

rm(tmp, plotResponseCounts)
```

#### For the recontact
```{r RespVisWeekly, echo=T, warning=F, message=F}

# One 0/1 indicator per *EndDate column: 1 when an end date is recorded.
tmp <- dplyr::mutate_all(
  dplyr::select(dt5newVars, ends_with("EndDate")),
  ~ifelse(is.na(.), 0, 1)
)

# Count of recorded end dates per respondent, minus one.
tmp$numRec <- rowSums(tmp) - 1

# Attach the descriptors used for the breakdowns.
tmp$coded_country <- dt5newVars$coded_country
tmp$source <- dt5newVars$source
tmp$lang <- dt5newVars$language

# -1 are the Chinese who have no endDate; 0 means no recontacts
table(tmp$numRec)
# recontact per language
table(tmp$numRec, tmp$lang)

rm(tmp)

```


### Political Orientation   
Political orientation was measured per language. We merge these variables here.

#### Update the x and y scales to be correct
As the picture sizes were different we update the x and y-axis here
```{r PolOrUpd, echo=T, warning=F, message=F}
# Amount subtracted from each per-language political-compass coordinate column
# (name = column, value = offset).
# NOTE(review): PolHR has an x offset only; no PolHR_1_y adjustment is applied
# -- confirm this is intended.
polOffsets <- c(
  PolEN_1_x = 200,    PolEN_1_y = 213,
  PolDE_1_x = 210,    PolDE_1_y = 177,
  PolID_1_x = 210,    PolID_1_y = 177,
  PolMS_1_x = 210,    PolMS_1_y = 181,
  PolES_ES_1_x = 239, PolES_ES_1_y = 181,
  PolFR_1_x = 236,    PolFR_1_y = 178,
  PolHR_1_x = 215,
  PolIT_1_x = 227,    PolIT_1_y = 173,
  PolHU_1_x = 286,    PolHU_1_y = 175,
  polNL_1_x = 196,    polNL_1_y = 176,
  PolPL_1_x = 245,    PolPL_1_y = 179,
  PolPT_1_x = 256,    PolPT_1_y = 177,
  PolRO_1_x = 224,    PolRO_1_y = 171,
  PolSQI_1_x = 211,   PolSQI_1_y = 177,
  PolSR_1_x = 209,    PolSR_1_y = 184,
  PolVI_1_x = 233,    PolVI_1_y = 178,
  PolTR_1_x = 210,    PolTR_1_y = 177,
  PolEL_1_x = 288,    PolEL_1_y = 181,
  PolRU_1_x = 239,    PolRU_1_y = 173,
  PolUK_1_x = 195,    PolUK_1_y = 176,
  PolUR_1_x = 210,    PolUR_1_y = 177,
  PolAR_1_x = 200,    PolAR_1_y = 177,
  PolFA_1_x = 186,    PolFA_1_y = 179,
  PolHI_1_x = 255,    PolHI_1_y = 182,
  PolBN_1_x = 210,    PolBN_1_y = 184,
  PolTH_1_x = 210,    PolTH_1_y = 185,
  PolKO_1_x = 279,    PolKO_1_y = 180,
  PolJA_1_x = 210,    PolJA_1_y = 185
)

# Shift every listed column by its offset.
for (polVar in names(polOffsets)) {
  dt5newVars[[polVar]] <- dt5newVars[[polVar]] - polOffsets[[polVar]]
}
rm(polOffsets, polVar)

```


#### Merge the Languages
```{r PolOr, echo=T, warning=F, message=F}
# Political orientation: collapse the per-language compass variables into one
# set of variables (coordinates, quadrant indicators, quadrant label).

# Row-wise sum that stays NA when a row has no value in any of the columns.
# Plain rowSums(..., na.rm = TRUE) returns 0 for such rows, which cannot be
# told apart from a genuine coordinate of 0 and disagrees with PolOrCat, which
# is NA for respondents without an answer.
sumOrNA <- function(cols) {
  total <- rowSums(cols, na.rm = TRUE)
  total[rowSums(!is.na(cols)) == 0] <- NA
  total
}

dt5newVars <- dt5newVars %>%
  mutate(PolOrX = labelled(sumOrNA(dplyr::select(., ends_with("_x"))),
                           labels = NULL, label="Political Compass X-Coordinate"),
         PolOrY = labelled(sumOrNA(dplyr::select(., ends_with("_y"))),
                           labels = NULL, label="Political Compass Y-Coordinate"),
         # Quadrant indicators: summed over languages, then turned into a text
         # label (1 -> quadrant name, 0 -> empty string).
         PolOrAuthoritarianLeft = rowSums(dplyr::select(., ends_with("_Authoritarian_Left")), na.rm = TRUE),
         PolOrAuthoritarianLeftLab = dplyr::recode(PolOrAuthoritarianLeft, `1` =  "Authoritarian Left", `0` = ""),
         PolOrAuthoritarianRight = rowSums(dplyr::select(., ends_with("_Authoritarian_right")), na.rm = TRUE),
         PolOrAuthoritarianRightLab = dplyr::recode(PolOrAuthoritarianRight, `1` =  "Authoritarian Right", `0` = ""),
         PolOrLibertarianLeft = rowSums(dplyr::select(., ends_with("_Libertarian_Left")), na.rm = TRUE),
         PolOrLibertarianLeftLab = dplyr::recode(PolOrLibertarianLeft, `1` =  "Libertarian Left", `0` = ""),
         PolOrLibertarianRight = rowSums(dplyr::select(., ends_with("_Libertarian_Right")), na.rm = TRUE),
         PolOrLibertarianRightLab = dplyr::recode(PolOrLibertarianRight, `1` =  "Libertarian Right", `0` = ""),
         PolOrOther = rowSums(dplyr::select(., ends_with("_Other")), na.rm = TRUE),
         PolOrOtherLab = dplyr::recode(PolOrOther, `1` =  "Other", `0` = ""),
         # Concatenate the labels; an empty result (no quadrant) becomes NA.
         PolOrCat = paste0(PolOrAuthoritarianLeftLab,
                           PolOrAuthoritarianRightLab,
                           PolOrLibertarianLeftLab,
                           PolOrLibertarianRightLab,
                           PolOrOtherLab),
         PolOrCat = as.factor(na_if(PolOrCat, ""))) %>%
  # Drop every column starting with "Pol", then re-select the three merged ones.
  dplyr::select(-starts_with("Pol"),
         PolOrX,
         PolOrY,
         PolOrCat)
attr(dt5newVars$PolOrCat,'label') <- 'Political Orientation Quadrant'
rm(sumOrNA)
```

### Affect    
<!-- TO DO: Write function for item analysis and scale construction -->


#### High Arousal Negative   
```{r affHighNeg, echo=F, results='asis', warning=F, message=F}
# High Arousal Negative
## Anger not measured in wave 1
#pairs.panels.new(dt5newVars %>% dplyr::select(affAnx, affNerv))

cat("<br>")

# Scale score from the items affAnx and affNerv, both keyed positively.
dt5newVars$affHighNeg.m <- scoreItems(
  items = dplyr::select(dt5newVars, affAnx, affNerv),
  keys = c(1, 1),
  min = 1,
  max = 5
)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$affHighNeg.m, skew=F)) %>%
#   mutate(vars = "High Arousal Negative Affect") %>%
#   kable(., caption = "High Arousal Negative Affect: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Derived versions of the scale score: centred (.c), standardised (.z), and
# factor scores from fa() called with its defaults on the same items (.fa).
dt5newVars$affHighNeg.c <- scale(dt5newVars$affHighNeg.m, center = TRUE, scale = FALSE)
dt5newVars$affHighNeg.z <- scale(dt5newVars$affHighNeg.m, center = TRUE, scale = TRUE)
dt5newVars$affHighNeg.fa <- fa(dplyr::select(dt5newVars, affAnx, affNerv))$scores
```

#### Low Arousal Negative Affect  
```{r affLowNeg, echo=F, results='asis', warning=F, message=F}
# Low Arousal Negative Affect
# Item analysis of affBor, affExh and affDepr; the `rely` element of the
# result is auto-printed into the report.
ia.affLowNeg <- dplyr::select(dt5newVars, affBor, affExh, affDepr) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.affLowNeg$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.affLowNeg)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(affBor, affExh, affDepr))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Low Arousal Negative Affect: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
#pairs.panels.new(dt5newVars %>% dplyr::select(affBor, affExh, affDepr))
# 
# cat("<br>")
# 
# Scale score from the three items, all keyed positively.
dt5newVars$affLowNeg.m <- scoreItems(
  items = dplyr::select(dt5newVars, affBor, affExh, affDepr),
  keys = c(1, 1, 1),
  min = 1,
  max = 5
)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$affLowNeg.m, skew=F)) %>%
#   mutate(vars = "Low Arousal Negative Affect") %>%
#   kable(., caption = "Low Arousal Negative Affect: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the same items (.fa).
dt5newVars$affLowNeg.c <- scale(dt5newVars$affLowNeg.m, center = TRUE, scale = FALSE)
dt5newVars$affLowNeg.z <- scale(dt5newVars$affLowNeg.m, center = TRUE, scale = TRUE)
dt5newVars$affLowNeg.fa <- fa(dplyr::select(dt5newVars, affBor, affExh, affDepr))$scores
```

#### Low Arousal Positive Affect  
```{r affLowPos, echo=F, results='asis', warning=F, message=F}
# Low Arousal Positive Affect
# Item analysis of affCalm, affContent and affRel; the `rely` element of the
# result is auto-printed into the report.
ia.affLowPos <- dplyr::select(dt5newVars, affCalm, affContent, affRel) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.affLowPos$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.affLowPos)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(affCalm, affContent, affRel))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Low Arousal Positive Affect: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# pairs.panels.new(dt5newVars %>% dplyr::select(affCalm, affContent, affRel))
# 
# cat("<br>")
# 
# Scale score from the three items, all keyed positively.
dt5newVars$affLowPos.m <- scoreItems(
  items = dplyr::select(dt5newVars, affCalm, affContent, affRel),
  keys = c(1, 1, 1),
  min = 1,
  max = 5
)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$affLowPos.m, skew=F)) %>%
#   mutate(vars = "Low Arousal Positive Affect") %>%
#   kable(., caption = "Low Arousal Positive Affect: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the same items (.fa).
dt5newVars$affLowPos.c <- scale(dt5newVars$affLowPos.m, center = TRUE, scale = FALSE)
dt5newVars$affLowPos.z <- scale(dt5newVars$affLowPos.m, center = TRUE, scale = TRUE)
dt5newVars$affLowPos.fa <- fa(dplyr::select(dt5newVars, affCalm, affContent, affRel))$scores
```

#### High Arousal Positive Affect  
```{r affHighPos, echo=F, results='asis', warning=F, message=F}
# High Arousal Positive Affect
# Item analysis of affEnerg, affExc and affInsp; the `rely` element of the
# result is auto-printed into the report.
ia.affHighPos <- dplyr::select(dt5newVars, affEnerg, affExc, affInsp) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.affHighPos$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.affHighPos)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(affEnerg, affExc, affInsp))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "High Arousal Positive Affect: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
#pairs.panels.new(dt5newVars %>% dplyr::select(affEnerg, affExc, affInsp))
# 
# cat("<br>")
# 
# Scale score from the three items, all keyed positively.
dt5newVars$affHighPos.m <- scoreItems(
  items = dplyr::select(dt5newVars, affEnerg, affExc, affInsp),
  keys = c(1, 1, 1),
  min = 1,
  max = 5
)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$affHighPos.m, skew=F)) %>%
#   mutate(vars = "High Arousal Positive Affect") %>%
#   kable(., caption = "High Arousal Positive Affect: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the same items (.fa).
dt5newVars$affHighPos.c <- scale(dt5newVars$affHighPos.m, center = TRUE, scale = FALSE)
dt5newVars$affHighPos.z <- scale(dt5newVars$affHighPos.m, center = TRUE, scale = TRUE)
dt5newVars$affHighPos.fa <- fa(dplyr::select(dt5newVars, affEnerg, affExc, affInsp))$scores
```

### Loneliness
```{r lone, echo=F, results='asis', warning=F, message=F}
# Item analysis of every column whose name starts with "lone"; the `rely`
# element of the result is auto-printed into the report.
ia.lone <- dplyr::select(dt5newVars, starts_with("lone")) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.lone$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.lone)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("lone")))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Loneliness: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("lone")))
# 
# cat("<br>")
# 
# Select the loneliness items once. The derived columns created below
# (lone.m, lone.c, lone.z, lone.fa) also start with "lone", so they are
# excluded explicitly; otherwise the factor analysis would be run on the items
# plus the scale scores derived from them (and scoring would break on re-runs).
loneItems <- dt5newVars %>%
  dplyr::select(starts_with("lone"), -matches("^lone\\.(m|c|z|fa)$"))

# Scale score from the three items, all keyed positively.
dt5newVars$lone.m <- scoreItems(keys = c(1, 1, 1), items = loneItems, min = 1, max = 5)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$lone.m, skew=F)) %>%
#   mutate(vars = "Loneliness") %>%
#   kable(., caption = "Loneliness: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the raw items only (.fa).
dt5newVars$lone.c <- scale(dt5newVars$lone.m, center = TRUE, scale = FALSE)
dt5newVars$lone.z <- scale(dt5newVars$lone.m, center = TRUE, scale = TRUE)
dt5newVars$lone.fa <- fa(loneItems)$scores
rm(loneItems)
```  

### Boredom
```{r bore, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "bor0", leaving out bor03; the
# `rely` element of the result is auto-printed into the report.
ia.bor <- dplyr::select(dt5newVars, starts_with("bor0"), -bor03) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.bor$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.bor)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("bor0"), -bor03))
```
<div class="alert alert-info">
<strong><i class="fa fa-exclamation-triangle"></i> Item dropped:</strong> <br> 
Item three was not well behaved. It seems to measure something else. We dropped it for now.
</div>

```{r bore.red, echo=F, results='asis', warning=F, message=F}

#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("bor0"), -bor03, -bor03_R))
# 
# cat("<br>")
# 
# Scale score from the two remaining "bor0" items (bor03 and bor03_R are
# dropped), both keyed positively.
dt5newVars$bor.m <- scoreItems(
  items = dplyr::select(dt5newVars, starts_with("bor0"), -bor03, -bor03_R),
  keys = c(1, 1),
  min = -3,
  max = 3
)$scores
#
# as.data.frame(psych::describe(dt5newVars$bor.m, skew=F)) %>%
#   mutate(vars = "Boredom") %>%
#   kable(., caption = "Boredom: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
#
# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the same items (.fa).
dt5newVars$bor.c <- scale(dt5newVars$bor.m, center = TRUE, scale = FALSE)
dt5newVars$bor.z <- scale(dt5newVars$bor.m, center = TRUE, scale = TRUE)
dt5newVars$bor.fa <- fa(dplyr::select(dt5newVars, starts_with("bor0"), -bor03, -bor03_R))$scores
```
  
### Isolation
```{r iso, echo=F, results='asis', warning=F, message=F}
cat(crayon::bold("Offline Isolation"))
# Item analysis of the columns ending in "inPerson" whose names do not start
# with "w"; the `rely` element of the result is auto-printed into the report.
ia.isoPers <- dplyr::select(dt5newVars, ends_with("inPerson"), -starts_with("w")) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.isoPers$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.isoPers)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(ends_with("inPerson"), -starts_with("w")))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Isolation offline: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
#pairs.panels.new(dt5newVars %>% dplyr::select(ends_with("inPerson"), -starts_with("w")))
# 
# cat("<br>")
# 
# Scale score from the three in-person contact items, all keyed positively.
dt5newVars$isoPers.m <- scoreItems(
  items = dplyr::select(dt5newVars, ends_with("inPerson"), -starts_with("w")),
  keys = c(1, 1, 1),
  min = 0,
  max = 7
)$scores
#
# as.data.frame(psych::describe(dt5newVars$isoPers.m, skew=F)) %>%
#   mutate(vars = "Isolation offline") %>%
#   kable(., caption = "Isolation offline: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
#
# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the same items (.fa).
dt5newVars$isoPers.c <- scale(dt5newVars$isoPers.m, center = TRUE, scale = FALSE)
dt5newVars$isoPers.z <- scale(dt5newVars$isoPers.m, center = TRUE, scale = TRUE)
dt5newVars$isoPers.fa <- fa(dplyr::select(dt5newVars, ends_with("inPerson"), -starts_with("w")))$scores

cat(crayon::bold("Online Isolation"))
# Item analysis of the columns ending in "online" whose names do not start
# with "w"; the `rely` element of the result is auto-printed into the report.
ia.isoOnl <- dplyr::select(dt5newVars, ends_with("online"), -starts_with("w")) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.isoOnl$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.isoOnl)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(ends_with("online"), -starts_with("w")))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Isolation online: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# #pairs.panels.new(dt5newVars %>% dplyr::select(ends_with("online"), -starts_with("w")))
# 
# cat("<br>")
# 
# Scale score from the three online contact items, all keyed positively.
dt5newVars$isoOnl.m <- scoreItems(
  items = dplyr::select(dt5newVars, ends_with("online"), -starts_with("w")),
  keys = c(1, 1, 1),
  min = 0,
  max = 7
)$scores
#
# as.data.frame(psych::describe(dt5newVars$isoOnl.m, skew=F)) %>%
#   mutate(vars = "Isolation online") %>%
#   kable(., caption = "Isolation online: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
#
# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the same items (.fa).
dt5newVars$isoOnl.c <- scale(dt5newVars$isoOnl.m, center = TRUE, scale = FALSE)
dt5newVars$isoOnl.z <- scale(dt5newVars$isoOnl.m, center = TRUE, scale = TRUE)
dt5newVars$isoOnl.fa <- fa(dplyr::select(dt5newVars, ends_with("online"), -starts_with("w")))$scores

# Leave House
# as.data.frame(psych::describe(dt5newVars$houseLeave, skew=F)) %>%
#   mutate(vars = "Leaving House") %>%
#   kable(., caption = "Leaving House: Item Descriptive", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
```

### Government Clarity
```{r gov, echo=F, results='asis', warning=F, message=F}
# as.data.frame(psych::describe(dt5newVars$extC19Msg)) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Government Response: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# ggplot(dt5newVars, aes(x = extC19Msg)) +
#   geom_histogram(binwidth=1, alpha=0.5) +
#   #geom_density(alpha=0.6)+
#   labs(title="Government Response distribution",x="Government Response", y = "Frequency") +
#   theme_Publication()
```

### Coronavirus Responses
#### Community Response
```{r community, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "c19Is"; the `rely` element of
# the result is auto-printed into the report.
ia.ext <- dplyr::select(dt5newVars, starts_with("c19Is")) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.ext$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.ext)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("c19Is")))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Community response: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("c19Is")))

cat("<br>")

# Scale score from the three "c19Is" items, all keyed positively.
dt5newVars$ext.m <- scoreItems(
  items = dplyr::select(dt5newVars, starts_with("c19Is")),
  keys = c(1, 1, 1),
  min = 1,
  max = 6
)$scores
#
# as.data.frame(psych::describe(dt5newVars$ext.m, skew=F)) %>%
#   mutate(vars = "Community response") %>%
#   kable(., caption = "Community response: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
#
# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the same items (.fa).
dt5newVars$ext.c <- scale(dt5newVars$ext.m, center = TRUE, scale = FALSE)
dt5newVars$ext.z <- scale(dt5newVars$ext.m, center = TRUE, scale = TRUE)
dt5newVars$ext.fa <- fa(dplyr::select(dt5newVars, starts_with("c19Is")))$scores
```

#### Behavioral Response
```{r beh, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "c19per"; the `rely` element of
# the result is auto-printed into the report.
ia.beh <- dplyr::select(dt5newVars, starts_with("c19per")) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.beh$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.beh)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("c19per")))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Behavioral response: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("c19per")))

cat("<br>")

# Scale score from the three "c19per" items, all keyed positively.
dt5newVars$beh.m <- scoreItems(
  items = dplyr::select(dt5newVars, starts_with("c19per")),
  keys = c(1, 1, 1),
  min = -3,
  max = 3
)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$beh.m, skew=F)) %>%
#   mutate(vars = "Behavioral response") %>%
#   kable(., caption = "Behavioral response: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the same items (.fa).
dt5newVars$beh.c <- scale(dt5newVars$beh.m, center = TRUE, scale = FALSE)
dt5newVars$beh.z <- scale(dt5newVars$beh.m, center = TRUE, scale = TRUE)
dt5newVars$beh.fa <- fa(dplyr::select(dt5newVars, starts_with("c19per")))$scores
```

#### Pro-Social Response
```{r proSo, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "c19ProSo"; the `rely` element
# of the result is auto-printed into the report.
ia.proSo <- dplyr::select(dt5newVars, starts_with("c19ProSo")) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.proSo$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.proSo)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("c19ProSo")))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Pro-Social response: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# #pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("c19ProSo")))
# 
# cat("<br>")
# 
# Scale score from the four "c19ProSo" items, all keyed positively.
dt5newVars$proSo.m <- scoreItems(
  items = dplyr::select(dt5newVars, starts_with("c19ProSo")),
  keys = c(1, 1, 1, 1),
  min = -3,
  max = 3
)$scores
#
# as.data.frame(psych::describe(dt5newVars$proSo.m, skew=F)) %>%
#   mutate(vars = "Pro-Social response") %>%
#   kable(., caption = "Pro-Social response: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the same items (.fa).
dt5newVars$proSo.c <- scale(dt5newVars$proSo.m, center = TRUE, scale = FALSE)
dt5newVars$proSo.z <- scale(dt5newVars$proSo.m, center = TRUE, scale = TRUE)
dt5newVars$proSo.fa <- fa(dplyr::select(dt5newVars, starts_with("c19ProSo")))$scores
```

### Likelihood Threat
```{r likelihood, echo=F, results='asis', warning=F, message=F}
# as.data.frame(psych::describe(dt5newVars$PLRAC19)) %>%
#   mutate(vars = "Likelihood c19") %>%
#   kable(., caption = "Likelihood c19: Item Descriptive", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# ggplot(dt5newVars, aes(x = PLRAC19)) +
#   geom_histogram(binwidth=1, alpha=0.5) +
#   #geom_density(alpha=0.6)+
#   labs(title="Likelihood c19 distribution",x="Likelihood c19", y = "Frequency") +
#   theme_Publication()
#   
# as.data.frame(psych::describe(dt5newVars$PLRAEco)) %>%
#   mutate(vars = "Likelihood Eco") %>%
#   kable(., caption = "Likelihood Eco: Item Descriptive", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# ggplot(dt5newVars, aes(x = PLRAEco)) +
#   geom_histogram(binwidth=1, alpha=0.5) +
#   #geom_density(alpha=0.6)+
#   labs(title="Likelihood Eco distribution",x="Likelihood Eco", y = "Frequency") +
#   theme_Publication()
```

### Hope and Efficacy
```{r hopeEff, echo=F, results='asis', warning=F, message=F}
# as.data.frame(psych::describe(dt5newVars$c19Hope)) %>%
#   mutate(vars = "Hope") %>%
#   kable(., caption = "Hope: Item Descriptive", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# ggplot(dt5newVars, aes(x = c19Hope)) +
#   geom_histogram(binwidth=1, alpha=0.5) +
#   #geom_density(alpha=0.6)+
#   labs(title="Hope distribution",x="Corona Virus Hope", y = "Frequency") +
#   theme_Publication()
#   
# as.data.frame(psych::describe(dt5newVars$c19Eff)) %>%
#   mutate(vars = "Efficacy") %>%
#   kable(., caption = "Efficacy: Item Descriptive", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# ggplot(dt5newVars, aes(x = c19Eff)) +
#   geom_histogram(binwidth=1, alpha=0.5) +
#   #geom_density(alpha=0.6)+
#   labs(title="Efficacy distribution",x="Corona Virus Efficacy", y = "Frequency") +
#   theme_Publication()
```

### State Paranoia
```{r para, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "para"; the `rely` element of
# the result is auto-printed into the report.
ia.para <- dplyr::select(dt5newVars, starts_with("para")) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.para$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.para)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("para")))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "State Paranoia: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("para")))

cat("<br>")

# Select the paranoia items once. The derived columns created below
# (para.m, para.c, para.z, para.fa) also start with "para", so they are
# excluded explicitly; otherwise the factor analysis would be run on the items
# plus the scale scores derived from them (and scoring would break on re-runs).
paraItems <- dt5newVars %>%
  dplyr::select(starts_with("para"), -matches("^para\\.(m|c|z|fa)$"))

# Scale score from the three items, all keyed positively.
dt5newVars$para.m <- scoreItems(keys = c(1, 1, 1), items = paraItems,
                                min = 0, max = 10)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$para.m, skew=F)) %>%
#   mutate(vars = "State Paranoia") %>%
#   kable(., caption = "State Paranoia: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the raw items only (.fa).
dt5newVars$para.c <- scale(dt5newVars$para.m, center = TRUE, scale = FALSE)
dt5newVars$para.z <- scale(dt5newVars$para.m, center = TRUE, scale = TRUE)
dt5newVars$para.fa <- fa(paraItems)$scores
rm(paraItems)
```

### Conspiracy Theory
```{r consp, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "consp"; the `rely` element of
# the result is auto-printed into the report.
ia.consp <- dplyr::select(dt5newVars, starts_with("consp")) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.consp$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.consp)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("consp")))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Conspiracy Theory: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("consp")))

cat("<br>")

# Select the conspiracy items once. The derived columns created below
# (consp.m, consp.c, consp.z, consp.fa) also start with "consp", so they are
# excluded explicitly to keep them out of the factor analysis.
conspItems <- dt5newVars %>%
  dplyr::select(starts_with("consp"), -matches("^consp\\.(m|c|z|fa)$"))

# Scale score from the three items, all keyed positively.
dt5newVars$consp.m <- scoreItems(keys = c(1, 1, 1), items = conspItems,
                                 min = 0, max = 10)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$consp.m, skew=F)) %>%
#   mutate(vars = "Conspiracy Theory") %>%
#   kable(., caption = "Conspiracy Theory: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores (.fa).
# These are stored under the consp.* names: writing them to para.* (as the
# copy-pasted code did) overwrote the State Paranoia variables.
dt5newVars$consp.c <- scale(dt5newVars$consp.m, center = TRUE, scale = FALSE)
dt5newVars$consp.z <- scale(dt5newVars$consp.m, center = TRUE, scale = TRUE)
dt5newVars$consp.fa <- fa(conspItems)$scores
rm(conspItems)
```

### Disempowerment
```{r disemp, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "fail" (excluding names that
# contain "DO"), with -99 recoded to NA first; the `rely` element of the
# result is auto-printed into the report.
ia.disemp <- dplyr::select(dt5newVars, starts_with("fail"), -contains("DO")) %>%
  na_if(., -99) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.disemp$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.disemp)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("fail"), -contains("DO")) %>% na_if(., -99))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Disempowerment: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")

#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("fail"), -contains("DO")) %>% na_if(., -99))

cat("<br>")

# Disempowerment items with the -99 code recoded to NA. Built once so that
# scoring and factor analysis use the same cleaned data; previously fa() ran
# on the raw items and treated -99 as a real response.
disempItems <- dt5newVars %>%
  dplyr::select(starts_with("fail"), -contains("DO")) %>%
  na_if(., -99)

# Scale score from the three items, all keyed positively.
dt5newVars$disemp.m <- scoreItems(keys = c(1, 1, 1), items = disempItems,
                                  min = -2, max = 2)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$disemp.m, skew=F)) %>%
#   mutate(vars = "Disempowerment") %>%
#   kable(., caption = "Disempowerment: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the cleaned items (.fa).
dt5newVars$disemp.c <- scale(dt5newVars$disemp.m, center = TRUE, scale = FALSE)
dt5newVars$disemp.z <- scale(dt5newVars$disemp.m, center = TRUE, scale = TRUE)
dt5newVars$disemp.fa <- fa(disempItems)$scores
rm(disempItems)
```

### Societal Discontent
```{r socDis, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "disc" (excluding discPers,
# disc03 and names containing "DO"), with -99 recoded to NA first; the `rely`
# element of the result is auto-printed into the report.
ia.socdisc <- dplyr::select(dt5newVars, starts_with("disc"), -discPers, -disc03, -contains("DO")) %>%
  na_if(., -99) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.socdisc$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.socdisc)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("disc"), -discPers , -disc03, -contains("DO")) %>% na_if(., -99))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Societal Discontent: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("disc"), -discPers , -disc03, -contains("DO")) %>% na_if(., -99))

cat("<br>")

# Societal discontent items with the -99 code recoded to NA. Built once so
# that scoring and factor analysis use the same cleaned data; previously fa()
# ran on the raw items and treated -99 as a real response.
socdiscItems <- dt5newVars %>%
  dplyr::select(starts_with("disc"), -discPers, -disc03, -contains("DO")) %>%
  na_if(., -99)

# Scale score from the three items, all keyed positively.
dt5newVars$socdisc.m <- scoreItems(keys = c(1, 1, 1), items = socdiscItems,
                                   min = -2, max = 2)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$socdisc.m, skew=F)) %>%
#   mutate(vars = "Societal Discontent") %>%
#   kable(., caption = "Societal Discontent: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
#
# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the cleaned items (.fa).
dt5newVars$socdisc.c <- scale(dt5newVars$socdisc.m, center = TRUE, scale = FALSE)
dt5newVars$socdisc.z <- scale(dt5newVars$socdisc.m, center = TRUE, scale = TRUE)
dt5newVars$socdisc.fa <- fa(socdiscItems)$scores
rm(socdiscItems)
```

### Job Insecurity
```{r jbinsc, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "jbInsec" (excluding jbInsec02
# and jbInsec04), with -99 recoded to NA first; the `rely` element of the
# result is auto-printed into the report.
ia.jobinsec <- dplyr::select(dt5newVars, starts_with("jbInsec"), -jbInsec02, -jbInsec04) %>%
  na_if(., -99) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.jobinsec$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.jobinsec)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("jbInsec"), -jbInsec02, -jbInsec04) %>% na_if(., -99))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Job insecurity: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("jbInsec"), -jbInsec02, -jbInsec04) %>% na_if(., -99))

cat("<br>")

# Job insecurity items with the -99 code recoded to NA. Built once so that
# scoring and factor analysis use the same cleaned data; previously fa() ran
# on the raw items and treated -99 as a real response.
jobinsecItems <- dt5newVars %>%
  dplyr::select(starts_with("jbInsec"), -jbInsec02, -jbInsec04) %>%
  na_if(., -99)

# Scale score from the three items, all keyed positively.
dt5newVars$jobinsec.m <- scoreItems(keys = c(1, 1, 1), items = jobinsecItems,
                                    min = -2, max = 2)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$jobinsec.m, skew=F)) %>%
#   mutate(vars = "Job insecurity") %>%
#   kable(., caption = "Job insecurity: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the cleaned items (.fa).
dt5newVars$jobinsec.c <- scale(dt5newVars$jobinsec.m, center = TRUE, scale = FALSE)
dt5newVars$jobinsec.z <- scale(dt5newVars$jobinsec.m, center = TRUE, scale = TRUE)
dt5newVars$jobinsec.fa <- fa(jobinsecItems)$scores
rm(jobinsecItems)
```

### Financial Strain
```{r finance, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "PFS0"; the `rely` element of
# the result is auto-printed into the report.
ia.pfs <- dplyr::select(dt5newVars, starts_with("PFS0")) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.pfs$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.pfs)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("PFS0")))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Financial Strain: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")

#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("PFS0")))

cat("<br>")

# Scale score from the three "PFS0" items, all keyed positively.
dt5newVars$pfs.m <- scoreItems(
  items = dplyr::select(dt5newVars, starts_with("PFS0")),
  keys = c(1, 1, 1),
  min = -2,
  max = 2
)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$pfs.m, skew=F)) %>%
#   mutate(vars = "Financial Strain") %>%
#   kable(., caption = "Financial Strain: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the same items (.fa).
dt5newVars$pfs.c <- scale(dt5newVars$pfs.m, center = TRUE, scale = FALSE)
dt5newVars$pfs.z <- scale(dt5newVars$pfs.m, center = TRUE, scale = TRUE)
dt5newVars$pfs.fa <- fa(dplyr::select(dt5newVars, starts_with("PFS0")))$scores
```

### Coping
#### Problem Solving
```{r probSolv, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "probSolving" (excluding names
# that contain "DO"); the `rely` element of the result is auto-printed into
# the report.
ia.probSolv <- dplyr::select(dt5newVars, starts_with("probSolving"), -contains("DO")) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.probSolv$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.probSolv)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("probSolving"), -contains("DO")))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Problem Solving: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")

#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("probSolving"), -contains("DO")))

cat("<br>")

# Scale score from the three "probSolving" items, all keyed positively.
dt5newVars$probSolv.m <- scoreItems(
  items = dplyr::select(dt5newVars, starts_with("probSolving"), -contains("DO")),
  keys = c(1, 1, 1),
  min = -2,
  max = 2
)$scores

# (disabled) descriptive table for the scale score
# as.data.frame(psych::describe(dt5newVars$probSolv.m, skew=F)) %>%
#   mutate(vars = "Problem Solving") %>%
#   kable(., caption = "Problem Solving: Scale Descriptives", row.names = FALSE) %>%
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

# Centred (.c) and standardised (.z) scale score, plus factor scores from
# fa() called with its defaults on the same items (.fa).
dt5newVars$probSolv.c <- scale(dt5newVars$probSolv.m, center = TRUE, scale = FALSE)
dt5newVars$probSolv.z <- scale(dt5newVars$probSolv.m, center = TRUE, scale = TRUE)
dt5newVars$probSolv.fa <- fa(dplyr::select(dt5newVars, starts_with("probSolving"), -contains("DO")))$scores
```

#### Distraction
```{r distract, echo=F, results='asis', warning=F, message=F}
# Item analysis of the columns starting with "posrefocus" (excluding names
# that contain "DO"); the `rely` element of the result is auto-printed into
# the report.
ia.distract <- dplyr::select(dt5newVars, starts_with("posrefocus"), -contains("DO")) %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()
ia.distract$rely
# cat("<br><br>A gls factor analysis was conducted. Items were regressed to a single factor. Their loadings are the following:")
# as.data.frame(Scale::ReportTable(ia.distract)) %>%
#   kable(., row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")
# 
# as.data.frame(psych::describe(dt5newVars %>% dplyr::select(starts_with("posrefocus"), -contains("DO")))) %>%
#   mutate(vars = rownames(.)) %>%
#   kable(., caption = "Distraction: Item Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")
# 
# cat("<br>")

#pairs.panels.new(dt5newVars %>% dplyr::select(starts_with("posrefocus"), -contains("DO")))

cat("<br>")

dt5newVars$distract.m <- scoreItems(keys=c(1,1,1), 
                                  items = dt5newVars %>% dplyr::select(starts_with("posrefocus"), -contains("DO")),
                                  min = -2, max = 2)$scores

# as.data.frame(psych::describe(dt5newVars$distract.m, skew=F)) %>%
#   mutate(vars = "Distraction") %>%
#   kable(., caption = "Distraction: Scale Descriptives", row.names = FALSE) %>% 
#   kable_styling("hover", full_width = F, latex_options = "hold_position")

dt5newVars$distract.c <- scale(dt5newVars$distract.m, scale = F, center = T)
dt5newVars$distract.z <- scale(dt5newVars$distract.m, scale = T)
dt5newVars$distract.fa <- fa(dt5newVars %>% dplyr::select(starts_with("posrefocus"), -contains("DO")))$scores
```

### Well-Being
#### Happy
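The happiness item was presented with a reversed response scale in the French version of the survey. The chunk below recodes it so that it runs in the same direction as in all other languages.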
```{r happy,echo=F, warning=F, message=F}
# Flip the 1-10 `happy` responses for rows where language == "French".
# recode_if() is a project helper sourced in the setup chunk; it presumably applies
# the mapping only where the condition holds - confirm in scripts/functions/recode_if.R.
dt5newVars <- dt5newVars %>%
  dplyr::mutate(
    # step 1: reverse the numeric codes (1 <-> 10, 2 <-> 9, ...)
    happy = recode_if(as.numeric(happy), language == "French",
                      `1` = 10, `2` = 9, `3` = 8, `4` = 7, `5` = 6,
                      `6` = 5, `7` = 4, `8` = 3, `9` = 2, `10` = 1),
    # step 2: re-attach the variable label (no value labels)
    happy = labelled(happy, labels = NULL,
                     label = "In general, how happy would you say you are?")
  )
```

## Cross-Cultural Adjustment
### Merge Weights from Bertus
```{r mergWeight,echo=F, warning=F, message=F}
# Locate the SPSS weights file. haven::read_spss() needs a single path, so fail
# with a clear message if the folder holds no match or more than one.
wgtFile <- dir("data/collab data/Shared/Data/raw data/weights", pattern = "Weights",
               full.names = TRUE, ignore.case = TRUE)
if (length(wgtFile) != 1) {
  stop("Expected exactly one weights file in 'data/collab data/Shared/Data/raw data/weights', found ",
       length(wgtFile), ".", call. = FALSE)
}
wgtCtry <- haven::read_spss(wgtFile)

# Attach the weights to every participant by country x age x gender cell
nBeforeWeights <- nrow(dt5newVars)
dt5newVars <- dplyr::left_join(dt5newVars, wgtCtry, by = c("coded_country", "age", "gender"))

# A left join only adds rows when the weights file has duplicated cells;
# flag that instead of silently duplicating participants.
if (nrow(dt5newVars) != nBeforeWeights) {
  warning("Merging the weights changed the number of rows from ", nBeforeWeights,
          " to ", nrow(dt5newVars), "; check the weights file for duplicated cells.",
          call. = FALSE)
}
rm(wgtCtry, wgtFile, nBeforeWeights)
```

### Create Pp Grand Mean and Sd
#### Set-Up
We load the list of variables that should be harmonized. The list is taken from the `RMD50` column of the variable request sheet.
```{r grandMean,echo=F, warning=F, message=F}
# Global placeholder that the sourced nameOut() function fills in
stringOut <- NA
source("./scripts/functions/nameOut.R") # defines nameOut()

# Download the variable request sheet as CSV (only the RMD50 column is used below)
url <- gsheet::construct_download_url('https://docs.google.com/spreadsheets/d/13PFXsmgjrZBNddHodR2Z_80cjicN4s7yd8BbazKffx8/edit?usp=sharing', format = "csv", sheetid = NULL)
dtReq <- gsheet::gsheet2tbl(url, sheetid = NULL)
rm(url)
namedtReq <- dtReq[1, ]     # first row is kept separately (read for the author below)
dtReq <- dtReq[-(1:3), ]    # the first three rows are not variable rows

# RMD50 is the request column that lists all continuous variables
name <- 'RMD50'
reqCol <- paste0(name, '_vars')

# Author of the request: third line of the first-row cell of that column
author <- as.character(dplyr::select(namedtReq, one_of(reqCol)))
author <- strsplit(author, "\n")[[1]][3]

# Variable name plus request entry, keeping only rows with a non-empty request
tmp <- dtReq %>%
  dplyr::select(name = var,
                request = one_of(reqCol)) %>%
  dplyr::filter(request != "")

# nameOut() turns every row into the requested variable names
varNames <- apply(tmp, 1, nameOut)
rm(tmp, reqCol)

```

#### Fix reverse coded items
```{r}
# Build a reverse-scored copy of the data. Each step flips the columns whose
# names match the pattern; the original dt5newVars is left untouched.
dt5newVarsRec <- dt5newVars %>%
  # response format 1 to 5
  dplyr::mutate_at(
    .vars = vars(matches("affCalm|affContent|affEnerg|affExc|affInsp|affRel|affLov|posrefocus01|posrefocus02|posrefocus03")),
    .funs = list(~ recode(as.numeric(.), `1` = 5, `2` = 4, `3` = 3, `4` = 2, `5` = 1))
  ) %>%
  # response format -2 to 2
  dplyr::mutate_at(
    .vars = vars(matches("disc03|jbInsec02|Masks_4")),
    .funs = list(~ recode(as.numeric(.), `-2` = 2, `-1` = 1, `0` = 0, `1` = -1, `2` = -2))
  ) %>%
  # response format -3 to 3
  dplyr::mutate_at(
    .vars = vars(matches("bor03|neuro03|godOpinInfl")),
    .funs = list(~ recode(as.numeric(.), `-3` = 3, `-2` = 2, `-1` = 1, `0` = 0, `1` = -1, `2` = -2, `3` = -3))
  ) %>%
  # response format 1 to 7
  dplyr::mutate_at(
    .vars = vars(matches("MoneyTime")),
    .funs = list(~ recode(as.numeric(.), `1` = 7, `2` = 6, `3` = 5, `4` = 4, `5` = 3, `6` = 2, `7` = 1))
  ) %>%
  # response format 1 to 6
  dplyr::mutate_at(
    .vars = vars(matches("godForgive")),
    .funs = list(~ recode(as.numeric(.), `1` = 6, `2` = 5, `3` = 4, `4` = 3, `5` = 2, `6` = 1))
  )

```

#### Calculate Mean and Sd per Wave
```{r}
# Harmonized data for all requested variables (saved later): every column is
# rescaled to the 1-7 range. apply() turns the selection into a matrix.
  dt5newVarsHarmo <- dt5newVarsRec %>%
    dplyr::select(one_of(as.character(varNames))) # all requested variable names
  dt5newVarsHarmo <- apply(dt5newVarsHarmo, 2, scales::rescale, to = c(1, 7)) # rescale column by column

# Per-participant response-set mean and sd, for baseline and every follow-up wave.
# Row 1 of varNames holds the baseline variable names, row k + 1 those of wave k.
# Baseline columns are called respSetMean / respSetSd; wave k columns get the
# prefix "w<k>_". Columns are added in the order baseline, w1, ..., w22.
  nWaves <- 22
  stopifnot(is.matrix(varNames), nrow(varNames) >= nWaves + 1)

  for (wave in 0:nWaves) {
    wavePrefix <- if (wave == 0) "" else paste0("w", wave, "_")

    # variables requested for this wave, rescaled to 1-7 (apply over columns)
    tmpdt <- dt5newVarsRec %>%
      dplyr::select(one_of(as.character(varNames[wave + 1, ])))
    tmpdt <- apply(tmpdt, 2, scales::rescale, to = c(1, 7))

    # grand mean and sd across the rescaled items of each participant
    dt5newVars[[paste0(wavePrefix, "respSetMean")]] <- rowMeans(tmpdt, na.rm = TRUE)
    dt5newVars[[paste0(wavePrefix, "respSetSd")]] <- rowSds(tmpdt, na.rm = TRUE)
  }
  rm(tmpdt, wave, wavePrefix, nWaves)

  # clean dt (still NaNs?)
  # tmp <- dt5newVars %>%
  #   dplyr::mutate_at(.vars = vars(contains("respSet")),
  #                    .funs = list(~ifelse(is.na(.), 'NaN', .)))
 
    
```


## Local Virus Spread
### Prepare Data
```{r}
# Blank out location and IP for everyone without ZIP == 1
# (per the original note, ZIP == 1 marks consent to use these data).
# Rows where ZIP is missing also end up NA.
for (geoVar in c("LocationLatitude", "LocationLongitude", "IPAddress")) {
  dt5newVars[[geoVar]] <- ifelse(dt5newVars[["ZIP"]] == 1, dt5newVars[[geoVar]], NA)
}
rm(geoVar)
```


### Localization Script by Floris Uithof (Geodienst Groningen University)
```{r localize}
# if necessary because of memory
  # rm(list= ls()[!(ls() %in% c('dt5newVars','dt5newVarsHarmo'))])

# Load packages
library(GADMTools)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
library(ggplot2)
library(dplyr)
library(osmdata)
library(ggmap)
library(RColorBrewer)

# Load in shapefile of provinces of the whole world
# map <- st_read(file.choose())
# map <- st_read("C:/Users/user/Downloads/ne_10m_admin_1_states_provinces.shp")

map <- sf::st_read(dsn="data/collab data/Shared/Data/raw data/ZIP", layer = "ne_10m_admin_1_states_provinces")

# Filter can be added here
Provinces <- map 

# Points to localize: one row per response that has BOTH coordinates.
# st_as_sf() does not accept missing coordinates, so longitude is checked as well.
  respPoints <- dt5newVars %>%
    dplyr::filter(!is.na(LocationLatitude), LocationLatitude != "",
                  !is.na(LocationLongitude), LocationLongitude != "") %>%
    dplyr::select(ResponseId, LocationLatitude, LocationLongitude)

# turn the coordinates into point geometries (EPSG 4326), keeping the raw columns
  respPoints <- st_as_sf(respPoints, coords = c("LocationLongitude", "LocationLatitude"),
                         crs = 4326, remove = FALSE)

# plot if needed
  plot(Provinces$geometry)
  plot(respPoints$geometry, col='red', pch=16, cex=0.4,add = T)

# Add the province attributes to each point (spatial left join)
  dataPerProvince <- st_join(respPoints,
                             Provinces[c('name', 'latitude', 'longitude', 'adm1_code', 'iso_3166_2',
                                         'fips', 'woe_label', 'gn_a1_code')])

# some people were not identified as their dots did not correspond to the map we uploaded
  dataPerProvince$name[is.na(dataPerProvince$name)] <- 'no province'

# province information per response
  tmp <- data.frame(ResponseId = dataPerProvince$ResponseId,
                    region = dataPerProvince$name,
                    regionLat = dataPerProvince$latitude,
                    regionLong = dataPerProvince$longitude,
                    region_adm1_code = dataPerProvince$adm1_code,
                    region_iso_3166_2 = dataPerProvince$iso_3166_2,
                    region_fips = dataPerProvince$fips,
                    region_woe_label = dataPerProvince$woe_label,
                    region_gn_a1_code = dataPerProvince$gn_a1_code)

# st_join() returns one row per matching polygon, so a point that falls into more
# than one polygon would show up several times and duplicate the participant in the
# merge below. Keep the first match per response.
  tmp <- tmp[!duplicated(tmp$ResponseId), ]

# and merge
    dt5newVars <- dplyr::left_join(dt5newVars, tmp, by = "ResponseId")

  rm(tmp, dataPerProvince, respPoints, Provinces)
```


### Check with Geolocation (not crucial for now)
```{r geoloc, echo=F, warning=F, message=F}
# library(readr)
# library(renv)
# library(countrycode)
# library(revgeo)
#
# # # Set to NULL to get all GPS locations; takes a long time
# only_missing_gps <- TRUE
# number_of_gps <- 100
# 
# dt5newVars$countryiso3 <- countrycode::countrycode(dt5newVars$coded_country, origin = "country.name", destination = "iso3c")
# 
# dt5newVars$id <- paste0(formatC(dt5newVars$LocationLongitude, digits = 7, format = "f"),
#                     formatC(dt5newVars$LocationLatitude, digits = 7, format = "f"))
# 
# if(!file.exists(file.path("data", "cleaned data", "geolocate.csv", "id"))){
#   gps <- dt5newVars[, c("LocationLongitude", "LocationLatitude")]
#   if(only_missing_gps) gps <- gps[is.na(dt5newVars$countryiso3), ]
#   gps$id <- paste0(formatC(gps$LocationLongitude, digits = 2, format = "f"),
#                    formatC(gps$LocationLatitude, digits = 2, format = "f"))
#   gps <- gps[!duplicated(gps$id), ]
#   if(is.null(number_of_gps)) number_of_gps <- nrow(gps)
# 
#   geolocs <- data.frame(revgeo(longitude=gps$LocationLongitude[1:number_of_gps], latitude=gps$LocationLatitude[1:number_of_gps], provider = 'photon', output="frame"), id =  gps$id[1:number_of_gps], stringsAsFactors = FALSE)
#   names(geolocs)[1:6] <- paste0("gps_", names(geolocs)[1:6])
#   geolocs$gps_countryiso3 <- countrycode::countrycode(geolocs$gps_country, origin = "country.name", destination = "iso3c")
#   if(only_missing_gps) write.csv(geolocs, file.path("data", "cleaned data", "geolocate_missing_iso3.csv"), row.names = FALSE)
#   write.csv(geolocs, file.path("data", "cleaned data", "geolocate.csv"), row.names = FALSE)
# } else {
#   geolocs <- read.csv(file.path("data", "cleaned data", "geolocate.csv"), stringsAsFactors = FALSE)
# }
# 
# tmp <- merge(dt5newVars, geolocs, all.x = TRUE, by = "id")
# 
# # Check which locations are mismatched
# head(dt5newVars[which(dt5newVars$countryiso3 != dt5newVars$gps_countryiso3), ])
# rm(tmp, dt5newVars, only_missing_gps, number_of_gps, gps)
```

## **Reduce to Relevant Variables**   
```{r keyVars, echo=T, warning=F, message=F}
# clean-up Item Analyses
# Only objects named "ia.<something>" are removed. The pattern is anchored because
# an unanchored "ia" would also delete any other object whose name merely contains
# those two letters.
rm(list = ls(pattern = "^ia\\."))

# remove directly identifiable data (with and without page timers)
# dt6ReducedTimer: everything except columns whose name contains one of the
# identifiers below (contains() ignores case by default).
dt6ReducedTimer <- dt5newVars %>%
  dplyr::select(-c(contains("IPAddress"),
                   contains("RecipientLastName"), 
                   contains("RecipientFirstName"), 
                   contains("Email"),
                   contains("ExternalReference"), 
                   contains("LocationLatitude"), 
                   contains("LocationLongitude"),
                   contains("DistributionChannel"),
                   contains("ICRec_0_TEXT"),
                   contains("ICRec_1_TEXT")))
# dt6Reduced: same data without the columns starting with "t_" (the page timers)
dt6Reduced <- dt6ReducedTimer %>%
  dplyr::select(-starts_with("t_"))

```

## **Recontact**
```{r recontact, echo=T, warning=F, message=F}
# COMMENT IF NEEDED
as.Date(dt5newVars$EndDate)

library("lubridate")

# Contact list of all respondents with FilterPreview == 0 and a non-empty ICRec_1_TEXT
  ContactListAll <- dt5newVars %>%
  dplyr::filter(FilterPreview == 0,
                ICRec_1_TEXT != "") %>%
         # optional cutoff date / de-duplication, currently disabled:
         # ymd_hms(dt5newVars$EndDate) < "2020-06-07 11:00:00 UTC",
         # dplyr::distinct(ICRec_1_TEXT, .keep_all = TRUE)
  dplyr::select(ExternalDataReference = ResponseId,
                Language = Q_Language,
                Email = ICRec_1_TEXT,
                StartDateBaseline = StartDate,
                EndDateBaseline = EndDate)

# contact lists downloaded from qualtrics
  CL1 <- read.csv("data/collab data/Shared/Data/raw data/contact list/c19Recontact_20_1.csv", header = TRUE)
  CL2 <- read.csv("data/collab data/Shared/Data/raw data/contact list/c19Recontact_20_2.csv", header = TRUE)
  # CL3 <- read.csv("data/collab data/Shared/Data/raw data/contact list/c19Recontact4_4.csv", header = T)
  CL <- plyr::rbind.fill(CL1, CL2)

# get Unsubscribed column (keep up to date), dataref (for merge), and Email (for comparison with df)
# Both text columns are forced to character: under R < 4.0 read.csv() returns factors,
# and the ifelse() further down would then copy the integer codes instead of the addresses.
  CL <- CL %>%
    dplyr::transmute(Unsubscribed = Unsubscribed, 
                     ExternalDataReference = as.character(ExternalDataReference),
                     EmailDown = as.character(Email))

# merge downloaded contact list with all participants that are before cutoff
  CLmerge <- dplyr::full_join(CL, ContactListAll, by="ExternalDataReference")
  # participants that are not on the downloaded lists yet have no Unsubscribed
  # value; they are counted as not unsubscribed (0)
  CLmerge$Unsubscribed[is.na(CLmerge$Unsubscribed)] <- 0
  table(CLmerge$Unsubscribed)

# check merge  
  table(CLmerge$EmailDown == CLmerge$Email) # not a big issue
  # prefer the downloaded address (some were changed); fall back to the survey
  # address when there is no downloaded one (first contact)
  CLmerge$Email <- ifelse(is.na(CLmerge$EmailDown), CLmerge$Email, CLmerge$EmailDown)

  # remove all irrelevant columns
    CLUp <- CLmerge %>%
      dplyr::select(-EmailDown)

  # split into two parts: the first 20000 rows and everything after that.
  # seq_len()/explicit guard instead of 1:20000 and 20001:nrow(CLUp), which yield
  # NA rows or a backwards sequence when CLUp has fewer rows than the cut point.
    chunkSize <- 20000
    CLUp1 <- CLUp[seq_len(min(chunkSize, nrow(CLUp))), ]
    CLUp2 <- CLUp[if (nrow(CLUp) > chunkSize) (chunkSize + 1):nrow(CLUp) else integer(0), ]
    rm(chunkSize)

# sanity check for the qualtrics export: does any response ID occur more than once?
  n_occur <- data.frame(table(CLmerge$ExternalDataReference)) 
  n_occur[n_occur$Freq > 1,] #which ids occurred more than once.
CLmerge[CLmerge$ExternalDataReference %in% n_occur$Var1[n_occur$Freq > 1],] #which ones
```

### Make Contact List per Language
```{r CLperCountry}
# One contact list per value of Language, each written to its own CSV file
CLLang <- split(CLUp, CLUp$Language)
outFiles <- paste0('data/cleaned data/Contact List ', names(CLLang), '.csv')
# save it
  mapply(function(contacts, path) write.csv(contacts, file = path, row.names = F),
         CLLang, outFiles)
rm(outFiles)
```


## **Data for Shiny App**
```{r shinyApp, echo=T, warning=F, message=F}
# Dataframe for Shiny App
# Country geometries and ISO codes from Natural Earth
library(rnaturalearth)
library(rnaturalearthdata)
world.data <- ne_countries(scale = "medium", returnclass = "sf")
# Kosovo gets the ISO-2 code "XK"
world.data$iso_a2 <- replace(world.data$iso_a2, world.data$admin == "Kosovo", "XK")

# Spelling check: lists every survey country that has no match in world.data$admin
surveyCountries <- unique(dt5newVars$coded_country)
surveyCountries[!(surveyCountries %in% world.data$admin)]
rm(surveyCountries)

# Add the ISO-2 code to every participant (all participants are kept) ...
isoLookup <- world.data %>% dplyr::select(admin, iso_a2)
shiny_prep <- merge(x = dt5newVars, y = isoLookup,
                    by.x = "coded_country", by.y = "admin", all.x = T)
rm(isoLookup)
# ... and build the URL of the matching flag icon from the lower-case ISO code
shiny_prep$flag <- sprintf("https://cdn.rawgit.com/lipis/flag-icon-css/master/flags/4x3/%s.svg", tolower(shiny_prep$iso_a2))

# Summary statistics shown in the Shiny app, computed once per country and once
# for the whole sample ("global").
#
# summariseScales() holds the single definition of all summary columns so that
# the country rows and the global row cannot drift apart. It expects a (grouped
# or ungrouped) data frame and returns one row per group with:
#   n         number of rows in the group (including rows with missing answers)
#   <x>       mean of the item/scale, missing values removed
#   <x>.sd    standard deviation, missing values removed
#   <x>.se    standard error of the mean = sd / sqrt(number of NON-MISSING answers).
#             The row count n is deliberately not used here: it also counts
#             participants who skipped the item and would understate the error.
summariseScales <- function(df) {
  df %>%
    dplyr::summarize(
      n = n(),

      affAnx = mean(affAnx, na.rm = TRUE),
      affBor = mean(affBor, na.rm = TRUE),
      affCalm = mean(affCalm, na.rm = TRUE),
      affContent = mean(affContent, na.rm = TRUE),
      affDepr = mean(affDepr, na.rm = TRUE),
      affEnerg = mean(affEnerg, na.rm = TRUE),
      affExc = mean(affExc, na.rm = TRUE),
      affNerv = mean(affNerv, na.rm = TRUE),
      affExh = mean(affExh, na.rm = TRUE),
      affInsp = mean(affInsp, na.rm = TRUE),
      affRel = mean(affRel, na.rm = TRUE),
      affHighPos = mean(affHighPos.m, na.rm = TRUE),
      affHighNeg = mean(affHighNeg.m, na.rm = TRUE),
      affLowPos = mean(affLowPos.m, na.rm = TRUE),
      affLowNeg = mean(affLowNeg.m, na.rm = TRUE),

      #ext = mean(ext.m, na.rm = T),

      gov = mean(extC19Msg, na.rm = TRUE),
      gov.sd = sd(extC19Msg, na.rm = TRUE),
      gov.se = gov.sd / sqrt(sum(!is.na(extC19Msg))),

      comRule = mean(c19IsStrict, na.rm = TRUE),
      comRule.sd = sd(c19IsStrict, na.rm = TRUE),
      comRule.se = comRule.sd / sqrt(sum(!is.na(c19IsStrict))),

      comPunish = mean(c19IsPunish, na.rm = TRUE),
      comPunish.sd = sd(c19IsPunish, na.rm = TRUE),
      comPunish.se = comPunish.sd / sqrt(sum(!is.na(c19IsPunish))),

      comOrg = mean(c19IsOrg, na.rm = TRUE),
      comOrg.sd = sd(c19IsOrg, na.rm = TRUE),
      comOrg.se = comOrg.sd / sqrt(sum(!is.na(c19IsOrg))),

      lone = mean(lone.m, na.rm = TRUE),
      lone.sd = sd(lone.m, na.rm = TRUE),
      lone.se = lone.sd / sqrt(sum(!is.na(lone.m))),

      #bor = mean(bor.m, na.rm = T),
      isoPers = mean(isoPers.m, na.rm = TRUE),
      isoPers.sd = sd(isoPers.m, na.rm = TRUE),
      isoPers.se = isoPers.sd / sqrt(sum(!is.na(isoPers.m))),

      isoOnl = mean(isoOnl.m, na.rm = TRUE),
      isoOnl.sd = sd(isoOnl.m, na.rm = TRUE),
      isoOnl.se = isoOnl.sd / sqrt(sum(!is.na(isoOnl.m))),

      #beh = mean(beh.m, na.rm = T),
      behWash = mean(c19perBeh01, na.rm = TRUE),
      behWash.sd = sd(c19perBeh01, na.rm = TRUE),
      behWash.se = behWash.sd / sqrt(sum(!is.na(c19perBeh01))),

      behAvoid = mean(c19perBeh02, na.rm = TRUE),
      behAvoid.sd = sd(c19perBeh02, na.rm = TRUE),
      behAvoid.se = behAvoid.sd / sqrt(sum(!is.na(c19perBeh02))),

      covidHope = mean(c19Hope, na.rm = TRUE),
      covidHope.sd = sd(c19Hope, na.rm = TRUE),
      covidHope.se = covidHope.sd / sqrt(sum(!is.na(c19Hope))),

      covidEff = mean(c19Eff, na.rm = TRUE),
      covidEff.sd = sd(c19Eff, na.rm = TRUE),
      covidEff.se = covidEff.sd / sqrt(sum(!is.na(c19Eff))),

      para = mean(para.m, na.rm = TRUE),
      para.sd = sd(para.m, na.rm = TRUE),
      para.se = para.sd / sqrt(sum(!is.na(para.m))),

      consp = mean(consp.m, na.rm = TRUE),
      consp.sd = sd(consp.m, na.rm = TRUE),
      consp.se = consp.sd / sqrt(sum(!is.na(consp.m)))

      #jobinsec = mean(jobinsec.m, na.rm = T),
      #pfs = mean(pfs.m, na.rm = T),
    )
}

# one row per country (participants without a country are left out)
ctry.scales <- shiny_prep %>%
  dplyr::filter(!is.na(coded_country)) %>%
  dplyr::group_by(coded_country) %>%
  summariseScales()
ctry.scales <- merge(x = ctry.scales, y = unique(shiny_prep %>% dplyr::select(coded_country, iso_a2, flag)), all.x = TRUE) # add flags and ISO

# one row for the whole sample; it has no ISO code and uses a globe icon as flag.
# coded_country is moved to the front so the column order matches the country rows.
global.scales <- shiny_prep %>%
  dplyr::filter(!is.na(coded_country)) %>%
  summariseScales() %>%
  dplyr::mutate(
    coded_country = "global",
    iso_a2 = NA,
    flag = "https://rawcdn.githack.com/FortAwesome/Font-Awesome/4e6402443679e0a9d12c7401ac8783ef4646657f/svgs/solid/globe.svg"
  ) %>%
  dplyr::select(coded_country, dplyr::everything())

# global row first, then the countries
ctry.scales <- rbind(global.scales, ctry.scales); rm(global.scales, summariseScales)

# Blur small cell counts so that exact small numbers cannot be read off the app.
#
# x          numeric vector of counts (NA stays NA)
# threshold  only counts below this value are changed
# maxShift   the noise is an integer drawn uniformly from -maxShift..maxShift
# Returns a vector of the same length as x. Changed values are abs(x + noise),
# so they never become negative; counts at or above the threshold are returned as is.
#
# The noise is drawn separately for every element. Drawing a single value (size 1)
# would shift all small counts of a column by the same amount, which makes the
# shift easy to undo once one true count is known.
scrambleCounts <- function(x, threshold, maxShift) {
  noise <- sample(seq(-maxShift, maxShift), length(x), replace = TRUE)
  ifelse(x < threshold, abs(x + noise), x)
}

# counts below 20 are shifted by up to +/- 2
scramble20 <- function(x) {scrambleCounts(x, threshold = 20, maxShift = 2)}
# counts below 50 are shifted by up to +/- 5
scramble50 <- function(x) {scrambleCounts(x, threshold = 50, maxShift = 5)}

# Number of respondents per survey language in wide format:
# a "global" row on top, followed by one row per country.
languages <- rbind(
  shiny_prep %>%
    dplyr::select(language) %>%
    mutate(coded_country = "global") %>%
    group_by(language, coded_country) %>%
    tally() %>%
    tidyr::spread(language, n),
  shiny_prep %>%
    dplyr::select(coded_country, language) %>%
    group_by(language, coded_country) %>%
    tally() %>%
    tidyr::spread(language, n)
)
# prefix every count column with "languages_"
isCount <- names(languages) != "coded_country"
names(languages)[isCount] <- paste0("languages_", names(languages)[isCount])
rm(isCount)
# blur small counts in all columns but the first (coded_country)
languages[,-1] <- lapply(languages[,-1], scramble20)
#languages[names(languages) != "coded_country"] <- lapply(languages[names(languages) != "coded_country"], scramble20)
# languages[] <- lapply(languages, function(x) ifelse(x<20, abs(x+sample(-2:2, 1, replace = T)), x))

# Number of respondents per gender category in wide format:
# a "global" row on top, followed by one row per country.
gender <- rbind(
  data.frame(coded_country = "global",
             gender = as_factor(shiny_prep$gender)) %>%
    group_by(gender, coded_country) %>%
    tally() %>%
    tidyr::spread(gender, n),
  data.frame(coded_country = shiny_prep$coded_country,
             gender = as_factor(shiny_prep$gender)) %>%
    group_by(gender, coded_country) %>%
    tally() %>%
    tidyr::spread(gender, n)
)
# prefix every count column with "gender_"
isCount <- names(gender) != "coded_country"
names(gender)[isCount] <- paste0("gender_", names(gender)[isCount])
rm(isCount)
# blur small counts in all columns but the first (coded_country)
gender[,-1] <- lapply(gender[,-1], scramble20)

# Number of respondents per age category in wide format:
# a "global" row on top, followed by one row per country.
age <- rbind(
  data.frame(coded_country = "global",
             age = as_factor(shiny_prep$age)) %>%
    group_by(age, coded_country) %>%
    tally() %>%
    tidyr::spread(age, n),
  data.frame(coded_country = shiny_prep$coded_country,
             age = as_factor(shiny_prep$age)) %>%
    group_by(age, coded_country) %>%
    tally() %>%
    tidyr::spread(age, n)
)
# prefix every count column with "age_"
isCount <- names(age) != "coded_country"
names(age)[isCount] <- paste0("age_", names(age)[isCount])
rm(isCount)
# blur small counts in all columns but the first (coded_country)
age[,-1] <- lapply(age[,-1], scramble20)

# Number of respondents per education category in wide format:
# a "global" row on top, followed by one row per country.
edu <- rbind(
  data.frame(coded_country = "global",
             edu = as_factor(shiny_prep$edu)) %>%
    group_by(edu, coded_country) %>%
    tally() %>%
    tidyr::spread(edu, n),
  data.frame(coded_country = shiny_prep$coded_country,
             edu = as_factor(shiny_prep$edu)) %>%
    group_by(edu, coded_country) %>%
    tally() %>%
    tidyr::spread(edu, n)
)
# prefix every count column with "education_"
isCount <- names(edu) != "coded_country"
names(edu)[isCount] <- paste0("education_", names(edu)[isCount])
rm(isCount)
# blur small counts in all columns but the first (coded_country)
edu[,-1] <- lapply(edu[,-1], scramble20)

# Number of respondents per political-orientation category in wide format:
# a "global" row on top, followed by one row per country. The two combined
# category strings below are set to NA before counting.
pol <- rbind(
  shiny_prep %>%
    dplyr::select(PolOrCat) %>%
    mutate(PolOrCat = na_if(PolOrCat, "Libertarian LeftLibertarian Right"),
           PolOrCat = na_if(PolOrCat, "Authoritarian RightLibertarian Right"),
           coded_country = "global") %>%
    group_by(PolOrCat, coded_country) %>%
    tally() %>%
    tidyr::spread(PolOrCat, n),
  shiny_prep %>%
    dplyr::select(coded_country, PolOrCat) %>%
    mutate(PolOrCat = na_if(PolOrCat, "Libertarian LeftLibertarian Right"),
           PolOrCat = na_if(PolOrCat, "Authoritarian RightLibertarian Right")) %>%
    group_by(PolOrCat, coded_country) %>%
    tally() %>%
    tidyr::spread(PolOrCat, n)
)
# prefix every count column with "political_"
isCount <- names(pol) != "coded_country"
names(pol)[isCount] <- paste0("political_", names(pol)[isCount])
rm(isCount)
# blur counts below 50 in all columns but the first (coded_country)
pol[,-1] <- lapply(pol[,-1], scramble50)

# Attach the language, gender, age, education and political-orientation tables
# to the country-level scale table, one join after the other, keyed on
# coded_country (plyr::join with its default join type).
#ctry.scales <- merge(x=ctry.scales, y=languages, by="coded_country", all.x=TRUE)
ctry.scales <- Reduce(
  function(acc, tbl) plyr::join(x = acc, y = tbl, by = "coded_country"),
  list(languages, gender, age, edu, pol),
  ctry.scales
)
# the per-topic tables are no longer needed once merged
rm(languages, gender, age, edu, pol)

# sample size per country (including NA)
world.n <- shiny_prep %>%
  dplyr::group_by(coded_country, iso_a2, flag) %>%
  dplyr::summarize(n = n())

# country identifiers + sample size for all rows with at least 20 respondents
# (still contains the pooled "global" row)
ctry.red <- ctry.scales %>%
  dplyr::filter(n >= 20) %>%
  dplyr::select(coded_country, iso_a2, flag, n)
# same table without the pooled "global" row
ctry.only.red <- ctry.red %>%
  dplyr::filter(coded_country != "global")

# apply the same minimum sample size to the full country-level table
ctry.scales <- ctry.scales %>%
  dplyr::filter(n >= 20)

# most recent EndDate in the data, parsed with tz = "CET" and formatted as a
# display string
latest.DateTime <- shiny_prep$EndDate %>%
  ymd_hms(tz = "CET") %>%
  max(na.rm = TRUE) %>%
  format("%d %B, %Y - %H:%M %Z")
```

## **Descriptives for Representativeness**
```{r rep, echo=T, warning=F, message=F}
# missing filter per item
# Keep only respondents with complete data on every item listed below, then
# count gender and age categories per country.
  ctry.repMiss <- dt5newVars %>%
    #filter(!is.na(coded_country)) %>%
    filter_at(
      vars(
        PLRAC19, extC19Msg, pfs.m, consp.m, lone.m,
        jbInsec01, jbInsec02, jbInsec03, jbInsec04, PLRAEco,
        disc01, disc02, disc03,
        isoFriends_inPerson, isoOthPpl_inPerson, isoFriends_online, isoOthPpl_online,
        c19Hope, c19Eff, ecoHope, ecoEff,
        c19RCA01, c19RCA02, c19RCA03, ecoPerBeh01, ecoPerBeh02, ecoPerBeh03,
        c19NormShould, c19NormDo, extC19Rules, extC19Punish, extC19Org,
        c19ProSo01, c19ProSo02, c19ProSo03, c19ProSo04,
        ecoProSo01, ecoProSo02, ecoProSo03, ecoProSo04
      ),
      all_vars(!is.na(.))
    ) %>%
    group_by(coded_country) %>%
    dplyr::summarize(
      n = n(),
      # gender codes 1 / 2 / 3 are counted as women / men / other
      gender_women = sum(gender == 1, na.rm = TRUE),
      gender_men = sum(gender == 2, na.rm = TRUE),
      gender_other = sum(gender == 3, na.rm = TRUE),
      gender_na = sum(is.na(gender), na.rm = TRUE),
      # age codes 1-8 are counted into the eight age brackets
      age_18to24 = sum(age == 1, na.rm = TRUE),
      age_25to34 = sum(age == 2, na.rm = TRUE),
      age_35to44 = sum(age == 3, na.rm = TRUE),
      age_45to54 = sum(age == 4, na.rm = TRUE),
      age_55to64 = sum(age == 5, na.rm = TRUE),
      age_65to75 = sum(age == 6, na.rm = TRUE),
      age_75to85 = sum(age == 7, na.rm = TRUE),
      age_85plus = sum(age == 8, na.rm = TRUE),
      age_na = sum(is.na(age), na.rm = TRUE)
    )
# full sample
# no missing filter per item
# Sample size per country, keeping only countries with more than 20
# respondents. Only `coded_country` and `n` are exported, so the gender/age
# breakdown that used to be computed and then discarded is no longer built.
# NOTE(review): the cut-off here is strict (`n > 20`), whereas the Shiny
# aggregates use `n >= 20` -- confirm which threshold is intended.
  ctry.repFull <- dt5newVars %>%
    group_by(coded_country) %>%
    dplyr::summarize(n = n()) %>%
    filter(n > 20)
# language
# Sample size per survey language, same cut-off.
  lang.repFull <- dt5newVars %>%
    group_by(language) %>%
    dplyr::summarize(n = n()) %>%
    filter(n > 20)
  
# only snowball
# no missing filter per item
# Sample size per country after dropping every row whose `source` contains
# "Rep", keeping only countries with more than 20 respondents. Only
# `coded_country` and `n` are exported, so the gender/age breakdown that used
# to be computed and then discarded is no longer built.
# NOTE(review): the cut-off here is strict (`n > 20`), whereas the Shiny
# aggregates use `n >= 20` -- confirm which threshold is intended.
  ctry.repFullSnow <- dt5newVars %>%
    filter(!grepl("Rep", source)) %>%
    group_by(coded_country) %>%
    dplyr::summarize(n = n()) %>%
    filter(n > 20)
# language
# Sample size per survey language for the same subset, same cut-off.
  lang.repFullSnow <- dt5newVars %>%
    filter(!grepl("Rep", source)) %>%
    group_by(language) %>%
    dplyr::summarize(n = n()) %>%
    filter(n > 20)
```

## **Export**   
Export the main data frame as RData and SPSS .sav files. We export versions with and without page timers.
```{r export, echo=T, warning=F, message=F}
# Export file names.
# The timestamp is taken ONCE so that every file written by this run carries
# the same stamp; formatting Sys.time() separately for each name could give
# different stamps if the clock rolls over to the next minute in between.
namStamp <- format(Sys.time(), format = "%F %H-%M %Z")
# Build "data/cleaned data/<label> <stamp><ext>".
namPath <- function(label, ext) {
  paste0("data/cleaned data/", label, " ", namStamp, ext)
}

namSPSS <- namPath("Psycorona Baseline cleaned", ".sav")
namR <- namPath("Psycorona Baseline cleaned", ".RData")
# relevant saves
  namASPSS <- namPath("Psycorona Baseline cleaned - All", ".sav")
  namAR <- namPath("Psycorona Baseline cleaned - All", ".RData")
  namASPSSHarmo <- namPath("Psycorona Baseline cleaned - All Harmonized", ".sav")
  namARHarmo <- namPath("Psycorona Baseline cleaned - All Harmonized", ".RData")

namTSPSS <- namPath("Psycorona Baseline cleaned with page timer", ".sav")
namTR <- namPath("Psycorona Baseline cleaned with page timer", ".RData")
namCL <- namPath("Contact List 1", ".csv")
namCL2 <- namPath("Contact List 2", ".csv")
namCL3 <- namPath("Contact List 3", ".csv")
namCLAll <- namPath("Contact List All", ".csv")
namCLUp1 <- namPath("Contact List 0_20000", ".csv")
namCLUp2 <- namPath("Contact List 20001_End", ".csv")
#namCLUp3 <- namPath("Contact List 25000_End", ".csv")
namLL <- namPath("Language List", ".csv")
namDemo <- namPath("Country Demographics", ".csv")

# Each dataset is written twice: as an SPSS file (write_sav) and as an R
# workspace file (save), using the matching .sav / .RData file names.

# save reduced dataset
  write_sav(dt6Reduced, namSPSS)
  save(dt6Reduced, file = namR)

# save full dataset
  write_sav(dt5newVars, namASPSS)
  # the .RData target must be written with save(); write_sav() here produced
  # an SPSS file with an .RData extension that load() cannot read
  save(dt5newVars, file = namAR)

# save harmonized dataset
  write_sav(as.data.frame(dt5newVarsHarmo), namASPSSHarmo)
  # same fix as above: .RData target written with save()
  save(dt5newVarsHarmo, file = namARHarmo)

# save data
  write_sav(dt6ReducedTimer, namTSPSS)
  save(dt6ReducedTimer, file = namTR)

# export for Shiny
  # written into the sibling PsyCorona-WebApp directory (relative path)
  save(ctry.scales, world.n, ctry.red, ctry.only.red, latest.DateTime, 
       file = "../PsyCorona-WebApp/data/shinyDataAggregated.RData")

# export Contact List
  # write.csv(ContactList1, file = namCL)
  # write.csv(ContactList2, file = namCL2)
  # write.csv(ContactList3, file = namCL3)
  write.csv(CLUp, file = namCLAll)
  write.csv(CLUp1, file = namCLUp1)
  write.csv(CLUp2, file = namCLUp2)
  #write.csv(CLUp3, file = namCLUp3)

# export Language List
#write.csv(LanguageList, file = namLL)

# export Country Demographics
# (these four files use fixed names without a timestamp, so each run
# overwrites the previous export)

  #write.csv(ctry.repMiss,'data/cleaned data/Country Demographics Joce.csv')
  write.csv(ctry.repFull,'data/cleaned data/Country Size Full.csv')
  write.csv(lang.repFull,'data/cleaned data/Language.csv')
  write.csv(ctry.repFullSnow,'data/cleaned data/Country Size Full - Snowball.csv')
  write.csv(lang.repFullSnow,'data/cleaned data/Language - Snowball.csv')

#rm(list=ls(pattern="nam"))
```