| Title: | Visualisation And Analysis During a Survey Field |
|---|---|
| Description: | vizsurvey is an R package designed to streamline the quality assessment of survey data by providing intuitive visual diagnostics through an interactive dashboard. vizsurvey is especially useful for institutions or researchers conducting large-scale surveys with multiple interviewers, enabling a fast and systematic overview of data quality over time. |
| Authors: | Thomas Delclite [aut, cre], Adrien Mierop [aut] |
| Maintainer: | Thomas Delclite <[email protected]> |
| License: | GPL (>= 3) |
| Version: | 0.2.1 |
| Built: | 2026-05-15 09:46:51 UTC |
| Source: | https://github.com/tdelc/vizsurvey |
Classify all variables of a data.frame
classify_df(df, threhold = 15)classify_df(df, threhold = 15)
df |
A data frame |
threhold |
Maximum number of modalities for a variable to be classified as modal |
a data frame
classify_df(iris)classify_df(iris)
Corrections of each data frame in a list
correct_list_df(list_df)correct_list_df(list_df)
list_df |
list of df |
list
Create a template of configuration file
create_config( folder_path, file_name = "config.txt", name_survey = NULL, vars_discretes = NULL, vars_continous = NULL, var_wave = NULL, var_zone = NULL, var_group = NULL )create_config( folder_path, file_name = "config.txt", name_survey = NULL, vars_discretes = NULL, vars_continous = NULL, var_wave = NULL, var_zone = NULL, var_group = NULL )
folder_path |
folder in which to create the file |
file_name |
Name of the config file (config.txt by default) |
name_survey |
Name of the survey (not used) |
vars_discretes |
(optional) preset discrete variable names (VAR1,VAR2,...) |
vars_continous |
(optional) preset continuous variable names (VAR1,VAR2,...) |
var_wave |
(optional) variable name of wave |
var_zone |
(optional) variable name of zone |
var_group |
variable name of group |
create_config(".") # creation of config.txt in working directorycreate_config(".") # creation of config.txt in working directory
Create statistics from database
create_df_stats(df_, configs, var_calculs, zone_filter = NULL)create_df_stats(df_, configs, var_calculs, zone_filter = NULL)
df_ |
database |
configs |
configs |
var_calculs |
variable to create stats |
zone_filter |
(optional) zone modality to filter data |
df
Simulate EU-SILC dataset with injected errors
create_eusilc_sim()create_eusilc_sim()
data.frame from laeken::eusilc with ids and errors
# create_eusilc_sim()# create_eusilc_sim()
Use PUF files for SILC example
create_fake_silc( vec_country = c("BE", "RO"), path_out = "inst/shiny-examples/complete/data/SILC/" )create_fake_silc( vec_country = c("BE", "RO"), path_out = "inst/shiny-examples/complete/data/SILC/" )
vec_country |
Vector of country to import |
path_out |
Path to export csv |
nothing, but creation of csv datasets
# create_fake_silc()# create_fake_silc()
Replace empty values with NA
empty_as_na(vec)empty_as_na(vec)
vec |
Vector of values |
Vector
airquality[which(is.na(airquality$Ozone)), "Ozone"] <- "" empty_as_na(airquality$Ozone)airquality[which(is.na(airquality$Ozone)), "Ozone"] <- "" empty_as_na(airquality$Ozone)
Extract a config value by key (config from load_config)
extract_config(config, key_)extract_config(config, key_)
config |
df of configuration |
key_ |
key to extract |
string
Transform data from a folder into config and df
folder_to_df(folder, file_pattern = "*.csv", file_config = "config.txt")folder_to_df(folder, file_pattern = "*.csv", file_config = "config.txt")
folder |
folder of databases |
file_pattern |
pattern of the databases (*.csv by default) |
file_config |
name of the configuration file (config.txt by default) |
list(df,configs)
## Not run: folder_to_df("ESS10") ## End(Not run)## Not run: folder_to_df("ESS10") ## End(Not run)
Create a heatmap
heatmap_group(df_stats, threshold = 5, color = "red2")heatmap_group(df_stats, threshold = 5, color = "red2")
df_stats |
data frame from prepa_stats function |
threshold |
threshold to show difference |
color |
color of the cells |
heatmap (ggplot)
library(laeken)
data(eusilc)
df_stats <- prepa_stats(eusilc, "db040")
heatmap_group(df_stats, 5)
Check if value is integer64
is.integer64(x)is.integer64(x)
x |
Value |
Boolean
is.integer64(c(1:100)) # FALSEis.integer64(c(1:100)) # FALSE
List distribution of discrete variables
list_dist(df, vars_vd)list_dist(df, vars_vd)
df |
data.frame |
vars_vd |
vector of discrete variables |
list
list_dist(mtcars,c("cyl","vs","gear"))list_dist(mtcars,c("cyl","vs","gear"))
Load a config file to prepare data
load_config(file_path)load_config(file_path)
file_path |
path of the configuration file |
df
Loop of stats creation by zone
loop_stats(df, configs, var_calculs)loop_stats(df, configs, var_calculs)
df |
database |
configs |
configs |
var_calculs |
variable to create stats |
df
Specific chisq test for NA and Other modalities
my_chisq_test(x, varname, ldist)my_chisq_test(x, varname, ldist)
x |
values on which to perform the chisq test |
varname |
name of the variable |
ldist |
named list of expected probabilities |
chisq value
ldist <- list_dist(mtcars,c("cyl","gear"))
sub_mtcars <- subset(mtcars,vs == 1)
my_chisq_test(sub_mtcars$cyl,"cyl",ldist)
Create a summary of all the differences
prepa_stats(df, var_group, vars_vd = NULL, vars_vc = NULL)prepa_stats(df, var_group, vars_vd = NULL, vars_vc = NULL)
df |
data frame for the summary |
var_group |
Name of group variable |
vars_vd |
(optional) Vector of discrete variables |
vars_vc |
(optional) Vector of continuous variables |
data frame
library(laeken)
data(eusilc)
info_vars <- classify_df(eusilc)
vars_vd <- info_vars[info_vars$type == "Modal", ]$variable
vars_vc <- info_vars[info_vars$type == "Continuous", ]$variable
prepa_stats(eusilc, "db040", vars_vd, vars_vc)
Preparation of a survey
prepa_survey(folder_path, file_pattern = "*.csv", file_config = "config.txt")prepa_survey(folder_path, file_pattern = "*.csv", file_config = "config.txt")
folder_path |
folder of survey |
file_pattern |
pattern of the databases (*.csv by default) |
file_config |
name of the configuration file (config.txt by default) |
NULL (creation of rds)
## Not run: prepa_survey("shiny-examples/complete/ESS10") ## End(Not run)## Not run: prepa_survey("shiny-examples/complete/ESS10") ## End(Not run)
Preparation of all surveys from a folder
prepa_surveys( folder_path, depth_folder = 1, file_pattern = "*.csv", file_config = "config.txt" )prepa_surveys( folder_path, depth_folder = 1, file_pattern = "*.csv", file_config = "config.txt" )
folder_path |
folder of the folders of survey |
depth_folder |
level of depth for the tree structure |
file_pattern |
pattern of the databases (*.csv by default) |
file_config |
name of the configuration file (config.txt by default) |
NULL (creation of rds)
## Not run: prepa_surveys("inst/extdata/SILC/HFILE") ## End(Not run)## Not run: prepa_surveys("inst/extdata/SILC/HFILE") ## End(Not run)
Shiny Example of vizsurvey
runExample()runExample()
shinyapp
# runExample()# runExample()
Shiny vizsurvey
runVizsurvey()runVizsurvey()
shinyapp
## Not run: runVizsurvey()## Not run: runVizsurvey()
Shiny vizsurvey from a csv/tsv
runVizsurvey_from_file( path, vars_discretes = NULL, vars_continous = NULL, var_wave = NULL, var_zone = NULL, var_group = NULL )runVizsurvey_from_file( path, vars_discretes = NULL, vars_continous = NULL, var_wave = NULL, var_zone = NULL, var_group = NULL )
path |
path of a data.frame (can be read by fread) |
vars_discretes |
(optional) preset of discrete variables |
vars_continous |
(optional) preset of continuous variables |
var_wave |
(optional) name of wave variable |
var_zone |
(optional) name of zone variable |
var_group |
(optional) name of group variable |
shinyapp
path <- "inst/extdata/SILC/HFILE/BE_2012h_EUSILC.csv" ## Not run: runVizsurvey_from_file(path,var_group = "NR_ITW",var_zone = "db040")path <- "inst/extdata/SILC/HFILE/BE_2012h_EUSILC.csv" ## Not run: runVizsurvey_from_file(path,var_group = "NR_ITW",var_zone = "db040")
Shiny vizsurvey with already prepared data
runVizsurvey_from_folder(link, data_rds_pattern = "global", depth_folder = 1)runVizsurvey_from_folder(link, data_rds_pattern = "global", depth_folder = 1)
link |
link to directory of data |
data_rds_pattern |
name of the rds file that contains all the data |
depth_folder |
level of depth for the tree structure |
shinyapp
# We assume that config.txt, and prepa_surveys are already done here. ## Not run: runVizsurvey_from_folder("inst/extdata",depth_folder = 3)# We assume that config.txt, and prepa_surveys are already done here. ## Not run: runVizsurvey_from_folder("inst/extdata",depth_folder = 3)
Shiny vizsurvey from a R data.frame
runVizsurvey_from_r( df, vars_discretes = NULL, vars_continous = NULL, var_wave = NULL, var_zone = NULL, var_group = NULL )runVizsurvey_from_r( df, vars_discretes = NULL, vars_continous = NULL, var_wave = NULL, var_zone = NULL, var_group = NULL )
df |
data.frame |
vars_discretes |
(optional) preset of discrete variables |
vars_continous |
(optional) preset of continuous variables |
var_wave |
(optional) name of wave variable |
var_zone |
(optional) name of zone variable |
var_group |
(optional) name of group variable |
shinyapp
library(laeken) data(eusilc) set.seed(123) eusilc$NR_ITW <- paste(eusilc$db040,sample(1:5,nrow(eusilc),replace = TRUE),sep="-") ## Not run: runVizsurvey_from_r(eusilc,var_group = "NR_ITW",var_zone = "db040")library(laeken) data(eusilc) set.seed(123) eusilc$NR_ITW <- paste(eusilc$db040,sample(1:5,nrow(eusilc),replace = TRUE),sep="-") ## Not run: runVizsurvey_from_r(eusilc,var_group = "NR_ITW",var_zone = "db040")
Robust scaling of a variable with IQR
scale_IQR(x)scale_IQR(x)
x |
vector |
vector
head(scale_IQR(iris$Sepal.Length))head(scale_IQR(iris$Sepal.Length))
calculate isoforest score from df
score_isoforest(df)score_isoforest(df)
df |
database |
vector
score_isoforest(iris[sapply(iris, is.numeric)])score_isoforest(iris[sapply(iris, is.numeric)])