Skip to content

Commit 854a71f

Browse files
authored
Merge pull request #40 from InseeFrLab/dev_auto_indic_links
Dev auto indic links
2 parents e121f03 + ab756ac commit 854a71f

39 files changed

Lines changed: 1015 additions & 224 deletions

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ Package: rtauargus
22
Type: Package
33
Title: Using Tau-Argus from R
44
Language: fr
5-
Version: 1.3.1
5+
Version: 1.3.2
66
Depends: R (>= 3.5.0)
77
Imports:
88
purrr (>= 0.2),
@@ -67,7 +67,7 @@ Description: Protects tables by calling the Tau-Argus software from R.
6767
License: MIT + file LICENSE
6868
Encoding: UTF-8
6969
LazyData: true
70-
RoxygenNote: 7.3.2
70+
RoxygenNote: 7.3.3
7171
VignetteBuilder: knitr
7272
URL: https://inseefrlab.github.io/rtauargus,
7373
https://github.com/inseefrlab/rtauargus,

LICENSE

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,2 @@
1-
Copyright 2021 Institut National de la Statistique et des Études Économiques
2-
3-
Permission is hereby granted, free of charge, to any person obtaining a copy
4-
of this software and associated documentation files (the "Software"), to deal
5-
in the Software without restriction, including without limitation the rights
6-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7-
copies of the Software, and to permit persons to whom the Software is
8-
furnished to do so, subject to the following conditions:
9-
10-
The above copyright notice and this permission notice shall be included in all
11-
copies or substantial portions of the Software.
12-
13-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19-
SOFTWARE.
1+
YEAR: 2021
2+
COPYRIGHT HOLDER: Institut National de la Statistique et des Études Économiques

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ export(from_5_to_3)
1212
export(grp_tab_in_cluster)
1313
export(grp_tab_names)
1414
export(identify_hrc)
15+
export(identify_hrc_with_eq)
1516
export(import)
1617
export(length_tabs)
1718
export(micro_arb)
@@ -59,6 +60,7 @@ importFrom(dplyr,rowwise)
5960
importFrom(dplyr,select)
6061
importFrom(dplyr,summarise)
6162
importFrom(dplyr,ungroup)
63+
importFrom(dplyr,where)
6264
importFrom(igraph,graph_from_data_frame)
6365
importFrom(igraph,which_mutual)
6466
importFrom(lifecycle,badge)

R/analyse_metadata.R

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#' creating a structured output for further processing.
77
#'
88
#' @param df_metadata A dataframe containing metadata in wide format.
9+
#' @param df_eq_indicator A datadframe containing the indicators equations if needed.
910
#' @param verbose Logical. If `TRUE`, returns a detailed list of intermediate results
1011
#' from each processing step. If `FALSE`, returns only the cluster assignments. Defaults to `FALSE`.
1112
#'
@@ -28,7 +29,7 @@
2829
#' @details The function performs the following steps:
2930
#' \itemize{
3031
#' \item Converts the metadata from wide format to long format using \code{wide_to_long}.
31-
#' \item Identifies hierarchical relationships and renames variables with \code{identify_hrc}.
32+
#' \item Identifies hierarchical relationships and renames variables with \code{identify_hrc} or \code{identify_hrc_with_eq}.
3233
#' \item Splits hierarchical relationships into clusters using \code{split_in_clusters}.
3334
#' \item Creates edges to describe the relationships via \code{create_edges}.
3435
#' \item Generates translation tables for regrouping with \code{grp_tab_names}.
@@ -53,7 +54,7 @@
5354
#' `r lifecycle::badge("experimental")`
5455
#'
5556
#' @export
56-
analyse_metadata <- function(df_metadata,verbose = FALSE){
57+
analyse_metadata <- function(df_metadata,df_eq_indicator = NULL,verbose = FALSE){
5758
# check that the input is in the right format: right column names
5859
check_column_names <- function(df) {
5960
# Expected fixed column names
@@ -102,7 +103,14 @@ analyse_metadata <- function(df_metadata,verbose = FALSE){
102103

103104
# start of the actual analysis
104105
df_metadata_long <- wide_to_long(df_metadata)
105-
list_hrc_identified <- identify_hrc(df_metadata_long)
106+
if(is.null(df_eq_indicator)){
107+
list_hrc_identified <- identify_hrc(df_metadata_long)
108+
}else{
109+
warning("For the variables part of equations specified in df_eq_indicator,
110+
the hrc_indicator column will be ignored.")
111+
list_hrc_identified <- identify_hrc_with_eq(df_metadata_long,df_eq_indicator)
112+
}
113+
106114
list_split <- split_in_clusters(list_hrc_identified)
107115
list_desc_links <- create_edges(list_split)
108116
list_translation_tables <- grp_tab_names(list_desc_links)

R/create_edges.R

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,26 +57,43 @@ create_edges <- function(list_split){
5757
small_tibble$spanning}))
5858
})
5959
list_desc_links <- nested_crois %>% map(function(big_tibble) {
60-
# Condition for clusters that only have one table
60+
# Only compare the spanning variables for clusters that have more than 1 table
6161
if(length(big_tibble$table_name) > 1) {
6262
spannings_nom_tab <- combn(big_tibble$table_name, 2, FUN = list)
63-
6463
tab_to_keep <- map(spannings_nom_tab, function(crois) {
64+
# Extract the first table (Ta) corresponding to the first table of the
65+
# pair of tables we are studying
6566
Ta <- big_tibble %>% filter(table_name == crois[[1]])
67+
# Get the 'spanning' column for Ta
6668
crois_Ta <- Ta$spanning[[1]]
69+
70+
# Extract the first table (Tb) corresponding to the second table of the
71+
# pair of tables we are studying
6772
Tb <- big_tibble %>% filter(table_name == crois[[2]])
73+
# Get the 'spanning' column for Tb
6874
crois_Tb <- Tb$spanning[[1]]
75+
76+
# Create an empty data frame to store origin → destination relationships
6977
df_origin_dest <- data.frame(from = character(), to = character(), stringsAsFactors = FALSE)
78+
79+
# Check if all elements of Ta are included in Tb
7080
if(all(crois_Ta %in% crois_Tb)) {
81+
# Add a row indicating that Ta can go to Tb
7182
df_origin_dest <- rbind(df_origin_dest,
72-
data.frame(from = crois[[1]], to = crois[[2]],stringsAsFactors = FALSE))
83+
data.frame(from = crois[[1]], to = crois[[2]], stringsAsFactors = FALSE))
7384
}
85+
86+
# Check if all elements of Tb are included in Ta
7487
if(all(crois_Tb %in% crois_Ta)) {
88+
# Add a row indicating that Tb can go to Ta
7589
df_origin_dest <- rbind(df_origin_dest,
7690
data.frame(from = crois[[2]], to = crois[[1]], stringsAsFactors = FALSE))
7791
}
92+
93+
# Return the data frame containing possible links
7894
return(df_origin_dest)
7995
})
96+
8097
# Filter non empty tables and combine them
8198
tab_to_keep_compact <- tab_to_keep %>%
8299
purrr::discard(is.null) %>%

R/format_template.R

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,23 +78,35 @@ filter_on_marginal_of_spanning_var <- function(data, criteria, subset_keys) {
7878
)
7979
# Create filter expressions for all other keys with !=
8080
other_keys <- setdiff(names(criteria), subset_keys)
81-
filter_expr_not_in <- purrr::map2(
82-
other_keys,
83-
criteria[other_keys],
84-
~ rlang::expr(!!rlang::sym(.x) != !!.y)
85-
)
81+
if(all(other_keys == "")){
82+
# treat the eventuality of all spanning variables being crossed on their
83+
# non total values
84+
not_totals_all_spannings <- purrr::map2(
85+
subset_keys,
86+
criteria[subset_keys],
87+
~ rlang::expr(!!rlang::sym(.x) != !!.y)
88+
)
89+
return(data %>% filter(!!!not_totals_all_spannings))
90+
} else {
91+
filter_expr_not_in <- purrr::map2(
92+
other_keys,
93+
criteria[other_keys],
94+
~ rlang::expr(!!rlang::sym(.x) != !!.y)
95+
)
96+
}
97+
8698
# Combine the two sets of expressions
8799
combined_filter_expr <- c(filter_expr_in, filter_expr_not_in)
88100
# Apply the combined filter
89-
data %>% filter(!!!combined_filter_expr) %>% select(-!!subset_keys)
101+
return(data %>% filter(!!!combined_filter_expr) %>% select(-!!subset_keys))
90102
}
91103

92104
#' Determines the tables described in a template gathering all the published cells
93105
#'
94106
#' @param data template gathering all the published cells
95107
#' @param indicator_column name of the column in which the indicators are
96108
#' @param spanning_var_tot a named list of the spanning variables and their totals
97-
#' @param field_columns vecotr of all the columns that are fields (ex: year of collect)
109+
#' @param field_columns vector of all the columns that are fields (ex: year of collect)
98110
#'
99111
#' @return named list of a dataframe describing the tables (metadata) and a list of
100112
#' the modalities of each hierarchical variable (modalities)

R/globals.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ utils::globalVariables(
33
c("table_name", "data","cluster","field","hrc_field","indicator","hrc_indicator",
44
"n_unique","column","unique_modalities","from.eg","to.eg","from","to","mutual_full",
55
"Group","table_eg","spanning","hrc_spanning","spanning_old","tab_inclus",
6-
"starts_with","spanning_name","hrc_spanning_name",
6+
"starts_with","spanning_name","hrc_spanning_name","eq_indicator","rhs","total","term_number",
7+
"eq_name","unit","var","n_total","total_alt","group",
78
".")
89
)

R/grp_tab_in_cluster.R

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
grp_tab_in_cluster <- function(list_split, list_translation_tables) {
5454
# Nest each cluster of tables by `table_name`
5555
nested_crois <- list_split %>%
56-
purrr::map(function(tab) { tab %>% dplyr::group_by(table_name) %>% tidyr::nest() })
56+
purrr::map(function(tab) {tab %>% dplyr::group_by(table_name) %>% tidyr::nest() })
5757

5858
# Process each cluster using inclusion relationships
5959
purrr::map2(list_translation_tables, nested_crois, function(tab_to_keep, big_tibble) {
@@ -66,7 +66,7 @@ grp_tab_in_cluster <- function(list_split, list_translation_tables) {
6666
# Process the cluster
6767
big_tibble_eg <- big_tibble %>%
6868
mutate(
69-
spanning = map(data, function(small_tibble) { small_tibble$spanning })
69+
spanning = map(data, function(small_tibble) {small_tibble$spanning})
7070
) %>%
7171
dplyr::left_join(tab_to_keep[[2]], by = c("table_name" = "Original")) %>%
7272
mutate(table_eg = ifelse(is.na(Group), table_name, Group)) %>%
@@ -84,6 +84,13 @@ grp_tab_in_cluster <- function(list_split, list_translation_tables) {
8484
)
8585
})
8686
) %>% dplyr::rename(table_name = table_eg)
87+
# adding the tables that are not included in each other in the cluster
88+
if(length(big_tibble$table_name) != length(tab_to_keep$passage_nom_tab$Original)){
89+
tables_no_inclusion <- big_tibble %>%
90+
filter(table_name %in% setdiff(big_tibble$table_name,tab_to_keep$passage_nom_tab$Original)) %>%
91+
mutate(spanning = map(data, function(small_tibble) {small_tibble$spanning}))
92+
big_tibble_eg <- bind_rows(tables_no_inclusion,big_tibble_eg)
93+
}
8794

8895
return(big_tibble_eg)
8996
} else {

R/identify_hrc.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ identify_hrc <- function(df_metadata_long){
4848
table_name = df_spannings$table_name
4949
) %>% unique()
5050
df_spannings <- df_spannings %>% select(-spanning_old)
51-
if(all(is.na(df_spannings$hrc_indicator))){ # condition pour les hiérarchies sur les indicateurs
51+
if(all(is.na(df_spannings$hrc_indicator))){ # condition for hierarchies on indicators
5252
df_indicators <- df_spannings
5353
return(list(df_indicators,df_variable_info))
5454
} else {

0 commit comments

Comments
 (0)