Skip to content

Commit 2d3c77a

Browse files
authored
Merge pull request #20 from InseeFrLab/dev2023
Dev2023
2 parents f3810af + c9a89a5 commit 2d3c77a

107 files changed

Lines changed: 8280 additions & 1126 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.Rbuildignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,9 @@
1313
^_pkgdown\.yml$
1414
^docs$
1515
^pkgdown$
16+
^\.github$
17+
^README\.Rmd$
18+
^tauargus_files/*$
19+
tauargus_files/*
20+
^_pkgdown_old\.yml$
21+
^README\.html$

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,5 @@ vignettes/tauargus_exe.ini
1414
.hst
1515

1616
docs
17+
18+
output/

DESCRIPTION

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,18 @@ Package: rtauargus
22
Type: Package
33
Title: Using Tau-Argus from R
44
Language: fr
5-
Version: 1.1.2
5+
Version: 1.2.0
66
Depends: R (>= 3.5.0)
77
Imports:
88
purrr (>= 0.2),
99
dplyr (>= 0.7),
10+
data.table,
1011
gdata,
1112
stringr,
1213
rlang,
13-
zoo
14+
zoo,
15+
sdcHierarchies,
16+
lifecycle
1417
Suggests:
1518
testthat,
1619
knitr,
@@ -39,6 +42,14 @@ Authors@R: c(
3942
"Félix", "Beroud",
4043
role = c("aut")
4144
),
45+
person(
46+
"André-Raymond", "Socard",
47+
role = c("aut")
48+
),
49+
person(
50+
"Wistan", "Pomel",
51+
role = c("aut")
52+
),
4253
person(
4354
family = "Institut National de la Statistique et des Études Économiques",
4455
role = "cph"
@@ -48,9 +59,12 @@ Description: Protects tables by calling the Tau-Argus software from R.
4859
License: MIT + file LICENSE
4960
Encoding: UTF-8
5061
LazyData: true
51-
RoxygenNote: 7.1.2
62+
RoxygenNote: 7.2.3
5263
VignetteBuilder: knitr
5364
URL: https://inseefrlab.github.io/rtauargus,
5465
https://github.com/inseefrlab/rtauargus,
5566
https://inseefrlab.github.io/rtauargus/
5667
BugReports: https://github.com/inseefrlab/rtauargus/issues
68+
Roxygen: list(markdown = TRUE)
69+
StagedInstall: no
70+

NAMESPACE

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,48 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export(from_4_to_3)
4+
export(from_4_to_3_case_0_hr)
5+
export(from_4_to_3_case_1_hr)
6+
export(from_4_to_3_case_2_hr)
7+
export(from_5_to_3)
38
export(import)
9+
export(length_tabs)
410
export(micro_arb)
511
export(micro_asc_rda)
12+
export(micro_rtauargus)
13+
export(nb_tab_generated)
14+
export(reduce_dims)
615
export(reset_rtauargus_options)
7-
export(rtauargus)
16+
export(restore_format)
817
export(rtauargus_options)
918
export(rtauargus_plus)
1019
export(run_arb)
20+
export(sp_format)
1121
export(tab_arb)
1222
export(tab_multi_manager)
1323
export(tab_rda)
1424
export(tab_rtauargus)
1525
export(tab_rtauargus2)
26+
export(tab_rtauargus4)
27+
export(tabulate_micro_data)
28+
export(var_to_merge)
1629
export(write_hrc)
1730
export(write_hrc2)
31+
import(data.table, except = transpose)
1832
importFrom(dplyr,"%>%")
1933
importFrom(dplyr,arrange)
34+
importFrom(dplyr,filter)
2035
importFrom(dplyr,mutate)
36+
importFrom(dplyr,select)
37+
importFrom(lifecycle,badge)
38+
importFrom(lifecycle,deprecated)
2139
importFrom(purrr,map)
2240
importFrom(purrr,map_at)
2341
importFrom(purrr,transpose)
2442
importFrom(rlang,.data)
43+
importFrom(sdcHierarchies,hier_convert)
44+
importFrom(sdcHierarchies,hier_import)
45+
importFrom(stats,setNames)
46+
importFrom(stringr,str_detect)
47+
importFrom(utils,combn)
2548
importFrom(zoo,na.locf)

NEWS.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,29 @@ subtitle: History of changes / Historique des modifications
44
output: rmarkdown::html_vignette
55
---
66

7+
8+
## rtauargus 1.2.0
9+
10+
[01/2024]
11+
12+
* Implementation of a method to handle some tables with 4 or 5 dimensions.
13+
14+
> The method is briefly explained and its use is demonstrated in a dedicated vignette (in French).
15+
> A paper explaining the idea and the modus operandi in more depth is available
16+
here: "https://github.com/InseeFrLab/dims_reduction_tables_workshop_20231215".
17+
18+
* Implementation of the function `tabulate_micro_data()` to compute tabular data from
19+
a microdata file.
20+
21+
> The function can create frequency and magnitude tabular data with hierarchical variables.
22+
The resulting tabular data contains the information needed to compute the primary secret
23+
according to the frequency rule and the (1,k)-dominance rule.
24+
25+
* Resolution of a malfunction while dealing with costs.
26+
27+
* The **rtauargus()** function has been renamed to the more appropriate **micro_rtauargus()**.
28+
Its arguments and its behaviour remain the same.
29+
730
## rtauargus 1.1.2
831

932
[01/02/2023]

R/data.R

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#' \item{ACTIVITY}{business sector, hierarchical variables with three levels described
99
#' in the activity_corr_table dataset. The root is noted "Total"}
1010
#' \item{SIZE}{size of the companies (Number of employees in three categories
11-
#' + overall category "Total")}
11+
#' and overall category "Total")}
1212
#' \item{N_OBS}{Frequency, number of companies}
1313
#' \item{TOT}{turnover value in euros}
1414
#' \item{MAX}{turnover of the company which contributes the most to the cell.}
@@ -44,7 +44,7 @@
4444
#' Hierarchical variables with two levels (nuts2 and nuts3) described
4545
#' in the nuts23_fr_corr_table dataset. The root is noted "Total"}
4646
#' \item{SIZE}{size of the companies (Number of employees in three categories
47-
#' + overall category "Total")}
47+
#' and overall category "Total")}
4848
#' \item{N_OBS}{Frequency, number of companies}
4949
#' \item{TOT}{turnover value in euros}
5050
#' \item{MAX}{turnover of the company which contributes the most to the cell.}
@@ -81,7 +81,7 @@
8181
#' \item{A21}{business sectors in 21 categories}
8282
#' \item{A88}{business sectors in 88 categories}
8383
#' }
84-
#' @details Use the \code{write_hrc2} function to create a .hrc file from this
84+
#' @details Use the `write_hrc2` function to create a .hrc file from this
8585
#' correspondence table.
8686
"activity_corr_table"
8787

@@ -95,7 +95,7 @@
9595
#' \item{NUTS2}{NUTS2 levels in France - equivalent of French "Régions"}
9696
#' \item{NUTS3}{NUTS3 levels in France - equivalent of French "Départements"}
9797
#' }
98-
#' @details Use the \code{write_hrc2} function to create a .hrc file from this
98+
#' @details Use the `write_hrc2` function to create a .hrc file from this
9999
#' correspondence table.
100100
"nuts23_fr_corr_table"
101101

@@ -116,7 +116,7 @@
116116
#' areas and their corresponding NUTS3 areas are in the data.
117117
#' The root is noted "Total_EAST"}
118118
#' \item{SIZE}{size of the companies (Number of employees in three categories
119-
#' + overall category "Total")}
119+
#' and overall category "Total")}
120120
#' \item{N_OBS}{Frequency, number of companies}
121121
#' \item{TOT}{turnover value in euros}
122122
#' \item{MAX}{turnover of the company which contributes the most to the cell.}
@@ -125,3 +125,64 @@
125125
#' activity_corr_table
126126
#' nuts23_fr_corr_table
127127
"turnover_act_nuts_size"
128+
129+
130+
#' data crossing 4 categorical variables, none are hierarchical.
131+
#'
132+
#' @format A tibble/data frame with 689 rows and 12 variables:
133+
#' \describe{
134+
#' \item{A10}{business sector, not hierarchical}
135+
#' \item{cj}{legal category, not hierarchical}
136+
#' \item{type_distrib}{type of distribution, not hierarchical}
137+
#' \item{treff}{Number of employees (categorical), not hierarchical}
138+
#' \item{nb_obs}{Frequency, number of companies}
139+
#' \item{nb_obs_rnd}{Frequency rounded, number of companies}
140+
#' \item{pizzas_tot}{turnover value in euros}
141+
#' \item{pizzas_tot_abs}{turnover absolute value in euros}
142+
#' \item{pizzas_max}{turnover max value in euros}
143+
#' \item{is_secret_freq}{Boolean, TRUE if primary secret for frequency rule}
144+
#' \item{is_secret_dom}{Boolean, TRUE if primary secret for dominance rule}
145+
#' \item{is_secret_prim}{Boolean, TRUE if primary secret for any rule}
146+
#'
147+
#' }
148+
"datatest1"
149+
150+
#' data crossing 5 categorical variables, none are hierarchical.
151+
#'
152+
#' @format A tibble/data frame with 5 612 rows and 15 variables:
153+
#' \describe{
154+
#' \item{A10}{business sector, not hierarchical}
155+
#' \item{cj}{legal category, not hierarchical}
156+
#' \item{type_distrib}{type of distribution, not hierarchical}
157+
#' \item{treff}{Number of employees (categorical), not hierarchical}
158+
#' \item{nuts1}{NUTS region, not hierarchical}
159+
#' \item{nb_obs}{Frequency, number of companies}
160+
#' \item{nb_obs_rnd}{Frequency rounded, number of companies}
161+
#' \item{pizzas_tot}{turnover value in euros}
162+
#' \item{pizzas_tot_abs}{turnover absolute value in euros}
163+
#' \item{pizzas_max}{turnover max value in euros}
164+
#' \item{is_secret_freq}{Boolean, TRUE if primary secret for frequency rule}
165+
#' \item{is_secret_dom}{Boolean, TRUE if primary secret for dominance rule}
166+
#' \item{is_secret_prim}{Boolean, TRUE if primary secret for any rule}
167+
#'
168+
#' }
169+
"datatest2"
170+
171+
#' Companies data at individual level.
172+
#'
173+
#' @format A data.table with 9 786 rows and 12 variables:
174+
#' \describe{
175+
#' \item{A10}{business sector, not hierarchical}
176+
#' \item{A21}{business sector, not hierarchical but nested in A10}
177+
#' \item{A88}{business sector, not hierarchical but nested in A21}
178+
#' \item{CJ}{legal category, not hierarchical}
179+
#' \item{TYPE}{type of distribution, not hierarchical}
180+
#' \item{SIZE}{Number of employees (categorical), not hierarchical}
181+
#' \item{NUTS1}{NUTS 1 level of European administrative regions, not hierarchical}
182+
#' \item{NUTS2}{NUTS 2 level of European administrative regions, not hierarchical}
183+
#' \item{NUTS3}{NUTS 3 level of European administrative regions, not hierarchical}
184+
#' \item{WEIGHT}{Weight of the companies, numeric}
185+
#' \item{TURNOVER}{Turnover, numeric}
186+
#' \item{PRODUCTION}{Production, numeric}
187+
#' }
188+
"indiv_dt"

R/hrc.R

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
#' microdonnées.
99
#'
1010
#' The function reconstructs the variable hierarchy from the levels
11-
#' present in the data. The variables in \code{vars_hrc} must be
12-
#' \strong{classified from the finest to the most aggregated}.
11+
#' present in the data. The variables in `vars_hrc` must be
12+
#' **classified from the finest to the most aggregated**.
1313
#'
1414
#' The relationship between each hierarchical level must be a mapping (in the
1515
#' mathematical sense of the term), i.e. each fine level must have a
@@ -22,17 +22,17 @@
2222
#'
2323
#' Missing values in the hierarchical variables will be
2424
#' imputed beforehand using another hierarchical variable (parameter
25-
#' \code{fill_na}). In ascending strategy (\code{"up"}), the variables are
25+
#' `fill_na`). In ascending strategy (`"up"`), the variables are
2626
#' traversed from the most aggregated to the most refined, and vice versa in the
27-
#' downward strategy (\code{"down"}).
27+
#' downward strategy (`"down"`).
2828
#'
29-
#' The parameter \code{compact} allows to create hierarchies with variable
29+
#' The parameter `compact` makes it possible to create hierarchies with variable
3030
#' depths. The idea is to cut the branches consisting of a single value
3131
#' repeated up to the maximum depth (see examples).\cr
3232
#'
3333
#' La fonction reconstitue la hiérarchie des variables à partir des niveaux
34-
#' présents dans les données. Les variables dans \code{vars_hrc} doivent être
35-
#' \strong{classées de la plus fine à la plus agrégée}.
34+
#' présents dans les données. Les variables dans `vars_hrc` doivent être
35+
#' **classées de la plus fine à la plus agrégée**.
3636
#'
3737
#' La relation entre chaque niveau hiérarchique doit être une application (au
3838
#' sens mathématique du terme), c'est-à-dire que chaque niveau fin doit avoir un
@@ -45,47 +45,47 @@
4545
#'
4646
#' Les valeurs manquantes présentes dans les variables hiérarchiques seront
4747
#' préalablement imputées à l'aide d'une autre variable hiérarchique (paramètre
48-
#' \code{fill_na}). En stratégie ascendante (\code{"up"}), les variables sont
48+
#' `fill_na`). En stratégie ascendante (`"up"`), les variables sont
4949
#' parcourues de la plus agrégée à la plus fine, et inversement en stratégie
50-
#' descendante (\code{"down"}).
50+
#' descendante (`"down"`).
5151
#'
52-
#' Le paramètre \code{compact} permet de créer des hiérarchies à profondeurs
52+
#' Le paramètre `compact` permet de créer des hiérarchies à profondeurs
5353
#' variables. L'idée est de couper les branches constituées d'une seule valeur
5454
#' répétée jusqu'à la profondeur maximale (voir exemples).
5555
#'
5656
#' @inheritParams micro_asc_rda
57-
#' @param vars_hrc \strong{[mandatory]} vector of variable names
57+
#' @param vars_hrc vector of variable names
5858
#' constituting the hierarchy, from the finest to the most aggregated level.\cr
59-
#' (\strong{[obligatoire]} vecteur des noms des variables
59+
#' (vecteur des noms des variables
6060
#' constituant la hiérarchie, du niveau le plus fin au niveau le plus agrégé.)
6161
#' @param hrc_filename name and location of the produced hrc file. If not
6262
#' specified, a temporary file is used.\cr
6363
#' (nom et emplacement du fichier hrc produit. Si non renseigné, un fichier temporaire.)
6464
#' @param fill_na fill in any missing values, using another variable:
6565
#' \itemize{
66-
#' \item{\code{"up"} (default) : hierarchical variable of the level level
66+
#' \item{`"up"` (default) : hierarchical variable of the level
6767
#' immediately above}
68-
#' \item{\code{"down"} : hierarchical variable of the level immediately
68+
#' \item{`"down"` : hierarchical variable of the level immediately
6969
#' lower}
7070
#' }\cr
7171
#' (remplissage d'éventuelles valeurs manquantes, à l'aide d'une
7272
#' autre variable :\itemize{
73-
#' \item{\code{"up"} (défaut) : variable hiérarchique de niveau
73+
#' \item{`"up"` (défaut) : variable hiérarchique de niveau
7474
#' immédiatement supérieur}
75-
#' \item{\code{"down"} : variable hiérarchique de niveau immédiatement
75+
#' \item{`"down"` : variable hiérarchique de niveau immédiatement
7676
#' inférieur}
7777
#' })
7878
#' @param compact to prune branches repeating a single value to the
79-
#' lowest level of depth (\code{TRUE} by default).\cr
79+
#' lowest level of depth (`TRUE` by default).\cr
8080
#' (pour élaguer les branches répétant une unique valeur jusqu'au
81-
#' plus bas niveau de profondeur (\code{TRUE} par défaut).)
82-
#' @param hierlevels if only one variable is specified in \code{vars_hrc},
81+
#' plus bas niveau de profondeur (`TRUE` par défaut).)
82+
#' @param hierlevels if only one variable is specified in `vars_hrc`,
8383
#' makes it possible to generate the hierarchy according to the position of the characters in the
84-
#' string. For example, \code{hierlevels = "2 3"} to build a
84+
#' string. For example, `hierlevels = "2 3"` to build a
8585
#' department-municipality hierarchy from a municipality code.\cr
86-
#' (si une seule variable est spécifiée dans \code{vars_hrc},
86+
#' (si une seule variable est spécifiée dans `vars_hrc`,
8787
#' permet de générer la hiérarchie selon la position des caractères dans la
88-
#' chaîne. Par exemple, \code{hierlevels = "2 3"} pour construire une
88+
#' chaîne. Par exemple, `hierlevels = "2 3"` pour construire une
8989
#' hiérarchie département-commune à partir d'un code commune.)
9090
#'
9191
#' @return The name of the hrc file (useful in the case of a temporary file with
@@ -446,7 +446,8 @@ df_hierlevels <- function(var_hrc, hierlevels) {
446446
}
447447

448448
lev <- strsplit(hierlevels, " +")[[1]]
449-
lev <- as.integer(lev) %>% `[`(. != 0)
449+
lev <- as.integer(lev)
450+
lev <- lev[lev != 0]
450451
if (sum(lev) != n1) {
451452
stop("la somme de hierlevels doit etre egale au nombre de caracteres")
452453
}

0 commit comments

Comments
 (0)