sdcTools
diff --git a/‎.Rbuildignore‎
Lines changed: 3 additions & 0 deletions b/‎.Rbuildignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 9 additions & 7 deletions b/‎DESCRIPTION‎
Lines changed: 9 additions & 7 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 2 additions & 2 deletions b/‎NAMESPACE‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎NEWS‎
Lines changed: 20 additions & 0 deletions b/‎NEWS‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎R/AI_applyAnonymization.R‎
Lines changed: 223 additions & 0 deletions b/‎R/AI_applyAnonymization.R‎
Lines changed: 223 additions & 0 deletions
@@ -15,3 +15,6 @@ src/recordSwap/compile.sh
 vignettes/precompile.R
 vignettes/recordSwapping.Rmd.orig
 ctime.pdf
+^\.claude$
+^CLAUDE\.md$
+^docs/plans$
@@ -33,3 +33,6 @@ inst/shiny/sdcApp/.vscode
 inst/shiny/sdcApp/www/.sass-cache
 ihsn-extra-sidebar.css.map
 docs
+.claude/
+CLAUDE.md
+docs/plans/
@@ -2,8 +2,8 @@ Package: sdcMicro
 Type: Package
 Title: Statistical Disclosure Control Methods for Anonymization of Data and
     Risk Estimation
-Version: 5.7.9
-Date: 2025-08-01
+Version: 5.8.1
+Date: 2026-03-05
 Authors@R: c(
   person("Matthias", "Templ", email="matthias.templ@gmail.com", role = c("aut", "cre"), comment=c(ORCID="0000-0002-8638-5276")),
   person("Bernhard", "Meindl", email = "Bernhard.Meindl@statistik.gv.at", role = c("aut")),
@@ -27,7 +27,9 @@ Depends:
 Suggests:
     laeken,
     parallel,
-    testthat
+    testthat,
+    pdftools,
+    yaml
 Imports:
     utils,
     stats,
@@ -74,10 +76,10 @@ Collate:
     'GUIfunctions.R'
     'indivRisk.R'
     'infoLoss.R'
-    'KI_createSdcObj.R'
-    'ki_access_utilities.R'
-    'KI_applyAnonymization.R'
-    'ki_anonymization_utilities.R'
+    'AI_createSdcObj.R'
+    'ai_access_utilities.R'
+    'AI_applyAnonymization.R'
+    'ai_anonymization_utilities.R'
     'LocalRecProg.R'
     'localSupp.R'
     'localSuppression.R'
 
@@ -19,10 +19,10 @@ S3method(summary,freqCalc)
 S3method(summary,micro)
 S3method(summary,pram)
 export("strataVar<-")
+export(AI_applyAnonymization)
+export(AI_createSdcObj)
 export(IL_correl)
 export(IL_variables)
-export(KI_applyAnonymization)
-export(KI_createSdcObj)
 export(LocalRecProg)
 export(addGhostVars)
 export(addNoise)
 
@@ -1,3 +1,23 @@
+# 5.8.1
+- New AI-assisted anonymization features:
+  + `AI_createSdcObj()`: LLM-assisted variable classification into SDC roles
+    (key variables, numerical variables, PRAM variables, weight, household ID)
+  + `AI_applyAnonymization()`: Agentic loop that proposes, evaluates, and refines
+    anonymization strategies using structured tool calling
+  + Provider-agnostic LLM access via `query_llm()` supporting OpenAI, Anthropic,
+    and any OpenAI-compatible endpoint (Ollama, Azure, vLLM)
+  + Privacy by design: only metadata (variable names, types, cardinality, factor
+    levels) is sent to the LLM — never the actual microdata
+  + Combined utility score for quantitative strategy comparison
+  + New vignette: "AI-Assisted Statistical Disclosure Control with sdcMicro"
+- AI integration in `sdcApp()`:
+  + New "AI-Assisted" tab for interactive strategy generation and comparison
+  + "AI suggest variables" button in the setup panel with auto-fill
+  + Applied AI methods tracked in the Reproducibility tab
+- Rename function prefixes from `KI_` to `AI_` for consistency
+- Remove `shinyBS` dependency; replaced with pure R+JS implementations
+- Fix deprecated enum-to-float arithmetic warnings in C++ code (`RankSwapping.h`, `Mdav.h`) flagged by clang 21+ and gcc 14+
+
 # 5.7.10
 - fix inconsistency in `rankSwap()` Output [Issue #361]
 - remove shinyBS dependency in `sdcApp`
 
@@ -0,0 +1,223 @@
+#' AI_applyAnonymization: Automatically apply anonymization strategy using LLM
+#'
+#' Uses an agentic loop to explore multiple anonymization strategies.
+#' The LLM proposes strategies as structured tool calls, each is evaluated
+#' with a combined utility score (lower is better), and the best is selected.
+#' A strategy that cannot be applied or scored is skipped rather than aborting
+#' the run; an error is raised only if no strategy of the initial batch succeeds.
+#' After the initial batch, \code{max_iter} refinement rounds each ask the LLM
+#' for one further strategy, given the results evaluated so far.
+#'
+#' @param sdcObj An object of class sdcMicroObj.
+#' @param k Desired k-anonymity level (default 3).
+#' @param verbose If \code{TRUE}, prints progress and scores for each strategy.
+#' @param model LLM model identifier. If \code{NULL}, a default is chosen per provider.
+#' @param api_key API key. If \code{NULL}, auto-detected from environment variables.
+#' @param provider LLM provider: \code{"openai"} (default), \code{"anthropic"}, or
+#'   \code{"custom"} for any OpenAI-compatible endpoint.
+#' @param base_url Base URL for the API endpoint. Required when \code{provider = "custom"}.
+#' @param confirm Logical; if \code{TRUE} (default) and session is interactive, shows the
+#'   best strategy and asks for confirmation before applying.
+#' @param max_iter Number of refinement iterations after the initial batch (default 2).
+#' @param n_strategies Number of strategies in the initial batch (default 3).
+#' @param weights Numeric vector of length 3: weights for suppression rate,
+#'   category loss, and IL1 in the utility score. Default \code{c(1/3, 1/3, 1/3)}.
+#' @param generateReport If \code{TRUE}, generates internal and external reports
+#'   (\code{anonymization_internal.html} and \code{anonymization_external.html}).
+#' @return Modified sdcMicroObj with the best anonymization strategy applied, or the
+#'   unchanged input \code{sdcObj} if the strategy is declined at the prompt.
+#' @author Matthias Templ
+#' @export
+#' @examples
+#' \dontrun{
+#' if (interactive() && nzchar(Sys.getenv("OPENAI_API_KEY"))) {
+#'   library(sdcMicro)
+#'   data(testdata)
+#'   sdc <- AI_createSdcObj(dat = testdata, policy = "open", confirm = FALSE)
+#'   sdc <- AI_applyAnonymization(sdcObj = sdc, k = 3, verbose = TRUE, confirm = FALSE)
+#' }
+#' }
+AI_applyAnonymization <- function(sdcObj, k = 3, verbose = TRUE,
+                                  model = NULL, api_key = NULL,
+                                  provider = c("openai", "anthropic", "custom"),
+                                  base_url = NULL, confirm = TRUE,
+                                  max_iter = 2, n_strategies = 3,
+                                  weights = c(1/3, 1/3, 1/3),
+                                  generateReport = TRUE) {
+  provider <- match.arg(provider)
+  summary_info <- summarize_sdcObj_structure(sdcObj, k)
+  tool_schemas <- get_tool_schemas()
+  expert_prompt <- "You are an expert in statistical disclosure control."
+
+  # Applies one strategy to a fresh copy of the input, enforces k-anonymity via
+  # local suppression and scores the result against the original. Returns a
+  # result entry, or NULL when any step fails or the total score is not a
+  # finite number: such a score cannot be ranked and would make the
+  # comparisons below fail.
+  # `indent` and `fail_label` only shape the progress messages.
+  evaluate_candidate <- function(strategy, indent, fail_label) {
+    entry <- tryCatch({
+      candidate <- execute_tool_calls(sdcObj, strategy$calls)
+      if (is.null(candidate)) {
+        NULL
+      } else {
+        candidate <- localSuppression(candidate, k = k)
+        list(strategy = strategy, sdcObj = candidate,
+             score = ai_utility_score(sdcObj, candidate, weights))
+      }
+    }, error = function(e) {
+      if (verbose) message(sprintf("%s%s: %s", indent, fail_label, conditionMessage(e)))
+      NULL
+    })
+    if (is.null(entry)) return(NULL)
+
+    total <- entry$score$total
+    if (!is.numeric(total) || length(total) != 1L || !is.finite(total)) {
+      if (verbose) message(indent, fail_label, ": utility score is not finite.")
+      return(NULL)
+    }
+    if (verbose) {
+      message(sprintf("%sU=%.4f (S=%.4f, C=%.4f, IL1=%.4f)", indent,
+                      total, entry$score$suppression_rate,
+                      entry$score$category_loss, entry$score$il1))
+    }
+    entry
+  }
+
+  # ====== Phase 1: Batch ======
+  if (verbose) message("=== Batch phase: requesting ", n_strategies, " strategies ===")
+
+  prompt <- build_agentic_prompt(summary_info, k, n_strategies, tool_schemas)
+
+  # Try native tool calling for openai/anthropic, text fallback for custom
+  if (provider %in% c("openai", "anthropic")) {
+    response <- query_llm(
+      prompt = prompt, system_prompt = expert_prompt,
+      provider = provider, model = model, api_key = api_key,
+      base_url = base_url, tools = tool_schemas
+    )
+    # The prompt asks for a JSON strategies array even with native tools, so
+    # the reply is either tool calls (treated as one strategy) or JSON text.
+    if (length(response$tool_calls) > 0) {
+      strategies <- list(list(
+        name = "native",
+        reasoning = if (!is.null(response$content)) response$content else "",
+        calls = response$tool_calls
+      ))
+    } else if (!is.null(response$content)) {
+      strategies <- tryCatch(
+        parse_strategies_json(response$content),
+        error = function(e) {
+          stop("Failed to parse batch strategies from LLM: ", e$message, call. = FALSE)
+        }
+      )
+    } else {
+      stop("LLM returned neither tool calls nor content.", call. = FALSE)
+    }
+  } else {
+    # Text/JSON fallback
+    content <- query_llm(
+      prompt = prompt, system_prompt = expert_prompt,
+      provider = provider, model = model, api_key = api_key,
+      base_url = base_url
+    )
+    # Same guard as in the refinement phase: unwrap a list-shaped reply
+    if (is.list(content)) content <- content$content
+    strategies <- parse_strategies_json(content)
+  }
+
+  # Evaluate each strategy; failed ones are simply left out of `results`
+  results <- list()
+  for (i in seq_along(strategies)) {
+    strat <- strategies[[i]]
+    if (verbose) {
+      name <- if (!is.null(strat$name)) strat$name else paste("Strategy", i)
+      message(sprintf("  Evaluating %s...", name))
+    }
+    entry <- evaluate_candidate(strat, "    ", "Failed")
+    if (!is.null(entry)) results[[length(results) + 1]] <- entry
+  }
+
+  if (length(results) == 0) {
+    stop("All strategies failed. Cannot proceed.", call. = FALSE)
+  }
+
+  # Find best (lowest total score)
+  best_idx <- which.min(vapply(results, function(r) r$score$total, numeric(1)))
+  best <- results[[best_idx]]
+
+  # ====== Phase 2: Refinement ======
+  for (iter in seq_len(max_iter)) {
+    if (verbose) message(sprintf("=== Refinement iteration %d/%d ===", iter, max_iter))
+
+    ref_prompt <- build_refinement_prompt(results, summary_info, k)
+
+    # Refinement always uses text/JSON mode (no native tools) for reliable parsing
+    ref_content <- query_llm(
+      prompt = ref_prompt,
+      system_prompt = "You are an expert in statistical disclosure control. Always respond with valid JSON only, no markdown or prose.",
+      provider = provider, model = model, api_key = api_key,
+      base_url = base_url
+    )
+    # Handle query_llm returning a list (native mode) vs character (text mode)
+    if (is.list(ref_content)) ref_content <- ref_content$content
+    refined <- tryCatch(
+      parse_single_strategy_json(ref_content),
+      error = function(e) {
+        if (verbose) message("  Could not parse refinement response, skipping.")
+        NULL
+      }
+    )
+    if (is.null(refined)) next
+
+    entry <- evaluate_candidate(refined, "  ", "Refinement failed")
+    if (is.null(entry)) next
+
+    # Keep every evaluated refinement: the next round's prompt is built from `results`
+    results[[length(results) + 1]] <- entry
+    if (entry$score$total < best$score$total) {
+      best <- entry
+      if (verbose) message("  -> New best!")
+    }
+  }
+
+  # ====== Show result + confirm ======
+  if (verbose) {
+    message(sprintf("\n=== Best strategy: '%s' (U=%.4f) ===",
+                    if (!is.null(best$strategy$name)) best$strategy$name else "best",
+                    best$score$total))
+    reasoning <- best$strategy$reasoning
+    # any(): reasoning may hold several strings, and `&&` needs a scalar
+    if (!is.null(reasoning) && any(nzchar(reasoning))) {
+      cat(strwrap(reasoning, width = 75), sep = "\n")
+      cat("\n")
+    }
+    message(sprintf("  Suppression rate: %.4f", best$score$suppression_rate))
+    message(sprintf("  Category loss:    %.4f", best$score$category_loss))
+    message(sprintf("  IL1:              %.4f", best$score$il1))
+  }
+
+  # Any answer other than q/quit/n/no (including empty input) accepts the strategy
+  if (confirm && interactive()) {
+    answer <- readline("Apply this strategy? [Y/n/q] ")
+    answer <- tolower(trimws(answer))
+    if (answer %in% c("q", "quit")) {
+      message("Aborted by user.")
+      return(sdcObj)
+    }
+    if (answer %in% c("n", "no")) {
+      message("Rejected. Returning original sdcObj unchanged.")
+      return(sdcObj)
+    }
+  }
+
+  sdcObj <- best$sdcObj
+
+  # Reports are produced by report() under fixed file names
+  if (generateReport) {
+    report(sdcObj, filename = "anonymization_internal.html", internal = TRUE)
+    report(sdcObj, filename = "anonymization_external.html", internal = FALSE)
+  }
+
+  return(sdcObj)
+}