Skip to content

[Bug] Slicing can exceed what is necessary #330

@emstruong

Description

@emstruong

Hello,

When I select the following with the slicer: https://github.com/brandmaier/semtree/blob/7e95acf7461cf1e3d273784e5aa46eadea1b4759/R/growTree.R#L111

The slicer shows that this snippet is what causes the values of model.

function(model=NULL, mydata=NULL, control=NULL, invariance=NULL, meta=NULL, edgelabel=NULL, depth=0, constraints=NULL, ...) {
    node <- list()
    node$left_child <- NULL
    node$right_child <- NULL
    node$caption <- "TERMINAL"
    node$N <- dim(mydata)[1]
    class(node) <- "semtree"
    fulldata <- mydata
    fullmeta <- meta
    if(control$mtry > 0) {
        model.names <- names(mydata)[meta$model.ids]
        covariate.names <- names(mydata)[meta$covariate.ids]
        mydata <- sampleColumns(mydata, names(mydata)[meta$covariate.ids], control$mtry)
        meta$model.ids <- sapply(model.names, function(x) {
      which(x == names(mydata))
    })
        names(meta$model.ids) <- NULL
        meta$covariate.ids <- unlist(lapply(covariate.names, function(x) {
      which(x == names(mydata))
    }))
        node$colnames <- colnames(mydata)
        if(control$verbose) { ui_message("Subsampled predictors: ", paste(node$colnames[meta$covariate.ids])) }
    }
    arguments <- list(...)
    if("forced_splits" %in% names(arguments) && !is.null(arguments$forced_splits)) { 
            forced_splits <- arguments$forced_splits
            model.names <- names(mydata)[meta$model.ids]
            covariate.names <- names(mydata)[meta$covariate.ids]
            forcedsplit.name <- forced_splits[1]
            mydata <- fulldata[, c(model.names, forcedsplit.name) ]
            node$colnames <- colnames(mydata)
            meta$model.ids <- sapply(model.names, function(x) {
      which(x == names(mydata))
    })
            names(meta$model.ids) <- NULL
            meta$covariate.ids <- unlist(lapply(covariate.names, function(x) {
      which(x == names(mydata))
    }))
        } else 
    { forced_splits <- NULL }
    node$p.values.valid <- control$method != "cv"
    node$lr <- NA
    node$edge_label <- edgelabel
    if(depth == 0 & !control$refit & length(constraints) == 0) { node$model <- model } else
    {
            if(control$sem.prog == "OpenMx") {
                full.model <- mxAddNewModelData(
        model = model, data = mydata,
        name = "BASE MODEL"
      )
                node$model <- try(
        suppressMessages(OpenMx::mxTryHard(
          model = full.model, paste = FALSE,
          silent = TRUE, bestInitsOutput = FALSE
        )),
        silent = TRUE
      )
            }
            if(control$sem.prog == "lavaan") { node$model <- try(suppressWarnings(eval(
        parse(text = paste("lavaan::", model@Options$model.type,
          "(lavaan::parTable(model),data=mydata,missing='",
          model@Options$missing, "')",
          sep = ""
        ))
      )), silent = T) }
            if(control$sem.prog == "ctsem") {
                full.model <- suppressMessages(try(
        ctsemOMX::ctFit(
          dat = mydata[, -meta$covariate.ids],
          ctmodelobj = model$ctmodelobj,
          dataform = "wide",
          objective = model$ctfitargs$objective,
          stationary = model$ctfitargs$stationary,
          optimizer = model$ctfitargs$optimizer,
          retryattempts = ifelse(model$ctfitargs$retryattempts >= 20,
            yes = model$ctfitargs$retryattempts,
            no = 20
          ),
          carefulFit = model$ctfitargs$carefulFit,
          showInits = model$ctfitargs$showInits,
          asymptotes = model$ctfitargs$asymptotes,
          meanIntervals = model$ctfitargs$meanIntervals,
          discreteTime = model$ctfitargs$discreteTime,
          verbose = model$ctfitargs$verbose,
          transformedParams = model$ctfitargs$transformedParams
        )
      ))
                full.model$mxobj@name <- "BASE MODEL"
                node$model <- full.model
            }
        }
    if(is(node$model, "try-error")) {
        node$term.reason <- node$model[[1]]
        node$model <- NULL
        return(node)
    }
    if(is.null(node$model)) {
        node$term.reason <- "Model was NULL! Model could not be estimated."
        return(node)
    }
    if(control$sem.prog == "OpenMx") {
        S3summary <- getS3method("summary", "MxModel")
        node$params <- S3summary(node$model)$parameters[, 5]
        names(node$params) <- S3summary(node$model)$parameters[, 1]
        node$params_sd <- S3summary(node$model)$parameters[, 6]
        node$param_names <- S3summary(node$model)$parameters[, 1]
    }
    if(control$sem.prog == "lavaan") {
        node$params <- lavaan::coef(node$model)
        names(node$params) <- names(lavaan::coef(node$model))
        parameters <- lavaan::parameterEstimates(node$model)
        for(i in 1:nrow(parameters)) if(!is.null(parameters$label)) { if(parameters$label[i] == "") { parameters$label[i] <- paste(parameters$lhs[i], parameters$op[i], parameters$rhs[i], sep = "") } }
        if(is.null(parameters$label)) {
            label <- paste(parameters$lhs, parameters$op, parameters$rhs, sep = "")
            parameters <- cbind(parameters, label)
        }
        se <- rep(NA, length(unique(parameters$se)))
        for(i in 1:length(unique(parameters$label))) for(j in 1:nrow(parameters)) if(unique(parameters$label)[i] == parameters$label[j]) { se[i] <- parameters$se[j] }
        node$params_sd <- se
        node$param_names <- names(lavaan::coef(node$model))
    }
    if(control$sem.prog == "ctsem")
        { if(control$ctsem_sd) { 
                ctsem_summary <- summary(node$model)
                node$params <- ctsem_summary$ctparameters[, "Value"]
                names(node$params) <- rownames(ctsem_summary$ctparameters)
                node$params_sd <- ctsem_summary$ctparameters[, "StdError"]
                node$param_names <- rownames(ctsem_summary$ctparameters)
            } else 
        {
                ctsem_coef <- coef.ctsemFit(node$model)
                node$params <- ctsem_coef
                node$params_sd <- NA
                node$param_names <- names(ctsem_coef)
            } }
    if(!is.null(constraints$focus.parameters)) { node$df <- length(constraints$focus.parameters) } else
    { node$df <- length(node$param_names) }
    node$node_id <- getGlobal("global.node.id")
    if(!is.na(control$min.N))
        { if(node$N <= control$min.N) {
            node$term.reason <- "Minimum number of cases in leaf node"
            return(node)
        } }
    if(!is.na(control$max.depth))
        { if(depth >= control$max.depth) {
            node$term.reason <- "Maximum depth reached in leaf node"
            return(node)
        } }
    result <- NULL
    if(control$method == "naive") { result <- tryCatch(
      ################################################
      naiveSplit(node$model, mydata, control, invariance, meta, constraints = constraints, ...)
      ################################################
      ,
      error = function(e) {
        cat(paste("Error occured!", e, sep = "\n"))
        traceback()
        return(NULL)
      }
    ) } else
    if(control$method == "score") { result <- tryCatch(
      ################################################
      result <- ScoreSplit(node$model, mydata, control, invariance, meta, constraints = constraints, ...)
      ################################################
      ,
      error = function(e) {
        cat(paste("Error occured!", e, sep = "\n"))
        traceback()
        return(NULL)
      }
    ) } else
        if(control$method == "fair") { 
                    control$fair3Step <- FALSE
                    result <- tryCatch(
      ################################################
      fairSplit(node$model, mydata, control, invariance, meta, constraints = constraints, ...)
      ################################################
      ,
      error = function(e) {
        cat(paste("Error occured!", e, sep = "\n"))
        return(NULL)
      }
    )
                } else 
            if(control$method == "fair3") { 
                        control$fair3Step <- TRUE
                        result <- tryCatch(
      ################################################
      fairSplit(node$model, mydata, control, invariance, meta, constraints = constraints, ...)
      ################################################
      ,
      error = function(e) {
        cat(paste("Error occured!", e, sep = "\n"))
        return(NULL)
      }
    )
                    } else 
                { stop() }
    node$lr <- NA
    if(!is.null(result)) {
        node$lr <- result$LL.max
        node$result <- result
    }
    if(!is.null(result$p.max)) { node$p <- result$p.max } else
    {
            node$p <- pchisq(node$lr, df = node$df, lower.tail = F)
            if(control$use.maxlr) {
                if(!is.factor(mydata[, result$name.max])) {
                    props <- cumsum(table(mydata[, result$name.max])) / node$N
                    split_val_lhs <- as.numeric(names(which(props >= control$strucchange.from)[1]))
                    split_val_rhs <- as.numeric(names(which(props >= control$strucchange.to)[1]))
                    btn_matrix_max <- result$btn.matrix[, result$btn.matrix["variable", ] ==
          result$name.max, drop = FALSE]
                    num_split_val <- as.numeric(btn_matrix_max["split val", ])
                    n1 <- which(num_split_val <= split_val_lhs)
                    n1 <- n1[length(n1)]
                    n2 <- which(num_split_val >= split_val_rhs)[1]
                    if(length(n1) == 0) { n1 <- 1 }
                    if(is.na(n2)) { n2 <- length(num_split_val) }
                    LR <- as.numeric(btn_matrix_max["LR", n1:n2])
                    max_pos <- which.max(LR) + n1 - 1
                    node$result$LL.max <- node$lr <- as.numeric(btn_matrix_max["LR", max_pos])
                    node$result$split.max <- as.numeric(btn_matrix_max["split val", max_pos])
                }
                node$p <- computePval_maxLR(
        maxLR = node$lr, q = node$df,
        covariate = mydata[, result$col.max], from = control$strucchange.from,
        to = control$strucchange.to, nrep = control$strucchange.nrep
      )
            }
        }
    if(control$mtry > 0) {
        mydata <- fulldata
        meta <- fullmeta
    }
    if(!is.null(forced_splits)) {
        mydata <- fulldata
        cont.split <- TRUE
    }
}

Although as far as I can tell, really, model is merely passed as an argument to the growTree() function.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions