diff --git a/.Rbuildignore b/.Rbuildignore index d9bd1f3..f5d97ef 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,4 +4,10 @@ ^codecov.yml$ ^cran-comments.md$ ^README.Rmd$ -^LICENSE.md$ \ No newline at end of file +^LICENSE.md$ +^inst$ +^cran-comments\.md$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ +^CRAN-SUBMISSION$ diff --git a/.github/ISSUE_TEMPLATE/issue_template.md b/.github/ISSUE_TEMPLATE/issue_template.md new file mode 100644 index 0000000..d7c9eba --- /dev/null +++ b/.github/ISSUE_TEMPLATE/issue_template.md @@ -0,0 +1,16 @@ +--- +name: Bug report or feature request +about: Describe a bug you've seen or make a case for a new feature +--- + +Please briefly describe your problem and what output you expect. If you have a question, please don't use this form. Instead, ask on or . + +Please include a minimal reproducible example (AKA a reprex). If you've never heard of a [reprex](http://reprex.tidyverse.org/) before, start by reading . + +For more advice on how to write a great issue, see . + +Brief description of the problem + +```r +# insert reprex here +``` diff --git a/.github/workflows/rhub.yaml b/.github/workflows/rhub.yaml new file mode 100644 index 0000000..74ec7b0 --- /dev/null +++ b/.github/workflows/rhub.yaml @@ -0,0 +1,95 @@ +# R-hub's generic GitHub Actions workflow file. It's canonical location is at +# https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml +# You can update this file to a newer version using the rhub2 package: +# +# rhub::rhub_setup() +# +# It is unlikely that you need to modify this file manually. + +name: R-hub +run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}" + +on: + workflow_dispatch: + inputs: + config: + description: 'A comma separated list of R-hub platforms to use.' + type: string + default: 'linux,windows,macos' + name: + description: 'Run name. You can leave this empty now.' + type: string + id: + description: 'Unique ID. 
You can leave this empty now.' + type: string + +jobs: + + setup: + runs-on: ubuntu-latest + outputs: + containers: ${{ steps.rhub-setup.outputs.containers }} + platforms: ${{ steps.rhub-setup.outputs.platforms }} + + steps: + # NO NEED TO CHECKOUT HERE + - uses: r-hub/actions/setup@v1 + with: + config: ${{ github.event.inputs.config }} + id: rhub-setup + + linux-containers: + needs: setup + if: ${{ needs.setup.outputs.containers != '[]' }} + runs-on: ubuntu-latest + name: ${{ matrix.config.label }} + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.containers) }} + container: + image: ${{ matrix.config.container }} + + steps: + - uses: r-hub/actions/checkout@v1 + - uses: r-hub/actions/platform-info@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/setup-deps@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/run-check@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + + other-platforms: + needs: setup + if: ${{ needs.setup.outputs.platforms != '[]' }} + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.label }} + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.platforms) }} + + steps: + - uses: r-hub/actions/checkout@v1 + - uses: r-hub/actions/setup-r@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} + - uses: r-hub/actions/platform-info@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/setup-deps@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} + - uses: r-hub/actions/run-check@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 7cc8b6a..2c7b1e9 100644 --- a/.gitignore +++ b/.gitignore @@ 
-1,50 +1,42 @@ # History files .Rhistory .Rapp.history - # Session Data files .RData .RDataTmp - # User-specific files .Ruserdata - # Example code in package build process *-Ex.R - # Output files from R CMD build /*.tar.gz - # Output files from R CMD check /*.Rcheck/ - # RStudio files .Rproj.user/ - # produced vignettes vignettes/*.html vignettes/*.pdf - # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 .httr-oauth - # knitr and R markdown default cache directories *_cache/ /cache/ - # Temporary files created by R markdown *.utf8.md *.knit.md - # R Environment Variables .Renviron - # pkgdown site docs/ - # translation temp files po/*~ - # RStudio Connect folder rsconnect/ .Rproj.user +*.Rproj +.DS_Store +docs +CRAN-SUBMISSION +cran-comments.md +*.html diff --git a/DESCRIPTION b/DESCRIPTION index f6bedf2..d7c3f54 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rocrateR Title: RO-Crate R Package Wrapper -Version: 0.0.1.9000 +Version: 0.0.1 Authors@R: c( person(given = "Roberto", family = "Villegas-Diaz", @@ -9,15 +9,22 @@ Authors@R: c( comment = c(ORCID = "0000-0001-5036-8661")), person("Research Object community", role = c("cph"))) Description: R package for creating, manipulating and reading RO-Crates. Latest - supported version 1.2 of the specification. + supported version of the specification: <https://w3id.org/ro/crate/1.2/>. 
License: MIT + file LICENSE -Suggests: +Suggests: + spelling, testthat (>= 3.0.0) Config/testthat/edition: 3 Encoding: UTF-8 Language: en-GB +URL: https://github.com/ResearchObject/ro-crate-r/ +BugReports: https://github.com/ResearchObject/ro-crate-r/issues/ Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Imports: + digest, jsonlite, - tibble + tibble, + zip +Depends: + R (>= 4.1.0) diff --git a/NAMESPACE b/NAMESPACE index 2a23d48..da7def1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,13 +1,22 @@ # Generated by roxygen2: do not edit by hand +S3method(bag_rocrate,character) +S3method(bag_rocrate,rocrate) S3method(entity,default) +S3method(print,entity) S3method(print,rocrate) +export(add_entities) export(add_entity) export(add_entity_value) +export(bag_rocrate) export(entity) +export(get_entity) export(is_rocrate) +export(is_rocrate_bag) export(read_rocrate) +export(remove_entities) export(remove_entity) export(rocrate) export(rocrate_5s) +export(unbag_rocrate) export(write_rocrate) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..07d9e35 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,3 @@ +# rocrateR 0.0.1 + +* Initial CRAN submission. diff --git a/R/entity.R b/R/entity.R index ba947c2..7d8b254 100644 --- a/R/entity.R +++ b/R/entity.R @@ -66,7 +66,7 @@ add_entity <- function(rocrate, entity, overwrite = FALSE) { #' pair `{key}`-`{value}` within `@graph`. #' #' @inheritParams is_rocrate -#' @param id String with the ID of the RO-Crate entity within `@graph` +#' @param id String with the ID of the RO-Crate entity within `@graph`. #' @param key String with the `key` of the entity with `id` to be modified. #' @param value String with the `value` for `key`. 
#' @param overwrite Boolean flag to indicate if the existing value (if any), @@ -112,12 +112,41 @@ add_entity_value <- function(rocrate, id, key, value, overwrite = TRUE) { return(rocrate) } +#' Wrapper for \link[rocrateR]{add_entity} +#' +#' Wrapper for \link[rocrateR]{add_entity}, can be used to add multiple entities. +#' +#' @inheritParams add_entity +#' @param entity List with entity objects. +#' @param quiet Boolean flag to indicate if status messages should be hidden +#' (default: `FALSE`). +#' +#' @returns Updated RO-Crate with the new entities. +#' @export +add_entities <- function(rocrate, entity, overwrite = FALSE, quiet = FALSE) { + for (i in seq_along(entity)) { + if (!quiet) { + # extract entity @id, if missing, then use index, `i` + ent_id <- getElement(entity[[i]], "@id") + ent_id <- ifelse(is.null(ent_id), + paste0("with index=", i), + paste0("with @id='", ent_id, "'") + ) + message("Adding entity ", ent_id, "...\n") + } + # call the add_entity function + rocrate <- rocrate |> + rocrateR::add_entity(entity[[i]], overwrite = overwrite) + } + return(rocrate) +} + #' Create a data entity #' #' @param x New entity. If a single value (e.g., `character`, `numeric`) is #' given, this is assumed to be the entity's `@id`, if a `list` is given, #' this is assumed to be a complete entity. Other options are objects of -#' type `person` and `organisation` (equivalenly `organization`). +#' type `person` and `organisation` (equivalently `organization`). #' @param ... Optional additional entity values/properties. Used when `x` is #' a single value. #' @@ -148,6 +177,8 @@ entity <- function(x, ...) { #' @export entity.default <- function(x, ...) { + # define local bindings + id <- type <- NULL args <- list(...) new_entity <- list( `@id` = c(x, getElement(args, "id"))[1], @@ -160,6 +191,105 @@ entity.default <- function(x, ...) 
{ return(new_entity) } +#' Get entity(ies) +#' +#' @inheritParams is_rocrate +#' @param id String with the ID of the RO-Crate entity within `@graph` +#' (optional if `type` is provided). Alternatively, an entity object / list +#' with `@id` and `@type`. +#' @param type String with the type of the RO-Crate entity(ies) within `@graph` +#' to retrieve (optional if `id` is provided). +#' +#' @returns List with found entity object(s), if any, `NULL` otherwise. +#' @export +#' +#' @examples +#' basic_crate <- rocrateR::rocrate() +#' +#' # create entity for an organisation +#' organisation_uol <- rocrateR::entity( +#' x = "https://ror.org/04xs57h96", +#' type = "Organization", +#' name = "University of Liverpool", +#' url = "http://www.liv.ac.uk" +#' ) +#' +#' # create an entity for a person +#' person_rvd <- rocrateR::entity( +#' x = "https://orcid.org/0000-0001-5036-8661", +#' type = "Person", +#' name = "Roberto Villegas-Diaz", +#' affiliation = list(`@id` = organisation_uol$`@id`) +#' ) +#' +#' basic_crate_person <- basic_crate |> +#' rocrateR::add_entity(person_rvd) |> +#' rocrateR::add_entity_value(id = "./", key = "author", value = list(`@id` = person_rvd$`@id`)) |> +#' rocrateR::add_entity(organisation_uol) |> +#' rocrateR::get_entity(person_rvd) +#' +#' basic_crate_person[[1]]$name == person_rvd$name +#' basic_crate_person[[1]]$`@id` == person_rvd$`@id` +get_entity <- function(rocrate, id = NULL, type = NULL) { + # check the `rocrate` object + is_rocrate(rocrate) + + # if `id` is given as an entity object / list, extract @id and @type + if (is.list(id)) { + type <- getElement(id, "@type") + id <- getElement(id, "@id") + } + + # check that either `id` or `type` were provided + if (all(is.null(id), is.null(type))) { + stop("You must provide a value for either `id` or `type`!", + call. 
= FALSE) + } + + # initialise local variables + idx_by_id <- idx_by_type <- NULL + + # if `id` was provided, then find elements with that @id + if (!is.null(id)) { + idx_by_id <- .find_id_index(rocrate, id) + } + # if `type` was provided, then find elements with that @type + if (!is.null(type)) { + idx_by_type <- .find_type_index(rocrate, type) + } + + # combine (if both id and type were provided) the indices + idx <- NULL + if (!is.null(id) && !is.null(type)) { + idx <- idx_by_id & idx_by_type + } else if (!is.null(id)) { + idx <- idx_by_id + } else if (!is.null(type)) { + idx <- idx_by_type + } + if (sum(idx) > 0) { # at least one entity was found + matching_entities <- rocrate$`@graph`[idx] |> + lapply(function(x) { + class(x) <- unique(c("entity", class(x))) + return(x) + }) + return(matching_entities) + } else { + msg <- "No entities were found with " + msg_id <- paste0("@id = '", id, "'") + msg_type <- paste0("@type = '", type, "'") + warning(msg, + ifelse(is.null(id), "", msg_id), + ifelse(!is.null(id) && !is.null(type), " and ", ""), + ifelse(is.null(type), "", msg_type), + "!", + call. = FALSE) + } + + # return NULL invisibly, if no entities were found + return(invisible(NULL)) +} + #' Remove entity #' #' @inheritParams is_rocrate @@ -216,3 +346,21 @@ remove_entity <- function(rocrate, entity) { return(rocrate) } + +#' Wrapper for \link[rocrateR]{remove_entity} +#' +#' Wrapper for \link[rocrateR]{remove_entity}, can be used to remove multiple +#' entities. +#' +#' @inheritParams remove_entity +#' +#' @returns Updated RO-Crate. +#' @export +remove_entities <- function(rocrate, entity) { + for (i in seq_along(entity)) { + # call the remove_entity function + rocrate <- rocrate |> + rocrateR::remove_entity(entity[[i]]) + } + return(rocrate) +} diff --git a/R/print.R b/R/print.R index 34c7573..860c5ae 100644 --- a/R/print.R +++ b/R/print.R @@ -29,3 +29,29 @@ print.rocrate <- function(x, ...) 
{ # return (invisibly) the input object invisible(x) } + +#' Print RO-Crate entity +#' +#' Print RO-Crate entity, S3 method for class 'entity'. +#' +#' @param x RO-Crate entity object, see [rocrateR::entity]. +#' @param ... Optional arguments, not used. +#' +#' @returns Invisibly the input RO-Crate entity, `x`. +#' @export +#' +#' @examples +#' rocrateR::rocrate() |> +#' rocrateR::get_entity("./") +print.entity <- function(x, ...) { + # check the `x` object + .validate_entity(x) + + # display formatted RO-Crate entity + message("RO-Crate entity:", + "\n @id = '", getElement(x, "@id"), "'", + "\n @type = '", getElement(x, "@type"), "'" + ) + # return (invisibly) the input object + invisible(x) +} diff --git a/R/rocrate.R b/R/rocrate.R index 57d070d..9b3d7a2 100644 --- a/R/rocrate.R +++ b/R/rocrate.R @@ -2,7 +2,7 @@ #' #' Create a new RO-Crate object. This object includes basic skeleton for the #' RO-Crate metadata descriptor (`ro-crate-metadata.json`) file, as described -#' in the official documentation: https://w3id.org/ro/crate/1.2 > +#' in the official documentation: https://w3id.org/ro/crate/1.2/ > #' [Root Data Entity](https://www.researchobject.org/ro-crate/specification/1.2/root-data-entity.html). #' #' @param ... Optional entities to include in the RO-Crate (e.g., author). 
@@ -41,15 +41,15 @@ rocrate <- function(..., new_ro_crate <- list( `@context` = context, `@graph` = list( - list( - `@id` = "ro-crate-metadata.json", - `@type` = "CreativeWork", + rocrateR::entity( + x = "ro-crate-metadata.json", + type = "CreativeWork", about = list(`@id` = "./"), conformsTo = list(`@id` = conformsTo) ), - list( - `@id` = "./", - `@type` = "Dataset", + rocrateR::entity( + x = "./", + type = "Dataset", name = name, description = description, datePublished = as.character(datePublished), @@ -111,7 +111,7 @@ rocrate_5s <- function(..., v5scrate_id <- paste0("https://w3id.org/5s-crate/", v5scrate) prof_5scrate <- list( `@id` = v5scrate_id, - `@type` = "Profile", + `@type` = c("CreativeWork", "Profile"), name = "Five Safes RO-Crate profile" ) diff --git a/R/utils-bag.R b/R/utils-bag.R new file mode 100644 index 0000000..c4ee0d3 --- /dev/null +++ b/R/utils-bag.R @@ -0,0 +1,419 @@ +#' Bag the contents of an RO-Crate +#' +#' Bag the contents of an RO-Crate using the BagIt file packaging format v1.0. +#' For more details see the definition: +#' \doi{10.17487/RFC8493} +#' +#' @param x A string to a path containing at the very minimum an RO-Crate +#' metadata descriptor file, `ro-crate-metadata.json`. Alternatively, an +#' object with the \link[rocrateR]{rocrate} class. +#' @param ... Additional parameters, see below. +#' +#' @export +#' +#' @family bag_rocrate +# @examples +bag_rocrate <- function(x, ...) { + UseMethod("bag_rocrate", x) +} + +#' @rdname bag_rocrate +#' +#' @param output String with path where the RO-Crate bag will be stored +#' (default: `x` - same path as the input value). +#' @param force_bag Boolean flag to indicate whether the force the creation of +#' a 'bag' even if not all the files were successfully bagged +#' (default: `FALSE` ~ check if all the files were copied successfully). +#' +#' @returns String with full path to the final RO-Crate bag. 
+#' +#' @export +bag_rocrate.character <- function(x, ..., output = x, force_bag = FALSE) { + # check a valid path was given + if (!dir.exists(x)) { + stop("The given path, `x`, does not exist!\n", + "Create with:\n\t`mkdir ", x, "`", call. = FALSE) + } + + # list all the files inside the given path + rocrate_files <- list.files(x, recursive = TRUE) + + # check if the given path is empty + if (length(rocrate_files) == 0) { + stop("No files were found inside the given path: \n", + x, call. = FALSE) + } + + # create an RO-Crate ID + rocrate_id <- paste0("rocrate-", digest::digest(Sys.time())) + + # create temporary directory, including `rocrate_id` + tmp_dir <- file.path(tempdir(), rocrate_id, "data") + + # create sub-directories + dir.create(tmp_dir, showWarnings = FALSE, recursive = TRUE) + on.exit(unlink(dirname(tmp_dir), recursive = TRUE, force = TRUE)) + + # copy files inside the temporary directory + rocrate_files_status <- rocrate_files |> + sapply(function(f) { + # ensure the target sub-directory exists + dir.create(dirname(file.path(tmp_dir, f)), + showWarnings = FALSE, recursive = TRUE) + # create copy of file + file.copy(file.path(x, f), file.path(tmp_dir, f), overwrite = TRUE) + }) + + # check that all the files were copied, unless force_bag = TRUE + if (!all(rocrate_files_status) || force_bag) { + if (!force_bag) { + stop("It was not possible to bag all your files!\nMissing file(s):\n", + paste0(" - ", rocrate_files[!rocrate_files_status], collapse = "\n"), + "\n\nTo ignore this check, set `force_bag = TRUE`.", call. = FALSE) + } else { + warning("Forcing the creation of the RO-Crate bag! ", + "Note that this will ignore checking if all files were copied ", + "into the RO-Crate bag", + call. 
= FALSE) + } + } + + # create bag declaration + bagit_declaration(tmp_dir) + + # create bag manifest and stored one level above `tmp_dir` + bagit_manifest(tmp_dir, rocrate_files) + + # create BagIt tagmanifest + bagit_tagmanifest(dirname(tmp_dir), + list.files(dirname(tmp_dir), pattern = "txt$")) + + # create BagIt fetch file + bagit_fetch(tmp_dir) + + # compress bag contents inside original path + output_bag <- file.path(output, paste0(rocrate_id, ".zip")) + bag_files <- list.files(dirname(tmp_dir), + include.dirs = TRUE, + full.names = FALSE, + recursive = FALSE) + zip::zip(output_bag, files = bag_files, + mode = "cherry-pick", root = dirname(tmp_dir)) + + message("RO-Crate successfully 'bagged'!\nFor details, see: ", output_bag) + + # attempt to delete the temporary directory created to bag the RO-Crate + unlink(dirname(tmp_dir), recursive = TRUE, force = TRUE) + + # return path to RO-Crate bag invisibly + return(invisible(output_bag)) +} + +#' @rdname bag_rocrate +#' +#' @param path String with path to the root of the RO-Crate. +#' @param overwrite Boolean flag to indicate if the RO-Crate metadata descriptor +#' file should be overwritten if already inside `path` (default: `FALSE`). +#' +#' @export +bag_rocrate.rocrate <- function(x, ..., path, output = path, overwrite = FALSE, force_bag = FALSE) { + # check the `x` object + is_rocrate(x) + # check a valid path was given + if (!dir.exists(path)) { + stop("The given `path` does not exist!\nCreate with:\n\t`mkdir ", path, "`", + call. = FALSE) + } + # check if the given path contains an RO-Crate metadata descriptor file + if (file.exists(file.path(path, "ro-crate-metadata.json"))){ + if (overwrite) { + warning("Overwriting the RO-Crate metadata descriptor file!", call. = FALSE) + } else { + stop("The given `path` already contains an RO-Crate metadata descriptor ", + "file, `ro-crate-metadata.json`. To ignore this check, set ", + "`overwrite = TRUE` when calling this function!", call. 
= FALSE) + } + } + # write the RO-Crate metadata descriptor file + write_rocrate(x, file.path(path, "ro-crate-metadata.json")) + + # call the bag method for the given `path` + bag_rocrate(path, output = output, force_bag = force_bag) +} + +#' Generate BagIt declaration +#' +#' @param path String with path where the BagIt declaration will be stored. +#' @param version String with BagIt version (default: `"1.0"`)/ +#' +#' @keywords internal +#' @source https://www.rfc-editor.org/rfc/rfc8493.html#section-2.2.2 +bagit_declaration <- function(path, version = "1.0") { + declaration_lines <- c(paste0("BagIt-version: ", version), + "Tag-File-Character-Encoding: UTF-8") + writeLines(declaration_lines, + con = file.path(dirname(path), "bagit.txt")) +} + +#' @keywords internal +bagit_fetch <- function(path, rocrate = NULL) { + # to-do + # 1. read rocrate and find any file entities that have an external URL + # 2. list results from step 1 in a file called fetch.txt + # See: https://www.researchobject.org/ro-crate/specification/1.1/appendix/implementation-notes.html + # Also: https://www.rfc-editor.org/rfc/rfc8493.html#section-2.2.3 +} + +#' @keywords internal +bagit_manifest <- function(path, files, algo = "sha512") { + manifest_lines <- sapply(files, function(f) { + # generate checksum + checksum <- digest::digest(file.path(path, f), algo = algo, file = TRUE) + # combine checksum with file path & name + paste0(checksum, " data/", f) + }) + writeLines(manifest_lines, + con = file.path(dirname(path), paste0("manifest-", algo, ".txt"))) + return(invisible(manifest_lines)) +} + +#' @keywords internal +bagit_tagmanifest <- function(path, files, algo = "sha512") { + tagmanifest_lines <- sapply(files, function(f) { + # generate checksum + checksum <- digest::digest(file.path(path, f), algo = algo, file = TRUE) + # combine checksum with file path & name + paste0(checksum, " ", f) + }) + writeLines(tagmanifest_lines, + con = file.path(path, paste0("tagmanifest-", algo, ".txt"))) + 
return(invisible(tagmanifest_lines)) +} + +#' Check if path points to a valid RO-Crate bag +#' +#' @param path String with full path to a compressed file containing an RO-Crate +#' bag, see \link[rocrateR]{bag_rocrate} for details. Alternatively, a path +#' to a directory containing an RO-Crate bag. +#' @param algo String with algorithm used to generate the RO-Crate bag +#' (default: `"sha512"`). See \link[digest]{digest} for more details. +#' @param bagit_version String with version of BagIt used to generate the +#' RO-Crate bag (default: `"1.0"`). +#' See \doi{10.17487/RFC8493} for more details. +#' +#' @returns Returns invisibly the RO-Crate pointed by `path`. +#' @export +#' +#' @family bag_rocrate +is_rocrate_bag <- function(path, algo = "sha512", bagit_version = "1.0") { + # initialise object that will be returned + ro_crate <- NULL + + # check if given path is a directory or a file + idx <- c(dir.exists(path), file.exists(path)) + if (all(!idx)){ + stop("The given `path` is invalid!", call. = FALSE) + } else if(idx[1]) { # path is a valid directory + # no extra steps required + } else if (idx[2]) { # path is a valid file + # create temporary directory + tmp_dir <- file.path(tempdir(), digest::digest(Sys.time())) + on.exit(unlink(tmp_dir, recursive = TRUE, force = TRUE)) + + # extract contents of the RO-Crate bag inside temporary directory AND + # update path, so it points to the contents of the RO-Crate bag + path <- unbag_rocrate(path, output = tmp_dir, quiet = TRUE) + } + # call the .validate_rocrate_bag function, forwarding `bagit_version` so a + # non-default BagIt version is actually checked + ro_crate <- .validate_rocrate_bag(path, algo = algo, bagit_version = bagit_version) + return(invisible(ro_crate)) +} + +#' Verify if a given path points to a valid RO-Crate bag +#' +#' @inheritParams is_rocrate_bag +#' +#' @returns Returns invisibly the RO-Crate pointed by `path`. 
+#' @keywords internal +.validate_rocrate_bag <- function(path, algo = "sha512", bagit_version = "1.0") { + # list files inside the given path / top level only + rocrate_bag_files <- list.files(path, recursive = FALSE) + + # check that at least the following files & directory are in the given path + expected_contents <- c("bagit.txt", "data", paste0("manifest-", algo, ".txt")) + idx <- expected_contents %in% rocrate_bag_files + if (!all(idx)) { + stop("The given `path` is missing the following:\n", + paste0(" - ", expected_contents[!idx], "\n"), call. = FALSE) + } + + # list files inside the given path / all levels + rocrate_bag_files <- list.files(path, recursive = TRUE) + + # check for valid BagIt declaration + valid_bagit_declaration <- .validate_bagit_declaration(path, algo, bagit_version) + + # check integrity of manifest file + valid_bagit_manifest <- .validate_bagit_manifest(path, algo) + + # check integrity of tagmanifest file (if found) + if (file.exists(file.path(path, paste0("tagmanifest-", algo, ".txt")))) { + valid_bagit_tagmanifest <- + .validate_bagit_manifest(path, algo, manifest_suffix = "tagmanifest") + } else { + valid_bagit_tagmanifest <- list(status = TRUE) + } + + # validation overview + idx <- c( + valid_bagit_declaration$status, + valid_bagit_manifest$status, + valid_bagit_tagmanifest$status + ) + + if (any(!idx)) { + error_message <- "Invalid RO-Crate bag! 
The following issues were found:\n" + # BagIt declaration (required) + if (!idx[1]) { + error_message <- paste0( + error_message, + "\n BagIt declaration (bagit.txt) missing the following:\n", + paste0(" - ", valid_bagit_declaration$errors, collapse = "\n") + ) + } + # BagIt manifest (required) + if (!idx[2]) { + error_message <- paste0( + error_message, + "\n BagIt manifest contains invalid file(s):\n", + paste0(" - ", valid_bagit_manifest$errors, collapse = "\n") + ) + } + # BagIt tagmanifest (optional) + if (!idx[3]) { + error_message <- paste0( + error_message, + "\n BagIt tagmanifest contains invalid file(s):\n", + paste0(" - ", valid_bagit_tagmanifest$errors, collapse = "\n") + ) + } + # print error message and stop execution + stop(error_message, call. = FALSE) + } + + # if no errors were found, load and return the RO-Crate in the bag + rocrate_contents <- file.path(path, "data/ro-crate-metadata.json") |> + rocrateR::read_rocrate() + + message("Valid RO-Crate found!") + return(rocrate_contents) +} + +#' Validate BagIt declaration +#' +#' @inheritParams is_rocrate_bag +#' +#' @returns A list with `status` and `errors` identified. 
+#' @keywords internal +#' @rdname bagit_declaration +.validate_bagit_declaration <- function(path, algo = "sha512", bagit_version = "1.0") { + # load the BagIt declaration file + bagit_declaration_txt <- readLines(file.path(path, "bagit.txt")) + # expected lines + expected_bagit_declaration <- c(paste0("BagIt-version: ", bagit_version), + "Tag-File-Character-Encoding: UTF-8") + valid_bagit_declaration_validity <- + expected_bagit_declaration %in% bagit_declaration_txt + # return list with status: TRUE = all lines found, FALSE = missing line AND + # errors: vector of the missing lines (if any) + list( + status = all(valid_bagit_declaration_validity), + errors = expected_bagit_declaration[!valid_bagit_declaration_validity] + ) +} + +#' Validate BagIt manifest +#' +#' @inheritParams is_rocrate_bag +#' @param manifest_suffix String with suffix for the manifest file (default: +#' `"manifest"`). +#' +#' @returns A list with `status` and `errors` identified. +#' @keywords internal +#' @rdname bagit_manifest +.validate_bagit_manifest <- function(path, algo = "sha512", manifest_suffix = "manifest") { + # load the manifest file + manifest_filename <- paste0(manifest_suffix, "-", algo, ".txt") + bagit_manifest_txt <- file.path(path, manifest_filename) |> + utils::read.table(header = FALSE, col.names = c("checksum", "filename")) + # check all the files in the manifest file + bagit_manifest_txt_validity <- seq_len(nrow(bagit_manifest_txt)) |> + sapply(function(i) { + est_checksum <- file.path(path, bagit_manifest_txt[i, "filename"]) |> + digest::digest(algo = algo, file = TRUE) + est_checksum == bagit_manifest_txt[i, "checksum"] + }) + # return list with status: TRUE = all valid, FALSE = invalid file found AND + # errors: vector of invalid files (if any) + list( + status = all(bagit_manifest_txt_validity), + errors = bagit_manifest_txt[!bagit_manifest_txt_validity, "filename"] + ) +} + +#' 'Unbag' (extract) RO-Crate packed with BagIt +#' +#' @param path String with path to 
compressed file containing an RO-Crate bag. +#' @param output String with target path where the contents will be extracted +#' (default: `dirname(path)` - same directory as input `path`). +#' @param quiet Boolean flag to indicate if messages should be suppressed +#' (default: `FALSE` - display messages). +#' +#' @export +#' +#' @returns String with path to root of the RO-Crate, invisibly. +#' +#' @family bag_rocrate +unbag_rocrate <- function(path, output = dirname(path), quiet = FALSE) { + # check a valid path was given + if (!file.exists(path)) { + stop("The given path, `path`, does not exist!", call. = FALSE) + } + + # check if file has .zip extension (literal dot required) + if (!grepl("\\.zip$", path, ignore.case = TRUE)) { + stop("The given `path` does not point to a .zip file!", call. = FALSE) + } + + # check if the `output` directory exists, if not, then it creates it + if (!dir.exists(output)) { + dir.create(output, showWarnings = FALSE, recursive = TRUE) + } + + # extract contents inside the `output` path + zip::unzip(path, exdir = output) + + # list directories inside the RO-Crate bag + rocrate_bag_dir <- list.dirs(output, recursive = FALSE, full.names = FALSE) + ## filter out the payload directory (./data), exact name match only: + rocrate_bag_dir <- rocrate_bag_dir[rocrate_bag_dir != "data"] + + # check if the RO-Crate bag has only a root directory + if (length(rocrate_bag_dir) == 0) { + rocrate_bag_dir <- "." + } + + # check if the RO-Crate bag has more than one directory, only 1 is expected + if (length(unique(rocrate_bag_dir)) > 1) { + stop("A valid RO-Crate bag should have ONE and ONLY ONE root directory!", + "\nThe given path has the following: ", + paste0(" - ", unique(rocrate_bag_dir), "\n"), call. = FALSE) + } + + if (!quiet) { + message("RO-Crate bag successfully extracted! 
For details, see:\n", output) + } + + # path to root of the RO-Crate bag + return(invisible(file.path(output, rocrate_bag_dir))) +} diff --git a/R/utils-entity.R b/R/utils-entity.R index 0e4d78d..ac19250 100644 --- a/R/utils-entity.R +++ b/R/utils-entity.R @@ -47,6 +47,24 @@ # return(idx) } +#' Find `@type` index in RO-Crate +#' +#' Find `@type` index in RO-Crate. Useful to retrieve entities with a particular +#' type in the RO-Crate. +#' +#' @inheritParams add_entity_value +#' +#' @returns Boolean vector with index for entity(ies) with `@type`. +#' @keywords internal +.find_type_index <- function(rocrate, type) { + # check the `rocrate` object + is_rocrate(rocrate) + + # find the index in `@graph` with the matching {type} (at least one entry) + getElement(rocrate, "@graph") |> + sapply(\(x) type %in% getElement(x, "@type")) +} + #' Validate entity #' #' @inheritParams entity @@ -60,6 +78,7 @@ UseMethod(".validate_entity", x) } +#' @method validate_entity character #' @keywords internal .validate_entity.character <- function(x, ..., ent_name = NULL, required = "type") { has_elements <- sapply(required, \(x) !is.null(getElement(list(...), x))) @@ -67,6 +86,13 @@ .validate_entity_overview(required, ent_name) } +#' @method validate_entity entity +#' @keywords internal +.validate_entity.entity <- function(x, ..., ent_name = NULL, required = c("@id", "@type")) { + NextMethod() +} + +#' @method validate_entity list #' @keywords internal .validate_entity.list <- function(x, ..., ent_name = NULL, required = c("@id", "@type")) { has_elements <- required %in% names(x) @@ -74,6 +100,7 @@ .validate_entity_overview(required, ent_name) } +#' @method validate_entity numeric #' @keywords internal .validate_entity.numeric <- function(x, ..., ent_name = NULL, required = "type") { has_elements <- sapply(required, \(x) !is.null(getElement(list(...), x))) @@ -95,7 +122,9 @@ msg <- "" if (!is.null(ent_name)) msg <- paste0("===== Checking: ", ent_name, " =====\n") - msg <- paste0(msg, 
"Missing: \n", paste0("- ", required[!has_elements], collapse = "\n")) - stop(msg) + msg <- paste0(msg, "Missing: \n", + paste0(" - ", required[!has_elements], collapse = "\n")) + warning(msg, call. = FALSE) + return(FALSE) } diff --git a/R/utils-json.R b/R/utils-json.R index 74f0e17..5c2e57c 100644 --- a/R/utils-json.R +++ b/R/utils-json.R @@ -13,6 +13,10 @@ read_rocrate <- function(path, simplifyVector = FALSE, ...) { rocrate <- jsonlite::read_json(path, simplifyVector = simplifyVector, ...) # assigns it the 'rocrate' class class(rocrate) <- c("rocrate", class(rocrate)) + # assign the entity class to each element in the `@graph` + for (i in seq_along(rocrate$`@graph`)) { + class(rocrate$`@graph`[[i]]) <- c("entity", class(rocrate$`@graph`[[i]])) + } # checks the object has the basic structure of an RO-Crate is_rocrate(rocrate) # returns the new object as an RO-crate diff --git a/R/utils-rocrate.R b/R/utils-rocrate.R index 6fd6f97..10ed756 100644 --- a/R/utils-rocrate.R +++ b/R/utils-rocrate.R @@ -37,13 +37,14 @@ is_rocrate <- function(rocrate) { sapply(`[[`, "@id") |> unlist() - # extract @graph elements' @type - graph_types <- ro_crate_graph |> - sapply(`[[`, "@type") |> - unlist() + # validate @graph entities + valid_entities <- seq_along(ro_crate_graph) |> + sapply(function(i) { + .validate_entity.list(ro_crate_graph[[i]], ent_name = graph_ids[i]) + }) - # check lengths of @ids and @types, must be the same - valid_length_graph <- length(graph_ids) == length(graph_types) + # check lengths of @ids and number of entities, must be the same + valid_length_graph <- length(graph_ids) == sum(valid_entities == TRUE) # has an RO-Crate Metadata descriptor entity has_rocrate_meta <- "ro-crate-metadata.json" %in% graph_ids @@ -76,4 +77,3 @@ is_rocrate <- function(rocrate) { # return (invisibly) the input RO-Crate return(invisible(rocrate)) } - diff --git a/R/utils-url.R b/R/utils-url.R index 05cbae6..c85b453 100644 --- a/R/utils-url.R +++ b/R/utils-url.R @@ -8,18 +8,6 @@ 
#' @keywords internal #' #' @source https://stackoverflow.com/a/73952264 -#' @examples -#' url <- c( -#' "w3id.org/ro/crate/1.2/context", -#' "http://w3id.org/ro/crate/1.2", -#' "http://w3id.org/ro/crate/1.2/context", -#' "https://w3id.org/ro/crate/1.2/context", -#' "123", -#' "https://w3id.org/ro/crate/1.1/context", -#' "https://w3id.org/ro/crate/1.0/context" -#' ) -#' rocrateR:::.is_valid_url(url) -#' rocrateR:::.is_valid_url(url, suffix = "/context") .is_valid_url <- function(x, suffix = "") { pattern <- paste0("(https?|ftp)://[^ /$.?#].[^\\s]*", suffix, "$") grepl(pattern, x) diff --git a/README.Rmd b/README.Rmd index fa55913..41e101c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -13,27 +13,55 @@ knitr::opts_chunk$set( ) ``` -# rocrateR: RO-Crate R Package Wrapper +# rocrateR: RO-Crate R Package Wrapper logo -[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![CRAN status](https://www.r-pkg.org/badges/version/rocrateR)](https://CRAN.R-project.org/package=rocrateR) -[![R-CMD-check](https://github.com/villegar/ro-crate-r/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/villegar/ro-crate-r/actions/workflows/R-CMD-check.yaml) -[![Codecov test coverage](https://codecov.io/gh/villegar/ro-crate-r/graph/badge.svg)](https://app.codecov.io/gh/villegar/ro-crate-r) +[![R-CMD-check](https://github.com/ResearchObject/ro-crate-r/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ResearchObject/ro-crate-r/actions/workflows/R-CMD-check.yaml) +[![Codecov test coverage](https://codecov.io/gh/ResearchObject/ro-crate-r/graph/badge.svg)](https://app.codecov.io/gh/ResearchObject/ro-crate-r) -The goal of rocrateR is to provide an R package for creating, manipulating and reading RO-Crates. Latest supported version 1.2 of the specification. +The goal of `{rocrateR}` is to provide an R package for creating, manipulating and reading RO-Crates. 
Latest supported version of the specification: https://w3id.org/ro/crate/1.2/. + +#### What is an RO-Crate? + +> An RO-Crate is an integrated view through which you can see an entire Research Object; the methods, the data, the output and the outcomes of a project or a piece of work. Linking all this together enables the sharing of research outputs with their context, as a coherent whole. +> +> RO-Crates link data and metadata no matter where they are stored – so that from a paper, you can find the data, and from the data, you can find its authors, and so on. + +For more details, please visit: https://www.researchobject.org/ro-crate/about_ro_crate ## 0. Installation -You can install the development version of rocrateR like so: +You can install the released version of `{rocrateR}` from [CRAN](https://cran.r-project.org/package=rocrateR) with: ``` r # install.packages("pak") -pak::pkg_install("ResearchObject/ro-crate-r@main") +pak::pak("rocrateR") ``` -## 1. First RO-Crate +And the development version from [GitHub](https://github.com/ResearchObject/ro-crate-r/) with: + +``` r +# install.packages("pak") +pak::pak("ResearchObject/ro-crate-r@dev") +``` + +## 1. Functions Overview + +| Function | Purpose | +|-----------|----------| +| `rocrate()` | Create an empty or initialized RO-Crate | +| `entity()` | Define a new entity (Person, Dataset, etc.) | +| `add_entity()` / `add_entities()` | Add entities to a crate | +| `get_entity()` | Retrieve entities by `@id` or `@type` | +| `remove_entity()` / `remove_entities()` | Remove one or more entities | +| `write_rocrate()` | Save RO-Crate to disk | +| `bag_rocrate()` / `is_rocrate_bag()` / `unbag_rocrate()` | Bagging and unbagging RO-Crates | +| `validate_rocrate()` *(planned)* | Validate RO-Crate using external Python validator | + + +## 2. First RO-Crate The following command creates an RO-Crate Metadata descriptor (`ro-crate-metadata.json`). This should be stored inside the root (`./`) of your RO-Crate. 
@@ -69,9 +97,9 @@ readLines(tmp) unlink(tmp) ``` -## 2. Adding additional entities +## 3. Including additional entities -In the previous section we created a very basic RO-Crate with the `rocrateR::rocrate()` function; however, you are likely to include additional entities in your RO-Crate. Entities must contain at least two components `@id` and `@type` (see [https://w3id.org/ro/crate/1.2](https://w3id.org/ro/crate/1.2) for details). +In the previous section we created a very basic RO-Crate with the `rocrateR::rocrate()` function; however, you are likely to include additional entities in your RO-Crate. Entities must contain at least two components `@id` and `@type` (see [https://w3id.org/ro/crate/1.2/](https://w3id.org/ro/crate/1.2/) for details). For example, a contextual entity can be defined as follows: @@ -117,3 +145,215 @@ my_second_ro_crate <- rocrateR::rocrate(person_rvd, organisation_uol) |> ```{r} print(my_second_ro_crate) ``` +## 4. Wrangle RO-Crate +Previously, we covered how to include additional entities, other valid +operations are to extract (`rocrateR::get_entity()`) and remove +(`rocrateR::remove_entities()`). + +### 4.1. Set up + +```{r} +# create basic RO-Crate +basic_ro_crate <- rocrateR::rocrate() + +# create some entities for a project and datasets +dataset_entities <- seq_len(5) |> + lapply(\(x) rocrateR::entity(x, type = "Dataset", name = paste0("Data ", x))) +project_entity <- rocrateR::entity( + "#proj101", + type = "Project", + name = "Project 101", + hasPart = dataset_entities |> + lapply(\(x) list(`@id` = x[["@id"]])) + ) + +# add project and entities to the RO-Crate +basic_ro_crate <- basic_ro_crate |> + rocrateR::add_entity(project_entity) |> + # note that here we are using `rocrateR::add_entities` and `rocrateR::add_entity` + rocrateR::add_entities(dataset_entities) + +print(basic_ro_crate) +``` + +### 4.2. Extract entity + +We can extract entities via the `@id`, `@type` or both: + +#### 4.2.1. 
Extract using `@id` + +```{r} +basic_ro_crate_project <- basic_ro_crate |> + rocrateR::get_entity(id = "#proj101") + +print(basic_ro_crate_project) +``` + +#### 4.2.2. Extract using `@type` + +```{r} +basic_ro_crate_datasets <- basic_ro_crate |> + rocrateR::get_entity(type = "Dataset") + +print(basic_ro_crate_datasets) +``` + +#### 4.2.3. Extract using `@id` and `@type` + +```{r} +basic_ro_crate_dataset_root <- basic_ro_crate |> + rocrateR::get_entity(id = "./", type = "Dataset") + +print(basic_ro_crate_dataset_root) +``` + +### 4.3. Remove entity + +Similarly, we can remove entities from an RO-Crate: + +#### 4.3.1. Remove using scalar `@id` +```{r} +basic_ro_crate_alt <- basic_ro_crate |> + rocrateR::remove_entity("#proj101") +``` + +#### 4.3.2. Remove using `entity` object +```{r} +basic_ro_crate_alt <- basic_ro_crate |> + rocrateR::remove_entity(project_entity) +``` + +#### 4.3.3. Remove multiple entities +```{r} +basic_ro_crate_alt <- basic_ro_crate |> + rocrateR::remove_entities(dataset_entities) +``` + +## 5. Create an RO-Crate Bag + +Here we will explore the BagIt file packaging format, which is the recommended +format for _bagging_ RO-Crates. BagIt is described in +[RFC 8493](https://doi.org/10.17487/RFC8493): + +> [BagIt is] … a set of hierarchical file layout conventions for storage and transfer of arbitrary digital content. A “bag” has just enough structure to enclose descriptive metadata “tags” and a file “payload” but does not require knowledge of the payload’s internal semantics. This BagIt format is suitable for reliable storage and transfer. + +In this package, the function `rocrateR::bag_rocrate` will take either a `path` +pointing to the root of an RO-Crate (must have at least an RO-Crate metadata +descriptor file, `ro-crate-metadata.json`) or an RO-Crate object created with +`rocrateR::rocrate` (and alternatives), as shown in section 2. + +For more details, run the following command: + +```r +?rocrateR::bag_rocrate +``` + +### 5.1. 
`rocrateR::bag_rocrate()` + +Here we will create an RO-Crate bag inside a temporary directory: + +```{r} +# create basic RO-Crate +basic_ro_crate <- rocrateR::rocrate() + +# create temporary directory +tmp_dir <- file.path(tempdir(), paste0("rocrate-", digest::digest(Sys.time()))) +dir.create(tmp_dir, showWarnings = FALSE, recursive = TRUE) + +# then, we can create the RO-Crate bag +path_to_rocrate_bag <- basic_ro_crate |> + rocrateR::bag_rocrate(path = tmp_dir) +``` + + +### 5.2. `rocrateR::is_rocrate_bag()` + +We can use the function `rocrateR::is_rocrate_bag()` to verify that a given path +points to a ZIP file or a directory with a valid RO-Crate bag. The expected +files are: + +- `bagit.txt` with the BagIt [definition](https://www.rfc-editor.org/rfc/rfc8493.html#section-2.2.2) +- `data` directory with [payload](https://www.rfc-editor.org/rfc/rfc8493.html#section-2.1.2) of the RO-Crate +- `manifest-[algorithm].txt` with the checksum for each file inside the `data` directory; . + +```{r} +basic_ro_crate_contents <- path_to_rocrate_bag |> + rocrateR::is_rocrate_bag() +``` + +And then, the RO-Crate can be displayed: + +```{r} +print(basic_ro_crate_contents) +``` + + +### 5.3. `rocrateR::unbag_rocrate()` + +We can explore the contents of the RO-Crate bag with the following commands: + +```{r, echo=FALSE, eval=FALSE} +# list files without unzipping +unzip(path_to_rocrate_bag, list = TRUE) +``` + +```{r} +# extract files in temporary directory +path_to_rocrate_bag_contents <- path_to_rocrate_bag |> + rocrateR::unbag_rocrate(output = file.path(tmp_dir, "ROC")) + +# create tree with the files +fs::dir_tree(path_to_rocrate_bag_contents) +``` + +```{r} +# delete temporary directory +unlink(tmp_dir, recursive = TRUE, force = TRUE) +``` + + +## 6. Validation (experimental) + +As you develop your RO-Crates, you might want to validate them. 
There are a few validators online (some of which can be found at https://www.researchobject.org/ro-crate/tools); here we will explore the Python package [`rocrate-validator`](https://github.com/crs4/rocrate-validator). For installation details, please visit https://github.com/crs4/rocrate-validator. + +⚠️ The validation workflow depends on Python’s [`rocrate-validator`](https://github.com/crs4/rocrate-validator). Ensure you have a working Python installation and [`{reticulate}`](https://cran.r-project.org/package=reticulate) configured correctly (`reticulate::py_config()`). On Windows, you may need to restart R after installation. + +### 6.1. Install [`{reticulate}`](https://cran.r-project.org/package=reticulate) +``` r +pak::pkg_install("reticulate") +``` + +### 6.2. Install [`rocrate-validator`](https://github.com/crs4/rocrate-validator) + +``` r +reticulate::py_install("roc-validator", env = "rocrateR") +``` + +### 6.3. Create example RO-Crate and validate it + +```{r} +basic_ro_crate <- rocrateR::rocrate() + +# store crate inside temporary directory +tmp <- file.path(tempdir(), "ro-crate-metadata.json") +basic_ro_crate |> + rocrateR::write_rocrate(tmp) +# wrap crate into zip file (expected by validator) +tmp_zip <- paste0(tmp, ".zip") +zip(tmp_zip, tmp) + +# validate (note the name of the module: rocrate_validator) +reticulate::use_virtualenv("rocrateR") +rocrate_validator <- reticulate::import("rocrate_validator") +status <- rocrate_validator$utils$validate_rocrate_uri(tmp_zip) + +if (status) { + message("RO-Crate is valid!") +} else { + message("RO-Crate is invalid!") +} + +# delete temporary files +unlink(tmp) +unlink(tmp_zip) +``` \ No newline at end of file diff --git a/README.md b/README.md index 69aa3d1..3f98cd8 100644 --- a/README.md +++ b/README.md @@ -1,33 +1,67 @@ -# rocrateR: RO-Crate R Package Wrapper +# rocrateR: RO-Crate R Package Wrapper logo -[![Lifecycle: 
-experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![CRAN status](https://www.r-pkg.org/badges/version/rocrateR)](https://CRAN.R-project.org/package=rocrateR) -[![R-CMD-check](https://github.com/villegar/ro-crate-r/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/villegar/ro-crate-r/actions/workflows/R-CMD-check.yaml) +[![R-CMD-check](https://github.com/ResearchObject/ro-crate-r/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ResearchObject/ro-crate-r/actions/workflows/R-CMD-check.yaml) [![Codecov test -coverage](https://codecov.io/gh/villegar/ro-crate-r/graph/badge.svg)](https://app.codecov.io/gh/villegar/ro-crate-r) +coverage](https://codecov.io/gh/ResearchObject/ro-crate-r/graph/badge.svg)](https://app.codecov.io/gh/ResearchObject/ro-crate-r) -The goal of rocrateR is to provide an R package for creating, -manipulating and reading RO-Crates. Latest supported version 1.2 of the -specification. +The goal of `{rocrateR}` is to provide an R package for creating, +manipulating and reading RO-Crates. Latest supported version of the +specification: . + +#### What is an RO-Crate? + +> An RO-Crate is an integrated view through which you can see an entire +> Research Object; the methods, the data, the output and the outcomes of +> a project or a piece of work. Linking all this together enables the +> sharing of research outputs with their context, as a coherent whole. +> +> RO-Crates link data and metadata no matter where they are stored – so +> that from a paper, you can find the data, and from the data, you can +> find its authors, and so on. + +For more details, please visit: + ## 0. 
Installation -You can install the development version of rocrateR like so: +You can install the released version of `{rocrateR}` from +[CRAN](https://cran.r-project.org/package=rocrateR) with: ``` r # install.packages("pak") -pak::pkg_install("ResearchObject/ro-crate-r@main") +pak::pak("rocrateR") ``` -## 1. First RO-Crate +And the development version from +[GitHub](https://github.com/ResearchObject/ro-crate-r/) with: + +``` r +# install.packages("pak") +pak::pak("ResearchObject/ro-crate-r@dev") +``` + +## 1. Functions Overview + +| Function | Purpose | +|----|----| +| `rocrate()` | Create an empty or initialized RO-Crate | +| `entity()` | Define a new entity (Person, Dataset, etc.) | +| `add_entity()` / `add_entities()` | Add entities to a crate | +| `get_entity()` | Retrieve entities by `@id` or `@type` | +| `remove_entity()` / `remove_entities()` | Remove one or more entities | +| `write_rocrate()` | Save RO-Crate to disk | +| `bag_rocrate()` / `is_rocrate_bag()` / `unbag_rocrate()` | Bagging and unbagging RO-Crates | +| `validate_rocrate()` *(planned)* | Validate RO-Crate using external Python validator | + +## 2. First RO-Crate The following command creates an RO-Crate Metadata descriptor (`ro-crate-metadata.json`). This should be stored inside the root (`./`) @@ -61,7 +95,7 @@ print(my_first_ro_crate) #> "@type": "Dataset", #> "name": "", #> "description": "", -#> "datePublished": "2025-09-20", +#> "datePublished": "2025-11-04", #> "license": { #> "@id": "http://spdx.org/licenses/CC-BY-4.0" #> } @@ -104,7 +138,7 @@ readLines(tmp) #> [16] " \"@type\": \"Dataset\"," #> [17] " \"name\": \"\"," #> [18] " \"description\": \"\"," -#> [19] " \"datePublished\": \"2025-09-20\"," +#> [19] " \"datePublished\": \"2025-11-04\"," #> [20] " \"license\": {" #> [21] " \"@id\": \"http://spdx.org/licenses/CC-BY-4.0\"" #> [22] " }" @@ -116,12 +150,12 @@ readLines(tmp) unlink(tmp) ``` -## 2. Adding additional entities +## 3. 
Including additional entities In the previous section we created a very basic RO-Crate with the `rocrateR::rocrate()` function; however, you are likely to include additional entities in your RO-Crate. Entities must contain at least two -components `@id` and `@type` (see for +components `@id` and `@type` (see for details). For example, a contextual entity can be defined as follows: @@ -186,7 +220,7 @@ print(my_second_ro_crate) #> "@type": "Dataset", #> "name": "", #> "description": "", -#> "datePublished": "2025-09-20", +#> "datePublished": "2025-11-04", #> "license": { #> "@id": "http://spdx.org/licenses/CC-BY-4.0" #> }, @@ -211,3 +245,393 @@ print(my_second_ro_crate) #> ] #> } ``` + +## 4. Wrangle RO-Crate + +Previously, we covered how to include additional entities, other valid +operations are to extract (`rocrateR::get_entity()`) and remove +(`rocrateR::remove_entities()`). + +### 4.1. Set up + +``` r +# create basic RO-Crate +basic_ro_crate <- rocrateR::rocrate() + +# create some entities for a project and datasets +dataset_entities <- seq_len(5) |> + lapply(\(x) rocrateR::entity(x, type = "Dataset", name = paste0("Data ", x))) +project_entity <- rocrateR::entity( + "#proj101", + type = "Project", + name = "Project 101", + hasPart = dataset_entities |> + lapply(\(x) list(`@id` = x[["@id"]])) + ) + +# add project and entities to the RO-Crate +basic_ro_crate <- basic_ro_crate |> + rocrateR::add_entity(project_entity) |> + # note that here we are using `rocrateR::add_entities` and `rocrateR::add_entity` + rocrateR::add_entities(dataset_entities) +#> Adding entity with @id='1'... +#> Adding entity with @id='2'... +#> Adding entity with @id='3'... +#> Adding entity with @id='4'... +#> Adding entity with @id='5'... 
+ +print(basic_ro_crate) +#> { +#> "@context": "https://w3id.org/ro/crate/1.2/context", +#> "@graph": [ +#> { +#> "@id": "ro-crate-metadata.json", +#> "@type": "CreativeWork", +#> "about": { +#> "@id": "./" +#> }, +#> "conformsTo": { +#> "@id": "https://w3id.org/ro/crate/1.2" +#> } +#> }, +#> { +#> "@id": "./", +#> "@type": "Dataset", +#> "name": "", +#> "description": "", +#> "datePublished": "2025-11-04", +#> "license": { +#> "@id": "http://spdx.org/licenses/CC-BY-4.0" +#> } +#> }, +#> { +#> "@id": "#proj101", +#> "@type": "Project", +#> "name": "Project 101", +#> "hasPart": [ +#> { +#> "@id": 1 +#> }, +#> { +#> "@id": 2 +#> }, +#> { +#> "@id": 3 +#> }, +#> { +#> "@id": 4 +#> }, +#> { +#> "@id": 5 +#> } +#> ] +#> }, +#> { +#> "@id": 1, +#> "@type": "Dataset", +#> "name": "Data 1" +#> }, +#> { +#> "@id": 2, +#> "@type": "Dataset", +#> "name": "Data 2" +#> }, +#> { +#> "@id": 3, +#> "@type": "Dataset", +#> "name": "Data 3" +#> }, +#> { +#> "@id": 4, +#> "@type": "Dataset", +#> "name": "Data 4" +#> }, +#> { +#> "@id": 5, +#> "@type": "Dataset", +#> "name": "Data 5" +#> } +#> ] +#> } +``` + +### 4.2. Extract entity + +We can extract entities via the `@id`, `@type` or both: + +#### 4.2.1. Extract using `@id` + +``` r +basic_ro_crate_project <- basic_ro_crate |> + rocrateR::get_entity(id = "#proj101") + +print(basic_ro_crate_project) +#> [[1]] +#> RO-Crate entity: +#> @id = '#proj101' +#> @type = 'Project' +``` + +#### 4.2.2. 
Extract using `@type` + +``` r +basic_ro_crate_datasets <- basic_ro_crate |> + rocrateR::get_entity(type = "Dataset") + +print(basic_ro_crate_datasets) +#> [[1]] +#> RO-Crate entity: +#> @id = './' +#> @type = 'Dataset' +#> +#> [[2]] +#> RO-Crate entity: +#> @id = '1' +#> @type = 'Dataset' +#> +#> [[3]] +#> RO-Crate entity: +#> @id = '2' +#> @type = 'Dataset' +#> +#> [[4]] +#> RO-Crate entity: +#> @id = '3' +#> @type = 'Dataset' +#> +#> [[5]] +#> RO-Crate entity: +#> @id = '4' +#> @type = 'Dataset' +#> +#> [[6]] +#> RO-Crate entity: +#> @id = '5' +#> @type = 'Dataset' +``` + +#### 4.2.3. Extract using `@id` and `@type` + +``` r +basic_ro_crate_dataset_root <- basic_ro_crate |> + rocrateR::get_entity(id = "./", type = "Dataset") + +print(basic_ro_crate_dataset_root) +#> [[1]] +#> RO-Crate entity: +#> @id = './' +#> @type = 'Dataset' +``` + +### 4.3. Remove entity + +Similarly, we can remove entities from an RO-Crate: + +#### 4.3.1. Remove using scalar `@id` + +``` r +basic_ro_crate_alt <- basic_ro_crate |> + rocrateR::remove_entity("#proj101") +#> Removing the entity with @id = '#proj101'. +``` + +#### 4.3.2. Remove using `entity` object + +``` r +basic_ro_crate_alt <- basic_ro_crate |> + rocrateR::remove_entity(project_entity) +#> Removing the entity with @id = '#proj101'. +``` + +#### 4.3.3. Remove multiple entities + +``` r +basic_ro_crate_alt <- basic_ro_crate |> + rocrateR::remove_entities(dataset_entities) +#> Removing the entity with @id = '1'. +#> Removing the entity with @id = '2'. +#> Removing the entity with @id = '3'. +#> Removing the entity with @id = '4'. +#> Removing the entity with @id = '5'. +``` + +## 5. Create an RO-Crate Bag + +Here we will explore the BagIt file packaging format, which is the +recommended to use for *bagging* RO-Crates. BagIt is described in [RFC +8493](https://doi.org/10.17487/RFC8493): + +> \[BagIt is\] … a set of hierarchical file layout conventions for +> storage and transfer of arbitrary digital content. 
A “bag” has just +> enough structure to enclose descriptive metadata “tags” and a file +> “payload” but does not require knowledge of the payload’s internal +> semantics. This BagIt format is suitable for reliable storage and +> transfer. + +In this package, the function `rocrateR::bag_rocrate` will take either a +`path` pointing to the root of an RO-Crate (must have at least an +RO-Crate metadata descriptor file, `ro-crate-metadata.json`) or an +RO-Crate object created with `rocrateR::rocrate` (and alternatives), as +shown in step 1. + +For more details, run the following command: + +``` r +?rocrateR::bag_rocrate +``` + +### 5.1. `rocrateR::bag_rocrate()` + +Here we will create an RO-Crate bag inside temporary directory: + +``` r +# create basic RO-Crate +basic_ro_crate <- rocrateR::rocrate() + +# create temporary directory +tmp_dir <- file.path(tempdir(), paste0("rocrate-", digest::digest(Sys.time()))) +dir.create(tmp_dir, showWarnings = FALSE, recursive = TRUE) + +# then, we can create the RO-Crate bag +path_to_rocrate_bag <- basic_ro_crate |> + rocrateR::bag_rocrate(path = tmp_dir) +#> RO-Crate successfully 'bagged'! +#> For details, see: /tmp/RtmpJDledw/rocrate-dc0b8f71bc4ab2e85c9c47e17b4c6cfc/rocrate-198ae50c2d60ae04b7a9a2da24243c5e.zip +``` + +### 5.2. `rocrateR::is_rocrate_bag()` + +We can use the function `rocrateR::is_rocrate_bag()` to verify that a +given path points to a ZIP file or a directory with a valid RO-Crate +bag. The expected files are + +- `bagit.txt` with the BagIt + [definition](https://www.rfc-editor.org/rfc/rfc8493.html#section-2.2.2) +- `data` directory with + [payload](https://www.rfc-editor.org/rfc/rfc8493.html#section-2.1.2) + of the RO-Crate +- `manifest-[algorithm].txt` with the checksum for each file inside the + `data` directory; . + +``` r +basic_ro_crate_contents <- path_to_rocrate_bag |> + rocrateR::is_rocrate_bag() +#> Valid RO-Crate found! 
+``` + +And then, the RO-Crate can be displayed + +``` r +print(basic_ro_crate_contents) +#> { +#> "@context": "https://w3id.org/ro/crate/1.2/context", +#> "@graph": [ +#> { +#> "@id": "ro-crate-metadata.json", +#> "@type": "CreativeWork", +#> "about": { +#> "@id": "./" +#> }, +#> "conformsTo": { +#> "@id": "https://w3id.org/ro/crate/1.2" +#> } +#> }, +#> { +#> "@id": "./", +#> "@type": "Dataset", +#> "name": "", +#> "description": "", +#> "datePublished": "2025-11-04", +#> "license": { +#> "@id": "http://spdx.org/licenses/CC-BY-4.0" +#> } +#> } +#> ] +#> } +``` + +### 5.3. `rocrateR::unbag_rocrate()` + +We can explore the contents of the RO-Crate bag with the following +commands: + +``` r +# extract files in temporary directory +path_to_rocrate_bag_contents <- path_to_rocrate_bag |> + rocrateR::unbag_rocrate(output = file.path(tmp_dir, "ROC")) +#> RO-Crate bag successfully extracted! For details, see: +#> /tmp/RtmpJDledw/rocrate-dc0b8f71bc4ab2e85c9c47e17b4c6cfc/ROC + +# create tree with the files +fs::dir_tree(path_to_rocrate_bag_contents) +#> /tmp/RtmpJDledw/rocrate-dc0b8f71bc4ab2e85c9c47e17b4c6cfc/ROC/. +#> ├── bagit.txt +#> ├── data +#> │ └── ro-crate-metadata.json +#> ├── manifest-sha512.txt +#> └── tagmanifest-sha512.txt +``` + +``` r +# delete temporary directory +unlink(tmp_dir, recursive = TRUE, force = TRUE) +``` + +## 6. Validation (experimental) + +As you develop your RO-Crates, you might want to validate them. There +are few validators online (some of which can be found at +), here we will explore +the Python package +[`rocrate-validator`](https://github.com/crs4/rocrate-validator). For +installation details, please visit +. + +⚠️ The validation workflow depends on Python’s +[`rocrate-validator`](https://github.com/crs4/rocrate-validator). Ensure +you have a working Python installation and +[`{reticulate}`](https://cran.r-project.org/package=reticulate) +configured correctly (`reticulate::py_config()`). 
On Windows, you may +need to restart R after installation. + +### 6.1. Install [`{reticulate}`](https://cran.r-project.org/package=reticulate) + +``` r +pak::pkg_install("reticulate") +``` + +### 6.2. Install [`rocrate-validator`](https://github.com/crs4/rocrate-validator) + +``` r +reticulate::py_install("roc-validator", env = "rocrateR") +``` + +### 6.3. Create example RO-Crate and validate it + +``` r +basic_ro_crate <- rocrateR::rocrate() + +# store crate inside temporal directory +tmp <- file.path(tempdir(), "ro-crate-metadata.json") +basic_ro_crate |> + rocrateR::write_rocrate(tmp) +# wrap crate into zip file (expected by validator) +tmp_zip <- paste(tmp, ".zip") +zip(tmp_zip, tmp) + +# validate (note the name of the module: rocrate_validator) +reticulate::use_virtualenv("rocrateR") +rocrate_validator <- reticulate::import("rocrate_validator") +status <- rocrate_validator$utils$validate_rocrate_uri(tmp_zip) + +if (status) { + message("RO-Crate is valid!") +} else { + message("RO-Crate is invalid!") +} +#> RO-Crate is valid! + +# delete temporal files +unlink(tmp) +unlink(tmp_zip) +``` diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..d71acfb --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,4 @@ +url: ~ +template: + bootstrap: 5 + diff --git a/cran-comments.md b/cran-comments.md deleted file mode 100644 index 858617d..0000000 --- a/cran-comments.md +++ /dev/null @@ -1,5 +0,0 @@ -## R CMD check results - -0 errors | 0 warnings | 1 note - -* This is a new release. diff --git a/inst/R.ignore/hex-logo.R b/inst/R.ignore/hex-logo.R new file mode 100644 index 0000000..1f4bc54 --- /dev/null +++ b/inst/R.ignore/hex-logo.R @@ -0,0 +1,40 @@ +#' Create hexagonal logo +#' +#' Create hexagonal logo for the package. +#' +#' @param subplot Image to use as the main logo. +#' @param dpi Plot resolution (dots-per-inch). +#' @param h_color Colour for hexagon border. +#' @param h_fill Colour to fill hexagon. +#' @param output Output file (hexagonal logo). 
+#' @param package Title for logo (package name). +#' @param p_color Colour for package name. +#' @param url URL for package repository or website. +#' @param u_size Text size for URL. +#' +#' @return Hexagonal logo. +#' @keywords internal +hex_logo <- function(subplot = system.file("images/ro-crate-w-text.png", + package = "rocrateR"), + dpi = 600, + h_color = "#000000", + h_fill = "#FFFFFF", + output = system.file("images/logo.png", + package = "rocrateR"), + package = "rocrateR", + p_color = "#54969D", + url = "https://github.com/ResearchObject/ro-crate-r", + u_size = 1.25) { + hexSticker::sticker(subplot = subplot, package = package, + h_color = h_color, h_fill = h_fill, + dpi = dpi, + s_x = 1.0, s_y = 1.0, s_width = .7, + p_x = 1.0, p_y = 1.52, p_size = 20, p_color = p_color, + url = url, + u_angle = 30, u_color = p_color, u_size = u_size, u_y = 0.06, + filename = output) +} + +# pak::pak("emilioxavier/hexSticker") +hex_logo("inst/images/ro-crate-w-text.png", output = "man/figures/logo_hq.png", package = "", u_size = 6.8, dpi = 600) +hex_logo("inst/images/ro-crate-w-text.png", output = "man/figures/logo.png", package = "", u_size = 3.5, dpi = 300) diff --git a/inst/R.ignore/readme_logo.R b/inst/R.ignore/readme_logo.R new file mode 100644 index 0000000..b078c9a --- /dev/null +++ b/inst/R.ignore/readme_logo.R @@ -0,0 +1,3 @@ +cat(' + + ') \ No newline at end of file diff --git a/inst/WORDLIST b/inst/WORDLIST new file mode 100644 index 0000000..9778c83 --- /dev/null +++ b/inst/WORDLIST @@ -0,0 +1,19 @@ +BagIt +CMD +Codecov +ORCID +POSIXt +RO +Unbag +columnmajor +com +ies +mongo +org +rfc +ro +rocrate +rowmajor +stackoverflow +unclass +www diff --git a/inst/images/ro-crate-w-text.png b/inst/images/ro-crate-w-text.png new file mode 100644 index 0000000..442fad7 Binary files /dev/null and b/inst/images/ro-crate-w-text.png differ diff --git a/man/add_entities.Rd b/man/add_entities.Rd new file mode 100644 index 0000000..f8ed55d --- /dev/null +++ b/man/add_entities.Rd 
@@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/entity.R +\name{add_entities} +\alias{add_entities} +\title{Wrapper for \link[rocrateR]{add_entity}} +\usage{ +add_entities(rocrate, entity, overwrite = FALSE, quiet = FALSE) +} +\arguments{ +\item{rocrate}{RO-Crate object, see \link{rocrate}.} + +\item{entity}{List with entity objects.} + +\item{overwrite}{Boolean flag to indicate if the entity (if found in the +given RO-Crate) should be overwritten.} + +\item{quiet}{Boolean flag to indicate if status messages should be hidden +(default: \code{FALSE}).} +} +\value{ +Updated RO-Crate with the new entities. +} +\description{ +Wrapper for \link[rocrateR]{add_entity}, can be used to add multiple entities. +} diff --git a/man/add_entity_value.Rd b/man/add_entity_value.Rd index 1033906..172d7a8 100644 --- a/man/add_entity_value.Rd +++ b/man/add_entity_value.Rd @@ -9,7 +9,7 @@ add_entity_value(rocrate, id, key, value, overwrite = TRUE) \arguments{ \item{rocrate}{RO-Crate object, see \link{rocrate}.} -\item{id}{String with the ID of the RO-Crate entity within \verb{@graph}} +\item{id}{String with the ID of the RO-Crate entity within \verb{@graph}.} \item{key}{String with the \code{key} of the entity with \code{id} to be modified.} diff --git a/man/bag_rocrate.Rd b/man/bag_rocrate.Rd new file mode 100644 index 0000000..22d02bb --- /dev/null +++ b/man/bag_rocrate.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-bag.R +\name{bag_rocrate} +\alias{bag_rocrate} +\alias{bag_rocrate.character} +\alias{bag_rocrate.rocrate} +\title{Bag the contents of an RO-Crate} +\usage{ +bag_rocrate(x, ...) 
+ +\method{bag_rocrate}{character}(x, ..., output = x, force_bag = FALSE) + +\method{bag_rocrate}{rocrate}(x, ..., path, output = path, overwrite = FALSE, force_bag = FALSE) +} +\arguments{ +\item{x}{A string to a path containing at the very minimum an RO-Crate +metadata descriptor file, \code{ro-crate-metadata.json}. Alternatively, an +object with the \link[rocrateR]{rocrate} class.} + +\item{...}{Additional parameters, see below.} + +\item{output}{String with path where the RO-Crate bag will be stored +(default: \code{x} - same path as the input value).} + +\item{force_bag}{Boolean flag to indicate whether to force the creation of +a 'bag' even if not all the files were successfully bagged +(default: \code{FALSE} ~ check if all the files were copied successfully).} + +\item{path}{String with path to the root of the RO-Crate.} + +\item{overwrite}{Boolean flag to indicate if the RO-Crate metadata descriptor +file should be overwritten if already inside \code{path} (default: \code{FALSE}).} +} +\value{ +String with full path to the final RO-Crate bag. +} +\description{ +Bag the contents of an RO-Crate using the BagIt file packaging format v1.0. 
+For more details see the definition: +\doi{10.17487/RFC8493} +} +\seealso{ +Other bag_rocrate: +\code{\link{is_rocrate_bag}()}, +\code{\link{unbag_rocrate}()} +} +\concept{bag_rocrate} diff --git a/man/bagit_declaration.Rd b/man/bagit_declaration.Rd new file mode 100644 index 0000000..fd1378c --- /dev/null +++ b/man/bagit_declaration.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-bag.R +\name{bagit_declaration} +\alias{bagit_declaration} +\alias{.validate_bagit_declaration} +\title{Generate BagIt declaration} +\source{ +https://www.rfc-editor.org/rfc/rfc8493.html#section-2.2.2 +} +\usage{ +bagit_declaration(path, version = "1.0") + +.validate_bagit_declaration(path, algo = "sha512", bagit_version = "1.0") +} +\arguments{ +\item{path}{String with path where the BagIt declaration will be stored.} + +\item{version}{String with BagIt version (default: \code{"1.0"}).} + +\item{algo}{String with algorithm used to generate the RO-Crate bag +(default: \code{"sha512"}). See \link[digest]{digest} for more details.} + +\item{bagit_version}{String with version of BagIt used to generate the +RO-Crate bag (default: \code{"1.0"}). +See \doi{10.17487/RFC8493} for more details.} +} +\value{ +A list with \code{status} and \code{errors} identified. 
+} +\description{ +Generate BagIt declaration + +Validate BagIt declaration +} +\keyword{internal} diff --git a/man/bagit_manifest.Rd b/man/bagit_manifest.Rd new file mode 100644 index 0000000..94a9e9d --- /dev/null +++ b/man/bagit_manifest.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-bag.R +\name{.validate_bagit_manifest} +\alias{.validate_bagit_manifest} +\title{Validate BagIt manifest} +\usage{ +.validate_bagit_manifest(path, algo = "sha512", manifest_suffix = "manifest") +} +\arguments{ +\item{path}{String with full path to a compressed file containing an RO-Crate +bag, see \link[rocrateR]{bag_rocrate} for details. 
Alternatively, a path +to a directory containing an RO-Crate bag.} + +\item{algo}{String with algorithm used to generate the RO-Crate bag +(default: \code{"sha512"}). See \link[digest]{digest} for more details.} + +\item{manifest_suffix}{String with suffix for the manifest file (default: +\code{"manifest"}).} +} +\value{ +A list with \code{status} and \code{errors} identified. +} +\description{ +Validate BagIt manifest +} +\keyword{internal} diff --git a/man/dot-find_id_index.Rd b/man/dot-find_id_index.Rd index 5149fd4..9c7cd20 100644 --- a/man/dot-find_id_index.Rd +++ b/man/dot-find_id_index.Rd @@ -9,7 +9,7 @@ \arguments{ \item{rocrate}{RO-Crate object, see \link{rocrate}.} -\item{id}{String with the ID of the RO-Crate entity within \verb{@graph}} +\item{id}{String with the ID of the RO-Crate entity within \verb{@graph}.} } \value{ Boolean vector with index for entity with \verb{@id}. diff --git a/man/dot-find_type_index.Rd b/man/dot-find_type_index.Rd new file mode 100644 index 0000000..db047b0 --- /dev/null +++ b/man/dot-find_type_index.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-entity.R +\name{.find_type_index} +\alias{.find_type_index} +\title{Find \verb{@type} index in RO-Crate} +\usage{ +.find_type_index(rocrate, type) +} +\arguments{ +\item{rocrate}{RO-Crate object, see \link{rocrate}.} +} +\value{ +Boolean vector with index for entity(ies) with \verb{@type}. +} +\description{ +Find \verb{@type} index in RO-Crate. Useful to retrieve entities with a particular +type in the RO-Crate. +} +\keyword{internal} diff --git a/man/dot-is_valid_url.Rd b/man/dot-is_valid_url.Rd index 7f58692..06b898c 100644 --- a/man/dot-is_valid_url.Rd +++ b/man/dot-is_valid_url.Rd @@ -21,17 +21,4 @@ Boolean value indicating if the given string (`x) is a valid URL. 
\description{ Validate URL } -\examples{ -url <- c( - "w3id.org/ro/crate/1.2/context", - "http://w3id.org/ro/crate/1.2", - "http://w3id.org/ro/crate/1.2/context", - "https://w3id.org/ro/crate/1.2/context", - "123", - "https://w3id.org/ro/crate/1.1/context", - "https://w3id.org/ro/crate/1.0/context" -) -rocrateR:::.is_valid_url(url) -rocrateR:::.is_valid_url(url, suffix = "/context") -} \keyword{internal} diff --git a/man/dot-validate_entity.Rd b/man/dot-validate_entity.Rd index 7ff9e07..8ef4e81 100644 --- a/man/dot-validate_entity.Rd +++ b/man/dot-validate_entity.Rd @@ -10,7 +10,7 @@ \item{x}{New entity. If a single value (e.g., \code{character}, \code{numeric}) is given, this is assumed to be the entity's \verb{@id}, if a \code{list} is given, this is assumed to be a complete entity. Other options are objects of -type \code{person} and \code{organisation} (equivalenly \code{organization}).} +type \code{person} and \code{organisation} (equivalently \code{organization}).} \item{...}{Optional additional entity values/properties. Used when \code{x} is a single value.} diff --git a/man/dot-validate_rocrate_bag.Rd b/man/dot-validate_rocrate_bag.Rd new file mode 100644 index 0000000..70da587 --- /dev/null +++ b/man/dot-validate_rocrate_bag.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-bag.R +\name{.validate_rocrate_bag} +\alias{.validate_rocrate_bag} +\title{Verify if a given path points to a valid RO-Crate bag} +\usage{ +.validate_rocrate_bag(path, algo = "sha512", bagit_version = "1.0") +} +\arguments{ +\item{path}{String with full path to a compressed file containing an RO-Crate +bag, see \link[rocrateR]{bag_rocrate} for details. Alternatively, a path +to a directory containing an RO-Crate bag.} + +\item{algo}{String with algorithm used to generate the RO-Crate bag +(default: \code{"sha512"}). 
See \link[digest]{digest} for more details.} + +\item{bagit_version}{String with version of BagIt used to generate the +RO-Crate bag (default: \code{"1.0"}). +See \doi{10.17487/RFC8493} for more details.} +} +\value{ +Returns invisibly the RO-Crate pointed by \code{path}. +} +\description{ +Verify if a given path points to a valid RO-Crate bag +} +\keyword{internal} diff --git a/man/entity.Rd b/man/entity.Rd index 7044afb..17e82b7 100644 --- a/man/entity.Rd +++ b/man/entity.Rd @@ -10,7 +10,7 @@ entity(x, ...) \item{x}{New entity. If a single value (e.g., \code{character}, \code{numeric}) is given, this is assumed to be the entity's \verb{@id}, if a \code{list} is given, this is assumed to be a complete entity. Other options are objects of -type \code{person} and \code{organisation} (equivalenly \code{organization}).} +type \code{person} and \code{organisation} (equivalently \code{organization}).} \item{...}{Optional additional entity values/properties. Used when \code{x} is a single value.} diff --git a/man/figures/logo.png b/man/figures/logo.png new file mode 100644 index 0000000..e54129f Binary files /dev/null and b/man/figures/logo.png differ diff --git a/man/get_entity.Rd b/man/get_entity.Rd new file mode 100644 index 0000000..6233c7a --- /dev/null +++ b/man/get_entity.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/entity.R +\name{get_entity} +\alias{get_entity} +\title{Get entity(ies)} +\usage{ +get_entity(rocrate, id = NULL, type = NULL) +} +\arguments{ +\item{rocrate}{RO-Crate object, see \link{rocrate}.} + +\item{id}{String with the ID of the RO-Crate entity within \verb{@graph} +(optional if \code{type} is provided). Alternatively, an entity object / list +with \verb{@id} and \verb{@type}.} + +\item{type}{String with the type of the RO-Crate entity(ies) within \verb{@graph} +to retrieve (optional if \code{id} is provided).} +} +\value{ +List with found entity object(s), if any, \code{NULL} otherwise. 
+} +\description{ +Get entity(ies) +} +\examples{ +basic_crate <- rocrateR::rocrate() + +# create entity for an organisation +organisation_uol <- rocrateR::entity( + x = "https://ror.org/04xs57h96", + type = "Organization", + name = "University of Liverpool", + url = "http://www.liv.ac.uk" +) + +# create an entity for a person +person_rvd <- rocrateR::entity( + x = "https://orcid.org/0000-0001-5036-8661", + type = "Person", + name = "Roberto Villegas-Diaz", + affiliation = list(`@id` = organisation_uol$`@id`) +) + +basic_crate_person <- basic_crate |> + rocrateR::add_entity(person_rvd) |> + rocrateR::add_entity_value(id = "./", key = "author", value = list(`@id` = person_rvd$`@id`)) |> + rocrateR::add_entity(organisation_uol) |> + rocrateR::get_entity(person_rvd) + +basic_crate_person[[1]]$name == person_rvd$name +basic_crate_person[[1]]$`@id` == person_rvd$`@id` +} diff --git a/man/is_rocrate_bag.Rd b/man/is_rocrate_bag.Rd new file mode 100644 index 0000000..e7557ae --- /dev/null +++ b/man/is_rocrate_bag.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-bag.R +\name{is_rocrate_bag} +\alias{is_rocrate_bag} +\title{Check if path points to a valid RO-Crate bag} +\usage{ +is_rocrate_bag(path, algo = "sha512", bagit_version = "1.0") +} +\arguments{ +\item{path}{String with full path to a compressed file containing an RO-Crate +bag, see \link[rocrateR]{bag_rocrate} for details. Alternatively, a path +to a directory containing an RO-Crate bag.} + +\item{algo}{String with algorithm used to generate the RO-Crate bag +(default: \code{"sha512"}). See \link[digest]{digest} for more details.} + +\item{bagit_version}{String with version of BagIt used to generate the +RO-Crate bag (default: \code{"1.0"}). +See \doi{10.17487/RFC8493} for more details.} +} +\value{ +Returns invisibly the RO-Crate pointed by \code{path}. 
+} +\description{ +Check if path points to a valid RO-Crate bag +} +\seealso{ +Other bag_rocrate: +\code{\link{bag_rocrate}()}, +\code{\link{unbag_rocrate}()} +} +\concept{bag_rocrate} diff --git a/man/print.entity.Rd b/man/print.entity.Rd new file mode 100644 index 0000000..32e940c --- /dev/null +++ b/man/print.entity.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/print.R +\name{print.entity} +\alias{print.entity} +\title{Print RO-Crate entity} +\usage{ +\method{print}{entity}(x, ...) +} +\arguments{ +\item{x}{RO-Crate entity object, see \link{entity}.} + +\item{...}{Optional arguments, not used.} +} +\value{ +Invisibly the input RO-Crate entity, \code{x}. +} +\description{ +Print RO-Crate entity, S3 method for class 'entity'. +} +\examples{ +rocrateR::rocrate() |> + rocrateR::get_entity("./") +} diff --git a/man/remove_entities.Rd b/man/remove_entities.Rd new file mode 100644 index 0000000..b8d0a3c --- /dev/null +++ b/man/remove_entities.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/entity.R +\name{remove_entities} +\alias{remove_entities} +\title{Wrapper for \link[rocrateR]{remove_entity}} +\usage{ +remove_entities(rocrate, entity) +} +\arguments{ +\item{rocrate}{RO-Crate object, see \link{rocrate}.} + +\item{entity}{Entity object (list) that contains at least the following +components: \verb{@id} and \verb{@type}. Or, scalar value with entity \verb{@id}.} +} +\value{ +Updated RO-Crate. +} +\description{ +Wrapper for \link[rocrateR]{remove_entity}, can be used to remove multiple +entities. +} diff --git a/man/rocrate.Rd b/man/rocrate.Rd index be4b7a6..85c979c 100644 --- a/man/rocrate.Rd +++ b/man/rocrate.Rd @@ -48,7 +48,7 @@ RO-Crate object, list with an additional class, \code{rocrate}. \description{ Create a new RO-Crate object. 
This object includes basic skeleton for the RO-Crate metadata descriptor (\code{ro-crate-metadata.json}) file, as described -in the official documentation: https://w3id.org/ro/crate/1.2 > +in the official documentation: https://w3id.org/ro/crate/1.2/ > \href{https://www.researchobject.org/ro-crate/specification/1.2/root-data-entity.html}{Root Data Entity}. } \examples{ diff --git a/man/rocrateR-package.Rd b/man/rocrateR-package.Rd index 3f122fa..46fc962 100644 --- a/man/rocrateR-package.Rd +++ b/man/rocrateR-package.Rd @@ -6,7 +6,17 @@ \alias{rocrateR-package} \title{rocrateR: RO-Crate R Package Wrapper} \description{ -R package for creating, manipulating and reading RO-Crates. Latest supported version 1.2 of the specification. +\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} + +R package for creating, manipulating and reading RO-Crates. Latest supported version of the specification: \url{https://w3id.org/ro/crate/1.2/}. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/ResearchObject/ro-crate-r/} + \item Report bugs at \url{https://github.com/ResearchObject/ro-crate-r/issues/} +} + } \author{ \strong{Maintainer}: Roberto Villegas-Diaz \email{r.villegas-diaz@outlook.com} (\href{https://orcid.org/0000-0001-5036-8661}{ORCID}) diff --git a/man/unbag_rocrate.Rd b/man/unbag_rocrate.Rd new file mode 100644 index 0000000..2f0cf9e --- /dev/null +++ b/man/unbag_rocrate.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-bag.R +\name{unbag_rocrate} +\alias{unbag_rocrate} +\title{'Unbag' (extract) RO-Crate packed with BagIt} +\usage{ +unbag_rocrate(path, output = dirname(path), quiet = FALSE) +} +\arguments{ +\item{path}{String with path to compressed file containing an RO-Crate bag.} + +\item{output}{String with target path where the contents will be extracted +(default: \code{dirname(path)} - same directory as input \code{path}).} + +\item{quiet}{Boolean flag to 
indicate if messages should be suppressed +(default: \code{FALSE} - display messages).} +} +\value{ +String with path to root of the RO-Crate, invisibly. +} +\description{ +'Unbag' (extract) RO-Crate packed with BagIt +} +\seealso{ +Other bag_rocrate: +\code{\link{bag_rocrate}()}, +\code{\link{is_rocrate_bag}()} +} +\concept{bag_rocrate} diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png new file mode 100644 index 0000000..1651a27 Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon.png differ diff --git a/pkgdown/favicon/favicon-96x96.png b/pkgdown/favicon/favicon-96x96.png new file mode 100644 index 0000000..9b3b3d1 Binary files /dev/null and b/pkgdown/favicon/favicon-96x96.png differ diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico new file mode 100644 index 0000000..2acebe4 Binary files /dev/null and b/pkgdown/favicon/favicon.ico differ diff --git a/pkgdown/favicon/favicon.svg b/pkgdown/favicon/favicon.svg new file mode 100644 index 0000000..9943492 --- /dev/null +++ b/pkgdown/favicon/favicon.svg @@ -0,0 +1,3 @@ + \ No newline at end of file diff --git a/pkgdown/favicon/site.webmanifest b/pkgdown/favicon/site.webmanifest new file mode 100644 index 0000000..4ebda26 --- /dev/null +++ b/pkgdown/favicon/site.webmanifest @@ -0,0 +1,21 @@ +{ + "name": "", + "short_name": "", + "icons": [ + { + "src": "/web-app-manifest-192x192.png", + "sizes": "192x192", + "type": "image/png", + "purpose": "maskable" + }, + { + "src": "/web-app-manifest-512x512.png", + "sizes": "512x512", + "type": "image/png", + "purpose": "maskable" + } + ], + "theme_color": "#ffffff", + "background_color": "#ffffff", + "display": "standalone" +} \ No newline at end of file diff --git a/pkgdown/favicon/web-app-manifest-192x192.png b/pkgdown/favicon/web-app-manifest-192x192.png new file mode 100644 index 0000000..accb14f Binary files /dev/null and b/pkgdown/favicon/web-app-manifest-192x192.png differ diff --git 
a/pkgdown/favicon/web-app-manifest-512x512.png b/pkgdown/favicon/web-app-manifest-512x512.png new file mode 100644 index 0000000..e0cb8e6 Binary files /dev/null and b/pkgdown/favicon/web-app-manifest-512x512.png differ diff --git a/tests/testthat/test-entity.R b/tests/testthat/test-entity.R index 2e80d30..0b18e06 100644 --- a/tests/testthat/test-entity.R +++ b/tests/testthat/test-entity.R @@ -33,7 +33,7 @@ test_that("entity works", { person_rvd) # invalid entity, missing type - expect_error({ + expect_warning({ rocrateR::entity( x = "https://orcid.org/0000-0001-5036-8661", name = "Roberto Villegas-Diaz", @@ -74,6 +74,68 @@ test_that("add_entity_value works", { }) }) +test_that("add_entities works", { + # attempt adding same entity without `overwrite = TRUE` + expect_error({ + basic_crate |> + rocrateR::add_entity(person_rvd) |> + rocrateR::add_entities(list(person_rvd)) + }) + + # set `overwrite = TRUE` + expect_warning({ + basic_crate |> + rocrateR::add_entity(person_rvd) |> + rocrateR::add_entities(list(person_rvd), overwrite = TRUE) + }) + + # expect message + expect_message({ + basic_crate |> + rocrateR::add_entities(list(person_rvd)) + }) + + # supress messages + expect_no_message({ + basic_crate |> + rocrateR::add_entities(list(person_rvd), quiet = TRUE) + }) +}) + +test_that("get_entity works", { + # call without `id` and `type` + expect_error(rocrateR::get_entity(basic_crate)) + + # call with invalid `id` + expect_warning(rocrateR::get_entity(basic_crate, id = "cool_id")) + + # call with invalid `type` + expect_warning(rocrateR::get_entity(basic_crate, type = "cool_type")) + + # call with valid `id` + res_val_id <- rocrateR::get_entity(basic_crate, id = "./") + expect_equal(length(res_val_id), 1) + expect_equal(class(res_val_id[[1]]), c("entity", "list")) + + # call with valid `type` + res_val_type <- rocrateR::get_entity(basic_crate, type = "Dataset") + expect_equal(length(res_val_type), 1) + expect_equal(class(res_val_type[[1]]), c("entity", "list")) + + 
# call with valid `id` and `type` + res_val_id_type <- rocrateR::get_entity(basic_crate, + id = "./", + type = "Dataset") + expect_equal(length(res_val_id_type), 1) + expect_equal(class(res_val_id_type[[1]]), c("entity", "list")) + + # call with `entity` object + res_val_entity <- basic_crate |> + rocrateR::get_entity(rocrateR::entity("./", type = "Dataset")) + expect_equal(length(res_val_entity), 1) + expect_equal(class(res_val_entity[[1]]), c("entity", "list")) +}) + test_that("remove_entity works", { # attempt adding and removing the same entity using entity object expect_equal( @@ -98,3 +160,26 @@ test_that("remove_entity works", { }) }) +test_that("remove_entities works", { + # attempt adding and removing the same entity using entity object + expect_equal( + basic_crate |> + rocrateR::add_entity(person_rvd) |> + rocrateR::remove_entities(list(person_rvd)), + basic_crate + ) + + # attempt adding and removing the same entity using @id + expect_equal( + basic_crate |> + rocrateR::add_entity(person_rvd) |> + rocrateR::remove_entities(list("https://orcid.org/0000-0001-5036-8661")), + basic_crate + ) + + # attempt removing non-existing entity + expect_warning({ + basic_crate |> + rocrateR::remove_entities(list("https://orcid.org/0000-0001-5036-8661")) + }) +}) diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R index 523c796..7534fd4 100644 --- a/tests/testthat/test-print.R +++ b/tests/testthat/test-print.R @@ -8,3 +8,14 @@ test_that("print.rocrate works", { # test that the contents of the RO-Crate are displayed as message testthat::expect_message(print(basic_crate)) }) + +test_that("print.entity works", { + # create basic RO-Crate entity + basic_entity <- rocrateR::entity("./", type = "Dateset") + # test that the print method returns invisibly an RO-Crate entity + testthat::expect_invisible(print(basic_entity)) + testthat::expect_equal(print(basic_entity), basic_entity) + + # test that the contents of the RO-Crate are displayed as message + 
testthat::expect_message(print(basic_entity)) +}) diff --git a/tests/testthat/test-utils-bag.R b/tests/testthat/test-utils-bag.R new file mode 100644 index 0000000..7733d82 --- /dev/null +++ b/tests/testthat/test-utils-bag.R @@ -0,0 +1,259 @@ +test_that("bag_rocrate works", { + # create basic RO-Crate + basic_crate <- rocrateR::rocrate() + + # create temporary directory + tmp_dir <- file.path(tempdir(), + paste0("rocrate-tests-", digest::digest(Sys.time()))) + dir.create(tmp_dir, showWarnings = FALSE, recursive = TRUE) + + # missing path + expect_error(rocrateR::bag_rocrate(basic_crate)) + + # use invalid path + expect_error(rocrateR::bag_rocrate(basic_crate, path = "/invalid/path")) + expect_error(rocrateR::bag_rocrate("/invalid/path")) + + # write RO-Crate to temporary file + tmp_file <- file.path(tmp_dir, "ro-crate-metadata.json") + + # check that the temporary file doesn't exist + expect_false(file.exists(tmp_file)) + + # write to temporary file + basic_crate |> + rocrateR::write_rocrate(path = tmp_file) + + # check that the temporary file exists + expect_true(file.exists(tmp_file)) + + # try to bag RO-Crate without overwriting previous one + expect_error(rocrateR::bag_rocrate(basic_crate, path = tmp_dir)) + + # force creation of bag + expect_warning( # warning because force_bag = TRUE + expect_warning( # warning because overwrite = TRUE + rocrate_bag_filename <- basic_crate |> + rocrateR::bag_rocrate(path = tmp_dir, + overwrite = TRUE, + force_bag = TRUE) + ) + ) + # check that the RO-Crate bag exists + expect_true(file.exists(rocrate_bag_filename)) + + # delete intermediate RO-Crate bag + unlink(rocrate_bag_filename, force = TRUE) + + # delete RO-Crate metadata descriptor file + unlink(file.path(dirname(rocrate_bag_filename), "ro-crate-metadata.json"), + force = TRUE) + + # attempt bagging empty directory + expect_error(dirname(rocrate_bag_filename) |> + rocrateR::bag_rocrate(overwrite = TRUE, + force_bag = FALSE)) + + # try to bag RO-Crate overwriting 
previous one + rocrate_bag_filename <- basic_crate |> + rocrateR::bag_rocrate(path = tmp_dir, overwrite = TRUE) + + # check that the RO-Crate bag exists + expect_true(file.exists(rocrate_bag_filename)) + + # check contents of RO-Crate bag + ## unzip the new RO-Crate bag + unzip(rocrate_bag_filename, exdir = file.path(tmp_dir, "..", "VALIDATION")) + ## list files in the RO-Crate bag + rocrate_bag_files <- list.files(file.path(tmp_dir, "..", "VALIDATION"), + recursive = TRUE) + ## subset files in the data/ directory + rocrate_bag_files <- + basename(rocrate_bag_files[grepl("data/", rocrate_bag_files)]) + ## list files in the original input directory + tmp_dir_files <- list.files(tmp_dir, recursive = TRUE) + ## subset files in the RO-Crate bag, excluding the bag itself + tmp_dir_files <- + tmp_dir_files[!grepl(basename(rocrate_bag_filename), tmp_dir_files)] + ## compare main contents of the RO-Crate bag + expect_equal(rocrate_bag_files, tmp_dir_files) + + # delete temporary directory + unlink(tmp_dir, recursive = TRUE, force = TRUE) + + # check if the temporary directory was successfully deleted + expect_false(dir.exists(tmp_dir)) + + # delete temporary directory used for validation + unlink(file.path(dirname(tmp_dir), "VALIDATION"), + recursive = TRUE, force = TRUE) + expect_false(dir.exists(file.path(dirname(tmp_dir), "VALIDATION"))) +}) + +test_that("is_rocrate_bag works", { + # create basic RO-Crate + basic_crate <- rocrateR::rocrate() + + # create temporary directory + tmp_dir <- file.path(tempdir(), + paste0("rocrate-tests-", digest::digest(Sys.time()))) + dir.create(tmp_dir, showWarnings = FALSE, recursive = TRUE) + + # missing path + expect_error(rocrateR::is_rocrate_bag()) + + # invalid path + expect_error(rocrateR::is_rocrate_bag("/invalid/path")) + + # path to empty directory + expect_error(rocrateR::is_rocrate_bag(tmp_dir)) + + # write RO-Crate to temporary file + tmp_file <- file.path(tmp_dir, "ro-crate-metadata.json") + + # check that the temporary file 
doesn't exist + expect_false(file.exists(tmp_file)) + + # write to temporary file + basic_crate |> + rocrateR::write_rocrate(path = tmp_file) + + # check that the temporary file exists + expect_true(file.exists(tmp_file)) + + # try to bag RO-Crate without overwriting previous one + expect_error(rocrateR::bag_rocrate(basic_crate, path = tmp_dir)) + + # try to bag RO-Crate overwriting previous one + expect_message( + expect_warning(rocrate_bag_filename <- basic_crate |> + rocrateR::bag_rocrate(path = tmp_dir, overwrite = TRUE) + ) + ) + + # check that the RO-Crate bag exists + expect_true(file.exists(rocrate_bag_filename)) + + # check that the created object is a valid RO-Crate bag + expect_message( + basic_crate_from_bag <- rocrateR::is_rocrate_bag(rocrate_bag_filename) + ) + + # compare object read from the bag and original RO-Crate + expect_equal(basic_crate_from_bag, basic_crate) + + # extract RO-Crate bag + expect_message( + rocrate_bag_contents <- rocrateR::unbag_rocrate(rocrate_bag_filename) + ) + # delete the tagmanifest file and validate RO-Crate bag + expect_true(file.exists(file.path(rocrate_bag_contents, "tagmanifest-sha512.txt"))) + unlink(file.path(rocrate_bag_contents, "tagmanifest-sha512.txt")) + expect_false(file.exists(file.path(rocrate_bag_contents, "tagmanifest-sha512.txt"))) + expect_message( + basic_crate_from_bag <- rocrateR::is_rocrate_bag(rocrate_bag_contents) + ) + + # create invalid bag for testing purposes + dir.create(file.path(tmp_dir, "INVALID/data"), recursive = TRUE, + showWarnings = FALSE) + # create skeleton with empty files + idx <- file.path(tmp_dir, "INVALID", + c("bagit.txt", "manifest-sha512.txt", "tagmanifest-sha512.txt")) |> + file.create(showWarnings = FALSE) + # create data dir + dir.create(file.path(tmp_dir, "INVALID/data"), + showWarnings = FALSE, + recursive = TRUE) + idx <- file.path(tmp_dir, "INVALID/data/ro-crate-metadata.json") |> + file.create(showWarnings = FALSE) + # populate invalid manifest and tagmanifest files 
+ writeLines("1234 data/ro-crate-metadata.json", + file.path(tmp_dir, "INVALID/manifest-sha512.txt")) + writeLines("1234 bagit.txt", + file.path(tmp_dir, "INVALID/tagmanifest-sha512.txt")) + # check invalid RO-Crate bag + expect_error(rocrateR::is_rocrate_bag(file.path(tmp_dir, "INVALID"))) + + # delete temporary directory + unlink(tmp_dir, recursive = TRUE, force = TRUE) + + # check if the temporary directory was successfully deleted + expect_false(dir.exists(tmp_dir)) +}) + +test_that("unbag_rocrate works", { + # create basic RO-Crate + basic_crate <- rocrateR::rocrate() + + # create temporary directory + tmp_dir <- file.path(tempdir(), + paste0("rocrate-tests-", digest::digest(Sys.time()))) + dir.create(tmp_dir, showWarnings = FALSE, recursive = TRUE) + + # missing path + expect_error(rocrateR::unbag_rocrate()) + + # invalid path + expect_error(rocrateR::unbag_rocrate("/invalid/path")) + + # path to empty directory + expect_error(rocrateR::unbag_rocrate(tmp_dir)) + + # write RO-Crate to temporary file + tmp_file <- file.path(tmp_dir, "ro-crate-metadata.json") + + # check that the temporary file doesn't exist + expect_false(file.exists(tmp_file)) + + # write to temporary file + basic_crate |> + rocrateR::write_rocrate(path = tmp_file) + + # check that the temporary file exists + expect_true(file.exists(tmp_file)) + + # try to unbag non-zipped file + expect_error(rocrateR::unbag_rocrate(file.path(tmp_file))) + + # try to bag RO-Crate overwriting previous one + expect_message( + expect_warning(rocrate_bag_filename <- basic_crate |> + rocrateR::bag_rocrate(path = tmp_dir, overwrite = TRUE) + ) + ) + + # check that the RO-Crate bag exists + expect_true(file.exists(rocrate_bag_filename)) + + rocrate_bag_files <- rocrateR::unbag_rocrate(rocrate_bag_filename, + output = tmp_dir) + + # read RO-Crate metadata descriptor file + basic_crate_from_bag <- file.path(rocrate_bag_files, + "data/ro-crate-metadata.json") |> + rocrateR::read_rocrate() + + # compare with the original 
RO-Crate + expect_equal(basic_crate_from_bag, basic_crate) + + # add new directory in root of RO-Crate + dir.create(file.path(rocrate_bag_files, "not_a_crate")) + # create new zip file with the additional directory + new_roc_zip_file <- file.path(dirname(rocrate_bag_files), "test_roc2.zip") + expect_false(file.exists(new_roc_zip_file)) + zip::zip(new_roc_zip_file, rocrate_bag_files, mode = "cherry-pick") + expect_true(file.exists(new_roc_zip_file)) + expect_error( + temp_roc_files <- rocrateR::unbag_rocrate(new_roc_zip_file) + ) + # delete new zip + unlink(new_roc_zip_file, force = TRUE) + expect_false(file.exists(new_roc_zip_file)) + + # delete temporary directory + unlink(tmp_dir, recursive = TRUE, force = TRUE) + + # check if the temporary directory was successfully deleted + expect_false(dir.exists(tmp_dir)) +}) diff --git a/tests/testthat/test-utils-entity.R b/tests/testthat/test-utils-entity.R index 1530bf9..fd33d95 100644 --- a/tests/testthat/test-utils-entity.R +++ b/tests/testthat/test-utils-entity.R @@ -9,5 +9,5 @@ test_that(".validate_entity works", { # create RO-Crate with entity missing @type incomplete_entity <- list(`@id` = 123456) - expect_error(rocrateR::rocrate(incomplete_entity)) + expect_warning(rocrateR::rocrate(incomplete_entity)) }) diff --git a/tests/testthat/test-utils-rocrate.R b/tests/testthat/test-utils-rocrate.R index 398c9b2..53de92b 100644 --- a/tests/testthat/test-utils-rocrate.R +++ b/tests/testthat/test-utils-rocrate.R @@ -25,11 +25,27 @@ test_that("is_rocrate works", { # modify entity to remove @type basic_crate$`@graph`[[1]]$`@type` <- NULL - expect_error(basic_crate |> - rocrateR::is_rocrate()) - + expect_error( + expect_warning(basic_crate |> rocrateR::is_rocrate()) + ) + # set invalid context value basic_crate$`@context` <- "My awesome, but non-standard context" - expect_error(basic_crate |> - rocrateR::is_rocrate()) + expect_error( + expect_warning( + basic_crate |> rocrateR::is_rocrate() + ) + ) + + # drop @graph from a 
valid RO-Crate + basic_crate_v4 <- rocrateR::rocrate() + basic_crate_v4$`@graph` <- NULL + expect_error(rocrateR::is_rocrate(basic_crate_v4)) + + # drop @type from one of the entities + basic_crate_v5 <- rocrateR::rocrate() + basic_crate_v5$`@graph`[[2]]$`@type` <- NULL + expect_error( + expect_warning(rocrateR::is_rocrate(basic_crate_v5)) + ) })