diff --git a/docs/build/.pages b/docs/build/.pages index bcedf2a48..4c705c59f 100644 --- a/docs/build/.pages +++ b/docs/build/.pages @@ -3,6 +3,7 @@ nav: - Introduction to the User Interface: introduction-to-the-user-interface - Rule Operators: rule-operators - Define Prefixes / Namespaces: define-prefixes-namespaces + - Task and Operator Reference: reference - Cool IRIs: cool-iris - Lift Tabular Data: lift-data-from-tabular-data-such-as-csv-xslx-or-database-tables - Lift Hierarchical Data: lift-data-from-json-and-xml-sources @@ -14,4 +15,4 @@ nav: - Link Intrusion Detection Systems to Open-Source INTelligence: tutorial-how-to-link-ids-to-osint - Project and Global Variables: variables - Evaluate Template Operator: evaluate-template - - Build Knowledge Graphs from Kafka Topics: kafka-consumer \ No newline at end of file + - Build Knowledge Graphs from Kafka Topics: kafka-consumer diff --git a/docs/build/reference/.pages b/docs/build/reference/.pages new file mode 100644 index 000000000..d7e693ba7 --- /dev/null +++ b/docs/build/reference/.pages @@ -0,0 +1,7 @@ +nav: + - "Task and Operator Reference": index.md + - "Aggregators": aggregator + - "Custom Workflow Tasks": customtask + - "Datasets": dataset + - "Distance Measures": distancemeasure + - "Transformers": transformer \ No newline at end of file diff --git a/docs/build/reference/aggregator/.pages b/docs/build/reference/aggregator/.pages new file mode 100644 index 000000000..77d39f480 --- /dev/null +++ b/docs/build/reference/aggregator/.pages @@ -0,0 +1,11 @@ +nav: + - index.md + - "And": min.md + - "Average": average.md + - "Euclidian distance": quadraticMean.md + - "First non-empty score": firstNonEmpty.md + - "Geometric mean": geometricMean.md + - "Handle missing values": handleMissingValues.md + - "Negate": negate.md + - "Or": max.md + - "Scale": scale.md \ No newline at end of file diff --git a/docs/build/reference/aggregator/average.md b/docs/build/reference/aggregator/average.md new file mode 100644 index 
000000000..7ca353c73 --- /dev/null +++ b/docs/build/reference/aggregator/average.md @@ -0,0 +1,43 @@ +--- +title: "Average" +description: "Computes the weighted average." +icon: octicons/cross-reference-24 +tags: +--- +# Average + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Computes the arithmetic mean of all similarity scores: + +* Input values: [0.4, 0.5, 0.9] +* Returns: `0.6` + + +--- +#### Multiplies individual similarity scores with their weight before averaging: + +* Weights: [1, 1, 2] +* Input values: [0.3, 0.5, 0.6] +* Returns: `0.5` + + +--- +#### Missing scores always lead to an output of none: + +* Input values: [-1.0, (none), 1.0] +* Returns: `(none)` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/aggregator/firstNonEmpty.md b/docs/build/reference/aggregator/firstNonEmpty.md new file mode 100644 index 000000000..1eeb08f14 --- /dev/null +++ b/docs/build/reference/aggregator/firstNonEmpty.md @@ -0,0 +1,28 @@ +--- +title: "First non-empty score" +description: "Forwards the first input that provides a non-empty similarity score." +icon: octicons/cross-reference-24 +tags: +--- +# First non-empty score + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". 
+ +--- +#### The first defined score is returned, even if it's not the highest score: + +* Input values: [(none), 0.2, 0.5] +* Returns: `0.2` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/aggregator/geometricMean.md b/docs/build/reference/aggregator/geometricMean.md new file mode 100644 index 000000000..b6f200387 --- /dev/null +++ b/docs/build/reference/aggregator/geometricMean.md @@ -0,0 +1,68 @@ +--- +title: "Geometric mean" +description: "Compute the (weighted) geometric mean." +icon: octicons/cross-reference-24 +tags: +--- +# Geometric mean + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Weights: [1, 2, 1] +* Input values: [0.0, 0.0, 0.0] +* Returns: `0.0` + + +--- +#### Example 2: + +* Weights: [1, 2, 1] +* Input values: [1.0, 1.0, 1.0] +* Returns: `1.0` + + +--- +#### Example 3: + +* Weights: [2, 1] +* Input values: [0.5, 1.0] +* Returns: `0.629961` + + +--- +#### Example 4: + +* Weights: [2, 1, 5] +* Input values: [0.5, 1.0, 0.7] +* Returns: `0.672866` + + +--- +#### Example 5: + +* Weights: [10, 2, 3] +* Input values: [0.1, 0.9, 0.2] +* Returns: `0.153971` + + +--- +#### Missing scores always lead to an output of none: + +* Input values: [-1.0, (none), 1.0] +* Returns: `(none)` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/aggregator/handleMissingValues.md b/docs/build/reference/aggregator/handleMissingValues.md new file mode 100644 index 000000000..66337d825 --- /dev/null +++ b/docs/build/reference/aggregator/handleMissingValues.md @@ -0,0 +1,46 @@ +--- +title: "Handle missing values" +description: "Generates a default similarity score, if no similarity score is provided (e.g., due to missing values). 
Using this operator can have a performance impact, since it lowers the efficiency of the underlying computation." +icon: octicons/cross-reference-24 +tags: +--- +# Handle missing values + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Forwards input similarity scores: + +* Input values: [0.1] +* Returns: `0.1` + + +--- +#### Outputs the default score, if no input score is provided: + +* Parameters + * *defaultValue*: `1.0` + +* Input values: [(none)] +* Returns: `1.0` + + + + +## Parameter + +### Default value + +The default value to be generated, if no similarity score is provided. Must be a value between -1 (inclusive) and 1 (inclusive). '1' represents boolean true and '-1' represents boolean false. + +- Datatype: `double` +- Default Value: `-1.0` + + + diff --git a/docs/build/reference/aggregator/index.md b/docs/build/reference/aggregator/index.md new file mode 100644 index 000000000..3bda643c8 --- /dev/null +++ b/docs/build/reference/aggregator/index.md @@ -0,0 +1,25 @@ +--- +title: "Aggregators" +icon: octicons/cross-reference-24 +tags: + - Build + - Reference +--- +# Aggregators + + +This kind of task aggregates multiple similarity scores. + +**:octicons-people-24: Intended audience:** Linked Data Experts and Domain Experts + +| Name | Description | +|------------------------:| :--------- | +|[And](min.md) | All input scores must be within the threshold. Selects the minimum score. | +|[Average](average.md) | Computes the weighted average. | +|[Euclidian distance](quadraticMean.md) | Calculates the Euclidian distance. | +|[First non-empty score](firstNonEmpty.md) | Forwards the first input that provides a non-empty similarity score. | +|[Geometric mean](geometricMean.md) | Compute the (weighted) geometric mean. 
| +|[Handle missing values](handleMissingValues.md) | Generates a default similarity score, if no similarity score is provided (e.g., due to missing values). Using this operator can have a performance impact, since it lowers the efficiency of the underlying computation. | +|[Negate](negate.md) | Negates the result of the input comparison. A single input is expected. Using this operator can have a performance impact, since it lowers the efficiency of the underlying computation. | +|[Or](max.md) | At least one input score must be within the threshold. Selects the maximum score. | +|[Scale](scale.md) | Scales a similarity score by a factor. | diff --git a/docs/build/reference/aggregator/max.md b/docs/build/reference/aggregator/max.md new file mode 100644 index 000000000..9b3587afe --- /dev/null +++ b/docs/build/reference/aggregator/max.md @@ -0,0 +1,50 @@ +--- +title: "Or" +description: "At least one input score must be within the threshold. Selects the maximum score." +icon: octicons/cross-reference-24 +tags: +--- +# Or + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Selects the maximum similarity score: + +* Input values: [0.5, 0.0] +* Returns: `0.5` + + +--- +#### Selects the maximum similarity score: + +* Input values: [-1.0, -0.5, -0.3] +* Returns: `-0.3` + + +--- +#### Missing scores default to a similarity score of -1: + +* Input values: [(none)] +* Returns: `-1.0` + + +--- +#### Weights are ignored: + +* Weights: [1000, 0] +* Input values: [1.0, 0.0] +* Returns: `1.0` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/aggregator/min.md b/docs/build/reference/aggregator/min.md new file mode 100644 index 000000000..a8aacd472 --- /dev/null +++ b/docs/build/reference/aggregator/min.md @@ -0,0 +1,50 @@ +--- +title: "And" +description: "All input scores must be within the threshold. 
Selects the minimum score." +icon: octicons/cross-reference-24 +tags: +--- +# And + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Selects the minimum similarity score: + +* Input values: [1.0, 0.0] +* Returns: `0.0` + + +--- +#### Selects the minimum similarity score: + +* Input values: [-1.0, 0.0, 0.5, 1.0] +* Returns: `-1.0` + + +--- +#### Missing scores default to a similarity score of -1: + +* Input values: [1.0, (none), -0.5] +* Returns: `-1.0` + + +--- +#### Weights are ignored: + +* Weights: [1000, 0] +* Input values: [1.0, 0.0] +* Returns: `0.0` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/aggregator/negate.md b/docs/build/reference/aggregator/negate.md new file mode 100644 index 000000000..9f1bf96a6 --- /dev/null +++ b/docs/build/reference/aggregator/negate.md @@ -0,0 +1,16 @@ +--- +title: "Negate" +description: "Negates the result of the input comparison. A single input is expected. Using this operator can have a performance impact, since it lowers the efficiency of the underlying computation." +icon: octicons/cross-reference-24 +tags: +--- +# Negate + + + + +Negates the result of the input comparison. A single input is expected. Using this operator can have a performance impact, since it lowers the efficiency of the underlying computation. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/aggregator/quadraticMean.md b/docs/build/reference/aggregator/quadraticMean.md new file mode 100644 index 000000000..979568842 --- /dev/null +++ b/docs/build/reference/aggregator/quadraticMean.md @@ -0,0 +1,76 @@ +--- +title: "Euclidian distance" +description: "Calculates the Euclidian distance." 
+icon: octicons/cross-reference-24 +tags: +--- +# Euclidian distance + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Weights: [1, 1, 1] +* Input values: [1.0, 1.0, 1.0] +* Returns: `1.0` + + +--- +#### Example 2: + +* Weights: [1, 1] +* Input values: [1.0, 0.0] +* Returns: `0.707107` + + +--- +#### Example 3: + +* Weights: [1, 1, 1] +* Input values: [0.4, 0.5, 0.6] +* Returns: `0.506623` + + +--- +#### Example 4: + +* Weights: [1, 1] +* Input values: [0.0, 0.0] +* Returns: `0.0` + + +--- +#### Example 5: + +* Weights: [2, 1, 1] +* Input values: [1.0, 0.0, 0.0] +* Returns: `0.707107` + + +--- +#### Example 6: + +* Weights: [1, 2, 3] +* Input values: [0.4, 0.5, 0.6] +* Returns: `0.538516` + + +--- +#### Missing scores always lead to an output of none: + +* Input values: [-1.0, (none), 1.0] +* Returns: `(none)` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/aggregator/scale.md b/docs/build/reference/aggregator/scale.md new file mode 100644 index 000000000..6dd4e76fd --- /dev/null +++ b/docs/build/reference/aggregator/scale.md @@ -0,0 +1,53 @@ +--- +title: "Scale" +description: "Scales a similarity score by a factor." +icon: octicons/cross-reference-24 +tags: +--- +# Scale + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". 
+ +--- +#### Scales similarity scores by the specified factor: + +* Parameters + * *factor*: `0.5` + +* Input values: [1.0] +* Returns: `0.5` + + +--- +#### Ignores missing values: + +* Input values: [(none)] +* Returns: `(none)` + + +--- +#### Throws a validation error if more than one input is provided: + +* Input values: [0.1, 0.2] +* Returns: `(none)` + + + + +## Parameter + +### Factor + +All input similarity values are multiplied with this factor. + +- Datatype: `double` +- Default Value: `1.0` + + + diff --git a/docs/build/reference/customtask/.pages b/docs/build/reference/customtask/.pages new file mode 100644 index 000000000..0471513ed --- /dev/null +++ b/docs/build/reference/customtask/.pages @@ -0,0 +1,67 @@ +nav: + - index.md + - "Add project files": addProjectFiles.md + - "Cancel Workflow": CancelWorkflow.md + - "Combine CSV files": combine-csv.md + - "Concatenate to file": ConcatenateToFile.md + - "Create Embeddings": cmem_plugin_llm-CreateEmbeddings.md + - "Create/Update Salesforce Objects": cmem_plugin_salesforce-workflow-operations-SobjectCreate.md + - "Delete project files": deleteProjectFiles.md + - "Distinct by": DistinctBy.md + - "Download file": downloadFile.md + - "Download Nextcloud files": cmem_plugin_nextcloud-Download.md + - "Download Office 365 Files": cmem_plugin_office365-Download.md + - "Download SSH files": cmem_plugin_ssh-Download.md + - "Evaluate template": Template.md + - "Execute commands via SSH": cmem_plugin_ssh-Execute.md + - "Execute Instructions": cmem_plugin_llm-ExecuteInstructions.md + - "Execute REST requests": eccencaRestOperator.md + - "Execute Spark function": SparkFunction.md + - "Extract from PDF files": cmem_plugin_pdf_extract-pdf_extract-PdfExtract.md + - "Generate base36 IRDIs": cmem_plugin_irdi-workflow-irdi_plugin-IrdiPlugin.md + - "Generate SHACL shapes from data": cmem_plugin_shapes-plugin_shapes-ShapesPlugin.md + - "Get project files": getProjectFiles.md + - "GraphQL query": 
cmem_plugin_graphql-workflow-graphql-GraphQLPlugin.md + - "Join tables": Merge.md + - "jq": cmem-plugin-jq-workflow.md + - "JQL query": cmem_plugin_jira-JqlQuery.md + - "Kafka Consumer (Receive Messages)": cmem_plugin_kafka-ReceiveMessages.md + - "Kafka Producer (Send Messages)": cmem_plugin_kafka-SendMessages.md + - "List Nextcloud files": cmem_plugin_nextcloud-List.md + - "List Office 365 Files": cmem_plugin_office365-List.md + - "List project files": cmem_plugin_project_resources-List.md + - "List SSH files": cmem_plugin_ssh-List.md + - "Merge tables": MultiTableMerge.md + - "Normalize units of measurement": ucumNormalizationTask.md + - "OAuth2 Authentication": cmem_plugin_auth-workflow-auth-OAuth2.md + - "Office 365 Upload Files": cmem_plugin_office365-Upload.md + - "Parse JSON": JsonParserOperator.md + - "Parse XML": XmlParserOperator.md + - "Parse YAML": cmem_plugin_yaml-parse.md + - "Pivot": Pivot.md + - "Request RDF triples": tripleRequestOperator.md + - "Scheduler": Scheduler.md + - "Search addresses": SearchAddresses.md + - "Search Vector Embeddings": cmem_plugin_pgvector-Search.md + - "Send eMail": SendEMail.md + - "Send Mattermost messages": cmem_plugin_mattermost.md + - "Set or Overwrite parameters": cmem_plugin_parameters-ParametersPlugin.md + - "SHACL validation with pySHACL": shacl-pyshacl.md + - "SOQL query (Salesforce)": cmem_plugin_salesforce-SoqlQuery.md + - "SPARQL Construct query": sparqlCopyOperator.md + - "SPARQL Select query": sparqlSelectOperator.md + - "SPARQL Update query": sparqlUpdateOperator.md + - "Split file": cmem_plugin_splitfile-plugin_splitfile-SplitFilePlugin.md + - "SQL query": CustomSQLExecution.md + - "Start Workflow per Entity": cmem_plugin_loopwf-task-StartWorkflow.md + - "Store Vector Embeddings": cmem_plugin_pgvector-Store.md + - "Unpivot": Unpivot.md + - "Update SemSpect": cmem_plugin_semspect-task-Update.md + - "Upload File to Knowledge Graph": eccencaDataPlatformGraphStoreFileUploadOperator.md + - "Upload files to 
Nextcloud": cmem_plugin_nextcloud-Upload.md + - "Upload local files": cmem_plugin_project_resources-UploadLocalFiles.md + - "Upload SSH files": cmem_plugin_ssh-Upload.md + - "Validate Entities": cmem_plugin_validation-validate-ValidateEntities.md + - "Validate Knowledge Graph": cmem_plugin_validation-validate-ValidateGraph.md + - "Validate XML": validateXsdOperator.md + - "XSLT": xsltOperator.md \ No newline at end of file diff --git a/docs/build/reference/customtask/CancelWorkflow.md b/docs/build/reference/customtask/CancelWorkflow.md new file mode 100644 index 000000000..7794ca2d5 --- /dev/null +++ b/docs/build/reference/customtask/CancelWorkflow.md @@ -0,0 +1,52 @@ +--- +title: "Cancel Workflow" +description: "Cancels a workflow if a specified condition is fulfilled." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Cancel Workflow + + + + +Cancels a workflow if a specified condition is fulfilled. + +## Parameter + +### Type URI + +The entity type to check the condition on. + +- Datatype: `uri` +- Default Value: `None` + + + +### Condition + +The cancellation condition + +- Datatype: `enumeration` +- Default Value: `empty` + + + +### Invert condition + +If true, the specified condition will be inverted, i.e., the workflow execution will be cancelled if the condition is not fulfilled. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Fail workflow + +If true, the workflow execution will fail if the condition is met. If false, the workflow execution will be stopped, but shown as successful. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/customtask/ConcatenateToFile.md b/docs/build/reference/customtask/ConcatenateToFile.md new file mode 100644 index 000000000..d2b49fd87 --- /dev/null +++ b/docs/build/reference/customtask/ConcatenateToFile.md @@ -0,0 +1,79 @@ +--- +title: "Concatenate to file" +description: "Concatenates values into a file." 
+icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Concatenate to file + + + + +Concatenates values into a file. + +## Parameter + +### Path + +Values from this path will be concatenated. + +- Datatype: `string` +- Default Value: `None` + + + +### Mime type + +MIME type of the output file. + +- Datatype: `string` +- Default Value: `None` + + + +### Prefix + +Prefix to be written before the first value. + +- Datatype: `multiline string` +- Default Value: `None` + + + +### Glue + +Separator to be inserted between concatenated values. + +- Datatype: `multiline string` +- Default Value: `None` + + + +### Suffix + +Suffix to be written after the last value. + +- Datatype: `multiline string` +- Default Value: `None` + + + +### Charset + +The file encoding. + +- Datatype: `string` +- Default Value: `UTF-8` + + + +### File extension + +File extension of the output file. + +- Datatype: `string` +- Default Value: `.tmp` + + + diff --git a/docs/build/reference/customtask/CustomSQLExecution.md b/docs/build/reference/customtask/CustomSQLExecution.md new file mode 100644 index 000000000..f25e56d86 --- /dev/null +++ b/docs/build/reference/customtask/CustomSQLExecution.md @@ -0,0 +1,25 @@ +--- +title: "SQL query" +description: "Executes a custom SQL query on the first input dataset and returns the result as its output." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# SQL query + + + + +Executes a custom SQL query on the first input dataset and returns the result as its output. + +## Parameter + +### Command + +SQL command. The name of the table in the statement must be 'dataset', regardless of the input. 
+ +- Datatype: `code-sql` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/DistinctBy.md b/docs/build/reference/customtask/DistinctBy.md new file mode 100644 index 000000000..a6c3b8bb5 --- /dev/null +++ b/docs/build/reference/customtask/DistinctBy.md @@ -0,0 +1,34 @@ +--- +title: "Distinct by" +description: "Removes duplicated entities based on a user-defined path. Note that this operator does not retain the order of the entities." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Distinct by + + + + +Removes duplicated entities based on a user-defined path. Note that this operator does not retain the order of the entities. + +## Parameter + +### Distinct path + +Entities that share this path will be deduplicated. + +- Datatype: `string` +- Default Value: `None` + + + +### Resolve duplicates + +Strategy to resolve duplicates. + +- Datatype: `enumeration` +- Default Value: `keepLast` + + + diff --git a/docs/build/reference/customtask/JsonParserOperator.md b/docs/build/reference/customtask/JsonParserOperator.md new file mode 100644 index 000000000..ab2a9036f --- /dev/null +++ b/docs/build/reference/customtask/JsonParserOperator.md @@ -0,0 +1,52 @@ +--- +title: "Parse JSON" +description: "Parses an incoming entity as a JSON dataset. Typically, it is used before a transformation task. Takes exactly one input of which only the first entity is processed." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Parse JSON + + + + +Parses an incoming entity as a JSON dataset. Typically, it is used before a transformation task. Takes exactly one input of which only the first entity is processed. + +## Parameter + +### Input path + +The Silk path expression of the input entity that contains the JSON document. If not set, the value of the first defined property will be taken. 
+ +- Datatype: `string` +- Default Value: `None` + + + +### Base path + +The path to the elements to be read, starting from the root element, e.g., '/Persons/Person'. If left empty, all direct children of the root element will be read. + +- Datatype: `string` +- Default Value: `None` + + + +### URI suffix pattern + +A URI pattern that is relative to the base URI of the input entity, e.g., /{ID}, where {path} may contain relative paths to elements. This relative part is appended to the input entity URI to construct the full URI pattern. + +- Datatype: `string` +- Default Value: `None` + + + +### Navigate into arrays + +Navigate into arrays automatically. If set to false, the `#array` path operator must be used to navigate into arrays. + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/customtask/Merge.md b/docs/build/reference/customtask/Merge.md new file mode 100644 index 000000000..21302cb70 --- /dev/null +++ b/docs/build/reference/customtask/Merge.md @@ -0,0 +1,17 @@ +--- +title: "Join tables" +description: "Joins a set of inputs into a single table. Expects a list of entity tables and links. All entity tables are joined into the first entity table using the provided links." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Join tables + + + + +Joins a set of inputs into a single table. Expects a list of entity tables and links. All entity tables are joined into the first entity table using the provided links. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/customtask/MultiTableMerge.md b/docs/build/reference/customtask/MultiTableMerge.md new file mode 100644 index 000000000..d3cab1c60 --- /dev/null +++ b/docs/build/reference/customtask/MultiTableMerge.md @@ -0,0 +1,52 @@ +--- +title: "Merge tables" +description: "Stores sets of instance and mapping inputs as relational tables with the mapping as an n:m relation. Expects a list of entity tables and links. 
All entity tables have a relation to the first entity table using the provided links." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Merge tables + + + + +Stores sets of instance and mapping inputs as relational tables with the mapping as an n:m relation. Expects a list of entity tables and links. All entity tables have a relation to the first entity table using the provided links. + +## Parameter + +### Multi table output + +test + +- Datatype: `boolean` +- Default Value: `true` + + + +### Pivot table name + +Name of the pivot table. + +- Datatype: `string` +- Default Value: `None` + + + +### Mapping names + +Name of the mapping tables. Comma separated list. + +- Datatype: `string` +- Default Value: `None` + + + +### Instance set names + +Name of the tables joined to the pivot. Comma separated list. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/Pivot.md b/docs/build/reference/customtask/Pivot.md new file mode 100644 index 000000000..1e1ce64d5 --- /dev/null +++ b/docs/build/reference/customtask/Pivot.md @@ -0,0 +1,70 @@ +--- +title: "Pivot" +description: "The pivot operator takes data in separate rows, aggregates it and converts it into columns. The operator works on a flat input schema only and creates a flat output schema. A pivot table is a data summarization that is used to automatically sort, count, total, or average data in a dataset. It allows you to view the data from a different perspective. 
The following aggregation (summary) functions are available: - **first** - Shows the first value (works with numbers and strings) - **min** - Shows the lowest value (works with numbers and strings) - **max** - Shows the highest value (works with numbers and strings) - **sum** - Adds up the values (works with numbers only) - **average** - Finds the average of the values (works with numbers only)" +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Pivot + + + + +The pivot operator takes data in separate rows, aggregates it and converts it into columns. The operator works on a flat input schema only and creates a flat output schema. A pivot table is a data summarization that is used to automatically sort, count, total, or average data in a dataset. It allows you to view the data from a different perspective. The following aggregation (summary) functions are available: - **first** - Shows the first value (works with numbers and strings) - **min** - Shows the lowest value (works with numbers and strings) - **max** - Shows the highest value (works with numbers and strings) - **sum** - Adds up the values (works with numbers only) - **average** - Finds the average of the values (works with numbers only) + +## Parameter + +### Pivot property + +The pivot column refers to the column in the input data that is used to organize the data along the horizontal axis of the pivot table. + +- Datatype: `string` +- Default Value: `None` + + + +### First group property + +The name of the first group column in the range. All columns starting with this will be grouped. + +- Datatype: `string` +- Default Value: `None` + + + +### Last group property + +The name of the last group column in the range. If left empty, only the first column is grouped. + +- Datatype: `string` +- Default Value: `None` + + + +### Value property + +The property that contains the grouped values that will be aggregated. 
+ +- Datatype: `string` +- Default Value: `None` + + + +### Aggregation function + +The aggregation function used to aggregate values. + +- Datatype: `enumeration` +- Default Value: `sum` + + + +### URI prefix + +Prefix to prepend to all generated pivot columns. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/Scheduler.md b/docs/build/reference/customtask/Scheduler.md new file mode 100644 index 000000000..7bd53226f --- /dev/null +++ b/docs/build/reference/customtask/Scheduler.md @@ -0,0 +1,61 @@ +--- +title: "Scheduler" +description: "Executes a workflow at specified intervals." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Scheduler + + + + +Executes a workflow at specified intervals. + +## Parameter + +### Workflow + +The name of the workflow to be executed + +- Datatype: `task` +- Default Value: `None` + + + +### Interval + +The interval at which the scheduler should run the referenced task. Must be in ISO-8601 duration format PnDTnHnMn.nS + +- Datatype: `duration` +- Default Value: `PT15M` + + + +### Start time + +The time when the scheduled task is run for the first time, e.g., 2017-12-03T10:15:30. If no start time is set, midnight on the day the scheduler is started is assumed. + +- Datatype: `string` +- Default Value: `None` + + + +### Enabled + +Enables or disables the scheduler. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Stop on error + +If true, this will stop the scheduler, so the failed task is not scheduled again for execution. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/customtask/SearchAddresses.md b/docs/build/reference/customtask/SearchAddresses.md new file mode 100644 index 000000000..09974bfbb --- /dev/null +++ b/docs/build/reference/customtask/SearchAddresses.md @@ -0,0 +1,85 @@ +--- +title: "Search addresses" +description: "Looks up locations from textual descriptions using the configured geocoding API. 
Outputs results as RDF." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Search addresses + + + + + +**Configuration** + +The geocoding service to be queried for searches can be set up in the configuration. +The default configuration is as follows: + + com.eccenca.di.geo = { + # The URL of the geocoding service + # url = "https://nominatim.eccenca.com/search" + url = "https://photon.komoot.de/api" + # url = https://api-adresse.data.gouv.fr/search + + # Additional URL parameters to be attached to all HTTP search requests. Example: '&countrycodes=de&addressdetails=1'. + # Will be attached in addition to the parameters set on each search operator directly. + searchParameters = "" + + # The minimum pause time between subsequent queries + pauseTime = 1s + + # Number of coordinates to be cached in-memory + cacheSize = 10 + } + +In general, all services adhering to the [Nominatim search API](https://nominatim.org/release-docs/develop/api/Search/) should be usable. +Please note that when using public services, the pause time should be set to avoid overloading. + +**Logging** + +By default, individual requests to the geocoding service are not logged. To enable logging each request, the following configuration option can be set: + + logging.level { + com.eccenca.di.geo=DEBUG + } + + +## Parameter + +### Search attributes + +List of attributes that contain search terms. Multiple attributes (comma-separated) will be concatenated into a single search. + +- Datatype: `traversable[string]` +- Default Value: `None` + + + +### Limit + +Optionally limits the number of results for each search. + +- Datatype: `option[int]` +- Default Value: `None` + + + +### JSON-LD context + +Optional JSON-LD context to be used for converting the returned JSON to RDF. If not provided, a default context will be used. + +- Datatype: `resource` +- Default Value: `None` + + + +### Additional parameters + +Additional URL parameters to be attached to each HTTP search request. 
Example: '&countrycodes=de&addressdetails=1'. Consult the API documentation for a list of available parameters. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/SendEMail.md b/docs/build/reference/customtask/SendEMail.md new file mode 100644 index 000000000..91f938c99 --- /dev/null +++ b/docs/build/reference/customtask/SendEMail.md @@ -0,0 +1,160 @@ +--- +title: "Send eMail" +description: "Sends an eMail using an SMTP server. If connected to a dataset that is based on a file in a workflow, it will send that file whenever the workflow is executed. It can be used to send the result of a workflow via Mail." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Send eMail + + + + +Sends an eMail using an SMTP server. If connected to a dataset that is based on a file in a workflow, it will send that file whenever the workflow is executed. It can be used to send the result of a workflow via Mail. + +## Parameter + +### Host + +The SMTP host, e.g., mail.myProvider.com + +- Datatype: `string` +- Default Value: `None` + + + +### Port + +The SMTP port + +- Datatype: `int` +- Default Value: `587` + + + +### User + +Username + +- Datatype: `string` +- Default Value: `None` + + + +### Password + +Password + +- Datatype: `password` +- Default Value: `None` + + + +### From + +The sender eMail address + +- Datatype: `string` +- Default Value: `None` + + + +### To + +The email addresses of the receivers. Email addresses are comma separated. Names must be quoted when containing commas. Example: john.smith@example.com, "Doe, John" , needs no quoting + +- Datatype: `string` +- Default Value: `None` + + + +### CC + +The CC-receiver eMail address. Email addresses are comma separated. Names must be quoted when containing commas. Example: john.smith@example.com, "Doe, John" , needs no quoting + +- Datatype: `string` +- Default Value: `None` + + + +### BCC + +The BCC-receiver eMail address. Email addresses are comma separated. 
Names must be quoted when containing commas. Example: john.smith@example.com, "Doe, John" , needs no quoting + +- Datatype: `string` +- Default Value: `None` + + + +### Subject + +The eMail subject + +- Datatype: `string` +- Default Value: `Dataset` + + + +### Message + +The eMail text message + +- Datatype: `multiline string` +- Default Value: `None` + + + +### With attachment + +If enabled, a file from the input is attached to the email. A single input to this operator is expected that provides a file, e.g. a file based dataset (XML, JSON etc.). + +- Datatype: `boolean` +- Default Value: `true` + + + +### Force SSL + +When enabled, an SSL/TLS connection will be forced from the start without negotiation with the server. Not to be confused with STARTTLS which upgrades an insecure connection to an SSL/TLS connection, which is done by default. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Timeout + +Timeout in milliseconds to establish a connection or wait for a server response. Setting it to 0 or a negative number will disable the timeout. + +- Datatype: `int` +- Default Value: `10000` + + + +### Read e-mail properties from input + +When enabled, this allows sending multiple e-mails. All e-mail configurations are input via the first operator input with each entry representing a different e-mail. The optional second input can be a file based dataset for the attachment. E-mail parameters that can be overwritten are: from, receiver, cc, bcc, subject and message. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Nr. of retries + +The number of retries per email when send errors are encountered. + +- Datatype: `int` +- Default Value: `2` + + + +### Delay between e-mails (ms) + +The delay in milliseconds between sending two consecutive e-mails. This applies to the retry mechanism, but also to sending multiple e-mails. 
+ +- Datatype: `int` +- Default Value: `2` + + + diff --git a/docs/build/reference/customtask/SparkFunction.md b/docs/build/reference/customtask/SparkFunction.md new file mode 100644 index 000000000..94506b6c0 --- /dev/null +++ b/docs/build/reference/customtask/SparkFunction.md @@ -0,0 +1,43 @@ +--- +title: "Execute Spark function" +description: "Applies a specified Scala function to a specified field. E.g. when the inputField is 'name', the inputFunction is 'any => \"Arrrrgh!\"' and the alias is 'xxx', a query corresponding to 'Function existingField1, existingField2, ... \"Arrrrgh!\" as \"xxx\"' will be generated. If alias is empty the inputField will be overwritten, otherwise a new field will be added and the rest of the schema stays the same." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Execute Spark function + + + + +Applies a specified Scala function to a specified field. E.g. when the inputField is 'name', the inputFunction is 'any => "Arrrrgh!"' and the alias is 'xxx', a query corresponding to 'Function existingField1, existingField2, ... "Arrrrgh!" as "xxx"' will be generated. If alias is empty the inputField will be overwritten, otherwise a new field will be added and the rest of the schema stays the same. + +## Parameter + +### Function + +Scala function expression. + +- Datatype: `multiline string` +- Default Value: `None` + + + +### Input field + +Input field. + +- Datatype: `string` +- Default Value: `None` + + + +### Alias + +Alias. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/Template.md b/docs/build/reference/customtask/Template.md new file mode 100644 index 000000000..7c19b64b5 --- /dev/null +++ b/docs/build/reference/customtask/Template.md @@ -0,0 +1,97 @@ +--- +title: "Evaluate template" +description: "Evaluates a template on a sequence of entities. Can be used after a transformation or directly after datasets that output a single table, such as CSV or Excel." 
+icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Evaluate template + + + + + +The template operator supports the Jinja templating language. Documentation about Jinja can be found in the official [Template Designer Documentation](https://jinja.palletsprojects.com/en/2.11.x/templates/). + +Note that support for RDF properties is limited, because Jinja does not support some special characters (in particular colons) in variable names. This makes it impractical to access RDF properties. For this reason, the transformation that precedes the template operator needs to make sure that it generates attributes that are valid Jinja variable names. + +## Default evaluation + +By default, the template is evaluated separately for each entity. +For each input entity, an output entity is generated that provides a single output attribute, which contains the evaluated template. + +*Limitation*: For the default evaluation, accessing nested paths is not supported. If the preceding transformation contains hierarchical mappings, only the attributes from the root mapping can be accessed. + +## Full evaluation + +If 'full evaluation' is enabled, the entire input set will be evaluated at once. + +The entities variable will contain all input entities and can be iterated over: + + {% for entity in entities %} + {{entity.property}} + {% endfor %} + +A single output entity will be generated that contains the evaluated template. + +If the input entities are hierarchical (typically the case if the input transformation is hierarchical), each entity will be hierarchical as well. + +Example iterating over a sequence of books that each contains a list of chapters: + + {% for book in entities %} + Book {{book.title}} + {% for chapter in book.chapter %} + Chapter {{chapter.chapterNumber}} + {% endfor %} + {% endfor %} + +In this example, the child mapping defines a `chapter` target property from which it is accessible from the root entities. 
If the child mapping allows multiple entities, the value of the property will be a list of entities. + + +## Parameter + +### Template + +The template + +- Datatype: `template` +- Default Value: `None` + + + +### Language + +The template language. Currently, Jinja is supported. + +- Datatype: `string` +- Default Value: `jinja` + + + +### Output attribute + +The attribute in the output that will hold the evaluated template. + +- Datatype: `string` +- Default Value: `output` + + + +### Full evaluation + +If enabled, the entire input set will be evaluated at once. The template will receive a hierarchical 'entities' variable that can be iterated over. A single output entity will be generated that contains the evaluated template. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Forward input attributes + +If true, the input attributes will be forwarded to the output. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/customtask/Unpivot.md b/docs/build/reference/customtask/Unpivot.md new file mode 100644 index 000000000..d4339f1fa --- /dev/null +++ b/docs/build/reference/customtask/Unpivot.md @@ -0,0 +1,61 @@ +--- +title: "Unpivot" +description: "Given a list of table columns, transforms those columns into attribute-value pairs." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Unpivot + + + + +Given a list of table columns, transforms those columns into attribute-value pairs. + +## Parameter + +### First pivot property + +The name of the first pivot column in the range. + +- Datatype: `string` +- Default Value: `None` + + + +### Last pivot property + +the name of the last pivot column in the range. If left empty, all columns starting with the first pivot column are used. + +- Datatype: `string` +- Default Value: `None` + + + +### Attribute property + +The URI of the output column used to hold the attribute. 
+ +- Datatype: `string` +- Default Value: `attribute` + + + +### Value property + +The URI of the output column used to hold the value. + +- Datatype: `string` +- Default Value: `value` + + + +### Pivot columns + +Comma separated list of pivot column names. This property will override all inferred columns of the first two arguments. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/XmlParserOperator.md b/docs/build/reference/customtask/XmlParserOperator.md new file mode 100644 index 000000000..8bfa62627 --- /dev/null +++ b/docs/build/reference/customtask/XmlParserOperator.md @@ -0,0 +1,43 @@ +--- +title: "Parse XML" +description: "Takes exactly one input and reads either the defined inputPath or the first value of the first entity as XML document. Then executes the given output entity schema similar to the XML dataset to construct the result entities." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Parse XML + + + + +Takes exactly one input and reads either the defined inputPath or the first value of the first entity as XML document. Then executes the given output entity schema similar to the XML dataset to construct the result entities. + +## Parameter + +### Input path + +The Silk path expression of the input entity that contains the XML document. If not set, the value of the first defined property will be taken. + +- Datatype: `string` +- Default Value: `None` + + + +### Base path + +The path to the elements to be read, starting from the root element, e.g., '/Persons/Person'. If left empty, all direct children of the root element will be read. + +- Datatype: `string` +- Default Value: `None` + + + +### URI suffix pattern + +A URI pattern that is relative to the base URI of the input entity, e.g., /{ID}, where {path} may contain relative paths to elements. This relative part is appended to the input entity URI to construct the full URI pattern. 
+ +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/addProjectFiles.md b/docs/build/reference/customtask/addProjectFiles.md new file mode 100644 index 000000000..ae0767315 --- /dev/null +++ b/docs/build/reference/customtask/addProjectFiles.md @@ -0,0 +1,43 @@ +--- +title: "Add project files" +description: "Adds file resources to the project that are piped into the input port." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Add project files + + + + +Adds file resources to the project that are piped into the input port. + +## Parameter + +### File name + +File name of the uploaded file(s). If multiple files are uploaded, an index will be appended to the file name. If left empty, the existing file names will be used. + +- Datatype: `string` +- Default Value: `None` + + + +### Directory + +Directory to which the files should be written. If left empty, the files will be uploaded to the project root directory. Note that all files will be written to this directory even if they have been read from a different project directory initially. + +- Datatype: `string` +- Default Value: `None` + + + +### Overwrite strategy + +The strategy to use if a file with the same name already exists. + +- Datatype: `enumeration` +- Default Value: `fail` + + + diff --git a/docs/build/reference/customtask/cmem-plugin-jq-workflow.md b/docs/build/reference/customtask/cmem-plugin-jq-workflow.md new file mode 100644 index 000000000..ca8246b65 --- /dev/null +++ b/docs/build/reference/customtask/cmem-plugin-jq-workflow.md @@ -0,0 +1,98 @@ +--- +title: "jq" +description: "Process a JSON document with a jq filter / program." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# jq + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. 
[with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +> [jq](https://jqlang.org/) is like sed for JSON data - you can use it to +> slice and filter and map and transform structured data with the same ease that sed, awk, +> grep and friends let you play with text. + +In order to test jq expressions, you can use [play.jqlang.org](https://play.jqlang.org/). + +## Basic concepts: + +- Filters separated by a comma will produce multiple independent outputs: `,` +- Ignores errors if the type is unexpected: `?` +- Array construction: `[]` +- Object construction: `{}` +- Concatenate or Add: `+` +- Difference of sets or Subtract: `-` +- Size of selected element: `length` +- Pipes are used to chain commands in a similar fashion to bash: `|` + +## Dealing with JSON objects + +- Display all keys: `keys` +- Adds + 1 to all items: `map_values(.+1)` +- Delete a key: `del(.foo)` +- Convert an object to array: `to_entries | map([.key, .value])` + +## Dealing with fields + +- Concatenate two fields: `fieldNew=.field1+' '+.field2` + +## Dealing with arrays: Slicing and Filtering + +- All: `.[]` +- First: `.[0]` +- Range: `.[2:4]` +- First 3: `.[:3]` +- Last 2: `.[-2:]` +- Before Last: `.[-2]` +- Select array of int by value: `map(select(. >= 2))` +- Select array of objects by value: `.[] | select(.id == "second")` +- Select by type: `.[] | numbers` + +Types can be `arrays`, `objects`, `iterables`, `booleans`, `numbers`, `normals`, + `finites`, `strings`, `nulls`, `values` and `scalars`. 
+ +## Dealing with arrays: Mapping and Transforming + +- Add + 1 to all items: `map(.+1)` +- Delete 2 items: `del(.[1, 2])` +- Concatenate arrays: `add` +- Flatten an array: `flatten` +- Create a range of numbers: `[range(2;4)]` +- Display the type of each item: `map(type)` +- Sort an array of basic type: `sort` +- Sort an array of objects: `sort_by(.foo)` +- Group by a key - opposite to flatten: `group_by(.foo)` +- Minimum value of an array: `min` (see also `max`, `min_by(path_exp)`, `max_by(path_exp)`) +- Remove duplicates: `unique` or `unique_by(.foo)` or `unique_by(length)` +- Reverse an array: `reverse` + + + +## Parameter + +### jq Expression + +The jq program to apply to the input JSON string. + +- Datatype: `string` +- Default Value: `.` + + + +### JSON source which can be used with the validate expression action + + + +- Datatype: `code-json` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_auth-workflow-auth-OAuth2.md b/docs/build/reference/customtask/cmem_plugin_auth-workflow-auth-OAuth2.md new file mode 100644 index 000000000..ccc955a95 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_auth-workflow-auth-OAuth2.md @@ -0,0 +1,84 @@ +--- +title: "OAuth2 Authentication" +description: "Provide an OAuth2 access token for other tasks (via config port)." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# OAuth2 Authentication + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +Provide an OAuth2 access token for other tasks (via config port). + +This task uses the provided client or user credentials and runs an OAuth2 +authorization to the given service URL. 
It will fetch the output and provide the +token in a way that it can be used by other tasks to access the service. + +Note: The consuming task needs to have the parameter `oauth_access_token` in order +to use the output of this task. You need to connect this task to the +**config port** of the consuming task. + + +## Parameter + +### Grant Type + +Select the used OAuth Grant Type in order to specify how this plugin gets a valid token. Depending on the value of this parameter, other authentication related parameters will become mandatory or obsolete. The following values can be used: - `client_credentials` - this refers to the OAuth 2.0 Client Credentials Grant Type. Mandatory parameters for this grant type are Client ID and Client Secret. - `password` - this refers to the OAuth 2.0 Password Grant Type. Mandatory variables for this grant type are Client ID, User name and Password. + +- Datatype: `string` +- Default Value: `client_credentials` + + + +### Token Endpoint + +This is the OpenID Connect (OIDC) OAuth 2.0 token endpoint location (a HTTP(S) URL). + +- Datatype: `string` +- Default Value: `None` + + + +### Client ID + +The Client ID obtained during registration. + +- Datatype: `string` +- Default Value: `None` + + + +### Client Secret + +The Client Secret obtained during registration. + +- Datatype: `string` +- Default Value: `None` + + + +### Username + +The user account name used for authentication. + +- Datatype: `string` +- Default Value: `None` + + + +### Password + +The user account password. 
+ +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_graphql-workflow-graphql-GraphQLPlugin.md b/docs/build/reference/customtask/cmem_plugin_graphql-workflow-graphql-GraphQLPlugin.md new file mode 100644 index 000000000..0418a7447 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_graphql-workflow-graphql-GraphQLPlugin.md @@ -0,0 +1,71 @@ +--- +title: "GraphQL query" +description: "Executes a custom GraphQL query to a GraphQL endpoint and saves the result to a JSON dataset." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# GraphQL query + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +This workflow task performs GraphQL operations by sending + queries, mutations, and variables over operations. Allows for customization + in the GraphQL query using Jinja queries and Jinja variables, which can be + obtained from entities. The result of the query is saved as a JSON document + in a pre-created JSON dataset. + + +## Parameter + +### Endpoint + +The URL of the GraphQL endpoint you want to query. A collective list of public GraphQL APIs is available [here](https://github.com/IvanGoncharov/graphql-apis). Example Endpoint: `https://fruits-api.netlify.app/graphql` + +- Datatype: `string` +- Default Value: `None` + + + +### Query + +The query text of the GraphQL Query you want to execute. GraphQL is a query language for APIs and a runtime for fulfilling those queries with your existing data. Learn more on GraphQL [here](https://graphql.org/). 
Example Query: query allFruits { fruits { id scientific_name tree_name fruit_name family origin description climatic_zone } } + +- Datatype: `multiline string` +- Default Value: `None` + + + +### Query variables + +Pass dynamic variables when making a query or mutation. Example Variables: {"id" : 1} + +- Datatype: `multiline string` +- Default Value: `{}` + + + +### Target JSON Dataset + +The Dataset where this task will save the JSON results. + +- Datatype: `string` +- Default Value: `None` + + + +### OAuth access token + +Access token that connects to a GraphQL endpoint to authorize and secure user access to resources and data. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_irdi-workflow-irdi_plugin-IrdiPlugin.md b/docs/build/reference/customtask/cmem_plugin_irdi-workflow-irdi_plugin-IrdiPlugin.md new file mode 100644 index 000000000..0e2319258 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_irdi-workflow-irdi_plugin-IrdiPlugin.md @@ -0,0 +1,123 @@ +--- +title: "Generate base36 IRDIs" +description: "Create unique ECLASS IRDIs." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Generate base36 IRDIs + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +Create unique +[ECLASS](https://eclass.eu/support/technical-specification/structure-and-elements/irdi) IRDIs. + +IRDIs are unique for each combination of (non-advanced) parameters. +If no input path is configured, values are read from the URIs of the input (Transformation Input). + +- All fields of the IRDI are configurable, except `Item Code`, which is created by the plugin. + - Created IRDIs are unique per configuration. +- Specify a graph that stores the state of Item Codes. 
+- Input and output paths are configurable. + - if no input path is configured, values are read from the URIs of the input + (transformation input). + + +## Parameter + +### Counter graph + +Graph in which the Item Code (IC) counter is stored + +- Datatype: `string` +- Default Value: `None` + + + +### International Code Designator (ICD): Numeric, 4 characters + + + +- Datatype: `string` +- Default Value: `None` + + + +### Organization Identifier (OI): Numeric, 4 characters + + + +- Datatype: `string` +- Default Value: `None` + + + +### Organization Part Identifier (OPI): Alphanumeric, up to 35 characters (base36) + + + +- Datatype: `string` +- Default Value: `None` + + + +### OPI Source Indicator (OPIS): Numeric, 1 character + + + +- Datatype: `string` +- Default Value: `None` + + + +### Additional information (AI): Numeric, 4 characters + + + +- Datatype: `string` +- Default Value: `None` + + + +### Code-space identifier (CSI): Alphanumeric, 2 character (base36) + + + +- Datatype: `string` +- Default Value: `None` + + + +### Counted object + +The class of objects that are counted. (IRI) + +- Datatype: `string` +- Default Value: `None` + + + +### Input Schema Path / Property + +Path from which input values are taken. If empty, values are read from the URIs of the input + +- Datatype: `string` +- Default Value: `None` + + + +### Output path / property + +Path or property that will connect input values and their generated IRDIs + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_jira-JqlQuery.md b/docs/build/reference/customtask/cmem_plugin_jira-JqlQuery.md new file mode 100644 index 000000000..609723256 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_jira-JqlQuery.md @@ -0,0 +1,104 @@ +--- +title: "JQL query" +description: "Search and retrieve JIRA issues." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# JQL query + + +!!! 
note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +This workflow task sends a [JQL query](https://www.atlassian.com/software/jira/guides/jql/overview) +to the [REST API (v2)](https://developer.atlassian.com/cloud/jira/platform/rest/v2/) of a given +Jira service. It is tested both with on-premise Jira deployments as well as with instances on +`atlassian.net`. + +The result of the JQL query is a list of JIRA issue descriptions (entities). +This list is forwarded as a JSON document to the output port, +where you should connect a JSON Dataset. + +Note that you need to create an [API token](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/) +for your Atlassian account, to access the API of your atlassian.net hosted Jira instance. + + +## Parameter + +### Jira Server + +Base URL of the jira service, e.g. 'https://jira.example.org' + +- Datatype: `string` +- Default Value: `None` + + + +### Account + + + +- Datatype: `string` +- Default Value: `None` + + + +### Password or Token + + + +- Datatype: `password` +- Default Value: `None` + + + +### JQL Query + +Warning: An empty query string retrieves all issues. + +- Datatype: `string` +- Default Value: `None` + + + +### Limit + +Maximum number of issues to retrieve (0 = retrieve all issues). + +- Datatype: `Long` +- Default Value: `0` + + + +### Verify SSL Connection + + + +- Datatype: `boolean` +- Default Value: `true` + + + +### Connection Timeout + +Number of seconds, the plugin will wait to establish a connection to the Jira Service. + +- Datatype: `Long` +- Default Value: `300` + + + +### Results per Page + +Number of items to return per request. 
+ +- Datatype: `Long` +- Default Value: `100` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_kafka-ReceiveMessages.md b/docs/build/reference/customtask/cmem_plugin_kafka-ReceiveMessages.md new file mode 100644 index 000000000..cc3bda5fc --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_kafka-ReceiveMessages.md @@ -0,0 +1,234 @@ +--- +title: "Kafka Consumer (Receive Messages)" +description: "Reads messages from a Kafka topic and saves it to a messages dataset (Consumer)." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Kafka Consumer (Receive Messages) + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This workflow operator uses the Kafka Consumer API +to receive messages from an [Apache Kafka](https://kafka.apache.org/) topic. + +Messages received from the topic will be generated as entities with the following +flat schema: + +- **key** - the optional key of the message, +- **content** - the message itself as plain text (use other operators, such as + [Parse JSON](https://documentation.eccenca.com/latest/deploy-and-configure/configuration/dataintegration/plugin-reference/#parse-json) or [Parse XML](https://documentation.eccenca.com/latest/deploy-and-configure/configuration/dataintegration/plugin-reference/#parse-xml) to process + complex message content), +- **offset** - the given offset of the message in the topic, +- **ts-production** - the timestamp when the message was written to the topic, +- **ts-consumption** - the timestamp when the message was consumed from the topic. + +In order to process the resulting entities, they have to run through a transformation. 
+ +As an alternate working mode, messages can be exported directly to a JSON or XML +dataset if you know that the messages on your topic are valid JSON or XML documents +(see Advanced Options > Messages Dataset). + +In this case, a sample response from the consumer will appear as follows: + +
+ Sample JSON Response + +```json +[ + { + "message": { + "key": "818432-942813-832642-453478", + "headers": { + "type": "ADD" + }, + "content": { + "location": ["Leipzig"], + "obstacle": { + "name": "Iron Bars", + "order": "1" + } + } + } + }, + { + "message": { + "key": "887428-119918-570674-866526", + "headers": { + "type": "REMOVE" + }, + "content": { + "comments": "We can pass any json payload here." + } + } + }, + { + "message": { + "key": "TestKey", + "tombstone": true, + "headers": { + "h1": "v1", + "h2": "v2" + }, + "content": { + "will_be_ignored": "..." + } + } + } +] +``` + +
+
+ Sample XML Response + +```xml + + + + + + string + + + + + + + string + + + + will be ignored + +``` + +
+ + +## Parameter + +### Messages Dataset + +Where do you want to save the messages? The dropdown lists usable datasets from the current project only. In case you miss your dataset, check for the correct type (XML/JSON) and build project. + +- Datatype: `string` +- Default Value: `None` + + + +### Bootstrap Server + +This is the URL of one of the Kafka brokers. The task fetches the initial metadata about your Kafka cluster from this URL. + +- Datatype: `string` +- Default Value: `None` + + + +### Security Protocol + +Which security mechanisms need to be applied to connect? Use PLAINTEXT in case you connect to a plain Kafka, which is available inside your VPN. Use SASL in case you connect to a [confluent.cloud](https://confluent.cloud) cluster (then you also need to specify your SASL credentials in the advanced options section). + +- Datatype: `string` +- Default Value: `PLAINTEXT` + + + +### SASL Mechanisms + + + +- Datatype: `string` +- Default Value: `PLAIN` + + + +### SASL Account + +The account identifier for the SASL authentication. In case you are using a [confluent.cloud](https://confluent.cloud) cluster, this is the API key. + +- Datatype: `string` +- Default Value: `None` + + + +### SASL Password + +The credentials for the SASL Account. In case you are using a [confluent.cloud](https://confluent.cloud) cluster, this is the API secret. + +- Datatype: `password` +- Default Value: `None` + + + +### Topic + +The name of the category/feed where messages were published. + +- Datatype: `string` +- Default Value: `None` + + + +### Auto Offset Reset + +What to do when there is no initial offset in Kafka or if the current offset does not exist any more on the server (e.g. because that data has been deleted). - `earliest` will fetch the whole topic beginning from the oldest record. - `latest` will receive nothing but will get any new records on the next run. 
+ +- Datatype: `string` +- Default Value: `latest` + + + +### Consumer Group Name + +When a topic is consumed by consumers in the same group, every record will be delivered to only one consumer of that group. If all the consumers of a topic are labeled the same consumer group, then the records will effectively be load-balanced over these consumers. If all the consumers of a topic are labeled different consumer groups, then each record will be broadcast to all the consumers. When the Group Id field is empty, the plugin defaults to DNS:PROJECT ID:TASK ID. + +- Datatype: `string` +- Default Value: `None` + + + +### Client Id + +An optional identifier of a Kafka client (producer/consumer) that is passed to a Kafka broker with every request. The sole purpose of this is to be able to track the source of requests beyond just ip and port by allowing a logical application name to be included in Kafka logs and monitoring aggregates. When the Client Id field is empty, the plugin defaults to DNS:PROJECT ID:TASK ID. + +- Datatype: `string` +- Default Value: `None` + + + +### Local Consumer Queue Size + +Maximum total message size in kilobytes that the consumer can buffer for a specific partition. The consumer will stop fetching from the partition if it hits this limit. This helps prevent consumers from running out of memory. + +- Datatype: `Long` +- Default Value: `5000` + + + +### Message Limit + +The maximum number of messages to fetch and process in each run. If 0 or less, all messages will be fetched. + +- Datatype: `Long` +- Default Value: `100000` + + + +### Disable Commit + +Setting this to true will disable committing messages after retrieval. This means you will receive the same messages on the next execution (for debugging). 
+ +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_kafka-SendMessages.md b/docs/build/reference/customtask/cmem_plugin_kafka-SendMessages.md new file mode 100644 index 000000000..adbf84f7c --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_kafka-SendMessages.md @@ -0,0 +1,201 @@ +--- +title: "Kafka Producer (Send Messages)" +description: "Reads a messages dataset and sends records to a Kafka topic (Producer)." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Kafka Producer (Send Messages) + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +This workflow operator uses the Kafka Producer API to send +messages to an [Apache Kafka](https://kafka.apache.org/) topic. + +Accepts entities as input, and, if desired, accepts a pre-constructed XML/JSON dataset, +which is transformed into messages and sent to a designated Kafka topic based +on configuration. + +
+ Sample XML format + + An example XML document is shown below. This document will be sent as two messages + to the configured topic. Each message is created as a proper XML document. + + +```xml + + + + + + string + + + + + + + string + + + + will be ignored + +``` + +
+ +
+ Sample JSON format + + An example JSON document is shown below. This document will be sent as two messages + to the configured topic. Each message is created as a proper JSON document. + + +```json +[ + { + "message": { + "key": "818432-942813-832642-453478", + "headers": { + "type": "ADD" + }, + "content": { + "location": ["Leipzig"], + "obstacle": { + "name": "Iron Bars", + "order": "1" + } + } + } + }, + { + "message": { + "key": "887428-119918-570674-866526", + "headers": { + "type": "REMOVE" + }, + "content": { + "comments": "We can pass any json payload here." + } + } + }, + { + "message": { + "key": "TestKey", + "tombstone": true, + "headers": { + "h1": "v1", + "h2": "v2" + }, + "content": { + "will_be_ignored": "..." + } + } + } +] +``` + +
+ + + +## Parameter + +### Messages Dataset + +Where do you want to retrieve the messages from? The dropdown lists usable datasets from the current project only. In case you miss your dataset, check for the correct type (XML/JSON) and build project). The messages will be retrieved from the entities if no dataset is provided. + +- Datatype: `string` +- Default Value: `None` + + + +### Bootstrap Server + +This is URL of one of the Kafka brokers. The task fetches the initial metadata about your Kafka cluster from this URL. + +- Datatype: `string` +- Default Value: `None` + + + +### Security Protocol + +Which security mechanisms need to be applied to connect? Use PLAINTEXT in case you connect to a plain Kafka, which is available inside your VPN. Use SASL in case you connect to a [confluent.cloud](https://confluent.cloud) cluster (then you also need to specify your SASL credentials in the advanced options section). + +- Datatype: `string` +- Default Value: `PLAINTEXT` + + + +### SASL Mechanisms + + + +- Datatype: `string` +- Default Value: `PLAIN` + + + +### SASL Account + +The account identifier for the SASL authentication. In case you are using a [confluent.cloud](https://confluent.cloud) cluster, this is the API key. + +- Datatype: `string` +- Default Value: `None` + + + +### SASL Password + +The credentials for the SASL Account. In case you are using a [confluent.cloud](https://confluent.cloud) cluster, this is the API secret. + +- Datatype: `password` +- Default Value: `None` + + + +### Topic + +The name of the category/feed to which the messages will be published. Note that you may create this topic in advance before publishing messages to it. This is especially true for a kafka cluster hosted at [confluent.cloud](https://confluent.cloud). + +- Datatype: `string` +- Default Value: `None` + + + +### Client Id + +An optional identifier of a Kafka client (producer/consumer) that is passed to a Kafka broker with every request. 
The sole purpose of this is to be able to track the source of requests beyond just ip and port by allowing a logical application name to be included in Kafka logs and monitoring aggregates. When the Client Id field is empty, the plugin defaults to DNS:PROJECT ID:TASK ID. + +- Datatype: `string` +- Default Value: `None` + + + +### Maximum Message Size + +The maximum size of a request message in bytes. This is also effectively a cap on the maximum record size. Note that the server has its own cap on record size which may be different from this. + +- Datatype: `Long` +- Default Value: `1048576` + + + +### Compression Type + +The compression type for all data generated by the producer. The default is none (i.e. no compression). + +- Datatype: `string` +- Default Value: `none` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_llm-CreateEmbeddings.md b/docs/build/reference/customtask/cmem_plugin_llm-CreateEmbeddings.md new file mode 100644 index 000000000..8c2c75516 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_llm-CreateEmbeddings.md @@ -0,0 +1,128 @@ +--- +title: "Create Embeddings" +description: "Fetch and output LLM created embeddings from input entities." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Create Embeddings + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This plugin creates vector embeddings from text data using OpenAI's embeddings API. +It processes input entities containing text data and generates high-dimensional vector +representations that capture semantic meaning. 
+ +## Features + +- Supports OpenAI embeddings models (e.g., text-embedding-3-small) +- Batch processing for efficient API usage +- Configurable input/output paths +- Automatic schema generation based on configuration +- Built-in error handling and workflow cancellation support + +## Configuration + +- **URL**: OpenAI API endpoint (default: https://api.openai.com/v1) +- **API Key**: Your OpenAI API key for authentication +- **Model**: The embedding model to use (e.g., text-embedding-3-small) +- **Timeout**: Request timeout in milliseconds (default: 10000) +- **Buffer Size**: Number of texts to process per batch (default: 100) +- **Input Paths**: Comma-separated list of entity paths to embed (default: "text") +- **Output Paths**: Configurable paths for embedding vectors and source text + +## Input/Output + +- **Input**: Entities with text data in specified paths +- **Output**: Original entities enhanced with embedding vectors and source text +- Embedding vectors are stored as string representations of float arrays +- Source text used for embedding is preserved for reference + +## Use Cases + +- Semantic search and similarity matching +- Text clustering and classification +- Recommendation systems +- Natural language processing pipelines + +## Parameter + +### Base URL + +URL of the OpenAI API (without endpoint path and without trailing slash) + +- Datatype: `string` +- Default Value: `https://api.openai.com/v1` + + + +### The OpenAI API key + +Fill the OpenAI API key if needed (or give a dummy value in case you access an unsecured endpoint). + +- Datatype: `password` +- Default Value: `None` + + + +### The embeddings model, e.g. text-embedding-3-small + + + +- Datatype: `string` +- Default Value: `text-embedding-3-small` + + + +### Timeout (Single Request, in Milliseconds) + + + +- Datatype: `Long` +- Default Value: `10000` + + + +### Entries Processing Buffer + +How many input values do you want to send per request? 
+ +- Datatype: `Long` +- Default Value: `100` + + + +### Used entity paths (comma-separated list) + +Changing this value will change, which input paths are used by the workflow task. A blank value means, all paths are used. + +- Datatype: `string` +- Default Value: `text` + + + +### Entity Embedding text (output) + +Changing this value will change the output schema accordingly. Default: _embedding_source + +- Datatype: `string` +- Default Value: `_embedding_source` + + + +### Entity Embedding path (output) + +Changing this value will change the output schema accordingly. Default: _embedding + +- Datatype: `string` +- Default Value: `_embedding` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_llm-ExecuteInstructions.md b/docs/build/reference/customtask/cmem_plugin_llm-ExecuteInstructions.md new file mode 100644 index 000000000..7f34be4ac --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_llm-ExecuteInstructions.md @@ -0,0 +1,272 @@ +--- +title: "Execute Instructions" +description: "Send instructions (prompt) to an LLM and process the result." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Execute Instructions + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +## Introduction + +This plugin allows to execute an LLM instruction over a given list of entities. + +After being processed, each entity receives one additional path (`_instruction_output`). +This path contains the output of the executed instruction over the entity. + +## Parameters + +### Base URL + +- The base URL of the OpenAI compatible API (without endpoint path). +- Default: `https://api.openai.com/v1` + +### API key + +- An optional OpenAI API key. +- Default: blank + +### Instruct Model + +- The instruct model. 
+- Example: `gpt-4o-mini` + +### Instruction Prompt Template + +- The instruction prompt template. Please have look at + [Text generation and prompting](https://platform.openai.com/docs/guides/text?api-mode=chat) + to learn how to prompt a model to generate text. +- You can add Jinja placeholder to the template text, which will be replaced with data from + incoming entities: + - A placeholder such as `{{ variable }}` will be replaced with the whole incoming entity + as a JSON string. + - A placeholder which includes a path (`{{ variable.name }}`) will be replaced with the + `name` property of an incoming entity. +- If you use Jinja placeholders in the template text, input and output ports of this task are + configured as follows: + - For each different placeholder object, an additional input port is added to the task. + - If you do not insert any placeholders, there will be no input ports. + - Variables are sorted alphabetically, so `{{ variable_a }}` will be replaced with entity + data from the first input port, while `{{ variable_b }}` will be replaced with entity + data from the second input port. + - During execution, the task iterates over the entities from the first input port. + - Entities from all other input ports will be consumed when the execution starts. Then, in + each iteration, their data will inserted to the prompt `{{ variable }}` accordingly. + - You can configure how those additional input ports will be consumed with the parameter + consume_all_entities. + - It is recommended to only use known entity paths from the connected input tasks, such as + `{{ variable.path }}`, so the ports can be configured with a FixedSchema. + This avoids the need for additional transformation tasks on the output port. +- Your instruct prompt template is inserted as a user message in + the messages_template. +- Default template: +``` jinja2 +Write a paragraph about this entity: {{ entity }} +``` + +
+Advanced Parameter + +### Temperature (between 0 and 2) - Advanced Parameter + +- Higher values like 0.8 will make the output more random,while lower values like 0.2 will make it more focused and deterministic. +- Default: `1.0` + +### Timeout for a single API call - Advanced Parameter + +- The timeout of a single request in seconds. +- Default: `300` + +### Instruction Output Path - Advanced Parameter + +- The entity path where the instruction result will be provided. +- Default: `_instruction_output` + +### Messages Template - Advanced Parameter + +- A list of messages comprising the conversation compatible with OpenAI + chat completion API message object. +- Have look at [Message roles and instruction following](https://platform.openai.com/docs/guides/text#message-roles-and-instruction-following) + to learn about different levels of priority to messages with different roles. +- Default messages template: +``` json +[ + { + "role": "developer", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "{{ instruction_prompt }}" + } +] +``` + +### Consume all entities from additional input ports - Advanced Parameter + +- If true, all entities from additional input ports will be consumed. + Otherwise, only the first entity of the additional ports will be used. +- Be aware that all entities are loaded in memory. +- Default: `False` + +### Output Format - Advanced Parameter + +- Specifying the format that the model must output. +- Possible values: + - TEXT: Standard text output. + - STRUCTURED_OUTPUT: Structured output following a given schema. Add your schema as Pydantic + model here: pydantic_schema + - JSON_MODE: JSON mode is a more basic version of the Structured Outputs feature. + While JSON mode ensures that model output is valid JSON, Structured Outputs + reliably matches the model's output to the schema you specify. 
If you want to request + a specified structure, you can add it to + instruct_prompt_template +- Default: `OutputFormat.TEXT` + +### Pydantic Schema definition the model is using in the response. - Advanced Parameter + +- The Pydantic schema definition with a mandatory class named + `StructuredOutput(BaseModel)`. +- This field is only used when output_format + is set to `STRUCTURED_OUTPUT`. +- A schema may have up to 100 object properties total, with up to 5 levels of nesting. +- The total string length of all property names, definition names, enum values, + and const values cannot exceed 15,000 characters. +- Default: +``` python +from pydantic import BaseModel + +class StructuredOutput(BaseModel): + title: str + abstract: str + keywords: list[str] + +``` +
+ + +## Parameter + +### Base URL + +The base URL of the OpenAI compatible API (without endpoint path). + +- Datatype: `string` +- Default Value: `https://api.openai.com/v1` + + + +### API key + +An optional OpenAI API key. + +- Datatype: `password` +- Default Value: `None` + + + +### Instruct Model + +The instruct model. + +- Datatype: `string` +- Default Value: `gpt-4o-mini` + + + +### Temperature (between 0 and 2) + +Higher values like 0.8 will make the output more random,while lower values like 0.2 will make it more focused and deterministic. + +- Datatype: `double` +- Default Value: `1.0` + + + +### Timeout for a single API call + +The timeout of a single request in seconds. + +- Datatype: `double` +- Default Value: `300` + + + +### Instruction Output Path + +The entity path where the instruction result will be provided. + +- Datatype: `string` +- Default Value: `_instruction_output` + + + +### Messages Template + +A list of messages comprising the conversation compatible with OpenAI chat completion API message object. + +- Datatype: `code-json` +- Default Value: `[ + { + "role": "developer", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "{{ instruction_prompt }}" + } +]` + + + +### Instruction Prompt Template + +The instruction prompt template. + +- Datatype: `code-jinja2` +- Default Value: `Write a paragraph about this entity: {{ entity }}` + + + +### Consume all entities from additional input ports + +If true, all entities from additional input ports will be consumed. Otherwise, only the first entity of the additional ports will be used. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Output Format + +Specifying the format that the model must output. + +- Datatype: `enumeration` +- Default Value: `TEXT` + + + +### Pydantic Schema definition the model is using in the response. + +The Pydantic schema definition with a mandatory class named `StructuredOutput(BaseModel)`. 
+ +- Datatype: `code-python` +- Default Value: `from pydantic import BaseModel + +class StructuredOutput(BaseModel): + title: str + abstract: str + keywords: list[str] +` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_loopwf-task-StartWorkflow.md b/docs/build/reference/customtask/cmem_plugin_loopwf-task-StartWorkflow.md new file mode 100644 index 000000000..c22fc222c --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_loopwf-task-StartWorkflow.md @@ -0,0 +1,70 @@ +--- +title: "Start Workflow per Entity" +description: "Loop over the output of a task and start a sub-workflow for each entity." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Start Workflow per Entity + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +This workflow task operates on a list of incoming entities +and sequentially starts a single "inner" workflow for each entity. +In case one "inner" workflow fails, the execution is stopped with an error. +In this case the error message can be seen in the Activities view +(see `Execute with payload of [inner workflow name]`). + +The started workflow needs to have a replaceable JSON dataset as input. + +Current notes and limitations: + +- The entities which are the input of the "inner" workflow can not be hierarchic. +- The replaceable dataset of the "inner" workflow needs to be a JSON dataset. +- There is no check for circles implemented! + + +## Parameter + +### Workflow + +Which workflow do you want to start per entity. + +- Datatype: `string` +- Default Value: `None` + + + +### How many workflow jobs should run in parallel? + + + +- Datatype: `Long` +- Default Value: `1` + + + +### Forward incoming entities to the output port? 
+ + + +- Datatype: `boolean` +- Default Value: `false` + + + +### Mime-type for file by file processing (beta) + +When working with file entities, setting this to a proper value will send the file to the workflow instead of the meta-data. Examples are: 'application/x-plugin-binaryFile', 'application/json', 'application/xml', 'text/csv', 'application/octet-stream' or 'application/x-plugin-excel'. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_mattermost.md b/docs/build/reference/customtask/cmem_plugin_mattermost.md new file mode 100644 index 000000000..69e8fd197 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_mattermost.md @@ -0,0 +1,100 @@ +--- +title: "Send Mattermost messages" +description: "Send messages to Mattermost channels and/or users." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Send Mattermost messages + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +This task sends messages to Mattermost channels and users. + +You need a bot account in order to connect to Mattermost. +Learn more on bot accounts at +[developers.mattermost.com](https://developers.mattermost.com): + +- [Using bot accounts](https://developers.mattermost.com/integrate/reference/bot-accounts/) +- [Personal access tokens](https://developers.mattermost.com/integrate/reference/personal-access-token/) + +The task has two working modes. + +# Single Message + +You can send a single static message to a pre-configured channel or user. +Just configure the User and/or Channel and Message parameters to do so. + +# Multiple Messages + +You can send multiple messages to different channels or users by piping data into +the task. 
For each entity, a message is sent. For dynamic messages the following +input paths are recognized: + +- user +- channel +- message + + +## Parameter + +### URL + +The base URL of your Mattermost deployment. Example: https://mattermost.example.org + +- Datatype: `string` +- Default Value: `None` + + + +### Access Token + +The Personal Access Token of the bot account. + +- Datatype: `password` +- Default Value: `None` + + + +### Bot name + +The name or display name of the bot you want to use to connect. + +- Datatype: `string` +- Default Value: `None` + + + +### User + +The user account which will receive the message. You can search for users if the connection was successful (Base URL, bot + token). + +- Datatype: `string` +- Default Value: `None` + + + +### Channel + +The channel which will receive the message. You can search for channels if the connection was successful (Base URL, bot + token). If you want to send your message to multiple channels, separate them with a comma. + +- Datatype: `string` +- Default Value: `None` + + + +### Message + +The message size is limited to a configured maximum (e.g. 16383 characters). + +- Datatype: `multiline string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_nextcloud-Download.md b/docs/build/reference/customtask/cmem_plugin_nextcloud-Download.md new file mode 100644 index 000000000..39d31b0ff --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_nextcloud-Download.md @@ -0,0 +1,101 @@ +--- +title: "Download Nextcloud files" +description: "Download files from a given Nextcloud instance." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Download Nextcloud files + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). 
+ + +This workflow task downloads files from a specified Nextcloud service instance. + +Given the URL of the target Nextcloud instance along with your credentials, you can specify any +directory from which files should be downloaded. Additionally, you may define file patterns to +include or exclude specific files within the selected directory. The files are not downloaded to the +project resources, but are only available within the workflow itself. + +#### Nextcloud List Files input +If this workflow has an input, it will take the data that comes in instead of the selected values. +This works only with the schema used in the **Nextcloud List Files** Plugin. +Make sure you still add the appropriate URL, identification and token. + +#### Important: +To establish a secure connection, you must generate a dedicated [app-specific password and username](https://docs.nextcloud.com/server/latest/user_manual/de/session_management.html) +in the Security section of your Nextcloud account settings. Do not use your standard login +credentials. + + +## Parameter + +### Nextcloud URL + +The Base URL of your Nextcloud service, e.g. `https://cloud.example.com`. + +- Datatype: `string` +- Default Value: `None` + + + +### API identification + +The identification generated by the app-password function in Nextcloud. + +- Datatype: `string` +- Default Value: `None` + + + +### API token + +The token generated by the app-password function in Nextcloud. + +- Datatype: `password` +- Default Value: `None` + + + +### File or directory path + +The path to a specific directory from which to download files. Includes all the subdirectories. Leave empty or type '/' for root directory. When pasting a directory path, select 'Custom entry:' to trigger autocompletion and check if the folder is recognized. + +- Datatype: `string` +- Default Value: `None` + + + +### File expression using * + +A filepath for searching specified files through the given path. E.g. 
searching for *.txt results in all .txt files under the given directory and its subdirectories. + +- Datatype: `string` +- Default Value: `None` + + + +### Error on empty result + +A flag indicating whether an empty output will throw an error. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Exclude files in subfolders from download. + +A flag indicating whether files located in subfolders should be excluded from the download. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_nextcloud-List.md b/docs/build/reference/customtask/cmem_plugin_nextcloud-List.md new file mode 100644 index 000000000..3b544ad5c --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_nextcloud-List.md @@ -0,0 +1,96 @@ +--- +title: "List Nextcloud files" +description: "List directories and files from a given Nextcloud folder." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# List Nextcloud files + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This workflow task creates a structured output from a specified Nextcloud +service instance. + +Given the URL of the target Nextcloud instance along with your credentials, you can specify any +directory from which data should be extracted. Additionally, you may define file patterns to +include or exclude specific files within the selected directory. + +#### Important: +To establish a secure connection, you must generate a dedicated [app-specific password and username](https://docs.nextcloud.com/server/latest/user_manual/de/session_management.html) +in the Security section of your Nextcloud account settings. Do not use your standard login +credentials. 
+ + +## Parameter + +### Nextcloud URL + +The Base URL of your Nextcloud service, e.g. `https://cloud.example.com`. + +- Datatype: `string` +- Default Value: `None` + + + +### API identification + +The identification generated by the app-password function in Nextcloud. + +- Datatype: `string` +- Default Value: `None` + + + +### API token + +The token generated by the app-password function in Nextcloud. + +- Datatype: `password` +- Default Value: `None` + + + +### File or directory path + +The path of a specific file or directory that needs to be transformed. Includes all the subdirectories. Leave empty or type '/' for root directory. When pasting a directory path, select 'Custom entry:' to trigger autocompletion and check if the folder is recognized. + +- Datatype: `string` +- Default Value: `None` + + + +### File expression using * + +A filepath for searching specified files through the given path. Leave blank for all file types. E.g. searching for *.txt results in all .txt files under the given directory and its subdirectories. + +- Datatype: `string` +- Default Value: `None` + + + +### Error on empty result + +A flag indicating whether an empty output will throw an error. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Exclude files in subfolders. + +A flag indicating whether files located in subfolders should be excluded from the workflow. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_nextcloud-Upload.md b/docs/build/reference/customtask/cmem_plugin_nextcloud-Upload.md new file mode 100644 index 000000000..659abad14 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_nextcloud-Upload.md @@ -0,0 +1,80 @@ +--- +title: "Upload files to Nextcloud" +description: "Upload files to a given Nextcloud instance." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Upload files to Nextcloud + + +!!! 
note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This workflow task uploads files to a specified Nextcloud service instance. + +Given the URL of the target Nextcloud instance along with your credentials, you can specify any +directory to which files should be uploaded. + +### Input +If this workflow has an input, it will take the data that comes in instead of the selected source +file. + +#### Important: +To establish a secure connection, you must generate a dedicated [app-specific password and username](https://docs.nextcloud.com/server/latest/user_manual/de/session_management.html) +in the Security section of your Nextcloud account settings. Do not use your standard login +credentials. + + +## Parameter + +### Nextcloud URL + +The Base URL of your Nextcloud service, e.g. `https://cloud.example.com`. + +- Datatype: `string` +- Default Value: `None` + + + +### API identification + +The identification generated by the app-password function in Nextcloud. + +- Datatype: `string` +- Default Value: `None` + + + +### API token + +The token generated by the app-password function in Nextcloud. + +- Datatype: `password` +- Default Value: `None` + + + +### File or directory path + +The path to a specific directory to which to upload files. Leave empty or type '/' for root directory. When pasting a directory path, select 'Custom entry:' to trigger autocompletion and check if the folder is recognized. + +- Datatype: `string` +- Default Value: `None` + + + +### File + +The file which you would like to upload. 
If this is set, no connections in the workflow are allowed + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_office365-Download.md b/docs/build/reference/customtask/cmem_plugin_office365-Download.md new file mode 100644 index 000000000..bede37cf5 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_office365-Download.md @@ -0,0 +1,128 @@ +--- +title: "Download Office 365 Files" +description: "Download files from Microsoft OneDrive or Sites" +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Download Office 365 Files + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This workflow task downloads files from a specified Office 365 instance. +For this to work a registered app in Microsoft's Entra ID space is necessary. +Further information can be found [here](https://learn.microsoft.com/en-us/entra/identity-platform/quickstart-register-app). + +After registering an application, it needs to be granted application wide API permissions: +- Files.Read.All +- Sites.Read.All + +Admin consent is required to activate these permissions. +With this setup, anyone with the secret can access all users' OneDrives and all Sharepoint/Team +sites. + +#### Important +Make sure only trusted admins can create or manage secrets! +Whoever holds the secrets has all the access to granted resources so best not to distribute +recklessly. + + +## Parameter + +### Tenant ID + +ID of your tenant. Can be seen within your registered application + +- Datatype: `string` +- Default Value: `None` + + + +### Client ID + +Client ID of your registered application. 
+ +- Datatype: `string` +- Default Value: `None` + + + +### Client secret + +Client secret created within your registered application. + +- Datatype: `password` +- Default Value: `None` + + + +### Type resource + +The type of resource you want the data to be extracted from. This can either be a site or a user's share. + +- Datatype: `string` +- Default Value: `None` + + + +### Target resource + +Target resource which files will be listed from. This can either be a specific user's share address or a Microsoft site URL. + +- Datatype: `string` +- Default Value: `None` + + + +### Drives + +A list of drives from the selected target resource. + +- Datatype: `string` +- Default Value: `None` + + + +### Maximum amount of workers + +Specifies the maximum number of threads used for parallel execution of the workflow. The default is 32, and the valid range is 1 to 32. Note: Due to known throttling limits imposed by Microsoft, running with high parallelism may cause errors. If you encounter issues, try reducing the number of threads to 1. + +- Datatype: `Long` +- Default Value: `32` + + + +### Directory path + +The path of a directory that needs to be transformed. Includes all subdirectories by default. + +- Datatype: `string` +- Default Value: `None` + + + +### Regular expression + +A regular expression performed on all the files within the selected path. + +- Datatype: `string` +- Default Value: `^.*$` + + + +### Exclude files in subfolders + +A flag indicating whether files located in subfolders should be excluded. 
+ +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_office365-List.md b/docs/build/reference/customtask/cmem_plugin_office365-List.md new file mode 100644 index 000000000..efb132768 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_office365-List.md @@ -0,0 +1,128 @@ +--- +title: "List Office 365 Files" +description: "List files from OneDrive or Sites" +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# List Office 365 Files + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This workflow task creates a structured output from a specified Office 365 instance. +For this to work a registered app in Microsoft's Entra ID space is necessary. +Further information can be found [here](https://learn.microsoft.com/en-us/entra/identity-platform/quickstart-register-app). + +After registering an application, it needs to be granted application wide API permissions: +- Files.Read.All +- Sites.Read.All + +Admin consent is required to activate these permissions. +With this setup, anyone with the secret can access all users' OneDrives and all Sharepoint/Team +sites. + +#### Important +Make sure only trusted admins can create or manage secrets! +Whoever holds the secrets has all the access to granted resources so best not to distribute +recklessly. + + +## Parameter + +### Tenant ID + +ID of your tenant. Can be seen within your registered application + +- Datatype: `string` +- Default Value: `None` + + + +### Client ID + +Client ID of your registered application. + +- Datatype: `string` +- Default Value: `None` + + + +### Client secret + +Client secret created withing your registered application. 
+ +- Datatype: `password` +- Default Value: `None` + + + +### Type resource + +The type of resource you want the data to be extracted from. This can either be a site or a user's share + +- Datatype: `string` +- Default Value: `None` + + + +### Target resource + +Target resource which files will be listed from. This can either be a specific user's share address or a Microsoft site URL. + +- Datatype: `string` +- Default Value: `None` + + + +### Drives + +A list of drives from the selected target resource. + +- Datatype: `string` +- Default Value: `None` + + + +### Maximum amount of workers + +Specifies the maximum number of threads used for parallel execution of the workflow. The default is 32, and the valid range is 1 to 32. Note: Due to known throttling limits imposed by Microsoft, running with high parallelism may cause errors. If you encounter issues, try reducing the number of threads to 1. + +- Datatype: `Long` +- Default Value: `32` + + + +### Directory path + +The path of a directory that needs to be transformed. Includes all subdirectories by default + +- Datatype: `string` +- Default Value: `None` + + + +### Regular expression + +A regular expression performed on all the files within the selected path + +- Datatype: `string` +- Default Value: `^.*$` + + + +### Exclude files in subfolders + +A flag indicating if files should only be listed from subfolders or not. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_office365-Upload.md b/docs/build/reference/customtask/cmem_plugin_office365-Upload.md new file mode 100644 index 000000000..deb44ef5d --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_office365-Upload.md @@ -0,0 +1,110 @@ +--- +title: "Office 365 Upload Files" +description: "Upload files to OneDrive or a SharePoint site" +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Office 365 Upload Files + + +!!! 
note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This workflow task uploads files to a specified Office 365 instance. +For this to work a registered app in Microsoft's Entra ID space is necessary. +Further information can be found [here](https://learn.microsoft.com/en-us/entra/identity-platform/quickstart-register-app). + +After registering an application, it needs to be granted application-wide API permissions: +- Files.Read.All, Files.Write.All +- Sites.Read.All, Sites.Write.All + +Admin consent is required to activate these permissions. +With this setup, anyone with the secret can access all users' OneDrives and all Sharepoint/Team +sites. + +#### Important +Make sure only trusted admins can create or manage secrets! +Whoever holds the secrets has full access to the granted resources, so do not distribute them +recklessly. + + +## Parameter + +### Tenant ID + +ID of your tenant. Can be seen within your registered application + +- Datatype: `string` +- Default Value: `None` + + + +### Client ID + +Client ID of your registered application. + +- Datatype: `string` +- Default Value: `None` + + + +### Client secret + +Client secret created within your registered application. + +- Datatype: `password` +- Default Value: `None` + + + +### Type resource + +The type of resource you want the data to be extracted from. This can either be a site or a user's share + +- Datatype: `string` +- Default Value: `None` + + + +### Target resource + +Target resource which files will be listed from. This can either be a specific user's share address or a Microsoft site URL. + +- Datatype: `string` +- Default Value: `None` + + + +### Drives + +A list of drives from the selected target resource. 
+ +- Datatype: `string` +- Default Value: `None` + + + +### Maximum amount of workers + +Specifies the maximum number of threads used for parallel execution of the workflow. The default is 32, and the valid range is 1 to 32. Note: Due to known throttling limits imposed by Microsoft, running with high parallelism may cause errors. If you encounter issues, try reducing the number of threads to 1. + +- Datatype: `Long` +- Default Value: `32` + + + +### Directory path + +The path of a directory that needs to be transformed. Includes all subdirectories by default + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_parameters-ParametersPlugin.md b/docs/build/reference/customtask/cmem_plugin_parameters-ParametersPlugin.md new file mode 100644 index 000000000..2aedc8dd2 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_parameters-ParametersPlugin.md @@ -0,0 +1,63 @@ +--- +title: "Set or Overwrite parameters" +description: "Connect this task to a config port of another task in order to set or overwrite the parameter values of this task." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Set or Overwrite parameters + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +Connect this task to a config port of another task in order to set +or overwrite the parameter values of this task. + +To configure this task, add one `key: value` pair per line to the Parameter +Configuration multiline field (YAML syntax). `key` is the ID of the parameter +you want to set or update, `value` is the new value to set. + +You can also use multiline values with `|` +(be aware of the correct indentation with spaces, not tabs). 
+ +Example parameter configuration: + +``` +url: http://example.org +method: GET +query: | + SELECT ?s + WHERE {{ + ?s ?p ?o + }} +execute_once: True +limit: 5 + +``` + + +## Parameter + +### Parameter Configuration + +Your parameter configuration in YAML Syntax. One 'parameter: value' pair per line. url: http://example.org method: GET query: | SELECT ?s WHERE {{ ?s ?p ?o }} execute_once: True limit: 5 + +- Datatype: `code-yaml` +- Default Value: `url: http://example.org +method: GET +query: | + SELECT ?s + WHERE {{ + ?s ?p ?o + }} +execute_once: True +limit: 5 +` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_pdf_extract-pdf_extract-PdfExtract.md b/docs/build/reference/customtask/cmem_plugin_pdf_extract-pdf_extract-PdfExtract.md new file mode 100644 index 000000000..838aabe98 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_pdf_extract-pdf_extract-PdfExtract.md @@ -0,0 +1,252 @@ +--- +title: "Extract from PDF files" +description: "Extract text and tables from PDF files" +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Extract from PDF files + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +A task to extract text and tables from PDF files. + +## Output format + +The output is a JSON string on the path `pdf_extract_output`. The format depends on the +["Combine the results from all files into a single value"](#parameter_doc_all_files) parameter. + + +### Output one entity/value per file + +``` +{ + "metadata": { + "Filename": "sample.pdf", + "Title": "Sample Report", + "Author": "eccenca GmbH", + ... + }, + "pages": [ + { + "page_number": 1, + "text": "This is digital text from the PDF.", + "tables": [...] + }, + { + "page_number": 2, + "text": "", + "tables": [] + }, + ... 
+ ] +} +``` + + +### Output one entity/value for all files + +``` +[ + { + "metadata": {"Filename": "file1.pdf", ...}, + "pages": [...] + }, + { + "metadata": {"Filename": "file2.pdf", ...}, + "pages": [...] + }, + ... +] +``` + +## Input format + +This task can either work with project files when a regular expression is being used or with +entities coming from another task or dataset. +The input must be file entities following the [FileEntitySchema](https://github.com/eccenca/cmem-plugin-base/blob/main/cmem_plugin_base/dataintegration/typed_entities/file.py). +If a regular expression is set, the input ports will close and no connection will be possible. + + +## Parameters + +**File name regex filter** + +Regular expression used to filter the resources of the project to be processed. Only matching file names will be included in the extraction. + +**Page selection** + +Comma-separated page numbers or ranges (e.g., 1,2-5,7) for page selection. Files that do not contain any of the specified pages will return +empty results with the information logged. If no page selection is specified, all pages will be processed. + +**Combine the results from all files into a single value** + +If set to "Combine", the results of all files will be combined into a single output value. If set to "Don't combine", each file result will be output in a separate entity. + +**Error Handling Mode** + +Specifies how errors during PDF extraction should be handled. +- *Ignore*: Log errors and continue processing, returning empty or error-marked results. +- *Raise on errors*: Raise an error when extraction fails. +- *Raise on errors and warnings*: Treat any warning from the underlying PDF extraction module (pdfplumber) when extracting text and tables from pages as an error if empty results are returned. + +**Table extraction strategy** + +Method used to detect tables in PDF pages. For further explanation click [here](https://github.com/jsvine/pdfplumber/blob/stable/README.md#extracting-tables). 
+ +Available strategies include: +- *lines*: Uses detected lines in the PDF layout to find table boundaries. +- *text*: Relies on text alignment and spacing. +- *lattice*: Best for machine-generated perfect grids. +- *sparse*: Best for tables with minimal text content. +- *custom*: Allows custom settings to be provided via the advanced parameter below. + +**Custom table extraction strategy** + +Defines a custom table extraction strategy using YAML syntax. Only used if "custom" is selected as the table strategy. + +**Text extraction strategy** + +Method used to extract text in PDF pages. For further explanation click [here](https://github.com/jsvine/pdfplumber/blob/stable/README.md#extracting-text). + +Available strategies include: +- *default*: Balanced for most digital PDFs. +- *raw*: Extract the PDFs with no merging of text fragments. +- *scanned*: Best for scanned PDFs as it merges text more aggressively. +- *layout*: Layout-aware extraction for complex/multi-column documents. + +**Maximum number of processes for processing files** + +Defines the maximum number of processes to use for concurrent file processing. By default, this is set to (number of virtual cores - 1). + + +## Test regular expression + +Clicking the "Test regex pattern" button displays the files in the current project that match the regular expression +specified with the ["File name regex filter"](#parameter_doc_regex) parameter. +This does not display the files if there is another dataset or task connected to the input +as the entities are not known before execution. + + +## Parameter + +### File name regex filter + +Regular expression for filtering resources of the project. If this parameter is set, the input port will be closed and project files will be compared against the regular expression. 
+ +- Datatype: `string` +- Default Value: `None` + + + +### Combine the results from all files into a single value + +If set to 'Combine', the results of all files will be combined into a single output value. If set to 'Don't combine', each file result will be output in a separate entity. + +- Datatype: `string` +- Default Value: `no_combine` + + + +### Page selection + +Comma-separated page numbers or ranges (e.g., 1,2-5,7) for page selection. Files that do not contain any of the specified pages will return empty results with the information logged. If no page selection is specified, all pages will be processed. + +- Datatype: `string` +- Default Value: `None` + + + +### Error Handling Mode + +The mode in which errors during the extraction are handled. If set to "Ignore", it will log errors and continue, returning empty or error-marked results for files. When "Raise on errors and warnings" is selected, any warning from the underlying PDF extraction module when extracting text and tables from pages is treated as an error if empty results are returned. + +- Datatype: `string` +- Default Value: `raise_on_error` + + + +### Table extraction strategy + +Specifies the method used to detect tables in the PDF page. Options include "lines" and "text", each using different cues (such as lines or text alignment) to find tables. If "Custom" is selected, a custom setting needs to be defined under advanced options. + +- Datatype: `string` +- Default Value: `lines` + + + +### Text extraction strategy + +Specifies how text is extracted from a PDF page. Options include "raw", "layout", and others, each interpreting character positions and formatting differently to control how text is grouped and ordered. + +- Datatype: `string` +- Default Value: `default` + + + +### Custom table extraction strategy + +Custom table extraction strategy in YAML format. 
+ +- Datatype: `multiline string` +- Default Value: `# edge_min_length: 3 +# explicit_horizontal_lines: [] +# explicit_vertical_lines: [] +# horizontal_strategy: lines +# intersection_tolerance: 3 +# intersection_x_tolerance: 3 +# intersection_y_tolerance: 3 +# join_tolerance: 3 +# join_x_tolerance: 3 +# join_y_tolerance: 3 +# min_words_horizontal: 1 +# min_words_vertical: 3 +# snap_tolerance: 3 +# snap_x_tolerance: 3 +# snap_y_tolerance: 3 +# text_settings: +# extra_attrs: [] +# horizontal_ltr: true +# keep_blank_chars: false +# use_text_flow: false +# vertical_ttb: true +# x_tolerance: 2 +# y_tolerance: 2 +# vertical_strategy: lines` + + + +### Custom_text_strategy + +Custom text extraction strategy in YAML format. + +- Datatype: `multiline string` +- Default Value: `# extra_attrs: [] +# horizontal_ltr: true +# keep_blank_chars: false +# layout: false +# split_at_punctuation: false +# use_text_flow: false +# vertical_ttb: true +# x_density: 7.25 +# x_tolerance: 1 +# y_density: 13 +# y_tolerance: 1` + + + +### Maximum number of processes for processing files + +The maximum number of processes to use for processing multiple files concurrently. The default is (number of virtual cores)-1. + +- Datatype: `Long` +- Default Value: `9` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_pgvector-Search.md b/docs/build/reference/customtask/cmem_plugin_pgvector-Search.md new file mode 100644 index 000000000..5611655eb --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_pgvector-Search.md @@ -0,0 +1,137 @@ +--- +title: "Search Vector Embeddings" +description: "Search for top-k metadata stored in Postgres Vector Store (PGVector)." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Search Vector Embeddings + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. 
[with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This workflow task searches for the top-k metadata stored in the Postgres Vector Store. + +The incoming embedding entities are used to retrieve the nearest top-k +vectors in the collection stored in the Postgres Vector Store. +It is possible to specify which paths are going to be used for searching as well as which Postgres +Vector Store and collection name. + +The task uses the embeddings from the path configured with the Embedding Query Path +parameter (`embedding_query_path`, default value: `_embedding`) to search over the collection. +The results are provided in the output path configured with the Search Result Path parameter +(`search_result_path`, default value: `_search_result`). + +The results in this output are structured like this: + +``` json +[ +{ + "id": "...", + "metadata": "..", + "_embedding_source": "..", + "distance": ".." +} +... +] +``` + + +## Parameter + +### Database Host + +The hostname of the postgres database service. + +- Datatype: `string` +- Default Value: `pgvector` + + + +### Database Port + +The port number of the postgres database service. + +- Datatype: `Long` +- Default Value: `5432` + + + +### Database User + +The account name used to login to the postgres database service. + +- Datatype: `string` +- Default Value: `pgvector` + + + +### Database Password + +The password of the database account. + +- Datatype: `password` +- Default Value: `None` + + + +### Database Name + +The database name. + +- Datatype: `string` +- Default Value: `pgvector` + + + +### Collection Name + +The name of the collection that will be used for search. + +- Datatype: `string` +- Default Value: `None` + + + +### Search Result Path + +The path containing the search result in the output entities. 
+ +- Datatype: `string` +- Default Value: `_search_result` + + + +### Embedding Query Path + +The path containing the embedding to be used for searching. + +- Datatype: `string` +- Default Value: `_embedding` + + + +### Top-k + +The number of entries to be returned in the search result. + +- Datatype: `Long` +- Default Value: `10` + + + +### Distance Strategy + +The distance strategy to use. (default: COSINE) + +- Datatype: `enumeration` +- Default Value: `COSINE` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_pgvector-Store.md b/docs/build/reference/customtask/cmem_plugin_pgvector-Store.md new file mode 100644 index 000000000..5aefad39e --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_pgvector-Store.md @@ -0,0 +1,127 @@ +--- +title: "Store Vector Embeddings" +description: "Store embeddings into Postgres Vector Store (PGVector)." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Store Vector Embeddings + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This workflow task stores embeddings into the Postgres Vector Store. + +The vector embeddings and their respective metadata are going to be stored into a collection inside +the Postgres Vector Store. +It is possible to specify both the names of the attributes containing the vectors and the +metadata. + + +## Parameter + +### Database Host + +The hostname of the postgres database service. + +- Datatype: `string` +- Default Value: `pgvector` + + + +### Database Port + +The port number of the postgres database service. + +- Datatype: `Long` +- Default Value: `5432` + + + +### Database User + +The account name used to log in to the postgres database service. 
+ +- Datatype: `string` +- Default Value: `pgvector` + + + +### Database Password + +The password of the database account. + +- Datatype: `password` +- Default Value: `None` + + + +### Database Name + +The database name. + +- Datatype: `string` +- Default Value: `pgvector` + + + +### Collection Name + +The name of the collection that will be used for search. + +- Datatype: `string` +- Default Value: `None` + + + +### Pre Delete Collection + +If set to true, then the collection will be removed at the beginning. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Source Path + +The name of the path to use for reading the embedding source. + +- Datatype: `string` +- Default Value: `_embedding_source` + + + +### Embedding Path + +The name of the path to use for reading the embeddings. + +- Datatype: `string` +- Default Value: `_embedding` + + + +### Metadata Paths + +The comma-separated list of path names to be used as metadata. Empty name means all paths (except embedding source and embedding) will be used + +- Datatype: `string` +- Default Value: `None` + + + +### Batch Processing Size + +The number of entries to be processed in batch. + +- Datatype: `Long` +- Default Value: `100` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_project_resources-List.md b/docs/build/reference/customtask/cmem_plugin_project_resources-List.md new file mode 100644 index 000000000..c24ade671 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_project_resources-List.md @@ -0,0 +1,67 @@ +--- +title: "List project files" +description: "List file resources from the project." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# List project files + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). 
+ +List file resources from the current project based on a regular expression. + +The project-relative path of each file of the current project is tested against a given +regular expression. +The project resource is listed in the output, if the expression matches this path. +The output entities have the following paths: + +- `name` - the plain file name of the resource (example: `file.txt`) +- `fullPath` - the project-relative path including directories but no leading slash + (example: `directory/file.txt`) +- `modified` - modified timestamp (example: `2025-03-10T15:38:41.023Z`) +- `size` - size of the file in bytes (example: `123345`) + +The regular expression has to match the `fullPath` of the file and is case sensitive. + +Given this list of example files of a project: + +``` +dataset.csv +my-dataset.xml +json/example.json +json/example_new.json +json/data.xml +``` + +Here are some regular expressions with the expected result: + +- The regex `dataset\.csv` lists only the first file. +- The regex `json/.*` lists all files in the `json` sub-directory. +- The regex `new` lists nothing. +- The regex `.*new.*` lists the file `json/example_new.json` +(and all other files with `new` in the path) + +We recommend testing your regular expression before using it. +[regex101.com](https://regex101.com) is a proper service to test your regular expressions. +[This deep-link](https://regex101.com/?testString=dataset.csv%0Amy-dataset.xml%0Ajson/example.json%0Ajson/example_new.json%0Ajson/data.xml&regex=.*new.*) +provides a test bed using the example files and the last expression from the list. + + +## Parameter + +### File matching regex + +The regex for filtering the file names. The regex needs to match the full path (i.e. from beginning to end, including sub-directories) in order for the file to be listed. 
+ +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_project_resources-UploadLocalFiles.md b/docs/build/reference/customtask/cmem_plugin_project_resources-UploadLocalFiles.md new file mode 100644 index 000000000..3e043182d --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_project_resources-UploadLocalFiles.md @@ -0,0 +1,56 @@ +--- +title: "Upload local files" +description: "Replace a file dataset resource with a local file or upload multiple local files to a project." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Upload local files + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This plugin allows you to upload multiple local files to the next workflow task. + +Be aware that only file based datasets can handle file entities (e.g. JSON, CSV). + +As an advanced option, you can change the working mode to UPLOAD_TO_PROJECT, which +allows for blindly adding files to the project space (with a consuming workflow task). +Make sure to always use the preview function to avoid overloading your project. + + +## Parameter + +### Directory + +The local directory where the files are located. + +- Datatype: `string` +- Default Value: `None` + + + +### File matching regex + +The regex for filtering the file names. The regex needs to fully match the local name without directory. + +- Datatype: `string` +- Default Value: `.*` + + + +### Working mode + +Which activity should be done with the selected local files. 
+ +- Datatype: `enumeration` +- Default Value: `SEND_TO_TASK` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_salesforce-SoqlQuery.md b/docs/build/reference/customtask/cmem_plugin_salesforce-SoqlQuery.md new file mode 100644 index 000000000..2c781a8bb --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_salesforce-SoqlQuery.md @@ -0,0 +1,96 @@ +--- +title: "SOQL query (Salesforce)" +description: "Executes a custom Salesforce Object Query (SOQL) to return sets of data from your organization’s Salesforce account." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# SOQL query (Salesforce) + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This task executes a custom Salesforce Object Query (SOQL) +and returns sets of tabular data from your organization’s Salesforce account. + +> Use the Salesforce Object Query Language (SOQL) to search your organization’s +> Salesforce data for specific information. SOQL is similar to the SELECT statement in +> the widely used Structured Query Language (SQL) but is designed specifically for +> Salesforce data. +-- [developer.salesforce.com](https://developer.salesforce.com/docs/atlas.en-us.soql_sosl.meta/soql_sosl/sforce_api_calls_soql.htm) + +SOQL uses the SELECT statement combined with filtering statements to return sets of +data, which can optionally be ordered. For a complete description of the syntax, see +[Salesforce SOQL SELECT Syntax](https://developer.salesforce.com/docs/atlas.en-us.soql_sosl.meta/soql_sosl/sforce_api_calls_soql_select.htm). + +In the Advanced Options section, you can enable / disable the validation of your +SOQL Query. By default, this Parse SOQL option is set to `True` (enabled). 
+ +Examples: + +Retrieve all standard fields from all Lead resources. (without parser validation) +``` +SELECT FIELDS(STANDARD) FROM Lead +``` +Retrieve first name and last name of all Contact resources. (with parser validation) +``` +SELECT Contact.Firstname, Contact.Lastname FROM Contact +``` + +Please refer to the [Salesforce Standard Objects list](https://developer.salesforce.com/docs/atlas.en-us.238.0.object_reference.meta/object_reference/sforce_api_objects_list.htm) of the Salesforce Platform data +model in order to get an overview of the available objects and fields. + + +## Parameter + +### Username + +Username of the Salesforce Account. This is typically your email address. + +- Datatype: `string` +- Default Value: `None` + + + +### Password + + + +- Datatype: `string` +- Default Value: `None` + + + +### Security Token + +In addition to your standard account credentials, you need to provide a security token to access your data. Refer to the [Salesforce Reset Token Documentation](https://help.salesforce.com/s/articleView?id=sf.user_security_token.htm&type=5) to learn how to retrieve or reset your token. + +- Datatype: `string` +- Default Value: `None` + + + +### SOQL Query + +The query text of your SOQL query. SOQL uses the SELECT statement combined with filtering statements to return sets of data, which can optionally be ordered. For a complete description of the syntax, see [Salesforce SOQL SELECT Syntax](https://developer.salesforce.com/docs/atlas.en-us.soql_sosl.meta/soql_sosl/sforce_api_calls_soql_select.htm). + +- Datatype: `multiline string` +- Default Value: `None` + + + +### Dataset + +In addition to have direct output of the fetched entities of your SOQL query, you can directly write the response to a JSON dataset (mostly for debugging purpose). 
+ +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_salesforce-workflow-operations-SobjectCreate.md b/docs/build/reference/customtask/cmem_plugin_salesforce-workflow-operations-SobjectCreate.md new file mode 100644 index 000000000..04f3fc098 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_salesforce-workflow-operations-SobjectCreate.md @@ -0,0 +1,84 @@ +--- +title: "Create/Update Salesforce Objects" +description: "Manipulate data in your organization’s Salesforce account." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Create/Update Salesforce Objects + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This task retrieves data from an incoming workflow task (such as a SPARQL query), +and sends bulk API requests to the Salesforce Object API, in order to +manipulate data in your organization’s Salesforce account. + +The working model is: +- Each entity from the input data is interpreted as a single Salesforce object of the +configured object type. +- Each path from the input entity is interpreted as a field from the Salesforce +data model (refer to the [Salesforce Standard Objects list](https://developer.salesforce.com/docs/atlas.en-us.238.0.object_reference.meta/object_reference/sforce_api_objects_list.htm)). +- The special path `id` is used to identify an object in Salesforce and switch +between update/creation mode, means: + - If there is NO id path available, a new object is created. + - If there IS an id path available, an update is done if the object exists. + +Example: +- You want to create new Lead objects based on data from a Knowledge Graph. 
+- The [Lead Object Reference](https://developer.salesforce.com/docs/atlas.en-us.238.0.object_reference.meta/object_reference/sforce_api_objects_lead.htm) lists the supported fields, e.g. `FirstName`, +`LastName` and `Email`. +- Your input SPARQL task looks like this. Note that the variables need +to match the field strings from the Salesforce data model: +``` +SELECT DISTINCT FirstName, LastName, Email ... +``` +- You select `Lead` as the Object API Name of this task and you connect both tasks in +the workflow in order to get the result of the SPARQL task as an input for this task. +- For each SPARQL result, a new Lead is created. + + +## Parameter + +### Username + +Username of the Salesforce Account. This is typically your email address. + +- Datatype: `string` +- Default Value: `None` + + + +### Password + + + +- Datatype: `string` +- Default Value: `None` + + + +### Security Token + +In addition to your standard account credentials, you need to provide a security token to access your data. Refer to the [Salesforce Reset Token Documentation](https://help.salesforce.com/s/articleView?id=sf.user_security_token.htm&type=5) to learn how to retrieve or reset your token. + +- Datatype: `string` +- Default Value: `None` + + + +### Object API Name + +Salesforce Object API Name + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_semspect-task-Update.md b/docs/build/reference/customtask/cmem_plugin_semspect-task-Update.md new file mode 100644 index 000000000..1cc2aa4d6 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_semspect-task-Update.md @@ -0,0 +1,93 @@ +--- +title: "Update SemSpect" +description: "Tell SemSpect to prepare a Knowledge Graph for visualization." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Update SemSpect + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). 
+ In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +Tell SemSpect to prepare a Knowledge Graph for visualization. + +## Parameter + +### The URL of the SemSpect application. This needs to be accessible from 'within' DataIntegration. + + + +- Datatype: `string` +- Default Value: `http://semspect:8080/semspect/` + + + +### The SemSpect database ID. Non-existing databases will be created. + + + +- Datatype: `string` +- Default Value: `cmem` + + + +### Knowledge Graph + + + +- Datatype: `string` +- Default Value: `None` + + + +### The URL of the DataPlatform application. This needs to be accessible from 'within' SemSpect. + + + +- Datatype: `string` +- Default Value: `None` + + + +### Timeout (in seconds) for the overall indexing activity. + + + +- Datatype: `Long` +- Default Value: `300` + + + +### Timeout (in seconds) for individual SemSpect requests + + + +- Datatype: `Long` +- Default Value: `10` + + + +### ignore_proxy: Ignore system settings for HTTP proxies for the requests to SemSpect. + + + +- Datatype: `boolean` +- Default Value: `false` + + + +### verify_ssl: If disabled, the plugin will accept any TLS certificate presented by the server and will ignore hostname mismatches and/or expired certificates, which will make the requests vulnerable to man-in-the-middle (MitM) attacks.
(use for testing only) + + + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_shapes-plugin_shapes-ShapesPlugin.md b/docs/build/reference/customtask/cmem_plugin_shapes-plugin_shapes-ShapesPlugin.md new file mode 100644 index 000000000..40187de1e --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_shapes-plugin_shapes-ShapesPlugin.md @@ -0,0 +1,140 @@ +--- +title: "Generate SHACL shapes from data" +description: "Generate SHACL node and property shapes from a data graph" +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Generate SHACL shapes from data + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +A task to generate SHACL node and property shapes from an instance data knowledge graph. + +## Parameters + +**Input data graph** + +The knowledge graph containing the instance data to be analyzed for the SHACL shapes generation. + +**Output Shape Catalog** + +The knowledge graph, the generated shapes will be added to. + +**Output shape catalog label** + +The label for the shape catalog graph. If no label is specified for a new shapes graph, a label will be generated. If +no label is specified when adding to a shapes graph, the original label will be kept, or, if the existing graph does not have +a label, a label will be generated. Only labels with language tag "en" or without language tag are considered. + +**Handle existing output graph** + +Add result to the existing graph (add result to graph), overwrite the existing graph with the result (replace existing +graph with result), or stop the workflow if the output graph already exists (stop workflow if output graph exists). 
+ +**Import the output graph into the central shapes catalog** + +Import the SHACL shapes graph in the CMEM shapes catalog by adding an `owl:imports` statement to the central CMEM shapes catalog. +If the graph is not imported, the new shapes are not activated and used. + +**Fetch namespace prefixes from prefix.cc** + +Fetch the list of namespace prefixes from https://prefix.cc instead of using the local prefix database. If unavailable, +fall back to the local database. Prefixes defined in the Corporate Memory project override database prefixes. Enabling this +option exposes your IP address to prefix.cc but no other data is shared. If unsure, keep this option disabled. See +https://prefix.cc/about. + +**Properties to ignore** + +Provide the list of properties (as IRIs) for which you do not want to create property shapes. +Example: +``` +http://www.w3.org/1999/02/22-rdf-syntax-ns#type +http://xmlns.com/foaf/0.1/familyName +``` + +**Include plugin provenance** + +Add information about the plugin and plugin settings to the shapes graph. + + +## Parameter + +### Input data graph + +The knowledge graph containing the instance data to be analyzed for the SHACL shapes generation. + +- Datatype: `string` +- Default Value: `None` + + + +### Output shape catalog + +The knowledge graph the generated shapes will be added to. + +- Datatype: `string` +- Default Value: `None` + + + +### Output shape catalog label + +The label for the shape catalog graph. If no label is specified for a new shapes graph, a label will be generated. If no label is specified when adding to a shapes graph, the original label will be kept, or, if the existing graph does not have a label, a label will be generated. Only labels with language tag "en" or without language tag are considered. 
+ +- Datatype: `string` +- Default Value: `None` + + + +### Handle existing output graph + +Add result to the existing graph (add result to graph), overwrite the existing graph with the result (replace existing graph with result), or stop the workflow if the output graph already exists (stop workflow if output graph exists). + +- Datatype: `string` +- Default Value: `stop` + + + +### Import the output graph into the central shapes catalog + +Import the SHACL shapes graph in the CMEM shapes catalog by adding an `owl:imports` statement to the central CMEM shapes catalog. If the graph is not imported, the new shapes are not activated and used. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Fetch namespace prefixes from prefix.cc + +Fetch the list of namespace prefixes from https://prefix.cc instead of using the local prefix database. If unavailable, fall back to the local database. Prefixes defined in the Corporate Memory project override database prefixes. Enabling this option exposes your IP address to prefix.cc but no other data is shared. If unsure, keep this option disabled. See https://prefix.cc/about. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Properties to ignore + +Provide the list of properties (as IRIs) to ignore. + +- Datatype: `multiline string` +- Default Value: `http://www.w3.org/1999/02/22-rdf-syntax-ns#type` + + + +### Include plugin provenance + +Add information about the plugin and plugin settings to the shapes graph. 
+ +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_splitfile-plugin_splitfile-SplitFilePlugin.md b/docs/build/reference/customtask/cmem_plugin_splitfile-plugin_splitfile-SplitFilePlugin.md new file mode 100644 index 000000000..7718418cc --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_splitfile-plugin_splitfile-SplitFilePlugin.md @@ -0,0 +1,120 @@ +--- +title: "Split file" +description: "Split a file into multiple parts with a specified size." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Split file + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +A task splitting a text file into multiple parts with a specified size. + +## Options + +### Input filename + +The input file to be split. +_Example:_ An input file with the name _input.nt_ will be split into files with the names _input\_000000001.nt_, +_input\_000000002.nt_, _input\_000000003.nt_, etc. +⚠️ Existing files will be overwritten! + +### Chunk size + +The maximum size of the chunk files. + +### Size unit + +The unit of the size value: kilobyte (KB), megabyte (MB), gigabyte (GB), or number of lines (Lines). + +### Delete input file + +Delete the input file after splitting. + +### Include header + +Include the header in each split. The first line of the input file is treated as the header. + +### Use internal projects directory + +Use the internal projects directory of DataIntegration to fetch and store files, instead of using the API. +If enabled, the "Internal projects directory" parameter has to be set. + +### Internal projects directory + +The path to the internal projects directory. If "Use internal projects directory" is disabled, +this parameter has no effect.
+ + +## Parameter + +### Input filename + +The input file to be split. + +- Datatype: `string` +- Default Value: `None` + + + +### Chunk size + +The maximum size of the chunk files. + +- Datatype: `double` +- Default Value: `None` + + + +### Size unit + +The unit of the size value: kilobyte (KB), megabyte (MB), gigabyte (GB), or number of lines (Lines). + +- Datatype: `string` +- Default Value: `MB` + + + +### Include header + +Include the header in each split. The first line of the input file is treated as the header. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Delete input file + +Delete the input file after splitting. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Use internal projects directory + +Use the internal projects directory of DataIntegration to fetch and store files, instead of using the API. If enabled, the "Internal projects directory" parameter has to be set. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Internal projects directory + +The path to the internal projects directory. If "Use internal projects directory" is disabled, this parameter has no effect. + +- Datatype: `string` +- Default Value: `/data/datalake` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_ssh-Download.md b/docs/build/reference/customtask/cmem_plugin_ssh-Download.md new file mode 100644 index 000000000..792f680ba --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_ssh-Download.md @@ -0,0 +1,148 @@ +--- +title: "Download SSH files" +description: "Download files from a given SSH instance" +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Download SSH files + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). 
+ + +This workflow task downloads files from a specified SSH instance. + +By providing the hostname, username, port and authentication method, you can specify the +folder from which the data should be extracted. + +You can also define a regular expression to include or exclude specific files. + +There is also an option to prevent files in subfolders from being included. + +#### Authentication Methods: +* **Password:** Only the password will be used for authentication. The private key field is +ignored, even if filled. +* **Key:** The private key will be used for authentication. If the key is encrypted, the password +will be used to decrypt it. + +#### Error handling modes: +* **Ignore:** Ignores the permission rights of files and downloads all files it has access to. +Skips folders and files when there is no correct permission. +* **Warning:** Warns the user about files that the user has no permission rights to. Downloads +all other files and skips files and folders when there is no correct permission. +* **Error:** Throws an error when there is a single file or folder with incorrect permission rights. + +#### Note: +* If a connection cannot be established within 20 seconds, a timeout occurs. +* Currently supported key types are: RSA, DSS, ECDSA, Ed25519. +* Setting the maximum amount of workers to more than 1 may cause a Channel Exception when +the amount of files is too large + + +## Parameter + +### Hostname + +Hostname to connect to. Usually in the form of an IP address + +- Datatype: `string` +- Default Value: `None` + + + +### Port + +The port on which the connection will be tried on. Default is 22. + +- Datatype: `Long` +- Default Value: `22` + + + +### Username + +The username with which a connection will be instantiated. + +- Datatype: `string` +- Default Value: `None` + + + +### Authentication method + +The method that is used to connect to the SSH server.
+ +- Datatype: `string` +- Default Value: `password` + + + +### Private key + +Your private key to connect via SSH. + +- Datatype: `password` +- Default Value: `None` + + + +### Password + +Depending on your authentication method this will either be used to connect via password to SSH or is used to decrypt the SSH private key + +- Datatype: `password` +- Default Value: `None` + + + +### Path + +The currently selected path within your SSH instance. + +- Datatype: `string` +- Default Value: `None` + + + +### Error handling for missing permissions. + +A choice on how to handle errors concerning the permission rights. When choosing 'ignore' files get skipped if the current user lacks the correct permission rights. When choosing 'warning' all files get downloaded however there will be a mention that some of the files are not under the user's permissions, if there are any, and these get skipped. When choosing 'error' the files will not get downloaded if there is even a single file the user has no access to. + +- Datatype: `string` +- Default Value: `error` + + + +### No subfolder + +When this flag is set, only files from the current directory will be downloaded. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Regular expression + +A regular expression used to define which files will get downloaded. + +- Datatype: `string` +- Default Value: `^.*$` + + + +### Maximum amount of workers. + +Determines the amount of workers used for concurrent thread execution of the task. Default is 1, maximum is 32. Note that too many workers can cause a ChannelException.
+ +- Datatype: `Long` +- Default Value: `1` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_ssh-Execute.md b/docs/build/reference/customtask/cmem_plugin_ssh-Execute.md new file mode 100644 index 000000000..24a0b4aa3 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_ssh-Execute.md @@ -0,0 +1,149 @@ +--- +title: "Execute commands via SSH" +description: "Execute commands on a given SSH instance." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Execute commands via SSH + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This workflow task executes commands on a given SSH instance. + +By providing the hostname, username, port and authentication method, you can specify the +folder in which the command should be executed. + +#### Input Methods: +* **No input:** The command will be executed with no input attached to the plugin. Stdin +is non-existent in this case. +* **File input:** The command will be executed with the stdin being represented by the +files that are connected via the input port of the plugin. This also allows for looping +over multiple files executing the same command over them. + + +#### Output Methods: +* **Structured process output:** The output will produce entities with its own schema including +the stdout and stderr as well as the exit code to confirm the execution of the command. +* **File output:** The stdout will be converted into a file and be provided for further use. +* **No output:** The output port will be closed. + +#### Authentication Methods: +* **Password:** Only the password will be used for authentication. The private key field is +ignored, even if filled. +* **Key:** The private key will be used for authentication.
If the key is encrypted, the password +will be used to decrypt it. + +#### Note: +* If a connection cannot be established within 20 seconds, a timeout occurs. +* Currently supported key types are: RSA, DSS, ECDSA, Ed25519. + + +## Parameter + +### Hostname + +Hostname to connect to. Usually in the form of an IP address + +- Datatype: `string` +- Default Value: `None` + + + +### Port + +The port on which the connection will be tried on. Default is 22. + +- Datatype: `Long` +- Default Value: `22` + + + +### Username + +The username with which a connection will be instantiated. + +- Datatype: `string` +- Default Value: `None` + + + +### Authentication method + +The method that is used to connect to the SSH server. + +- Datatype: `string` +- Default Value: `password` + + + +### Private key + +Your private key to connect via SSH. + +- Datatype: `password` +- Default Value: `None` + + + +### Password + +Depending on your authentication method this will either be used to connect via password to SSH or is used to decrypt the SSH private key + +- Datatype: `password` +- Default Value: `None` + + + +### Path + +The currently selected path within your SSH instance. + +- Datatype: `string` +- Default Value: `None` + + + +### Input method + +Parameter to decide whether files will be used as stdin or no input is needed. If 'File input' is chosen, the input port will open for all entities with the FileEntitySchema. + +- Datatype: `string` +- Default Value: `None` + + + +### Output method + +Parameter to decide which type of output the user wants. This can be either no output, a structured process output with its own schema or a file based output + +- Datatype: `string` +- Default Value: `None` + + + +### Command + +The command that will be executed on the SSH instance. When the input method is set to 'File input', the command will be executed over these files. + +- Datatype: `string` +- Default Value: `ls` + + + +### Timeout + +A timeout for the executed command.
+ +- Datatype: `Long` +- Default Value: `0` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_ssh-List.md b/docs/build/reference/customtask/cmem_plugin_ssh-List.md new file mode 100644 index 000000000..f272b63c8 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_ssh-List.md @@ -0,0 +1,148 @@ +--- +title: "List SSH files" +description: "List files from a given SSH instance." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# List SSH files + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This workflow task generates structured output from a specified SSH instance. + +By providing the hostname, username, port and authentication method, you can specify the +folder from which the data should be extracted. + +You can also define a regular expression to include or exclude specific files. + +There is also an option to prevent files in subfolders from being included. + +#### Authentication Methods: +* **Password:** Only the password will be used for authentication. The private key field is +ignored, even if filled. +* **Key:** The private key will be used for authentication. If the key is encrypted, the password +will be used to decrypt it. + +#### Error handling modes: +* **Ignore:** Ignores the permission rights of files and lists them all. Skips folders when there +is no correct permission. +* **Warning:** Warns the user about files that the user has no permission rights to. Lists all files +and skips folder when there is no correct permission. +* **Error:** Throws an error when there is a single file or folder with incorrect permission rights. + +#### Note: +* If a connection cannot be established within 20 seconds, a timeout occurs. 
+* Currently supported key types are: RSA, DSS, ECDSA, Ed25519. +* Setting the maximum amount of workers to more than 1 may cause a Channel Exception when +the amount of files is too large + + +## Parameter + +### Hostname + +Hostname to connect to. Usually in the form of an IP address + +- Datatype: `string` +- Default Value: `None` + + + +### Port + +The port on which the connection will be tried on. Default is 22. + +- Datatype: `Long` +- Default Value: `22` + + + +### Username + +The username with which a connection will be instantiated. + +- Datatype: `string` +- Default Value: `None` + + + +### Authentication method + +The method that is used to connect to the SSH server. + +- Datatype: `string` +- Default Value: `password` + + + +### Private key + +Your private key to connect via SSH. + +- Datatype: `password` +- Default Value: `None` + + + +### Password + +Depending on your authentication method this will either be used to connect via password to SSH or is used to decrypt the SSH private key + +- Datatype: `password` +- Default Value: `None` + + + +### Path + +The currently selected path within your SSH instance. + +- Datatype: `string` +- Default Value: `None` + + + +### Error handling for missing permissions. + +A choice on how to handle errors concerning the permission rights. When choosing 'ignore' all files get listed regardless if the current user has correct permission rights. When choosing 'warning' all files get listed however there will be a mention that some of the files are not under the user's permissions, if there are any. When choosing 'error' the files will not get listed if there are files the user has no access to. + +- Datatype: `string` +- Default Value: `error` + + + +### No subfolder + +When this flag is set, only files from the current directory will be listed. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Regular expression + +A regular expression used to define which files will get listed.
+ +- Datatype: `string` +- Default Value: `^.*$` + + + +### Maximum amount of workers. + +Determines the amount of workers used for concurrent thread execution of the task. Default is 1, maximum is 32. Note that too many workers can cause a ChannelException. + +- Datatype: `Long` +- Default Value: `1` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_ssh-Upload.md b/docs/build/reference/customtask/cmem_plugin_ssh-Upload.md new file mode 100644 index 000000000..933e7f20a --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_ssh-Upload.md @@ -0,0 +1,99 @@ +--- +title: "Upload SSH files" +description: "Upload files to a given SSH instance." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Upload SSH files + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +This workflow task uploads files to a given SSH instance. + +By providing the hostname, username, port and authentication method, you can specify the +folder the data should be uploaded to. + +#### Authentication Methods: +* **Password:** Only the password will be used for authentication. The private key field is +ignored, even if filled. +* **Key:** The private key will be used for authentication. If the key is encrypted, the password +will be used to decrypt it. + +#### Note: +* If a connection cannot be established within 20 seconds, a timeout occurs. +* Currently supported key types are: RSA, DSS, ECDSA, Ed25519. + + +## Parameter + +### Hostname + +Hostname to connect to. Usually in the form of an IP address + +- Datatype: `string` +- Default Value: `None` + + + +### Port + +The port on which the connection will be tried on. Default is 22. 
+ +- Datatype: `Long` +- Default Value: `22` + + + +### Username + +The username with which a connection will be instantiated. + +- Datatype: `string` +- Default Value: `None` + + + +### Authentication method + +The method that is used to connect to the SSH server. + +- Datatype: `string` +- Default Value: `password` + + + +### Private key + +Your private key to connect via SSH. + +- Datatype: `password` +- Default Value: `None` + + + +### Password + +Depending on your authentication method this will either be used to connect via password to SSH or is used to decrypt the SSH private key + +- Datatype: `password` +- Default Value: `None` + + + +### Path + +The currently selected path within your SSH instance. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_validation-validate-ValidateEntities.md b/docs/build/reference/customtask/cmem_plugin_validation-validate-ValidateEntities.md new file mode 100644 index 000000000..247389dc0 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_validation-validate-ValidateEntities.md @@ -0,0 +1,105 @@ +--- +title: "Validate Entities" +description: "Use a JSON schema to validate entities or a JSON dataset." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Validate Entities + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +[JSON Schema](https://json-schema.org/) specifies a JSON-based format to +define the structure of JSON data for validation, documentation, and interaction control. +It provides a contract for the JSON data required by a given application. + +This workflow task can validate incoming entities or a stand-alone JSON dataset by using a +JSON Schema specification.
+ +The used JSON Schema needs to be provided as a JSON Dataset in the project. + +### Input Modes +The plugin supports two input modes for validation: +1. **Validate Entities**: Validates entities received from the input port in the workflow. +2. **Validate JSON Dataset**: Validates a JSON dataset stored in the project. + - If the JSON dataset is a JSON array, the schema will validate each object inside the array. + - If the JSON dataset is a JSON object, it will be validated against the schema directly. + +Validated data objects can be sent to an output port for further processing in the workflow +or saved in a JSON dataset in the project. + +### Output Modes +1. **Valid JSON objects sent to Output Port**: Valid JSON objects can be sent as entities to the output port. +2. **Saved in JSON Dataset**: Valid JSON objects can be stored in a specified JSON dataset in the project. + +### Error Handling +The task can either: +- Fail instantly if there is a data violation, halting the workflow. +- Provide warnings in the workflow report, allowing follow-up tasks to run based on the validated data. + +The error handling behavior is configurable through the `Fail on violations` parameter. + + + +## Parameter + +### Source / Input Mode + + + +- Datatype: `string` +- Default Value: `entities` + + + +### Target / Output Mode + + + +- Datatype: `string` +- Default Value: `entities` + + + +### JSON Schema Dataset + +This dataset holds the JSON schema to use for validation. + +- Datatype: `string` +- Default Value: `None` + + + +### Fail on violations + +If enabled, the task will fail on the first data violation. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Source JSON Dataset + +This dataset holds the resources you want to validate. + +- Datatype: `string` +- Default Value: `None` + + + +### Target JSON Dataset + +This dataset will be used to store the valid JSON objects after validation. 
+ +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_validation-validate-ValidateGraph.md b/docs/build/reference/customtask/cmem_plugin_validation-validate-ValidateGraph.md new file mode 100644 index 000000000..26c9ed435 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_validation-validate-ValidateGraph.md @@ -0,0 +1,90 @@ +--- +title: "Validate Knowledge Graph" +description: "Use SHACL shapes to validate resources in a Knowledge Graph." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Validate Knowledge Graph + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +Start a graph validation process which verifies, that resources in a specific graph are valid +according to the node shapes in a shape catalog graph. + + +## Parameter + +### Context Graph + +This graph holds the resources you want to validate. + +- Datatype: `string` +- Default Value: `None` + + + +### Shape graph + +This graph holds the shapes you want to use for validation. + +- Datatype: `string` +- Default Value: `https://vocab.eccenca.com/shacl/` + + + +### Result graph + +In this graph, the validation results are materialized. If left empty, results are not materialized. + +- Datatype: `string` +- Default Value: `None` + + + +### Clear result graph before validation + + + +- Datatype: `boolean` +- Default Value: `false` + + + +### Fail workflow on violations + + + +- Datatype: `boolean` +- Default Value: `false` + + + +### Output violations as entities + + + +- Datatype: `boolean` +- Default Value: `true` + + + +### Resource Selection Query + +The query to select the resources to validate. 
Use {{context_graph}} as a placeholder for the selected context graph for validation. + +- Datatype: `code-sparql` +- Default Value: `SELECT DISTINCT ?resource +FROM <{{context_graph}}> +WHERE { ?resource a ?class . FILTER isIRI(?resource) } +` + + + diff --git a/docs/build/reference/customtask/cmem_plugin_yaml-parse.md b/docs/build/reference/customtask/cmem_plugin_yaml-parse.md new file mode 100644 index 000000000..7f05d3cd1 --- /dev/null +++ b/docs/build/reference/customtask/cmem_plugin_yaml-parse.md @@ -0,0 +1,84 @@ +--- +title: "Parse YAML" +description: "Parses files, source code or input values as YAML documents." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask + - PythonPlugin +--- +# Parse YAML + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +This workflow task is basically a yaml2json command. + +## Parameter + +### Source / Input Mode + + + +- Datatype: `string` +- Default Value: `code` + + + +### Target / Output Mode + + + +- Datatype: `string` +- Default Value: `entities` + + + +### YAML Source Code (when using the *code* input) + + + +- Datatype: `code-yaml` +- Default Value: `# Add your YAML code here (and select 'code' as input mode).` + + + +### YAML File (when using the *file* input) + +Which YAML file do you want to load into a JSON dataset? The dropdown shows file resources from the current project. + +- Datatype: `string` +- Default Value: `None` + + + +### Target Dataset + +Where do you want to save the result of the conversion? The dropdown shows JSON datasets from the current project. + +- Datatype: `string` +- Default Value: `None` + + + +### Input Schema Type / Class + +In case of source mode 'entities', you can specify the requested input type.
+ +- Datatype: `string` +- Default Value: `urn:x-eccenca:yaml-document` + + + +### Input Schema Path / Property + +In case of source mode 'entities', you can specify the requested input path. + +- Datatype: `string` +- Default Value: `text` + + + diff --git a/docs/build/reference/customtask/combine-csv.md b/docs/build/reference/customtask/combine-csv.md new file mode 100644 index 000000000..7721d4b68 --- /dev/null +++ b/docs/build/reference/customtask/combine-csv.md @@ -0,0 +1,62 @@ +--- +title: "Combine CSV files" +description: "Combine CSV files with the same structure to one dataset." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Combine CSV files + + + + +Combines CSV files with the same structure to one dataset. + Files are identified by specifying a regex filter. + +## Parameter + +### File name regex filter + +Regular expression for filtering resources of the project. + +- Datatype: `string` +- Default Value: `None` + + + +### Delimiter + +Delimiter in the input CSV files. + +- Datatype: `string` +- Default Value: `,` + + + +### Quotechar + +Quotechar in the input CSV files. + +- Datatype: `string` +- Default Value: `"` + + + +### Skip rows + +The number of rows to skip before the header row. + +- Datatype: `Long` +- Default Value: `0` + + + +### Stop workflow if result is empty + +Stop the workflow if no input files are found or all input files are empty. + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/customtask/deleteProjectFiles.md b/docs/build/reference/customtask/deleteProjectFiles.md new file mode 100644 index 000000000..2a5c4c77e --- /dev/null +++ b/docs/build/reference/customtask/deleteProjectFiles.md @@ -0,0 +1,56 @@ +--- +title: "Delete project files" +description: "Removes file resources from the project based on a regular expression." 
+icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Delete project files + + + + +Removes file resources from the project based on a regular expression (regex). + +The project-relative path of each file of the current project is tested against a user-given regular expression and the file is deleted if the expression matches this name. The file names include the sub-directory structure if present but do not start with a `/`. The regular expression has to match the full path of the file and is case sensitive. + +Given this list of example files of a project: + +``` +dataset.csv +my-dataset.xml +json/example.json +json/example_new.json +json/data.xml +``` + +Here are some regular expressions with the expected result: + +- The regex `dataset\.csv` deletes only the first file. +- The regex `json/.*` deletes all files in the `json` sub-directory. +- The regex `new` deletes nothing. +- The regex `.*new.*` deletes the file `json/example_new.json` (and all other files with `new` in the path) + +We recommend testing your regular expression before using it. [regex101.com](https://regex101.com) is a nice service to test your regular expressions. [This deep-link](https://regex101.com/?testString=dataset.csv%0Amy-dataset.xml%0Ajson/example.json%0Ajson/example_new.json%0Ajson/data.xml&regex=.*new.*) provides a test bed using the example files and the last expression from the list. + + +## Parameter + +### File matching regex + +The regex for filtering the file names. The regex needs to match the full path (i.e. from beginning to end, including sub-directories) in order for the file to be deleted. + +- Datatype: `string` +- Default Value: `None` + + + +### Output deleted files + +If enabled the operator outputs entities, one entity for each deleted file, with the path of the file as attribute 'filePath'. 
+ +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/customtask/downloadFile.md b/docs/build/reference/customtask/downloadFile.md new file mode 100644 index 000000000..4d7e90037 --- /dev/null +++ b/docs/build/reference/customtask/downloadFile.md @@ -0,0 +1,88 @@ +--- +title: "Download file" +description: "Downloads a file from a given URL." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Download file + + + + +Downloads a file from a given URL. + +## Parameter + +### URL + +The URL of the file to be downloaded. + +- Datatype: `string` +- Default Value: `None` + + + +### Accept + +The accept header String. + +- Datatype: `string` +- Default Value: `None` + + + +### Request timeout + +Request timeout in ms. The overall maximum time the request should take. + +- Datatype: `int` +- Default Value: `10000` + + + +### Connection timeout + +Connection timeout in ms. The time until which a connection with the remote end must be established. + +- Datatype: `int` +- Default Value: `5000` + + + +### Read timeout + +Read timeout in ms. The max. time a request stays idle, i.e. no data is sent or received. + +- Datatype: `int` +- Default Value: `10000` + + + +### HTTP headers + +Configure additional HTTP headers. One header per line. Each header entry follows the curl syntax. + +- Datatype: `multiline string` +- Default Value: `None` + + + +### Authorization header + +The authorization header. This is usually either 'Authorization' or 'Proxy-Authorization'. If left empty, no authorization header is sent. + +- Datatype: `string` +- Default Value: `None` + + + +### Authorization header value + +The authorization header value. Usually this has the form 'type secret', e.g. for OAuth 'bearer .' This config parameter will be encrypted in the backend. 
+ +- Datatype: `password` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/eccencaDataPlatformGraphStoreFileUploadOperator.md b/docs/build/reference/customtask/eccencaDataPlatformGraphStoreFileUploadOperator.md new file mode 100644 index 000000000..c65096204 --- /dev/null +++ b/docs/build/reference/customtask/eccencaDataPlatformGraphStoreFileUploadOperator.md @@ -0,0 +1,43 @@ +--- +title: "Upload File to Knowledge Graph" +description: "Uploads an N-Triples or Turtle (limited support) file from the file repository to a 'Knowledge Graph' dataset. The output of this operator can be the input of datasets that support graph store file upload, e.g. 'Knowledge Graph'. The file will be uploaded to the graph specified in that dataset." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Upload File to Knowledge Graph + + + + +Uploads an N-Triples or Turtle (limited support) file from the file repository to a 'Knowledge Graph' dataset. The output of this operator can be the input of datasets that support graph store file upload, e.g. 'Knowledge Graph'. The file will be uploaded to the graph specified in that dataset. + +## Parameter + +### RDF resource + +RDF file (N-Triples or Turtle) from the resource repository that should be uploaded to the Knowledge Graph. + +- Datatype: `resource` +- Default Value: `None` + + + +### Max chunk size (MB) + +The N-Triples file will be split into multiple chunks if the file size exceeds the max chunk size. For Turtle files this parameter is ignored since no chunking is supported. + +- Datatype: `option[int]` +- Default Value: `None` + + + +### Content type + +The MIME type of the serialization format of the RDF file. 
+ +- Datatype: `enumeration` +- Default Value: `application/n-triples` + + + diff --git a/docs/build/reference/customtask/eccencaRestOperator.md b/docs/build/reference/customtask/eccencaRestOperator.md new file mode 100644 index 000000000..f38cfb21f --- /dev/null +++ b/docs/build/reference/customtask/eccencaRestOperator.md @@ -0,0 +1,369 @@ +--- +title: "Execute REST requests" +description: "REST operator that fetches and optionally merges data from a REST endpoint. It supports executing multiple requests either via input entities that each overwrite config parameters or via paging. If you only need to download a single file, the 'Download file' operator might be the better option. Most features are currently only supported for JSON REST APIs. From multiple requests the REST operator can produce a merged JSON result, i.e. for JSON it will concatenate all results in a JSON array. Alternatively multiple results can be written directly to file (of a JSON dataset), either as a merged JSON file or one file per request inside a ZIP file. By default the output of this operator is an entity with a single property 'result', which is the (concatenated) JSON string." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Execute REST requests + + + + +#### Core parameter overview + +- `URL`: The URL the request will be executed against. This value can be overwritten at execution time when the 'Read parameters from input' option + is enabled. This value will also be adapted when a paging approach is configured, see the paging section for more details. +- `Method`: One of the following HTTP methods: GET, POST, PUT, PATCH or DELETE. +- `Accept`: The ACCEPT header value for content negotiation, e.g. 'application/json'. +- `Content type`: The CONTENT-TYPE header value. This is usually used for POST, PUT or PATCH requests when the API endpoint + supports multiple different MIME types and/or requires a content MIME type to be set. E.g. 
'application/json' +- `Content`: The text content of a POST, PUT or PATCH request. This value can be overwritten at execution time when the + 'Read parameters from input' option is enabled. + +#### Authorization + +If the request needs authorization, the following parameters should be set; otherwise no authorization header is sent. + +- `Authorization header`: The header that is used for authorization, usually either 'Authorization' or 'Proxy-Authorization'. +- `Authorization header value`: The secret value for the authorization, i.e. password or token. This value will be encrypted + and cannot be accessed in the user interface anymore after saving it. + E.g. for OAuth the value would have the following form: `bearer `. + +#### Sending multiple requests + +In the default configuration a single request is sent. If multiple requests should be sent with different URLs and/or content, +the configurations for these requests must be defined via the input port of the REST operator. + +- Read parameters from input: + The 'URL' and 'Content' parameter values are read from entities that are input via the input + port of the operator. The property names are 'url' and 'content' and only overwrite the + original parameter value if defined. + + For each input entity a separate request will be sent. +- Limit: If set to a positive number, then only that number of input entities will be processed as requests. +- Offset: If set to a positive number, then that many input entities will be ignored before processing them as requests. + +If the option 'Read parameters from input' is enabled, it is currently always assumed that multiple requests will be sent. +The responses must either be of type JSON, then the results are merged into a JSON array, or the 'Output result as file' +option must be enabled in order to write a merged JSON or a ZIP file. See section 'Output options' for more details. 
+ +#### Paging + +If the REST endpoint does not return all results in a single response, multiple requests (one per page) must usually be sent in order +to fetch all results. This is currently only supported for JSON requests. + +- `Paging method`: + There are two paging methods currently supported: + + 1. `Next page full URL`: The JSON response contains the full URL of the next page. This URL will be used for the subsequent request URL. + 2. `Next page identifier`: The JSON response contains the ID of the next page. This ID will be used as query parameter value for the subsequent request. + + In both cases the path to the next page value in the response JSON must be defined via the 'Next page JSON path' parameter. + In case of the 'Next page identifier' paging method, also the parameter 'Next page ID query parameter' must be set. +- `Next page JSON path`: The property path in the result JSON where the 'next page' URL/value is provided. + E.g. for following response structure, the value for this parameter would be `paging/next`: + + ``` + { + ..., + "paging": { + "next": "Next ID" + } + } + ``` +- `Next page ID query parameter`: If the paging method is 'Next page identifier', this defines the query parameter name that should + be attached to the original request URL in combination with the 'next page' value of the current response in order + to request the next page. + +#### Setting HTTP headers + +- `HTTP headers`: This parameter allows to set HTTP headers of the request being made. Each line of the multi-line value should contain a single header, e.g. 
+ ``` + Accept-Language: en-US,en;q=0.5 + Cache-Control: max-age=0 + ``` + +#### Sending a multipart HTTP file request + +If the content of a POST request should be sent as file content of a multipart HTTP request, instead of the request body, +following parameter must be configured: + +- `Multi-part file parameter`: If set to a non-empty value then, instead of a normal POST request, a multipart/form-data + file upload request will be executed. + The value of this parameter is used as the form parameter name. + +#### Output options + +By default, the response body of a request is output as value of the 'result' property of a single output entity. +If the response body needs to be processed this can e.g. be achieved with the 'Parse JSON' operator. Alternatively +the response/s can be written to a file based dataset. Currently only text based datasets are supported. + +The results of multiple requests, see section 'Sending multiple requests' for details, can be written to +a single, merged file (only supported for JSON) or to a ZIP archive, i.e. a file resource that must end in '.zip'. +In the latter case an entry per request is added to the ZIP file. +Currently, the following datasets support the processing of ZIP files: JSON, XML, CSV and RDF file. + +- `Output result as file`: If enabled, instead of outputting a single entity, the result/s will be written directly + to the file of the file-based dataset that is connected to the output of this operator. + +If the option 'Read parameters from input' is enabled, it is currently always assumed that multiple requests will be sent. +The responses must either be JSON, then the results are merged into a JSON array or the 'Output result as file' +option must be enabled in order to write a merged JSON or ZIP file. + +#### Fine-tuning timeouts + +If requests can take a much longer time than what can usually be expected, it is possible to increase the timeouts to +control when a request should eventually fail. 
+ +- `Request timeout`: The maximum overall time in milliseconds the request is allowed to take. Default: `10000`. +- `Connection timeout`: The maximum time in milliseconds the request is allowed to establish a connection to the server. Default: `5000`. +- `Read timeout`: The maximum time a request is allowed to stay idle, i.e. the time while it receives no data. Usually this + should be greater than the time span between the request being sent and the first data being received. Default: `10000` + +#### Throttling requests + +If a lot of requests are sent via the 'Read parameters from input' option, it can make sense to throttle the number +of requests sent in a specific time span. + +- `Delay between requests`: The delay between subsequent requests in milliseconds. Default: `0`. + +#### Error handling + +Following parameters can be tuned in order to decide when an execution should be considered as failed. + +- `Retries per request`: How often a single request configuration (URL, content) should be retried before considering this + request configuration as failed. Default: `3` +- `Abort when request fails`: When enabled, if a single request configuration eventually fails, i.e. it reaches its max. retry count, + the overall execution of the REST operator will fail. +- `Max failed requests`: If set to a value greater 0, the execution will abort if more than the given number of request configurations + have failed (reached max. retries). This can be used if a number of failed requests can be tolerated. + When 'Abort when request fails' is enabled, this option is ignored. + +#### Propagating the request URL + +If having the request URL in the response data is needed, following parameter needs to be configured: + +- `URL property`: If this parameter is non-empty the request URL will be added to the response JSON object. It will be added as value to + a property with the specified name in the root level of the response JSON object. 
+ This is mostly relevant if the request URL cannot be re-constructed from the response data. Only supported for JSON responses. + + +## Parameter + +### URL + +The URL to execute this request against. This can be overwritten at execution time via input. + +- Datatype: `string` +- Default Value: `None` + + + +### Method + +One of the following HTTP methods: GET, POST, PUT, PATCH or DELETE. + +- Datatype: `enumeration` +- Default Value: `GET` + + + +### Accept + +The accept header String. + +- Datatype: `string` +- Default Value: `None` + + + +### Request timeout + +Request timeout in ms. The overall maximum time the request should take. + +- Datatype: `int` +- Default Value: `10000` + + + +### Connection timeout + +Connection timeout in ms. The time until which a connection with the remote end must be established. + +- Datatype: `int` +- Default Value: `5000` + + + +### Read timeout + +Read timeout in ms. The max. time a request stays idle, i.e. no data is sent or received. + +- Datatype: `int` +- Default Value: `10000` + + + +### Content type + +The content-type header String. This can be set in case of PUT or POST. If another content type comes back, the task will fail. + +- Datatype: `string` +- Default Value: `None` + + + +### Content + +The content that is sent with a POST, PUT or PATCH request. For handling this payload dynamically this parameter must be overwritten via the task input. + +- Datatype: `string` +- Default Value: `None` + + + +### HTTP headers + +Configure additional HTTP headers. One header per line. Each header entry follows the curl syntax. + +- Datatype: `multiline string` +- Default Value: `None` + + + +### Read parameters from input + +If this is set to true, specific parameters can be overwritten at execution time and one request per overwrite config will be executed. Else inputs are ignored and exactly one request will be executed. 
Parameters that can currently be overwritten: url, content + +- Datatype: `boolean` +- Default Value: `false` + + + +### Multi-part file parameter + +If set to a non-empty String then instead of a normal POST a multipart/form-data file upload request is executed. This value is used as the form parameter name. + +- Datatype: `string` +- Default Value: `None` + + + +### Authorization header + +The authorization header. This is usually either 'Authorization' or 'Proxy-Authorization'. If left empty, no authorization header is sent. + +- Datatype: `string` +- Default Value: `None` + + + +### Authorization header value + +The authorization header value. Usually this has the form 'type secret', e.g. for OAuth 'bearer .' This config parameter will be encrypted in the backend. + +- Datatype: `password` +- Default Value: `None` + + + +### Delay between requests + +The delay between requests in milliseconds. + +- Datatype: `int` +- Default Value: `0` + + + +### Retries per request + +How often should a single request be retried if it fails. + +- Datatype: `int` +- Default Value: `3` + + + +### Abort when request fails + +If a single request fails, i.e. it reaches its max. retry count, should the execution then be aborted or the next requests be executed. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Limit + +If this is set to a number greater 0, then only this number of input REST configurations will be executed. Mainly used for debugging and executing a subset. + +- Datatype: `int` +- Default Value: `0` + + + +### Offset + +How many input entries to skip. + +- Datatype: `int` +- Default Value: `0` + + + +### Max failed requests + +If set to greater 0, then the execution will abort if more than the given number of requests have failed. This should be used to fail early. If 'abort on request fail' is set to true, then this option has no effect. 
+ +- Datatype: `int` +- Default Value: `0` + + + +### Paging method + +There are two paging methods currently supported: 1. Next page full URL: The JSON response contains the full URL of the next page. This URL will be used for the subsequent request. 2. Next page identifier: The JSON response contains the ID of the next page. This ID will be used as query parameter for the subsequent request. In both cases the path to the next page value in the response JSON must be defined via the 'Next page JSON path' parameter. In case of the 'Next page identifier' paging method, also the parameter 'Next page ID query parameter' must be set. + +- Datatype: `enumeration` +- Default Value: `none` + + + +### Next page JSON path + +The path to the JSON value containing the next page value of the JSON response, e.g. paging/next. The path syntax follows the Silk path syntax, but only allows forward paths. + +- Datatype: `string` +- Default Value: `None` + + + +### Next page ID query parameter + +The query parameter name for the next page ID that should be attached to the next page URI request. This is necessary for the 'Next page identifier' paging method. + +- Datatype: `string` +- Default Value: `None` + + + +### Output result as file + +If a file based dataset is connected to the output of the REST operator, then this option can be enabled in order to overwrite the file resource of the connected dataset. This allows for handling the result of the REST request/s as a normal dataset. If a non-file based dataset is connected to this operator the execution will fail. If disabled, a single entity with a single property 'result' will be output that contains the (merged) result. + +- Datatype: `boolean` +- Default Value: `false` + + + +### URL property + +If this is non-empty, a property is created in the root JSON object (if it exists) with the same name that has the request URL as value. 
This is mostly relevant if the request URL cannot be re-constructed from the response data. Only supported for JSON response data. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/getProjectFiles.md b/docs/build/reference/customtask/getProjectFiles.md new file mode 100644 index 000000000..fbc1fea8a --- /dev/null +++ b/docs/build/reference/customtask/getProjectFiles.md @@ -0,0 +1,34 @@ +--- +title: "Get project files" +description: "Get file resources from the project." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Get project files + + + + +Get file resources from the project. + +## Parameter + +### File name + +The path of the project file to retrieve. Leave empty if the file regex parameter should be used. + +- Datatype: `string` +- Default Value: `None` + + + +### Files regex + +Optional regular expression for retrieving files. The regex needs to match the full path (i.e. from beginning to end, including sub-directories). + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/index.md b/docs/build/reference/customtask/index.md new file mode 100644 index 000000000..e391b9265 --- /dev/null +++ b/docs/build/reference/customtask/index.md @@ -0,0 +1,81 @@ +--- +title: "Custom Workflow Tasks" +icon: octicons/cross-reference-24 +tags: + - Build + - Reference +--- +# Custom Workflow Tasks + + +A custom workflow task is an operator that can be used in a workflow. + +**:octicons-people-24: Intended audience:** Linked Data Experts and Domain Experts + +| Name | Description | +|------------------------:| :--------- | +|[Add project files](addProjectFiles.md) | Adds file resources to the project that are piped into the input port. | +|[Cancel Workflow](CancelWorkflow.md) | Cancels a workflow if a specified condition is fulfilled. | +|[Combine CSV files](combine-csv.md) | Combine CSV files with the same structure to one dataset. 
| +|[Concatenate to file](ConcatenateToFile.md) | Concatenates values into a file. | +|[Create Embeddings](cmem_plugin_llm-CreateEmbeddings.md) | Fetch and output LLM created embeddings from input entities. | +|[Create/Update Salesforce Objects](cmem_plugin_salesforce-workflow-operations-SobjectCreate.md) | Manipulate data in your organization’s Salesforce account. | +|[Delete project files](deleteProjectFiles.md) | Removes file resources from the project based on a regular expression. | +|[Distinct by](DistinctBy.md) | Removes duplicated entities based on a user-defined path. Note that this operator does not retain the order of the entities. | +|[Download file](downloadFile.md) | Downloads a file from a given URL. | +|[Download Nextcloud files](cmem_plugin_nextcloud-Download.md) | Download files from a given Nextcloud instance. | +|[Download Office 365 Files](cmem_plugin_office365-Download.md) | Download files from Microsoft OneDrive or Sites | +|[Download SSH files](cmem_plugin_ssh-Download.md) | Download files from a given SSH instance | +|[Evaluate template](Template.md) | Evaluates a template on a sequence of entities. Can be used after a transformation or directly after datasets that output a single table, such as CSV or Excel. | +|[Execute commands via SSH](cmem_plugin_ssh-Execute.md) | Execute commands on a given SSH instance. | +|[Execute Instructions](cmem_plugin_llm-ExecuteInstructions.md) | Send instructions (prompt) to an LLM and process the result. | +|[Execute REST requests](eccencaRestOperator.md) | REST operator that fetches and optionally merges data from a REST endpoint. It supports executing multiple requests either via input entities that each overwrite config parameters or via paging. If you only need to download a single file, the "Download file" operator might be the better option. Most features are currently only supported for JSON REST APIs. From multiple requests the REST operator can produce a merged JSON result, i.e. 
for JSON it will concatenate all results in a JSON array. Alternatively multiple results can be written directly to file (of a JSON dataset), either as a merged JSON file or one file per request inside a ZIP file. By default the output of this operator is an entity with a single property 'result', which is the (concatenated) JSON string. | +|[Execute Spark function](SparkFunction.md) | Applies a specified Scala function to a specified field. E.g. when the inputField is 'name', the inputFunction is 'any => "Arrrrgh!" and the alias is 'xxx',)' a query corresponding to 'Function existingField1, existingFiled2, ... "Arrrrgh!" as "xxx"' will be generated. If alias is empty the inputField will be overwritten, otherwise a new field will be added and the rest of the schema stays the same. | +|[Extract from PDF files](cmem_plugin_pdf_extract-pdf_extract-PdfExtract.md) | Extract text and tables from PDF files | +|[Generate base36 IRDIs](cmem_plugin_irdi-workflow-irdi_plugin-IrdiPlugin.md) | Create unique ECLASS IRDIs. | +|[Generate SHACL shapes from data](cmem_plugin_shapes-plugin_shapes-ShapesPlugin.md) | Generate SHACL node and property shapes from a data graph | +|[Get project files](getProjectFiles.md) | Get file resources from the project. | +|[GraphQL query](cmem_plugin_graphql-workflow-graphql-GraphQLPlugin.md) | Executes a custom GraphQL query to a GraphQL endpoint and saves result to a JSON dataset. | +|[Join tables](Merge.md) | Joins a set of inputs into a single table. Expects a list of entity tables and links. All entity tables are joined into the first entity table using the provided links. | +|[jq](cmem-plugin-jq-workflow.md) | Process a JSON document with a jq filter / program. | +|[JQL query](cmem_plugin_jira-JqlQuery.md) | Search and retrieve JIRA issues. | +|[Kafka Consumer (Receive Messages)](cmem_plugin_kafka-ReceiveMessages.md) | Reads messages from a Kafka topic and saves it to a messages dataset (Consumer). 
| +|[Kafka Producer (Send Messages)](cmem_plugin_kafka-SendMessages.md) | Reads a messages dataset and sends records to a Kafka topic (Producer). | +|[List Nextcloud files](cmem_plugin_nextcloud-List.md) | List directories and files from a given Nextcloud folder. | +|[List Office 365 Files](cmem_plugin_office365-List.md) | List files from OneDrive or Sites | +|[List project files](cmem_plugin_project_resources-List.md) | List file resources from the project. | +|[List SSH files](cmem_plugin_ssh-List.md) | List files from a given SSH instance. | +|[Merge tables](MultiTableMerge.md) | Stores sets of instance and mapping inputs as relational tables with the mapping as an n:m relation. Expects a list of entity tables and links. All entity tables have a relation to the first entity table using the provided links. | +|[Normalize units of measurement](ucumNormalizationTask.md) | Custom task that will substitute numeric values and pertaining unit symbols with a SI-system-unit normalized representation in three columns: * The normalized numeric value. * The unit symbol of the SI-system-unit pertaining to the value. * The origin unit symbol from which it was normalized (so we are able to reverse this action). | +|[OAuth2 Authentication](cmem_plugin_auth-workflow-auth-OAuth2.md) | Provide an OAuth2 access token for other tasks (via config port). | +|[Office 365 Upload Files](cmem_plugin_office365-Upload.md) | Upload files to OneDrive or a site Sharepoint | +|[Parse JSON](JsonParserOperator.md) | Parses an incoming entity as a JSON dataset. Typically, it is used before a transformation task. Takes exactly one input of which only the first entity is processed. | +|[Parse XML](XmlParserOperator.md) | Takes exactly one input and reads either the defined inputPath or the first value of the first entity as XML document. Then executes the given output entity schema similar to the XML dataset to construct the result entities. 
| +|[Parse YAML](cmem_plugin_yaml-parse.md) | Parses files, source code or input values as YAML documents. | +|[Pivot](Pivot.md) | The pivot operator takes data in separate rows, aggregates it and converts it into columns. The operator works on a flat input schema only and creates a flat output schema. A pivot table is a data summarization that is used to automatically sort, count, total, or average data in a dataset. It allows you to view the data from a different perspective. The following aggregation (summary) functions are available: - **first** - Shows the first value (works with numbers and strings) - **min** - Shows the lowest value (works with numbers and strings) - **max** - Shows the highest value (works with numbers and strings) - **sum** - Adds up the values (works with numbers only) - **average** - Finds the average of the values (works with numbers only) | +|[Request RDF triples](tripleRequestOperator.md) | A task that requests all triples from an RDF dataset. | +|[Scheduler](Scheduler.md) | Executes a workflow at specified intervals. | +|[Search addresses](SearchAddresses.md) | Looks up locations from textual descriptions using the configured geocoding API. Outputs results as RDF. | +|[Search Vector Embeddings](cmem_plugin_pgvector-Search.md) | Search for top-k metadata stored in Postgres Vector Store (PGVector). | +|[Send eMail](SendEMail.md) | Sends an eMail using an SMTP server. If connected to a dataset that is based on a file in a workflow, it will send that file whenever the workflow is executed It can be used to send the result of a workflow via Mail. | +|[Send Mattermost messages](cmem_plugin_mattermost.md) | Send messages to Mattermost channels and/or users. | +|[Set or Overwrite parameters](cmem_plugin_parameters-ParametersPlugin.md) | Connect this task to a config port of another task in order to set or overwrite the parameter values of this task. 
| +|[SHACL validation with pySHACL](shacl-pyshacl.md) | Performs SHACL validation with pySHACL. | +|[SOQL query (Salesforce)](cmem_plugin_salesforce-SoqlQuery.md) | Executes a custom Salesforce Object Query (SOQL) to return sets of data your organization’s Salesforce account. | +|[SPARQL Construct query](sparqlCopyOperator.md) | A task that executes a SPARQL Construct query on a SPARQL enabled data source and outputs the SPARQL result. If the result should be written to the same RDF store it is read from, the SPARQL Update operator is preferable. | +|[SPARQL Select query](sparqlSelectOperator.md) | A task that executes a SPARQL Select query on a SPARQL enabled data source and outputs the SPARQL result. If the SPARQL source is defined on a specific graph, a FROM clause will be added to the query at execution time, except when there already exists a GRAPH or FROM clause in the query. FROM NAMED clauses are not injected. | +|[SPARQL Update query](sparqlUpdateOperator.md) | A task that outputs SPARQL Update queries for every entity from the input based on a SPARQL Update template. The output of this operator should be connected to the SPARQL datasets to which the results should be written. In contrast to the SPARQL select operator, no FROM clause gets injected into the query. | +|[Split file](cmem_plugin_splitfile-plugin_splitfile-SplitFilePlugin.md) | Split a file into multiple parts with a specified size. | +|[SQL query](CustomSQLExecution.md) | Executes a custom SQL query on the first input dataset and returns the result as its output. | +|[Start Workflow per Entity](cmem_plugin_loopwf-task-StartWorkflow.md) | Loop over the output of a task and start a sub-workflow for each entity. | +|[Store Vector Embeddings](cmem_plugin_pgvector-Store.md) | Store embeddings into Postgres Vector Store (PGVector). | +|[Unpivot](Unpivot.md) | Given a list of table columns, transforms those columns into attribute-value pairs. 
| +|[Update SemSpect](cmem_plugin_semspect-task-Update.md) | Tell SemSpect to prepare a Knowledge Graph for visualization. | +|[Upload File to Knowledge Graph](eccencaDataPlatformGraphStoreFileUploadOperator.md) | Uploads an N-Triples or Turtle (limited support) file from the file repository to a 'Knowledge Graph' dataset. The output of this operator can be the input of datasets that support graph store file upload, e.g. 'Knowledge Graph'. The file will be uploaded to the graph specified in that dataset. | +|[Upload files to Nextcloud](cmem_plugin_nextcloud-Upload.md) | Upload files to a given Nextcloud instance. | +|[Upload local files](cmem_plugin_project_resources-UploadLocalFiles.md) | Replace a file dataset resource with a local file or upload multiple local files to a project. | +|[Upload SSH files](cmem_plugin_ssh-Upload.md) | Upload files to a given SSH instance. | +|[Validate Entities](cmem_plugin_validation-validate-ValidateEntities.md) | Use a JSON schema to validate entities or a JSON dataset. | +|[Validate Knowledge Graph](cmem_plugin_validation-validate-ValidateGraph.md) | Use SHACL shapes to validate resources in a Knowledge Graph. | +|[Validate XML](validateXsdOperator.md) | Validates an XML dataset against a provided XML schema (XSD) file. Any errors are written to the output. Can be used in conjunction with the `Cancel Workflow` operator in order to stop the workflow if errors have been found. | +|[XSLT](xsltOperator.md) | A task that converts an XML resource via an XSLT script and writes the transformed output into a file resource. | diff --git a/docs/build/reference/customtask/shacl-pyshacl.md b/docs/build/reference/customtask/shacl-pyshacl.md new file mode 100644 index 000000000..4393e772b --- /dev/null +++ b/docs/build/reference/customtask/shacl-pyshacl.md @@ -0,0 +1,196 @@ +--- +title: "SHACL validation with pySHACL" +description: "Performs SHACL validation with pySHACL." 
+icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# SHACL validation with pySHACL + + + + +Performs SHACL validation with [pySHACL](https://github.com/RDFLib/pySHACL). Select a __Data graph__ and a __SHACL shapes graph__ to get started. The plugin can output __Entities__, and/or produce a more detailed __Validation graph__ by specifying a __Validation graph URI__. Additional configuration parameters can be set to control the output generated by the plugin. Refer to each parameter description for details. + +## Parameter + +### Data graph URI + +The URI of the graph to be validated. The graph URI is selected from a list of graphs of types `void:Dataset`, `shui:ShapeCatalog`, `owl:Ontology` and `dsm:ThesaurusProject`. + +- Datatype: `string` +- Default Value: `None` + + + +### SHACL shapes graph URI + +The URI of the graph containing the SHACL shapes to be validated against. The graph URI is selected from a list of graphs of type `shui:ShapeCatalog`. + +- Datatype: `string` +- Default Value: `None` + + + +### Ontology graph URI + +The URI of a graph containing extra ontological information. RDFS and OWL definitions from this are used to inoculate the data graph. The graph URI is selected from a list of graphs of type `owl:Ontology`. + +- Datatype: `string` +- Default Value: `None` + + + +### Generate validation graph + +If enabled, the validation graph is posted to the CMEM instance with the graph URI specified with the `Validation graph URI` option. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Validation graph URI + +If the `Generate validation graph` option is enabled the validation graph is posted to the CMEM instance with this graph URI. + +- Datatype: `string` +- Default Value: `None` + + + +### Output entities + +If enabled, the plugin outputs the validation results as entities and can be connected to, for instance, a CSV dataset to produce a results table. 
+ +- Datatype: `boolean` +- Default Value: `false` + + + +### Clear validation graph + +If enabled, the validation graph is cleared before workflow execution. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Resolve owl:imports + +If enabled, the graph tree defined with owl:imports in the data graph is resolved. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Blank node skolemization + +If enabled, blank nodes in the validation graph are skolemized into URIs. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Add labels + +If enabled, `rdfs:label` triples are added to the validation graph for instances of `sh:ValidationReport` and `sh:ValidationResult`. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Add labels to focus nodes and values + +If enabled along with the `Add labels` option, `rdfs:label` triples are added for the focus nodes, values and SHACL shapes in the validation graph. The labels are taken from the specified data and SHACL graphs. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Add shui:conforms flag to focus node resources. + +If enabled, `shui:conforms false` triples are added to the focus nodes in the validation graph. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Meta-SHACL + +If enabled, the SHACL shapes graph is validated against the SHACL-SHACL shapes graph before validating the data graph. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Inference + +If enabled, OWL inferencing expansion of the data graph is performed before validation. Options are RDFS, OWLRL, Both, None. + +- Datatype: `string` +- Default Value: `none` + + + +### SHACL advanced features + +Enable SHACL advanced features. + +- Datatype: `boolean` +- Default Value: `false` + + + +### SHACL-JS features + +Enable SHACL-JS features. 
+ +- Datatype: `boolean` +- Default Value: `false` + + + +### Remove graph type http://rdfs.org/ns/void#Dataset from data graph + +Before validating, remove the triple ` a ` from the in-memory data graph. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Remove graph type https://vocab.eccenca.com/dsm/ThesaurusProject from data graph + +Before validating, remove the triple ` a ` from the in-memory data graph. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Remove graph type https://vocab.eccenca.com/shui/ShapeCatalog from data graph + +Before validating, remove the triple ` a ` from the in-memory data graph. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Specify a custom max-evaluation-depth + +Specify a custom max-evaluation-depth. Increase this limit only if you have a legitimate use case, you are certain you need to increase it, and you are certain you know what you are doing. + +- Datatype: `Long` +- Default Value: `15` + + + diff --git a/docs/build/reference/customtask/sparqlCopyOperator.md b/docs/build/reference/customtask/sparqlCopyOperator.md new file mode 100644 index 000000000..d65ef4794 --- /dev/null +++ b/docs/build/reference/customtask/sparqlCopyOperator.md @@ -0,0 +1,34 @@ +--- +title: "SPARQL Construct query" +description: "A task that executes a SPARQL Construct query on a SPARQL enabled data source and outputs the SPARQL result. If the result should be written to the same RDF store it is read from, the SPARQL Update operator is preferable." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# SPARQL Construct query + + + + +A task that executes a SPARQL Construct query on a SPARQL enabled data source and outputs the SPARQL result. If the result should be written to the same RDF store it is read from, the SPARQL Update operator is preferable. 
+ +## Parameter + +### Construct query + +A SPARQL 1.1 construct query + +- Datatype: `code-sparql` +- Default Value: `None` + + + +### Use temporary file + +When copying directly to the same SPARQL Endpoint or when copying large amounts of triples, set to True by default + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/customtask/sparqlSelectOperator.md b/docs/build/reference/customtask/sparqlSelectOperator.md new file mode 100644 index 000000000..14d3816e0 --- /dev/null +++ b/docs/build/reference/customtask/sparqlSelectOperator.md @@ -0,0 +1,52 @@ +--- +title: "SPARQL Select query" +description: "A task that executes a SPARQL Select query on a SPARQL enabled data source and outputs the SPARQL result. If the SPARQL source is defined on a specific graph, a FROM clause will be added to the query at execution time, except when there already exists a GRAPH or FROM clause in the query. FROM NAMED clauses are not injected." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# SPARQL Select query + + + + +A task that executes a SPARQL Select query on a SPARQL enabled data source and outputs the SPARQL result. If the SPARQL source is defined on a specific graph, a FROM clause will be added to the query at execution time, except when there already exists a GRAPH or FROM clause in the query. FROM NAMED clauses are not injected. + +## Parameter + +### Select query + +A SPARQL 1.1 select query + +- Datatype: `code-sparql` +- Default Value: `None` + + + +### Result limit + +If set to a positive integer, the number of results is limited + +- Datatype: `string` +- Default Value: `None` + + + +### Optional SPARQL dataset + +An optional SPARQL dataset that can be used for example data, so e.g. the transformation editor shows mapping examples. + +- Datatype: `SPARQL endpoint` +- Default Value: `None` + + + +### SPARQL query timeout (ms) + +SPARQL query timeout (select/update) in milliseconds. 
A value of zero means that there is no timeout set explicitly. If a value greater than zero is specified this overwrites possible default timeouts. + +- Datatype: `int` +- Default Value: `0` + + + diff --git a/docs/build/reference/customtask/sparqlUpdateOperator.md b/docs/build/reference/customtask/sparqlUpdateOperator.md new file mode 100644 index 000000000..cf2a1370d --- /dev/null +++ b/docs/build/reference/customtask/sparqlUpdateOperator.md @@ -0,0 +1,43 @@ +--- +title: "SPARQL Update query" +description: "A task that outputs SPARQL Update queries for every entity from the input based on a SPARQL Update template. The output of this operator should be connected to the SPARQL datasets to which the results should be written. In contrast to the SPARQL select operator, no FROM clause gets injected into the query." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# SPARQL Update query + + + + +A task that outputs SPARQL Update queries for every entity from the input based on a SPARQL Update template. The output of this operator should be connected to the SPARQL datasets to which the results should be written. In contrast to the SPARQL select operator, no FROM clause gets injected into the query. + +## Parameter + +### SPARQL update query + +This operator takes a SPARQL Update Query Template that depending on the templating mode (Simple/Velocity Engine) supports a set of templating features, e.g. filling in input values via placeholders in the template. Example for the 'Simple' mode: DELETE DATA { ${} rdf:label ${"PROP_FROM_ENTITY_SCHEMA2"} } INSERT DATA { ${} rdf:label ${"PROP_FROM_ENTITY_SCHEMA3"} } This will insert the URI serialization of the property value PROP_FROM_ENTITY_SCHEMA1 for the ${} expression. And it will insert a plain literal serialization for the property values PROP_FROM_ENTITY_SCHEMA2/3 for the template literal expressions. It is possible to write something like ${"PROP"}^^ or ${"PROP"}@en. 
Example for the 'Velocity Engine' mode: DELETE DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") } #if ( $row.exists("PROP_FROM_ENTITY_SCHEMA1") ) INSERT DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA3") } #end Input values are accessible via various methods of the 'row' variable: - uri(inputPath: String): Renders an input value as URI. Throws exception if the value is no valid URI. - plainLiteral(inputPath: String): Renders an input value as plain literal, i.e. escapes problematic characters etc. - rawUnsafe(inputPath: String): Renders an input value as is, i.e. no escaping is done. This should only be used – better never – if the input values can be trusted. - exists(inputPath: String): Returns true if a value for the input path exists, else false. The methods uri, plainLiteral and rawUnsafe throw an exception if no input value is available for the given input path. In addition to input values, properties of the input and output tasks can be accessed via the inputProperties and outputProperties objects in the same way as the row object, e.g. $inputProperties.uri("graph") For more information about the Velocity Engine visit http://velocity.apache.org. + +- Datatype: `code-sparql` +- Default Value: `None` + + + +### Batch size + +How many entities should be handled in a single update request. + +- Datatype: `int` +- Default Value: `1` + + + +### Templating mode + +The templating mode. 'Simple' only allows simple URI and literal insertions, whereas 'Velocity Engine' supports complex templating. See 'Sparql Update Template' parameter description for examples and http://velocity.apache.org for details on the Velocity templates. 
+ +- Datatype: `enumeration` +- Default Value: `simple` + + + diff --git a/docs/build/reference/customtask/tripleRequestOperator.md b/docs/build/reference/customtask/tripleRequestOperator.md new file mode 100644 index 000000000..f61d63564 --- /dev/null +++ b/docs/build/reference/customtask/tripleRequestOperator.md @@ -0,0 +1,17 @@ +--- +title: "Request RDF triples" +description: "A task that requests all triples from an RDF dataset." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Request RDF triples + + + + +A task that requests all triples from an RDF dataset. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/customtask/ucumNormalizationTask.md b/docs/build/reference/customtask/ucumNormalizationTask.md new file mode 100644 index 000000000..4ea97bb3c --- /dev/null +++ b/docs/build/reference/customtask/ucumNormalizationTask.md @@ -0,0 +1,93 @@ +--- +title: "Normalize units of measurement" +description: "Custom task that will substitute numeric values and pertaining unit symbols with a SI-system-unit normalized representation in three columns: * The normalized numeric value. * The unit symbol of the SI-system-unit pertaining to the value. * The origin unit symbol from which it was normalized (so we are able to reverse this action)." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Normalize units of measurement + + + + +Custom task that will substitute numeric values and pertaining unit symbols with a SI-system-unit normalized representation in three columns: * The normalized numeric value. * The unit symbol of the SI-system-unit pertaining to the value. * The origin unit symbol from which it was normalized (so we are able to reverse this action). + +## Parameter + +### Value properties + +The names (comma-separated) of columns containing numeric values interpreted as quantities of the dimension indicated by the pertaining unit. 
+ +- Datatype: `string` +- Default Value: `None` + + + +### Unit property + +The names (comma-separated) of dedicated columns containing the unit symbol for the pertaining value in the value column (the positions in this list have to align with the pertaining value columns). Either this param or 'static unit' has to be set. + +- Datatype: `string` +- Default Value: `None` + + + +### Static units + +Unit symbols (comma-separated) defining the unit for all values in the pertaining value column. If set, the 'unitProperty' param will be ignored and all values of the value column have to be numbers without unit symbols (the positions in this list have to align with the pertaining value columns). + +- Datatype: `string` +- Default Value: `None` + + + +### Target units + +Unit symbols (comma-separated) defining the target unit to which the value column will be converted (Note: Make sure the input unit can be converted to the target unit). By default the pertaining SI-base unit will be used as normalization unit (the positions in this list have to align with the pertaining value columns) + +- Datatype: `string` +- Default Value: `None` + + + +### Suppress errors + +If true, will ignore any parsing or value conversion error and return an empty result (might happen because of unknown unit symbols or non-numbers as values). Beware, the value will be lost completely! + +- Datatype: `boolean` +- Default Value: `false` + + + +### Configuration file path + +An absolute file path for a unit CSV configuration file (for syntax see 'configuration' param). If set, the 'configuration' param will be ignored. + +- Datatype: `resource` +- Default Value: `None` + + + +### Configuration + +While all SI units and decimal prefixes are supported by default, custom or obsolete units have to be added via this configuration. NOTE: when constructing formulae depending on other units defined in the configuration, make sure to order them dependently. 
ALSO: Rational numbers are not supported by the UCUM syntax, express them as a fraction (see 'grain' example below). + +- Datatype: `multiline string` +- Default Value: ` +# Example configuration, don't forget to remove the '#' in front of each row. +# CSV COLUMNS: +# * unit name - the human readable name of the unit +# * override - (true|false) if true, any assigned unit to the given symbol will be dropped, else if the unit symbol is already in use, the new definition will be ignored +# * symbol - the main symbol used to depict the unit +# * equals formula - the formula to derive the given unit from already registered units +# * [all additional columns] - alternative symbols, will be registered for this unit +# Example CSV: +# unit name, override, symbol, equals formula +# Are , true , are , 100.m2 +# Denier , true , den , g/(9.km) +# Grain , true , gr , (45.g)/100 +# Pound , true , lb , (45359237.kg)/100000000 , # , lbm + ` + + + diff --git a/docs/build/reference/customtask/validateXsdOperator.md b/docs/build/reference/customtask/validateXsdOperator.md new file mode 100644 index 000000000..8bb6c429b --- /dev/null +++ b/docs/build/reference/customtask/validateXsdOperator.md @@ -0,0 +1,25 @@ +--- +title: "Validate XML" +description: "Validates an XML dataset against a provided XML schema (XSD) file. Any errors are written to the output. Can be used in conjunction with the `Cancel Workflow` operator in order to stop the workflow if errors have been found." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# Validate XML + + + + +Validates an XML dataset against a provided XML schema (XSD) file. Any errors are written to the output. Can be used in conjunction with the `Cancel Workflow` operator in order to stop the workflow if errors have been found. + +## Parameter + +### File + +The XSD file to be used for validating the XML. 
+ +- Datatype: `resource` +- Default Value: `None` + + + diff --git a/docs/build/reference/customtask/xsltOperator.md b/docs/build/reference/customtask/xsltOperator.md new file mode 100644 index 000000000..238589aa7 --- /dev/null +++ b/docs/build/reference/customtask/xsltOperator.md @@ -0,0 +1,25 @@ +--- +title: "XSLT" +description: "A task that converts an XML resource via an XSLT script and writes the transformed output into a file resource." +icon: octicons/cross-reference-24 +tags: + - WorkflowTask +--- +# XSLT + + + + +A task that converts an XML resource via an XSLT script and writes the transformed output into a file resource. + +## Parameter + +### File + +The XSLT file to be used for transforming XML. + +- Datatype: `resource` +- Default Value: `None` + + + diff --git a/docs/build/reference/dataset/.pages b/docs/build/reference/dataset/.pages new file mode 100644 index 000000000..10aa6933c --- /dev/null +++ b/docs/build/reference/dataset/.pages @@ -0,0 +1,27 @@ +nav: + - index.md + - "Alignment": alignment.md + - "Avro": avro.md + - "Binary file": binaryFile.md + - "CSV": csv.md + - "Excel": excel.md + - "Excel (Google Drive)": googlespreadsheet.md + - "Excel (OneDrive, Office365)": office365preadsheet.md + - "Hive database": Hive.md + - "In-memory dataset": inMemory.md + - "Internal dataset": internal.md + - "Internal dataset (single graph)": LocalInternalDataset.md + - "JDBC endpoint": Jdbc.md + - "JSON": json.md + - "Knowledge Graph": eccencaDataPlatform.md + - "Multi CSV ZIP": multiCsv.md + - "Neo4j": neo4j.md + - "ORC": orc.md + - "Parquet": parquet.md + - "RDF": file.md + - "Snowflake JDBC endpoint": SnowflakeJdbc.md + - "SparkSQL view": sparkView.md + - "SPARQL endpoint": sparqlEndpoint.md + - "SQL endpoint": sqlEndpoint.md + - "Text": text.md + - "XML": xml.md \ No newline at end of file diff --git a/docs/build/reference/dataset/Hive.md b/docs/build/reference/dataset/Hive.md new file mode 100644 index 000000000..01a89577f --- /dev/null +++ 
b/docs/build/reference/dataset/Hive.md @@ -0,0 +1,70 @@ +--- +title: "Hive database" +description: "Read from or write to an embedded Apache Hive endpoint." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Hive database + + + + +Read from or write to an embedded Apache Hive endpoint. + +## Parameter + +### Schema + +Name of the hive schema or namespace. + +- Datatype: `string` +- Default Value: `None` + + + +### Table + +Name of the hive table. + +- Datatype: `string` +- Default Value: `None` + + + +### Query + +Optional query for projection and selection (e.g. " SELECT * FROM table WHERE x = true". + +- Datatype: `string` +- Default Value: `None` + + + +### Uri pattern + +A pattern used to construct the entity URI. If not provided the prefix + the line number is used. An example of such a pattern is 'urn:zyx:{id}' where *id* is a name of a property. + +- Datatype: `string` +- Default Value: `None` + + + +### Properties + +Comma-separated list of URL-encoded properties. If not provided, the list of properties is read from the first line. + +- Datatype: `string` +- Default Value: `None` + + + +### Charset + +The source internal encoding, e.g., UTF8, ISO-8859-1 + +- Datatype: `string` +- Default Value: `UTF-8` + + + diff --git a/docs/build/reference/dataset/Jdbc.md b/docs/build/reference/dataset/Jdbc.md new file mode 100644 index 000000000..31d9492c7 --- /dev/null +++ b/docs/build/reference/dataset/Jdbc.md @@ -0,0 +1,339 @@ +--- +title: "JDBC endpoint" +description: "Connect to an existing JDBC endpoint." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# JDBC endpoint + + + + +_General usage_ + +The JDBC dataset supports connections to Hive, Microsoft SQL Server, MySQL, MariaDB, SnowFlake, Oracle Database, DB2 and PostgreSQL databases. +A login, password and JDBC URL need to be provided. +This dataset supports queries or simply schema and table names to define what to retrieve from a source DB. 
+When the dataset is used as a sink, queries are ignored and only schema and table parameters are used. +If the dataset is used as a sink for a hierarchical mapping, it behaves similarly to the SqlEndpoint: One table is created per entity type. + +The names of the written tables are generated as follows: + +- The table name of the root mapping is defined by the table parameter of the dataset. + If the table name is empty, a name is generated from the first type of the mapping. + Special characters are removed and the name is truncated to a maximum of 128 characters. +- For each object mapping, the table name is generated from its type. + +_JDBC Connection Strings/URLs_ + +Most of the dataset parameters are passed directly to the driver. +Please make sure that you use the correct syntax for each DBMS, otherwise you may get unintuitive errors. + +Here are templates for supported database systems: +``` +oracle (external driver needed): +jdbc:oracle:thin:@{host}[:{port}]/{database} + +postgres (integrated): +jdbc:postgresql://{host}[:{port}]/[{database}] + +MySQL/MariaDB (integrated): +jdbc:{mariadb}://{host}[:{port}]/[{database}] + +SnowSQL (external driver needed): +jdbc:snowflake://{AWSAccount}.{AWS region}.snowflakecomputing.com?db={database}&schema={schema} + +MSSqlServer (integrated): +jdbc:sqlserver://{host}[:{port}];databaseName={database} + +DB2 (external driver needed): +jdbc:db2://{host}[:{port}]/{database} + +Trino (external driver needed) +jdbc:trino://{host}:8080/catalog/schema +``` + +_Read and write strategies_ + +There are multiple read and write strategies which can be selected depending on the purpose of the dataset in a workflow. + +Read strategies decide how the database is queried: + +- **full-table**: Queries or wraps a complete table. + Only the DB schema and table name need to be set. +- **query**: The given source query is passed to the database. 
+ The table name is not necessary in this case but a valid query in the SQL-dialect of the source database system must be provided. + +Write strategies decide how a new table is written: + +- **default**: An error will occur if the table exists. + If not a new one will be created. +- **overwrite**: The old table will be removed and a new one will be created. +- **append**: Data will be appended to the existing table. + The schema of the data written has to be the same as the existing table schema. + +_Optimized Writing_ + +Usually specific database systems have custom commands for loading large amounts of data, e.g. from a CSV file into a database table. +For some DBMS and specific JDBC dataset configurations we support these optimized methods of loading data. + +Supported DBMS: + +- MySQL and MariaDB (full support for versions 8.0.19+ and 10.4+, resp.): + - if older DBMS versions are used some dataset options like 'groupBy' might not be supported but equivalent queries will + - the same is true when older driver jars than the one provided by eccenca are used + - both use the MariaDB JDBC driver + - uses `LOAD DATA LOCAL INFILE` internally + - only applies when appending data to an existing table and having `Force Spark Execution` disabled + - Both the server parameter `local_infile` and the client parameter `allowLoadLocalInfile` must be enabled, e.g. by adding `allowLoadLocalInfile=true` to the JDBC URL. + For MySQL starting with version 8 the `local_infile` parameter is by default disabled! + - If during writing to a MySQL/MariaDB a `[…] You have an error in your SQL syntax […]` error is encountered make sure ANSI quotes are used. 
+ `sql_mode=ANSI_QUOTES` can be set via a URL parameter to the JDBC connection string like: + + ```sh + # MySQL + jdbc:mysql://:/?sessionVariables=sql_mode=ANSI_QUOTES + + # MariaDB + jdbc:mariadb://:/?sessionVariables=sql_mode=ANSI_QUOTES + ``` + +_Registering JDBC drivers_ + +More 3rd party databases are supported via adding their JDBC drivers to the classpath of Data Integration. +Drivers are usually provided by the database manufacturers. +If 32 bit and 64 bit versions are provided the latter is usually needed and should always equal the bit-level of the JVM. +To make sure that the drivers are loaded correctly, their class name (in case a jar contains multiple drivers) and location in the file system can be set with the `spark.sql.options.jdbc` option in the `dataintegration.conf` configuration file. + +An example for adding both the DB2 and MySQL drivers to the Data Integration configuration file `spark.sql.options.*` section: + +```raml +spark.sql.options { + … + + # List of database identifiers to specify user provided JDBC drivers. The second part of the protocol of a JDBC URI (e.g. db2 from + # jdbc:db2://host:port) is used to specify the driver. For each protocol on the list a jar classname and optional download + # location can be provided. + jdbc.drivers = "db2,mysql" + + # Some database systems use licenses that are too loose or restrictive for us to ship the drivers. Therefore a path + # to a jar file containing the driver and the name of driver can be specified here. + jdbc.db2.jar = "/home/user/Jars/db2jcc-db2jcc4.jar" + jdbc.mysql.jar = "/home/user/drivers/mysql.jar" + + # Name of the actual driver class for each db + jdbc.db2.name = "com.ibm.db2.jcc.DB2Driver" + jdbc.mysql.name = "com.mysql.jdbc.Driver" +} +``` + +_Driver Priority_ + +In general it will not work to upgrade a JDBC driver by providing an external driver for a database that is already packaged with eccenca Dataintegration. 
+ +The driver delivered with eccenca Dataintegration will be preferred. Driver names (configured via e.g. `spark.sql.options.jdbc.drivers = "mssql"`) will be ignored if JDBC URLs starting with, in this example `jdbc:mssql...` , are already supported in the dataset. + +_Recommended DBMS versions_ + +- **Microsoft SQL Server 2017**: Older versions might work, but do not support the `groupBy` parameter. +- **PostgreSQL 9.5**: The `groupBy` parameter needs at least version 8.4. +- **MySQL v8.0.19**: Older versions do not support the `groupBy` parameter. +- **DB2 v11.5.x**: The `groupBy` feature needs at least version 9.7 to function. +- **Oracle 12.2.x**: The `groupBy` feature does not work for versions prior to 11g Release 2. + +These limitations are the same for JDBC drivers that are older than the fully supported databases. +Queries can achieve a similar outcome if `groupBy` is not supported. + + +## Parameter + +### JDBC Driver Connection URL + +JDBC URL, must contain the database as parameter, e.g. with ;database=DBNAME or /database depending on the vendor. + +- Datatype: `string` +- Default Value: `None` + + + +### Table + +Table name. Can be empty if the read-strategy is not set to read the full table. If non-empty it has to contain at least an existing table. + +- Datatype: `string` +- Default Value: `None` + + + +### Source query + +Source query (e.g. 'SELECT TOP 10 * FROM table WHERE x = true'). Warning: Uses Driver (mySql, HiveQL, MSSql, Postgres) specific syntax. Can be left empty when full tables are loaded. Note: Even if columns with spaces/special characters are named in the query, they need to be referred to URL-encoded in subsequent transformations. + +- Datatype: `code-sql` +- Default Value: `None` + + + +### Group by + +Comma separated list of attributes appearing in the outer SELECT clause that should be grouped by. The attributes are matched case-insensitive. 
All other attributes will be grouped via an aggregation function that depends on the supported DBMS, e.g. (JSON) array aggregation. + +- Datatype: `string` +- Default Value: `None` + + + +### Order by + +Optional column to sort the result set. + +- Datatype: `string` +- Default Value: `None` + + + +### Limit + +Optional limit of returned records. This limit should be pushed to the source. No value implies that no limit will be applied. + +- Datatype: `option[int]` +- Default Value: `10` + + + +### Query strategy + +The strategy decides how the source system is queried. + +- Datatype: `enumeration` +- Default Value: `access-complete-table` + + + +### Write strategy + +If this dataset is written to, it can be selected if data is overwritten or appended. + +- Datatype: `enumeration` +- Default Value: `default` + + + +### Multiple values strategy + +How multiple values per entity property are written. + +- Datatype: `enumeration` +- Default Value: `concatenateValuesStrategy` + + + +### Clear table before workflow execution + +If set to true this will clear the specified table before executing a workflow that writes to it. + +- Datatype: `boolean` +- Default Value: `false` + + + +### User + +Username. Must be empty in some cases e.g. if secret key and client id are used. If non-empty this will also overwrite any value set in the JDBC URL string. + +- Datatype: `string` +- Default Value: `None` + + + +### Password + +Password. Can be empty in some cases e.g. secret key and client id are used or if it is just an empty string. The password must be set here and cannot be set in the JDBC URL connection string. + +- Datatype: `password` +- Default Value: `None` + + + +### Token endpoint URL (Azure Active Directory) + +URL for retrieving tokens, when using MS SQL Active Directory token based authentication. 
Can be found in the Azure AD Admin Center under OAuth2 endpoint or can be constructed with the general endpoint URL combined with the tenant id and the suffix /outh/v2/authortized.
The underlying dataset type can be configured using the `dataset.internal.*` configuration parameters." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Internal dataset (single graph) + + + + +Dataset for storing entities between workflow steps. This variant does use the same graph for all internal datasets in a workflow. The underlying dataset type can be configured using the `dataset.internal.*` configuration parameters. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/dataset/SnowflakeJdbc.md b/docs/build/reference/dataset/SnowflakeJdbc.md new file mode 100644 index 000000000..ce8829b00 --- /dev/null +++ b/docs/build/reference/dataset/SnowflakeJdbc.md @@ -0,0 +1,256 @@ +--- +title: "Snowflake JDBC endpoint" +description: "Connect to Snowflake JDBC endpoint." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Snowflake JDBC endpoint + + + + +This dataset supports connections to the Snowflake JDBC endpoint. + +#### Account URL hostname + +The supplied account URL hostname needs to contain the account identifier. Refer to the Snowflake documentation on [account identifiers](https://docs.snowflake.com/en/user-guide/admin-account-identifier) for details. + +#### Reading + +Either a table or a queries can be specified to retrieve data from Snowflake. + +Read strategies decide how the database is queried: + +- **full-table**: Queries or wraps a complete table. + Only the table name need to be set. +- **query**: The given source query is passed to the database. + The table name is not necessary in this case but a valid query must be provided. + +#### Writing + +When the dataset is used as a sink, queries are ignored and only schema and table parameters are used. +If the dataset is used as a sink for a hierarchical mapping, one table is created per entity type. + +Write strategies decide how a new table is written: + +- **default**: An error will occur if the table exists. + If not a new one will be created. 
+- **overwrite**: The old table will be removed and a new one will be created. +- **append**: Data will be appended to the existing table. + The schema of the data written has to be the same as the existing table schema. + +The names of the written tables are generated as follows: + +- The table name of the root mapping is defined by the table parameter of the dataset. + If the table name is empty, a name is generated from the first type of the mapping. + Special characters are removed and the name is truncated to a maximum of 128 characters. +- For each object mapping, the table name is generated from its type. + + +## Parameter + +### Connection + +Connection parameters + +- Datatype: `objectParameter` +- Default Value: `{'parameters': {'database': '', 'privateKeyPassword': '', 'host': '-.snowflakecomputing.com', 'privateKey': '', 'user': '', 'schema': '', 'port': '443', 'warehouse': '', 'table': '', 'password': '', 'additionalParameters': ''}}` + + +#### Account URL hostname + +The hostname which is used for the connection. Usually, this is something like '-.snowflakecomputing.com' + +- Datatype: `string` +- Default Value: `-.snowflakecomputing.com` + + +#### Port + +HTTP port + +- Datatype: `int` +- Default Value: `443` + + +#### User + +Username + +- Datatype: `string` +- Default Value: `None` + + +#### Password + +Password for basic authentication. Leave empty if key-pair authentication should be used. + +- Datatype: `password` +- Default Value: `None` + + +#### Private key + +The private key for the specified user. Leave empty if basic password authentication should be used. + +- Datatype: `password` +- Default Value: `None` + + +#### Private key password + +Password for encrypted private keys. Can be left empty if using an unencrypted key. + +- Datatype: `password` +- Default Value: `None` + + +#### Additional parameters + +Additional JDBC connection parameters. A map of the form 'Key1:Value1,Key2:Value2, where keys and values are URL encoded. 
+ +- Datatype: `stringmap` +- Default Value: `None` + + +#### Warehouse + +Warehouse + +- Datatype: `string` +- Default Value: `None` + + +#### Database + +Database + +- Datatype: `string` +- Default Value: `None` + + +#### Schema + +Schema + +- Datatype: `string` +- Default Value: `None` + + +#### Table + +Table name. Can be empty if the read-strategy is not set to read the full table. + +- Datatype: `string` +- Default Value: `None` + + + +### Read + +Parameters related to reading from the database. + +- Datatype: `objectParameter` +- Default Value: `{'parameters': {'queryStrategy': 'access-complete-table', 'restriction': '', 'groupBy': '', 'orderBy': '', 'sourceQuery': '', 'limit': '10'}}` + + +#### Source query + +Source query (e.g. 'SELECT TOP 10 * FROM table WHERE x = true'. Can be left empty when full tables are loaded. Note: Even if columns with spaces/special characters are named in the query, they need to be referred to URL-encoded in subsequent transformations. + +- Datatype: `code-sql` +- Default Value: `None` + + +#### Group by + +Comma separated list of attributes appearing in the outer SELECT clause that should be grouped by. The attributes are matched case-insensitive. All other attributes will be grouped via an aggregation function that depends on the supported DBMS, e.g. (JSON) array aggregation. + +- Datatype: `string` +- Default Value: `None` + + +#### Order by + +Optional column to sort the result set. + +- Datatype: `string` +- Default Value: `None` + + +#### Limit + +Optional limit of returned records. This limit should be pushed to the source. No value implies that no limit will be applied. + +- Datatype: `option[int]` +- Default Value: `10` + + +#### Query strategy + +The strategy decides how the source system is queried. + +- Datatype: `enumeration` +- Default Value: `access-complete-table` + + +#### Restriction + +An SQL WHERE clause to filter the records to be retrieved. 
+ +- Datatype: `string` +- Default Value: `None` + + + +### Write + +Parameters related to writing to the database. + +- Datatype: `objectParameter` +- Default Value: `{'parameters': {'writeStrategy': 'default', 'multipleValuesStrategy': 'concatenateValuesStrategy'}}` + + +#### Write strategy + +If this dataset is written to, it can be selected if data is overwritten or appended.' + +- Datatype: `enumeration` +- Default Value: `default` + + +#### Multiple values strategy + +How multiple values per entity property are written. + +- Datatype: `enumeration` +- Default Value: `concatenateValuesStrategy` + + + +### Query execution + +Query execution parameters. + +- Datatype: `objectParameter` +- Default Value: `{'parameters': {'retries': '0', 'pause': '2000'}}` + + +#### Retries + +Optional number of retries per query + +- Datatype: `int` +- Default Value: `0` + + +#### Pause + +Optional pause between queries in ms. + +- Datatype: `int` +- Default Value: `2000` + + + diff --git a/docs/build/reference/dataset/alignment.md b/docs/build/reference/dataset/alignment.md new file mode 100644 index 000000000..e6011a01a --- /dev/null +++ b/docs/build/reference/dataset/alignment.md @@ -0,0 +1,25 @@ +--- +title: "Alignment" +description: "Writes the alignment format specified at http://alignapi.gforge.inria.fr/format.html." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Alignment + + + + +Writes the alignment format specified at http://alignapi.gforge.inria.fr/format.html. + +## Parameter + +### File + +The alignment file. + +- Datatype: `resource` +- Default Value: `None` + + + diff --git a/docs/build/reference/dataset/avro.md b/docs/build/reference/dataset/avro.md new file mode 100644 index 000000000..0c0a2678b --- /dev/null +++ b/docs/build/reference/dataset/avro.md @@ -0,0 +1,52 @@ +--- +title: "Avro" +description: "Read from or write to an Apache Avro file." 
+icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Avro + + + + +Read from or write to an Apache Avro file. + +## Parameter + +### File + +Path (e.g. relative like 'path/filename.avro' or absolute 'hdfs:///path/filename.avro'). + +- Datatype: `resource` +- Default Value: `None` + + + +### Uri pattern + +A pattern used to construct the entity URI. If not provided the prefix + the line number is used. An example of such a pattern is 'urn:zyx:{id}' where *id* is a name of a property. + +- Datatype: `string` +- Default Value: `None` + + + +### Properties + +Comma-separated list of URL-encoded properties. If not provided, the list of properties is read from the first line. + +- Datatype: `string` +- Default Value: `None` + + + +### Charset + +The file encoding, e.g., UTF8, ISO-8859-1 + +- Datatype: `string` +- Default Value: `UTF-8` + + + diff --git a/docs/build/reference/dataset/binaryFile.md b/docs/build/reference/dataset/binaryFile.md new file mode 100644 index 000000000..1d79f3279 --- /dev/null +++ b/docs/build/reference/dataset/binaryFile.md @@ -0,0 +1,51 @@ +--- +title: "Binary file" +description: "Reads and writes binary files. A typical use-case for this dataset is to process PDF documents or images." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Binary file + + + + +Reads and writes binary files. A typical use-case for this dataset is to process PDF documents or images using workflow operators that accept or output files. If an operator reads from this dataset that does not support files directly (such as transformation or linking tasks), it will only receive the file metadata, which includes the file path. + +### ZIP files + +This dataset can be used to compress/decompress ZIP files. If a ZIP file is configured, the behaviour is as follows: +- Writing a ZIP file to this dataset will overwrite the configured ZIP file. +- Writing one or many non-ZIP files will overwrite the dataset file with a ZIP that contains all written files. 
+- When reading files, the dataset will return all files inside the ZIP that match the configured regex. If the regex is empty, the ZIP file itself will be returned. + +### Replaceable datasets + +It can be used with the `replacable input` flag to replace the configured file in a workflow execution request. +Same for the `replacable output` flag, which will return the file content as a result of a workflow execution request. + +### MIME type + +The generic MIME type for files of this dataset is `application/octet-stream`. + + +## Parameter + +### File + +The file to read or write. + +- Datatype: `resource` +- Default Value: `None` + + + +### ZIP file regex + +If the file is a ZIP file, read files are filtered via this regex. If empty, the zip itself will be returned to readers. + +- Datatype: `string` +- Default Value: `.*` + + + diff --git a/docs/build/reference/dataset/csv.md b/docs/build/reference/dataset/csv.md new file mode 100644 index 000000000..bd18a22f4 --- /dev/null +++ b/docs/build/reference/dataset/csv.md @@ -0,0 +1,151 @@ +--- +title: "CSV" +description: "Read from or write to an CSV file." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# CSV + + + + +Read from or write to an CSV file. + +## Parameter + +### File + +The CSV file. This may also be a zip archive of multiple CSV files that share the same schema. + +- Datatype: `resource` +- Default Value: `None` + + + +### Properties + +Comma-separated list of properties. If not provided, the list of properties is read from the first line. Properties that are no valid (relative or absolute) URIs will be encoded. + +- Datatype: `string` +- Default Value: `None` + + + +### Separator + +The character that is used to separate values. If not provided, defaults to ',', i.e., comma-separated values. "\t" for specifying tab-separated values, is also supported. + +- Datatype: `string` +- Default Value: `,` + + + +### Array separator + +The character that is used to separate the parts of array values. 
Write "\t" to specify the tab character. + +- Datatype: `string` +- Default Value: `None` + + + +### Quote + +Character used to quote values. + +- Datatype: `string` +- Default Value: `"` + + + +### URI pattern + +*Deprecated* A pattern used to construct the entity URI. If not provided the prefix + the line number is used. An example of such a pattern is 'urn:zyx:{id}' where *id* is a name of a property. + +- Datatype: `string` +- Default Value: `None` + + + +### Charset + +The file encoding, e.g., UTF-8, UTF-8-BOM, ISO-8859-1 + +- Datatype: `string` +- Default Value: `UTF-8` + + + +### Regex filter + +A regex filter used to match rows from the CSV file. If not set all the rows are used. + +- Datatype: `string` +- Default Value: `None` + + + +### Lines to skip + +The number of lines to skip in the beginning, e.g. copyright, meta information etc. + +- Datatype: `int` +- Default Value: `0` + + + +### Max chars per column + +The maximum characters per column. *Warning*: System will request heap memory of that size (2 bytes per character) when reading the CSV. If there are more characters found, the parser will fail. + +- Datatype: `int` +- Default Value: `128000` + + + +### Ignore bad lines + +If set to true then the parser will ignore lines that have syntax errors or do not have to correct number of fields according to the current config. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Quote escape character + +Escape character to be used inside quotes, used to escape the quote character. It must also be used to escape itself, e.g. by doubling it, e.g. "". If left empty, it defaults to quote. + +- Datatype: `string` +- Default Value: `"` + + + +### ZIP file regex + +If the input resource is a ZIP file, files inside the file are filtered via this regex. 
+ +- Datatype: `string` +- Default Value: `^(?!.*[\/\\]\..*$|^\..*$).*\.csv` + + + +### Delete file before workflow execution + +If set to true this will clear the specified file before executing a workflow that writes to it. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Trim whitespace and non-printable characters. + +If set to true, this will trim whitespace and non-printable characters from the contents of the CSV dataset. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/dataset/eccencaDataPlatform.md b/docs/build/reference/dataset/eccencaDataPlatform.md new file mode 100644 index 000000000..218dfdf50 --- /dev/null +++ b/docs/build/reference/dataset/eccencaDataPlatform.md @@ -0,0 +1,115 @@ +--- +title: "Knowledge Graph" +description: "Read RDF from or write RDF to a Knowledge Graph embedded in Corporate Memory." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Knowledge Graph + + + + +Read RDF from or write RDF to a Knowledge Graph embedded in Corporate Memory. + +## Parameter + +### Endpoint + +The named endpoint within the eccenca DataPlatform. + +- Datatype: `string` +- Default Value: `default` + + + +### Graph + +The URI of the named graph. + +- Datatype: `graph uri` +- Default Value: `None` + + + +### Page size + +The number of solutions to be retrieved per SPARQL query. + +- Datatype: `int` +- Default Value: `100000` + + + +### Pause time + +The number of milliseconds to wait between subsequent query + +- Datatype: `int` +- Default Value: `0` + + + +### Retry count + +The number of retries if a query fails + +- Datatype: `int` +- Default Value: `3` + + + +### Retry pause + +The number of milliseconds to wait until a failed query is retried. 
+ +- Datatype: `int` +- Default Value: `1000` + + + +### Strategy + +The strategy use for retrieving entities: simple: Retrieve all entities using a single query; subQuery: Use a single query, but wrap it for improving the performance on Virtuoso; parallel: Use a separate Query for each entity property. + +- Datatype: `enumeration` +- Default Value: `parallel` + + + +### Clear graph before workflow execution + +If set to true this will clear the specified graph before executing a workflow that writes to it. Note that this will always use the configured graph and ignore any overwritten values from the config port. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Entity list + +A list of entities to be retrieved. If not given, all entities will be retrieved. Multiple entities are separated by whitespace. + +- Datatype: `multiline string` +- Default Value: `None` + + + +### SPARQL query timeout (ms) + +SPARQL query timeout (select/update) in milliseconds. A value of zero means that there is no timeout. If a value greater zero is specified this overwrites possible default timeouts. This timeout is also propagated to DataPlatform and may overwrite default timeouts there. + +- Datatype: `int` +- Default Value: `0` + + + +### Optimized entity retrieval + +Optimized retrieval method to remove load from the underlying triple store. Query parallelism is limited and cheaper queries are executed against the backend. By putting the main work on DataIntegration side, the RDF backend is kept responsive. + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/dataset/excel.md b/docs/build/reference/dataset/excel.md new file mode 100644 index 000000000..37584d047 --- /dev/null +++ b/docs/build/reference/dataset/excel.md @@ -0,0 +1,61 @@ +--- +title: "Excel" +description: "Read from or write to an Excel workbook in Open XML format (XLSX)." 
+icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Excel + + + + +Read from or write to an Excel workbook in Open XML format (XLSX). + +## Parameter + +### File + +File name inside the resources directory. + +- Datatype: `resource` +- Default Value: `None` + + + +### Streaming + +Streaming enables reading and writing large Excels files. Warning: Be careful to disable streaming for large datasets (> 10MB), because of high memory consumption. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Lines to skip + +The number of lines to skip in the beginning when reading files. + +- Datatype: `int` +- Default Value: `0` + + + +### Has header + +If true, the first line will be read as the table header, which defines the column names. If false, the first line will be read as data. In that case, the columns need to be adressed using #A, #B, etc. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Output object values + +Output results from object rules (URIs). + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/dataset/file.md b/docs/build/reference/dataset/file.md new file mode 100644 index 000000000..10af9dcd6 --- /dev/null +++ b/docs/build/reference/dataset/file.md @@ -0,0 +1,61 @@ +--- +title: "RDF" +description: "Dataset which retrieves and writes all entities from/to an RDF file. For reading, the dataset is loaded in-memory and thus the size is restricted by the available memory. Large datasets should be loaded into an external RDF store and retrieved using the SPARQL dataset instead." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# RDF + + + + +Dataset which retrieves and writes all entities from/to an RDF file. For reading, the dataset is loaded in-memory and thus the size is restricted by the available memory. Large datasets should be loaded into an external RDF store and retrieved using the SPARQL dataset instead. + +## Parameter + +### File + +The RDF file. 
This may also be a zip archive of multiple RDF files. + +- Datatype: `resource` +- Default Value: `None` + + + +### Format + +Optional RDF format. If left empty, it will be auto-detected based on the file extension. N-Triples is the only format that can be written, while other formats can only be read. + +- Datatype: `string` +- Default Value: `None` + + + +### Graph + +The graph name to be read. If not provided, the default graph will be used. Must be provided if the format is N-Quads. + +- Datatype: `string` +- Default Value: `None` + + + +### Entity list + +A list of entities to be retrieved. If not given, all entities will be retrieved. Multiple entities are separated by whitespace. + +- Datatype: `multiline string` +- Default Value: `None` + + + +### ZIP file regex + +If the input resource is a ZIP file, files inside the file are filtered via this regex. + +- Datatype: `string` +- Default Value: `.*` + + + diff --git a/docs/build/reference/dataset/googlespreadsheet.md b/docs/build/reference/dataset/googlespreadsheet.md new file mode 100644 index 000000000..8617a14bc --- /dev/null +++ b/docs/build/reference/dataset/googlespreadsheet.md @@ -0,0 +1,65 @@ +--- +title: "Excel (Google Drive)" +description: "Read data from a remote Google Spreadsheet." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Excel (Google Drive) + + + + + +The dataset needs the document id of a "share via url" sheet on Google Drive as input. +It will automatically correct the URL and add the "export as xlsx" option to a new URL +that will be used to download an Excel Spreadsheet. +The download will be cached and treated the same way as an xlsx file in the Excel Dataset. + +### Caching + +The advanced parameter `invalidateCacheAfter` allows the user to specify a duration of the file cache +after which it is refreshed. +A file based cache is created to avoid CAPTCHAs. During the caching and validation of the URL +access occurs with random wait times between 1 and 5 seconds. 
+The cache is invalidated after 5 minutes by default. + + +## Parameter + +### URL + +Link to the document ('share with anyone having a link' must be enabled, URL parameters will be removed and corrected automatically). + +- Datatype: `string` +- Default Value: `None` + + + +### Streaming + +Streaming enables reading and writing large Excels files. Warning: Be careful to disable streaming for large datasets (> 10MB), because of high memory consumption. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Invalidate cache after + +Duration until file based cache is invalidated. + +- Datatype: `duration` +- Default Value: `PT5M` + + + +### Lines to skip + +The number of lines to skip in the beginning when reading files. + +- Datatype: `int` +- Default Value: `0` + + + diff --git a/docs/build/reference/dataset/inMemory.md b/docs/build/reference/dataset/inMemory.md new file mode 100644 index 000000000..8e7a6f0b9 --- /dev/null +++ b/docs/build/reference/dataset/inMemory.md @@ -0,0 +1,25 @@ +--- +title: "In-memory dataset" +description: "A Dataset that holds all data in-memory." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# In-memory dataset + + + + +A Dataset that holds all data in-memory. + +## Parameter + +### Clear graph before workflow execution + +If set to true this will clear this dataset before it is used in a workflow execution. + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/dataset/index.md b/docs/build/reference/dataset/index.md new file mode 100644 index 000000000..a6828015b --- /dev/null +++ b/docs/build/reference/dataset/index.md @@ -0,0 +1,41 @@ +--- +title: "Datasets" +icon: octicons/cross-reference-24 +tags: + - Build + - Reference +--- +# Datasets + + +Datasets are collections of data that can be read or written. 
+ +**:octicons-people-24: Intended audience:** Linked Data Experts and Domain Experts + +| Name | Description | +|------------------------:| :--------- | +|[Alignment](alignment.md) | Writes the alignment format specified at http://alignapi.gforge.inria.fr/format.html. | +|[Avro](avro.md) | Read from or write to an Apache Avro file. | +|[Binary file](binaryFile.md) | Reads and writes binary files. A typical use-case for this dataset is to process PDF documents or images. | +|[CSV](csv.md) | Read from or write to an CSV file. | +|[Excel](excel.md) | Read from or write to an Excel workbook in Open XML format (XLSX). | +|[Excel (Google Drive)](googlespreadsheet.md) | Read data from a remote Google Spreadsheet. | +|[Excel (OneDrive, Office365)](office365preadsheet.md) | Read data from a remote onedrive or Office365 Spreadsheet. | +|[Hive database](Hive.md) | Read from or write to an embedded Apache Hive endpoint. | +|[In-memory dataset](inMemory.md) | A Dataset that holds all data in-memory. | +|[Internal dataset](internal.md) | Dataset for storing entities between workflow steps. The underlying dataset type can be configured using the `dataset.internal.*` configuration parameters. | +|[Internal dataset (single graph)](LocalInternalDataset.md) | Dataset for storing entities between workflow steps. This variant does use the same graph for all internal datasets in a workflow. The underlying dataset type can be configured using the `dataset.internal.*` configuration parameters. | +|[JDBC endpoint](Jdbc.md) | Connect to an existing JDBC endpoint. | +|[JSON](json.md) | Read from or write to a JSON or JSON Lines file. | +|[Knowledge Graph](eccencaDataPlatform.md) | Read RDF from or write RDF to a Knowledge Graph embedded in Corporate Memory. | +|[Multi CSV ZIP](multiCsv.md) | Reads from or writes to multiple CSV files from/to a single ZIP file. | +|[Neo4j](neo4j.md) | Neo4j graph | +|[ORC](orc.md) | Read from or write to an Apache ORC file. 
| +|[Parquet](parquet.md) | Read from or write to an Apache Parquet file. | +|[RDF](file.md) | Dataset which retrieves and writes all entities from/to an RDF file. For reading, the dataset is loaded in-memory and thus the size is restricted by the available memory. Large datasets should be loaded into an external RDF store and retrieved using the SPARQL dataset instead. | +|[Snowflake JDBC endpoint](SnowflakeJdbc.md) | Connect to Snowflake JDBC endpoint. | +|[SparkSQL view](sparkView.md) | Use the SQL endpoint dataset instead. | +|[SPARQL endpoint](sparqlEndpoint.md) | Connect to an existing SPARQL endpoint. | +|[SQL endpoint](sqlEndpoint.md) | Provides a JDBC endpoint that exposes workflow or transformation results as tables, which can be queried using SQL. | +|[Text](text.md) | Reads and writes plain text files. | +|[XML](xml.md) | Read from or write to an XML file. | diff --git a/docs/build/reference/dataset/internal.md b/docs/build/reference/dataset/internal.md new file mode 100644 index 000000000..d552a8b1b --- /dev/null +++ b/docs/build/reference/dataset/internal.md @@ -0,0 +1,25 @@ +--- +title: "Internal dataset" +description: "Dataset for storing entities between workflow steps. The underlying dataset type can be configured using the `dataset.internal.*` configuration parameters." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Internal dataset + + + + +Dataset for storing entities between workflow steps. The underlying dataset type can be configured using the `dataset.internal.*` configuration parameters. + +## Parameter + +### graph URI + +The RDF graph that is used for storing internal data + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/dataset/json.md b/docs/build/reference/dataset/json.md new file mode 100644 index 000000000..3bd539a11 --- /dev/null +++ b/docs/build/reference/dataset/json.md @@ -0,0 +1,102 @@ +--- +title: "JSON" +description: "Read from or write to a JSON or JSON Lines file." 
+icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# JSON + + + + +Typically, this dataset is used to transform an JSON file to another format, e.g., to RDF. + +### Reading + +In addition to plain JSON files, *JSON Lines* files can also be read. + +For reading, the JSON dataset supports a number of special paths: +- `#id` Is a special syntax for generating an id for a selected element. It can be used in URI patterns for entities which do not provide an identifier. Examples: `http://example.org/{#id}` or `http://example.org/{/pathToEntity/#id}`. +- `#text` retrieves the text of the selected node. +- The backslash can be used to navigate to the parent JSON node, e.g., `\parent/key`. The name of the backslash key (here `parent`) is ignored. + +### Writing + +When writing JSON, all entities need to possess a unique URI. Writing multiple root entities with the same URI will result in multiple entries in the generated JSON. If multiple nested entities with the same URI are written, only the last entity with a given URI will be written. + + +## Parameter + +### File + +JSON file. This may also be a zip archive of multiple JSON files that share the same schema. + +- Datatype: `resource` +- Default Value: `None` + + + +### Template + +Template for writing JSON. The term {{output}} will be replaced by the written JSON. + +- Datatype: `code-json` +- Default Value: `{{output}}` + + + +### Navigate into arrays + +Navigate into arrays automatically. If set to false, the `#array` path operator must be used to navigate into arrays. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Base path + +The path to the elements to be read, starting from the root element, e.g., '/Persons/Person'. If left empty, all direct children of the root element will be read. 
+ +- Datatype: `string` +- Default Value: `None` + + + +### URI pattern (deprecated) + +A URI pattern, e.g., http://namespace.org/{ID}, where {path} may contain relative paths to elements + +- Datatype: `string` +- Default Value: `None` + + + +### Max depth + +Maximum depth of written JSON. This acts as a safe guard if a recursive structure is written. + +- Datatype: `int` +- Default Value: `15` + + + +### Streaming + +Streaming allows for reading large JSON files. If streaming is enabled, backward paths are not supported. + +- Datatype: `boolean` +- Default Value: `true` + + + +### ZIP file regex + +If the input resource is a ZIP file, files inside the file are filtered via this regex. + +- Datatype: `string` +- Default Value: `^(?!.*[\/\\]\..*$|^\..*$).*\.jsonl?$` + + + diff --git a/docs/build/reference/dataset/multiCsv.md b/docs/build/reference/dataset/multiCsv.md new file mode 100644 index 000000000..82ee254e1 --- /dev/null +++ b/docs/build/reference/dataset/multiCsv.md @@ -0,0 +1,133 @@ +--- +title: "Multi CSV ZIP" +description: "Reads from or writes to multiple CSV files from/to a single ZIP file." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Multi CSV ZIP + + + + +Reads from or writes to multiple CSV files from/to a single ZIP file. + +## Parameter + +### File + +Zip file name inside the resources directory/repository. + +- Datatype: `resource` +- Default Value: `None` + + + +### Separator + +The character that is used to separate values. If not provided, defaults to ',', i.e., comma-separated values. "\t" for specifying tab-separated values, is also supported. + +- Datatype: `string` +- Default Value: `,` + + + +### Array separator + +The character that is used to separate the parts of array values. Write "\t" to specify the tab character. + +- Datatype: `string` +- Default Value: `None` + + + +### Quote + +Character used to quote values. 
+ +- Datatype: `string` +- Default Value: `"` + + + +### Charset + +The file encoding, e.g., UTF8, ISO-8859-1 + +- Datatype: `string` +- Default Value: `UTF-8` + + + +### Lines to skip + +The number of lines to skip in the beginning, e.g. copyright, meta information etc. + +- Datatype: `int` +- Default Value: `0` + + + +### Max chars per column + +The maximum characters per column. If there are more characters found, the parser will fail. + +- Datatype: `int` +- Default Value: `128000` + + + +### Ignore bad lines + +If set to true then the parser will ignore lines that have syntax errors or do not have the correct number of fields according to the current config. + +- Datatype: `boolean` +- Default Value: `false` + + + +### Quote escape character + +Escape character to be used inside quotes, used to escape the quote character. It must also be used to escape itself, e.g. by doubling it, e.g. "". If left empty, it defaults to quote. + +- Datatype: `string` +- Default Value: `"` + + + +### Append files + +If 'True' then files in the ZIP archive are only added or updated, all other files in the ZIP stay untouched. If 'False' then a new ZIP file will be created on every dataset write. + +- Datatype: `boolean` +- Default Value: `true` + + + +### ZIP file regex + +Filter file paths inside the ZIP file via this regex. By default sub folders or files not ending with .csv are ignored. + +- Datatype: `string` +- Default Value: `^[^/]*\.csv$` + + + +### Delete file before workflow execution + +If set to true this will clear the specified file before executing a workflow that writes to it. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Optionally trim whitespace and non-printable characters. + +If set to true, this will trim whitespace and non-printable characters from the contents of the CSV dataset. 
+ +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/dataset/neo4j.md b/docs/build/reference/dataset/neo4j.md new file mode 100644 index 000000000..371975725 --- /dev/null +++ b/docs/build/reference/dataset/neo4j.md @@ -0,0 +1,132 @@ +--- +title: "Neo4j" +description: "Neo4j graph" +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Neo4j + + + + + +Supports reading and writing Neo4j graphs. The following sections outline how graphs are generated and read back. + +For more information about Neo4j, please refer to the [Neo4j documentation](https://neo4j.com/docs/). + +### Nodes + +For each entity that is written to a Neo4j dataset, a _node_ will be created. +A property `uri` will be added to each generated node, which holds the URI of the original entity. +In applications, the URI property should be used instead of the node identifiers, which are auto-generated in Neo4j and do not represent stable URIs. + +When reading nodes, the entity URIs will be generated based on that property. +At the moment, reading nodes that do not provide a `uri` property is not supported. + +### Labels + +_Labels_ in Neo4j are used to group nodes into sets where all nodes that have a certain _label_ belong to the same set. +Neo4j _labels_ are comparable with _classes_ in RDF (not to be confused with labels in RDF). + +When writing entities to the Neo4j dataset, the following _labels_ will be added to each generated node: + +- For each entity _type_ (such as the _type_ set in a mapping), a _label_ will be added to the node in Neo4j. + Since _types_ in eccenca DataIntegration are usually URIs, they will be converted according to the rules further down. +- The _label_ as configured by the _label_ parameter on the Neo4j dataset itself. + This is typically used to identify all entities that have been written by a certain Neo4j dataset specification in the project. 
+ For instance, if two Neo4j dataset specifications are added to a project - both writing to the same Neo4j database - different labels can be set to distinguish both sets of entities. + In that respect it may be used to model a similar concept as _graphs_ in RDF. + +### Relationships + +A relationship connects two nodes in Neo4j. +Hierarchical mappings will generate relationships for all object mappings. + +Relationships can be addressed with property paths in mappings. +At the moment, only paths of length 1 are supported, i.e., it's not possible to use non-property paths. + +### Handling of URIs + +In eccenca DataIntegration, URIs are typically used to uniquely identify classes and properties. +While URIs are central in RDF, Neo4j allows arbitrary names and does not have any special support for URIs. + +When generating Neo4j labels, properties and relationships, URIs will be shortened according to the following rules. +- If a registered project prefix matches a URI, a name `{prefixName}_{localPart}` will be generated. For instance, `http://xmlns.com/foaf/0.1/name` will become `foaf_name`. + Note that underscores (`_`) are used instead of colons (`:`) to separate the namespace and the local name. + The reason is that colons are reserved in the Cypher query language and some tools don't escape properly and fail on databases that use colons in names. +- If no project prefix matches a URI, the URI will be used verbatim. This will look ugly in Neo4j tools, so generally it's recommended to define prefixes for all used namespaces. + +When reading generated entities, the URIs of the classes and properties will be reconstructed based on the prefix table of the project. If the prefixes change between writing and reading, different URIs will be generated. + +### RDF vs. Neo4j terminology + +Neo4j uses a different terminology than RDF or description logic. +For users familiar with RDF, the following table shows the corresponding terms for some central concepts. 
+This is meant to help understanding and does not aim to provide a precise mapping as there are semantic differences between Neo4j and RDF. + +| RDF | Neo4j | +| --- |--- | +| resource | node | +| class | label | +| datatype property | property | +| object property | relationship | +| graph | Do not exist in Neo4j, but labels can be used to mimic graphs. | + + +## Parameter + +### URI + +The URL to the Neo4j instance + +- Datatype: `string` +- Default Value: `bolt://localhost:7687` + + + +### User + +The Neo4j username for basic authentication. + +- Datatype: `string` +- Default Value: `neo4j` + + + +### Password + +The Neo4j password for basic authentication. + +- Datatype: `password` +- Default Value: `PASSWORD_PARAMETER:7vIY2uNcIiwSSo+/MNozEg==` + + + +### Database + +Database (leave empty for default) + +- Datatype: `string` +- Default Value: `None` + + + +### Node label + +Neo4j label for all entities to be covered by this dataset. When reading, all nodes with this label will be read. When writing, this label will be added to all generated nodes. If the dataset is cleared, only nodes with this label will be deleted. + +- Datatype: `string` +- Default Value: `Any` + + + +### Clear before execution + +If set to true, all nodes with the specified label will be removed, before executing a workflow that writes to this graph. + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/dataset/office365preadsheet.md b/docs/build/reference/dataset/office365preadsheet.md new file mode 100644 index 000000000..1afed6b5e --- /dev/null +++ b/docs/build/reference/dataset/office365preadsheet.md @@ -0,0 +1,89 @@ +--- +title: "Excel (OneDrive, Office365)" +description: "Read data from a remote onedrive or Office365 Spreadsheet." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Excel (OneDrive, Office365) + + + + +The dataset needs the URL of a "share via link" sheet on Office 365/OneDrive as input. 
+It will automatically construct a direct download URL, cache the downloaded file, and handle it like +an XLSX file in the Excel Dataset. + +### Notes + +There are 2 types of URLs that can be shared: +Onedrive links look like `https://1drv.ms/x/s!AucULvzmJ-dsdfsfgaIcyWP_XY_G4w?e=yx65uu` + +Onedrive (based on SharePoint, for businesses) links look like `https://eccencagmbh-my.sharepoint.com/:x:/g/personal/person_eccenca_com/EdEMTEw1dclHiEZXyvy8P4YBit8wSyGsiwU5Kt__sQOZzw` + +The first type should always work but is not recommended for this dataset. The second type requires setting up an application in Microsoft EntraID (formerly Azure Active Directory). +EntraID: https://docs.microsoft.com/azure/active-directory/develop/v2-overview +Instructions and examples can be found here: +https://github.com/Azure-Samples/ms-identity-msal-java-samples/tree/main/3-java-servlet-web-app/1-Authentication/sign-in + +After following the steps, access to SharePoint/OneDrive for business can be set up in the application.conf file for eccenca DataIntegration. + +Example: + +```conf +com.eccenca.di.office365 = { + authority = "https://login.microsoftonline.com/a0907dd1-f981-4c98-a8b9-1deb27bcf2cc/" + clientId = "4d14959d-3c62-4f90-a072-a96ca4b3fa9f" + secret = "Ceb8Q~QkMMV7TBK-ggB3nh22nUnqoDB1KTmkjj" + scope = "https://graph.microsoft.com/.default" + tenantId = "a0907dd1-f981-4c98-a8b9-1deb27bcf2cc" +} +``` + +### Caching + +The advanced parameter `invalidateCacheAfter` allows the user to specify a duration of the file cache +after which it is refreshed. +A file based cache is created to avoid CAPTCHAs. During the caching and validation of the URL +access occurs with random wait times between 1 and 5 seconds. +The cache is invalidated after 5 minutes by default. + + +## Parameter + +### URL + +Link to the document ('share with anyone having a link' must be enabled). + +- Datatype: `string` +- Default Value: `None` + + + +### Streaming + +Streaming enables reading and writing large Excel files. 
Warning: Be careful to disable streaming for large datasets (> 10MB), because of high memory consumption. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Invalidate cache after + +Duration until file based cache is invalidated. + +- Datatype: `duration` +- Default Value: `PT5M` + + + +### Lines to skip + +The number of lines to skip in the beginning when reading files. + +- Datatype: `int` +- Default Value: `0` + + + diff --git a/docs/build/reference/dataset/orc.md b/docs/build/reference/dataset/orc.md new file mode 100644 index 000000000..924669412 --- /dev/null +++ b/docs/build/reference/dataset/orc.md @@ -0,0 +1,70 @@ +--- +title: "ORC" +description: "Read from or write to an Apache ORC file." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# ORC + + + + +Read from or write to an Apache ORC file. + +## Parameter + +### File + +Path (e.g. relative like 'path/filename.orc' or absolute 'hdfs:///path/filename.orc'). + +- Datatype: `resource` +- Default Value: `None` + + + +### Uri pattern + +A pattern used to construct the entity URI. If not provided the prefix + the line number is used. An example of such a pattern is 'urn:zyx:{id}' where *id* is a name of a property. + +- Datatype: `string` +- Default Value: `None` + + + +### Properties + +Comma-separated list of URL-encoded properties. If not provided, the list of properties is read from the first line. + +- Datatype: `string` +- Default Value: `None` + + + +### Partition + +Optional specification of the attribute for output partitioning + +- Datatype: `string` +- Default Value: `None` + + + +### Compression + +Optional compression algorithm (e.g. 
snappy, zlib) + +- Datatype: `string` +- Default Value: `snappy` + + + +### Charset + +The file encoding, e.g., UTF8, ISO-8859-1 + +- Datatype: `string` +- Default Value: `UTF-8` + + + diff --git a/docs/build/reference/dataset/parquet.md b/docs/build/reference/dataset/parquet.md new file mode 100644 index 000000000..46304e678 --- /dev/null +++ b/docs/build/reference/dataset/parquet.md @@ -0,0 +1,70 @@ +--- +title: "Parquet" +description: "Read from or write to an Apache Parquet file." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Parquet + + + + +Read from or write to an Apache Parquet file. + +## Parameter + +### File + +Path (e.g. relative like 'path/filename.parquet' or absolute 'hdfs:///path/filename.parquet'). + +- Datatype: `resource` +- Default Value: `None` + + + +### Uri pattern + +A pattern used to construct the entity URI. If not provided the prefix + the line number is used. An example of such a pattern is 'urn:zyx:{id}' where *id* is a name of a property. + +- Datatype: `string` +- Default Value: `None` + + + +### Properties + +Comma-separated list of URL-encoded properties. If not provided, the list of properties is read from the first line. + +- Datatype: `string` +- Default Value: `None` + + + +### Partition + +Optional specification of the attribute for output partitioning + +- Datatype: `string` +- Default Value: `None` + + + +### Compression + +Optional compression algorithm (e.g. snappy, zlib) + +- Datatype: `string` +- Default Value: `None` + + + +### Charset + +The file encoding, e.g., UTF8, ISO-8859-1 + +- Datatype: `string` +- Default Value: `UTF-8` + + + diff --git a/docs/build/reference/dataset/sparkView.md b/docs/build/reference/dataset/sparkView.md new file mode 100644 index 000000000..eecd92df6 --- /dev/null +++ b/docs/build/reference/dataset/sparkView.md @@ -0,0 +1,88 @@ +--- +title: "SparkSQL view" +description: "Use the SQL endpoint dataset instead." 
+icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# SparkSQL view + + + + +Use the SQL endpoint dataset instead. + +## Parameter + +### View name + +The name of the view. This specifies the table that can be queried by another virtual dataset or via JDBC (the 'default' schema is used for all virtual datasets). + +- Datatype: `string` +- Default Value: `None` + + + +### Query + +Optional SQL query on the selected table. Has no effect when used as an output dataset. + +- Datatype: `string` +- Default Value: `None` + + + +### Cache + +Optional boolean option that selects if the table should be cached by Spark or not (default = true). + +- Datatype: `boolean` +- Default Value: `true` + + + +### Uri pattern + +A pattern used to construct the entity URI. If not provided the prefix + the line number is used. An example of such a pattern is 'urn:zyx:{id}' where *id* is a name of a property. + +- Datatype: `string` +- Default Value: `None` + + + +### Properties + +Comma-separated list of URL-encoded properties. If not provided, the list of properties is read from the first line. + +- Datatype: `string` +- Default Value: `None` + + + +### Charset + +The source internal encoding, e.g., UTF8, ISO-8859-1 + +- Datatype: `string` +- Default Value: `UTF-8` + + + +### Array separator + +The character that is used to separate the parts of array values. Write "back slash t" to specify the tab character. + +- Datatype: `string` +- Default Value: `|` + + + +### Compatibility + +If true, basic types will be used for types that otherwise would result in client errors. This mainly means that arrays will be stored as Strings separated by the separator defined above. If the view is only for use within a SparkContext, this can be set to false. 
+ +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/dataset/sparqlEndpoint.md b/docs/build/reference/dataset/sparqlEndpoint.md new file mode 100644 index 000000000..6379dfe44 --- /dev/null +++ b/docs/build/reference/dataset/sparqlEndpoint.md @@ -0,0 +1,142 @@ +--- +title: "SPARQL endpoint" +description: "Connect to an existing SPARQL endpoint." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# SPARQL endpoint + + + + +Connect to an existing SPARQL endpoint. + +## Parameter + +### Endpoint URI + +The URI of the SPARQL endpoint, e.g., http://dbpedia.org/sparql + +- Datatype: `string` +- Default Value: `None` + + + +### Login + +Login required for authentication + +- Datatype: `string` +- Default Value: `None` + + + +### Password + +Password required for authentication + +- Datatype: `password` +- Default Value: `None` + + + +### Graph + +Only retrieve entities from a specific graph + +- Datatype: `string` +- Default Value: `None` + + + +### Page size + +The number of solutions to be retrieved per SPARQL query. + +- Datatype: `int` +- Default Value: `1000` + + + +### Entity list + +A list of entities to be retrieved. If not given, all entities will be retrieved. Multiple entities are separated by whitespace. + +- Datatype: `multiline string` +- Default Value: `None` + + + +### Pause time + +The number of milliseconds to wait between subsequent queries. + +- Datatype: `int` +- Default Value: `0` + + + +### Retry count + +The number of retries if a query fails + +- Datatype: `int` +- Default Value: `3` + + + +### Retry pause + +The number of milliseconds to wait until a failed query is retried. + +- Datatype: `int` +- Default Value: `1000` + + + +### Query parameters + +Additional parameters to be appended to every request e.g. 
&soft-limit=1 + +- Datatype: `string` +- Default Value: `None` + + + +### Strategy + +The strategy used for retrieving entities: simple: Retrieve all entities using a single query; subQuery: Use a single query, but wrap it for improving the performance on Virtuoso; parallel: Use a separate Query for each entity property. + +- Datatype: `enumeration` +- Default Value: `parallel` + + + +### Use order by + +Include useOrderBy in queries to enforce correct order of values. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Clear graph before workflow execution + +If set to true this will clear the specified graph before executing a workflow that writes to it. + +- Datatype: `boolean` +- Default Value: `false` + + + +### SPARQL query timeout (ms) + +SPARQL query timeout (select/update) in milliseconds. A value of zero means that the timeout configured via property is used (e.g. configured via silk.remoteSparqlEndpoint.defaults.read.timeout.ms). To overwrite the configured value specify a value greater than zero. + +- Datatype: `int` +- Default Value: `0` + + + diff --git a/docs/build/reference/dataset/sqlEndpoint.md b/docs/build/reference/dataset/sqlEndpoint.md new file mode 100644 index 000000000..6565f7b91 --- /dev/null +++ b/docs/build/reference/dataset/sqlEndpoint.md @@ -0,0 +1,114 @@ +--- +title: "SQL endpoint" +description: "Provides a JDBC endpoint that exposes workflow or transformation results as tables, which can be queried using SQL." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# SQL endpoint + + + + +_SQL endpoint dataset parameters_ + +The dataset only requires that the _tableNamePrefix_ parameter is given. This will be used as the prefix for the names of the generated tables. +When a set of entities is written to the endpoint _a view is generated for each entity type_ (defined by an 'rdf_type' attribute). 
+That means that the mapping or data source that are used as input for the SQL endpoint need to have a type or require a user defined type mapping. + +The operator has a _compatibility mode_. This mode will avoid complex types such as Arrays. When arrays exist in the input they +are converted to a String using the given _arraySeparator_. This avoids errors and warnings in some Jdbc clients that are unable to +handle typed arrays and may make working with software like Excel easier. + +The parameter _aliasMap_ of the endpoint allows the specification of column aliases. The map is a comma separated list of key-value pairs. +Each key and value is denoted by ```key:value```. An example for renaming 2 columns (source1, source2 to target1, target2) in the result would be: +```source1:target1,source2:target2``` + +Note: Table and column (mapping target) names will be automatically converted to be valid in as many databases as possible. +Table names will be shortened to 128 characters. Only a-z, A-Z, 0-9 and _ are allowed. Others will be replaced with an underscore. +Column names undergo the same transformation but will be converted to lower case as well. The log will inform about changes. +The table names will be generated based on the target type of each mapping. +The user needs to make sure that each object mapping specifies a unique type. +If two object mappings define the same type, only the last one will be written. + +_SQL endpoint activity_ + +See [ActivityDocumentation] for a general description of the Data Integration activities. +The activity will _start_ automatically, when the SQL endpoint is used +as a data sink and Data Integration is configured to make the SQL endpoint accessible remotely. + +When the activity is started and _running_ it returns the server status and JDBC URL as its value. + +_Stopping_ the activity will drop all views generated by the activity. It can be _restarted_ by rerunning the +workflow containing it as a sink. 
+ +_Remote client configuration (via JDBC and ODBC)_ + +Within Data Integration the SQL endpoint can be used as a source or sink like any other dataset. If the _startThriftServer_ option is set to 'true' +access via JDBC or ODBC is possible. + +[ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity) and [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity) drivers can be used to connect to relational databases. + +When selecting a version of a driver the client operating system and its type (32bit/64 bit) are the most important factors. +The version of the client drivers sometimes is the same as the server's. +If no version of a driver is given, the newest driver of the vendor should work, as it _should_ be backwards compatible. + +Any JDBC or ODBC client can connect to an SQL endpoint dataset. SparkSQL uses the same query processing as Hive, therefore the requirements for the client are: + +- A JDBC driver compatible with _Hive 1.2.1_[^hi] (platform independent driver _org.apache.hive.jdbc.HiveDriver_ is needed) or +- A JDBC driver compatible with _Spark 2.3.3_ +- A Hive ODBC driver (ODBC driver for the client architecture and operating system needed) + +[^hi]: Hive 1.2.1 is [ODPi](https://github.com/odpi/specs/blob/master/ODPi-Runtime.md) runtime compliant + +A detailed instruction to connect to a Hive or SparkSQL endpoint with various tools (e.g. SQuirreL, beeline, SQL Developer, ...) can be found at _[Apache HiveServer2 Clients](https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients)_. +The database client _[DBeaver](https://dbeaver.io/)_ can connect to the SQL endpoint out of the box. + + +## Parameter + +### Table name prefix + +Prefix of the table that will be shared. In the case of complex mappings more than one table will be created. If one name is given it will be used as a prefix for table names. 
If left empty the table names will be generated from the user name and time stamps and start with 'root', 'object-mapping' + +- Datatype: `string` +- Default Value: `None` + + + +### Cache + +Optional boolean option that selects if the table should be cached by Spark or not (default = true). + +- Datatype: `boolean` +- Default Value: `true` + + + +### Array separator + +The character that is used to separate the parts of array values. Write \t to specify the tab character. + +- Datatype: `string` +- Default Value: `|` + + + +### Compatibility + +If true, basic types will be used for unusual data types that otherwise may result in client errors. Try switching this on, if a client has weird error messages. (Default = true) + +- Datatype: `boolean` +- Default Value: `true` + + + +### Map + +Mapping of column names. Similar to aliases, e.g., 'c1:c2' would rename column c1 into c2. + +- Datatype: `stringmap` +- Default Value: `None` + + + diff --git a/docs/build/reference/dataset/text.md b/docs/build/reference/dataset/text.md new file mode 100644 index 000000000..c6e318649 --- /dev/null +++ b/docs/build/reference/dataset/text.md @@ -0,0 +1,70 @@ +--- +title: "Text" +description: "Reads and writes plain text files." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# Text + + + + +Reads and writes plain text files. + +## Writing + +All values of each entity will be written as plain text. Multiple values per entity are separated by spaces. Each entity will be written to a new line. + +## Reading + +The entire text will be read as a single entity with a single property. Note that even if multiple entities have been written to this dataset before, those would still be read back as a single entity. The default type is `document`, the default path is `text`. Both values can be configured in the advanced section. + + +## Parameter + +### File + +The plain text file. May also be a zip archive containing multiple text files. 
+ +- Datatype: `resource` +- Default Value: `None` + + + +### Charset + +The file encoding, e.g., UTF-8, UTF-8-BOM, ISO-8859-1 + +- Datatype: `string` +- Default Value: `UTF-8` + + + +### Type name + +A type name that represents this file. + +- Datatype: `string` +- Default Value: `document` + + + +### Property + +The single property that holds the text. + +- Datatype: `string` +- Default Value: `text` + + + +### ZIP file regex + +If the input resource is a ZIP file, files inside the file are filtered via this regex. + +- Datatype: `string` +- Default Value: `.*` + + + diff --git a/docs/build/reference/dataset/xml.md b/docs/build/reference/dataset/xml.md new file mode 100644 index 000000000..2c1c2b79a --- /dev/null +++ b/docs/build/reference/dataset/xml.md @@ -0,0 +1,123 @@ +--- +title: "XML" +description: "Read from or write to an XML file." +icon: octicons/cross-reference-24 +tags: + - Dataset +--- +# XML + + + + +Typically, this dataset is used to transform an XML file to another format, e.g., to RDF. It can also be used to generate XML files. + +### Reading + +When this dataset is used as an input for another task (e.g., a transformation task), the input type of the consuming task selects the path where the entities to be read are located. + +Example: + + + + John Doe + 1970 + + + Max Power + 1980 + + + +A transformation for reading all persons of the above XML would set the input type to `/Person`. +The transformation iterates all entities matching the given input path. +In the above example the first entity to be read is: + + + John Doe + 1970 + + +All paths used in the consuming task are relative to this, e.g., the person name can be addressed with the path `/Name`. + +Path examples: + +- The empty path selects the root element. +- `/Person` selects all persons. +- `/Person[Year = "1970"]` selects all persons which are born in 1970. +- `/#id` Is a special syntax for generating an id for a selected element. 
It can be used in URI patterns for entities which do not provide an identifier. Examples: `http://example.org/{#id}` or `http://example.org/{/pathToEntity/#id}`. +- The wildcard * enumerates all direct children, e.g., `/Persons/*/Name`. +- The wildcard ** enumerates all direct and indirect children. +- The backslash can be used to navigate to the parent XML node, e.g., `\Persons/SomeHeader`. +- `#text` retrieves the text of the selected node. + +### Writing + +When writing XML, all entities need to possess a unique URI. Writing multiple root entities with the same URI will result in multiple entries in the generated XML. If multiple nested entities with the same URI are written, only the last entity with a given URI will be written. + + +## Parameter + +### File + +The XML file. This may also be a zip archive of multiple XML files that share the same schema. + +- Datatype: `resource` +- Default Value: `None` + + + +### Base path + +The base path when writing XML. For instance: /RootElement/Entity. Should no longer be used for reading XML! Instead, set the base path by specifying it as input type on the subsequent transformation or linking tasks. + +- Datatype: `string` +- Default Value: `None` + + + +### URI pattern + +A URI pattern, e.g., http://namespace.org/{ID}, where {path} may contain relative paths to elements + +- Datatype: `string` +- Default Value: `None` + + + +### Output template + +The output template used for writing XML. Must be valid XML. The generated entity is identified through a processing instruction of the form `<?MyEntity?>`. + +- Datatype: `code-xml` +- Default Value: `<Root><?Entity?></Root>` + + + +### Streaming + +Streaming allows for reading large XML files. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Max depth + +Maximum depth of written XML. This acts as a safeguard if a recursive structure is written. 
+ +- Datatype: `int` +- Default Value: `15` + + + +### ZIP file regex + +If the input resource is a ZIP file, files inside the file are filtered via this regex. + +- Datatype: `string` +- Default Value: `^(?!.*[\/\\]\..*$|^\..*$).*\.xml$` + + + diff --git a/docs/build/reference/distancemeasure/.pages b/docs/build/reference/distancemeasure/.pages new file mode 100644 index 000000000..0717ce79b --- /dev/null +++ b/docs/build/reference/distancemeasure/.pages @@ -0,0 +1,31 @@ +nav: + - index.md + - "CJK reading distance": cjkReadingDistance.md + - "Compare physical quantities": PhysicalQuantitiesDistance.md + - "Constant similarity value": constantDistance.md + - "Cosine": cosine.md + - "Date": date.md + - "DateTime": dateTime.md + - "Dice coefficient": dice.md + - "Geographical distance": wgs84.md + - "Greater than": greaterThan.md + - "Inequality": inequality.md + - "Inside numeric interval": insideNumericInterval.md + - "Is substring": isSubstring.md + - "Jaccard": jaccard.md + - "Jaro distance": jaro.md + - "Jaro-Winkler distance": jaroWinkler.md + - "Korean phoneme distance": koreanPhonemeDistance.md + - "Korean translit distance": koreanTranslitDistance.md + - "Levenshtein distance": levenshteinDistance.md + - "Lower than": lowerThan.md + - "Normalized Levenshtein distance": levenshtein.md + - "Numeric equality": numericEquality.md + - "Numeric similarity": num.md + - "qGrams": qGrams.md + - "Relaxed equality": relaxedEquality.md + - "Soft Jaccard": softjaccard.md + - "Starts with": startsWith.md + - "String equality": equality.md + - "Substring comparison": substringDistance.md + - "Token-wise distance": tokenwiseDistance.md \ No newline at end of file diff --git a/docs/build/reference/distancemeasure/PhysicalQuantitiesDistance.md b/docs/build/reference/distancemeasure/PhysicalQuantitiesDistance.md new file mode 100644 index 000000000..2287b796d --- /dev/null +++ b/docs/build/reference/distancemeasure/PhysicalQuantitiesDistance.md @@ -0,0 +1,229 @@ +--- +title: 
"Compare physical quantities" +description: "Computes the distance between two physical quantities. The distance is normalized to the SI base unit of the dimension. For instance for lengths, the distance will be in metres. Comparing incompatible units will yield a validation error." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Compare physical quantities + + + + +SI units and common derived units are supported. The following section lists all supported units. By default, all quantities are normalized to their base unit. For instance, lengths will be normalized to metres. +### Supported units + +#### Time + +Time is expressed in seconds (symbol: `s`). +The following alternative symbols are supported: +* `mo_s`: day*29.53059 +* `mo_g`: year/12.0 +* `a`: day*365.25 +* `min`: min +* `a_g`: year +* `mo`: (day*365.25)/12.0 +* `mo_j`: (day*365.25)/12.0 +* `a_j`: day*365.25 +* `h`: h +* `a_t`: day*365.24219 +* `d`: day + + +#### Length + +Length is expressed in metres (symbol: `m`). +The following alternative symbols are supported: +* `in`: c(cm*254.0) +* `nmi`: m*1852.0 +* `Ao`: dnm +* `mil`: m(c(cm*254.0)) +* `yd`: ((c(cm*254.0))*12.0)*3.0 +* `AU`: m*1.49597871E11 +* `ft`: (c(cm*254.0))*12.0 +* `pc`: m*3.085678E16 +* `fth`: ((c(cm*254.0))*12.0)*6.0 +* `mi`: ((c(cm*254.0))*12.0)*5280.0 +* `hd`: (c(cm*254.0))*4.0 + + +#### Mass + +Mass is expressed in kilograms (symbol: `kg`). +The following alternative symbols are supported: +* `lb`: lb +* `ston`: hlb*20.0 +* `t`: Mg +* `stone`: lb*14.0 +* `u`: AMU +* `gr`: (mg*6479891.0)/100000.0 +* `lcwt`: lb*112.0 +* `oz`: oz +* `g`: g +* `scwt`: hlb +* `dr`: oz/16.0 +* `lton`: (lb*112.0)*20.0 + + +#### Electric current + +Electric current is expressed in amperes (symbol: `A`). +The following alternative symbols are supported: +* `Bi`: daA +* `Gb`: cm·(A/m)*250.0/[one?] + + +#### Temperature + +Temperature is expressed in kelvins (symbol: `K`). 
+The following alternative symbols are supported: +* `Cel`: ℃ + + +#### Amount of substance + +Amount of substance is expressed in moles (symbol: `mol`). + +#### Luminous intensity + +Luminous intensity is expressed in candelas (symbol: `cd`). + +#### Area + +Area is expressed in square metres (symbol: `m²`). +The following alternative symbols are supported: +* `m2`: m² +* `ar`: hm² +* `syd`: ((c(cm*254.0))*12.0)*3.0² +* `cml`: [one?]/4.0·m(c(cm*254.0))² +* `b`: hfm² +* `sft`: (c(cm*254.0))*12.0² +* `sin`: c(cm*254.0)² + + +#### Volume + +Volume is expressed in cubic metres (symbol: `㎥`). +The following alternative symbols are supported: +* `st`: [㎥?] +* `bf`: (c(cm*254.0)³)*144.0 +* `cyd`: ((c(cm*254.0))*12.0)*3.0³ +* `cr`: ((c(cm*254.0))*12.0³)*128.0 +* `L`: L +* `l`: l +* `cin`: c(cm*254.0)³ +* `cft`: (c(cm*254.0))*12.0³ +* `m3`: ㎥ + + +#### Energy + +Energy is expressed in joules (symbol: `J`). +The following alternative symbols are supported: +* `cal_IT`: (J*41868.0)/10000.0 +* `eV`: J*1.602176487E-19 +* `cal_m`: (J*419002.0)/100000.0 +* `cal`: m(J*4184.0) +* `cal_th`: m(J*4184.0) + + +#### Angle + +Angle is expressed in radians (symbol: `rad`). 
+The following alternative symbols are supported: +* `circ`: [one?]·rad*2.0 +* `gon`: ([one?]·rad/180.0)*0.9 +* `deg`: [one?]·rad/180.0 +* `'`: ([one?]·rad/180.0)/60.0 +* `''`: (([one?]·rad/180.0)/60.0)/60.0 + + +#### Others + +- `1/m`, derived units: `Ky`: c(1/m) +- `kg/(m·s)`, derived units: `P`: g/(s·cm) +- `bit/s`, derived units: `Bd`: bit/s +- `bit`, derived units: `By`: bit*8.0 +- `Sv` +- `N` +- `Ω`, derived units: `Ohm`: Ω +- `T`, derived units: `G`: T/10000.0 +- `sr`, derived units: `sph`: [one?]·sr*4.0 +- `F` +- `C/kg`, derived units: `R`: (C/kg)*2.58E-4 +- `cd/m²`, derived units: `sb`: cd/cm², `Lmb`: cd/([one?]·cm²) +- `Pa`, derived units: `bar`: Pa*100000.0, `atm`: Pa*101325.0 +- `kg/(m·s²)`, derived units: `att`: k(g·(m/s²)*9.80665)/cm² +- `m²/s`, derived units: `St`: cm²/s +- `A/m`, derived units: `Oe`: (A/m)*250.0/[one?] +- `kg·m²/s²`, derived units: `erg`: cm²·g/s² +- `kg/m³`, derived units: `g%`: g/dl +- `mho` +- `V` +- `lx`, derived units: `ph`: lx/10000.0 +- `m/s²`, derived units: `Gal`: cm/s², `m/s2`: m/s² +- `m/s`, derived units: `kn`: m*1852.0/h +- `m·kg/s²`, derived units: `gf`: g·(m/s²)*9.80665, `lbf`: lb·(m/s²)*9.80665, `dyn`: cm·g/s² +- `m²/s²`, derived units: `RAD`: cm²·g/(s²·hg), `REM`: cm²·g/(s²·hg) +- `C` +- `Gy` +- `Hz` +- `H` +- `lm` +- `W` +- `Wb`, derived units: `Mx`: Wb/1.0E8 +- `Bq`, derived units: `Ci`: Bq*3.7E10 +- `S` + + +### Characteristics +This distance measure is not normalized, i.e., all distances start at 0 (exact match) and increase the more different the values are. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". 
+ +--- +#### Convert SI unit prefixes: + +* Input values: + - Source: `[1 km]` + - Target: `[500 m]` + +* Returns: → `500.0` + + +--- +#### Convert imperial and metric values: + +* Input values: + - Source: `[1 km]` + - Target: `[1 mi]` + +* Returns: → `609.344` + + +--- +#### Validate if the compared units of measurement are compatible: + +* Input values: + - Source: `[1 km]` + - Target: `[1 kg]` + +* Returns: → `NaN` + + + + +## Parameter + +### Number format + +The IETF BCP 47 language tag, e.g., 'en'. + +- Datatype: `string` +- Default Value: `en` + + + diff --git a/docs/build/reference/distancemeasure/cjkReadingDistance.md b/docs/build/reference/distancemeasure/cjkReadingDistance.md new file mode 100644 index 000000000..7b3c1ae28 --- /dev/null +++ b/docs/build/reference/distancemeasure/cjkReadingDistance.md @@ -0,0 +1,38 @@ +--- +title: "CJK reading distance" +description: "CJK Reading Distance." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# CJK reading distance + + + + + +### Characteristics +This distance measure is not normalized, i.e., all distances start at 0 (exact match) and increase the more different the values are. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +### Min char + +No description + +- Datatype: `char` +- Default Value: `0` + + + +### Max char + +No description + +- Datatype: `char` +- Default Value: `z` + + + diff --git a/docs/build/reference/distancemeasure/constantDistance.md b/docs/build/reference/distancemeasure/constantDistance.md new file mode 100644 index 000000000..7ece1873c --- /dev/null +++ b/docs/build/reference/distancemeasure/constantDistance.md @@ -0,0 +1,27 @@ +--- +title: "Constant similarity value" +description: "Always returns a constant similarity value." 
+icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Constant similarity value + + + + + +### Characteristics +This distance measure is not normalized, i.e., all distances start at 0 (exact match) and increase the more different the values are. + +## Parameter + +### Value + +No description + +- Datatype: `double` +- Default Value: `1.0` + + + diff --git a/docs/build/reference/distancemeasure/cosine.md b/docs/build/reference/distancemeasure/cosine.md new file mode 100644 index 000000000..47de89ff7 --- /dev/null +++ b/docs/build/reference/distancemeasure/cosine.md @@ -0,0 +1,29 @@ +--- +title: "Cosine" +description: "Cosine Distance Measure." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Cosine + + + + + +### Characteristics +This distance measure is normalized, i.e., all distances are between 0 (exact match) and 1 (no similarity). + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +### K + +No description + +- Datatype: `int` +- Default Value: `3` + + + diff --git a/docs/build/reference/distancemeasure/date.md b/docs/build/reference/distancemeasure/date.md new file mode 100644 index 000000000..78d807ab4 --- /dev/null +++ b/docs/build/reference/distancemeasure/date.md @@ -0,0 +1,140 @@ +--- +title: "Date" +description: "The distance in days between two dates ('YYYY-MM-DD' format)." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Date + + + + + +### Characteristics +This distance measure is not normalized, i.e., all distances start at 0 (exact match) and increase the more different the values are. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. +### Examples + +**Notation:** List of values are represented via square brackets. 
Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Returns 0 if both dates are equal: + +* Input values: + - Source: `[2003-03-01]` + - Target: `[2003-03-01]` + +* Returns: → `0.0` + + +--- +#### Returns 1 if both dates are one day apart: + +* Input values: + - Source: `[2003-03-01]` + - Target: `[2003-03-02]` + +* Returns: → `1.0` + + +--- +#### Returns the number of days if both dates are one month apart: + +* Input values: + - Source: `[2003-03-01]` + - Target: `[2003-04-01]` + +* Returns: → `31.0` + + +--- +#### Returns the number of days if both dates are one year apart: + +* Input values: + - Source: `[2018-03-01]` + - Target: `[2019-03-01]` + +* Returns: → `365.0` + + +--- +#### Time of day is ignored: + +* Input values: + - Source: `[2003-03-01]` + - Target: `[2003-03-01T06:00:00]` + +* Returns: → `0.0` + + +--- +#### Missing days are set to 1 by default: + +* Input values: + - Source: `[2003-01]` + - Target: `[2003-01-01]` + +* Returns: → `0.0` + + +--- +#### Missing months are set to 1 by default: + +* Input values: + - Source: `[2003]` + - Target: `[2003-01-01]` + +* Returns: → `0.0` + + +--- +#### Missing months and days are set to 1 by default: + +* Input values: + - Source: `[2018]` + - Target: `[2019]` + +* Returns: → `365.0` + + +--- +#### If 'requireMonthAndDay' is set, dates without a day and month will not match: + +* Parameters + * *requireMonthAndDay*: `true` + +* Input values: + - Source: `[2003]` + - Target: `[2003-03-01]` + +* Returns: → `Infinity` + + +--- +#### If 'requireMonthAndDay' is set, dates without a day will not match: + +* Parameters + * *requireMonthAndDay*: `true` + +* Input values: + - Source: `[2003-12]` + - Target: `[2003-03-01]` + +* Returns: → `Infinity` + + + + +## Parameter + +### Require month and day + +If true, no distance value will be generated if months or days are missing (e.g., 2019-11). If false, missing month or day fields will default to 1. 
+ +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/distancemeasure/dateTime.md b/docs/build/reference/distancemeasure/dateTime.md new file mode 100644 index 000000000..f1d3a597c --- /dev/null +++ b/docs/build/reference/distancemeasure/dateTime.md @@ -0,0 +1,21 @@ +--- +title: "DateTime" +description: "Distance between two date time values (xsd:dateTime format) in seconds." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# DateTime + + + + + +### Characteristics +This distance measure is not normalized, i.e., all distances start at 0 (exact match) and increase the more different the values are. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/distancemeasure/dice.md b/docs/build/reference/distancemeasure/dice.md new file mode 100644 index 000000000..c3a60162a --- /dev/null +++ b/docs/build/reference/distancemeasure/dice.md @@ -0,0 +1,21 @@ +--- +title: "Dice coefficient" +description: "Dice similarity coefficient." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Dice coefficient + + + + + +### Characteristics +This distance measure is normalized, i.e., all distances are between 0 (exact match) and 1 (no similarity). + +Compares sets of multiple values. Typically, incoming values are tokenized before being fed into this measure. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/distancemeasure/equality.md b/docs/build/reference/distancemeasure/equality.md new file mode 100644 index 000000000..d03feaa5a --- /dev/null +++ b/docs/build/reference/distancemeasure/equality.md @@ -0,0 +1,46 @@ +--- +title: "String equality" +description: "Checks for equality of the string representation of the given values.
Returns success if string values are equal, failure otherwise. For a numeric comparison of values use the 'Numeric Equality' comparator." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# String equality + + + + + +### Characteristics +This is a boolean distance measure, i.e., all distances are either 0 or 1. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Returns distance 0, if at least one value matches: + +* Input values: + - Source: `[max, helmut]` + - Target: `[max]` + +* Returns: → `0.0` + + +--- +#### Returns distance 1, if no value matches: + +* Input values: + - Source: `[max, helmut]` + - Target: `[john]` + +* Returns: → `1.0` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/distancemeasure/greaterThan.md b/docs/build/reference/distancemeasure/greaterThan.md new file mode 100644 index 000000000..10662648c --- /dev/null +++ b/docs/build/reference/distancemeasure/greaterThan.md @@ -0,0 +1,47 @@ +--- +title: "Greater than" +description: "Checks if the source value is greater than the target value. If both strings are numbers, numerical order is used for comparison. Otherwise, alphanumerical order is used." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Greater than + + + + + +### Characteristics +This is a boolean distance measure, i.e., all distances are either 0 or 1. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. 
+ +## Parameter + +### Or equal + +Accept equal values + +- Datatype: `boolean` +- Default Value: `false` + + + +### Order + +Per default, if both strings are numbers, numerical order is used for comparison. Otherwise, alphanumerical order is used. Choose a more specific order for improved performance. + +- Datatype: `enumeration` +- Default Value: `Autodetect` + + + +### Reverse + +Reverse source and target inputs + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/distancemeasure/index.md b/docs/build/reference/distancemeasure/index.md new file mode 100644 index 000000000..4badd6061 --- /dev/null +++ b/docs/build/reference/distancemeasure/index.md @@ -0,0 +1,45 @@ +--- +title: "Distance Measures" +icon: octicons/cross-reference-24 +tags: + - Build + - Reference +--- +# Distance Measures + + +Distance Measures compute a distance metric between two sets of strings. + +**:octicons-people-24: Intended audience:** Linked Data Experts and Domain Experts + +| Name | Description | +|------------------------:| :--------- | +|[CJK reading distance](cjkReadingDistance.md) | CJK Reading Distance. | +|[Compare physical quantities](PhysicalQuantitiesDistance.md) | Computes the distance between two physical quantities. The distance is normalized to the SI base unit of the dimension. For instance for lengths, the distance will be in metres. Comparing incompatible units will yield a validation error. | +|[Constant similarity value](constantDistance.md) | Always returns a constant similarity value. | +|[Cosine](cosine.md) | Cosine Distance Measure. | +|[Date](date.md) | The distance in days between two dates ('YYYY-MM-DD' format). | +|[DateTime](dateTime.md) | Distance between two date time values (xsd:dateTime format) in seconds. | +|[Dice coefficient](dice.md) | Dice similarity coefficient. | +|[Geographical distance](wgs84.md) | Computes the geographical distance between two points. 
Author: Konrad Höffner (MOLE subgroup of Research Group AKSW, University of Leipzig) | +|[Greater than](greaterThan.md) | Checks if the source value is greater than the target value. If both strings are numbers, numerical order is used for comparison. Otherwise, alphanumerical order is used. | +|[Inequality](inequality.md) | Returns success if values are not equal, failure otherwise. | +|[Inside numeric interval](insideNumericInterval.md) | Checks if a number is contained inside a numeric interval, such as '1900 - 2000'. | +|[Is substring](isSubstring.md) | Checks if a source value is a substring of a target value. | +|[Jaccard](jaccard.md) | Jaccard similarity coefficient. Divides the matching tokens by the number of distinct tokens from both inputs. | +|[Jaro distance](jaro.md) | Matches strings based on the Jaro distance metric. | +|[Jaro-Winkler distance](jaroWinkler.md) | Matches strings based on the Jaro-Winkler distance measure. | +|[Korean phoneme distance](koreanPhonemeDistance.md) | Korean phoneme distance. | +|[Korean translit distance](koreanTranslitDistance.md) | Transliterated Korean distance. | +|[Levenshtein distance](levenshteinDistance.md) | Levenshtein distance. Returns a distance value between zero and the size of the string. | +|[Lower than](lowerThan.md) | Checks if the source value is lower than the target value. | +|[Normalized Levenshtein distance](levenshtein.md) | Normalized Levenshtein distance. Divides the edit distance by the length of the longer string. | +|[Numeric equality](numericEquality.md) | Compares values numerically instead of their string representation as the 'String Equality' operator does. Allows to set the needed precision of the comparison. A value of 0.0 means that the values must represent exactly the same (floating point) value, values higher than that allow for a margin of tolerance. | +|[Numeric similarity](num.md) | Computes the numeric distance between two numbers. 
| +|[qGrams](qGrams.md) | String similarity based on q-grams (by default q=2). | +|[Relaxed equality](relaxedEquality.md) | Return success if strings are equal, failure otherwise. Lower/upper case and differences like ö/o, n/ñ, c/ç etc. are treated as equal. | +|[Soft Jaccard](softjaccard.md) | Soft Jaccard similarity coefficient. Same as Jaccard distance but values within a Levenshtein distance of 'maxDistance' are considered equivalent. | +|[Starts with](startsWith.md) | Returns success if the first string starts with the second string, failure otherwise. | +|[String equality](equality.md) | Checks for equality of the string representation of the given values. Returns success if string values are equal, failure otherwise. For a numeric comparison of values use the 'Numeric Equality' comparator. | +|[Substring comparison](substringDistance.md) | Return 0 to 1 for strong similarity to weak similarity. Based on the paper: Stoilos, Giorgos, Giorgos Stamou, and Stefanos Kollias. "A string metric for ontology alignment." The Semantic Web-ISWC 2005. Springer Berlin Heidelberg, 2005. 624-637. | +|[Token-wise distance](tokenwiseDistance.md) | Token-wise string distance using the specified metric. | diff --git a/docs/build/reference/distancemeasure/inequality.md b/docs/build/reference/distancemeasure/inequality.md new file mode 100644 index 000000000..e13ea1d5f --- /dev/null +++ b/docs/build/reference/distancemeasure/inequality.md @@ -0,0 +1,66 @@ +--- +title: "Inequality" +description: "Returns success if values are not equal, failure otherwise." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Inequality + + + + + +### Characteristics +This is a boolean distance measure, i.e., all distances are either 0 or 1. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. +### Examples + +**Notation:** List of values are represented via square brackets.
Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Returns distance 0, if the values are different: + +* Input values: + - Source: `[max]` + - Target: `[john]` + +* Returns: → `0.0` + + +--- +#### Returns distance 1, if the values are equal: + +* Input values: + - Source: `[max]` + - Target: `[max]` + +* Returns: → `1.0` + + +--- +#### If multiple values are provided, returns 0, if at least one value does not match: + +* Input values: + - Source: `[max, helmut]` + - Target: `[max]` + +* Returns: → `0.0` + + +--- +#### If multiple values are provided, returns 1, if all values match: + +* Input values: + - Source: `[max, max]` + - Target: `[max, max]` + +* Returns: → `1.0` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/distancemeasure/insideNumericInterval.md b/docs/build/reference/distancemeasure/insideNumericInterval.md new file mode 100644 index 000000000..010ab8b55 --- /dev/null +++ b/docs/build/reference/distancemeasure/insideNumericInterval.md @@ -0,0 +1,29 @@ +--- +title: "Inside numeric interval" +description: "Checks if a number is contained inside a numeric interval, such as '1900 - 2000'." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Inside numeric interval + + + + + +### Characteristics +This is a boolean distance measure, i.e., all distances are either 0 or 1. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned.
+ +## Parameter + +### Separator + +No description + +- Datatype: `string` +- Default Value: `—|–|-` + + + diff --git a/docs/build/reference/distancemeasure/isSubstring.md b/docs/build/reference/distancemeasure/isSubstring.md new file mode 100644 index 000000000..0e9cb5ccf --- /dev/null +++ b/docs/build/reference/distancemeasure/isSubstring.md @@ -0,0 +1,29 @@ +--- +title: "Is substring" +description: "Checks if a source value is a substring of a target value." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Is substring + + + + + +### Characteristics +This is a boolean distance measure, i.e., all distances are either 0 or 1. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +### Reverse + +Reverse source and target inputs + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/distancemeasure/jaccard.md b/docs/build/reference/distancemeasure/jaccard.md new file mode 100644 index 000000000..5c99bd6fe --- /dev/null +++ b/docs/build/reference/distancemeasure/jaccard.md @@ -0,0 +1,66 @@ +--- +title: "Jaccard" +description: "Jaccard similarity coefficient. Divides the matching tokens by the number of distinct tokens from both inputs." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Jaccard + + + + + +### Characteristics +This distance measure is normalized, i.e., all distances are between 0 (exact match) and 1 (no similarity). + +Compares sets of multiple values. Typically, incoming values are tokenized before being fed into this measure. +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second".
+ +--- +#### Returns 0 for equal sets of values: + +* Input values: + - Source: `[A, B, C]` + - Target: `[B, C, A]` + +* Returns: → `0.0` + + +--- +#### Returns 1 if there is no overlap between both sets of tokens: + +* Input values: + - Source: `[A, B, C]` + - Target: `[D, E, F]` + +* Returns: → `1.0` + + +--- +#### Returns 0.5 if half of all unique tokens overlap: + +* Input values: + - Source: `[A, B, C]` + - Target: `[A, B, D]` + +* Returns: → `0.5` + + +--- +#### Returns 2/3 if one third of all unique tokens overlap: + +* Input values: + - Source: `[John, Jane]` + - Target: `[John, Max]` + +* Returns: → `0.6666666666666666` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/distancemeasure/jaro.md b/docs/build/reference/distancemeasure/jaro.md new file mode 100644 index 000000000..91161b33e --- /dev/null +++ b/docs/build/reference/distancemeasure/jaro.md @@ -0,0 +1,24 @@ +--- +title: "Jaro distance" +description: "Matches strings based on the Jaro distance metric." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Jaro distance + + + + +The Jaro distance measure calculates the similarity between two strings based on the number and order of common characters, the number of transpositions, and the length of the strings. The Jaro distance is 0 for a perfect match and 1 if there is no similarity between the given strings. + +For more information, please refer to: [https://en.wikipedia.org/wiki/Jaro–Winkler_distance](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance). + +### Characteristics +This distance measure is normalized, i.e., all distances are between 0 (exact match) and 1 (no similarity). + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. 
+ +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/distancemeasure/jaroWinkler.md b/docs/build/reference/distancemeasure/jaroWinkler.md new file mode 100644 index 000000000..1f896e34c --- /dev/null +++ b/docs/build/reference/distancemeasure/jaroWinkler.md @@ -0,0 +1,24 @@ +--- +title: "Jaro-Winkler distance" +description: "Matches strings based on the Jaro-Winkler distance measure." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Jaro-Winkler distance + + + + +The Jaro-Winkler distance measure is a variation of the Jaro distance metric. It takes into account the prefixes of the strings being compared and assigns higher weights to matching prefixes. + +For more information, please refer to: [https://en.wikipedia.org/wiki/Jaro–Winkler_distance](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance). + +### Characteristics +This distance measure is normalized, i.e., all distances are between 0 (exact match) and 1 (no similarity). + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/distancemeasure/koreanPhonemeDistance.md b/docs/build/reference/distancemeasure/koreanPhonemeDistance.md new file mode 100644 index 000000000..bdeb8bec4 --- /dev/null +++ b/docs/build/reference/distancemeasure/koreanPhonemeDistance.md @@ -0,0 +1,38 @@ +--- +title: "Korean phoneme distance" +description: "Korean phoneme distance." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Korean phoneme distance + + + + + +### Characteristics +This distance measure is not normalized, i.e., all distances start at 0 (exact match) and increase the more different the values are. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. 
+ +## Parameter + +### Min char + +No description + +- Datatype: `char` +- Default Value: `0` + + + +### Max char + +No description + +- Datatype: `char` +- Default Value: `z` + + + diff --git a/docs/build/reference/distancemeasure/koreanTranslitDistance.md b/docs/build/reference/distancemeasure/koreanTranslitDistance.md new file mode 100644 index 000000000..a10acac14 --- /dev/null +++ b/docs/build/reference/distancemeasure/koreanTranslitDistance.md @@ -0,0 +1,38 @@ +--- +title: "Korean translit distance" +description: "Transliterated Korean distance." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Korean translit distance + + + + + +### Characteristics +This distance measure is not normalized, i.e., all distances start at 0 (exact match) and increase the more different the values are. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +### Min char + +No description + +- Datatype: `char` +- Default Value: `0` + + + +### Max char + +No description + +- Datatype: `char` +- Default Value: `z` + + + diff --git a/docs/build/reference/distancemeasure/levenshtein.md b/docs/build/reference/distancemeasure/levenshtein.md new file mode 100644 index 000000000..27be51766 --- /dev/null +++ b/docs/build/reference/distancemeasure/levenshtein.md @@ -0,0 +1,92 @@ +--- +title: "Normalized Levenshtein distance" +description: "Normalized Levenshtein distance. Divides the edit distance by the length of the longer string." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Normalized Levenshtein distance + + + + + +### Characteristics +This distance measure is normalized, i.e., all distances are between 0 (exact match) and 1 (no similarity). + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. 
+### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Returns 0 for equal strings: + +* Input values: + - Source: `[John]` + - Target: `[John]` + +* Returns: → `0.0` + + +--- +#### Returns 1/4 if two strings of length 4 differ by one edit operation: + +* Input values: + - Source: `[John]` + - Target: `[Jxhn]` + +* Returns: → `0.25` + + +--- +#### Normalizes the edit distance by the length of the longer string: + +* Input values: + - Source: `[John]` + - Target: `[Jhn]` + +* Returns: → `0.25` + + +--- +#### Returns the maximum distance of 1 for completely different strings: + +* Input values: + - Source: `[John]` + - Target: `[Clara]` + +* Returns: → `1.0` + + + + +## Parameter + +### Q-grams size + +The size of the q-grams to be indexed. Setting this to zero will disable indexing. + +- Datatype: `int` +- Default Value: `2` + + + +### Min char + +The minimum character that is used for indexing + +- Datatype: `char` +- Default Value: `0` + + + +### Max char + +The maximum character that is used for indexing + +- Datatype: `char` +- Default Value: `z` + + + diff --git a/docs/build/reference/distancemeasure/levenshteinDistance.md b/docs/build/reference/distancemeasure/levenshteinDistance.md new file mode 100644 index 000000000..b049a148e --- /dev/null +++ b/docs/build/reference/distancemeasure/levenshteinDistance.md @@ -0,0 +1,82 @@ +--- +title: "Levenshtein distance" +description: "Levenshtein distance. Returns a distance value between zero and the size of the string." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Levenshtein distance + + + + + +### Characteristics +This distance measure is not normalized, i.e., all distances start at 0 (exact match) and increase the more different the values are. + +Compares single values (as opposed to sequences of values). 
If multiple values are provided, all values are compared and the lowest distance is returned. +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Returns 0 for equal strings: + +* Input values: + - Source: `[John]` + - Target: `[John]` + +* Returns: → `0.0` + + +--- +#### Returns 1 for strings that differ by one edit operation: + +* Input values: + - Source: `[John]` + - Target: `[Jxhn]` + +* Returns: → `1.0` + + +--- +#### Returns 3 for strings that differ by three edit operations: + +* Input values: + - Source: `[Saturday]` + - Target: `[Sunday]` + +* Returns: → `3.0` + + + + +## Parameter + +### Q-grams size + +The size of the q-grams to be indexed. Setting this to zero will disable indexing. + +- Datatype: `int` +- Default Value: `2` + + + +### Min char + +The minimum character that is used for indexing + +- Datatype: `char` +- Default Value: `0` + + + +### Max char + +The maximum character that is used for indexing + +- Datatype: `char` +- Default Value: `z` + + + diff --git a/docs/build/reference/distancemeasure/lowerThan.md b/docs/build/reference/distancemeasure/lowerThan.md new file mode 100644 index 000000000..d5c1196c5 --- /dev/null +++ b/docs/build/reference/distancemeasure/lowerThan.md @@ -0,0 +1,47 @@ +--- +title: "Lower than" +description: "Checks if the source value is lower than the target value." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Lower than + + + + + +### Characteristics +This is a boolean distance measure, i.e., all distances are either 0 or 1. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. 
+ +## Parameter + +### Or equal + +Accept equal values + +- Datatype: `boolean` +- Default Value: `false` + + + +### Order + +Per default, if both strings are numbers, numerical order is used for comparison. Otherwise, alphanumerical order is used. Choose a more specific order for improved performance. + +- Datatype: `enumeration` +- Default Value: `Autodetect` + + + +### Reverse + +Reverse source and target inputs + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/distancemeasure/num.md b/docs/build/reference/distancemeasure/num.md new file mode 100644 index 000000000..a0e77d914 --- /dev/null +++ b/docs/build/reference/distancemeasure/num.md @@ -0,0 +1,38 @@ +--- +title: "Numeric similarity" +description: "Computes the numeric distance between two numbers." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Numeric similarity + + + + + +### Characteristics +This distance measure is not normalized, i.e., all distances start at 0 (exact match) and increase the more different the values are. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +### Min index value + +The minimum number that is used for indexing + +- Datatype: `double` +- Default Value: `-Infinity` + + + +### Max index value + +The maximum number that is used for indexing + +- Datatype: `double` +- Default Value: `Infinity` + + + diff --git a/docs/build/reference/distancemeasure/numericEquality.md b/docs/build/reference/distancemeasure/numericEquality.md new file mode 100644 index 000000000..e57f234ca --- /dev/null +++ b/docs/build/reference/distancemeasure/numericEquality.md @@ -0,0 +1,80 @@ +--- +title: "Numeric equality" +description: "Compares values numerically instead of their string representation as the 'String Equality' operator does. Allows to set the needed precision of the comparison. 
A value of 0.0 means that the values must represent exactly the same (floating point) value, values higher than that allow for a margin of tolerance." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Numeric equality + + + + + +### Characteristics +This is a boolean distance measure, i.e., all distances are either 0 or 1. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Returns 0 for equal numbers: + +* Input values: + - Source: `[4.2]` + - Target: `[4.2]` + +* Returns: → `0.0` + + +--- +#### Returns 1 if at least one value is not a number: + +* Input values: + - Source: `[1]` + - Target: `[one]` + +* Returns: → `1.0` + + +--- +#### Returns 0 for numbers within the configured precision: + +* Parameters + * *precision*: `0.1` + +* Input values: + - Source: `[1.3]` + - Target: `[1.35]` + +* Returns: → `0.0` + + +--- +#### Returns 1 for numbers outside the configured precision: + +* Parameters + * *precision*: `0.1` + +* Input values: + - Source: `[1.3]` + - Target: `[1.5]` + +* Returns: → `1.0` + + + + +## Parameter + +### Precision + +The range of tolerance in floating point number comparisons. Must be 0 or a non-negative number smaller than 1. + +- Datatype: `double` +- Default Value: `0.0` + + + diff --git a/docs/build/reference/distancemeasure/qGrams.md b/docs/build/reference/distancemeasure/qGrams.md new file mode 100644 index 000000000..4a36cf770 --- /dev/null +++ b/docs/build/reference/distancemeasure/qGrams.md @@ -0,0 +1,82 @@ +--- +title: "qGrams" +description: "String similarity based on q-grams (by default q=2)." 
+icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# qGrams + + + + + +### Characteristics +This distance measure is normalized, i.e., all distances are between 0 (exact match) and 1 (no similarity). + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Returns 0.0 if the input strings are equal: + +* Input values: + - Source: `[abcd]` + - Target: `[abcd]` + +* Returns: → `0.0` + + +--- +#### Returns 1.0 if the input strings do not share a single q-gram: + +* Input values: + - Source: `[abcd]` + - Target: `[dcba]` + +* Returns: → `1.0` + + +--- +#### Returns 1 minus the matching q-grams divided by the total number of q-grams. Generated q-grams in this example: (#a, ab, b#) and (#a, ac, c#): + +* Input values: + - Source: `[ab]` + - Target: `[ac]` + +* Returns: → `0.8` + + + + +## Parameter + +### Q + +No description + +- Datatype: `int` +- Default Value: `2` + + + +### Min char + +The minimum character that is used for indexing + +- Datatype: `char` +- Default Value: `0` + + + +### Max char + +The maximum character that is used for indexing + +- Datatype: `char` +- Default Value: `z` + + + diff --git a/docs/build/reference/distancemeasure/relaxedEquality.md b/docs/build/reference/distancemeasure/relaxedEquality.md new file mode 100644 index 000000000..b465a6ca1 --- /dev/null +++ b/docs/build/reference/distancemeasure/relaxedEquality.md @@ -0,0 +1,21 @@ +--- +title: "Relaxed equality" +description: "Return success if strings are equal, failure otherwise. Lower/upper case and differences like ö/o, n/ñ, c/ç etc. are treated as equal." 
+icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Relaxed equality + + + + + +### Characteristics +This is a boolean distance measure, i.e., all distances are either 0 or 1. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/distancemeasure/softjaccard.md b/docs/build/reference/distancemeasure/softjaccard.md new file mode 100644 index 000000000..388c1819f --- /dev/null +++ b/docs/build/reference/distancemeasure/softjaccard.md @@ -0,0 +1,29 @@ +--- +title: "Soft Jaccard" +description: "Soft Jaccard similarity coefficient. Same as Jaccard distance but values within a Levenshtein distance of 'maxDistance' are considered equivalent." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Soft Jaccard + + + + + +### Characteristics +This distance measure is normalized, i.e., all distances are between 0 (exact match) and 1 (no similarity). + +Compares sets of multiple values. Typically, incoming values are tokenized before being fed into this measure. + +## Parameter + +### Max distance + +No description + +- Datatype: `int` +- Default Value: `1` + + + diff --git a/docs/build/reference/distancemeasure/startsWith.md b/docs/build/reference/distancemeasure/startsWith.md new file mode 100644 index 000000000..aec1c8077 --- /dev/null +++ b/docs/build/reference/distancemeasure/startsWith.md @@ -0,0 +1,47 @@ +--- +title: "Starts with" +description: "Returns success if the first string starts with the second string, failure otherwise." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Starts with + + + + + +### Characteristics +This is a boolean distance measure, i.e., all distances are either 0 or 1. + +Compares single values (as opposed to sequences of values). 
If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +### Reverse + +Reverse source and target values + +- Datatype: `boolean` +- Default Value: `false` + + + +### Min length + +The minimum length of the string being contained. + +- Datatype: `int` +- Default Value: `2` + + + +### Max length + +The potential maximum length of the strings that must match. If the max length is greater than the length of the string to match, the full string must match. + +- Datatype: `int` +- Default Value: `2147483647` + + + diff --git a/docs/build/reference/distancemeasure/substringDistance.md b/docs/build/reference/distancemeasure/substringDistance.md new file mode 100644 index 000000000..7bb2b19cd --- /dev/null +++ b/docs/build/reference/distancemeasure/substringDistance.md @@ -0,0 +1,29 @@ +--- +title: "Substring comparison" +description: "Return 0 to 1 for strong similarity to weak similarity. Based on the paper: Stoilos, Giorgos, Giorgos Stamou, and Stefanos Kollias. 'A string metric for ontology alignment.' The Semantic Web-ISWC 2005. Springer Berlin Heidelberg, 2005. 624-637." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Substring comparison + + + + + +### Characteristics +This distance measure is normalized, i.e., all distances are between 0 (exact match) and 1 (no similarity). + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +### Granularity + +The minimum length of a possible substring match. 
+ +- Datatype: `string` +- Default Value: `3` + + + diff --git a/docs/build/reference/distancemeasure/tokenwiseDistance.md b/docs/build/reference/distancemeasure/tokenwiseDistance.md new file mode 100644 index 000000000..a72232eb9 --- /dev/null +++ b/docs/build/reference/distancemeasure/tokenwiseDistance.md @@ -0,0 +1,110 @@ +--- +title: "Token-wise distance" +description: "Token-wise string distance using the specified metric." +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Token-wise distance + + + + + +### Characteristics +This distance measure is normalized, i.e., all distances are between 0 (exact match) and 1 (no similarity). + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +### Ignore case + +No description + +- Datatype: `boolean` +- Default Value: `true` + + + +### Metric name + +No description + +- Datatype: `string` +- Default Value: `levenshtein` + + + +### Split regex + +No description + +- Datatype: `string` +- Default Value: `[\s\d\p{Punct}]+` + + + +### Stopwords + +No description + +- Datatype: `string` +- Default Value: `None` + + + +### Stopword weight + +Weight assigned to stopwords + +- Datatype: `double` +- Default Value: `0.01` + + + +### Non stopword weight + +Weight assigned to non-stopwords + +- Datatype: `double` +- Default Value: `0.1` + + + +### Use incremental idf weights + +Use incremental IDF weights + +- Datatype: `boolean` +- Default Value: `false` + + + +### Match threshold + +No description + +- Datatype: `double` +- Default Value: `0.0` + + + +### Ordering impact + +No description + +- Datatype: `double` +- Default Value: `0.0` + + + +### Adjust by token length + +No description + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/distancemeasure/wgs84.md b/docs/build/reference/distancemeasure/wgs84.md new file mode 100644 index 
000000000..9bc9a7779 --- /dev/null +++ b/docs/build/reference/distancemeasure/wgs84.md @@ -0,0 +1,29 @@ +--- +title: "Geographical distance" +description: "Computes the geographical distance between two points. Author: Konrad Höffner (MOLE subgroup of Research Group AKSW, University of Leipzig)" +icon: octicons/cross-reference-24 +tags: + - DistanceMeasure +--- +# Geographical distance + + + + + +### Characteristics +This distance measure is not normalized, i.e., all distances start at 0 (exact match) and increase the more different the values are. + +Compares single values (as opposed to sequences of values). If multiple values are provided, all values are compared and the lowest distance is returned. + +## Parameter + +### Unit + +No description + +- Datatype: `string` +- Default Value: `km` + + + diff --git a/docs/build/reference/index.md b/docs/build/reference/index.md new file mode 100644 index 000000000..3aa61a3e1 --- /dev/null +++ b/docs/build/reference/index.md @@ -0,0 +1,45 @@ +--- +title: "Task and Operator Reference" +icon: octicons/cross-reference-24 +tags: + - Build + - Reference +--- +# Task and Operator Reference + + +**:octicons-people-24: Intended audience:** Linked Data Experts and Domain Experts + +
+ +- [Aggregators](aggregator/index.md) + + --- + + This kind of task aggregates multiple similarity scores. + +- [Custom Workflow Tasks](customtask/index.md) + + --- + + An operator that can be used in a workflow. + +- [Datasets](dataset/index.md) + + --- + + A collection of data that can be read or written. + +- [Distance Measures](distancemeasure/index.md) + + --- + + Computes the distance between two sets of strings. + +- [Transformers](transformer/index.md) + + --- + + Transforms a sequence of string values. + +
\ No newline at end of file diff --git a/docs/build/reference/transformer/.pages b/docs/build/reference/transformer/.pages new file mode 100644 index 000000000..096b25530 --- /dev/null +++ b/docs/build/reference/transformer/.pages @@ -0,0 +1,231 @@ +nav: + - index.md + - "Abs": Excel_ABS.md + - "Acos": Excel_ACOS.md + - "Acosh": Excel_ACOSH.md + - "Aggregate numbers": aggregateNumbers.md + - "And": Excel_AND.md + - "Asin": Excel_ASIN.md + - "Asinh": Excel_ASINH.md + - "Atan": Excel_ATAN.md + - "Atan2": Excel_ATAN2.md + - "Atanh": Excel_ATANH.md + - "Avedev": Excel_AVEDEV.md + - "Average": Excel_AVERAGE.md + - "Averagea": Excel_AVERAGEA.md + - "Camel case": camelCase.md + - "Camel case tokenizer": camelcasetokenizer.md + - "Capitalize": capitalize.md + - "Ceiling": Excel_CEILING.md + - "Choose": Excel_CHOOSE.md + - "Clean": Excel_CLEAN.md + - "Clean HTML": htmlCleaner.md + - "Coalesce (first non-empty input)": coalesce.md + - "Code": Excel_CODE.md + - "Combin": Excel_COMBIN.md + - "Compare dates": compareDates.md + - "Compare numbers": compareNumbers.md + - "Concatenate": concat.md + - "Concatenate multiple values": concatMultiValues.md + - "Concatenate pairwise": concatPairwise.md + - "Constant": constant.md + - "Constant URI": constantUri.md + - "Contains all of": containsAllOf.md + - "Contains any of": containsAnyOf.md + - "Convert charset": convertCharset.md + - "Convert currency values": cmem_plugin_currencies-transform.md + - "Convert Number Base": cmem-plugin-number-conversion.md + - "Correl": Excel_CORREL.md + - "Cos": Excel_COS.md + - "Cosh": Excel_COSH.md + - "Count": Excel_COUNT.md + - "Count values": count.md + - "Counta": Excel_COUNTA.md + - "Covar": Excel_COVAR.md + - "Current date": currentDate.md + - "Dataset parameter": datasetParameter.md + - "Date to timestamp": datetoTimestamp.md + - "Default Value": defaultValue.md + - "Degrees": Excel_DEGREES.md + - "Devsq": Excel_DEVSQ.md + - "Duration": duration.md + - "Duration in days": durationInDays.md + 
- "Duration in seconds": durationInSeconds.md + - "Duration in years": durationInYears.md + - "Empty value": emptyValue.md + - "Encode URL": urlEncode.md + - "Evaluate template": TemplateTransformer.md + - "Even": Excel_EVEN.md + - "Exact": Excel_EXACT.md + - "Excel map": excelMap.md + - "Exp": Excel_EXP.md + - "Extract physical quantity": extractPhysicalQuantity.md + - "Fact": Excel_FACT.md + - "False": Excel_FALSE.md + - "File hash": fileHash.md + - "Filter by length": filterByLength.md + - "Filter by regex": filterByRegex.md + - "Find": Excel_FIND.md + - "Fix URI": uriFix.md + - "Floor": Excel_FLOOR.md + - "Forecast": Excel_FORECAST.md + - "Format number": formatNumber.md + - "Fv": Excel_FV.md + - "Geomean": Excel_GEOMEAN.md + - "Get value by index": getValueByIndex.md + - "If": Excel_IF.md + - "If contains": ifContains.md + - "If exists": ifExists.md + - "If matches regex": ifMatchesRegex.md + - "Input file attributes": inputFileAttributes.md + - "Input hash": inputHash.md + - "Input task attributes": inputTaskAttributes.md + - "Int": Excel_INT.md + - "Intercept": Excel_INTERCEPT.md + - "Ipmt": Excel_IPMT.md + - "Irr": Excel_IRR.md + - "jq": cmem-plugin-jq-transform.md + - "Large": Excel_LARGE.md + - "Left": Excel_LEFT.md + - "Ln": Excel_LN.md + - "Log": Excel_LOG.md + - "Log10": Excel_LOG10.md + - "Logarithm": log.md + - "Lower case": lowerCase.md + - "Map": map.md + - "Map with default": mapWithDefaultInput.md + - "Max": Excel_MAX.md + - "Maxa": Excel_MAXA.md + - "Median": Excel_MEDIAN.md + - "Merge": merge.md + - "Metaphone": metaphone.md + - "Mid": Excel_MID.md + - "Min": Excel_MIN.md + - "Mina": Excel_MINA.md + - "Mirr": Excel_MIRR.md + - "Mod": Excel_MOD.md + - "Mode": Excel_MODE.md + - "Negate binary (NOT)": negateTransformer.md + - "Normalize chars": normalizeChars.md + - "Normalize physical quantity": PhysicalQuantitiesNormalizer.md + - "Normdist": Excel_NORMDIST.md + - "Norminv": Excel_NORMINV.md + - "Normsdist": Excel_NORMSDIST.md + - "Normsinv": 
Excel_NORMSINV.md + - "Not": Excel_NOT.md + - "Nper": Excel_NPER.md + - "Npv": Excel_NPV.md + - "Number to duration": numberToDuration.md + - "Numeric operation": numOperation.md + - "Numeric reduce": numReduce.md + - "NYSIIS": NYSIIS.md + - "Odd": Excel_ODD.md + - "Or": Excel_OR.md + - "Parse date": DateTypeParser.md + - "Parse date pattern": parseDate.md + - "Parse float": FloatTypeParser.md + - "Parse geo coordinate": GeoCoordinateParser.md + - "Parse geo location": GeoLocationParser.md + - "Parse integer": IntegerParser.md + - "Parse ISIN": IsinParser.md + - "Parse SKOS term": SkosTypeParser.md + - "Parse string": StringParser.md + - "Pearson": Excel_PEARSON.md + - "Percentile": Excel_PERCENTILE.md + - "Percentrank": Excel_PERCENTRANK.md + - "Pi": Excel_PI.md + - "Pmt": Excel_PMT.md + - "Poisson": Excel_POISSON.md + - "Power": Excel_POWER.md + - "Ppmt": Excel_PPMT.md + - "Product": Excel_PRODUCT.md + - "Proper": Excel_PROPER.md + - "Pv": Excel_PV.md + - "Radians": Excel_RADIANS.md + - "Rand": Excel_RAND.md + - "Random number": randomNumber.md + - "Rank": Excel_RANK.md + - "Rate": Excel_RATE.md + - "Read parameter": readParameter.md + - "Regex extract": regexExtract.md + - "Regex replace": regexReplace.md + - "Regex selection": regexSelect.md + - "Remove blanks": removeBlanks.md + - "Remove duplicates": removeDuplicates.md + - "Remove empty values": removeEmptyValues.md + - "Remove parentheses": removeParentheses.md + - "Remove special chars": removeSpecialChars.md + - "Remove stopwords": removeStopwords.md + - "Remove stopwords (remote stopword list)": removeRemoteStopwords.md + - "Remove values": removeValues.md + - "Replace": replace.md + - "Replace": Excel_REPLACE.md + - "Rept": Excel_REPT.md + - "Retrieve coordinates": RetrieveCoordinates.md + - "Retrieve latitude": RetrieveLatitude.md + - "Retrieve longitude": RetrieveLongitude.md + - "Right": Excel_RIGHT.md + - "Roman": Excel_ROMAN.md + - "Round": Excel_ROUND.md + - "Rounddown": Excel_ROUNDDOWN.md + - 
"Roundup": Excel_ROUNDUP.md + - "Search": Excel_SEARCH.md + - "Sequence values to indexes": toSequenceIndex.md + - "Sign": Excel_SIGN.md + - "Sin": Excel_SIN.md + - "Sinh": Excel_SINH.md + - "Slope": Excel_SLOPE.md + - "Small": Excel_SMALL.md + - "Sort": sort.md + - "Sort words": sortWords.md + - "Soundex": soundex.md + - "Sqrt": Excel_SQRT.md + - "Standardize": Excel_STANDARDIZE.md + - "Stdev": Excel_STDEV.md + - "Stdeva": Excel_STDEVA.md + - "Stdevp": Excel_STDEVP.md + - "Stdevpa": Excel_STDEVPA.md + - "Stem": stem.md + - "Strip non-alphabetic characters": alphaReduce.md + - "Strip postfix": stripPostfix.md + - "Strip prefix": stripPrefix.md + - "Strip URI prefix": stripUriPrefix.md + - "Substitute": Excel_SUBSTITUTE.md + - "Substring": substring.md + - "Sum": Excel_SUM.md + - "Sumproduct": Excel_SUMPRODUCT.md + - "Sumsq": Excel_SUMSQ.md + - "Sumx2my2": Excel_SUMX2MY2.md + - "Sumx2py2": Excel_SUMX2PY2.md + - "Sumxmy2": Excel_SUMXMY2.md + - "Tan": Excel_TAN.md + - "Tanh": Excel_TANH.md + - "Tdist": Excel_TDIST.md + - "Timestamp to date": timeToDate.md + - "Tokenize": tokenize.md + - "Trim": trim.md + - "True": Excel_TRUE.md + - "Trunc": Excel_TRUNC.md + - "ULID": cmem-plugin-ulid.md + - "Until character": untilCharacter.md + - "Upper case": upperCase.md + - "UUID": uuid.md + - "UUID Convert": cmem_plugin_uuid-plugin_uuid-UUIDConvert.md + - "UUID Version": cmem_plugin_uuid-plugin_uuid-UUIDVersion.md + - "UUID1": cmem_plugin_uuid-plugin_uuid-UUID1.md + - "UUID1 to UUID6": cmem_plugin_uuid-plugin_uuid-UUID1ToUUID6.md + - "UUID3": cmem_plugin_uuid-plugin_uuid-UUID3.md + - "UUID4": cmem_plugin_uuid-plugin_uuid-UUID4.md + - "UUID5": cmem_plugin_uuid-plugin_uuid-UUID5.md + - "UUID6": cmem_plugin_uuid-plugin_uuid-UUID6.md + - "UUID7": cmem_plugin_uuid-plugin_uuid-UUID7.md + - "UUID8": cmem_plugin_uuid-plugin_uuid-UUID8.md + - "Validate date after": validateDateAfter.md + - "Validate date range": validateDateRange.md + - "Validate number of values": 
validateNumberOfValues.md + - "Validate numeric range": validateNumericRange.md + - "Validate regex": validateRegex.md + - "Var": Excel_VAR.md + - "Vara": Excel_VARA.md + - "Varp": Excel_VARP.md + - "Varpa": Excel_VARPA.md \ No newline at end of file diff --git a/docs/build/reference/transformer/DateTypeParser.md b/docs/build/reference/transformer/DateTypeParser.md new file mode 100644 index 000000000..988cd3505 --- /dev/null +++ b/docs/build/reference/transformer/DateTypeParser.md @@ -0,0 +1,340 @@ +--- +title: "Parse date" +description: "Parses and normalizes dates in different formats." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Parse date + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *inputDateFormatId*: `German style date format` + * *outputDateFormatId*: `w3c Date` + +* Input values: + 1. `[20.03.1999]` + +* Returns: + + → `[1999-03-20]` + + +--- +#### Example 2: + +* Parameters + * *inputDateFormatId*: `w3c Date` + * *outputDateFormatId*: `German style date format` + +* Input values: + 1. `[1999-03-20]` + +* Returns: + + → `[20.03.1999]` + + +--- +#### Example 3: + +* Parameters + * *inputDateFormatId*: `common ISO8601` + * *outputDateFormatId*: `w3c Date` + +* Input values: + 1. `[2017-04-04T00:00:00.000+02:00]` + +* Returns: + + → `[2017-04-04]` + + +--- +#### Example 4: + +* Parameters + * *inputDateFormatId*: `common ISO8601` + * *outputDateFormatId*: `w3c Date` + +* Input values: + 1. `[2017-04-04T00:00:00+02:00]` + +* Returns: + + → `[2017-04-04]` + + +--- +#### Example 5: + +* Parameters + * *inputDateFormatId*: `common ISO8601` + * *outputDateFormatId*: `dateTime with month abbr. (US)` + +* Input values: + 1. 
`[2021-06-24T14:50:05.895+02:00]` + +* Returns: + + → `[24-Jun-2021 14:50:05 +02:00]` + + +--- +#### Example 6: + +* Parameters + * *inputDateFormatId*: `dateTime with month abbr. (US)` + * *outputDateFormatId*: `dateTime with month abbr. (DE)` + +* Input values: + 1. `[24-Dec-2021 14:50:05 +02:00]` + +* Returns: + + → `[24-Dez.-2021 14:50:05 +02:00]` + + +--- +#### Example 7: + +* Parameters + * *alternativeInputFormat*: `dd.MM.yyyy HH:mm.ss` + * *alternativeOutputFormat*: `yyyy-MM-dd'T'HH:mm.ss` + +* Input values: + 1. `[20.03.1999 20:34.44]` + +* Returns: + + → `[1999-03-20T20:34.44]` + + +--- +#### Example 8: + +* Parameters + * *inputDateFormatId*: `excelDateTime` + * *outputDateFormatId*: `xsdTime` + +* Input values: + 1. `[12:20:00.000]` + +* Returns: + + → `[12:20:00.000]` + + +--- +#### Example 9: + +* Parameters + * *inputDateFormatId*: `w3c YearMonth` + * *outputDateFormatId*: `w3c Month` + +* Input values: + 1. `[2020-01]` + +* Returns: + + → `[--01]` + + +--- +#### Example 10: + +* Parameters + * *inputDateFormatId*: `w3c MonthDay` + * *outputDateFormatId*: `w3c Day` + +* Input values: + 1. `[--12-31]` + +* Returns: + + → `[---31]` + + +--- +#### Example 11: + +* Parameters + * *inputDateFormatId*: `w3c Date` + * *outputDateFormatId*: `w3c MonthDay` + +* Input values: + 1. `[2020-12-31]` + +* Returns: + + → `[--12-31]` + + +--- +#### Example 12: + +* Parameters + * *inputDateFormatId*: `w3c MonthDay` + * *outputDateFormatId*: `w3c Date` + +* Input values: + 1. `[--12-31]` + +* Returns: + + → `[]` + + +--- +#### Example 13: + +* Parameters + * *alternativeInputFormat*: `yyyy-MM-dd HH:mm:ss.SSS` + * *outputDateFormatId*: `w3cDateTime` + +* Input values: + 1. `[2020-02-22 16:34:14.000]` + +* Returns: + + → `[2020-02-22T16:34:14]` + + +--- +#### Example 14: + +* Parameters + * *inputDateFormatId*: `dateTime with month abbr. (DE)` + * *outputDateFormatId*: `dateTime with month abbr. 
(US)` + * *inputLocale*: `en_US` + * *outputLocale*: `de` + +* Input values: + 1. `[24-Dec-2021 14:50:05 +02:00]` + +* Returns: + + → `[24-Dez.-2021 14:50:05 +02:00]` + + +--- +#### Example 15: + +* Parameters + * *inputDateFormatId*: `dateTime with month abbr. (US)` + * *outputDateFormatId*: `dateTime with month abbr. (DE)` + * *inputLocale*: `de` + * *outputLocale*: `en` + +* Input values: + 1. `[24-Dez.-2021 14:50:05 +02:00]` + +* Returns: + + → `[24-Dec-2021 14:50:05 +02:00]` + + +--- +#### Example 16: + +* Parameters + * *outputLocale*: `fr` + * *alternativeInputFormat*: `MMM yyyy` + * *outputDateFormatId*: `dateTime with month abbr. (DE)` + * *inputLocale*: `de` + * *alternativeOutputFormat*: `MMM uuuu` + * *inputDateFormatId*: `dateTime with month abbr. (US)` + +* Input values: + 1. `[Dez. 2021]` + +* Returns: + + → `[déc. 2021]` + + +--- +#### Example 17: + +* Parameters + * *alternativeInputFormat*: `MMMM, uuuu` + * *alternativeOutputFormat*: `MMMM, uuuu` + * *inputLocale*: `en_US` + * *outputLocale*: `de` + +* Input values: + 1. `[February, 2024]` + +* Returns: + + → `[Februar, 2024]` + + + + +## Parameter + +### Input format + +The input date/time format used for parsing the date/time string. + +- Datatype: `option[enumeration]` +- Default Value: `w3c Date` + + + +### Alternative input format + +An input format string that should be used instead of the selected input format. Java DateFormat string. + +- Datatype: `string` +- Default Value: `None` + + + +### Alternative input locale + +Optional locale for the (alternative) input format. If not set the system's locale will be used or the locale of the input format, if set. + +- Datatype: `option[locale]` +- Default Value: `None` + + + +### Output format + +The output date/time format used for formatting the date/time string. 
+ +- Datatype: `option[enumeration]` +- Default Value: `w3c Date` + + + +### Alternative output format + +An output format string that should be used instead of the selected output format. Java DateFormat string. + +- Datatype: `string` +- Default Value: `None` + + + +### Alternative output locale + +Optional locale for the (alternative) output format. If not set the system's locale will be used or the locale of the output format, if set. + +- Datatype: `option[locale]` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/Excel_ABS.md b/docs/build/reference/transformer/Excel_ABS.md new file mode 100644 index 000000000..24ed4717e --- /dev/null +++ b/docs/build/reference/transformer/Excel_ABS.md @@ -0,0 +1,25 @@ +--- +title: "Abs" +description: "Excel ABS(number): Returns the absolute value of the given number." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Abs + + + + +Excel ABS(number): Returns the absolute value of the given number. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ABS` + + + diff --git a/docs/build/reference/transformer/Excel_ACOS.md b/docs/build/reference/transformer/Excel_ACOS.md new file mode 100644 index 000000000..c0839adfa --- /dev/null +++ b/docs/build/reference/transformer/Excel_ACOS.md @@ -0,0 +1,25 @@ +--- +title: "Acos" +description: "Excel ACOS(number): Returns the inverse cosine of the given number in radians." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Acos + + + + +Excel ACOS(number): Returns the inverse cosine of the given number in radians. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ACOS` + + + diff --git a/docs/build/reference/transformer/Excel_ACOSH.md b/docs/build/reference/transformer/Excel_ACOSH.md new file mode 100644 index 000000000..f61ee22c0 --- /dev/null +++ b/docs/build/reference/transformer/Excel_ACOSH.md @@ -0,0 +1,25 @@ +--- +title: "Acosh" +description: "Excel ACOSH(number): Returns the inverse hyperbolic cosine of the given number in radians." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Acosh + + + + +Excel ACOSH(number): Returns the inverse hyperbolic cosine of the given number in radians. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ACOSH` + + + diff --git a/docs/build/reference/transformer/Excel_AND.md b/docs/build/reference/transformer/Excel_AND.md new file mode 100644 index 000000000..553bbff5a --- /dev/null +++ b/docs/build/reference/transformer/Excel_AND.md @@ -0,0 +1,25 @@ +--- +title: "And" +description: "Excel AND(argument1; argument2 ...argument30): Returns TRUE if all the arguments are considered TRUE, and FALSE otherwise." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# And + + + + +Excel AND(argument1; argument2 ...argument30): Returns TRUE if all the arguments are considered TRUE, and FALSE otherwise. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `AND` + + + diff --git a/docs/build/reference/transformer/Excel_ASIN.md b/docs/build/reference/transformer/Excel_ASIN.md new file mode 100644 index 000000000..b3e077ed7 --- /dev/null +++ b/docs/build/reference/transformer/Excel_ASIN.md @@ -0,0 +1,25 @@ +--- +title: "Asin" +description: "Excel ASIN(number): Returns the inverse sine of the given number in radians." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Asin + + + + +Excel ASIN(number): Returns the inverse sine of the given number in radians. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ASIN` + + + diff --git a/docs/build/reference/transformer/Excel_ASINH.md b/docs/build/reference/transformer/Excel_ASINH.md new file mode 100644 index 000000000..9c8a62bd0 --- /dev/null +++ b/docs/build/reference/transformer/Excel_ASINH.md @@ -0,0 +1,25 @@ +--- +title: "Asinh" +description: "Excel ASINH(number): Returns the inverse hyperbolic sine of the given number in radians." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Asinh + + + + +Excel ASINH(number): Returns the inverse hyperbolic sine of the given number in radians. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ASINH` + + + diff --git a/docs/build/reference/transformer/Excel_ATAN.md b/docs/build/reference/transformer/Excel_ATAN.md new file mode 100644 index 000000000..5c9a820cf --- /dev/null +++ b/docs/build/reference/transformer/Excel_ATAN.md @@ -0,0 +1,25 @@ +--- +title: "Atan" +description: "Excel ATAN(number): Returns the inverse tangent of the given number in radians." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Atan + + + + +Excel ATAN(number): Returns the inverse tangent of the given number in radians. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ATAN` + + + diff --git a/docs/build/reference/transformer/Excel_ATAN2.md b/docs/build/reference/transformer/Excel_ATAN2.md new file mode 100644 index 000000000..991973609 --- /dev/null +++ b/docs/build/reference/transformer/Excel_ATAN2.md @@ -0,0 +1,25 @@ +--- +title: "Atan2" +description: "Excel ATAN2(number_x; number_y): Returns the inverse tangent of the specified x and y coordinates. 
Number_x is the value for the x coordinate. Number_y is the value for the y coordinate." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Atan2 + + + + +Excel ATAN2(number_x; number_y): Returns the inverse tangent of the specified x and y coordinates. Number_x is the value for the x coordinate. Number_y is the value for the y coordinate. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ATAN2` + + + diff --git a/docs/build/reference/transformer/Excel_ATANH.md b/docs/build/reference/transformer/Excel_ATANH.md new file mode 100644 index 000000000..fa37c2f47 --- /dev/null +++ b/docs/build/reference/transformer/Excel_ATANH.md @@ -0,0 +1,25 @@ +--- +title: "Atanh" +description: "Excel ATANH(number): Returns the inverse hyperbolic tangent of the given number. (Angle is returned in radians.)" +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Atanh + + + + +Excel ATANH(number): Returns the inverse hyperbolic tangent of the given number. (Angle is returned in radians.) + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ATANH` + + + diff --git a/docs/build/reference/transformer/Excel_AVEDEV.md b/docs/build/reference/transformer/Excel_AVEDEV.md new file mode 100644 index 000000000..3ee6e63c3 --- /dev/null +++ b/docs/build/reference/transformer/Excel_AVEDEV.md @@ -0,0 +1,25 @@ +--- +title: "Avedev" +description: "Excel AVEDEV(number1; number2; ... number_30): Returns the average of the absolute deviations of data points from their mean. Displays the diffusion in a data set. Number_1; number_2; ... number_30 are values or ranges that represent a sample. Each number can also be replaced by a reference." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Avedev + + + + +Excel AVEDEV(number1; number2; ... 
number_30): Returns the average of the absolute deviations of data points from their mean. Displays the diffusion in a data set. Number_1; number_2; ... number_30 are values or ranges that represent a sample. Each number can also be replaced by a reference. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `AVEDEV` + + + diff --git a/docs/build/reference/transformer/Excel_AVERAGE.md b/docs/build/reference/transformer/Excel_AVERAGE.md new file mode 100644 index 000000000..fc778c356 --- /dev/null +++ b/docs/build/reference/transformer/Excel_AVERAGE.md @@ -0,0 +1,25 @@ +--- +title: "Average" +description: "Excel AVERAGE(number_1; number_2; ... number_30): Returns the average of the arguments. Number_1; number_2; ... number_30 are numerical values or ranges. Text is ignored." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Average + + + + +Excel AVERAGE(number_1; number_2; ... number_30): Returns the average of the arguments. Number_1; number_2; ... number_30 are numerical values or ranges. Text is ignored. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `AVERAGE` + + + diff --git a/docs/build/reference/transformer/Excel_AVERAGEA.md b/docs/build/reference/transformer/Excel_AVERAGEA.md new file mode 100644 index 000000000..424497f28 --- /dev/null +++ b/docs/build/reference/transformer/Excel_AVERAGEA.md @@ -0,0 +1,25 @@ +--- +title: "Averagea" +description: "Excel AVERAGEA(value_1; value_2; ... value_30): Returns the average of the arguments. The value of a text is 0. Value_1; value_2; ... value_30 are values or ranges." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Averagea + + + + +Excel AVERAGEA(value_1; value_2; ... value_30): Returns the average of the arguments. The value of a text is 0. Value_1; value_2; ... value_30 are values or ranges. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `AVERAGEA` + + + diff --git a/docs/build/reference/transformer/Excel_CEILING.md b/docs/build/reference/transformer/Excel_CEILING.md new file mode 100644 index 000000000..bd641283b --- /dev/null +++ b/docs/build/reference/transformer/Excel_CEILING.md @@ -0,0 +1,25 @@ +--- +title: "Ceiling" +description: "Excel CEILING(number; significance; mode): Rounds the given number to the nearest integer or multiple of significance. Significance is the value to whose multiple of ten the value is to be rounded up (.01, .1, 1, 10, etc.). Mode is an optional value. If it is indicated and non-zero and if the number and significance are negative, rounding up is carried out based on that value." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Ceiling + + + + +Excel CEILING(number; significance; mode): Rounds the given number to the nearest integer or multiple of significance. Significance is the value to whose multiple of ten the value is to be rounded up (.01, .1, 1, 10, etc.). Mode is an optional value. If it is indicated and non-zero and if the number and significance are negative, rounding up is carried out based on that value. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `CEILING` + + + diff --git a/docs/build/reference/transformer/Excel_CHOOSE.md b/docs/build/reference/transformer/Excel_CHOOSE.md new file mode 100644 index 000000000..ea54401c8 --- /dev/null +++ b/docs/build/reference/transformer/Excel_CHOOSE.md @@ -0,0 +1,25 @@ +--- +title: "Choose" +description: "Excel CHOOSE(index; value1; ... value30): Uses an index to return a value from a list of up to 30 values. Index is a reference or number between 1 and 30 indicating which value is to be taken from the list. Value1; ... value30 is the list of values entered as a reference to a cell or as individual values." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Choose + + + + +Excel CHOOSE(index; value1; ... value30): Uses an index to return a value from a list of up to 30 values. Index is a reference or number between 1 and 30 indicating which value is to be taken from the list. Value1; ... value30 is the list of values entered as a reference to a cell or as individual values. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `CHOOSE` + + + diff --git a/docs/build/reference/transformer/Excel_CLEAN.md b/docs/build/reference/transformer/Excel_CLEAN.md new file mode 100644 index 000000000..04357dd04 --- /dev/null +++ b/docs/build/reference/transformer/Excel_CLEAN.md @@ -0,0 +1,25 @@ +--- +title: "Clean" +description: "Excel CLEAN(text): Removes all non-printing characters from the string. Text refers to the text from which to remove all non-printable characters." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Clean + + + + +Excel CLEAN(text): Removes all non-printing characters from the string. Text refers to the text from which to remove all non-printable characters. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `CLEAN` + + + diff --git a/docs/build/reference/transformer/Excel_CODE.md b/docs/build/reference/transformer/Excel_CODE.md new file mode 100644 index 000000000..2a04cd2b2 --- /dev/null +++ b/docs/build/reference/transformer/Excel_CODE.md @@ -0,0 +1,25 @@ +--- +title: "Code" +description: "Excel CODE(text): Returns a numeric code for the first character in a text string. Text is the text for which the code of the first character is to be found." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Code + + + + +Excel CODE(text): Returns a numeric code for the first character in a text string. Text is the text for which the code of the first character is to be found. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `CODE` + + + diff --git a/docs/build/reference/transformer/Excel_COMBIN.md b/docs/build/reference/transformer/Excel_COMBIN.md new file mode 100644 index 000000000..490a0ffd2 --- /dev/null +++ b/docs/build/reference/transformer/Excel_COMBIN.md @@ -0,0 +1,25 @@ +--- +title: "Combin" +description: "Excel COMBIN(count_1; count_2): Returns the number of combinations for a given number of objects. Count_1 is the total number of elements. Count_2 is the selected count from the elements. This is the same as the nCr function on a calculator." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Combin + + + + +Excel COMBIN(count_1; count_2): Returns the number of combinations for a given number of objects. Count_1 is the total number of elements. Count_2 is the selected count from the elements. This is the same as the nCr function on a calculator. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `COMBIN` + + + diff --git a/docs/build/reference/transformer/Excel_CORREL.md b/docs/build/reference/transformer/Excel_CORREL.md new file mode 100644 index 000000000..cfc0de35b --- /dev/null +++ b/docs/build/reference/transformer/Excel_CORREL.md @@ -0,0 +1,25 @@ +--- +title: "Correl" +description: "Excel CORREL(data_1; data_2): Returns the correlation coefficient between two data sets. Data_1 is the first data set. Data_2 is the second data set." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Correl + + + + +Excel CORREL(data_1; data_2): Returns the correlation coefficient between two data sets. Data_1 is the first data set. Data_2 is the second data set. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `CORREL` + + + diff --git a/docs/build/reference/transformer/Excel_COS.md b/docs/build/reference/transformer/Excel_COS.md new file mode 100644 index 000000000..6232cbac2 --- /dev/null +++ b/docs/build/reference/transformer/Excel_COS.md @@ -0,0 +1,25 @@ +--- +title: "Cos" +description: "Excel COS(number): Returns the cosine of the given number (angle in radians)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Cos + + + + +Excel COS(number): Returns the cosine of the given number (angle in radians). + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `COS` + + + diff --git a/docs/build/reference/transformer/Excel_COSH.md b/docs/build/reference/transformer/Excel_COSH.md new file mode 100644 index 000000000..850ae3376 --- /dev/null +++ b/docs/build/reference/transformer/Excel_COSH.md @@ -0,0 +1,25 @@ +--- +title: "Cosh" +description: "Excel COSH(number): Returns the hyperbolic cosine of the given number (angle in radians)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Cosh + + + + +Excel COSH(number): Returns the hyperbolic cosine of the given number (angle in radians). + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `COSH` + + + diff --git a/docs/build/reference/transformer/Excel_COUNT.md b/docs/build/reference/transformer/Excel_COUNT.md new file mode 100644 index 000000000..41176f8b4 --- /dev/null +++ b/docs/build/reference/transformer/Excel_COUNT.md @@ -0,0 +1,25 @@ +--- +title: "Count" +description: "Excel COUNT(value_1; value_2; ... value_30): Counts how many numbers are in the list of arguments. Text entries are ignored. Value_1; value_2; ... value_30 are values or ranges which are to be counted." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Count + + + + +Excel COUNT(value_1; value_2; ... value_30): Counts how many numbers are in the list of arguments. Text entries are ignored. Value_1; value_2; ... value_30 are values or ranges which are to be counted. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `COUNT` + + + diff --git a/docs/build/reference/transformer/Excel_COUNTA.md b/docs/build/reference/transformer/Excel_COUNTA.md new file mode 100644 index 000000000..bb9475a5f --- /dev/null +++ b/docs/build/reference/transformer/Excel_COUNTA.md @@ -0,0 +1,25 @@ +--- +title: "Counta" +description: "Excel COUNTA(value_1; value_2; ... value_30): Counts how many values are in the list of arguments. Text entries are also counted, even when they contain an empty string of length 0. If an argument is an array or reference, empty cells within the array or reference are ignored. value_1; value_2; ... value_30 are up to 30 arguments representing the values to be counted." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Counta + + + + +Excel COUNTA(value_1; value_2; ... value_30): Counts how many values are in the list of arguments. Text entries are also counted, even when they contain an empty string of length 0. If an argument is an array or reference, empty cells within the array or reference are ignored. value_1; value_2; ... value_30 are up to 30 arguments representing the values to be counted. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `COUNTA` + + + diff --git a/docs/build/reference/transformer/Excel_COVAR.md b/docs/build/reference/transformer/Excel_COVAR.md new file mode 100644 index 000000000..01791356e --- /dev/null +++ b/docs/build/reference/transformer/Excel_COVAR.md @@ -0,0 +1,25 @@ +--- +title: "Covar" +description: "Excel COVAR(data_1; data_2): Returns the covariance of the product of paired deviations. Data_1 is the first data set. Data_2 is the second data set." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Covar + + + + +Excel COVAR(data_1; data_2): Returns the covariance of the product of paired deviations. Data_1 is the first data set. Data_2 is the second data set. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `COVAR` + + + diff --git a/docs/build/reference/transformer/Excel_DEGREES.md b/docs/build/reference/transformer/Excel_DEGREES.md new file mode 100644 index 000000000..1fbe7855c --- /dev/null +++ b/docs/build/reference/transformer/Excel_DEGREES.md @@ -0,0 +1,25 @@ +--- +title: "Degrees" +description: "Excel DEGREES(number): Converts the given number in radians to degrees." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Degrees + + + + +Excel DEGREES(number): Converts the given number in radians to degrees. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `DEGREES` + + + diff --git a/docs/build/reference/transformer/Excel_DEVSQ.md b/docs/build/reference/transformer/Excel_DEVSQ.md new file mode 100644 index 000000000..9002adcf0 --- /dev/null +++ b/docs/build/reference/transformer/Excel_DEVSQ.md @@ -0,0 +1,25 @@ +--- +title: "Devsq" +description: "Excel DEVSQ(number_1; number_2; ... number_30): Returns the sum of squares of deviations based on a sample mean. Number_1; number_2; ... 
number_30 are numerical values or ranges representing a sample." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Devsq + + + + +Excel DEVSQ(number_1; number_2; ... number_30): Returns the sum of squares of deviations based on a sample mean. Number_1; number_2; ... number_30 are numerical values or ranges representing a sample. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `DEVSQ` + + + diff --git a/docs/build/reference/transformer/Excel_EVEN.md b/docs/build/reference/transformer/Excel_EVEN.md new file mode 100644 index 000000000..18e6924bb --- /dev/null +++ b/docs/build/reference/transformer/Excel_EVEN.md @@ -0,0 +1,25 @@ +--- +title: "Even" +description: "Excel EVEN(number): Rounds the given number up to the nearest even integer." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Even + + + + +Excel EVEN(number): Rounds the given number up to the nearest even integer. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `EVEN` + + + diff --git a/docs/build/reference/transformer/Excel_EXACT.md b/docs/build/reference/transformer/Excel_EXACT.md new file mode 100644 index 000000000..8bd3002ce --- /dev/null +++ b/docs/build/reference/transformer/Excel_EXACT.md @@ -0,0 +1,25 @@ +--- +title: "Exact" +description: "Excel EXACT(text_1; text_2): Compares two text strings and returns TRUE if they are identical. This function is case-sensitive. Text_1 is the first text to compare. Text_2 is the second text to compare." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Exact + + + + +Excel EXACT(text_1; text_2): Compares two text strings and returns TRUE if they are identical. This function is case-sensitive. Text_1 is the first text to compare. Text_2 is the second text to compare. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `EXACT` + + + diff --git a/docs/build/reference/transformer/Excel_EXP.md b/docs/build/reference/transformer/Excel_EXP.md new file mode 100644 index 000000000..91c3f0317 --- /dev/null +++ b/docs/build/reference/transformer/Excel_EXP.md @@ -0,0 +1,25 @@ +--- +title: "Exp" +description: "Excel EXP(number): Returns e raised to the power of the given number." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Exp + + + + +Excel EXP(number): Returns e raised to the power of the given number. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `EXP` + + + diff --git a/docs/build/reference/transformer/Excel_FACT.md b/docs/build/reference/transformer/Excel_FACT.md new file mode 100644 index 000000000..20a0d611a --- /dev/null +++ b/docs/build/reference/transformer/Excel_FACT.md @@ -0,0 +1,25 @@ +--- +title: "Fact" +description: "Excel FACT(number): Returns the factorial of the given number." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Fact + + + + +Excel FACT(number): Returns the factorial of the given number. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `FACT` + + + diff --git a/docs/build/reference/transformer/Excel_FALSE.md b/docs/build/reference/transformer/Excel_FALSE.md new file mode 100644 index 000000000..7324eeb18 --- /dev/null +++ b/docs/build/reference/transformer/Excel_FALSE.md @@ -0,0 +1,25 @@ +--- +title: "False" +description: "Excel FALSE(): Set the logical value to FALSE. The FALSE() function does not require any arguments." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# False + + + + +Excel FALSE(): Set the logical value to FALSE. The FALSE() function does not require any arguments. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `FALSE` + + + diff --git a/docs/build/reference/transformer/Excel_FIND.md b/docs/build/reference/transformer/Excel_FIND.md new file mode 100644 index 000000000..f2e78f88a --- /dev/null +++ b/docs/build/reference/transformer/Excel_FIND.md @@ -0,0 +1,25 @@ +--- +title: "Find" +description: "Excel FIND(find_text; text; position): Looks for a string of text within another string. Where to begin the search can also be defined. The search term can be a number or any string of characters. The search is case-sensitive. Find_text is the text to be found. Text is the text where the search takes place. Position (optional) is the position in the text from which the search starts." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Find + + + + +Excel FIND(find_text; text; position): Looks for a string of text within another string. Where to begin the search can also be defined. The search term can be a number or any string of characters. The search is case-sensitive. Find_text is the text to be found. Text is the text where the search takes place. Position (optional) is the position in the text from which the search starts. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `FIND` + + + diff --git a/docs/build/reference/transformer/Excel_FLOOR.md b/docs/build/reference/transformer/Excel_FLOOR.md new file mode 100644 index 000000000..de7c3eb5e --- /dev/null +++ b/docs/build/reference/transformer/Excel_FLOOR.md @@ -0,0 +1,25 @@ +--- +title: "Floor" +description: "Excel FLOOR(number; significance; mode): Rounds the given number down to the nearest multiple of significance. Significance is the value to whose multiple of ten the number is to be rounded down (.01, .1, 1, 10, etc.). Mode is an optional value. 
If it is indicated and non-zero and if the number and significance are negative, rounding up is carried out based on that value." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Floor + + + + +Excel FLOOR(number; significance; mode): Rounds the given number down to the nearest multiple of significance. Significance is the value to whose multiple of ten the number is to be rounded down (.01, .1, 1, 10, etc.). Mode is an optional value. If it is indicated and non-zero and if the number and significance are negative, rounding up is carried out based on that value. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `FLOOR` + + + diff --git a/docs/build/reference/transformer/Excel_FORECAST.md b/docs/build/reference/transformer/Excel_FORECAST.md new file mode 100644 index 000000000..d895dcfc9 --- /dev/null +++ b/docs/build/reference/transformer/Excel_FORECAST.md @@ -0,0 +1,25 @@ +--- +title: "Forecast" +description: "Excel FORECAST(value; data_Y; data_X): Extrapolates future values based on existing x and y values. Value is the x value, for which the y value of the linear regression is to be returned. Data_Y is the array or range of known y’s. Data_X is the array or range of known x’s. Does not work for exponential functions." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Forecast + + + + +Excel FORECAST(value; data_Y; data_X): Extrapolates future values based on existing x and y values. Value is the x value, for which the y value of the linear regression is to be returned. Data_Y is the array or range of known y’s. Data_X is the array or range of known x’s. Does not work for exponential functions. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `FORECAST` + + + diff --git a/docs/build/reference/transformer/Excel_FV.md b/docs/build/reference/transformer/Excel_FV.md new file mode 100644 index 000000000..fa652067d --- /dev/null +++ b/docs/build/reference/transformer/Excel_FV.md @@ -0,0 +1,25 @@ +--- +title: "Fv" +description: "Excel FV(rate; NPER; PMT; PV; type): Returns the future value of an investment based on periodic, constant payments and a constant interest rate. Rate is the periodic interest rate. NPER is the total number of periods. PMT is the annuity paid regularly per period. PV (optional) is the present cash value of an investment. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Fv + + + + +Excel FV(rate; NPER; PMT; PV; type): Returns the future value of an investment based on periodic, constant payments and a constant interest rate. Rate is the periodic interest rate. NPER is the total number of periods. PMT is the annuity paid regularly per period. PV (optional) is the present cash value of an investment. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `FV` + + + diff --git a/docs/build/reference/transformer/Excel_GEOMEAN.md b/docs/build/reference/transformer/Excel_GEOMEAN.md new file mode 100644 index 000000000..8b5e55e9b --- /dev/null +++ b/docs/build/reference/transformer/Excel_GEOMEAN.md @@ -0,0 +1,25 @@ +--- +title: "Geomean" +description: "Excel GEOMEAN(number_1; number_2; ... number_30): Returns the geometric mean of a sample. Number_1; number_2; ... number_30 are numerical arguments or ranges that represent a random sample." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Geomean + + + + +Excel GEOMEAN(number_1; number_2; ... number_30): Returns the geometric mean of a sample. Number_1; number_2; ... number_30 are numerical arguments or ranges that represent a random sample. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `GEOMEAN` + + + diff --git a/docs/build/reference/transformer/Excel_IF.md b/docs/build/reference/transformer/Excel_IF.md new file mode 100644 index 000000000..dcbd018a4 --- /dev/null +++ b/docs/build/reference/transformer/Excel_IF.md @@ -0,0 +1,25 @@ +--- +title: "If" +description: "Excel IF(test; then_value; otherwise_value): Returns different values based on the test value. Note that in this implementation it will not actually evaluate logical conditions. Then_value is the value that is returned if the test is TRUE. Otherwise_value (optional) is the value that is returned if the test is FALSE." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# If + + + + +Excel IF(test; then_value; otherwise_value): Returns different values based on the test value. Note that in this implementation it will not actually evaluate logical conditions. Then_value is the value that is returned if the test is TRUE. Otherwise_value (optional) is the value that is returned if the test is FALSE. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `IF` + + + diff --git a/docs/build/reference/transformer/Excel_INT.md b/docs/build/reference/transformer/Excel_INT.md new file mode 100644 index 000000000..8be114c76 --- /dev/null +++ b/docs/build/reference/transformer/Excel_INT.md @@ -0,0 +1,25 @@ +--- +title: "Int" +description: "Excel INT(number): Rounds the given number down to the nearest integer." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Int + + + + +Excel INT(number): Rounds the given number down to the nearest integer. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `INT` + + + diff --git a/docs/build/reference/transformer/Excel_INTERCEPT.md b/docs/build/reference/transformer/Excel_INTERCEPT.md new file mode 100644 index 000000000..682e6bee0 --- /dev/null +++ b/docs/build/reference/transformer/Excel_INTERCEPT.md @@ -0,0 +1,25 @@ +--- +title: "Intercept" +description: "Excel INTERCEPT(data_Y; data_X): Calculates the y-value at which a line will intersect the y-axis by using known x-values and y-values. Data_Y is the dependent set of observations or data. Data_X is the independent set of observations or data. Names, arrays or references containing numbers must be used here. Numbers can also be entered directly." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Intercept + + + + +Excel INTERCEPT(data_Y; data_X): Calculates the y-value at which a line will intersect the y-axis by using known x-values and y-values. Data_Y is the dependent set of observations or data. Data_X is the independent set of observations or data. Names, arrays or references containing numbers must be used here. Numbers can also be entered directly. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `INTERCEPT` + + + diff --git a/docs/build/reference/transformer/Excel_IPMT.md b/docs/build/reference/transformer/Excel_IPMT.md new file mode 100644 index 000000000..2f6042004 --- /dev/null +++ b/docs/build/reference/transformer/Excel_IPMT.md @@ -0,0 +1,25 @@ +--- +title: "Ipmt" +description: "Excel IPMT(rate; period; NPER; PV; FV; type): Calculates the periodic amortization for an investment with regular payments and a constant interest rate. Rate is the periodic interest rate. 
Period is the period for which the compound interest is calculated. NPER is the total number of periods during which annuity is paid. Period=NPER, if compound interest for the last period is calculated. PV is the present cash value in sequence of payments. FV (optional) is the desired value (future value) at the end of the periods. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Ipmt + + + + +Excel IPMT(rate; period; NPER; PV; FV; type): Calculates the periodic amortization for an investment with regular payments and a constant interest rate. Rate is the periodic interest rate. Period is the period for which the compound interest is calculated. NPER is the total number of periods during which annuity is paid. Period=NPER, if compound interest for the last period is calculated. PV is the present cash value in sequence of payments. FV (optional) is the desired value (future value) at the end of the periods. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `IPMT` + + + diff --git a/docs/build/reference/transformer/Excel_IRR.md b/docs/build/reference/transformer/Excel_IRR.md new file mode 100644 index 000000000..d264e1511 --- /dev/null +++ b/docs/build/reference/transformer/Excel_IRR.md @@ -0,0 +1,25 @@ +--- +title: "Irr" +description: "Excel IRR(values; guess): Calculates the internal rate of return for an investment. The values represent cash flow values at regular intervals; at least one value must be negative (payments), and at least one value must be positive (income). Values is an array containing the values. Guess (optional) is the estimated value. If you can provide only a few values, you should provide an initial guess to enable the iteration." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Irr + + + + +Excel IRR(values; guess): Calculates the internal rate of return for an investment. The values represent cash flow values at regular intervals; at least one value must be negative (payments), and at least one value must be positive (income). Values is an array containing the values. Guess (optional) is the estimated value. If you can provide only a few values, you should provide an initial guess to enable the iteration. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `IRR` + + + diff --git a/docs/build/reference/transformer/Excel_LARGE.md b/docs/build/reference/transformer/Excel_LARGE.md new file mode 100644 index 000000000..dde90a180 --- /dev/null +++ b/docs/build/reference/transformer/Excel_LARGE.md @@ -0,0 +1,25 @@ +--- +title: "Large" +description: "Excel LARGE(data; rank_c): Returns the Rank_c-th largest value in a data set. Data is the cell range of data. Rank_c is the ranking of the value (2nd largest, 3rd largest, etc.) written as an integer." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Large + + + + +Excel LARGE(data; rank_c): Returns the Rank_c-th largest value in a data set. Data is the cell range of data. Rank_c is the ranking of the value (2nd largest, 3rd largest, etc.) written as an integer. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `LARGE` + + + diff --git a/docs/build/reference/transformer/Excel_LEFT.md b/docs/build/reference/transformer/Excel_LEFT.md new file mode 100644 index 000000000..9ab67c4e8 --- /dev/null +++ b/docs/build/reference/transformer/Excel_LEFT.md @@ -0,0 +1,25 @@ +--- +title: "Left" +description: "Excel LEFT(text; number): Returns the first character or characters in a text string. Text is the text where the initial partial words are to be determined. 
Number (optional) is the number of characters for the start text. If this parameter is not defined, one character is returned." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Left + + + + +Excel LEFT(text; number): Returns the first character or characters in a text string. Text is the text where the initial partial words are to be determined. Number (optional) is the number of characters for the start text. If this parameter is not defined, one character is returned. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `LEFT` + + + diff --git a/docs/build/reference/transformer/Excel_LN.md b/docs/build/reference/transformer/Excel_LN.md new file mode 100644 index 000000000..4bf5686fd --- /dev/null +++ b/docs/build/reference/transformer/Excel_LN.md @@ -0,0 +1,25 @@ +--- +title: "Ln" +description: "Excel LN(number): Returns the natural logarithm based on the constant e of the given number." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Ln + + + + +Excel LN(number): Returns the natural logarithm based on the constant e of the given number. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `LN` + + + diff --git a/docs/build/reference/transformer/Excel_LOG.md b/docs/build/reference/transformer/Excel_LOG.md new file mode 100644 index 000000000..c3d840b55 --- /dev/null +++ b/docs/build/reference/transformer/Excel_LOG.md @@ -0,0 +1,25 @@ +--- +title: "Log" +description: "Excel LOG(number; base): Returns the logarithm of the given number to the specified base. Base is the base for the logarithm calculation." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Log + + + + +Excel LOG(number; base): Returns the logarithm of the given number to the specified base. Base is the base for the logarithm calculation. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `LOG` + + + diff --git a/docs/build/reference/transformer/Excel_LOG10.md b/docs/build/reference/transformer/Excel_LOG10.md new file mode 100644 index 000000000..565e52801 --- /dev/null +++ b/docs/build/reference/transformer/Excel_LOG10.md @@ -0,0 +1,25 @@ +--- +title: "Log10" +description: "Excel LOG10(number): Returns the base-10 logarithm of the given number." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Log10 + + + + +Excel LOG10(number): Returns the base-10 logarithm of the given number. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `LOG10` + + + diff --git a/docs/build/reference/transformer/Excel_MAX.md b/docs/build/reference/transformer/Excel_MAX.md new file mode 100644 index 000000000..7de87a687 --- /dev/null +++ b/docs/build/reference/transformer/Excel_MAX.md @@ -0,0 +1,25 @@ +--- +title: "Max" +description: "Excel MAX(number_1; number_2; ... number_30): Returns the maximum value in a list of arguments. Number_1; number_2; ... number_30 are numerical values or ranges." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Max + + + + +Excel MAX(number_1; number_2; ... number_30): Returns the maximum value in a list of arguments. Number_1; number_2; ... number_30 are numerical values or ranges. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `MAX` + + + diff --git a/docs/build/reference/transformer/Excel_MAXA.md b/docs/build/reference/transformer/Excel_MAXA.md new file mode 100644 index 000000000..649242541 --- /dev/null +++ b/docs/build/reference/transformer/Excel_MAXA.md @@ -0,0 +1,25 @@ +--- +title: "Maxa" +description: "Excel MAXA(value_1; value_2; ... value_30): Returns the maximum value in a list of arguments. Unlike MAX, text can be entered. 
The value of the text is 0. Value_1; value_2; ... value_30 are values or ranges." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Maxa + + + + +Excel MAXA(value_1; value_2; ... value_30): Returns the maximum value in a list of arguments. Unlike MAX, text can be entered. The value of the text is 0. Value_1; value_2; ... value_30 are values or ranges. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `MAXA` + + + diff --git a/docs/build/reference/transformer/Excel_MEDIAN.md b/docs/build/reference/transformer/Excel_MEDIAN.md new file mode 100644 index 000000000..5f42bf255 --- /dev/null +++ b/docs/build/reference/transformer/Excel_MEDIAN.md @@ -0,0 +1,25 @@ +--- +title: "Median" +description: "Excel MEDIAN(number_1; number_2; ... number_30): Returns the median of a set of numbers. Number_1; number_2; ... number_30 are values or ranges, which represent a sample. Each number can also be replaced by a reference." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Median + + + + +Excel MEDIAN(number_1; number_2; ... number_30): Returns the median of a set of numbers. Number_1; number_2; ... number_30 are values or ranges, which represent a sample. Each number can also be replaced by a reference. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `MEDIAN` + + + diff --git a/docs/build/reference/transformer/Excel_MID.md b/docs/build/reference/transformer/Excel_MID.md new file mode 100644 index 000000000..0d35a92ea --- /dev/null +++ b/docs/build/reference/transformer/Excel_MID.md @@ -0,0 +1,25 @@ +--- +title: "Mid" +description: "Excel MID(text; start; number): Returns a text segment of a character string. The parameters specify the starting position and the number of characters. Text is the text containing the characters to extract. Start is the position of the first character in the text to extract. 
Number is the number of characters in the part of the text." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Mid + + + + +Excel MID(text; start; number): Returns a text segment of a character string. The parameters specify the starting position and the number of characters. Text is the text containing the characters to extract. Start is the position of the first character in the text to extract. Number is the number of characters in the part of the text. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `MID` + + + diff --git a/docs/build/reference/transformer/Excel_MIN.md b/docs/build/reference/transformer/Excel_MIN.md new file mode 100644 index 000000000..11fe84367 --- /dev/null +++ b/docs/build/reference/transformer/Excel_MIN.md @@ -0,0 +1,25 @@ +--- +title: "Min" +description: "Excel MIN(number_1; number_2; ... number_30): Returns the minimum value in a list of arguments. Number_1; number_2; ... number_30 are numerical values or ranges." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Min + + + + +Excel MIN(number_1; number_2; ... number_30): Returns the minimum value in a list of arguments. Number_1; number_2; ... number_30 are numerical values or ranges. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `MIN` + + + diff --git a/docs/build/reference/transformer/Excel_MINA.md b/docs/build/reference/transformer/Excel_MINA.md new file mode 100644 index 000000000..69a687b98 --- /dev/null +++ b/docs/build/reference/transformer/Excel_MINA.md @@ -0,0 +1,25 @@ +--- +title: "Mina" +description: "Excel MINA(value_1; value_2; ... value_30): Returns the minimum value in a list of arguments. Here text can also be entered. The value of the text is 0. Value_1; value_2; ... value_30 are values or ranges." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Mina + + + + +Excel MINA(value_1; value_2; ... value_30): Returns the minimum value in a list of arguments. Here text can also be entered. The value of the text is 0. Value_1; value_2; ... value_30 are values or ranges. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `MINA` + + + diff --git a/docs/build/reference/transformer/Excel_MIRR.md b/docs/build/reference/transformer/Excel_MIRR.md new file mode 100644 index 000000000..0b2daba6e --- /dev/null +++ b/docs/build/reference/transformer/Excel_MIRR.md @@ -0,0 +1,25 @@ +--- +title: "Mirr" +description: "Excel MIRR(values; investment; reinvest_rate): Calculates the modified internal rate of return of a series of investments. Values corresponds to the array or the cell reference for cells whose content corresponds to the payments. Investment is the rate of interest of the investments (the negative values of the array). Reinvest_rate is the rate of interest of the reinvestment (the positive values of the array)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Mirr + + + + +Excel MIRR(values; investment; reinvest_rate): Calculates the modified internal rate of return of a series of investments. Values corresponds to the array or the cell reference for cells whose content corresponds to the payments. Investment is the rate of interest of the investments (the negative values of the array). Reinvest_rate is the rate of interest of the reinvestment (the positive values of the array).
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `MIRR` + + + diff --git a/docs/build/reference/transformer/Excel_MOD.md b/docs/build/reference/transformer/Excel_MOD.md new file mode 100644 index 000000000..5ff6ebe6b --- /dev/null +++ b/docs/build/reference/transformer/Excel_MOD.md @@ -0,0 +1,25 @@ +--- +title: "Mod" +description: "Excel MOD(dividend; divisor): Returns the remainder after a number is divided by a divisor. Dividend is the number which will be divided by the divisor. Divisor is the number by which to divide the dividend." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Mod + + + + +Excel MOD(dividend; divisor): Returns the remainder after a number is divided by a divisor. Dividend is the number which will be divided by the divisor. Divisor is the number by which to divide the dividend. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `MOD` + + + diff --git a/docs/build/reference/transformer/Excel_MODE.md b/docs/build/reference/transformer/Excel_MODE.md new file mode 100644 index 000000000..4294b384e --- /dev/null +++ b/docs/build/reference/transformer/Excel_MODE.md @@ -0,0 +1,25 @@ +--- +title: "Mode" +description: "Excel MODE(number_1; number_2; ... number_30): Returns the most common value in a data set. Number_1; number_2; ... number_30 are numerical values or ranges. If several values have the same frequency, it returns the smallest value. An error occurs when a value does not appear twice." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Mode + + + + +Excel MODE(number_1; number_2; ... number_30): Returns the most common value in a data set. Number_1; number_2; ... number_30 are numerical values or ranges. If several values have the same frequency, it returns the smallest value. An error occurs when a value does not appear twice. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `MODE` + + + diff --git a/docs/build/reference/transformer/Excel_NORMDIST.md b/docs/build/reference/transformer/Excel_NORMDIST.md new file mode 100644 index 000000000..8a35f46ba --- /dev/null +++ b/docs/build/reference/transformer/Excel_NORMDIST.md @@ -0,0 +1,25 @@ +--- +title: "Normdist" +description: "Excel NORMDIST(number; mean; STDEV; C): Returns the normal distribution for the given Number in the distribution. Mean is the mean value of the distribution. STDEV is the standard deviation of the distribution. C = 0 calculates the density function, and C = 1 calculates the distribution." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Normdist + + + + +Excel NORMDIST(number; mean; STDEV; C): Returns the normal distribution for the given Number in the distribution. Mean is the mean value of the distribution. STDEV is the standard deviation of the distribution. C = 0 calculates the density function, and C = 1 calculates the distribution. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `NORMDIST` + + + diff --git a/docs/build/reference/transformer/Excel_NORMINV.md b/docs/build/reference/transformer/Excel_NORMINV.md new file mode 100644 index 000000000..5c4fc6f19 --- /dev/null +++ b/docs/build/reference/transformer/Excel_NORMINV.md @@ -0,0 +1,25 @@ +--- +title: "Norminv" +description: "Excel NORMINV(number; mean; STDEV): Returns the inverse of the normal distribution for the given Number in the distribution. Mean is the mean value in the normal distribution. STDEV is the standard deviation of the normal distribution." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Norminv + + + + +Excel NORMINV(number; mean; STDEV): Returns the inverse of the normal distribution for the given Number in the distribution. 
Mean is the mean value in the normal distribution. STDEV is the standard deviation of the normal distribution. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `NORMINV` + + + diff --git a/docs/build/reference/transformer/Excel_NORMSDIST.md b/docs/build/reference/transformer/Excel_NORMSDIST.md new file mode 100644 index 000000000..3ad8965f9 --- /dev/null +++ b/docs/build/reference/transformer/Excel_NORMSDIST.md @@ -0,0 +1,25 @@ +--- +title: "Normsdist" +description: "Excel NORMSDIST(number): Returns the standard normal cumulative distribution for the given Number." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Normsdist + + + + +Excel NORMSDIST(number): Returns the standard normal cumulative distribution for the given Number. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `NORMSDIST` + + + diff --git a/docs/build/reference/transformer/Excel_NORMSINV.md b/docs/build/reference/transformer/Excel_NORMSINV.md new file mode 100644 index 000000000..101e23b2c --- /dev/null +++ b/docs/build/reference/transformer/Excel_NORMSINV.md @@ -0,0 +1,25 @@ +--- +title: "Normsinv" +description: "Excel NORMSINV(number): Returns the inverse of the standard normal distribution for the given Number, a probability value." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Normsinv + + + + +Excel NORMSINV(number): Returns the inverse of the standard normal distribution for the given Number, a probability value. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `NORMSINV` + + + diff --git a/docs/build/reference/transformer/Excel_NOT.md b/docs/build/reference/transformer/Excel_NOT.md new file mode 100644 index 000000000..e4f61b3e5 --- /dev/null +++ b/docs/build/reference/transformer/Excel_NOT.md @@ -0,0 +1,25 @@ +--- +title: "Not" +description: "Excel NOT(logical_value): Reverses the logical value. Logical_value is any value to be reversed." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Not + + + + +Excel NOT(logical_value): Reverses the logical value. Logical_value is any value to be reversed. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `NOT` + + + diff --git a/docs/build/reference/transformer/Excel_NPER.md b/docs/build/reference/transformer/Excel_NPER.md new file mode 100644 index 000000000..b54bc0bbe --- /dev/null +++ b/docs/build/reference/transformer/Excel_NPER.md @@ -0,0 +1,25 @@ +--- +title: "Nper" +description: "Excel NPER(rate; PMT; PV; FV; type): Returns the number of periods for an investment based on periodic, constant payments and a constant interest rate. Rate is the periodic interest rate. PMT is the constant annuity paid in each period. PV is the present value (cash value) in a sequence of payments. FV (optional) is the future value, which is reached at the end of the last period. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Nper + + + + +Excel NPER(rate; PMT; PV; FV; type): Returns the number of periods for an investment based on periodic, constant payments and a constant interest rate. Rate is the periodic interest rate. PMT is the constant annuity paid in each period. PV is the present value (cash value) in a sequence of payments. 
FV (optional) is the future value, which is reached at the end of the last period. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `NPER` + + + diff --git a/docs/build/reference/transformer/Excel_NPV.md b/docs/build/reference/transformer/Excel_NPV.md new file mode 100644 index 000000000..bcd930b32 --- /dev/null +++ b/docs/build/reference/transformer/Excel_NPV.md @@ -0,0 +1,25 @@ +--- +title: "Npv" +description: "Excel NPV(Rate; value_1; value_2; ... value_30): Returns the net present value of an investment based on a series of periodic cash flows and a discount rate. Rate is the discount rate for a period. Value_1; value_2;... value_30 are values representing deposits or withdrawals." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Npv + + + + +Excel NPV(Rate; value_1; value_2; ... value_30): Returns the net present value of an investment based on a series of periodic cash flows and a discount rate. Rate is the discount rate for a period. Value_1; value_2;... value_30 are values representing deposits or withdrawals. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `NPV` + + + diff --git a/docs/build/reference/transformer/Excel_ODD.md b/docs/build/reference/transformer/Excel_ODD.md new file mode 100644 index 000000000..553a55069 --- /dev/null +++ b/docs/build/reference/transformer/Excel_ODD.md @@ -0,0 +1,25 @@ +--- +title: "Odd" +description: "Excel ODD(number): Rounds the given number up to the nearest odd integer." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Odd + + + + +Excel ODD(number): Rounds the given number up to the nearest odd integer. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ODD` + + + diff --git a/docs/build/reference/transformer/Excel_OR.md b/docs/build/reference/transformer/Excel_OR.md new file mode 100644 index 000000000..16bdedcfa --- /dev/null +++ b/docs/build/reference/transformer/Excel_OR.md @@ -0,0 +1,25 @@ +--- +title: "Or" +description: "Excel OR(logical_value_1; logical_value_2; ...logical_value_30): Returns TRUE if at least one argument is TRUE. Returns the value FALSE if all the arguments have the logical value FALSE. Logical_value_1; logical_value_2; ...logical_value_30 are conditions to be checked. All conditions can be either TRUE or FALSE. If a range is entered as a parameter, the function uses the value from the range that is in the current column or row." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Or + + + + +Excel OR(logical_value_1; logical_value_2; ...logical_value_30): Returns TRUE if at least one argument is TRUE. Returns the value FALSE if all the arguments have the logical value FALSE. Logical_value_1; logical_value_2; ...logical_value_30 are conditions to be checked. All conditions can be either TRUE or FALSE. If a range is entered as a parameter, the function uses the value from the range that is in the current column or row. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `OR` + + + diff --git a/docs/build/reference/transformer/Excel_PEARSON.md b/docs/build/reference/transformer/Excel_PEARSON.md new file mode 100644 index 000000000..4febddb34 --- /dev/null +++ b/docs/build/reference/transformer/Excel_PEARSON.md @@ -0,0 +1,25 @@ +--- +title: "Pearson" +description: "Excel PEARSON(data_1; data_2): Returns the Pearson product moment correlation coefficient r. Data_1 is the array of the first data set. Data_2 is the array of the second data set." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Pearson + + + + +Excel PEARSON(data_1; data_2): Returns the Pearson product moment correlation coefficient r. Data_1 is the array of the first data set. Data_2 is the array of the second data set. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `PEARSON` + + + diff --git a/docs/build/reference/transformer/Excel_PERCENTILE.md b/docs/build/reference/transformer/Excel_PERCENTILE.md new file mode 100644 index 000000000..9004a5952 --- /dev/null +++ b/docs/build/reference/transformer/Excel_PERCENTILE.md @@ -0,0 +1,25 @@ +--- +title: "Percentile" +description: "Excel PERCENTILE(data; alpha): Returns the alpha-percentile of data values in an array. Data is the array of data. Alpha is the percentage of the scale between 0 and 1." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Percentile + + + + +Excel PERCENTILE(data; alpha): Returns the alpha-percentile of data values in an array. Data is the array of data. Alpha is the percentage of the scale between 0 and 1. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `PERCENTILE` + + + diff --git a/docs/build/reference/transformer/Excel_PERCENTRANK.md b/docs/build/reference/transformer/Excel_PERCENTRANK.md new file mode 100644 index 000000000..e61b3aefa --- /dev/null +++ b/docs/build/reference/transformer/Excel_PERCENTRANK.md @@ -0,0 +1,25 @@ +--- +title: "Percentrank" +description: "Excel PERCENTRANK(data; value): Returns the percentage rank (percentile) of the given value in a sample. Data is the array of data in the sample." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Percentrank + + + + +Excel PERCENTRANK(data; value): Returns the percentage rank (percentile) of the given value in a sample. Data is the array of data in the sample. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `PERCENTRANK` + + + diff --git a/docs/build/reference/transformer/Excel_PI.md b/docs/build/reference/transformer/Excel_PI.md new file mode 100644 index 000000000..3e78e404e --- /dev/null +++ b/docs/build/reference/transformer/Excel_PI.md @@ -0,0 +1,25 @@ +--- +title: "Pi" +description: "Excel PI(): Returns the value of PI to fourteen decimal places." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Pi + + + + +Excel PI(): Returns the value of PI to fourteen decimal places. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `PI` + + + diff --git a/docs/build/reference/transformer/Excel_PMT.md b/docs/build/reference/transformer/Excel_PMT.md new file mode 100644 index 000000000..db0df54af --- /dev/null +++ b/docs/build/reference/transformer/Excel_PMT.md @@ -0,0 +1,25 @@ +--- +title: "Pmt" +description: "Excel PMT(rate; NPER; PV; FV; type): Returns the periodic payment for an annuity with constant interest rates. Rate is the periodic interest rate. NPER is the number of periods in which annuity is paid. PV is the present value (cash value) in a sequence of payments. FV (optional) is the desired value (future value) to be reached at the end of the periodic payments. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Pmt + + + + +Excel PMT(rate; NPER; PV; FV; type): Returns the periodic payment for an annuity with constant interest rates. Rate is the periodic interest rate. NPER is the number of periods in which annuity is paid. PV is the present value (cash value) in a sequence of payments. FV (optional) is the desired value (future value) to be reached at the end of the periodic payments. 
Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `PMT` + + + diff --git a/docs/build/reference/transformer/Excel_POISSON.md b/docs/build/reference/transformer/Excel_POISSON.md new file mode 100644 index 000000000..8d11b7895 --- /dev/null +++ b/docs/build/reference/transformer/Excel_POISSON.md @@ -0,0 +1,25 @@ +--- +title: "Poisson" +description: "Excel POISSON(number; mean; C): Returns the Poisson distribution for the given Number. Mean is the middle value of the Poisson distribution. C = 0 calculates the density function, and C = 1 calculates the distribution." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Poisson + + + + +Excel POISSON(number; mean; C): Returns the Poisson distribution for the given Number. Mean is the middle value of the Poisson distribution. C = 0 calculates the density function, and C = 1 calculates the distribution. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `POISSON` + + + diff --git a/docs/build/reference/transformer/Excel_POWER.md b/docs/build/reference/transformer/Excel_POWER.md new file mode 100644 index 000000000..65ae576d1 --- /dev/null +++ b/docs/build/reference/transformer/Excel_POWER.md @@ -0,0 +1,25 @@ +--- +title: "Power" +description: "Excel POWER(base; power): Returns the result of a number raised to a power. Base is the number that is to be raised to the given power. Power is the exponent by which the base is to be raised." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Power + + + + +Excel POWER(base; power): Returns the result of a number raised to a power. Base is the number that is to be raised to the given power. Power is the exponent by which the base is to be raised. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `POWER` + + + diff --git a/docs/build/reference/transformer/Excel_PPMT.md b/docs/build/reference/transformer/Excel_PPMT.md new file mode 100644 index 000000000..d9532fa8e --- /dev/null +++ b/docs/build/reference/transformer/Excel_PPMT.md @@ -0,0 +1,25 @@ +--- +title: "Ppmt" +description: "Excel PPMT(rate; period; NPER; PV; FV; type): Returns for a given period the payment on the principal for an investment that is based on periodic and constant payments and a constant interest rate. Rate is the periodic interest rate. Period is the amortization period. NPER is the total number of periods during which annuity is paid. PV is the present value in the sequence of payments. FV (optional) is the desired (future) value. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Ppmt + + + + +Excel PPMT(rate; period; NPER; PV; FV; type): Returns for a given period the payment on the principal for an investment that is based on periodic and constant payments and a constant interest rate. Rate is the periodic interest rate. Period is the amortization period. NPER is the total number of periods during which annuity is paid. PV is the present value in the sequence of payments. FV (optional) is the desired (future) value. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `PPMT` + + + diff --git a/docs/build/reference/transformer/Excel_PRODUCT.md b/docs/build/reference/transformer/Excel_PRODUCT.md new file mode 100644 index 000000000..038557e00 --- /dev/null +++ b/docs/build/reference/transformer/Excel_PRODUCT.md @@ -0,0 +1,25 @@ +--- +title: "Product" +description: "Excel PRODUCT(number 1 to 30): Multiplies all the numbers given as arguments and returns the product. Number 1 to number 30 are up to 30 arguments whose product is to be calculated, separated by semi-colons." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Product + + + + +Excel PRODUCT(number 1 to 30): Multiplies all the numbers given as arguments and returns the product. Number 1 to number 30 are up to 30 arguments whose product is to be calculated, separated by semi-colons. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `PRODUCT` + + + diff --git a/docs/build/reference/transformer/Excel_PROPER.md b/docs/build/reference/transformer/Excel_PROPER.md new file mode 100644 index 000000000..c9b1ba1fc --- /dev/null +++ b/docs/build/reference/transformer/Excel_PROPER.md @@ -0,0 +1,25 @@ +--- +title: "Proper" +description: "Excel PROPER(text): Capitalizes the first letter in all words of a text string. Text is the text to be converted." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Proper + + + + +Excel PROPER(text): Capitalizes the first letter in all words of a text string. Text is the text to be converted. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `PROPER` + + + diff --git a/docs/build/reference/transformer/Excel_PV.md b/docs/build/reference/transformer/Excel_PV.md new file mode 100644 index 000000000..84d820b4c --- /dev/null +++ b/docs/build/reference/transformer/Excel_PV.md @@ -0,0 +1,25 @@ +--- +title: "Pv" +description: "Excel PV(rate; NPER; PMT; FV; type): Returns the present value of an investment resulting from a series of regular payments. Rate defines the interest rate per period. NPER is the total number of payment periods. PMT is the regular payment made per period. FV (optional) defines the future value remaining after the final installment has been made. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Pv + + + + +Excel PV(rate; NPER; PMT; FV; type): Returns the present value of an investment resulting from a series of regular payments. Rate defines the interest rate per period. NPER is the total number of payment periods. PMT is the regular payment made per period. FV (optional) defines the future value remaining after the final installment has been made. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `PV` + + + diff --git a/docs/build/reference/transformer/Excel_RADIANS.md b/docs/build/reference/transformer/Excel_RADIANS.md new file mode 100644 index 000000000..c2236c844 --- /dev/null +++ b/docs/build/reference/transformer/Excel_RADIANS.md @@ -0,0 +1,25 @@ +--- +title: "Radians" +description: "Excel RADIANS(number): Converts the given number in degrees to radians." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Radians + + + + +Excel RADIANS(number): Converts the given number in degrees to radians. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `RADIANS` + + + diff --git a/docs/build/reference/transformer/Excel_RAND.md b/docs/build/reference/transformer/Excel_RAND.md new file mode 100644 index 000000000..aee0fc7ec --- /dev/null +++ b/docs/build/reference/transformer/Excel_RAND.md @@ -0,0 +1,25 @@ +--- +title: "Rand" +description: "Excel RAND(): Returns a random number between 0 and 1." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Rand + + + + +Excel RAND(): Returns a random number between 0 and 1. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `RAND` + + + diff --git a/docs/build/reference/transformer/Excel_RANK.md b/docs/build/reference/transformer/Excel_RANK.md new file mode 100644 index 000000000..95cd78e69 --- /dev/null +++ b/docs/build/reference/transformer/Excel_RANK.md @@ -0,0 +1,25 @@ +--- +title: "Rank" +description: "Excel RANK(value; data; type): Returns the rank of the given Value in a sample. Data is the array or range of data in the sample. Type (optional) is the sequence order, either ascending (0) or descending (1)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Rank + + + + +Excel RANK(value; data; type): Returns the rank of the given Value in a sample. Data is the array or range of data in the sample. Type (optional) is the sequence order, either ascending (0) or descending (1). 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `RANK` + + + diff --git a/docs/build/reference/transformer/Excel_RATE.md b/docs/build/reference/transformer/Excel_RATE.md new file mode 100644 index 000000000..4fd5fe1d3 --- /dev/null +++ b/docs/build/reference/transformer/Excel_RATE.md @@ -0,0 +1,25 @@ +--- +title: "Rate" +description: "Excel RATE(NPER; PMT; PV; FV; type; guess): Returns the constant interest rate per period of an annuity. NPER is the total number of periods, during which payments are made (payment period). PMT is the constant payment (annuity) paid during each period. PV is the cash value in the sequence of payments. FV (optional) is the future value, which is reached at the end of the periodic payments. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. Guess (optional) determines the estimated value of the interest with iterative calculation." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Rate + + + + +Excel RATE(NPER; PMT; PV; FV; type; guess): Returns the constant interest rate per period of an annuity. NPER is the total number of periods, during which payments are made (payment period). PMT is the constant payment (annuity) paid during each period. PV is the cash value in the sequence of payments. FV (optional) is the future value, which is reached at the end of the periodic payments. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. Guess (optional) determines the estimated value of the interest with iterative calculation. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `RATE` + + + diff --git a/docs/build/reference/transformer/Excel_REPLACE.md b/docs/build/reference/transformer/Excel_REPLACE.md new file mode 100644 index 000000000..9feff28e9 --- /dev/null +++ b/docs/build/reference/transformer/Excel_REPLACE.md @@ -0,0 +1,25 @@ +--- +title: "Replace" +description: "Excel REPLACE(text; position; length; new_text): Replaces part of a text string with a different text string. This function can be used to replace both characters and numbers (which are automatically converted to text). The result of the function is always displayed as text. To perform further calculations with a number which has been replaced by text, convert it back to a number using the VALUE function. Any text containing numbers must be enclosed in quotation marks so it is not interpreted as a number and automatically converted to text. Text is text of which a part will be replaced. Position is the position within the text where the replacement will begin. Length is the number of characters in text to be replaced. New_text is the text which replaces text.." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Replace + + + + +Excel REPLACE(text; position; length; new_text): Replaces part of a text string with a different text string. This function can be used to replace both characters and numbers (which are automatically converted to text). The result of the function is always displayed as text. To perform further calculations with a number which has been replaced by text, convert it back to a number using the VALUE function. Any text containing numbers must be enclosed in quotation marks so it is not interpreted as a number and automatically converted to text. Text is text of which a part will be replaced. Position is the position within the text where the replacement will begin. 
Length is the number of characters in text to be replaced. New_text is the text which replaces text. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `REPLACE` + + + diff --git a/docs/build/reference/transformer/Excel_REPT.md b/docs/build/reference/transformer/Excel_REPT.md new file mode 100644 index 000000000..29dd899c7 --- /dev/null +++ b/docs/build/reference/transformer/Excel_REPT.md @@ -0,0 +1,25 @@ +--- +title: "Rept" +description: "Excel REPT(text; number): Repeats a character string by the given number of copies. Text is the text to be repeated. Number is the number of repetitions. The result can be a maximum of 255 characters." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Rept + + + + +Excel REPT(text; number): Repeats a character string by the given number of copies. Text is the text to be repeated. Number is the number of repetitions. The result can be a maximum of 255 characters. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `REPT` + + + diff --git a/docs/build/reference/transformer/Excel_RIGHT.md b/docs/build/reference/transformer/Excel_RIGHT.md new file mode 100644 index 000000000..2c4a867db --- /dev/null +++ b/docs/build/reference/transformer/Excel_RIGHT.md @@ -0,0 +1,25 @@ +--- +title: "Right" +description: "Excel RIGHT(text; number): Defines the last character or characters in a text string. Text is the text of which the right part is to be determined. Number (optional) is the number of characters from the right part of the text." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Right + + + + +Excel RIGHT(text; number): Defines the last character or characters in a text string. Text is the text of which the right part is to be determined. Number (optional) is the number of characters from the right part of the text. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `RIGHT` + + + diff --git a/docs/build/reference/transformer/Excel_ROMAN.md b/docs/build/reference/transformer/Excel_ROMAN.md new file mode 100644 index 000000000..d63b1683c --- /dev/null +++ b/docs/build/reference/transformer/Excel_ROMAN.md @@ -0,0 +1,25 @@ +--- +title: "Roman" +description: "Excel ROMAN(number; mode): Converts a number into a Roman numeral. The value range must be between 0 and 3999; the modes can be integers from 0 to 4. Number is the number that is to be converted into a Roman numeral. Mode (optional) indicates the degree of simplification. The higher the value, the greater is the simplification of the Roman numeral." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Roman + + + + +Excel ROMAN(number; mode): Converts a number into a Roman numeral. The value range must be between 0 and 3999; the modes can be integers from 0 to 4. Number is the number that is to be converted into a Roman numeral. Mode (optional) indicates the degree of simplification. The higher the value, the greater is the simplification of the Roman numeral. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ROMAN` + + + diff --git a/docs/build/reference/transformer/Excel_ROUND.md b/docs/build/reference/transformer/Excel_ROUND.md new file mode 100644 index 000000000..93eb76d6b --- /dev/null +++ b/docs/build/reference/transformer/Excel_ROUND.md @@ -0,0 +1,25 @@ +--- +title: "Round" +description: "Excel ROUND(number; count): Rounds the given number to a certain number of decimal places according to valid mathematical criteria. Count (optional) is the number of the places to which the value is to be rounded. If the count parameter is negative, only the whole number portion is rounded. It is rounded to the place indicated by the count." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Round + + + + +Excel ROUND(number; count): Rounds the given number to a certain number of decimal places according to valid mathematical criteria. Count (optional) is the number of the places to which the value is to be rounded. If the count parameter is negative, only the whole number portion is rounded. It is rounded to the place indicated by the count. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ROUND` + + + diff --git a/docs/build/reference/transformer/Excel_ROUNDDOWN.md b/docs/build/reference/transformer/Excel_ROUNDDOWN.md new file mode 100644 index 000000000..b75d28e5d --- /dev/null +++ b/docs/build/reference/transformer/Excel_ROUNDDOWN.md @@ -0,0 +1,25 @@ +--- +title: "Rounddown" +description: "Excel ROUNDDOWN(number; count): Rounds the given number. Count (optional) is the number of digits to be rounded down to. If the count parameter is negative, only the whole number portion is rounded. It is rounded to the place indicated by the count." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Rounddown + + + + +Excel ROUNDDOWN(number; count): Rounds the given number. Count (optional) is the number of digits to be rounded down to. If the count parameter is negative, only the whole number portion is rounded. It is rounded to the place indicated by the count. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ROUNDDOWN` + + + diff --git a/docs/build/reference/transformer/Excel_ROUNDUP.md b/docs/build/reference/transformer/Excel_ROUNDUP.md new file mode 100644 index 000000000..f6df535d7 --- /dev/null +++ b/docs/build/reference/transformer/Excel_ROUNDUP.md @@ -0,0 +1,25 @@ +--- +title: "Roundup" +description: "Excel ROUNDUP(number; count): Rounds the given number up. Count (optional) is the number of digits to which rounding up is to be done. 
If the count parameter is negative, only the whole number portion is rounded. It is rounded to the place indicated by the count." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Roundup + + + + +Excel ROUNDUP(number; count): Rounds the given number up. Count (optional) is the number of digits to which rounding up is to be done. If the count parameter is negative, only the whole number portion is rounded. It is rounded to the place indicated by the count. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `ROUNDUP` + + + diff --git a/docs/build/reference/transformer/Excel_SEARCH.md b/docs/build/reference/transformer/Excel_SEARCH.md new file mode 100644 index 000000000..020235346 --- /dev/null +++ b/docs/build/reference/transformer/Excel_SEARCH.md @@ -0,0 +1,25 @@ +--- +title: "Search" +description: "Excel SEARCH(find_text; text; position): Returns the position of a text segment within a character string. The start of the search can be set as an option. The search text can be a number or any sequence of characters. The search is not case-sensitive. The search supports regular expressions. Find_text is the text to be searched for. Text is the text where the search will take place. Position (optional) is the position in the text where the search is to start." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Search + + + + +Excel SEARCH(find_text; text; position): Returns the position of a text segment within a character string. The start of the search can be set as an option. The search text can be a number or any sequence of characters. The search is not case-sensitive. The search supports regular expressions. Find_text is the text to be searched for. Text is the text where the search will take place. Position (optional) is the position in the text where the search is to start. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SEARCH` + + + diff --git a/docs/build/reference/transformer/Excel_SIGN.md b/docs/build/reference/transformer/Excel_SIGN.md new file mode 100644 index 000000000..cc06236a6 --- /dev/null +++ b/docs/build/reference/transformer/Excel_SIGN.md @@ -0,0 +1,25 @@ +--- +title: "Sign" +description: "Excel SIGN(number): Returns the sign of the given number. The function returns the result 1 for a positive sign, -1 for a negative sign, and 0 for zero." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sign + + + + +Excel SIGN(number): Returns the sign of the given number. The function returns the result 1 for a positive sign, -1 for a negative sign, and 0 for zero. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SIGN` + + + diff --git a/docs/build/reference/transformer/Excel_SIN.md b/docs/build/reference/transformer/Excel_SIN.md new file mode 100644 index 000000000..6b4345986 --- /dev/null +++ b/docs/build/reference/transformer/Excel_SIN.md @@ -0,0 +1,25 @@ +--- +title: "Sin" +description: "Excel SIN(number): Returns the sine of the given number (angle in radians)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sin + + + + +Excel SIN(number): Returns the sine of the given number (angle in radians). + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SIN` + + + diff --git a/docs/build/reference/transformer/Excel_SINH.md b/docs/build/reference/transformer/Excel_SINH.md new file mode 100644 index 000000000..8052f61ac --- /dev/null +++ b/docs/build/reference/transformer/Excel_SINH.md @@ -0,0 +1,25 @@ +--- +title: "Sinh" +description: "Excel SINH(number): Returns the hyperbolic sine of the given number (angle in radians)." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sinh + + + + +Excel SINH(number): Returns the hyperbolic sine of the given number (angle in radians). + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SINH` + + + diff --git a/docs/build/reference/transformer/Excel_SLOPE.md b/docs/build/reference/transformer/Excel_SLOPE.md new file mode 100644 index 000000000..831e6844f --- /dev/null +++ b/docs/build/reference/transformer/Excel_SLOPE.md @@ -0,0 +1,25 @@ +--- +title: "Slope" +description: "Excel SLOPE(data_Y; data_X): Returns the slope of the linear regression line. Data_Y is the array or matrix of Y data. Data_X is the array or matrix of X data." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Slope + + + + +Excel SLOPE(data_Y; data_X): Returns the slope of the linear regression line. Data_Y is the array or matrix of Y data. Data_X is the array or matrix of X data. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SLOPE` + + + diff --git a/docs/build/reference/transformer/Excel_SMALL.md b/docs/build/reference/transformer/Excel_SMALL.md new file mode 100644 index 000000000..133c4a1c3 --- /dev/null +++ b/docs/build/reference/transformer/Excel_SMALL.md @@ -0,0 +1,25 @@ +--- +title: "Small" +description: "Excel SMALL(data; rank_c): Returns the Rank_c-th smallest value in a data set. Data is the cell range of data. Rank_c is the rank of the value (2nd smallest, 3rd smallest, etc.) written as an integer." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Small + + + + +Excel SMALL(data; rank_c): Returns the Rank_c-th smallest value in a data set. Data is the cell range of data. Rank_c is the rank of the value (2nd smallest, 3rd smallest, etc.) written as an integer. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SMALL` + + + diff --git a/docs/build/reference/transformer/Excel_SQRT.md b/docs/build/reference/transformer/Excel_SQRT.md new file mode 100644 index 000000000..046428fd3 --- /dev/null +++ b/docs/build/reference/transformer/Excel_SQRT.md @@ -0,0 +1,25 @@ +--- +title: "Sqrt" +description: "Excel SQRT(number): Returns the positive square root of the given number. The value of the number must be positive." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sqrt + + + + +Excel SQRT(number): Returns the positive square root of the given number. The value of the number must be positive. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SQRT` + + + diff --git a/docs/build/reference/transformer/Excel_STANDARDIZE.md b/docs/build/reference/transformer/Excel_STANDARDIZE.md new file mode 100644 index 000000000..a16ebba4c --- /dev/null +++ b/docs/build/reference/transformer/Excel_STANDARDIZE.md @@ -0,0 +1,25 @@ +--- +title: "Standardize" +description: "Excel STANDARDIZE(number; mean; STDEV): Converts a random variable to a normalized value. Number is the value to be standardized. Mean is the arithmetic mean of the distribution. STDEV is the standard deviation of the distribution." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Standardize + + + + +Excel STANDARDIZE(number; mean; STDEV): Converts a random variable to a normalized value. Number is the value to be standardized. Mean is the arithmetic mean of the distribution. STDEV is the standard deviation of the distribution. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `STANDARDIZE` + + + diff --git a/docs/build/reference/transformer/Excel_STDEV.md b/docs/build/reference/transformer/Excel_STDEV.md new file mode 100644 index 000000000..363cbf6b2 --- /dev/null +++ b/docs/build/reference/transformer/Excel_STDEV.md @@ -0,0 +1,25 @@ +--- +title: "Stdev" +description: "Excel STDEV(number_1; number_2; ... number_30): Estimates the standard deviation based on a sample. Number_1; number_2; ... number_30 are numerical values or ranges representing a sample based on an entire population." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Stdev + + + + +Excel STDEV(number_1; number_2; ... number_30): Estimates the standard deviation based on a sample. Number_1; number_2; ... number_30 are numerical values or ranges representing a sample based on an entire population. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `STDEV` + + + diff --git a/docs/build/reference/transformer/Excel_STDEVA.md b/docs/build/reference/transformer/Excel_STDEVA.md new file mode 100644 index 000000000..63c4edcd2 --- /dev/null +++ b/docs/build/reference/transformer/Excel_STDEVA.md @@ -0,0 +1,25 @@ +--- +title: "Stdeva" +description: "Excel STDEVA(value_1; value_2; ... value_30): Calculates the standard deviation of an estimation based on a sample. Value_1; value_2; ... value_30 are values or ranges representing a sample derived from an entire population. Text has the value 0." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Stdeva + + + + +Excel STDEVA(value_1; value_2; ... value_30): Calculates the standard deviation of an estimation based on a sample. Value_1; value_2; ... value_30 are values or ranges representing a sample derived from an entire population. Text has the value 0. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `STDEVA` + + + diff --git a/docs/build/reference/transformer/Excel_STDEVP.md b/docs/build/reference/transformer/Excel_STDEVP.md new file mode 100644 index 000000000..e33178ece --- /dev/null +++ b/docs/build/reference/transformer/Excel_STDEVP.md @@ -0,0 +1,25 @@ +--- +title: "Stdevp" +description: "Excel STDEVP(number_1; number_2; ... number_30): Calculates the standard deviation based on the entire population. Number_1; number_2; ... number_30 are numerical values or ranges representing a sample based on an entire population." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Stdevp + + + + +Excel STDEVP(number_1; number_2; ... number_30): Calculates the standard deviation based on the entire population. Number_1; number_2; ... number_30 are numerical values or ranges representing a sample based on an entire population. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `STDEVP` + + + diff --git a/docs/build/reference/transformer/Excel_STDEVPA.md b/docs/build/reference/transformer/Excel_STDEVPA.md new file mode 100644 index 000000000..3007385a3 --- /dev/null +++ b/docs/build/reference/transformer/Excel_STDEVPA.md @@ -0,0 +1,25 @@ +--- +title: "Stdevpa" +description: "Excel STDEVPA(value_1; value_2; ... value_30): Calculates the standard deviation based on the entire population. Value_1; value_2; ... value_30 are values or ranges representing a sample derived from an entire population. Text has the value 0." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Stdevpa + + + + +Excel STDEVPA(value_1; value_2; ... value_30): Calculates the standard deviation based on the entire population. Value_1; value_2; ... value_30 are values or ranges representing a sample derived from an entire population. Text has the value 0. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `STDEVPA` + + + diff --git a/docs/build/reference/transformer/Excel_SUBSTITUTE.md b/docs/build/reference/transformer/Excel_SUBSTITUTE.md new file mode 100644 index 000000000..06f1c66d8 --- /dev/null +++ b/docs/build/reference/transformer/Excel_SUBSTITUTE.md @@ -0,0 +1,25 @@ +--- +title: "Substitute" +description: "Excel SUBSTITUTE(text; search_text; new text; occurrence): Substitutes new text for old text in a string. Text is the text in which text segments are to be exchanged. Search_text is the text segment that is to be replaced (a number of times). New text is the text that is to replace the text segment. Occurrence (optional) indicates how many occurrences of the search text are to be replaced. If this parameter is missing, the search text is replaced throughout." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Substitute + + + + +Excel SUBSTITUTE(text; search_text; new text; occurrence): Substitutes new text for old text in a string. Text is the text in which text segments are to be exchanged. Search_text is the text segment that is to be replaced (a number of times). New text is the text that is to replace the text segment. Occurrence (optional) indicates how many occurrences of the search text are to be replaced. If this parameter is missing, the search text is replaced throughout. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SUBSTITUTE` + + + diff --git a/docs/build/reference/transformer/Excel_SUM.md b/docs/build/reference/transformer/Excel_SUM.md new file mode 100644 index 000000000..067a0f266 --- /dev/null +++ b/docs/build/reference/transformer/Excel_SUM.md @@ -0,0 +1,25 @@ +--- +title: "Sum" +description: "Excel SUM(number_1; number_2; ... number_30): Adds all the numbers in a range of cells. Number_1; number_2;... 
number_30 are up to 30 arguments whose sum is to be calculated. You can also enter a range using cell references." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sum + + + + +Excel SUM(number_1; number_2; ... number_30): Adds all the numbers in a range of cells. Number_1; number_2;... number_30 are up to 30 arguments whose sum is to be calculated. You can also enter a range using cell references. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SUM` + + + diff --git a/docs/build/reference/transformer/Excel_SUMPRODUCT.md b/docs/build/reference/transformer/Excel_SUMPRODUCT.md new file mode 100644 index 000000000..6baa7fe90 --- /dev/null +++ b/docs/build/reference/transformer/Excel_SUMPRODUCT.md @@ -0,0 +1,25 @@ +--- +title: "Sumproduct" +description: "Excel SUMPRODUCT(array 1; array 2; ...array 30): Multiplies corresponding elements in the given arrays, and returns the sum of those products. Array 1; array 2;...array 30 are arrays whose corresponding elements are to be multiplied. At least one array must be part of the argument list. If only one array is given, all array elements are summed." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sumproduct + + + + +Excel SUMPRODUCT(array 1; array 2; ...array 30): Multiplies corresponding elements in the given arrays, and returns the sum of those products. Array 1; array 2;...array 30 are arrays whose corresponding elements are to be multiplied. At least one array must be part of the argument list. If only one array is given, all array elements are summed. 
+ +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SUMPRODUCT` + + + diff --git a/docs/build/reference/transformer/Excel_SUMSQ.md b/docs/build/reference/transformer/Excel_SUMSQ.md new file mode 100644 index 000000000..cbca814ad --- /dev/null +++ b/docs/build/reference/transformer/Excel_SUMSQ.md @@ -0,0 +1,25 @@ +--- +title: "Sumsq" +description: "Excel SUMSQ(number_1; number_2; ... number_30): Calculates the sum of the squares of numbers (totaling up of the squares of the arguments). Number_1; number_2; ... number_30 are up to 30 arguments, the sum of whose squares is to be calculated." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sumsq + + + + +Excel SUMSQ(number_1; number_2; ... number_30): Calculates the sum of the squares of numbers (totaling up of the squares of the arguments). Number_1; number_2; ... number_30 are up to 30 arguments, the sum of whose squares is to be calculated. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SUMSQ` + + + diff --git a/docs/build/reference/transformer/Excel_SUMX2MY2.md b/docs/build/reference/transformer/Excel_SUMX2MY2.md new file mode 100644 index 000000000..c27566aac --- /dev/null +++ b/docs/build/reference/transformer/Excel_SUMX2MY2.md @@ -0,0 +1,25 @@ +--- +title: "Sumx2my2" +description: "Excel SUMX2MY2(array_X; array_Y): Returns the sum of the difference of squares of corresponding values in two arrays. Array_X is the first array whose elements are to be squared and added. Array_Y is the second array whose elements are to be squared and subtracted." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sumx2my2 + + + + +Excel SUMX2MY2(array_X; array_Y): Returns the sum of the difference of squares of corresponding values in two arrays. Array_X is the first array whose elements are to be squared and added. 
Array_Y is the second array whose elements are to be squared and subtracted. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SUMX2MY2` + + + diff --git a/docs/build/reference/transformer/Excel_SUMX2PY2.md b/docs/build/reference/transformer/Excel_SUMX2PY2.md new file mode 100644 index 000000000..07861caff --- /dev/null +++ b/docs/build/reference/transformer/Excel_SUMX2PY2.md @@ -0,0 +1,25 @@ +--- +title: "Sumx2py2" +description: "Excel SUMX2PY2(array_X; array_Y): Returns the sum of the sum of squares of corresponding values in two arrays. Array_X is the first array whose elements are to be squared and added. Array_Y is the second array, whose elements are to be squared and added." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sumx2py2 + + + + +Excel SUMX2PY2(array_X; array_Y): Returns the sum of the sum of squares of corresponding values in two arrays. Array_X is the first array whose elements are to be squared and added. Array_Y is the second array, whose elements are to be squared and added. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SUMX2PY2` + + + diff --git a/docs/build/reference/transformer/Excel_SUMXMY2.md b/docs/build/reference/transformer/Excel_SUMXMY2.md new file mode 100644 index 000000000..4f09bd5e8 --- /dev/null +++ b/docs/build/reference/transformer/Excel_SUMXMY2.md @@ -0,0 +1,25 @@ +--- +title: "Sumxmy2" +description: "Excel SUMXMY2(array_X; array_Y): Adds the squares of the differences between corresponding values in two arrays. Array_X is the first array whose elements are to be subtracted and squared. Array_Y is the second array, whose elements are to be subtracted and squared." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sumxmy2 + + + + +Excel SUMXMY2(array_X; array_Y): Adds the squares of the differences between corresponding values in two arrays. 
Array_X is the first array whose elements are to be subtracted and squared. Array_Y is the second array, whose elements are to be subtracted and squared. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `SUMXMY2` + + + diff --git a/docs/build/reference/transformer/Excel_TAN.md b/docs/build/reference/transformer/Excel_TAN.md new file mode 100644 index 000000000..a8ac59567 --- /dev/null +++ b/docs/build/reference/transformer/Excel_TAN.md @@ -0,0 +1,25 @@ +--- +title: "Tan" +description: "Excel TAN(number): Returns the tangent of the given number (angle in radians)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Tan + + + + +Excel TAN(number): Returns the tangent of the given number (angle in radians). + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `TAN` + + + diff --git a/docs/build/reference/transformer/Excel_TANH.md b/docs/build/reference/transformer/Excel_TANH.md new file mode 100644 index 000000000..cc7aa0ccb --- /dev/null +++ b/docs/build/reference/transformer/Excel_TANH.md @@ -0,0 +1,25 @@ +--- +title: "Tanh" +description: "Excel TANH(number): Returns the hyperbolic tangent of the given number (angle in radians)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Tanh + + + + +Excel TANH(number): Returns the hyperbolic tangent of the given number (angle in radians). + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `TANH` + + + diff --git a/docs/build/reference/transformer/Excel_TDIST.md b/docs/build/reference/transformer/Excel_TDIST.md new file mode 100644 index 000000000..8d6b1beb1 --- /dev/null +++ b/docs/build/reference/transformer/Excel_TDIST.md @@ -0,0 +1,25 @@ +--- +title: "Tdist" +description: "Excel TDIST(number; degrees_freedom; mode): Returns the t-distribution for the given Number. 
Degrees_freedom is the number of degrees of freedom for the t-distribution. Mode = 1 returns the one-tailed test, Mode = 2 returns the two-tailed test." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Tdist + + + + +Excel TDIST(number; degrees_freedom; mode): Returns the t-distribution for the given Number. Degrees_freedom is the number of degrees of freedom for the t-distribution. Mode = 1 returns the one-tailed test, Mode = 2 returns the two-tailed test. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `TDIST` + + + diff --git a/docs/build/reference/transformer/Excel_TRUE.md b/docs/build/reference/transformer/Excel_TRUE.md new file mode 100644 index 000000000..4cbde3038 --- /dev/null +++ b/docs/build/reference/transformer/Excel_TRUE.md @@ -0,0 +1,25 @@ +--- +title: "True" +description: "Excel TRUE(): Sets the logical value to TRUE. The TRUE() function does not require any arguments." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# True + + + + +Excel TRUE(): Sets the logical value to TRUE. The TRUE() function does not require any arguments. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `TRUE` + + + diff --git a/docs/build/reference/transformer/Excel_TRUNC.md b/docs/build/reference/transformer/Excel_TRUNC.md new file mode 100644 index 000000000..aba082f19 --- /dev/null +++ b/docs/build/reference/transformer/Excel_TRUNC.md @@ -0,0 +1,25 @@ +--- +title: "Trunc" +description: "Excel TRUNC(number; count): Truncates a number to an integer by removing the fractional part of the number according to the precision specified in Tools > Options > OpenOffice.org Calc > Calculate. Number is the number whose decimal places are to be cut off. Count is the number of decimal places which are not cut off." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Trunc + + + + +Excel TRUNC(number; count): Truncates a number to an integer by removing the fractional part of the number according to the precision specified in Tools > Options > OpenOffice.org Calc > Calculate. Number is the number whose decimal places are to be cut off. Count is the number of decimal places which are not cut off. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `TRUNC` + + + diff --git a/docs/build/reference/transformer/Excel_VAR.md b/docs/build/reference/transformer/Excel_VAR.md new file mode 100644 index 000000000..455bf9e9a --- /dev/null +++ b/docs/build/reference/transformer/Excel_VAR.md @@ -0,0 +1,25 @@ +--- +title: "Var" +description: "Excel VAR(number_1; number_2; ... number_30): Estimates the variance based on a sample. Number_1; number_2; ... number_30 are numerical values or ranges representing a sample based on an entire population." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Var + + + + +Excel VAR(number_1; number_2; ... number_30): Estimates the variance based on a sample. Number_1; number_2; ... number_30 are numerical values or ranges representing a sample based on an entire population. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `VAR` + + + diff --git a/docs/build/reference/transformer/Excel_VARA.md b/docs/build/reference/transformer/Excel_VARA.md new file mode 100644 index 000000000..93b7024ee --- /dev/null +++ b/docs/build/reference/transformer/Excel_VARA.md @@ -0,0 +1,25 @@ +--- +title: "Vara" +description: "Excel VARA(value_1; value_2; ... value_30): Estimates a variance based on a sample. The value of text is 0. Value_1; value_2; ... value_30 are values or ranges representing a sample derived from an entire population. Text has the value 0." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Vara + + + + +Excel VARA(value_1; value_2; ... value_30): Estimates a variance based on a sample. The value of text is 0. Value_1; value_2; ... value_30 are values or ranges representing a sample derived from an entire population. Text has the value 0. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `VARA` + + + diff --git a/docs/build/reference/transformer/Excel_VARP.md b/docs/build/reference/transformer/Excel_VARP.md new file mode 100644 index 000000000..55802cd8b --- /dev/null +++ b/docs/build/reference/transformer/Excel_VARP.md @@ -0,0 +1,25 @@ +--- +title: "Varp" +description: "Excel VARP(Number_1; number_2; ... number_30): Calculates a variance based on the entire population. Number_1; number_2; ... number_30 are numerical values or ranges representing an entire population." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Varp + + + + +Excel VARP(Number_1; number_2; ... number_30): Calculates a variance based on the entire population. Number_1; number_2; ... number_30 are numerical values or ranges representing an entire population. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `VARP` + + + diff --git a/docs/build/reference/transformer/Excel_VARPA.md b/docs/build/reference/transformer/Excel_VARPA.md new file mode 100644 index 000000000..0bce4baed --- /dev/null +++ b/docs/build/reference/transformer/Excel_VARPA.md @@ -0,0 +1,25 @@ +--- +title: "Varpa" +description: "Excel VARPA(value_1; value_2; ... value_30): Calculates the variance based on the entire population. The value of text is 0. Value_1; value_2; ... value_30 are values or ranges representing an entire population." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Varpa + + + + +Excel VARPA(value_1; value_2; ...
value_30): Calculates the variance based on the entire population. The value of text is 0. Value_1; value_2; ... value_30 are values or ranges representing an entire population. + +## Parameter + +### Function name + +The name of the Excel function + +- Datatype: `string` +- Default Value: `VARPA` + + + diff --git a/docs/build/reference/transformer/FloatTypeParser.md b/docs/build/reference/transformer/FloatTypeParser.md new file mode 100644 index 000000000..3ced89217 --- /dev/null +++ b/docs/build/reference/transformer/FloatTypeParser.md @@ -0,0 +1,43 @@ +--- +title: "Parse float" +description: "Parses and normalizes float values." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Parse float + + + + +Parses and normalizes float values. + +## Parameter + +### Comma as decimal point + +No description + +- Datatype: `boolean` +- Default Value: `false` + + + +### Thousand separator + +No description + +- Datatype: `boolean` +- Default Value: `false` + + + +### Brackets for negative + +No description + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/GeoCoordinateParser.md b/docs/build/reference/transformer/GeoCoordinateParser.md new file mode 100644 index 000000000..50b19dbed --- /dev/null +++ b/docs/build/reference/transformer/GeoCoordinateParser.md @@ -0,0 +1,17 @@ +--- +title: "Parse geo coordinate" +description: "Parses and normalizes geo coordinates." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Parse geo coordinate + + + + +Parses and normalizes geo coordinates.
+ +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/GeoLocationParser.md b/docs/build/reference/transformer/GeoLocationParser.md new file mode 100644 index 000000000..edc40f61c --- /dev/null +++ b/docs/build/reference/transformer/GeoLocationParser.md @@ -0,0 +1,34 @@ +--- +title: "Parse geo location" +description: "Parses and normalizes geo locations like continents, countries, states and cities." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Parse geo location + + + + +Parses and normalizes geo locations like continents, countries, states and cities. + +## Parameter + +### Parse type id + +What type of location should be parsed. + +- Datatype: `enumeration` +- Default Value: `None` + + + +### Full state name + +Set to true if the full state name should be output instead of the 2-letter code. + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/transformer/IntegerParser.md b/docs/build/reference/transformer/IntegerParser.md new file mode 100644 index 000000000..1a8113395 --- /dev/null +++ b/docs/build/reference/transformer/IntegerParser.md @@ -0,0 +1,34 @@ +--- +title: "Parse integer" +description: "Parses integer values." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Parse integer + + + + +Parses integer values. 
+ +## Parameter + +### Comma as decimal point + +Use comma as decimal point (otherwise, a point is used) + +- Datatype: `boolean` +- Default Value: `false` + + + +### Thousand separator + +Use comma or point to separate digits + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/IsinParser.md b/docs/build/reference/transformer/IsinParser.md new file mode 100644 index 000000000..865da3b7b --- /dev/null +++ b/docs/build/reference/transformer/IsinParser.md @@ -0,0 +1,17 @@ +--- +title: "Parse ISIN" +description: "Parses International Securities Identification Number (ISIN) values and fails if the string is not a valid ISIN." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Parse ISIN + + + + +Parses International Securities Identification Number (ISIN) values and fails if the string is not a valid ISIN. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/NYSIIS.md b/docs/build/reference/transformer/NYSIIS.md new file mode 100644 index 000000000..7da84e135 --- /dev/null +++ b/docs/build/reference/transformer/NYSIIS.md @@ -0,0 +1,25 @@ +--- +title: "NYSIIS" +description: "NYSIIS phonetic encoding." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# NYSIIS + + + + +NYSIIS phonetic encoding. + +## Parameter + +### Refined + +No description + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/transformer/PhysicalQuantitiesNormalizer.md b/docs/build/reference/transformer/PhysicalQuantitiesNormalizer.md new file mode 100644 index 000000000..d26e2cbb4 --- /dev/null +++ b/docs/build/reference/transformer/PhysicalQuantitiesNormalizer.md @@ -0,0 +1,359 @@ +--- +title: "Normalize physical quantity" +description: "Normalizes physical quantities. Can either convert to a configured unit or to SI base units. For instance, for lengths, values will be converted to metres if no target unit is configured.
Will output the pure numeric value without the unit. If one input is provided, the physical quantities are parsed from the provided strings of the form \"1 km\". If two inputs are provided, the numeric values are parsed from the first input and the units are parsed from the second input." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Normalize physical quantity + + + + +SI units and common derived units are supported. The following section lists all supported units. By default, all quantities are normalized to their base unit. For instance, lengths will be normalized to metres. +### Supported units + +#### Time + +Time is expressed in seconds (symbol: `s`). +The following alternative symbols are supported: +* `mo_s`: day*29.53059 +* `mo_g`: year/12.0 +* `a`: day*365.25 +* `min`: min +* `a_g`: year +* `mo`: (day*365.25)/12.0 +* `mo_j`: (day*365.25)/12.0 +* `a_j`: day*365.25 +* `h`: h +* `a_t`: day*365.24219 +* `d`: day + + +#### Length + +Length is expressed in metres (symbol: `m`). +The following alternative symbols are supported: +* `in`: c(cm*254.0) +* `nmi`: m*1852.0 +* `Ao`: dnm +* `mil`: m(c(cm*254.0)) +* `yd`: ((c(cm*254.0))*12.0)*3.0 +* `AU`: m*1.49597871E11 +* `ft`: (c(cm*254.0))*12.0 +* `pc`: m*3.085678E16 +* `fth`: ((c(cm*254.0))*12.0)*6.0 +* `mi`: ((c(cm*254.0))*12.0)*5280.0 +* `hd`: (c(cm*254.0))*4.0 + + +#### Mass + +Mass is expressed in kilograms (symbol: `kg`). +The following alternative symbols are supported: +* `lb`: lb +* `ston`: hlb*20.0 +* `t`: Mg +* `stone`: lb*14.0 +* `u`: AMU +* `gr`: (mg*6479891.0)/100000.0 +* `lcwt`: lb*112.0 +* `oz`: oz +* `g`: g +* `scwt`: hlb +* `dr`: oz/16.0 +* `lton`: (lb*112.0)*20.0 + + +#### Electric current + +Electric current is expressed in amperes (symbol: `A`). +The following alternative symbols are supported: +* `Bi`: daA +* `Gb`: cm·(A/m)*250.0/[one?] + + +#### Temperature + +Temperature is expressed in kelvins (symbol: `K`).
+The following alternative symbols are supported: +* `Cel`: ℃ + + +#### Amount of substance + +Amount of substance is expressed in moles (symbol: `mol`). + +#### Luminous intensity + +Luminous intensity is expressed in candelas (symbol: `cd`). + +#### Area + +Area is expressed in square metres (symbol: `m²`). +The following alternative symbols are supported: +* `m2`: m² +* `ar`: hm² +* `syd`: ((c(cm*254.0))*12.0)*3.0² +* `cml`: [one?]/4.0·m(c(cm*254.0))² +* `b`: hfm² +* `sft`: (c(cm*254.0))*12.0² +* `sin`: c(cm*254.0)² + + +#### Volume + +Volume is expressed in cubic metres (symbol: `㎥`). +The following alternative symbols are supported: +* `st`: [㎥?] +* `bf`: (c(cm*254.0)³)*144.0 +* `cyd`: ((c(cm*254.0))*12.0)*3.0³ +* `cr`: ((c(cm*254.0))*12.0³)*128.0 +* `L`: L +* `l`: l +* `cin`: c(cm*254.0)³ +* `cft`: (c(cm*254.0))*12.0³ +* `m3`: ㎥ + + +#### Energy + +Energy is expressed in joules (symbol: `J`). +The following alternative symbols are supported: +* `cal_IT`: (J*41868.0)/10000.0 +* `eV`: J*1.602176487E-19 +* `cal_m`: (J*419002.0)/100000.0 +* `cal`: m(J*4184.0) +* `cal_th`: m(J*4184.0) + + +#### Angle + +Angle is expressed in radians (symbol: `rad`). 
+The following alternative symbols are supported: +* `circ`: [one?]·rad*2.0 +* `gon`: ([one?]·rad/180.0)*0.9 +* `deg`: [one?]·rad/180.0 +* `'`: ([one?]·rad/180.0)/60.0 +* `''`: (([one?]·rad/180.0)/60.0)/60.0 + + +#### Others + +- `1/m`, derived units: `Ky`: c(1/m) +- `kg/(m·s)`, derived units: `P`: g/(s·cm) +- `bit/s`, derived units: `Bd`: bit/s +- `bit`, derived units: `By`: bit*8.0 +- `Sv` +- `N` +- `Ω`, derived units: `Ohm`: Ω +- `T`, derived units: `G`: T/10000.0 +- `sr`, derived units: `sph`: [one?]·sr*4.0 +- `F` +- `C/kg`, derived units: `R`: (C/kg)*2.58E-4 +- `cd/m²`, derived units: `sb`: cd/cm², `Lmb`: cd/([one?]·cm²) +- `Pa`, derived units: `bar`: Pa*100000.0, `atm`: Pa*101325.0 +- `kg/(m·s²)`, derived units: `att`: k(g·(m/s²)*9.80665)/cm² +- `m²/s`, derived units: `St`: cm²/s +- `A/m`, derived units: `Oe`: (A/m)*250.0/[one?] +- `kg·m²/s²`, derived units: `erg`: cm²·g/s² +- `kg/m³`, derived units: `g%`: g/dl +- `mho` +- `V` +- `lx`, derived units: `ph`: lx/10000.0 +- `m/s²`, derived units: `Gal`: cm/s², `m/s2`: m/s² +- `m/s`, derived units: `kn`: m*1852.0/h +- `m·kg/s²`, derived units: `gf`: g·(m/s²)*9.80665, `lbf`: lb·(m/s²)*9.80665, `dyn`: cm·g/s² +- `m²/s²`, derived units: `RAD`: cm²·g/(s²·hg), `REM`: cm²·g/(s²·hg) +- `C` +- `Gy` +- `Hz` +- `H` +- `lm` +- `W` +- `Wb`, derived units: `Mx`: Wb/1.0E8 +- `Bq`, derived units: `Ci`: Bq*3.7E10 +- `S` + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[1 km]` + +* Returns: + + → `[1000.0]` + + +--- +#### Example 2: + +* Input values: + 1. `[1.0000 ft]` + +* Returns: + + → `[0.3048]` + + +--- +#### Example 3: + +* Input values: + 1. `[1.0lb]` + +* Returns: + + → `[0.45359237]` + + +--- +#### Example 4: + +* Input values: + 1. `[1000000000.0 nm]` + +* Returns: + + → `[1.0]` + + +--- +#### Example 5: + +* Input values: + 1. 
`[-1E6 m]` + +* Returns: + + → `[-1000000.0]` + + +--- +#### Example 6: + +* Parameters + * *numberFormat*: `de` + +* Input values: + 1. `[1.000,5 m]` + +* Returns: + + → `[1000.5]` + + +--- +#### Example 7: + +* Input values: + 1. `[1,000.5 m]` + +* Returns: + + → `[1000.5]` + + +--- +#### Example 8: + +* Parameters + * *targetUnit*: `mi` + +* Input values: + 1. `[1 km]` + +* Returns: + + → `[0.621371192237334]` + + +--- +#### Example 9: + +* Parameters + * *targetUnit*: `m` + +* Input values: + 1. `[1 kg]` + +* Returns: + + → `[]` + + +--- +#### Example 10: + +* Input values: + 1. `[100.0]` + +* Returns: + + → `[]` + + +--- +#### Example 11: + +* Input values: + 1. `[1]` + 2. `[km]` + +* Returns: + + → `[1000.0]` + + +--- +#### Example 12: + +* Input values: + 1. `[1, 10000]` + 2. `[km, mm]` + +* Returns: + + → `[1000.0, 10.0]` + + +--- +#### Example 13: + +* Input values: + 1. `[1, 10000, 10]` + 2. `[km, mm]` + +* Returns: + + → `[]` + + + + +## Parameter + +### Target unit + +Target unit. Can be left empty to convert to the respective SI base units. + +- Datatype: `string` +- Default Value: `None` + + + +### Number format + +The IETF BCP 47 language tag, e.g., 'en'. + +- Datatype: `string` +- Default Value: `en` + + + diff --git a/docs/build/reference/transformer/RetrieveCoordinates.md b/docs/build/reference/transformer/RetrieveCoordinates.md new file mode 100644 index 000000000..0b5b4544a --- /dev/null +++ b/docs/build/reference/transformer/RetrieveCoordinates.md @@ -0,0 +1,58 @@ +--- +title: "Retrieve coordinates" +description: "Retrieves geographic coordinates using Nominatim." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Retrieve coordinates + + + + + +**Configuration** + +The geocoding service to be queried for searches can be set up in the configuration. 
+The default configuration is as follows: + + com.eccenca.di.geo = { + # The URL of the geocoding service + # url = "https://nominatim.eccenca.com/search" + url = "https://photon.komoot.de/api" + # url = https://api-adresse.data.gouv.fr/search + + # Additional URL parameters to be attached to all HTTP search requests. Example: '&countrycodes=de&addressdetails=1'. + # Will be attached in addition to the parameters set on each search operator directly. + searchParameters = "" + + # The minimum pause time between subsequent queries + pauseTime = 1s + + # Number of coordinates to be cached in-memory + cacheSize = 10 + } + +In general, all services adhering to the [Nominatim search API](https://nominatim.org/release-docs/develop/api/Search/) should be usable. +Please note that when using public services, the pause time should be set to avoid overloading. + +**Logging** + +By default, individual requests to the geocoding service are not logged. To enable logging each request, the following configuration option can be set: + + logging.level { + com.eccenca.di.geo=DEBUG + } + + +## Parameter + +### Additional parameters + +Additional URL parameters to be attached to each HTTP search request. Example: '&countrycodes=de&addressdetails=1'. Consult the API documentation for a list of available parameters. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/RetrieveLatitude.md b/docs/build/reference/transformer/RetrieveLatitude.md new file mode 100644 index 000000000..abaecce55 --- /dev/null +++ b/docs/build/reference/transformer/RetrieveLatitude.md @@ -0,0 +1,58 @@ +--- +title: "Retrieve latitude" +description: "Retrieves geographic coordinates using Nominatim and returns the latitude." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Retrieve latitude + + + + + +**Configuration** + +The geocoding service to be queried for searches can be set up in the configuration. 
+The default configuration is as follows: + + com.eccenca.di.geo = { + # The URL of the geocoding service + # url = "https://nominatim.eccenca.com/search" + url = "https://photon.komoot.de/api" + # url = https://api-adresse.data.gouv.fr/search + + # Additional URL parameters to be attached to all HTTP search requests. Example: '&countrycodes=de&addressdetails=1'. + # Will be attached in addition to the parameters set on each search operator directly. + searchParameters = "" + + # The minimum pause time between subsequent queries + pauseTime = 1s + + # Number of coordinates to be cached in-memory + cacheSize = 10 + } + +In general, all services adhering to the [Nominatim search API](https://nominatim.org/release-docs/develop/api/Search/) should be usable. +Please note that when using public services, the pause time should be set to avoid overloading. + +**Logging** + +By default, individual requests to the geocoding service are not logged. To enable logging each request, the following configuration option can be set: + + logging.level { + com.eccenca.di.geo=DEBUG + } + + +## Parameter + +### Additional parameters + +Additional URL parameters to be attached to each HTTP search request. Example: '&countrycodes=de&addressdetails=1'. Consult the API documentation for a list of available parameters. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/RetrieveLongitude.md b/docs/build/reference/transformer/RetrieveLongitude.md new file mode 100644 index 000000000..beb28c2b2 --- /dev/null +++ b/docs/build/reference/transformer/RetrieveLongitude.md @@ -0,0 +1,58 @@ +--- +title: "Retrieve longitude" +description: "Retrieves geographic coordinates using Nominatim and returns the longitude." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Retrieve longitude + + + + + +**Configuration** + +The geocoding service to be queried for searches can be set up in the configuration. 
+The default configuration is as follows: + + com.eccenca.di.geo = { + # The URL of the geocoding service + # url = "https://nominatim.eccenca.com/search" + url = "https://photon.komoot.de/api" + # url = https://api-adresse.data.gouv.fr/search + + # Additional URL parameters to be attached to all HTTP search requests. Example: '&countrycodes=de&addressdetails=1'. + # Will be attached in addition to the parameters set on each search operator directly. + searchParameters = "" + + # The minimum pause time between subsequent queries + pauseTime = 1s + + # Number of coordinates to be cached in-memory + cacheSize = 10 + } + +In general, all services adhering to the [Nominatim search API](https://nominatim.org/release-docs/develop/api/Search/) should be usable. +Please note that when using public services, the pause time should be set to avoid overloading. + +**Logging** + +By default, individual requests to the geocoding service are not logged. To enable logging each request, the following configuration option can be set: + + logging.level { + com.eccenca.di.geo=DEBUG + } + + +## Parameter + +### Additional parameters + +Additional URL parameters to be attached to each HTTP search request. Example: '&countrycodes=de&addressdetails=1'. Consult the API documentation for a list of available parameters. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/SkosTypeParser.md b/docs/build/reference/transformer/SkosTypeParser.md new file mode 100644 index 000000000..de8b02959 --- /dev/null +++ b/docs/build/reference/transformer/SkosTypeParser.md @@ -0,0 +1,25 @@ +--- +title: "Parse SKOS term" +description: "Parses values from a SKOS ontology." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Parse SKOS term + + + + +Parses values from a SKOS ontology. 
+ +## Parameter + +### Surface form to representation mapping + +No description + +- Datatype: `stringmap` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/StringParser.md b/docs/build/reference/transformer/StringParser.md new file mode 100644 index 000000000..0a3f9b033 --- /dev/null +++ b/docs/build/reference/transformer/StringParser.md @@ -0,0 +1,17 @@ +--- +title: "Parse string" +description: "Parses string values, basically an identity function." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Parse string + + + + +Parses string values, basically an identity function. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/TemplateTransformer.md b/docs/build/reference/transformer/TemplateTransformer.md new file mode 100644 index 000000000..f5a637a8d --- /dev/null +++ b/docs/build/reference/transformer/TemplateTransformer.md @@ -0,0 +1,123 @@ +--- +title: "Evaluate template" +description: "Evaluates a template. Input values can be addressed using the variables 'input1', 'input2', etc. Global variables are available in the 'global' scope, e.g., 'global.myVar'." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Evaluate template + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *template*: `Hello {{input1}} {{input2}}, + +How are you today?` + +* Input values: + 1. `[John]` + 2. `[Doe]` + +* Returns: + + → `[Hello John Doe, + +How are you today?]` + + +--- +#### Example 2: + +* Parameters + * *template*: `Hello {{badVariable}} {{input1}}` + +* Input values: + 1. `[John]` + 2. 
`[Doe]` + +* Returns: + + → `[]` + + +--- +#### Example 3: + +* Parameters + * *template*: `Hello {{input01}}` + +* Returns: + + → `[]` + + +--- +#### Example 4: + +* Parameters + * *template*: `Hello {{input1}}` + +* Returns: + + → `[]` + + +--- +#### Example 5: + +* Parameters + * *template*: `Hello {{input1}}` + +* Input values: + 1. `[A, B]` + +* Returns: + + → `[Hello AB]` + + +--- +#### Example 6: + +* Parameters + * *template*: `Hello {% for value in input1 %}{{value}}, {% endfor %}how are you doing?` + +* Input values: + 1. `[Bob, Eve]` + +* Returns: + + → `[Hello Bob, Eve, how are you doing?]` + + + + +## Parameter + +### Template + +The template + +- Datatype: `template` +- Default Value: `None` + + + +### Language + +The template language. Currently, Jinja is supported. + +- Datatype: `string` +- Default Value: `jinja` + + + diff --git a/docs/build/reference/transformer/aggregateNumbers.md b/docs/build/reference/transformer/aggregateNumbers.md new file mode 100644 index 000000000..5d09c38f7 --- /dev/null +++ b/docs/build/reference/transformer/aggregateNumbers.md @@ -0,0 +1,25 @@ +--- +title: "Aggregate numbers" +description: "Aggregates all numbers in this set using a mathematical operation." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Aggregate numbers + + + + +Aggregates all numbers in this set using a mathematical operation. + +## Parameter + +### Operator + +One of '+', '*', 'min', 'max', 'average'. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/alphaReduce.md b/docs/build/reference/transformer/alphaReduce.md new file mode 100644 index 000000000..d08bb56f7 --- /dev/null +++ b/docs/build/reference/transformer/alphaReduce.md @@ -0,0 +1,17 @@ +--- +title: "Strip non-alphabetic characters" +description: "Strips all non-alphabetic characters from a string. Spaces are retained." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Strip non-alphabetic characters + + + + +Strips all non-alphabetic characters from a string. Spaces are retained. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/camelCase.md b/docs/build/reference/transformer/camelCase.md new file mode 100644 index 000000000..b6f1dda51 --- /dev/null +++ b/docs/build/reference/transformer/camelCase.md @@ -0,0 +1,114 @@ +--- +title: "Camel case" +description: "Converts a string to camel case. Upper camel case is the default, lower camel case can be chosen." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Camel case + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### A sentence with several words is converted to a single word written in UpperCamelCase: + +* Parameters + * *isDromedary*: `false` + +* Input values: + 1. `[hello world]` + +* Returns: + + → `[HelloWorld]` + + +--- +#### A sentence with several words is converted to a single word written in lowerCamelCase: + +* Parameters + * *isDromedary*: `true` + +* Input values: + 1. `[hello world]` + +* Returns: + + → `[helloWorld]` + + +--- +#### A single lowercase letter is converted to UpperCamelCase, i.e. capitalized: + +* Parameters + * *isDromedary*: `false` + +* Input values: + 1. `[h]` + +* Returns: + + → `[H]` + + +--- +#### A single lowercase letter is converted to lowerCamelCase (aka. dromedary case), i.e. uncapitalized: + +* Parameters + * *isDromedary*: `true` + +* Input values: + 1. `[h]` + +* Returns: + + → `[h]` + + +--- +#### An empty space is removed. The dromedary/lower case is irrelevant here: + +* Parameters + * *isDromedary*: `true` + +* Input values: + 1. `[ ]` + +* Returns: + + → `[]` + + +--- +#### An empty space is removed. 
The upper case is irrelevant here: + +* Parameters + * *isDromedary*: `false` + +* Input values: + 1. `[ ]` + +* Returns: + + → `[]` + + + + +## Parameter + +### Dromedary case + +If true, lower camel case (aka. dromedary case) is used, otherwise upper camel case is used. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/camelcasetokenizer.md b/docs/build/reference/transformer/camelcasetokenizer.md new file mode 100644 index 000000000..c7c8b213b --- /dev/null +++ b/docs/build/reference/transformer/camelcasetokenizer.md @@ -0,0 +1,44 @@ +--- +title: "Camel case tokenizer" +description: "Tokenizes a camel case string. That is it splits strings between a lower case character and an upper case character." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Camel case tokenizer + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[camelCaseString]` + +* Returns: + + → `[camel, Case, String]` + + +--- +#### Example 2: + +* Input values: + 1. `[nocamelcase]` + +* Returns: + + → `[nocamelcase]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/capitalize.md b/docs/build/reference/transformer/capitalize.md new file mode 100644 index 000000000..b3883467d --- /dev/null +++ b/docs/build/reference/transformer/capitalize.md @@ -0,0 +1,58 @@ +--- +title: "Capitalize" +description: "Capitalizes the string i.e. converts the first character to upper case. If 'allWords' is set to true, all words are capitalized and not only the first character." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Capitalize + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. 
Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *allWords*: `false` + +* Input values: + 1. `[capitalize me]` + +* Returns: + + → `[Capitalize me]` + + +--- +#### Example 2: + +* Parameters + * *allWords*: `true` + +* Input values: + 1. `[capitalize me]` + +* Returns: + + → `[Capitalize Me]` + + + + +## Parameter + +### All words + +No description + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/cmem-plugin-jq-transform.md b/docs/build/reference/transformer/cmem-plugin-jq-transform.md new file mode 100644 index 000000000..9a96e4104 --- /dev/null +++ b/docs/build/reference/transformer/cmem-plugin-jq-transform.md @@ -0,0 +1,98 @@ +--- +title: "jq" +description: "Process a JSON path with a jq filter / program." +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# jq + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +> [jq](https://jqlang.org/) is like sed for JSON data - you can use it to +> slice and filter and map and transform structured data with the same ease that sed, awk, +> grep and friends let you play with text. + +In order to test jq expressions, you can use [play.jqlang.org](https://play.jqlang.org/). 
+ +## Basic concepts: + +- Filters separated by a comma will produce multiple independent outputs: `,` +- Ignores errors if the type is unexpected: `?` +- Array construction: `[]` +- Object construction: `{}` +- Concatenate or Add: `+` +- Difference of sets or Subtract: `-` +- Size of selected element: `length` +- Pipes are used to chain commands in a similar fashion to bash: `|` + +## Dealing with JSON objects + +- Display all keys: `keys` +- Add 1 to all items: `map_values(.+1)` +- Delete a key: `del(.foo)` +- Convert an object to array: `to_entries | map([.key, .value])` + +## Dealing with fields + +- Concatenate two fields: `fieldNew=.field1+' '+.field2` + +## Dealing with arrays: Slicing and Filtering + +- All: `.[]` +- First: `.[0]` +- Range: `.[2:4]` +- First 3: `.[:3]` +- Last 2: `.[-2:]` +- Before Last: `.[-2]` +- Select array of int by value: `map(select(. >= 2))` +- Select array of objects by value: `.[] | select(.id == "second")` +- Select by type: `.[] | numbers` + +Types can be `arrays`, `objects`, `iterables`, `booleans`, `numbers`, `normals`, + `finites`, `strings`, `nulls`, `values` and `scalars`. + +## Dealing with arrays: Mapping and Transforming + +- Add 1 to all items: `map(.+1)` +- Delete 2 items: `del(.[1, 2])` +- Concatenate arrays: `add` +- Flatten an array: `flatten` +- Create a range of numbers: `[range(2;4)]` +- Display the type of each item: `map(type)` +- Sort an array of basic type: `sort` +- Sort an array of objects: `sort_by(.foo)` +- Group by a key - opposite to flatten: `group_by(.foo)` +- Minimum value of an array: `min` (see also `max`, `min_by(path_exp)`, `max_by(path_exp)`) +- Remove duplicates: `unique` or `unique_by(.foo)` or `unique_by(length)` +- Reverse an array: `reverse` + + + +## Parameter + +### jq Expression + +The jq program to apply to the input JSON string.
+ +- Datatype: `string` +- Default Value: `.` + + + +### Output list with one item as string + + + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/transformer/cmem-plugin-number-conversion.md b/docs/build/reference/transformer/cmem-plugin-number-conversion.md new file mode 100644 index 000000000..05c3dda71 --- /dev/null +++ b/docs/build/reference/transformer/cmem-plugin-number-conversion.md @@ -0,0 +1,41 @@ +--- +title: "Convert Number Base" +description: "Convert numbers between different number bases (binary, octal, decimal, hexadecimal)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# Convert Number Base + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +This transform plugin allows users to easily convert numbers + from one base to another. With support for binary, octal, decimal, and hexadecimal, + users can choose the source and target bases to suit their needs. + +## Parameter + +### Source Base + +Source Number Base + +- Datatype: `string` +- Default Value: `None` + + + +### Target Base + +Target Number Base + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/cmem-plugin-ulid.md b/docs/build/reference/transformer/cmem-plugin-ulid.md new file mode 100644 index 000000000..98eabd02a --- /dev/null +++ b/docs/build/reference/transformer/cmem-plugin-ulid.md @@ -0,0 +1,55 @@ +--- +title: "ULID" +description: "Generate ULID strings - Universally Unique Lexicographically Sortable Identifiers." +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# ULID + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md).
+ In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +ULID is a proposed identifier scheme, which produces time-based, random +and sortable strings. The following features are highlighted +[in the specification](https://github.com/ulid/spec): + +- 128-bit compatibility with UUID +- 1.21e+24 unique ULIDs per millisecond +- Lexicographically sortable! +- Canonically encoded as a 26 character string, as opposed to the 36 character UUID +- Uses Crockford's base32 for better efficiency and readability (5 bits per character) +- Case insensitive +- No special characters (URL safe) +- Monotonic sort order (correctly detects and handles the same millisecond) + +This transform plugin allows for creation of ULID based identifiers (plain or URN). +It does not support any input entities. + + +## Parameter + +### Number of Values + +Number of values to generate per entity. + +- Datatype: `Long` +- Default Value: `1` + + + +### Generate URNs + +Generate 'urn:x-ulid:*' strings. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/cmem_plugin_currencies-transform.md b/docs/build/reference/transformer/cmem_plugin_currencies-transform.md new file mode 100644 index 000000000..293348991 --- /dev/null +++ b/docs/build/reference/transformer/cmem_plugin_currencies-transform.md @@ -0,0 +1,93 @@ +--- +title: "Convert currency values" +description: "Converts currencies values with current and historical exchange rates" +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# Convert currency values + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). 
+ + +This transform plugin allows you to convert currencies from one currency to another. +It uses the Euro foreign exchange reference rates from the +[European Central Bank](https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/index.en.html) +to first convert a currency value to EUR (if needed) and then to another currency. + +The plugin contains a data dump which starts with data from 1999-01-04 +(and ends the day before it was downloaded) see the +[change log](https://github.com/eccenca/cmem-plugin-currencies/blob/main/CHANGELOG.md) +for more details. It will use the [frankfurter.app](https://www.frankfurter.app/docs/) +API to receive rates from dates which are not part of the data dump. +This API will throw an error for future days and returns data from the last trading +day for dates where it has no data. +The API reference rates are usually updated at around 16:00 CET every working day +(so you get different rates before and after 16:00 CET in case you ask for TODAYs rates). + +The plugins can work with up-to 4 inputs: + +1. Input: The values which you want to convert. +1. Input: The currency code of your values. If this is not `EUR`, + the plugin will first convert your value to `EUR`. +1. Input: The date from when you want to use the exchange rate. +1. Input: The target currency code. + +For the inputs 2-4, you can define static options as well. +In addition to that, there is a debug switch which outputs more background data than +just the plain values. + +Here is an example of the plugin in action: +![cmem-plugin-currencies Example](https://raw.githubusercontent.com/eccenca/cmem-plugin-currencies/main/README.png) + +The +[following currency codes](https://github.com/eccenca/cmem-plugin-currencies/blob/cf2ee5332ad5243da8c70ade1ed8f4612f48ba33/cmem_plugin_currencies/eurofxref-hist.csv#L1) +can be used with the plugin. +Please be aware that not all of the rates are available for all dates +(e.g. 
after 2022-03-01 there is no RUB rate available anymore). + + +## Parameter + +### 1. Source Currency + +The currency code you want to convert from (e.g. USD). + +- Datatype: `string` +- Default Value: `USD` + + + +### 2. Date + +Set date (e.g. YYYY-MM-DD) to convert currencies based on historic rates. + +- Datatype: `string` +- Default Value: `2025-08-12` + + + +### 3. Target Currency + +Enter the currency code you want to convert to (e.g. USD). + +- Datatype: `string` +- Default Value: `EUR` + + + +### Debug Output + +Instead of plain values, output additional background information. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID1.md b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID1.md new file mode 100644 index 000000000..cfeeaff69 --- /dev/null +++ b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID1.md @@ -0,0 +1,43 @@ +--- +title: "UUID1" +description: "Generate a UUIDv1 from a host ID, sequence number, and the current time" +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# UUID1 + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +UUIDv1 is generated from a host ID, sequence number, and the current +time. + + + +## Parameter + +### Node (default: hardware address) + +Node value in the form "01:23:45:67:89:AB", "01-23-45-67-89-AB", or "0123456789AB". If not given, an attempt is made to obtain the hardware address. If this is unsuccessful, a random 48-bit number is chosen. + +- Datatype: `string` +- Default Value: `None` + + + +### Clock sequence (default: random) + +If clock sequence is given, it is used as the sequence number. 
Otherwise a random 14-bit sequence number is chosen. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID1ToUUID6.md b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID1ToUUID6.md new file mode 100644 index 000000000..95e3c98b5 --- /dev/null +++ b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID1ToUUID6.md @@ -0,0 +1,27 @@ +--- +title: "UUID1 to UUID6" +description: "Generate UUIDv6 from a UUIDv1." +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# UUID1 to UUID6 + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +UUIDv6 is a field-compatible version of UUIDv1, reordered for +improved DB locality. It is expected that UUIDv6 will primarily be +used in contexts where there are existing v1 UUIDs. Systems that do +not involve legacy UUIDv1 SHOULD consider using UUIDv7 instead. + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID3.md b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID3.md new file mode 100644 index 000000000..ce4b28902 --- /dev/null +++ b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID3.md @@ -0,0 +1,40 @@ +--- +title: "UUID3" +description: "Generate a UUIDv3" +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# UUID3 + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). 
+ +UUID3 is based on the MD5 hash of a namespace identifier (which + is a UUID) and a name (which is a string). + +## Parameter + +### Namespace + +The namespace. + +- Datatype: `string` +- Default Value: `None` + + + +### Namespace as UUID + +Applies only if none of the pre-defined namespaces is selected. If enabled, the namespace string needs to be a valid UUID. Otherwise, the namespace UUID is a UUIDv1 derived from the MD5 hash of the namespace string. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID4.md b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID4.md new file mode 100644 index 000000000..995b18e97 --- /dev/null +++ b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID4.md @@ -0,0 +1,22 @@ +--- +title: "UUID4" +description: "Generate a random UUIDv4." +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# UUID4 + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +UUIDv4 specifies a random UUID. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID5.md b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID5.md new file mode 100644 index 000000000..a941ca6ab --- /dev/null +++ b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID5.md @@ -0,0 +1,40 @@ +--- +title: "UUID5" +description: "Generate a UUIDv5" +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# UUID5 + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. 
[with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +UUID5 is based on the SHA1 hash of a namespace identifier (which + is a UUID) and a name (which is a string). + +## Parameter + +### Namespace + +If 'namespace' is not given, the input string is used. + +- Datatype: `string` +- Default Value: `None` + + + +### Namespace as UUID + +Applies only if none of the pre-defined namespaces is selected. If enabled, the namespace string needs to be a valid UUID. Otherwise, the namespace UUID is a UUIDv1 derived from the SHA1 hash of the namespace string. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID6.md b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID6.md new file mode 100644 index 000000000..6824ca34e --- /dev/null +++ b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID6.md @@ -0,0 +1,47 @@ +--- +title: "UUID6" +description: "Generate a UUIDv6 from a host ID, sequence number, and the current time" +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# UUID6 + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + + +UUIDv6 is generated from a host ID, sequence number, and the current +time. + +UUIDv6 is a field-compatible version of UUIDv1, reordered for +improved DB locality. It is expected that UUIDv6 will primarily be +used in contexts where there are existing v1 UUIDs. Systems that do +not involve legacy UUIDv1 SHOULD consider using UUIDv7 instead. + + +## Parameter + +### Node (default: hardware address) + +Node value in the form "01:23:45:67:89:AB", "01-23-45-67-89-AB", or "0123456789AB". 
If not given, a random 48-bit number is chosen. + +- Datatype: `string` +- Default Value: `None` + + + +### Clock sequence (default: random) + +If clock sequence is given, it is used as the sequence number. Otherwise a random 14-bit number is chosen. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID7.md b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID7.md new file mode 100644 index 000000000..9df88025c --- /dev/null +++ b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID7.md @@ -0,0 +1,29 @@ +--- +title: "UUID7" +description: "Generate a UUIDv7 from a random number, and the current time." +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# UUID7 + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +UUIDv7 features a time-ordered value field derived from the +widely implemented and well known Unix Epoch timestamp source, the +number of milliseconds since midnight 1 Jan 1970 UTC, leap seconds +excluded. As well as improved entropy characteristics over versions +1 or 6. +Implementations SHOULD utilize UUIDv7 over UUIDv1 and +6 if possible. + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID8.md b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID8.md new file mode 100644 index 000000000..62b41866c --- /dev/null +++ b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUID8.md @@ -0,0 +1,26 @@ +--- +title: "UUID8" +description: "Generate a UUIDv8 from a random number, and the current time." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# UUID8 + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +UUIDv8 features a time-ordered value field derived from the +widely implemented and well known Unix Epoch timestamp source, the +number of nanoseconds since midnight 1 Jan 1970 UTC, leap seconds +excluded. + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUIDConvert.md b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUIDConvert.md new file mode 100644 index 000000000..91d70e575 --- /dev/null +++ b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUIDConvert.md @@ -0,0 +1,44 @@ +--- +title: "UUID Convert" +description: "Convert a UUID string representation" +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# UUID Convert + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +Convert a UUID string with 32 hexadecimal digits to a 16-byte + string containing the six integer fields in big-endian byte order, a 16-byte string + containing the six integer fields in little-endian byte order, a 32-character lowercase + hexadecimal string, a 128-bit integer, or a URN. 
Strings in the correct format, + however, the log will show a warning if the input does not comply with the standard + specified in RFC 4122 and the proposed updates + +## Parameter + +### From + +Input string format + +- Datatype: `string` +- Default Value: `uuid_hex` + + + +### To + +Output string format + +- Datatype: `string` +- Default Value: `hex` + + + diff --git a/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUIDVersion.md b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUIDVersion.md new file mode 100644 index 000000000..a43f8e19d --- /dev/null +++ b/docs/build/reference/transformer/cmem_plugin_uuid-plugin_uuid-UUIDVersion.md @@ -0,0 +1,22 @@ +--- +title: "UUID Version" +description: "Outputs UUID version number of input" +icon: octicons/cross-reference-24 +tags: + - TransformOperator + - PythonPlugin +--- +# UUID Version + + +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). + +Input: UUID string, output: UUID version number of input. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/coalesce.md b/docs/build/reference/transformer/coalesce.md new file mode 100644 index 000000000..3ea31952e --- /dev/null +++ b/docs/build/reference/transformer/coalesce.md @@ -0,0 +1,93 @@ +--- +title: "Coalesce (first non-empty input)" +description: "Forwards the first non-empty input, i.e. for which any value(s) exist. A single empty string is considered a value." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Coalesce (first non-empty input) + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". 
+ +--- +#### Example 1: + +* Input values: + 1. `[]` + 2. `[]` + 3. `[]` + +* Returns: + + → `[]` + + +--- +#### Example 2: + +* Input values: + 1. `[]` + 2. `[]` + +* Returns: + + → `[]` + + +--- +#### Example 3: + +* Returns: + + → `[]` + + +--- +#### Example 4: + +* Input values: + 1. `[]` + 2. `[first]` + 3. `[second]` + +* Returns: + + → `[first]` + + +--- +#### Example 5: + +* Input values: + 1. `[]` + 2. `[first A, first B]` + 3. `[second]` + +* Returns: + + → `[first A, first B]` + + +--- +#### Example 6: + +* Input values: + 1. `[first]` + 2. `[second]` + +* Returns: + + → `[first]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/compareDates.md b/docs/build/reference/transformer/compareDates.md new file mode 100644 index 000000000..814a16975 --- /dev/null +++ b/docs/build/reference/transformer/compareDates.md @@ -0,0 +1,120 @@ +--- +title: "Compare dates" +description: "Compares two dates. Returns 1 if the comparison yields true and 0 otherwise. If there are multiple dates in both sets, the comparator must be true for all dates. For instance, {2014-08-02,2014-08-03} < {2014-08-03} yields 0 as not all dates in the first set are smaller than in the second." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Compare dates + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *comparator*: `<` + +* Input values: + 1. `[2017-01-01]` + 2. `[2017-01-02]` + +* Returns: + + → `[1]` + + +--- +#### Example 2: + +* Parameters + * *comparator*: `<` + +* Input values: + 1. `[2017-01-02]` + 2. `[2017-01-01]` + +* Returns: + + → `[0]` + + +--- +#### Example 3: + +* Parameters + * *comparator*: `>` + +* Input values: + 1. `[2017-01-02]` + 2. 
`[2017-01-01]` + +* Returns: + + → `[1]` + + +--- +#### Example 4: + +* Parameters + * *comparator*: `>` + +* Input values: + 1. `[2017-01-01]` + 2. `[2017-01-02]` + +* Returns: + + → `[0]` + + +--- +#### Example 5: + +* Parameters + * *comparator*: `=` + +* Input values: + 1. `[2017-01-01]` + 2. `[2017-01-01]` + +* Returns: + + → `[1]` + + +--- +#### Example 6: + +* Parameters + * *comparator*: `=` + +* Input values: + 1. `[2017-01-02]` + 2. `[2017-01-01]` + +* Returns: + + → `[0]` + + + + +## Parameter + +### Comparator + +No description + +- Datatype: `enumeration` +- Default Value: `<` + + + diff --git a/docs/build/reference/transformer/compareNumbers.md b/docs/build/reference/transformer/compareNumbers.md new file mode 100644 index 000000000..f4fb99af9 --- /dev/null +++ b/docs/build/reference/transformer/compareNumbers.md @@ -0,0 +1,25 @@ +--- +title: "Compare numbers" +description: "Compares the numbers of two sets. Returns 1 if the comparison yields true and 0 otherwise. If there are multiple numbers in both sets, the comparator must be true for all numbers. For instance, {1,2} < {2,3} yields 0 as not all numbers in the first set are smaller than in the second." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Compare numbers + + + + +Compares the numbers of two sets. Returns 1 if the comparison yields true and 0 otherwise. If there are multiple numbers in both sets, the comparator must be true for all numbers. For instance, {1,2} < {2,3} yields 0 as not all numbers in the first set are smaller than in the second. + +## Parameter + +### Comparator + +No description + +- Datatype: `enumeration` +- Default Value: `<` + + + diff --git a/docs/build/reference/transformer/concat.md b/docs/build/reference/transformer/concat.md new file mode 100644 index 000000000..19c023997 --- /dev/null +++ b/docs/build/reference/transformer/concat.md @@ -0,0 +1,180 @@ +--- +title: "Concatenate" +description: "Concatenates strings from multiple inputs." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Concatenate + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Returns: + + → `[]` + + +--- +#### Example 2: + +* Input values: + 1. `[a]` + +* Returns: + + → `[a]` + + +--- +#### Example 3: + +* Input values: + 1. `[a]` + 2. `[b]` + +* Returns: + + → `[ab]` + + +--- +#### Example 4: + +* Parameters + * *glue*: `-` + +* Input values: + 1. `[First]` + 2. `[Last]` + +* Returns: + + → `[First-Last]` + + +--- +#### Example 5: + +* Parameters + * *glue*: `-` + +* Input values: + 1. `[First]` + 2. `[Second, Third]` + +* Returns: + + → `[First-Second, First-Third]` + + +--- +#### Example 6: + +* Parameters + * *glue*: `-` + +* Input values: + 1. `[First]` + 2. `[]` + 3. `[Second]` + +* Returns: + + → `[First--Second]` + + +--- +#### Example 7: + +* Parameters + * *glue*: `-` + +* Input values: + 1. `[First]` + 2. `[]` + 3. `[Second]` + +* Returns: + + → `[]` + + +--- +#### Example 8: + +* Parameters + * *glue*: `-` + * *missingValuesAsEmptyStrings*: `true` + +* Input values: + 1. `[First]` + 2. `[]` + 3. `[Second]` + +* Returns: + + → `[First--Second]` + + +--- +#### Example 9: + +* Parameters + * *glue*: `\n` + +* Input values: + 1. `[First]` + 2. `[Second]` + +* Returns: + + → `[First +Second]` + + +--- +#### Example 10: + +* Parameters + * *glue*: `\t\\\a` + +* Input values: + 1. `[First]` + 2. `[Second]` + +* Returns: + + → `[First \\aSecond]` + + + + +## Parameter + +### Glue + +Separator to be inserted between two concatenated strings. The text can contain escaped characters \n, \t and \\ that are replaced by a newline, tab or backslash respectively. + +- Datatype: `string` +- Default Value: `None` + + + +### Missing values as empty strings + +Handle missing values as empty strings. 
+ +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/concatMultiValues.md b/docs/build/reference/transformer/concatMultiValues.md new file mode 100644 index 000000000..03d50f46e --- /dev/null +++ b/docs/build/reference/transformer/concatMultiValues.md @@ -0,0 +1,112 @@ +--- +title: "Concatenate multiple values" +description: "Concatenates multiple values received for an input. If applied to multiple inputs, yields at most one value per input. Optionally removes duplicate values." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Concatenate multiple values + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Returns: + + → `[]` + + +--- +#### Example 2: + +* Input values: + 1. `[a]` + +* Returns: + + → `[a]` + + +--- +#### Example 3: + +* Input values: + 1. `[a, b]` + +* Returns: + + → `[ab]` + + +--- +#### Example 4: + +* Parameters + * *glue*: `x` + +* Input values: + 1. `[a, b]` + +* Returns: + + → `[axb]` + + +--- +#### Example 5: + +* Input values: + 1. `[a, b]` + 2. `[1, 2]` + +* Returns: + + → `[ab, 12]` + + +--- +#### Example 6: + +* Parameters + * *glue*: `\n\t\\` + +* Input values: + 1. `[a + \b, c]` + +* Returns: + + → `[a + \b + \c]` + + + + +## Parameter + +### Glue + +No description + +- Datatype: `string` +- Default Value: `None` + + + +### Remove duplicates + +No description + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/concatPairwise.md b/docs/build/reference/transformer/concatPairwise.md new file mode 100644 index 000000000..377fed209 --- /dev/null +++ b/docs/build/reference/transformer/concatPairwise.md @@ -0,0 +1,86 @@ +--- +title: "Concatenate pairwise" +description: "Concatenates the values of multiple inputs pairwise." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Concatenate pairwise + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Values of two inputs are concatenated pairwise: + +* Input values: + 1. `[a, b, c]` + 2. `[1, 2, 3]` + +* Returns: + + → `[a1, b2, c3]` + + +--- +#### More than two inputs are supported as well: + +* Input values: + 1. `[a, b, c]` + 2. `[1, 2, 3]` + 3. `[x, y, z]` + +* Returns: + + → `[a1x, b2y, c3z]` + + +--- +#### If one of the inputs has more values than the other, its remaining values are ignored: + +* Input values: + 1. `[a, b, c]` + 2. `[1, 2]` + +* Returns: + + → `[a1, b2]` + + +--- +#### Empty input leads to empty output: + +* Returns: + + → `[]` + + +--- +#### A single input is just forwarded: + +* Input values: + 1. `[a]` + +* Returns: + + → `[a]` + + + + +## Parameter + +### Glue + +Separator to be inserted between two concatenated strings. The text can contain escaped characters \n, \t and \\ that are replaced by a newline, tab or backslash respectively. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/constant.md b/docs/build/reference/transformer/constant.md new file mode 100644 index 000000000..67e6b63a6 --- /dev/null +++ b/docs/build/reference/transformer/constant.md @@ -0,0 +1,41 @@ +--- +title: "Constant" +description: "Generates a constant value." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Constant + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". 
+ +--- +#### Always outputs the specified value: + +* Parameters + * *value*: `John` + +* Returns: + + → `[John]` + + + + +## Parameter + +### Value + +The constant value to be generated + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/constantUri.md b/docs/build/reference/transformer/constantUri.md new file mode 100644 index 000000000..45bd8c675 --- /dev/null +++ b/docs/build/reference/transformer/constantUri.md @@ -0,0 +1,25 @@ +--- +title: "Constant URI" +description: "Generates a constant URI." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Constant URI + + + + +Generates a constant URI. + +## Parameter + +### Value + +The constant URI to be generated + +- Datatype: `uri` +- Default Value: `owl:Class` + + + diff --git a/docs/build/reference/transformer/containsAllOf.md b/docs/build/reference/transformer/containsAllOf.md new file mode 100644 index 000000000..f028071c8 --- /dev/null +++ b/docs/build/reference/transformer/containsAllOf.md @@ -0,0 +1,106 @@ +--- +title: "Contains all of" +description: "Accepts two inputs. If the first input contains all of the second input values it returns 'true', else 'false' is returned." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Contains all of + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[A, B, C]` + 2. `[A, B]` + +* Returns: + + → `[true]` + + +--- +#### Example 2: + +* Input values: + 1. `[A, B, C]` + 2. `[A, D]` + +* Returns: + + → `[false]` + + +--- +#### Example 3: + +* Input values: + 1. `[A, B, C]` + 2. `[D]` + +* Returns: + + → `[false]` + + +--- +#### Example 4: + +* Input values: + 1. `[A, B, C]` + 2. `[A, B, C]` + +* Returns: + + → `[true]` + + +--- +#### Example 5: + +* Input values: + 1. `[A, B, C]` + 2. 
`[]` + +* Returns: + + → `[]` + + +--- +#### Example 6: + +* Input values: + 1. `[A]` + 2. `[A]` + 3. `[A]` + +* Returns: + + → `[]` + + +--- +#### Example 7: + +* Input values: + 1. `[A]` + +* Returns: + + → `[]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/containsAnyOf.md b/docs/build/reference/transformer/containsAnyOf.md new file mode 100644 index 000000000..aff76e777 --- /dev/null +++ b/docs/build/reference/transformer/containsAnyOf.md @@ -0,0 +1,106 @@ +--- +title: "Contains any of" +description: "Accepts two inputs. If the first input contains any of the second input values it returns 'true', else 'false' is returned." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Contains any of + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[A, B, C]` + 2. `[A, B]` + +* Returns: + + → `[true]` + + +--- +#### Example 2: + +* Input values: + 1. `[A, B, C]` + 2. `[A, D]` + +* Returns: + + → `[true]` + + +--- +#### Example 3: + +* Input values: + 1. `[A, B, C]` + 2. `[D]` + +* Returns: + + → `[false]` + + +--- +#### Example 4: + +* Input values: + 1. `[A, B, C]` + 2. `[A, B, C]` + +* Returns: + + → `[true]` + + +--- +#### Example 5: + +* Input values: + 1. `[A, B, C]` + 2. `[]` + +* Returns: + + → `[]` + + +--- +#### Example 6: + +* Input values: + 1. `[A]` + 2. `[A]` + 3. `[A]` + +* Returns: + + → `[]` + + +--- +#### Example 7: + +* Input values: + 1. 
`[A]` + +* Returns: + + → `[]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/convertCharset.md b/docs/build/reference/transformer/convertCharset.md new file mode 100644 index 000000000..233237709 --- /dev/null +++ b/docs/build/reference/transformer/convertCharset.md @@ -0,0 +1,34 @@ +--- +title: "Convert charset" +description: "Convert the string from \"sourceCharset\" to \"targetCharset\"." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Convert charset + + + + +Convert the string from "sourceCharset" to "targetCharset". + +## Parameter + +### Source charset + +No description + +- Datatype: `string` +- Default Value: `ISO-8859-1` + + + +### Target charset + +No description + +- Datatype: `string` +- Default Value: `UTF-8` + + + diff --git a/docs/build/reference/transformer/count.md b/docs/build/reference/transformer/count.md new file mode 100644 index 000000000..5fa551615 --- /dev/null +++ b/docs/build/reference/transformer/count.md @@ -0,0 +1,44 @@ +--- +title: "Count values" +description: "Counts the number of values." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Count values + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[value1]` + +* Returns: + + → `[1]` + + +--- +#### Example 2: + +* Input values: + 1. `[value1, value2]` + +* Returns: + + → `[2]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/currentDate.md b/docs/build/reference/transformer/currentDate.md new file mode 100644 index 000000000..7f01eb50a --- /dev/null +++ b/docs/build/reference/transformer/currentDate.md @@ -0,0 +1,17 @@ +--- +title: "Current date" +description: "Outputs the current date." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Current date + + + + +Outputs the current date. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/datasetParameter.md b/docs/build/reference/transformer/datasetParameter.md new file mode 100644 index 000000000..09fdc7fdd --- /dev/null +++ b/docs/build/reference/transformer/datasetParameter.md @@ -0,0 +1,52 @@ +--- +title: "Dataset parameter" +description: "Reads a meta data parameter from a dataset in Corporate Memory. If authentication is enabled, workbench.superuser must be configured." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Dataset parameter + + + + +Reads a meta data parameter from a dataset in Corporate Memory. If authentication is enabled, workbench.superuser must be configured. + +## Parameter + +### Project + +The project of the dataset. + +- Datatype: `project` +- Default Value: `cmem` + + + +### Dataset + +The dataset the meta data parameter is read from. + +- Datatype: `task` +- Default Value: `None` + + + +### Key + +No description + +- Datatype: `string` +- Default Value: `None` + + + +### Lang + +No description + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/datetoTimestamp.md b/docs/build/reference/transformer/datetoTimestamp.md new file mode 100644 index 000000000..29c2f88e0 --- /dev/null +++ b/docs/build/reference/transformer/datetoTimestamp.md @@ -0,0 +1,77 @@ +--- +title: "Date to timestamp" +description: "Convert an xsd:dateTime to a timestamp. Returns the passed time since the Unix Epoch (1970-01-01)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Date to timestamp + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. 
`[2017-07-03T21:32:52Z]` + +* Returns: + + → `[1499117572000]` + + +--- +#### Example 2: + +* Input values: + 1. `[2017-07-03T21:32:52+01:00]` + +* Returns: + + → `[1499113972000]` + + +--- +#### Example 3: + +* Parameters + * *unit*: `seconds` + +* Input values: + 1. `[2017-07-03T21:32:52+01:00]` + +* Returns: + + → `[1499113972]` + + +--- +#### Example 4: + +* Input values: + 1. `[2017-07-03]` + +* Returns: + + → `[1499040000000]` + + + + +## Parameter + +### Unit + +No description + +- Datatype: `enumeration` +- Default Value: `milliseconds` + + + diff --git a/docs/build/reference/transformer/defaultValue.md b/docs/build/reference/transformer/defaultValue.md new file mode 100644 index 000000000..085517d96 --- /dev/null +++ b/docs/build/reference/transformer/defaultValue.md @@ -0,0 +1,55 @@ +--- +title: "Default Value" +description: "Generates a default value, if the input values are empty. Forwards any non-empty values." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Default Value + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Forwards input values: + +* Input values: + 1. `[input value]` + +* Returns: + + → `[input value]` + + +--- +#### Outputs the default value, if the inputs are empty: + +* Parameters + * *value*: `default value` + +* Input values: + 1. `[]` + +* Returns: + + → `[default value]` + + + + +## Parameter + +### Value + +The default value to be generated, if input values are empty + +- Datatype: `string` +- Default Value: `default` + + + diff --git a/docs/build/reference/transformer/duration.md b/docs/build/reference/transformer/duration.md new file mode 100644 index 000000000..47ca5847b --- /dev/null +++ b/docs/build/reference/transformer/duration.md @@ -0,0 +1,17 @@ +--- +title: "Duration" +description: "Computes the time difference between two date times."
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Duration + + + + +Computes the time difference between two date times. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/durationInDays.md b/docs/build/reference/transformer/durationInDays.md new file mode 100644 index 000000000..75230b89d --- /dev/null +++ b/docs/build/reference/transformer/durationInDays.md @@ -0,0 +1,17 @@ +--- +title: "Duration in days" +description: "Converts an xsd:duration to days." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Duration in days + + + + +Converts an xsd:duration to days. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/durationInSeconds.md b/docs/build/reference/transformer/durationInSeconds.md new file mode 100644 index 000000000..4ba98304e --- /dev/null +++ b/docs/build/reference/transformer/durationInSeconds.md @@ -0,0 +1,17 @@ +--- +title: "Duration in seconds" +description: "Converts an xsd:duration to seconds." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Duration in seconds + + + + +Converts an xsd:duration to seconds. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/durationInYears.md b/docs/build/reference/transformer/durationInYears.md new file mode 100644 index 000000000..48cfaf01c --- /dev/null +++ b/docs/build/reference/transformer/durationInYears.md @@ -0,0 +1,17 @@ +--- +title: "Duration in years" +description: "Converts an xsd:duration to years." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Duration in years + + + + +Converts an xsd:duration to years.
+ +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/emptyValue.md b/docs/build/reference/transformer/emptyValue.md new file mode 100644 index 000000000..dc8876480 --- /dev/null +++ b/docs/build/reference/transformer/emptyValue.md @@ -0,0 +1,17 @@ +--- +title: "Empty value" +description: "Generates an empty value." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Empty value + + + + +Generates an empty value. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/excelMap.md b/docs/build/reference/transformer/excelMap.md new file mode 100644 index 000000000..87be5eee0 --- /dev/null +++ b/docs/build/reference/transformer/excelMap.md @@ -0,0 +1,61 @@ +--- +title: "Excel map" +description: "Replaces values based on a map of values read from a file in Open XML format (XLSX). The XLSX file may contain several sheets of the form: mapFrom,mapTo , ... and more. An empty string can be created in Excel and alternatives by inserting =\"\" in the input line of a cell. If there are multiple values for a single key, all values will be returned for the given key. Note that the mapping table will be cached in memory. If the Excel file is updated (even while transforming), the map will be reloaded within seconds." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Excel map + + + + +Replaces values based on a map of values read from a file in Open XML format (XLSX). The XLSX file may contain several sheets of the form: mapFrom,mapTo , ... and more. An empty string can be created in Excel and alternatives by inserting ="" in the input line of a cell. If there are multiple values for a single key, all values will be returned for the given key. Note that the mapping table will be cached in memory. If the Excel file is updated (even while transforming), the map will be reloaded within seconds.
+ +## Parameter + +### Excel file + +Excel file inside the resources directory containing one or more sheets with mapping tables. + +- Datatype: `resource` +- Default Value: `None` + + + +### Sheet name + +The sheet that contains the mapping table or empty if the first sheet should be taken. + +- Datatype: `string` +- Default Value: `None` + + + +### Skip lines + +How many rows to skip before reading the mapping table. By default the expected header row is skipped. + +- Datatype: `int` +- Default Value: `1` + + + +### Strict + +If set to true, the operator throws validation errors for values it cannot map. If set to false, the chosen conflict strategy will be applied for missing values. + +- Datatype: `boolean` +- Default Value: `true` + + + +### Conflict strategy + +Determines how values that cannot be found in the mapping table are treated. Only has an effect if 'strict' is set to false. If 'retain' is chosen, the original value will be forwarded. If 'remove' is chosen, no value will be output. + +- Datatype: `enumeration` +- Default Value: `retain` + + + diff --git a/docs/build/reference/transformer/extractPhysicalQuantity.md b/docs/build/reference/transformer/extractPhysicalQuantity.md new file mode 100644 index 000000000..39524fd0a --- /dev/null +++ b/docs/build/reference/transformer/extractPhysicalQuantity.md @@ -0,0 +1,52 @@ +--- +title: "Extract physical quantity" +description: "Extracts physical quantities, such as length or weight values. Values are expected of the form '{Number}{UnitPrefix}{Symbol}' and are converted to the base unit. Example: - Given a value '10km, 3mg'. - If the symbol parameter is set to 'm', the extracted value is 10000. - If the symbol parameter is set to 'g', the extracted value is 0.001." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Extract physical quantity + + + + +Extracts physical quantities, such as length or weight values. 
Values are expected of the form '{Number}{UnitPrefix}{Symbol}' and are converted to the base unit. Example: - Given a value '10km, 3mg'. - If the symbol parameter is set to 'm', the extracted value is 10000. - If the symbol parameter is set to 'g', the extracted value is 0.001. + +## Parameter + +### Symbol + +The symbol of the dimension, e.g., 'm' for meter. + +- Datatype: `string` +- Default Value: `None` + + + +### Number format + +The IETF BCP 47 language tag, e.g. 'en'. + +- Datatype: `string` +- Default Value: `en` + + + +### Filter + +Only extracts from values that contain the given regex (case-insensitive). + +- Datatype: `string` +- Default Value: `None` + + + +### Index + +If there are multiple matches, retrieve the value with the given index (zero-based). + +- Datatype: `int` +- Default Value: `0` + + + diff --git a/docs/build/reference/transformer/fileHash.md b/docs/build/reference/transformer/fileHash.md new file mode 100644 index 000000000..be8e06c44 --- /dev/null +++ b/docs/build/reference/transformer/fileHash.md @@ -0,0 +1,34 @@ +--- +title: "File hash" +description: "Calculates the hash sum of a file. The hash sum is cached so that subsequent calls to this operator are fast. Note that initially and every time the specified resource has been updated, this operator might take a long time (depending on the file size). This operator supports using different hash algorithms from the [Secure Hash Algorithms family](https://en.wikipedia.org/wiki/Secure_Hash_Algorithms) (SHA, e.g. SHA256) and two algorithms from the [Message-Digest Algorithm family](https://en.wikipedia.org/wiki/MD5) (MD2 / MD5). Please be aware that some of these algorithms are not secure regarding collision- and other attacks. Note: This transform operator ignores any inputs." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# File hash + + + + +Calculates the hash sum of a file. The hash sum is cached so that subsequent calls to this operator are fast. 
Note that initially and every time the specified resource has been updated, this operator might take a long time (depending on the file size). This operator supports using different hash algorithms from the [Secure Hash Algorithms family](https://en.wikipedia.org/wiki/Secure_Hash_Algorithms) (SHA, e.g. SHA256) and two algorithms from the [Message-Digest Algorithm family](https://en.wikipedia.org/wiki/MD5) (MD2 / MD5). Please be aware that some of these algorithms are not secure regarding collision- and other attacks. Note: This transform operator ignores any inputs. + +## Parameter + +### File + +File for which the hash sum will be calculated. If left empty, the file of the input dataset is used. + +- Datatype: `resource` +- Default Value: `None` + + + +### Algorithm + +The hash algorithm to be used. + +- Datatype: `string` +- Default Value: `SHA256` + + + diff --git a/docs/build/reference/transformer/filterByLength.md b/docs/build/reference/transformer/filterByLength.md new file mode 100644 index 000000000..31f557a7b --- /dev/null +++ b/docs/build/reference/transformer/filterByLength.md @@ -0,0 +1,34 @@ +--- +title: "Filter by length" +description: "Removes all strings that are shorter than 'min' characters or longer than 'max' characters." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Filter by length + + + + +Removes all strings that are shorter than 'min' characters or longer than 'max' characters. + +## Parameter + +### Min + +No description + +- Datatype: `int` +- Default Value: `0` + + + +### Max + +No description + +- Datatype: `int` +- Default Value: `2147483647` + + + diff --git a/docs/build/reference/transformer/filterByRegex.md b/docs/build/reference/transformer/filterByRegex.md new file mode 100644 index 000000000..06c0ebf5c --- /dev/null +++ b/docs/build/reference/transformer/filterByRegex.md @@ -0,0 +1,34 @@ +--- +title: "Filter by regex" +description: "Removes all strings that do NOT match a regex.
If 'negate' is true, only strings will be removed that match the regex." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Filter by regex + + + + +Removes all strings that do NOT match a regex. If 'negate' is true, only strings will be removed that match the regex. + +## Parameter + +### Regex + +No description + +- Datatype: `string` +- Default Value: `None` + + + +### Negate + +No description + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/formatNumber.md b/docs/build/reference/transformer/formatNumber.md new file mode 100644 index 000000000..a8cfb867e --- /dev/null +++ b/docs/build/reference/transformer/formatNumber.md @@ -0,0 +1,152 @@ +--- +title: "Format number" +description: "Formats a number according to a user-defined pattern. The pattern syntax is documented at: https://docs.oracle.com/javase/8/docs/api/java/text/DecimalFormat.html" +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Format number + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *pattern*: `000` + +* Input values: + 1. `[1]` + +* Returns: + + → `[001]` + + +--- +#### Example 2: + +* Parameters + * *pattern*: `000000.000` + +* Input values: + 1. `[123.78]` + +* Returns: + + → `[000123.780]` + + +--- +#### Example 3: + +* Parameters + * *pattern*: `###,###.###` + +* Input values: + 1. `[123456.789]` + +* Returns: + + → `[123,456.789]` + + +--- +#### Example 4: + +* Parameters + * *pattern*: `###.###,###` + * *locale*: `de` + +* Input values: + 1. `[123456.789]` + +* Returns: + + → `[123.456,789]` + + +--- +#### Example 5: + +* Parameters + * *pattern*: `# apples` + +* Input values: + 1. `[10]` + +* Returns: + + → `[10 apples]` + + +--- +#### Example 6: + +* Parameters + * *pattern*: `000'0'` + +* Input values: + 1. 
`[1]` + +* Returns: + + → `[0010]` + + +--- +#### Example 7: + +* Parameters + * *pattern*: `0` + +* Input values: + 1. `[1.0]` + +* Returns: + + → `[1]` + + +--- +#### Example 8: + +* Parameters + * *pattern*: `0.0` + +* Input values: + 1. `[0000123.4]` + +* Returns: + + → `[123.4]` + + + + +## Parameter + +### Pattern + +No description + +- Datatype: `string` +- Default Value: `None` + + + +### Locale + +No description + +- Datatype: `string` +- Default Value: `en` + + + diff --git a/docs/build/reference/transformer/getValueByIndex.md b/docs/build/reference/transformer/getValueByIndex.md new file mode 100644 index 000000000..2e46f1a56 --- /dev/null +++ b/docs/build/reference/transformer/getValueByIndex.md @@ -0,0 +1,43 @@ +--- +title: "Get value by index" +description: "Returns the value found at the specified index. Fails or returns an empty result depending on whether failIfNotFound is set. Please be aware that this will work only if the data source supports some kind of ordering like XML or JSON. This is probably not a good idea to do with RDF models. If emptyStringToEmptyResult is true, an empty result is returned instead of a result with an empty string." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Get value by index + + + + +Returns the value found at the specified index. Fails or returns an empty result depending on whether failIfNotFound is set. Please be aware that this will work only if the data source supports some kind of ordering like XML or JSON. This is probably not a good idea to do with RDF models. If emptyStringToEmptyResult is true, an empty result is returned instead of a result with an empty string.
+ +## Parameter + +### Index + +No description + +- Datatype: `int` +- Default Value: `None` + + + +### Fail if not found + +No description + +- Datatype: `boolean` +- Default Value: `false` + + + +### Empty string to empty result + +No description + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/htmlCleaner.md b/docs/build/reference/transformer/htmlCleaner.md new file mode 100644 index 000000000..d7e65345c --- /dev/null +++ b/docs/build/reference/transformer/htmlCleaner.md @@ -0,0 +1,61 @@ +--- +title: "Clean HTML" +description: "Cleans HTML using a tag white list and allows selection of HTML sections with XPath or CSS selector expressions. If the tag or attribute white lists are left empty, default white lists will be used (this behaviour can be changed). To remove all HTML markup and retain text, keep the defaults and turn off the \"Default tags and attributes\" toggle. The operator takes two inputs: the page HTML and (optional) the page URL which may be needed to resolve relative links in the page HTML." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Clean HTML + + + + +Cleans HTML using a tag white list and allows selection of HTML sections with XPath or CSS selector expressions. If the tag or attribute white lists are left empty, default white lists will be used (this behaviour can be changed). To remove all HTML markup and retain text, keep the defaults and turn off the "Default tags and attributes" toggle. The operator takes two inputs: the page HTML and (optional) the page URL which may be needed to resolve relative links in the page HTML. + +## Parameter + +### Tag white list + +Tags to keep in the cleaned output. + +- Datatype: `traversable[string]` +- Default Value: `None` + + + +### Attribute white list + +Attributes to keep in the cleaned output. + +- Datatype: `traversable[string]` +- Default Value: `None` + + + +### Selectors + +CSS or XPath queries for selection of content.
CSS selectors can be pipe separated for non-sequential execution. + +- Datatype: `traversable[string]` +- Default Value: `None` + + + +### Method + +Selects use of XPath or CSS selectors. + +- Datatype: `enumeration` +- Default Value: `xPath` + + + +### Default tags and attributes + +Use defaults for empty tag and attribute white lists. If the attribute white list is empty, it will default to: "class", "id", "href", "src". If the tag white list is empty, it will default to: "a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup", "dd", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", "i", "img", "li", "ol", "p", "pre", "q", "small", "span", "strike", "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "u", "ul". + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/transformer/ifContains.md b/docs/build/reference/transformer/ifContains.md new file mode 100644 index 000000000..7bf5635aa --- /dev/null +++ b/docs/build/reference/transformer/ifContains.md @@ -0,0 +1,76 @@ +--- +title: "If contains" +description: "Accepts two or three inputs. If the first input contains the given value, the second input is forwarded. Otherwise, the third input is forwarded (if present)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# If contains + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *search*: `match` + +* Input values: + 1. `[matching string]` + 2. `[this is a match]` + +* Returns: + + → `[this is a match]` + + +--- +#### Example 2: + +* Parameters + * *search*: `match` + +* Input values: + 1. `[different string]` + 2. `[this is a match]` + +* Returns: + + → `[]` + + +--- +#### Example 3: + +* Parameters + * *search*: `match` + +* Input values: + 1. `[different string]` + 2.
`[this is a match]` + 3. `[this is no match]` + +* Returns: + + → `[this is no match]` + + + + +## Parameter + +### Search + +No description + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/ifExists.md b/docs/build/reference/transformer/ifExists.md new file mode 100644 index 000000000..03587e90e --- /dev/null +++ b/docs/build/reference/transformer/ifExists.md @@ -0,0 +1,60 @@ +--- +title: "If exists" +description: "Accepts two or three inputs. If the first input provides a value, the second input is forwarded. Otherwise, the third input is forwarded (if present)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# If exists + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[value]` + 2. `[yes]` + 3. `[no]` + +* Returns: + + → `[yes]` + + +--- +#### Example 2: + +* Input values: + 1. `[]` + 2. `[yes]` + 3. `[no]` + +* Returns: + + → `[no]` + + +--- +#### Example 3: + +* Input values: + 1. `[value]` + 2. `[]` + +* Returns: + + → `[]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/ifMatchesRegex.md b/docs/build/reference/transformer/ifMatchesRegex.md new file mode 100644 index 000000000..c8665b6a7 --- /dev/null +++ b/docs/build/reference/transformer/ifMatchesRegex.md @@ -0,0 +1,34 @@ +--- +title: "If matches regex" +description: "Accepts two or three inputs. If any value of the first input matches the regex, the second input is forwarded. Otherwise, the third input is forwarded (if present)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# If matches regex + + + + +Accepts two or three inputs. If any value of the first input matches the regex, the second input is forwarded. Otherwise, the third input is forwarded (if present). 
+ +## Parameter + +### Regex + +No description + +- Datatype: `string` +- Default Value: `None` + + + +### Negate + +No description + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/index.md b/docs/build/reference/transformer/index.md new file mode 100644 index 000000000..707827ab0 --- /dev/null +++ b/docs/build/reference/transformer/index.md @@ -0,0 +1,245 @@ +--- +title: "Transformers" +icon: octicons/cross-reference-24 +tags: + - Build + - Reference +--- +# Transformers + + +Transform operators transform one or more sequences of string values into a sequence of string values. + +**:octicons-people-24: Intended audience:** Linked Data Experts and Domain Experts + +| Name | Description | +|------------------------:| :--------- | +|[Abs](Excel_ABS.md) | Excel ABS(number): Returns the absolute value of the given number. | +|[Acos](Excel_ACOS.md) | Excel ACOS(number): Returns the inverse cosine of the given number in radians. | +|[Acosh](Excel_ACOSH.md) | Excel ACOSH(number): Returns the inverse hyperbolic cosine of the given number in radians. | +|[Aggregate numbers](aggregateNumbers.md) | Aggregates all numbers in this set using a mathematical operation. | +|[And](Excel_AND.md) | Excel AND(argument1; argument2 ...argument30): Returns TRUE if all the arguments are considered TRUE, and FALSE otherwise. | +|[Asin](Excel_ASIN.md) | Excel ASIN(number): Returns the inverse sine of the given number in radians. | +|[Asinh](Excel_ASINH.md) | Excel ASINH(number): Returns the inverse hyperbolic sine of the given number in radians. | +|[Atan](Excel_ATAN.md) | Excel ATAN(number): Returns the inverse tangent of the given number in radians. | +|[Atan2](Excel_ATAN2.md) | Excel ATAN2(number_x; number_y): Returns the inverse tangent of the specified x and y coordinates. Number_x is the value for the x coordinate. Number_y is the value for the y coordinate.
| +|[Atanh](Excel_ATANH.md) | Excel ATANH(number): Returns the inverse hyperbolic tangent of the given number. (Angle is returned in radians.) | +|[Avedev](Excel_AVEDEV.md) | Excel AVEDEV(number1; number2; ... number_30): Returns the average of the absolute deviations of data points from their mean. Displays the diffusion in a data set. Number_1; number_2; ... number_30 are values or ranges that represent a sample. Each number can also be replaced by a reference. | +|[Average](Excel_AVERAGE.md) | Excel AVERAGE(number_1; number_2; ... number_30): Returns the average of the arguments. Number_1; number_2; ... number_30 are numerical values or ranges. Text is ignored. | +|[Averagea](Excel_AVERAGEA.md) | Excel AVERAGEA(value_1; value_2; ... value_30): Returns the average of the arguments. The value of a text is 0. Value_1; value_2; ... value_30 are values or ranges. | +|[Camel case](camelCase.md) | Converts a string to camel case. Upper camel case is the default, lower camel case can be chosen. | +|[Camel case tokenizer](camelcasetokenizer.md) | Tokenizes a camel case string. That is it splits strings between a lower case character and an upper case character. | +|[Capitalize](capitalize.md) | Capitalizes the string i.e. converts the first character to upper case. If 'allWords' is set to true, all words are capitalized and not only the first character. | +|[Ceiling](Excel_CEILING.md) | Excel CEILING(number; significance; mode): Rounds the given number to the nearest integer or multiple of significance. Significance is the value to whose multiple of ten the value is to be rounded up (.01, .1, 1, 10, etc.). Mode is an optional value. If it is indicated and non-zero and if the number and significance are negative, rounding up is carried out based on that value. | +|[Choose](Excel_CHOOSE.md) | Excel CHOOSE(index; value1; ... value30): Uses an index to return a value from a list of up to 30 values. 
Index is a reference or number between 1 and 30 indicating which value is to be taken from the list. Value1; ... value30 is the list of values entered as a reference to a cell or as individual values. | +|[Clean](Excel_CLEAN.md) | Excel CLEAN(text): Removes all non-printing characters from the string. Text refers to the text from which to remove all non-printable characters. | +|[Clean HTML](htmlCleaner.md) | Cleans HTML using a tag white list and allows selection of HTML sections with XPath or CSS selector expressions. If the tag or attribute white lists are left empty default white lists will be used (this behaviour can be changed). To remove all HTML markup and retain text, keep the defaults and turn off the "Default tags and attributes" toggle. The operator takes two inputs: the page HTML and (optional) the page Url which may be needed to resolve relative links in the page HTML. | +|[Coalesce (first non-empty input)](coalesce.md) | Forwards the first non-empty input, i.e. for which any value(s) exist. A single empty string is considered a value. | +|[Code](Excel_CODE.md) | Excel CODE(text): Returns a numeric code for the first character in a text string. Text is the text for which the code of the first character is to be found. | +|[Combin](Excel_COMBIN.md) | Excel COMBIN(count_1; count_2): Returns the number of combinations for a given number of objects. Count_1 is the total number of elements. Count_2 is the selected count from the elements. This is the same as the nCr function on a calculator. | +|[Compare dates](compareDates.md) | Compares two dates. Returns 1 if the comparison yields true and 0 otherwise. If there are multiple dates in both sets, the comparator must be true for all dates. For instance, {2014-08-02,2014-08-03} < {2014-08-03} yields 0 as not all dates in the first set are smaller than in the second. | +|[Compare numbers](compareNumbers.md) | Compares the numbers of two sets. Returns 1 if the comparison yields true and 0 otherwise. 
If there are multiple numbers in both sets, the comparator must be true for all numbers. For instance, {1,2} < {2,3} yields 0 as not all numbers in the first set are smaller than in the second. | +|[Concatenate](concat.md) | Concatenates strings from multiple inputs. | +|[Concatenate multiple values](concatMultiValues.md) | Concatenates multiple values received for an input. If applied to multiple inputs, yields at most one value per input. Optionally removes duplicate values. | +|[Concatenate pairwise](concatPairwise.md) | Concatenates the values of multiple inputs pairwise. | +|[Constant](constant.md) | Generates a constant value. | +|[Constant URI](constantUri.md) | Generates a constant URI. | +|[Contains all of](containsAllOf.md) | Accepts two inputs. If the first input contains all of the second input values it returns 'true', else 'false' is returned. | +|[Contains any of](containsAnyOf.md) | Accepts two inputs. If the first input contains any of the second input values it returns 'true', else 'false' is returned. | +|[Convert charset](convertCharset.md) | Convert the string from "sourceCharset" to "targetCharset". | +|[Convert currency values](cmem_plugin_currencies-transform.md) | Converts currencies values with current and historical exchange rates | +|[Convert Number Base](cmem-plugin-number-conversion.md) | Convert numbers between different number bases (binary, octal, decimal, hexadecimal). | +|[Correl](Excel_CORREL.md) | Excel CORREL(data_1; data_2): Returns the correlation coefficient between two data sets. Data_1 is the first data set. Data_2 is the second data set. | +|[Cos](Excel_COS.md) | Excel COS(number): Returns the cosine of the given number (angle in radians). | +|[Cosh](Excel_COSH.md) | Excel COSH(number): Returns the hyperbolic cosine of the given number (angle in radians). | +|[Count](Excel_COUNT.md) | Excel COUNT(value_1; value_2; ... value_30): Counts how many numbers are in the list of arguments. Text entries are ignored. 
Value_1; value_2; ... value_30 are values or ranges which are to be counted. | +|[Count values](count.md) | Counts the number of values. | +|[Counta](Excel_COUNTA.md) | Excel COUNTA(value_1; value_2; ... value_30): Counts how many values are in the list of arguments. Text entries are also counted, even when they contain an empty string of length 0. If an argument is an array or reference, empty cells within the array or reference are ignored. value_1; value_2; ... value_30 are up to 30 arguments representing the values to be counted. | +|[Covar](Excel_COVAR.md) | Excel COVAR(data_1; data_2): Returns the covariance of the product of paired deviations. Data_1 is the first data set. Data_2 is the second data set. | +|[Current date](currentDate.md) | Outputs the current date. | +|[Dataset parameter](datasetParameter.md) | Reads a meta data parameter from a dataset in Corporate Memory. If authentication is enabled, workbench.superuser must be configured. | +|[Date to timestamp](datetoTimestamp.md) | Convert an xsd:dateTime to a timestamp. Returns the passed time since the Unix Epoch (1970-01-01). | +|[Default Value](defaultValue.md) | Generates a default value, if the input values are empty. Forwards any non-empty values. | +|[Degrees](Excel_DEGREES.md) | Excel DEGREES(number): Converts the given number in radians to degrees. | +|[Devsq](Excel_DEVSQ.md) | Excel DEVSQ(number_1; number_2; ... number_30): Returns the sum of squares of deviations based on a sample mean. Number_1; number_2; ... number_30 are numerical values or ranges representing a sample. | +|[Duration](duration.md) | Computes the time difference between two data times. | +|[Duration in days](durationInDays.md) | Converts an xsd:duration to days. | +|[Duration in seconds](durationInSeconds.md) | Converts an xsd:duration to seconds. | +|[Duration in years](durationInYears.md) | Converts an xsd:duration to years. | +|[Empty value](emptyValue.md) | Generates an empty value. 
| +|[Encode URL](urlEncode.md) | URL encodes the string. | +|[Evaluate template](TemplateTransformer.md) | Evaluates a template. Input values can be addressed using the variables 'input1', 'input2', etc. Global variables are available in the 'global' scope, e.g., 'global.myVar'. | +|[Even](Excel_EVEN.md) | Excel EVEN(number): Rounds the given number up to the nearest even integer. | +|[Exact](Excel_EXACT.md) | Excel EXACT(text_1; text_2): Compares two text strings and returns TRUE if they are identical. This function is case-sensitive. Text_1 is the first text to compare. Text_2 is the second text to compare. | +|[Excel map](excelMap.md) | Replaces values based on a map of values read from a file in Open XML format (XLSX). The XLSX file may contain several sheets of the form: mapFrom,mapTo , ... and more An empty string can be created in Excel and alternatives by inserting ="" in the input line of a cell. If there are multiple values for a single key, all values will be returned for the given key. Note that the mapping table will be cached in memory. If the Excel file is updated (even while transforming), the map will be reloaded within seconds. | +|[Exp](Excel_EXP.md) | Excel EXP(number): Returns e raised to the power of the given number. | +|[Extract physical quantity](extractPhysicalQuantity.md) | Extracts physical quantities, such as length or weight values. Values are expected of the form '{Number}{UnitPrefix}{Symbol}' and are converted to the base unit. Example: - Given a value '10km, 3mg'. - If the symbol parameter is set to 'm', the extracted value is 10000. - If the symbol parameter is set to 'g', the extracted value is 0.001. | +|[Fact](Excel_FACT.md) | Excel FACT(number): Returns the factorial of the given number. | +|[False](Excel_FALSE.md) | Excel FALSE(): Sets the logical value to FALSE. The FALSE() function does not require any arguments. | +|[File hash](fileHash.md) | Calculates the hash sum of a file.
The hash sum is cached so that subsequent calls to this operator are fast. Note that initially and every time the specified resource has been updated, this operator might take a long time (depending on the file size). This operator supports using different hash algorithms from the [Secure Hash Algorithms family](https://en.wikipedia.org/wiki/Secure_Hash_Algorithms) (SHA, e.g. SHA256) and two algorithms from the [Message-Digest Algorithm family](https://en.wikipedia.org/wiki/MD5) (MD2 / MD5). Please be aware that some of these algorithms are not secure regarding collision and other attacks. Note: This transform operator ignores any inputs. | +|[Filter by length](filterByLength.md) | Removes all strings that are shorter than 'min' characters or longer than 'max' characters. | +|[Filter by regex](filterByRegex.md) | Removes all strings that do NOT match a regex. If 'negate' is true, only strings that match the regex will be removed. | +|[Find](Excel_FIND.md) | Excel FIND(find_text; text; position): Looks for a string of text within another string. Where to begin the search can also be defined. The search term can be a number or any string of characters. The search is case-sensitive. Find_text is the text to be found. Text is the text where the search takes place. Position (optional) is the position in the text from which the search starts. | +|[Fix URI](uriFix.md) | Generates valid absolute URIs from the given values. Already valid absolute URIs are left untouched. | +|[Floor](Excel_FLOOR.md) | Excel FLOOR(number; significance; mode): Rounds the given number down to the nearest multiple of significance. Significance is the value to whose multiple of ten the number is to be rounded down (.01, .1, 1, 10, etc.). Mode is an optional value. If it is indicated and non-zero and if the number and significance are negative, rounding up is carried out based on that value.
| +|[Forecast](Excel_FORECAST.md) | Excel FORECAST(value; data_Y; data_X): Extrapolates future values based on existing x and y values. Value is the x value, for which the y value of the linear regression is to be returned. Data_Y is the array or range of known y’s. Data_X is the array or range of known x’s. Does not work for exponential functions. | +|[Format number](formatNumber.md) | Formats a number according to a user-defined pattern. The pattern syntax is documented at: https://docs.oracle.com/javase/8/docs/api/java/text/DecimalFormat.html | +|[Fv](Excel_FV.md) | Excel FV(rate; NPER; PMT; PV; type): Returns the future value of an investment based on periodic, constant payments and a constant interest rate. Rate is the periodic interest rate. NPER is the total number of periods. PMT is the annuity paid regularly per period. PV (optional) is the present cash value of an investment. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. | +|[Geomean](Excel_GEOMEAN.md) | Excel GEOMEAN(number_1; number_2; ... number_30): Returns the geometric mean of a sample. Number_1; number_2; ... number_30 are numerical arguments or ranges that represent a random sample. | +|[Get value by index](getValueByIndex.md) | Returns the value found at the specified index. Fails or returns an empty result depending on failIfNoFound is set or not. Please be aware that this will work only if the data source supports some kind of ordering like XML or JSON. This is probably not a good idea to do with RDF models. If emptyStringToEmptyResult is true then instead of a result with an empty String, an empty result is returned. | +|[If](Excel_IF.md) | Excel IF(test; then_value; otherwise_value): Returns different values based on the test value. Note that in this implementation it will not actually evaluate logical conditions. Then_value is the value that is returned if the test is TRUE. 
Otherwise_value (optional) is the value that is returned if the test is FALSE. | +|[If contains](ifContains.md) | Accepts two or three inputs. If the first input contains the given value, the second input is forwarded. Otherwise, the third input is forwarded (if present). | +|[If exists](ifExists.md) | Accepts two or three inputs. If the first input provides a value, the second input is forwarded. Otherwise, the third input is forwarded (if present). | +|[If matches regex](ifMatchesRegex.md) | Accepts two or three inputs. If any value of the first input matches the regex, the second input is forwarded. Otherwise, the third input is forwarded (if present). | +|[Input file attributes](inputFileAttributes.md) | Retrieves a metadata attribute from the input file (such as the file name). | +|[Input hash](inputHash.md) | Calculates the hash sum of the input values. Generates a single hash sum for all input values combined. This operator supports using different hash algorithms from the [Secure Hash Algorithms family](https://en.wikipedia.org/wiki/Secure_Hash_Algorithms) (SHA, e.g. SHA256) and two algorithms from the [Message-Digest Algorithm family](https://en.wikipedia.org/wiki/MD5) (MD2 / MD5). Please be aware that some of these algorithms are not secure regarding collision- and other attacks. | +|[Input task attributes](inputTaskAttributes.md) | Retrieves individual attributes from the input task (such as the modified date) or the entire task as JSON. | +|[Int](Excel_INT.md) | Excel INT(number): Rounds the given number down to the nearest integer. | +|[Intercept](Excel_INTERCEPT.md) | Excel INTERCEPT(data_Y; data_X): Calculates the y-value at which a line will intersect the y-axis by using known x-values and y-values. Data_Y is the dependent set of observations or data. Data_X is the independent set of observations or data. Names, arrays or references containing numbers must be used here. Numbers can also be entered directly. 
| +|[Ipmt](Excel_IPMT.md) | Excel IPMT(rate; period; NPER; PV; FV; type): Calculates the periodic amortization for an investment with regular payments and a constant interest rate. Rate is the periodic interest rate. Period is the period for which the compound interest is calculated. NPER is the total number of periods during which annuity is paid. Period=NPER, if compound interest for the last period is calculated. PV is the present cash value in sequence of payments. FV (optional) is the desired value (future value) at the end of the periods. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. | +|[Irr](Excel_IRR.md) | Excel IRR(values; guess): Calculates the internal rate of return for an investment. The values represent cash flow values at regular intervals; at least one value must be negative (payments), and at least one value must be positive (income). Values is an array containing the values. Guess (optional) is the estimated value. If you can provide only a few values, you should provide an initial guess to enable the iteration. | +|[jq](cmem-plugin-jq-transform.md) | Process a JSON path with a jq filter / program. | +|[Large](Excel_LARGE.md) | Excel LARGE(data; rank_c): Returns the Rank_c-th largest value in a data set. Data is the cell range of data. Rank_c is the ranking of the value (2nd largest, 3rd largest, etc.) written as an integer. | +|[Left](Excel_LEFT.md) | Excel LEFT(text; number): Returns the first character or characters in a text string. Text is the text where the initial partial words are to be determined. Number (optional) is the number of characters for the start text. If this parameter is not defined, one character is returned. | +|[Ln](Excel_LN.md) | Excel LN(number): Returns the natural logarithm based on the constant e of the given number. | +|[Log](Excel_LOG.md) | Excel LOG(number; base): Returns the logarithm of the given number to the specified base. 
Base is the base for the logarithm calculation. | +|[Log10](Excel_LOG10.md) | Excel LOG10(number): Returns the base-10 logarithm of the given number. | +|[Logarithm](log.md) | Transforms all numbers by applying the logarithm function. Non-numeric values are left unchanged. | +|[Lower case](lowerCase.md) | Converts a string to lower case. | +|[Map](map.md) | Replaces values based on a map of values. | +|[Map with default](mapWithDefaultInput.md) | Takes two inputs. Tries to map the first input based on the map of values parameter config. If the input value is not found in the map, it takes the value of the second input. The indexes of the mapped value and the default value match. If there are less default values than values to map, the last default value is replicated to match the count. | +|[Max](Excel_MAX.md) | Excel MAX(number_1; number_2; ... number_30): Returns the maximum value in a list of arguments. Number_1; number_2; ... number_30 are numerical values or ranges. | +|[Maxa](Excel_MAXA.md) | Excel MAXA(value_1; value_2; ... value_30): Returns the maximum value in a list of arguments. Unlike MAX, text can be entered. The value of the text is 0. Value_1; value_2; ... value_30 are values or ranges. | +|[Median](Excel_MEDIAN.md) | Excel MEDIAN(number_1; number_2; ... number_30): Returns the median of a set of numbers. Number_1; number_2; ... number_30 are values or ranges, which represent a sample. Each number can also be replaced by a reference. | +|[Merge](merge.md) | Merges the values of all inputs. | +|[Metaphone](metaphone.md) | Metaphone phonetic encoding. | +|[Mid](Excel_MID.md) | Excel MID(text; start; number): Returns a text segment of a character string. The parameters specify the starting position and the number of characters. Text is the text containing the characters to extract. Start is the position of the first character in the text to extract. Number is the number of characters in the part of the text. 
| +|[Min](Excel_MIN.md) | Excel MIN(number_1; number_2; ... number_30): Returns the minimum value in a list of arguments. Number_1; number_2; ... number_30 are numerical values or ranges. | +|[Mina](Excel_MINA.md) | Excel MINA(value_1; value_2; ... value_30): Returns the minimum value in a list of arguments. Here text can also be entered. The value of the text is 0. Value_1; value_2; ... value_30 are values or ranges. | +|[Mirr](Excel_MIRR.md) | Excel MIRR(values; investment; reinvest_rate): Calculates the modified internal rate of return of a series of investments. Values corresponds to the array or the cell reference for cells whose content corresponds to the payments. Investment is the rate of interest of the investments (the negative values of the array) Reinvest_rate is the rate of interest of the reinvestment (the positive values of the array). | +|[Mod](Excel_MOD.md) | Excel MOD(dividend; divisor): Returns the remainder after a number is divided by a divisor. Dividend is the number which will be divided by the divisor. Divisor is the number by which to divide the dividend. | +|[Mode](Excel_MODE.md) | Excel MODE(number_1; number_2; ... number_30): Returns the most common value in a data set. Number_1; number_2; ... number_30 are numerical values or ranges. If several values have the same frequency, it returns the smallest value. An error occurs when a value does not appear twice. | +|[Negate binary (NOT)](negateTransformer.md) | Accepts one input, which is either 'true', '1' or 'false', '0' and negates it. | +|[Normalize chars](normalizeChars.md) | Replaces diacritical characters with non-diacritical ones (eg, ö -> o), plus some specialities like transforming æ -> ae, ß -> ss. | +|[Normalize physical quantity](PhysicalQuantitiesNormalizer.md) | Normalizes physical quantities. Can either convert to a configured unit or to SI base units. For instance for lengths, values will be converted to metres if no target unit is configured. 
Will output the pure numeric value without the unit. If one input is provided, the physical quantities are parsed from the provided strings of the form "1 km". If two inputs are provided, the numeric values are parsed from the first input and the units are parsed from the second inputs. | +|[Normdist](Excel_NORMDIST.md) | Excel NORMDIST(number; mean; STDEV; C): Returns the normal distribution for the given Number in the distribution. Mean is the mean value of the distribution. STDEV is the standard deviation of the distribution. C = 0 calculates the density function, and C = 1 calculates the distribution. | +|[Norminv](Excel_NORMINV.md) | Excel NORMINV(number; mean; STDEV): Returns the inverse of the normal distribution for the given Number in the distribution. Mean is the mean value in the normal distribution. STDEV is the standard deviation of the normal distribution. | +|[Normsdist](Excel_NORMSDIST.md) | Excel NORMSDIST(number): Returns the standard normal cumulative distribution for the given Number. | +|[Normsinv](Excel_NORMSINV.md) | Excel NORMSINV(number): Returns the inverse of the standard normal distribution for the given Number, a probability value. | +|[Not](Excel_NOT.md) | Excel NOT(logical_value): Reverses the logical value. Logical_value is any value to be reversed. | +|[Nper](Excel_NPER.md) | Excel NPER(rate; PMT; PV; FV; type): Returns the number of periods for an investment based on periodic, constant payments and a constant interest rate. Rate is the periodic interest rate. PMT is the constant annuity paid in each period. PV is the present value (cash value) in a sequence of payments. FV (optional) is the future value, which is reached at the end of the last period. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. | +|[Npv](Excel_NPV.md) | Excel NPV(Rate; value_1; value_2; ... value_30): Returns the net present value of an investment based on a series of periodic cash flows and a discount rate. 
Rate is the discount rate for a period. Value_1; value_2;... value_30 are values representing deposits or withdrawals. | +|[Number to duration](numberToDuration.md) | Converts a number to an xsd:duration. | +|[Numeric operation](numOperation.md) | Applies a numeric operation to the values of multiple input operators. Uses double-precision floating-point numbers for computation. | +|[Numeric reduce](numReduce.md) | Strip all non-numeric characters from a string. | +|[NYSIIS](NYSIIS.md) | NYSIIS phonetic encoding. | +|[Odd](Excel_ODD.md) | Excel ODD(number): Rounds the given number up to the nearest odd integer. | +|[Or](Excel_OR.md) | Excel OR(logical_value_1; logical_value_2; ...logical_value_30): Returns TRUE if at least one argument is TRUE. Returns the value FALSE if all the arguments have the logical value FALSE. Logical_value_1; logical_value_2; ...logical_value_30 are conditions to be checked. All conditions can be either TRUE or FALSE. If a range is entered as a parameter, the function uses the value from the range that is in the current column or row. | +|[Parse date](DateTypeParser.md) | Parses and normalizes dates in different formats. | +|[Parse date pattern](parseDate.md) | Parses a date based on a specified pattern, returning an xsd:date. | +|[Parse float](FloatTypeParser.md) | Parses and normalizes float values. | +|[Parse geo coordinate](GeoCoordinateParser.md) | Parses and normalizes geo coordinates. | +|[Parse geo location](GeoLocationParser.md) | Parses and normalizes geo locations like continents, countries, states and cities. | +|[Parse integer](IntegerParser.md) | Parses integer values. | +|[Parse ISIN](IsinParser.md) | Parses International Securities Identification Numbers (ISIN) values and fails if the String is no valid ISIN. | +|[Parse SKOS term](SkosTypeParser.md) | Parses values from a SKOS ontology. | +|[Parse string](StringParser.md) | Parses string values, basically an identity function. 
| +|[Pearson](Excel_PEARSON.md) | Excel PEARSON(data_1; data_2): Returns the Pearson product moment correlation coefficient r. Data_1 is the array of the first data set. Data_2 is the array of the second data set. | +|[Percentile](Excel_PERCENTILE.md) | Excel PERCENTILE(data; alpha): Returns the alpha-percentile of data values in an array. Data is the array of data. Alpha is the percentage of the scale between 0 and 1. | +|[Percentrank](Excel_PERCENTRANK.md) | Excel PERCENTRANK(data; value): Returns the percentage rank (percentile) of the given value in a sample. Data is the array of data in the sample. | +|[Pi](Excel_PI.md) | Excel PI(): Returns the value of PI to fourteen decimal places. | +|[Pmt](Excel_PMT.md) | Excel PMT(rate; NPER; PV; FV; type): Returns the periodic payment for an annuity with constant interest rates. Rate is the periodic interest rate. NPER is the number of periods in which annuity is paid. PV is the present value (cash value) in a sequence of payments. FV (optional) is the desired value (future value) to be reached at the end of the periodic payments. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. | +|[Poisson](Excel_POISSON.md) | Excel POISSON(number; mean; C): Returns the Poisson distribution for the given Number. Mean is the middle value of the Poisson distribution. C = 0 calculates the density function, and C = 1 calculates the distribution. | +|[Power](Excel_POWER.md) | Excel POWER(base; power): Returns the result of a number raised to a power. Base is the number that is to be raised to the given power. Power is the exponent by which the base is to be raised. | +|[Ppmt](Excel_PPMT.md) | Excel PPMT(rate; period; NPER; PV; FV; type): Returns for a given period the payment on the principal for an investment that is based on periodic and constant payments and a constant interest rate. Rate is the periodic interest rate. Period is the amortization period. 
NPER is the total number of periods during which annuity is paid. PV is the present value in the sequence of payments. FV (optional) is the desired (future) value. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. | +|[Product](Excel_PRODUCT.md) | Excel PRODUCT(number 1 to 30): Multiplies all the numbers given as arguments and returns the product. Number 1 to number 30 are up to 30 arguments whose product is to be calculated, separated by semi-colons. | +|[Proper](Excel_PROPER.md) | Excel PROPER(text): Capitalizes the first letter in all words of a text string. Text is the text to be converted. | +|[Pv](Excel_PV.md) | Excel PV(rate; NPER; PMT; FV; type): Returns the present value of an investment resulting from a series of regular payments. Rate defines the interest rate per period. NPER is the total number of payment periods. PMT is the regular payment made per period. FV (optional) defines the future value remaining after the final installment has been made. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. | +|[Radians](Excel_RADIANS.md) | Excel RADIANS(number): Converts the given number in degrees to radians. | +|[Rand](Excel_RAND.md) | Excel RAND(): Returns a random number between 0 and 1. | +|[Random number](randomNumber.md) | Generates a set of random numbers. | +|[Rank](Excel_RANK.md) | Excel RANK(value; data; type): Returns the rank of the given Value in a sample. Data is the array or range of data in the sample. Type (optional) is the sequence order, either ascending (0) or descending (1). | +|[Rate](Excel_RATE.md) | Excel RATE(NPER; PMT; PV; FV; type; guess): Returns the constant interest rate per period of an annuity. NPER is the total number of periods, during which payments are made (payment period). PMT is the constant payment (annuity) paid during each period. PV is the cash value in the sequence of payments. 
FV (optional) is the future value, which is reached at the end of the periodic payments. Type (optional) defines whether the payment is due at the beginning (1) or the end (0) of a period. Guess (optional) determines the estimated value of the interest with iterative calculation. | +|[Read parameter](readParameter.md) | Reads a parameter from a Java Properties file. | +|[Regex extract](regexExtract.md) | Extracts occurrences of a regex "regex" in a string. If there is at least one capture group, it will return the string of the first capture group instead. | +|[Regex replace](regexReplace.md) | Replace all occurrences of a regex "regex" with "replace" in a string. | +|[Regex selection](regexSelect.md) | This transformer takes 3 inputs. The first input should have exactly one value that should be passed out again untouched. The second input has at least two Regex values - two in order to make sense. The third input should have exactly one value which is checked against the regexes. The result of the transformer is a sequence with the same length of number of regexes. For the output value (of the first input) is set to each position in this sequence where the related regex also matched. If `oneOnly` is true only the position of the **first** matching regex will be set to the output value. | +|[Remove blanks](removeBlanks.md) | Remove whitespace from a string. | +|[Remove duplicates](removeDuplicates.md) | Removes duplicated values, making a value sequence distinct. | +|[Remove empty values](removeEmptyValues.md) | Removes empty values. | +|[Remove parentheses](removeParentheses.md) | Remove all parentheses including their content, e.g., transforms 'Berlin (City)' -> 'Berlin'. | +|[Remove special chars](removeSpecialChars.md) | Remove special characters (including punctuation) from a string. | +|[Remove stopwords](removeStopwords.md) | Removes stopwords from all values. Each line in the stopword list contains a stopword. 
The separator defines a regex that is used for detecting words. | +|[Remove stopwords (remote stopword list)](removeRemoteStopwords.md) | Removes stopwords from all values. The stopword list is retrieved via an HTTP connection (e.g. https://sites.google.com/site/kevinbouge/stopwords-lists/stopwords_de.txt). Each line in the stopword list contains a stopword. The separator defines a regex that is used for detecting words. | +|[Remove values](removeValues.md) | Removes values that contain words from a blacklist. The blacklist values are separated with commas. | +|[Replace](replace.md) | Replace all occurrences of a string "search" with "replace" in a string. | +|[Replace](Excel_REPLACE.md) | Excel REPLACE(text; position; length; new_text): Replaces part of a text string with a different text string. This function can be used to replace both characters and numbers (which are automatically converted to text). The result of the function is always displayed as text. To perform further calculations with a number which has been replaced by text, convert it back to a number using the VALUE function. Any text containing numbers must be enclosed in quotation marks so it is not interpreted as a number and automatically converted to text. Text is the text of which a part will be replaced. Position is the position within the text where the replacement will begin. Length is the number of characters in text to be replaced. New_text is the text which replaces text. | +|[Rept](Excel_REPT.md) | Excel REPT(text; number): Repeats a character string by the given number of copies. Text is the text to be repeated. Number is the number of repetitions. The result can be a maximum of 255 characters. | +|[Retrieve coordinates](RetrieveCoordinates.md) | Retrieves geographic coordinates using Nominatim. | +|[Retrieve latitude](RetrieveLatitude.md) | Retrieves geographic coordinates using Nominatim and returns the latitude.
| +|[Retrieve longitude](RetrieveLongitude.md) | Retrieves geographic coordinates using Nominatim and returns the longitude. | +|[Right](Excel_RIGHT.md) | Excel RIGHT(text; number): Defines the last character or characters in a text string. Text is the text of which the right part is to be determined. Number (optional) is the number of characters from the right part of the text. | +|[Roman](Excel_ROMAN.md) | Excel ROMAN(number; mode): Converts a number into a Roman numeral. The value range must be between 0 and 3999; the modes can be integers from 0 to 4. Number is the number that is to be converted into a Roman numeral. Mode (optional) indicates the degree of simplification. The higher the value, the greater is the simplification of the Roman numeral. | +|[Round](Excel_ROUND.md) | Excel ROUND(number; count): Rounds the given number to a certain number of decimal places according to valid mathematical criteria. Count (optional) is the number of the places to which the value is to be rounded. If the count parameter is negative, only the whole number portion is rounded. It is rounded to the place indicated by the count. | +|[Rounddown](Excel_ROUNDDOWN.md) | Excel ROUNDDOWN(number; count): Rounds the given number. Count (optional) is the number of digits to be rounded down to. If the count parameter is negative, only the whole number portion is rounded. It is rounded to the place indicated by the count. | +|[Roundup](Excel_ROUNDUP.md) | Excel ROUNDUP(number; count): Rounds the given number up. Count (optional) is the number of digits to which rounding up is to be done. If the count parameter is negative, only the whole number portion is rounded. It is rounded to the place indicated by the count. | +|[Search](Excel_SEARCH.md) | Excel SEARCH(find_text; text; position): Returns the position of a text segment within a character string. The start of the search can be set as an option. The search text can be a number or any sequence of characters. 
The search is not case-sensitive. The search supports regular expressions. Find_text is the text to be searched for. Text is the text where the search will take place. Position (optional) is the position in the text where the search is to start. | +|[Sequence values to indexes](toSequenceIndex.md) | Transforms the sequence of values to their respective indexes in the sequence. Example: - ("a", "b", "c") becomes (0, 1, 2) If there is more than one input, the values are numbered from the first input on and continued for the next inputs. Applied against an RDF source the order might not be deterministic. | +|[Sign](Excel_SIGN.md) | Excel SIGN(number): Returns the sign of the given number. The function returns the result 1 for a positive sign, – 1 for a negative sign, and 0 for zero. | +|[Sin](Excel_SIN.md) | Excel SIN(number): Returns the sine of the given number (angle in radians). | +|[Sinh](Excel_SINH.md) | Excel SINH(number): Returns the hyperbolic sine of the given number (angle in radians). | +|[Slope](Excel_SLOPE.md) | Excel SLOPE(data_Y; data_X): Returns the slope of the linear regression line. Data_Y is the array or matrix of Y data. Data_X is the array or matrix of X data. | +|[Small](Excel_SMALL.md) | Excel SMALL(data; rank_c): Returns the Rank_c-th smallest value in a data set. Data is the cell range of data. Rank_c is the rank of the value (2nd smallest, 3rd smallest, etc.) written as an integer. | +|[Sort](sort.md) | Sorts values lexicographically. | +|[Sort words](sortWords.md) | Sorts all words in each value lexicographically. | +|[Soundex](soundex.md) | Soundex algorithm. | +|[Sqrt](Excel_SQRT.md) | Excel SQRT(number): Returns the positive square root of the given number. The value of the number must be positive. | +|[Standardize](Excel_STANDARDIZE.md) | Excel STANDARDIZE(number; mean; STDEV): Converts a random variable to a normalized value. Number is the value to be standardized. Mean is the arithmetic mean of the distribution. 
STDEV is the standard deviation of the distribution. | +|[Stdev](Excel_STDEV.md) | Excel STDEV(number_1; number_2; ... number_30): Estimates the standard deviation based on a sample. Number_1; number_2; ... number_30 are numerical values or ranges representing a sample based on an entire population. | +|[Stdeva](Excel_STDEVA.md) | Excel STDEVA(value_1; value_2; ... value_30): Calculates the standard deviation of an estimation based on a sample. Value_1; value_2; ... value_30 are values or ranges representing a sample derived from an entire population. Text has the value 0. | +|[Stdevp](Excel_STDEVP.md) | Excel STDEVP(number_1; number_2; ... number_30): Calculates the standard deviation based on the entire population. Number_1; number_2; ... number_30 are numerical values or ranges representing a sample based on an entire population. | +|[Stdevpa](Excel_STDEVPA.md) | Excel STDEVPA(value_1; value_2; ... value_30): Calculates the standard deviation based on the entire population. Value_1; value_2; ... value_30 are values or ranges representing a sample derived from an entire population. Text has the value 0. | +|[Stem](stem.md) | Stems a string using the Porter Stemmer. | +|[Strip non-alphabetic characters](alphaReduce.md) | Strips all non-alphabetic characters from a string. Spaces are retained. | +|[Strip postfix](stripPostfix.md) | Strips a postfix of a string. | +|[Strip prefix](stripPrefix.md) | Strips a prefix of a string. | +|[Strip URI prefix](stripUriPrefix.md) | Strips the URI prefix and decodes the remainder. Leaves values unchanged which are not a valid URI. | +|[Substitute](Excel_SUBSTITUTE.md) | Excel SUBSTITUTE(text; search_text; new text; occurrence): Substitutes new text for old text in a string. Text is the text in which text segments are to be exchanged. Search_text is the text segment that is to be replaced (a number of times). New text is the text that is to replace the text segment. 
Occurrence (optional) indicates how many occurrences of the search text are to be replaced. If this parameter is missing, the search text is replaced throughout. | +|[Substring](substring.md) | Returns a substring between 'beginIndex' (inclusive) and 'endIndex' (exclusive). If 'endIndex' is 0 (default), it is ignored and the entire remaining string starting with 'beginIndex' is returned. If 'endIndex' is negative, -endIndex characters are removed from the end. | +|[Sum](Excel_SUM.md) | Excel SUM(number_1; number_2; ... number_30): Adds all the numbers in a range of cells. Number_1; number_2;... number_30 are up to 30 arguments whose sum is to be calculated. You can also enter a range using cell references. | +|[Sumproduct](Excel_SUMPRODUCT.md) | Excel SUMPRODUCT(array 1; array 2; ...array 30): Multiplies corresponding elements in the given arrays, and returns the sum of those products. Array 1; array 2;...array 30 are arrays whose corresponding elements are to be multiplied. At least one array must be part of the argument list. If only one array is given, all array elements are summed. | +|[Sumsq](Excel_SUMSQ.md) | Excel SUMSQ(number_1; number_2; ... number_30): Calculates the sum of the squares of numbers (totaling up of the squares of the arguments) Number_1; number_2;... number_30 are up to 30 arguments, the sum of whose squares is to be calculated. | +|[Sumx2my2](Excel_SUMX2MY2.md) | Excel SUMX2MY2(array_X; array_Y): Returns the sum of the difference of squares of corresponding values in two arrays. Array_X is the first array whose elements are to be squared and added. Array_Y is the second array whose elements are to be squared and subtracted. | +|[Sumx2py2](Excel_SUMX2PY2.md) | Excel SUMX2PY2(array_X; array_Y): Returns the sum of the sum of squares of corresponding values in two arrays. Array_X is the first array whose arguments are to be squared and added. Array_Y is the second array, whose elements are to be added and squared. 
| +|[Sumxmy2](Excel_SUMXMY2.md) | Excel SUMXMY2(array_X; array_Y): Adds the squares of the variance between corresponding values in two arrays. Array_X is the first array whose elements are to be subtracted and squared. Array_Y is the second array, whose elements are to be subtracted and squared. | +|[Tan](Excel_TAN.md) | Excel TAN(number): Returns the tangent of the given number (angle in radians). | +|[Tanh](Excel_TANH.md) | Excel TANH(number): Returns the hyperbolic tangent of the given number (angle in radians). | +|[Tdist](Excel_TDIST.md) | Excel TDIST(number; degrees_freedom; mode): Returns the t-distribution for the given Number. Degrees_freedom is the number of degrees of freedom for the t-distribution. Mode = 1 returns the one-tailed test, Mode = 2 returns the two-tailed test. | +|[Timestamp to date](timeToDate.md) | Convert a timestamp to xsd:date format. Expects an integer that denotes the passed time since the Unix Epoch (1970-01-01) | +|[Tokenize](tokenize.md) | Tokenizes all input values. | +|[Trim](trim.md) | Remove leading and trailing whitespaces. | +|[True](Excel_TRUE.md) | Excel TRUE(): Sets the logical value to TRUE. The TRUE() function does not require any arguments. | +|[Trunc](Excel_TRUNC.md) | Excel TRUNC(number; count): Truncates a number to an integer by removing the fractional part of the number according to the precision specified in Tools > Options > OpenOffice.org Calc > Calculate. Number is the number whose decimal places are to be cut off. Count is the number of decimal places which are not cut off. | +|[ULID](cmem-plugin-ulid.md) | Generate ULID strings - Universally Unique Lexicographically Sortable Identifiers. | +|[Until character](untilCharacter.md) | Extracts the substring until the character given. | +|[Upper case](upperCase.md) | Converts a string to upper case. | +|[UUID](uuid.md) | Generates UUIDs. 
If no input value is provided, a random UUID (type 4) is generated using a cryptographically strong pseudo random number generator. If input values are provided, a name-based UUID (type 3) is generated for each input value. Each input value will generate a separate UUID. For building a UUID from multiple inputs, the Concatenate operator can be used. | +|[UUID Convert](cmem_plugin_uuid-plugin_uuid-UUIDConvert.md) | Convert a UUID string representation | +|[UUID Version](cmem_plugin_uuid-plugin_uuid-UUIDVersion.md) | Outputs UUID version number of input | +|[UUID1](cmem_plugin_uuid-plugin_uuid-UUID1.md) | Generate a UUIDv1 from a host ID, sequence number, and the current time | +|[UUID1 to UUID6](cmem_plugin_uuid-plugin_uuid-UUID1ToUUID6.md) | Generate UUIDv6 from a UUIDv1. | +|[UUID3](cmem_plugin_uuid-plugin_uuid-UUID3.md) | Generate a UUIDv3 | +|[UUID4](cmem_plugin_uuid-plugin_uuid-UUID4.md) | Generate a random UUIDv4. | +|[UUID5](cmem_plugin_uuid-plugin_uuid-UUID5.md) | Generate a UUIDv5 | +|[UUID6](cmem_plugin_uuid-plugin_uuid-UUID6.md) | Generate a UUIDv6 from a host ID, sequence number, and the current time | +|[UUID7](cmem_plugin_uuid-plugin_uuid-UUID7.md) | Generate a UUIDv7 from a random number, and the current time. | +|[UUID8](cmem_plugin_uuid-plugin_uuid-UUID8.md) | Generate a UUIDv8 from a random number, and the current time. | +|[Validate date after](validateDateAfter.md) | Validates if the first input date is after the second input date. Outputs the first input if the validation is successful. | +|[Validate date range](validateDateRange.md) | Validates if dates are within a specified range. | +|[Validate number of values](validateNumberOfValues.md) | Validates that the number of values lies in a specified range. | +|[Validate numeric range](validateNumericRange.md) | Validates if a number is within a specified range. | +|[Validate regex](validateRegex.md) | Validates if all values match a regular expression. 
| +|[Var](Excel_VAR.md) | Excel VAR(number_1; number_2; ... number_30): Estimates the variance based on a sample. Number_1; number_2; ... number_30 are numerical values or ranges representing a sample based on an entire population. | +|[Vara](Excel_VARA.md) | Excel VARA(value_1; value_2; ... value_30): Estimates a variance based on a sample. The value of text is 0. Value_1; value_2; ... value_30 are values or ranges representing a sample derived from an entire population. Text has the value 0. | +|[Varp](Excel_VARP.md) | Excel VARP(Number_1; number_2; ... number_30): Calculates a variance based on the entire population. Number_1; number_2; ... number_30 are numerical values or ranges representing an entire population. | +|[Varpa](Excel_VARPA.md) | Excel VARPA(value_1; value_2; ... value_30): Calculates the variance based on the entire population. The value of text is 0. Value_1; value_2; ... value_30 are values or ranges representing an entire population. | diff --git a/docs/build/reference/transformer/inputFileAttributes.md b/docs/build/reference/transformer/inputFileAttributes.md new file mode 100644 index 000000000..b294bf7bd --- /dev/null +++ b/docs/build/reference/transformer/inputFileAttributes.md @@ -0,0 +1,25 @@ +--- +title: "Input file attributes" +description: "Retrieves a metadata attribute from the input file (such as the file name)." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Input file attributes + + + + +Retrieves a metadata attribute from the input file (such as the file name). + +## Parameter + +### Attribute + +File attribute to be retrieved from the input dataset. 
+ +- Datatype: `enumeration` +- Default Value: `name` + + + diff --git a/docs/build/reference/transformer/inputHash.md b/docs/build/reference/transformer/inputHash.md new file mode 100644 index 000000000..d4ec242f0 --- /dev/null +++ b/docs/build/reference/transformer/inputHash.md @@ -0,0 +1,41 @@ +--- +title: "Input hash" +description: "Calculates the hash sum of the input values. Generates a single hash sum for all input values combined. This operator supports using different hash algorithms from the [Secure Hash Algorithms family](https://en.wikipedia.org/wiki/Secure_Hash_Algorithms) (SHA, e.g. SHA256) and two algorithms from the [Message-Digest Algorithm family](https://en.wikipedia.org/wiki/MD5) (MD2 / MD5). Please be aware that some of these algorithms are not secure regarding collision- and other attacks." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Input hash + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[input value]` + +* Returns: + + → `[f708c2afff0ed197e8551c4dd549ee5b848e0b407106cbdb8e451c8cd1479362]` + + + + +## Parameter + +### Algorithm + +The hash algorithm to be used. + +- Datatype: `string` +- Default Value: `SHA256` + + + diff --git a/docs/build/reference/transformer/inputTaskAttributes.md b/docs/build/reference/transformer/inputTaskAttributes.md new file mode 100644 index 000000000..166671939 --- /dev/null +++ b/docs/build/reference/transformer/inputTaskAttributes.md @@ -0,0 +1,25 @@ +--- +title: "Input task attributes" +description: "Retrieves individual attributes from the input task (such as the modified date) or the entire task as JSON." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Input task attributes + + + + +Retrieves individual attributes from the input task (such as the modified date) or the entire task as JSON. + +## Parameter + +### Path + +Path to retrieve from the JSON, such as 'metadata/modified'. If left empty, the entire JSON will be returned. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/log.md b/docs/build/reference/transformer/log.md new file mode 100644 index 000000000..ae7c0b037 --- /dev/null +++ b/docs/build/reference/transformer/log.md @@ -0,0 +1,25 @@ +--- +title: "Logarithm" +description: "Transforms all numbers by applying the logarithm function. Non-numeric values are left unchanged." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Logarithm + + + + +Transforms all numbers by applying the logarithm function. Non-numeric values are left unchanged. + +## Parameter + +### Base + +No description + +- Datatype: `int` +- Default Value: `10` + + + diff --git a/docs/build/reference/transformer/lowerCase.md b/docs/build/reference/transformer/lowerCase.md new file mode 100644 index 000000000..46e1d248d --- /dev/null +++ b/docs/build/reference/transformer/lowerCase.md @@ -0,0 +1,33 @@ +--- +title: "Lower case" +description: "Converts a string to lower case." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Lower case + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Transforms all values to lower case: + +* Input values: + 1. 
`[JoHN, LeNA]` + +* Returns: + + → `[john, lena]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/map.md b/docs/build/reference/transformer/map.md new file mode 100644 index 000000000..8dec75f1b --- /dev/null +++ b/docs/build/reference/transformer/map.md @@ -0,0 +1,69 @@ +--- +title: "Map" +description: "Replaces values based on a map of values." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Map + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *map*: `Key1:Value1,Key2:Value2` + * *default*: `Undefined` + +* Input values: + 1. `[Key1]` + +* Returns: + + → `[Value1]` + + +--- +#### Example 2: + +* Parameters + * *map*: `Key1:Value1,Key2:Value2` + * *default*: `Undefined` + +* Input values: + 1. `[Key1X]` + +* Returns: + + → `[Undefined]` + + + + +## Parameter + +### Map + +A map of values + +- Datatype: `stringmap` +- Default Value: `None` + + + +### Default + +Default if the map defines no value + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/mapWithDefaultInput.md b/docs/build/reference/transformer/mapWithDefaultInput.md new file mode 100644 index 000000000..a540b5768 --- /dev/null +++ b/docs/build/reference/transformer/mapWithDefaultInput.md @@ -0,0 +1,25 @@ +--- +title: "Map with default" +description: "Takes two inputs. Tries to map the first input based on the map of values parameter config. If the input value is not found in the map, it takes the value of the second input. The indexes of the mapped value and the default value match. If there are less default values than values to map, the last default value is replicated to match the count." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Map with default + + + + +Takes two inputs. 
Tries to map the first input based on the map of values parameter config. If the input value is not found in the map, it takes the value of the second input. The indexes of the mapped value and the default value match. If there are less default values than values to map, the last default value is replicated to match the count. + +## Parameter + +### Map + +A map of values + +- Datatype: `stringmap` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/merge.md b/docs/build/reference/transformer/merge.md new file mode 100644 index 000000000..3f7a025a3 --- /dev/null +++ b/docs/build/reference/transformer/merge.md @@ -0,0 +1,42 @@ +--- +title: "Merge" +description: "Merges the values of all inputs." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Merge + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Returns: + + → `[]` + + +--- +#### Example 2: + +* Input values: + 1. `[a, b]` + 2. `[c]` + +* Returns: + + → `[a, b, c]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/metaphone.md b/docs/build/reference/transformer/metaphone.md new file mode 100644 index 000000000..2c0499236 --- /dev/null +++ b/docs/build/reference/transformer/metaphone.md @@ -0,0 +1,17 @@ +--- +title: "Metaphone" +description: "Metaphone phonetic encoding." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Metaphone + + + + +Metaphone phonetic encoding. 
+ +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/negateTransformer.md b/docs/build/reference/transformer/negateTransformer.md new file mode 100644 index 000000000..e0d5d4f88 --- /dev/null +++ b/docs/build/reference/transformer/negateTransformer.md @@ -0,0 +1,55 @@ +--- +title: "Negate binary (NOT)" +description: "Accepts one input, which is either 'true', '1' or 'false', '0' and negates it." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Negate binary (NOT) + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[0, 1, false, true, False, True]` + +* Returns: + + → `[1, 0, true, false, true, false]` + + +--- +#### Example 2: + +* Input values: + 1. `[falsee, true]` + +* Returns: + + → `[]` + + +--- +#### Example 3: + +* Input values: + 1. `[]` + +* Returns: + + → `[]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/normalizeChars.md b/docs/build/reference/transformer/normalizeChars.md new file mode 100644 index 000000000..c60fdac6f --- /dev/null +++ b/docs/build/reference/transformer/normalizeChars.md @@ -0,0 +1,17 @@ +--- +title: "Normalize chars" +description: "Replaces diacritical characters with non-diacritical ones (e.g., ö -> o), plus some specialities like transforming æ -> ae, ß -> ss." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Normalize chars + + + + +Replaces diacritical characters with non-diacritical ones (e.g., ö -> o), plus some specialities like transforming æ -> ae, ß -> ss. 
+ +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/numOperation.md b/docs/build/reference/transformer/numOperation.md new file mode 100644 index 000000000..0bc78d1bb --- /dev/null +++ b/docs/build/reference/transformer/numOperation.md @@ -0,0 +1,135 @@ +--- +title: "Numeric operation" +description: "Applies a numeric operation to the values of multiple input operators. Uses double-precision floating-point numbers for computation." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Numeric operation + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *operator*: `+` + +* Input values: + 1. `[1]` + 2. `[1]` + +* Returns: + + → `[2.0]` + + +--- +#### Example 2: + +* Parameters + * *operator*: `-` + +* Input values: + 1. `[1]` + 2. `[1]` + +* Returns: + + → `[0.0]` + + +--- +#### Example 3: + +* Parameters + * *operator*: `*` + +* Input values: + 1. `[5]` + 2. `[6]` + +* Returns: + + → `[30.0]` + + +--- +#### Example 4: + +* Parameters + * *operator*: `/` + +* Input values: + 1. `[5]` + 2. `[2]` + +* Returns: + + → `[2.5]` + + +--- +#### Example 5: + +* Parameters + * *operator*: `+` + +* Input values: + 1. `[1]` + 2. `[no number]` + +* Returns: + + → `[]` + + +--- +#### Example 6: + +* Parameters + * *operator*: `*` + +* Input values: + 1. `[1]` + 2. `[]` + +* Returns: + + → `[1.0]` + + +--- +#### Example 7: + +* Parameters + * *operator*: `+` + +* Input values: + 1. `[1, 1]` + 2. `[1]` + +* Returns: + + → `[3.0]` + + + + +## Parameter + +### Operator + +The operator to be applied to all values. 
One of '+', '-', '*', '/' + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/numReduce.md b/docs/build/reference/transformer/numReduce.md new file mode 100644 index 000000000..739aa8639 --- /dev/null +++ b/docs/build/reference/transformer/numReduce.md @@ -0,0 +1,58 @@ +--- +title: "Numeric reduce" +description: "Strip all non-numeric characters from a string." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Numeric reduce + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *keepPunctuation*: `false` + +* Input values: + 1. `[some1.2Value]` + +* Returns: + + → `[12]` + + +--- +#### Example 2: + +* Parameters + * *keepPunctuation*: `true` + +* Input values: + 1. `[some1.2Value]` + +* Returns: + + → `[1.2]` + + + + +## Parameter + +### Keep punctuation + +No description + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/transformer/numberToDuration.md b/docs/build/reference/transformer/numberToDuration.md new file mode 100644 index 000000000..b1b84d6e7 --- /dev/null +++ b/docs/build/reference/transformer/numberToDuration.md @@ -0,0 +1,25 @@ +--- +title: "Number to duration" +description: "Converts a number to an xsd:duration." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Number to duration + + + + +Converts a number to an xsd:duration. 
+ +## Parameter + +### Unit + +No description + +- Datatype: `enumeration` +- Default Value: `day` + + + diff --git a/docs/build/reference/transformer/parseDate.md b/docs/build/reference/transformer/parseDate.md new file mode 100644 index 000000000..ef79d3bf8 --- /dev/null +++ b/docs/build/reference/transformer/parseDate.md @@ -0,0 +1,150 @@ +--- +title: "Parse date pattern" +description: "Parses a date based on a specified pattern, returning an xsd:date." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Parse date pattern + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *format*: `dd.MM.yyyy` + +* Input values: + 1. `[03.04.2015]` + +* Returns: + + → `[2015-04-03]` + + +--- +#### Example 2: + +* Parameters + * *format*: `dd.MM.yyyy` + +* Input values: + 1. `[3.4.2015]` + +* Returns: + + → `[2015-04-03]` + + +--- +#### Example 3: + +* Parameters + * *format*: `yyyyMMdd` + +* Input values: + 1. `[20150403]` + +* Returns: + + → `[2015-04-03]` + + +--- +#### Example 4: + +* Parameters + * *format*: `MMM yyyy` + * *locale*: `en` + +* Input values: + 1. `[May 2024]` + +* Returns: + + → `[2024-05-01]` + + +--- +#### Example 5: + +* Parameters + * *format*: `MMM yyyy` + * *locale*: `de` + +* Input values: + 1. `[Mai 2024]` + +* Returns: + + → `[2024-05-01]` + + +--- +#### Example 6: + +* Parameters + * *format*: `MMM yyyy` + * *locale*: `de` + +* Input values: + 1. `[May 2024]` + +* Returns: + + → `[]` + + +--- +#### Example 7: + +* Parameters + * *format*: `yyyyMMdd` + * *lenient*: `false` + +* Input values: + 1. 
`[20150000]` + +* Returns: + + → `[]` + + + + +## Parameter + +### Format + +The date pattern used to parse the input values + +- Datatype: `string` +- Default Value: `dd-MM-yyyy` + + + +### Lenient + +If set to true, the parser tries to use heuristics to parse dates with invalid fields (such as a day of zero). + +- Datatype: `boolean` +- Default Value: `false` + + + +### Locale + +Optional locale for the date format. If not set the system's locale will be used. + +- Datatype: `option[locale]` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/randomNumber.md b/docs/build/reference/transformer/randomNumber.md new file mode 100644 index 000000000..4c805b1df --- /dev/null +++ b/docs/build/reference/transformer/randomNumber.md @@ -0,0 +1,52 @@ +--- +title: "Random number" +description: "Generates a set of random numbers." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Random number + + + + +Generates a set of random numbers. + +## Parameter + +### Min + +The smallest number that could be generated. + +- Datatype: `double` +- Default Value: `0.0` + + + +### Max + +The largest number that could be generated. + +- Datatype: `double` +- Default Value: `100.0` + + + +### Min count + +The minimum number of values to generate in each set. + +- Datatype: `int` +- Default Value: `1` + + + +### Max count + +The maximum number of values to generate in each set. + +- Datatype: `int` +- Default Value: `1` + + + diff --git a/docs/build/reference/transformer/readParameter.md b/docs/build/reference/transformer/readParameter.md new file mode 100644 index 000000000..a41c72f31 --- /dev/null +++ b/docs/build/reference/transformer/readParameter.md @@ -0,0 +1,34 @@ +--- +title: "Read parameter" +description: "Reads a parameter from a Java Properties file." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Read parameter + + + + +Reads a parameter from a Java Properties file. 
+ +## Parameter + +### Resource + +The Java properties file to read the parameter from. + +- Datatype: `resource` +- Default Value: `None` + + + +### Parameter + +The name of the parameter. + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/regexExtract.md b/docs/build/reference/transformer/regexExtract.md new file mode 100644 index 000000000..4c31a3252 --- /dev/null +++ b/docs/build/reference/transformer/regexExtract.md @@ -0,0 +1,110 @@ +--- +title: "Regex extract" +description: "Extracts occurrences of a regex \"regex\" in a string. If there is at least one capture group, it will return the string of the first capture group instead." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Regex extract + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### returns the first match: + +* Parameters + * *regex*: `[a-z]{2,4}123` + +* Input values: + 1. `[afe123_abc123]` + +* Returns: + + → `[afe123]` + + +--- +#### returns all matches, if extractAll = true: + +* Parameters + * *regex*: `[a-z]{2,4}123` + * *extractAll*: `true` + +* Input values: + 1. `[afe123_abc123]` + +* Returns: + + → `[afe123, abc123]` + + +--- +#### returns an empty list if nothing matches: + +* Parameters + * *regex*: `^[a-z]{2,4}123` + +* Input values: + 1. `[abcdef123]` + +* Returns: + + → `[]` + + +--- +#### returns the match of the first capture group that matches: + +* Parameters + * *regex*: `^([a-z]{2,4})123([a-z]+)` + +* Input values: + 1. `[abcd123xyz]` + +* Returns: + + → `[abcd]` + + +--- +#### Example 5: + +* Parameters + * *regex*: `"bedeutungen"\s*:\s*\[\s*(?:"([^"]*)"(?:\s*,\s*"([^"]*)")*)*\s*\]` + +* Input values: + 1. 
`["bedeutungen" : [ ]]` + +* Returns: + + → `[]` + + + + +## Parameter + +### Regex + +Regular expression + +- Datatype: `string` +- Default Value: `None` + + + +### Extract all + +If true, all matches are extracted. If false, only the first match is extracted. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/regexReplace.md b/docs/build/reference/transformer/regexReplace.md new file mode 100644 index 000000000..2a48ebaf0 --- /dev/null +++ b/docs/build/reference/transformer/regexReplace.md @@ -0,0 +1,34 @@ +--- +title: "Regex replace" +description: "Replace all occurrences of a regex \"regex\" with \"replace\" in a string." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Regex replace + + + + +Replace all occurrences of a regex "regex" with "replace" in a string. + +## Parameter + +### Regex + +The regular expression to search for + +- Datatype: `string` +- Default Value: `None` + + + +### Replace + +The string that will replace each match + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/regexSelect.md b/docs/build/reference/transformer/regexSelect.md new file mode 100644 index 000000000..5f85aa108 --- /dev/null +++ b/docs/build/reference/transformer/regexSelect.md @@ -0,0 +1,25 @@ +--- +title: "Regex selection" +description: "This transformer takes 3 inputs. The first input should have exactly one value that should be passed out again untouched. The second input has at least two Regex values - two in order to make sense. The third input should have exactly one value which is checked against the regexes. The result of the transformer is a sequence with the same length of number of regexes. For the output value (of the first input) is set to each position in this sequence where the related regex also matched. If `oneOnly` is true only the position of the **first** matching regex will be set to the output value." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Regex selection + + + + +This transformer takes 3 inputs. The first input should have exactly one value that should be passed out again untouched. The second input has at least two Regex values - two in order to make sense. The third input should have exactly one value which is checked against the regexes. The result of the transformer is a sequence with the same length of number of regexes. For the output value (of the first input) is set to each position in this sequence where the related regex also matched. If `oneOnly` is true only the position of the **first** matching regex will be set to the output value. + +## Parameter + +### One only + +No description + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/removeBlanks.md b/docs/build/reference/transformer/removeBlanks.md new file mode 100644 index 000000000..37ce21449 --- /dev/null +++ b/docs/build/reference/transformer/removeBlanks.md @@ -0,0 +1,17 @@ +--- +title: "Remove blanks" +description: "Remove whitespace from a string." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Remove blanks + + + + +Remove whitespace from a string. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/removeDuplicates.md b/docs/build/reference/transformer/removeDuplicates.md new file mode 100644 index 000000000..3d7b61e99 --- /dev/null +++ b/docs/build/reference/transformer/removeDuplicates.md @@ -0,0 +1,17 @@ +--- +title: "Remove duplicates" +description: "Removes duplicated values, making a value sequence distinct." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Remove duplicates + + + + +Removes duplicated values, making a value sequence distinct. 
+ +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/removeEmptyValues.md b/docs/build/reference/transformer/removeEmptyValues.md new file mode 100644 index 000000000..5b334c6bb --- /dev/null +++ b/docs/build/reference/transformer/removeEmptyValues.md @@ -0,0 +1,44 @@ +--- +title: "Remove empty values" +description: "Removes empty values." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Remove empty values + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[value1, , value2]` + +* Returns: + + → `[value1, value2]` + + +--- +#### Example 2: + +* Input values: + 1. `[, ]` + +* Returns: + + → `[]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/removeParentheses.md b/docs/build/reference/transformer/removeParentheses.md new file mode 100644 index 000000000..66560f563 --- /dev/null +++ b/docs/build/reference/transformer/removeParentheses.md @@ -0,0 +1,17 @@ +--- +title: "Remove parentheses" +description: "Remove all parentheses including their content, e.g., transforms 'Berlin (City)' -> 'Berlin'." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Remove parentheses + + + + +Remove all parentheses including their content, e.g., transforms 'Berlin (City)' -> 'Berlin'. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/removeRemoteStopwords.md b/docs/build/reference/transformer/removeRemoteStopwords.md new file mode 100644 index 000000000..c4e55b146 --- /dev/null +++ b/docs/build/reference/transformer/removeRemoteStopwords.md @@ -0,0 +1,34 @@ +--- +title: "Remove stopwords (remote stopword list)" +description: "Removes stopwords from all values. The stopword list is retrieved via a http connection (e.g. 
https://sites.google.com/site/kevinbouge/stopwords-lists/stopwords_de.txt). Each line in the stopword list contains a stopword. The separator defines a regex that is used for detecting words." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Remove stopwords (remote stopword list) + + + + +Removes stopwords from all values. The stopword list is retrieved via a http connection (e.g. https://sites.google.com/site/kevinbouge/stopwords-lists/stopwords_de.txt). Each line in the stopword list contains a stopword. The separator defines a regex that is used for detecting words. + +## Parameter + +### Stop word list url + +No description + +- Datatype: `string` +- Default Value: `None` + + + +### Separator + +No description + +- Datatype: `string` +- Default Value: `[\s-]+` + + + diff --git a/docs/build/reference/transformer/removeSpecialChars.md b/docs/build/reference/transformer/removeSpecialChars.md new file mode 100644 index 000000000..6ac5099b5 --- /dev/null +++ b/docs/build/reference/transformer/removeSpecialChars.md @@ -0,0 +1,17 @@ +--- +title: "Remove special chars" +description: "Remove special characters (including punctuation) from a string." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Remove special chars + + + + +Remove special characters (including punctuation) from a string. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/removeStopwords.md b/docs/build/reference/transformer/removeStopwords.md new file mode 100644 index 000000000..87ef4a200 --- /dev/null +++ b/docs/build/reference/transformer/removeStopwords.md @@ -0,0 +1,34 @@ +--- +title: "Remove stopwords" +description: "Removes stopwords from all values. Each line in the stopword list contains a stopword. The separator defines a regex that is used for detecting words." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Remove stopwords + + + + +Removes stopwords from all values. 
Each line in the stopword list contains a stopword. The separator defines a regex that is used for detecting words. + +## Parameter + +### Stopword list + +No description + +- Datatype: `resource` +- Default Value: `None` + + + +### Separator + +No description + +- Datatype: `string` +- Default Value: `[\s-]+` + + + diff --git a/docs/build/reference/transformer/removeValues.md b/docs/build/reference/transformer/removeValues.md new file mode 100644 index 000000000..e826eff92 --- /dev/null +++ b/docs/build/reference/transformer/removeValues.md @@ -0,0 +1,25 @@ +--- +title: "Remove values" +description: "Removes values that contain words from a blacklist. The blacklist values are separated with commas." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Remove values + + + + +Removes values that contain words from a blacklist. The blacklist values are separated with commas. + +## Parameter + +### Blacklist + +No description + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/replace.md b/docs/build/reference/transformer/replace.md new file mode 100644 index 000000000..47af98cda --- /dev/null +++ b/docs/build/reference/transformer/replace.md @@ -0,0 +1,34 @@ +--- +title: "Replace" +description: "Replace all occurrences of a string \"search\" with \"replace\" in a string." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Replace + + + + +Replace all occurrences of a string "search" with "replace" in a string. 
+ +## Parameter + +### Search + +The string to search for + +- Datatype: `string` +- Default Value: `None` + + + +### Replace + +The string that will replace each match + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/sort.md b/docs/build/reference/transformer/sort.md new file mode 100644 index 000000000..d3d5626e9 --- /dev/null +++ b/docs/build/reference/transformer/sort.md @@ -0,0 +1,55 @@ +--- +title: "Sort" +description: "Sorts values lexicographically." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sort + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[]` + +* Returns: + + → `[]` + + +--- +#### Example 2: + +* Input values: + 1. `[c, a, b]` + +* Returns: + + → `[a, b, c]` + + +--- +#### Example 3: + +* Input values: + 1. `[Hans, Hansa, Hamburg]` + +* Returns: + + → `[Hamburg, Hans, Hansa]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/sortWords.md b/docs/build/reference/transformer/sortWords.md new file mode 100644 index 000000000..6d500ed7c --- /dev/null +++ b/docs/build/reference/transformer/sortWords.md @@ -0,0 +1,72 @@ +--- +title: "Sort words" +description: "Sorts all words in each value lexicographically." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sort words + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[]` + +* Returns: + + → `[]` + + +--- +#### Example 2: + +* Input values: + 1. `[c a b]` + +* Returns: + + → `[a b c]` + + +--- +#### Example 3: + +* Input values: + 1. 
`[Hans Hansa Hamburg, München Marburg]` + +* Returns: + + → `[Hamburg Hans Hansa, Marburg München]` + + + + +## Parameter + +### Split regex + +The regular expression used to split values into words. + +- Datatype: `string` +- Default Value: `\s+` + + + +### Glue + +Separator to be inserted between sorted words. + +- Datatype: `string` +- Default Value: ` ` + + + diff --git a/docs/build/reference/transformer/soundex.md b/docs/build/reference/transformer/soundex.md new file mode 100644 index 000000000..bb3b49a00 --- /dev/null +++ b/docs/build/reference/transformer/soundex.md @@ -0,0 +1,25 @@ +--- +title: "Soundex" +description: "Soundex algorithm." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Soundex + + + + +Soundex algorithm. + +## Parameter + +### Refined + +No description + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/transformer/stem.md b/docs/build/reference/transformer/stem.md new file mode 100644 index 000000000..10c18398e --- /dev/null +++ b/docs/build/reference/transformer/stem.md @@ -0,0 +1,17 @@ +--- +title: "Stem" +description: "Stems a string using the Porter Stemmer." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Stem + + + + +Stems a string using the Porter Stemmer. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/stripPostfix.md b/docs/build/reference/transformer/stripPostfix.md new file mode 100644 index 000000000..6e02ceeae --- /dev/null +++ b/docs/build/reference/transformer/stripPostfix.md @@ -0,0 +1,58 @@ +--- +title: "Strip postfix" +description: "Strips a postfix of a string." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Strip postfix + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". 
+ +--- +#### Example 1: + +* Parameters + * *postfix*: `Postfix` + +* Input values: + 1. `[valuePostfix]` + +* Returns: + + → `[value]` + + +--- +#### Example 2: + +* Parameters + * *postfix*: `Postfix` + +* Input values: + 1. `[Value]` + +* Returns: + + → `[Value]` + + + + +## Parameter + +### Postfix + +No description + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/stripPrefix.md b/docs/build/reference/transformer/stripPrefix.md new file mode 100644 index 000000000..b1365b13a --- /dev/null +++ b/docs/build/reference/transformer/stripPrefix.md @@ -0,0 +1,58 @@ +--- +title: "Strip prefix" +description: "Strips a prefix of a string." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Strip prefix + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *prefix*: `prefix` + +* Input values: + 1. `[prefixValue]` + +* Returns: + + → `[Value]` + + +--- +#### Example 2: + +* Parameters + * *prefix*: `prefix` + +* Input values: + 1. `[ValueWithoutPrefix]` + +* Returns: + + → `[ValueWithoutPrefix]` + + + + +## Parameter + +### Prefix + +No description + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/stripUriPrefix.md b/docs/build/reference/transformer/stripUriPrefix.md new file mode 100644 index 000000000..765810180 --- /dev/null +++ b/docs/build/reference/transformer/stripUriPrefix.md @@ -0,0 +1,66 @@ +--- +title: "Strip URI prefix" +description: "Strips the URI prefix and decodes the remainder. Leaves values unchanged which are not a valid URI." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Strip URI prefix + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. 
Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[http://example.org/some/path/to/value]` + +* Returns: + + → `[value]` + + +--- +#### Example 2: + +* Input values: + 1. `[urn:scheme:value]` + +* Returns: + + → `[value]` + + +--- +#### Example 3: + +* Input values: + 1. `[http://example.org/some/path/to/encoded%20v%C3%A4lue]` + +* Returns: + + → `[encoded välue]` + + +--- +#### Example 4: + +* Input values: + 1. `[value]` + +* Returns: + + → `[value]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/substring.md b/docs/build/reference/transformer/substring.md new file mode 100644 index 000000000..a312fc01a --- /dev/null +++ b/docs/build/reference/transformer/substring.md @@ -0,0 +1,170 @@ +--- +title: "Substring" +description: "Returns a substring between 'beginIndex' (inclusive) and 'endIndex' (exclusive). If 'endIndex' is 0 (default), it is ignored and the entire remaining string starting with 'beginIndex' is returned. If 'endIndex' is negative, -endIndex characters are removed from the end." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Substring + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *beginIndex*: `0` + * *endIndex*: `1` + +* Input values: + 1. `[abc]` + +* Returns: + + → `[a]` + + +--- +#### Example 2: + +* Parameters + * *beginIndex*: `2` + * *endIndex*: `3` + +* Input values: + 1. `[abc]` + +* Returns: + + → `[c]` + + +--- +#### Example 3: + +* Parameters + * *beginIndex*: `3` + * *endIndex*: `3` + +* Input values: + 1. `[abc]` + +* Returns: + + → `[]` + + +--- +#### Example 4: + +* Parameters + * *beginIndex*: `2` + * *endIndex*: `4` + +* Input values: + 1. 
`[abc]` + +* Returns: + + → `[c]` + + +--- +#### Example 5: + +* Parameters + * *beginIndex*: `2` + * *endIndex*: `4` + * *stringMustBeInRange*: `false` + +* Input values: + 1. `[abc]` + +* Returns: + + → `[c]` + + +--- +#### Example 6: + +* Parameters + * *beginIndex*: `10` + * *endIndex*: `20` + * *stringMustBeInRange*: `false` + +* Input values: + 1. `[abc]` + +* Returns: + + → `[]` + + +--- +#### Example 7: + +* Parameters + * *beginIndex*: `0` + * *endIndex*: `-1` + +* Input values: + 1. `[abc]` + +* Returns: + + → `[ab]` + + +--- +#### Example 8: + +* Parameters + * *beginIndex*: `1` + * *endIndex*: `0` + +* Input values: + 1. `[abc]` + +* Returns: + + → `[bc]` + + + + +## Parameter + +### Begin index + +The beginning index, inclusive. + +- Datatype: `int` +- Default Value: `0` + + + +### End index + +The end index, exclusive. Ignored if set to 0, i.e., the entire remaining string starting with 'beginIndex' is returned. If negative, -endIndex characters are removed from the end. + +- Datatype: `int` +- Default Value: `0` + + + +### String must be in range + +If true, only strings will be accepted that are within the start and end indices, throwing a validation error if an index is out of range. + +- Datatype: `boolean` +- Default Value: `true` + + + diff --git a/docs/build/reference/transformer/timeToDate.md b/docs/build/reference/transformer/timeToDate.md new file mode 100644 index 000000000..2819f3f54 --- /dev/null +++ b/docs/build/reference/transformer/timeToDate.md @@ -0,0 +1,79 @@ +--- +title: "Timestamp to date" +description: "Convert a timestamp to xsd:date format. Expects an integer that denotes the passed time since the Unix Epoch (1970-01-01)" +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Timestamp to date + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1.
`[1499117572000]` + +* Returns: + + → `[2017-07-03T21:32:52Z]` + + +--- +#### Example 2: + +* Parameters + * *format*: `yyyy-MM-dd` + +* Input values: + 1. `[1499040000000]` + +* Returns: + + → `[2017-07-03]` + + +--- +#### Example 3: + +* Parameters + * *format*: `yyyy-MM-dd` + * *unit*: `seconds` + +* Input values: + 1. `[1499040000]` + +* Returns: + + → `[2017-07-03]` + + + + +## Parameter + +### Format + +Custom output format (e.g., 'yyyy-MM-dd'). If left empty, a full xsd:dateTime (UTC) is returned. + +- Datatype: `string` +- Default Value: `None` + + + +### Unit + +No description + +- Datatype: `enumeration` +- Default Value: `milliseconds` + + + diff --git a/docs/build/reference/transformer/toSequenceIndex.md b/docs/build/reference/transformer/toSequenceIndex.md new file mode 100644 index 000000000..6b88e2b98 --- /dev/null +++ b/docs/build/reference/transformer/toSequenceIndex.md @@ -0,0 +1,17 @@ +--- +title: "Sequence values to indexes" +description: 'Transforms the sequence of values to their respective indexes in the sequence. Example: - ("a", "b", "c") becomes (0, 1, 2) If there is more than one input, the values are numbered from the first input on and continued for the next inputs. Applied against an RDF source the order might not be deterministic.' +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Sequence values to indexes + + + + +Transforms the sequence of values to their respective indexes in the sequence. Example: - ("a", "b", "c") becomes (0, 1, 2) If there is more than one input, the values are numbered from the first input on and continued for the next inputs. Applied against an RDF source the order might not be deterministic.
+ +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/tokenize.md b/docs/build/reference/transformer/tokenize.md new file mode 100644 index 000000000..1bebea97e --- /dev/null +++ b/docs/build/reference/transformer/tokenize.md @@ -0,0 +1,55 @@ +--- +title: "Tokenize" +description: "Tokenizes all input values." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Tokenize + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### By default, splits values at whitespaces: + +* Input values: + 1. `[Hello World]` + +* Returns: + + → `[Hello, World]` + + +--- +#### Optionally, splits values at the provided regex: + +* Parameters + * *regex*: `,` + +* Input values: + 1. `[.175,.050]` + +* Returns: + + → `[.175, .050]` + + + + +## Parameter + +### Regex + +The regular expression used to split values. + +- Datatype: `string` +- Default Value: `\s` + + + diff --git a/docs/build/reference/transformer/trim.md b/docs/build/reference/transformer/trim.md new file mode 100644 index 000000000..ba2dfb447 --- /dev/null +++ b/docs/build/reference/transformer/trim.md @@ -0,0 +1,17 @@ +--- +title: "Trim" +description: "Remove leading and trailing whitespaces." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Trim + + + + +Remove leading and trailing whitespaces. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/untilCharacter.md b/docs/build/reference/transformer/untilCharacter.md new file mode 100644 index 000000000..64725ada5 --- /dev/null +++ b/docs/build/reference/transformer/untilCharacter.md @@ -0,0 +1,58 @@ +--- +title: "Until character" +description: "Extracts the substring until the character given." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Until character + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *untilCharacter*: `c` + +* Input values: + 1. `[abcde]` + +* Returns: + + → `[ab]` + + +--- +#### Example 2: + +* Parameters + * *untilCharacter*: `c` + +* Input values: + 1. `[abab]` + +* Returns: + + → `[abab]` + + + + +## Parameter + +### Until character + +No description + +- Datatype: `char` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/upperCase.md b/docs/build/reference/transformer/upperCase.md new file mode 100644 index 000000000..b601ae1a0 --- /dev/null +++ b/docs/build/reference/transformer/upperCase.md @@ -0,0 +1,17 @@ +--- +title: "Upper case" +description: "Converts a string to upper case." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Upper case + + + + +Converts a string to upper case. + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/uriFix.md b/docs/build/reference/transformer/uriFix.md new file mode 100644 index 000000000..fd891c67f --- /dev/null +++ b/docs/build/reference/transformer/uriFix.md @@ -0,0 +1,140 @@ +--- +title: "Fix URI" +description: "Generates valid absolute URIs from the given values. Already valid absolute URIs are left untouched." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Fix URI + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Non-absolute URIs are prefixed with the configured URI prefix: + +* Input values: + 1. `[ab]` + +* Returns: + + → `[urn:url-encoded-value:ab]` + + +--- +#### URI reserved characters are encoded: + +* Input values: + 1. 
`[a&b]` + +* Returns: + + → `[urn:url-encoded-value:a%26b]` + + +--- +#### Valid absolute URIs are forwarded unchanged: + +* Input values: + 1. `[http://example.org/some/path]` + +* Returns: + + → `[http://example.org/some/path]` + + +--- +#### Query parameters and fragments are left unchanged: + +* Input values: + 1. `[http://example.org/path?query=some+stuff#hashtag]` + +* Returns: + + → `[http://example.org/path?query=some+stuff#hashtag]` + + +--- +#### Valid URNs are forwarded unchanged: + +* Input values: + 1. `[urn:valid:uri]` + +* Returns: + + → `[urn:valid:uri]` + + +--- +#### Special characters are encoded: + +* Input values: + 1. `[http://www.broken domain.com/broken weird path äöü/nice/path/andNowSomeFragment#fragmentäöü]` + +* Returns: + + → `[http://www.broken%20domain.com/broken%20weird%20path%20%C3%A4%C3%B6%C3%BC/nice/path/andNowSomeFragment#fragment%C3%A4%C3%B6%C3%BC]` + + +--- +#### Hash signs are only encoded if they don't denote a fragment: + +* Input values: + 1. `[http://domain/##path#]` + +* Returns: + + → `[http://domain/#%23path%23]` + + +--- +#### Invalid URIs are fully encoded: + +* Input values: + 1. `[http : invalid URI]` + +* Returns: + + → `[urn:url-encoded-value:http+%3A+invalid+URI]` + + +--- +#### Leading and trailing spaces are removed: + +* Input values: + 1. `[ http://domain.com/[squareBrackets] ]` + +* Returns: + + → `[http://domain.com/%5BsquareBrackets%5D]` + + +--- +#### Example 10: + +* Input values: + 1. `[100%]` + +* Returns: + + → `[urn:url-encoded-value:100%25]` + + + + +## Parameter + +### Uri prefix + +No description + +- Datatype: `string` +- Default Value: `urn:url-encoded-value:` + + + diff --git a/docs/build/reference/transformer/urlEncode.md b/docs/build/reference/transformer/urlEncode.md new file mode 100644 index 000000000..cac324169 --- /dev/null +++ b/docs/build/reference/transformer/urlEncode.md @@ -0,0 +1,63 @@ +--- +title: "Encode URL" +description: "URL encodes the string." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Encode URL + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[ab]` + +* Returns: + + → `[ab]` + + +--- +#### Example 2: + +* Input values: + 1. `[a&b]` + +* Returns: + + → `[a%26b]` + + +--- +#### Example 3: + +* Input values: + 1. `[http://example.org/some/path]` + +* Returns: + + → `[http%3A%2F%2Fexample.org%2Fsome%2Fpath]` + + + + +## Parameter + +### Encoding + +The character encoding. + +- Datatype: `string` +- Default Value: `UTF-8` + + + diff --git a/docs/build/reference/transformer/uuid.md b/docs/build/reference/transformer/uuid.md new file mode 100644 index 000000000..8d4f93813 --- /dev/null +++ b/docs/build/reference/transformer/uuid.md @@ -0,0 +1,44 @@ +--- +title: "UUID" +description: "Generates UUIDs. If no input value is provided, a random UUID (type 4) is generated using a cryptographically strong pseudo random number generator. If input values are provided, a name-based UUID (type 3) is generated for each input value. Each input value will generate a separate UUID. For building a UUID from multiple inputs, the Concatenate operator can be used." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# UUID + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[input value]` + +* Returns: + + → `[cee963a2-8f70-3e97-b51a-85ef732e66dd]` + + +--- +#### Example 2: + +* Input values: + 1. 
`[üöä!, êéè]` + +* Returns: + + → `[690802dd-a317-335f-807c-e4e1e32b7b5b, 925cbd7f-377b-3fbd-8f4c-ca41529b74ad]` + + + + +## Parameter + +`None` \ No newline at end of file diff --git a/docs/build/reference/transformer/validateDateAfter.md b/docs/build/reference/transformer/validateDateAfter.md new file mode 100644 index 000000000..d4eaf1220 --- /dev/null +++ b/docs/build/reference/transformer/validateDateAfter.md @@ -0,0 +1,84 @@ +--- +title: "Validate date after" +description: "Validates if the first input date is after the second input date. Outputs the first input if the validation is successful." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Validate date after + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Input values: + 1. `[2015-04-02]` + 2. `[2015-04-03]` + +* Returns: + + → `[]` + + +--- +#### Example 2: + +* Input values: + 1. `[2015-04-04]` + 2. `[2015-04-03]` + +* Returns: + + → `[2015-04-04]` + + +--- +#### Example 3: + +* Parameters + * *allowEqual*: `true` + +* Input values: + 1. `[2015-04-03]` + 2. `[2015-04-03]` + +* Returns: + + → `[2015-04-03]` + + +--- +#### Example 4: + +* Parameters + * *allowEqual*: `false` + +* Input values: + 1. `[2015-04-03]` + 2. `[2015-04-03]` + +* Returns: + + → `[]` + + + + +## Parameter + +### Allow equal + +Allow both dates to be equal. + +- Datatype: `boolean` +- Default Value: `false` + + + diff --git a/docs/build/reference/transformer/validateDateRange.md b/docs/build/reference/transformer/validateDateRange.md new file mode 100644 index 000000000..c0dcef97a --- /dev/null +++ b/docs/build/reference/transformer/validateDateRange.md @@ -0,0 +1,34 @@ +--- +title: "Validate date range" +description: "Validates if dates are within a specified range." 
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Validate date range + + + + +Validates if dates are within a specified range. + +## Parameter + +### Min date + +Earliest allowed date in YYYY-MM-DD + +- Datatype: `string` +- Default Value: `None` + + + +### Max date + +Latest allowed date in YYYY-MM-DD + +- Datatype: `string` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/validateNumberOfValues.md b/docs/build/reference/transformer/validateNumberOfValues.md new file mode 100644 index 000000000..310665481 --- /dev/null +++ b/docs/build/reference/transformer/validateNumberOfValues.md @@ -0,0 +1,69 @@ +--- +title: "Validate number of values" +description: "Validates that the number of values lies in a specified range." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Validate number of values + + + + + +### Examples + +**Notation:** List of values are represented via square brackets. Example: `[first, second]` represents a list of two values "first" and "second". + +--- +#### Example 1: + +* Parameters + * *min*: `0` + * *max*: `1` + +* Input values: + 1. `[value1]` + +* Returns: + + → `[value1]` + + +--- +#### Example 2: + +* Parameters + * *min*: `0` + * *max*: `1` + +* Input values: + 1. `[value1, value2]` + +* Returns: + + → `[]` + + + + +## Parameter + +### Min + +Minimum allowed number of values + +- Datatype: `int` +- Default Value: `0` + + + +### Max + +Maximum allowed number of values + +- Datatype: `int` +- Default Value: `1` + + + diff --git a/docs/build/reference/transformer/validateNumericRange.md b/docs/build/reference/transformer/validateNumericRange.md new file mode 100644 index 000000000..6503a36d2 --- /dev/null +++ b/docs/build/reference/transformer/validateNumericRange.md @@ -0,0 +1,34 @@ +--- +title: "Validate numeric range" +description: "Validates if a number is within a specified range."
+icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Validate numeric range + + + + +Validates if a number is within a specified range. + +## Parameter + +### Min + +Minimum allowed number + +- Datatype: `double` +- Default Value: `None` + + + +### Max + +Maximum allowed number + +- Datatype: `double` +- Default Value: `None` + + + diff --git a/docs/build/reference/transformer/validateRegex.md b/docs/build/reference/transformer/validateRegex.md new file mode 100644 index 000000000..080f02de4 --- /dev/null +++ b/docs/build/reference/transformer/validateRegex.md @@ -0,0 +1,25 @@ +--- +title: "Validate regex" +description: "Validates if all values match a regular expression." +icon: octicons/cross-reference-24 +tags: + - TransformOperator +--- +# Validate regex + + + + +Validates if all values match a regular expression. + +## Parameter + +### Regex + +regular expression + +- Datatype: `string` +- Default Value: `\w*` + + + diff --git a/poetry.lock b/poetry.lock index 72dab5efc..5288358b7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -135,6 +135,18 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + [[package]] name = "attrs" version = "25.3.0" @@ -477,6 +489,25 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "cmem-cmempy" +version = "25.3.0" +description = "API for eccenca Corporate Memory" +optional = false +python-versions = "<4.0,>=3.9" +groups = ["main"] +files = [ + {file = 
"cmem_cmempy-25.3.0-py3-none-any.whl", hash = "sha256:75f9c6900661b5573615b43086897eb4b5fccdb1ec953fa9e20cdaecaeea75c2"}, + {file = "cmem_cmempy-25.3.0.tar.gz", hash = "sha256:ccef1410bde7e248d4b89b37366e7c386c8a1558190a07090f0d3c11e3b16ff4"}, +] + +[package.dependencies] +certifi = ">=2023.7.22" +pyparsing = ">=3.2.3,<4.0.0" +rdflib = ">=7.1.4,<8.0.0" +requests = ">=2.32.4,<3.0.0" +requests-toolbelt = ">=1.0.0,<2.0.0" + [[package]] name = "colorama" version = "0.4.6" @@ -1534,6 +1565,140 @@ files = [ {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] +[[package]] +name = "pydantic" +version = "2.11.7" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b"}, + {file = "pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db"}, +] + +[package.dependencies] +annotated-types = ">=0.6.0" +pydantic-core = "2.33.2" +typing-extensions = ">=4.12.2" +typing-inspection = ">=0.4.0" + +[package.extras] +email = ["email-validator (>=2.0.0)"] +timezone = ["tzdata"] + +[[package]] +name = "pydantic-core" +version = "2.33.2" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8"}, + {file = "pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d"}, + {file = 
"pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2"}, + {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a"}, + {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac"}, + {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a"}, + {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b"}, + {file = "pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22"}, + {file = "pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640"}, + {file = "pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7"}, + {file = "pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246"}, + {file = 
"pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef"}, + {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30"}, + {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab"}, + {file = "pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65"}, + {file = "pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", 
hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc"}, + {file = "pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1"}, + {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6"}, + {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = 
"sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2"}, + {file = "pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab"}, + {file = "pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f"}, + {file = "pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d"}, + {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e"}, + {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9"}, + {file = "pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5"}, + {file = "pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9"}, + {file = "pydantic_core-2.33.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a2b911a5b90e0374d03813674bf0a5fbbb7741570dcd4b4e85a2e48d17def29d"}, + {file = "pydantic_core-2.33.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6fa6dfc3e4d1f734a34710f391ae822e0a8eb8559a85c6979e14e65ee6ba2954"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c54c939ee22dc8e2d545da79fc5381f1c020d6d3141d3bd747eab59164dc89fb"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53a57d2ed685940a504248187d5685e49eb5eef0f696853647bf37c418c538f7"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09fb9dd6571aacd023fe6aaca316bd01cf60ab27240d7eb39ebd66a3a15293b4"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e6116757f7959a712db11f3e9c0a99ade00a5bbedae83cb801985aa154f071b"}, + {file = 
"pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d55ab81c57b8ff8548c3e4947f119551253f4e3787a7bbc0b6b3ca47498a9d3"}, + {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c20c462aa4434b33a2661701b861604913f912254e441ab8d78d30485736115a"}, + {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44857c3227d3fb5e753d5fe4a3420d6376fa594b07b621e220cd93703fe21782"}, + {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:eb9b459ca4df0e5c87deb59d37377461a538852765293f9e6ee834f0435a93b9"}, + {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9fcd347d2cc5c23b06de6d3b7b8275be558a0c90549495c699e379a80bf8379e"}, + {file = "pydantic_core-2.33.2-cp39-cp39-win32.whl", hash = "sha256:83aa99b1285bc8f038941ddf598501a86f1536789740991d7d8756e34f1e74d9"}, + {file = "pydantic_core-2.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:f481959862f57f29601ccced557cc2e817bce7533ab8e01a797a48b49c9692b3"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c"}, + {file = 
"pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c"}, + {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf"}, + {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb"}, + {file = 
"pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:87acbfcf8e90ca885206e98359d7dca4bcbb35abdc0ff66672a293e1d7a19101"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7f92c15cd1e97d4b12acd1cc9004fa092578acfa57b67ad5e43a197175d01a64"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3f26877a748dc4251cfcfda9dfb5f13fcb034f5308388066bcfe9031b63ae7d"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac89aea9af8cd672fa7b510e7b8c33b0bba9a43186680550ccf23020f32d535"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:970919794d126ba8645f3837ab6046fb4e72bbc057b3709144066204c19a455d"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3eb3fe62804e8f859c49ed20a8451342de53ed764150cb14ca71357c765dc2a6"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:3abcd9392a36025e3bd55f9bd38d908bd17962cc49bc6da8e7e96285336e2bca"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3a1c81334778f9e3af2f8aeb7a960736e5cab1dfebfb26aabca09afd2906c039"}, + {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2807668ba86cb38c6817ad9bc66215ab8584d1d304030ce4f0887336f28a5e27"}, + {file = "pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + [[package]] name = "pygments" version = "2.19.2" @@ -1688,6 +1853,28 @@ files = [ [package.dependencies] pyyaml = "*" +[[package]] +name = "rdflib" +version = "7.1.4" +description 
= "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." +optional = false +python-versions = "<4.0.0,>=3.8.1" +groups = ["main"] +files = [ + {file = "rdflib-7.1.4-py3-none-any.whl", hash = "sha256:72f4adb1990fa5241abd22ddaf36d7cafa5d91d9ff2ba13f3086d339b213d997"}, + {file = "rdflib-7.1.4.tar.gz", hash = "sha256:fed46e24f26a788e2ab8e445f7077f00edcf95abb73bcef4b86cefa8b62dd174"}, +] + +[package.dependencies] +pyparsing = ">=2.1.0,<4" + +[package.extras] +berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"] +html = ["html5rdf (>=1.2,<2)"] +lxml = ["lxml (>=4.3,<6.0)"] +networkx = ["networkx (>=2,<4)"] +orjson = ["orjson (>=3.9.14,<4)"] + [[package]] name = "requests" version = "2.32.4" @@ -1710,6 +1897,21 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +description = "A utility belt for advanced users of python-requests" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["main"] +files = [ + {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, + {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, +] + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + [[package]] name = "six" version = "1.17.0" @@ -1810,6 +2012,21 @@ files = [ {file = "typing_extensions-4.14.0.tar.gz", hash = "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4"}, ] +[[package]] +name = "typing-inspection" +version = "0.4.1" +description = "Runtime typing introspection tools" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51"}, + {file = 
"typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28"}, +] + +[package.dependencies] +typing-extensions = ">=4.12.0" + [[package]] name = "urllib3" version = "2.5.0" @@ -2055,4 +2272,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "602e2b654e4d668d857bd51c3f7ad50e2696087e9b60a37b2cfd40a5cdbc3b25" +content-hash = "a0e36e00724c0eadf029aa4d5c5358bf2942ef4cc6eb162dcb36e95a8aa7907d" diff --git a/pyproject.toml b/pyproject.toml index 8bcfef7c7..644097b6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,9 @@ mkdocs-redirects = "^1.2.2" mkdocs-swagger-ui-tag = "^0.7.1" # if you change this - change also the version ID in Taskfile.yml mkdocs-material = {git = "git@github.com:eccenca/mkdocs-material-insiders.git", rev = "9.6.14-insiders-4.53.16"} +cmem-cmempy = "^25.3.0" +pydantic = "^2.11.7" +jinja2 = "^3.1.6" [tool.poetry.group.dev.dependencies] linkcheckmd = "^1.4.0" diff --git a/tests/test_update_di_reference.py b/tests/test_update_di_reference.py new file mode 100644 index 000000000..330e4421a --- /dev/null +++ b/tests/test_update_di_reference.py @@ -0,0 +1,8 @@ +"""Test update DI references""" +from tools.update_di_reference import get_plugin_descriptions + + +def test_get_plugin_descriptions(): + """Test get DI plugin descriptions""" + descriptions = get_plugin_descriptions() + pass diff --git a/tools/__init__.py b/tools/__init__.py index 97c2cc67d..fa6443570 100644 --- a/tools/__init__.py +++ b/tools/__init__.py @@ -2,6 +2,7 @@ from typing import List import click +from tools.update_di_reference import update_di_reference from tools.update_icons import update_icons @@ -10,3 +11,4 @@ def cli(): """documentation.eccenca.com build tool""" cli.add_command(update_icons) +cli.add_command(update_di_reference) diff --git a/tools/templates/aggregator_base.md b/tools/templates/aggregator_base.md new file mode 100644 index 000000000..92882bd7e --- 
/dev/null +++ b/tools/templates/aggregator_base.md @@ -0,0 +1,15 @@ +--- +title: "Aggregators" +icon: octicons/cross-reference-24 +tags: + - Build + - Reference +--- +# Aggregators + + +This kind of task aggregates multiple similarity scores. + +**:octicons-people-24: Intended audience:** Linked Data Experts and Domain Experts + +{{items}} diff --git a/tools/templates/customtask_base.md b/tools/templates/customtask_base.md new file mode 100644 index 000000000..34e812dc6 --- /dev/null +++ b/tools/templates/customtask_base.md @@ -0,0 +1,15 @@ +--- +title: "Custom Workflow Tasks" +icon: octicons/cross-reference-24 +tags: + - Build + - Reference +--- +# Custom Workflow Tasks + + +A custom workflow task is an operator that can be used in a workflow. + +**:octicons-people-24: Intended audience:** Linked Data Experts and Domain Experts + +{{items}} diff --git a/tools/templates/dataset_base.md b/tools/templates/dataset_base.md new file mode 100644 index 000000000..2745f6cc6 --- /dev/null +++ b/tools/templates/dataset_base.md @@ -0,0 +1,15 @@ +--- +title: "Datasets" +icon: octicons/cross-reference-24 +tags: + - Build + - Reference +--- +# Datasets + + +Datasets are collections of data that can be read or written. + +**:octicons-people-24: Intended audience:** Linked Data Experts and Domain Experts + +{{items}} diff --git a/tools/templates/distancemeasure_base.md b/tools/templates/distancemeasure_base.md new file mode 100644 index 000000000..c60483586 --- /dev/null +++ b/tools/templates/distancemeasure_base.md @@ -0,0 +1,15 @@ +--- +title: "Distance Measures" +icon: octicons/cross-reference-24 +tags: + - Build + - Reference +--- +# Distance Measures + + +Distance Measures compute a distance metric between two sets of strings. 
+ +**:octicons-people-24: Intended audience:** Linked Data Experts and Domain Experts + +{{items}} diff --git a/tools/templates/operator_table.md b/tools/templates/operator_table.md new file mode 100644 index 000000000..bd00b2ec6 --- /dev/null +++ b/tools/templates/operator_table.md @@ -0,0 +1,4 @@ +| Name | Description | +|------------------------:| :--------- | +{% for plugin in plugins if not plugin.is_deprecated %}|[{{plugin.title}}]({{plugin.pluginId}}.md) | {{plugin.description}} | +{% endfor %} diff --git a/tools/templates/parameter.md b/tools/templates/parameter.md new file mode 100644 index 000000000..e41352ec2 --- /dev/null +++ b/tools/templates/parameter.md @@ -0,0 +1,16 @@ +### {{property.title}} + +{{property.description}} + +- Datatype: `{{property.parameterType}}` +- Default Value: `{{property.value if property.value != "" else "None"}}` + +{% for sub_property in property.properties.values() %} +#### {{sub_property.title}} + +{{sub_property.description}} + +- Datatype: `{{sub_property.parameterType}}` +- Default Value: `{{sub_property.value if sub_property.value != "" else "None"}}` + +{% endfor %} diff --git a/tools/templates/plugin.md b/tools/templates/plugin.md new file mode 100644 index 000000000..8648a86a0 --- /dev/null +++ b/tools/templates/plugin.md @@ -0,0 +1,23 @@ +--- +title: "{{plugin.title}}" +description: "{{plugin.description}}" +icon: octicons/cross-reference-24 +tags: {% for tag in plugin.tags %} + - {{tag}}{% endfor %} +--- +# {{plugin.title}} + + +{% if plugin.is_python -%} +!!! note inline end "Python Plugin" + + This is a [Python Plugin](../../../develop/python-plugins/index.md). + In order to use it, you need to install it, + e.g. [with cmemc](../../../automate/cmemc-command-line-interface/command-reference/admin/workspace/python/index.md). 
+{%- endif %} + +{{ plugin.markdownDocumentation if plugin.markdownDocumentation else plugin.description }} + +## Parameter + +{{parameters if plugin.properties else "`None`"}} diff --git a/tools/templates/references_base.md b/tools/templates/references_base.md new file mode 100644 index 000000000..d9f96ccfb --- /dev/null +++ b/tools/templates/references_base.md @@ -0,0 +1,45 @@ +--- +title: "Task and Operator Reference" +icon: octicons/cross-reference-24 +tags: + - Build + - Reference +--- +# Task and Operator Reference + + +**:octicons-people-24: Intended audience:** Linked Data Experts and Domain Experts + +
+ +- [Aggregators](aggregator/index.md) + + --- + + This kind of task aggregates multiple similarity scores. + +- [Custom Workflow Tasks](customtask/index.md) + + --- + + An operator that can be used in a workflow. + +- [Datasets](dataset/index.md) + + --- + + A collection of data that can be read or written. + +- [Distance Measures](distancemeasure/index.md) + + --- + + Computes the distance between two sets of strings. + +- [Transformers](transformer/index.md) + + --- + + Transforms a sequence of string values. + +
"""Update DI Reference documentation"""

import json
import re
from pathlib import Path
from shutil import rmtree
from typing import List, Annotated, Self

import click
from cmem.cmempy import config
from cmem.cmempy.api import send_request
from jinja2 import Environment, PackageLoader, select_autoescape, StrictUndefined
from pydantic import (
    BaseModel,
    validator,
    AfterValidator,
    model_validator,
    field_validator,
    Field,
)
from pydantic_core.core_schema import ValidationInfo

# Fully qualified plugin type names requested from the DI plugin endpoint.
# The lowercased last segment (e.g. "customtask") serves as type id, as the
# output sub-directory name and as the prefix of the "<type>_base.md" template.
_PLUGIN_TYPES = (
    "org.silkframework.config.CustomTask",
    "org.silkframework.dataset.Dataset",
    "org.silkframework.rule.similarity.DistanceMeasure",
    "org.silkframework.rule.input.Transformer",
    "org.silkframework.rule.similarity.Aggregator",
)

# Documentation tag added per plugin type id; types without an entry
# (currently "aggregator") get no type tag.
_TYPE_TAGS = {
    "customtask": "WorkflowTask",
    "dataset": "Dataset",
    "distancemeasure": "DistanceMeasure",
    "transformer": "TransformOperator",
}

# Content of the top-level .pages navigation file (written without trailing newline).
_MAIN_PAGES_CONTENT = "\n".join(
    [
        "nav:",
        '  - "Task and Operator Reference": index.md',
        '  - "Aggregators": aggregator',
        '  - "Custom Workflow Tasks": customtask',
        '  - "Datasets": dataset',
        '  - "Distance Measures": distancemeasure',
        '  - "Transformers": transformer',
    ]
)

_DEPRECATED_MARKER = "deprecated"

jinja_environment = Environment(
    loader=PackageLoader("tools"),
    autoescape=select_autoescape(),
    undefined=StrictUndefined,
)


def stripped_single_line(value: str) -> str:
    """Collapse every whitespace run in *value* to one space and strip the ends."""
    return re.sub(r"\s+", " ", value).strip()


class ActionDescription(BaseModel):
    """Action description"""

    label: str
    description: str
    icon: str | None = None


class PropertyDescription(BaseModel):
    """Property description"""

    title: str
    # normalized to a single line after validation
    description: Annotated[str, AfterValidator(stripped_single_line)]
    type: str
    parameterType: str
    value: str | None | dict
    advanced: bool
    visibleInDialog: bool
    # nested properties are kept as raw dicts; the parameter template reads
    # title/description/parameterType/value from them
    properties: dict[str, dict] = Field(default_factory=dict)


class PluginDescription(BaseModel):
    """Plugin description.

    Besides the fields delivered by the plugin endpoint, the model derives
    ``is_python``, ``is_deprecated`` and ``tags`` in "after" model validators.
    """

    pluginId: str
    title: str
    categories: List[str]
    # normalized to a single line after validation
    description: Annotated[str, AfterValidator(stripped_single_line)]
    markdownDocumentation: str | None = None
    pluginIcon: str | None = None
    properties: dict[str, PropertyDescription]
    actions: dict[str, ActionDescription]
    required: list[str]
    distanceMeasureRange: str | None = None
    is_python: bool | None = None
    is_deprecated: bool | None = None
    tags: list[str] = Field(default_factory=list)
    pluginType: str | None = None

    @model_validator(mode="after")
    def set_python(self) -> Self:
        """Flag the plugin as Python plugin based on its plugin ID prefix."""
        self.is_python = self.pluginId.startswith(("cmem-plugin", "cmem_plugin"))
        return self

    # NOTE: must stay defined after set_python - it reads is_python, and
    # "after" model validators run in definition order.
    @model_validator(mode="after")
    def check_tags(self) -> Self:
        """Add the tag of the plugin type and, for Python plugins, "PythonPlugin"."""
        derived_tags = [_TYPE_TAGS.get(self.pluginType)]
        if self.is_python:
            derived_tags.append("PythonPlugin")
        for tag in derived_tags:
            # skip unknown types (None) and tags which are already present
            if tag is not None and tag not in self.tags:
                self.tags.append(tag)
        return self

    @model_validator(mode="after")
    def set_deprecated(self) -> Self:
        """Flag the plugin as deprecated.

        A plugin counts as deprecated if "deprecated" occurs in its title or
        description, or if it is one of its categories. All comparisons ignore
        case, so "Deprecated" is recognized as well.
        """
        self.is_deprecated = (
            _DEPRECATED_MARKER in self.title.casefold()
            or _DEPRECATED_MARKER in self.description.casefold()
            or any(
                category.casefold() == _DEPRECATED_MARKER
                for category in self.categories
            )
        )
        return self


def get_plugin_descriptions() -> dict[str, list[PluginDescription]]:
    """Return the plugin descriptions of all documented plugin types.

    Returns:
        A dict which maps each type id (lowercased last segment of the plugin
        type name) to its plugin descriptions, sorted case-insensitively by title.
    """
    plugins: dict[str, list[PluginDescription]] = {}
    for plugin_type in _PLUGIN_TYPES:
        type_id = plugin_type.rsplit(".", 1)[-1].lower()
        response = send_request(
            config.get_di_api_endpoint() + f"/core/plugins/{plugin_type}",
            params={"addMarkdownDocumentation": "true"},
        )
        raw_plugins = json.loads(response.decode("utf-8"))
        plugins[type_id] = sorted(
            (
                PluginDescription(**{**raw_plugin, "pluginType": type_id})
                for raw_plugin in raw_plugins.values()
            ),
            key=lambda plugin: plugin.title.lower(),
        )
    return plugins


def create_plugin_markdown(
    plugin: PluginDescription, plugin_type: str, base_dir: Path
) -> None:
    """Create markdown document from plugin description.

    Deprecated plugins are skipped. The document is written to
    ``base_dir / plugin_type / "<pluginId>.md"``; the directory has to exist.
    """
    if plugin.is_deprecated:
        click.echo(f"Ignore deprecated plugin {plugin.pluginId}")
        return
    click.echo(f"Create reference documentation for {plugin.pluginId}")
    plugin_template = jinja_environment.get_template("plugin.md")
    parameter_template = jinja_environment.get_template("parameter.md")
    # every rendered parameter section is followed by a blank line
    parameter_content = "".join(
        parameter_template.render(property=plugin_property) + "\n\n"
        for plugin_property in plugin.properties.values()
    )
    content = plugin_template.render(plugin=plugin, parameters=parameter_content)
    file = base_dir / plugin_type / f"{plugin.pluginId}.md"
    file.write_text(content, encoding="utf-8")


def create_umbrella_pages(
    plugins: dict[str, list[PluginDescription]], base_dir: Path
) -> None:
    """Create umbrellas markdown documents

    Writes the main ``index.md`` and ``.pages`` files into *base_dir* and, for
    each plugin type, an ``index.md`` (overview table) and a ``.pages``
    (navigation) file into the sub-directory of that type.
    """
    reference_base_file = base_dir / "index.md"
    click.echo(f"Create the main index.md file: {reference_base_file}")
    reference_base_file.write_text(
        jinja_environment.get_template("references_base.md").render(),
        encoding="utf-8",
    )

    # Create the main .pages file
    reference_base_pages_file = base_dir / ".pages"
    click.echo(f"Create the main .pages file: {reference_base_pages_file}")
    reference_base_pages_file.write_text(_MAIN_PAGES_CONTENT, encoding="utf-8")

    table_template = jinja_environment.get_template("operator_table.md")
    for plugin_type, plugins_of_type in plugins.items():
        type_dir = base_dir / plugin_type

        # Create type-specific index.md file
        index_file = type_dir / "index.md"
        index_template = jinja_environment.get_template(f"{plugin_type}_base.md")
        items = table_template.render(plugins=plugins_of_type)
        click.echo(f"Create {plugin_type} index file in {index_file}")
        index_file.write_text(index_template.render(items=items), encoding="utf-8")

        # Create the .pages file
        pages_file = type_dir / ".pages"
        pages_lines = ["nav:", "  - index.md"]
        # json.dumps yields a double-quoted scalar with quotes and backslashes
        # escaped, so titles containing such characters keep the YAML valid.
        pages_lines.extend(
            f"  - {json.dumps(plugin.title, ensure_ascii=False)}: {plugin.pluginId}.md"
            for plugin in plugins_of_type
            if not plugin.is_deprecated
        )
        click.echo(f"Create .pages file {pages_file}")
        pages_file.write_text("\n".join(pages_lines), encoding="utf-8")


@click.command()
@click.option(
    "--output-dir", "-o",
    type=click.Path(exists=False, dir_okay=True, file_okay=False),
    default="docs/build/reference",
    help="Where to save the markdown files",
    show_default=True,
)
def update_di_reference(output_dir):
    """Update DI Reference documentation."""
    basedir = Path(output_dir)
    click.echo(f"Creating DI reference documentation in {basedir}")
    # fetch first: if the request fails, the existing documentation stays untouched
    plugins = get_plugin_descriptions()

    # create directory structure
    rmtree(basedir, ignore_errors=True)
    basedir.mkdir(parents=True, exist_ok=True)
    for type_id, plugins_of_type in plugins.items():
        (basedir / type_id).mkdir(parents=True, exist_ok=True)
        for plugin in plugins_of_type:
            create_plugin_markdown(plugin, type_id, basedir)
    create_umbrella_pages(plugins=plugins, base_dir=basedir)