From 51b2753412076289feaa1f59d9f440e146cc79ad Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Mon, 7 Apr 2025 10:47:22 -0400 Subject: [PATCH 1/6] jupyter - JSON-encode compound metadata keys --- src/core/jupyter/jupyter.ts | 38 +++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/core/jupyter/jupyter.ts b/src/core/jupyter/jupyter.ts index 1550877cab..497bed811f 100644 --- a/src/core/jupyter/jupyter.ts +++ b/src/core/jupyter/jupyter.ts @@ -174,6 +174,7 @@ import { jupyterCellSrcAsLines, jupyterCellSrcAsStr, } from "./jupyter-shared.ts"; +import { error } from "../../deno_ral/log.ts"; export const kQuartoMimeType = "quarto_mimetype"; export const kQuartoOutputOrder = "quarto_order"; @@ -921,8 +922,31 @@ export function jupyterCellWithOptions( } }; + const validMetadata: Record = {}; + for (const key of Object.keys(cell.metadata)) { + const value = cell.metadata[key]; + if (value !== undefined) { + if (value && typeof value === "object") { + // we need to json-encode this and signal the encoding in the key + validMetadata[ + `quarto-json-encoded-${key.replaceAll(/[^A-Za-z]/g, "_")}` + ] = JSON.stringify({ key, value }); + } else if ( + typeof value === "string" || typeof value === "number" || + typeof value === "boolean" + ) { + validMetadata[key] = value; + } else { + error( + `Invalid metadata type for key ${key}: ${typeof value}. Entry will not be serialized.`, + ); + } + } + } + return { ...cell, + metadata: validMetadata, id: cellId(cell), source, optionsSource, @@ -1766,7 +1790,10 @@ function isMarkdown(output: JupyterOutput, options: JupyterToMarkdownOptions) { return isDisplayDataType(output, options, displayDataIsMarkdown); } -async function mdOutputStream(output: JupyterOutputStream, options: JupyterToMarkdownOptions) { +async function mdOutputStream( + output: JupyterOutputStream, + options: JupyterToMarkdownOptions, +) { let text: string[] = []; if (typeof output.text === "string") { text = [output.text]; @@ -1873,8 +1900,11 @@ async function mdOutputDisplayData( // if output is invalid, warn and emit empty const data = output.data[mimeType] as unknown; if (!Array.isArray(data) || data.some((s) => typeof s !== "string")) { - return await mdWarningOutput(`Unable to process text plain output data -which does not appear to be plain text: ${JSON.stringify(data)}`, options); + return await mdWarningOutput( + `Unable to process text plain output data +which does not appear to be plain text: ${JSON.stringify(data)}`, + options, + ); } const lines = data as string[]; // pandas inexplicably outputs html tables as text/plain with an enclosing single-quote @@ -1911,7 +1941,7 @@ which does not appear to be plain text: ${JSON.stringify(data)}`, options); // no type match found return await mdWarningOutput( "Unable to display output for mime type(s): " + - Object.keys(output.data).join(", "), + Object.keys(output.data).join(", "), options, ); } From e007711a01bd6c2d0fcda5995272e0f0f902dd69 Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Mon, 7 Apr 2025 10:50:35 -0400 Subject: [PATCH 2/6] regression test --- .../docs/smoke-all/2025/04/07/issue-9089.qmd | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/docs/smoke-all/2025/04/07/issue-9089.qmd diff --git a/tests/docs/smoke-all/2025/04/07/issue-9089.qmd b/tests/docs/smoke-all/2025/04/07/issue-9089.qmd new file mode 100644 index 0000000000..682848e9e5 --- /dev/null +++ b/tests/docs/smoke-all/2025/04/07/issue-9089.qmd @@ -0,0 +1,42 @@ +--- +title: DataBricks Notebooks +engine: jupyter +keep-md: true +execute: + eval: false +_quarto: + tests: + html: + ensureFileRegexMatches: + - [] + - [":::"] +--- + + +## Introduction + +In this notebook, we try Quarto with DataBricks. + +## Chapter + +In the first chapter, we try multiple commands and observe their results. + + +```{python} +#| application/vnd.databricks.v1+cell: {cellMetadata: {byteLimit: 2048000, rowLimit: 10000}, inputWidgets: {}, nuid: 7039bc23-d898-4506-b24d-8f1002a66d18, showTitle: false, title: ''} +df = spark.read.table("samples.nyctaxi.trips") +df.show(5) +``` + +This is text in-between the commands. + +```{python} +#| application/vnd.databricks.v1+cell: {cellMetadata: {byteLimit: 2048000, rowLimit: 10000}, inputWidgets: {}, nuid: 21c1cb83-83cc-40c8-9a8b-f5378d3f29be, showTitle: false, title: ''} +from databricks.sdk.runtime import dbutils +dbutils.fs.ls("dbfs:/Workspace/Users/") +``` + +## Conclusion + +Currently, Quarto does not fully work, at least not rendering. + From 6be2beb0a90d7ebddfd0ff6d309484957b5f1a8f Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Mon, 7 Apr 2025 10:59:31 -0400 Subject: [PATCH 3/6] use better key prefix --- src/core/jupyter/jupyter.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/core/jupyter/jupyter.ts b/src/core/jupyter/jupyter.ts index 497bed811f..bed0f5846e 100644 --- a/src/core/jupyter/jupyter.ts +++ b/src/core/jupyter/jupyter.ts @@ -925,11 +925,19 @@ export function jupyterCellWithOptions( const validMetadata: Record = {}; for (const key of Object.keys(cell.metadata)) { const value = cell.metadata[key]; + let jsonEncodedKeyIndex = 0; if (value !== undefined) { if (value && typeof value === "object") { + // https://github.com/quarto-dev/quarto-cli/issues/9089 // we need to json-encode this and signal the encoding in the key + // we can't use the key as is since it may contain invalid characters + // and modifying the key might introduce collisions + // we ensure the key is unique with a counter, and assume + // "quarto-private-*" to be a private namespace for quarto. + // we'd prefer to use _quarto-* instead, but Pandoc doesn't allow keys to start + // with an underscore. validMetadata[ - `quarto-json-encoded-${key.replaceAll(/[^A-Za-z]/g, "_")}` + `quarto-private-${++jsonEncodedKeyIndex}` ] = JSON.stringify({ key, value }); } else if ( typeof value === "string" || typeof value === "number" || From edc808080114ada1c9e52d4eab6021287f09c0e3 Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Mon, 7 Apr 2025 10:59:39 -0400 Subject: [PATCH 4/6] changelog --- news/changelog-1.7.md | 1 + 1 file changed, 1 insertion(+) diff --git a/news/changelog-1.7.md b/news/changelog-1.7.md index dfc6945434..9918c7e1ff 100644 --- a/news/changelog-1.7.md +++ b/news/changelog-1.7.md @@ -140,6 +140,7 @@ All changes included in 1.7: ### `jupyter` +- ([#9089](https://github.com/quarto-dev/quarto-cli/issues/9089)): Compound jupyter metadata is now serialized into a special key-value attribute to not break Pandoc's fenced div parsing. - ([#12114](https://github.com/quarto-dev/quarto-cli/issues/12114)): `JUPYTERCACHE` environment variable from [Jupyter cache CLI](https://jupyter-cache.readthedocs.io/en/latest/using/cli.html) is now respected by Quarto when `cache: true` is used. This environment variable allows to change the path of the cache directory. - ([#12374](https://github.com/quarto-dev/quarto-cli/issues/12374)): Detect language properly in Jupyter notebooks that lack the `language` field in their `kernelspec`s. - ([#12228](https://github.com/quarto-dev/quarto-cli/issues/12228)): `quarto render` will now fails if errors are detected at IPython display level. Setting `error: true` globally or at cell level will keep the error to show in output and not stop the rendering. From 1f6c0b930d286a0b242b8809f74ea71ccd83264a Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Mon, 7 Apr 2025 12:09:47 -0400 Subject: [PATCH 5/6] restore key: null behavior --- src/core/jupyter/jupyter.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/core/jupyter/jupyter.ts b/src/core/jupyter/jupyter.ts index bed0f5846e..dffe4e4ef7 100644 --- a/src/core/jupyter/jupyter.ts +++ b/src/core/jupyter/jupyter.ts @@ -175,6 +175,7 @@ import { jupyterCellSrcAsStr, } from "./jupyter-shared.ts"; import { error } from "../../deno_ral/log.ts"; +import { valid } from "semver/mod.ts"; export const kQuartoMimeType = "quarto_mimetype"; export const kQuartoOutputOrder = "quarto_order"; @@ -922,12 +923,14 @@ export function jupyterCellWithOptions( } }; - const validMetadata: Record = {}; + const validMetadata: Record = {}; for (const key of Object.keys(cell.metadata)) { const value = cell.metadata[key]; let jsonEncodedKeyIndex = 0; if (value !== undefined) { - if (value && typeof value === "object") { + if (!value && typeof value === "object") { + validMetadata[key] = null; + } else if (value && typeof value === "object") { // https://github.com/quarto-dev/quarto-cli/issues/9089 // we need to json-encode this and signal the encoding in the key // we can't use the key as is since it may contain invalid characters From 5bbcc1a22c932e8e1f211b1bfffe2125f74987d1 Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Mon, 7 Apr 2025 12:21:12 -0400 Subject: [PATCH 6/6] don't touch arrays either --- src/core/jupyter/jupyter.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/core/jupyter/jupyter.ts b/src/core/jupyter/jupyter.ts index dffe4e4ef7..a3e5923f79 100644 --- a/src/core/jupyter/jupyter.ts +++ b/src/core/jupyter/jupyter.ts @@ -923,14 +923,17 @@ export function jupyterCellWithOptions( } }; - const validMetadata: Record = {}; + const validMetadata: Record< + string, + string | number | boolean | null | Array + > = {}; for (const key of Object.keys(cell.metadata)) { const value = cell.metadata[key]; let jsonEncodedKeyIndex = 0; if (value !== undefined) { if (!value && typeof value === "object") { validMetadata[key] = null; - } else if (value && typeof value === "object") { + } else if (value && typeof value === "object" && !Array.isArray(value)) { // https://github.com/quarto-dev/quarto-cli/issues/9089 // we need to json-encode this and signal the encoding in the key // we can't use the key as is since it may contain invalid characters @@ -944,7 +947,7 @@ export function jupyterCellWithOptions( ] = JSON.stringify({ key, value }); } else if ( typeof value === "string" || typeof value === "number" || - typeof value === "boolean" + typeof value === "boolean" || Array.isArray(value) ) { validMetadata[key] = value; } else {