diff --git a/package-lock.json b/package-lock.json index 724eb5171c4..d506b92ba76 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,6 +17,7 @@ "command-line-usage": "^7.0.3", "commonmark": "^0.31.2", "dagre": "^0.8.5", + "diff": "^8.0.3", "gray-matter": "^4.0.3", "joi": "^18.0.1", "lz-string": "^1.5.0", @@ -1799,6 +1800,17 @@ "release-it": "16 || 17 || 18 || 19" } }, + "node_modules/@j-ulrich/release-it-regex-bumper/node_modules/diff": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/diff/-/diff-5.2.2.tgz", + "integrity": "sha512-vtcDfH3TOjP8UekytvnHH1o1P4FcUdt4eQ1Y+Abap1tk/OB2MWQvcwS2ClCd1zuIhc3JKOx6p3kod8Vfys3E+A==", + "dev": true, + "license": "BSD-3-Clause", + "optional": true, + "engines": { + "node": ">=0.3.1" + } + }, "node_modules/@jridgewell/gen-mapping": { "version": "0.3.13", "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", @@ -5113,12 +5125,10 @@ "license": "MIT" }, "node_modules/diff": { - "version": "5.2.2", - "resolved": "https://registry.npmjs.org/diff/-/diff-5.2.2.tgz", - "integrity": "sha512-vtcDfH3TOjP8UekytvnHH1o1P4FcUdt4eQ1Y+Abap1tk/OB2MWQvcwS2ClCd1zuIhc3JKOx6p3kod8Vfys3E+A==", - "dev": true, + "version": "8.0.3", + "resolved": "https://registry.npmjs.org/diff/-/diff-8.0.3.tgz", + "integrity": "sha512-qejHi7bcSD4hQAZE0tNAawRK1ZtafHDmMTMkrrIGgSLl7hTnQHmKCeB45xAcbfTqK2zowkM3j3bHt/4b/ARbYQ==", "license": "BSD-3-Clause", - "optional": true, "engines": { "node": ">=0.3.1" } diff --git a/package.json b/package.json index 33e9e219582..242a0d5da76 100644 --- a/package.json +++ b/package.json @@ -207,6 +207,7 @@ "command-line-usage": "^7.0.3", "commonmark": "^0.31.2", "dagre": "^0.8.5", + "diff": "^8.0.3", "gray-matter": "^4.0.3", "joi": "^18.0.1", "lz-string": "^1.5.0", diff --git a/src/config.ts b/src/config.ts index f46fed7641a..2a560647890 100644 --- a/src/config.ts +++ b/src/config.ts @@ -250,7 +250,7 @@ export const FlowrConfig = { * The default configuration for flowR, used when no config file is found or when a config file is missing some options. * You can use this as a base for your own config and only specify the options you want to change. */ - default(): FlowrConfig { + default(this: void): FlowrConfig { return { ignoreSourceCalls: false, semantics: { @@ -364,7 +364,7 @@ export const FlowrConfig = { /** * Parses the given JSON string as a flowR config file, returning the resulting config object if the parsing and validation were successful, or `undefined` if there was an error. */ - parse(jsonString: string): FlowrConfig | undefined { + parse(this: void, jsonString: string): FlowrConfig | undefined { try { const parsed = JSON.parse(jsonString) as FlowrConfig; const validate = FlowrConfig.Schema.validate(parsed); @@ -383,14 +383,14 @@ export const FlowrConfig = { * Creates a new flowr config that has the updated values. */ // eslint-disable-next-line @typescript-eslint/no-invalid-void-type - amend(config: FlowrConfig, amendmentFunc: (config: DeepWritable) => FlowrConfig | void): FlowrConfig { + amend(this: void, config: FlowrConfig, amendmentFunc: (config: DeepWritable) => FlowrConfig | void): FlowrConfig { const newConfig = FlowrConfig.clone(config); return amendmentFunc(newConfig as DeepWritable) ?? newConfig; }, /** * Clones the given flowr config object. */ - clone(config: FlowrConfig): FlowrConfig { + clone(this: void, config: FlowrConfig): FlowrConfig { return deepClonePreserveUnclonable(config); }, /** @@ -399,7 +399,7 @@ export const FlowrConfig = { * infer the config from flowR's default locations. * This is mostly useful for user-facing features. */ - fromFile(configFile?: string, configWorkingDirectory = process.cwd()): FlowrConfig { + fromFile(this: void, configFile?: string, configWorkingDirectory = process.cwd()): FlowrConfig { try { return loadConfigFromFile(configFile, configWorkingDirectory); } catch(e) { @@ -410,7 +410,7 @@ export const FlowrConfig = { /** * Gets the configuration for the given engine type from the config. */ - getForEngine(config: FlowrConfig, engine: T): EngineConfig & { type: T } | undefined { + getForEngine(this: void, config: FlowrConfig, engine: T): EngineConfig & { type: T } | undefined { const engines = config.engines; if(engines.length > 0) { return engines.find(e => e.type === engine) as EngineConfig & { type: T } | undefined; @@ -429,7 +429,7 @@ export const FlowrConfig = { * console.log(newConfig.solver.variables); // Output: "builtin" * ``` */ - setInConfig(config: FlowrConfig, key: Path, value: PathValue): FlowrConfig { + setInConfig(this: void, config: FlowrConfig, key: Path, value: PathValue): FlowrConfig { const clone = FlowrConfig.clone(config); objectPath.set(clone, key, value); return clone; @@ -438,7 +438,7 @@ export const FlowrConfig = { * Modifies the given config object in place by setting the given value at the given key, where the key is a dot-separated path to the value in the config object. * @see {@link setInConfig} for a version that returns a new config object instead of modifying the given one in place. */ - setInConfigInPlace(config: FlowrConfig, key: Path, value: PathValue): void { + setInConfigInPlace(this: void, config: FlowrConfig, key: Path, value: PathValue): void { objectPath.set(config, key, value); } } as const; diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts index 38e5414a52f..61ac760f984 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts @@ -246,7 +246,7 @@ export function sourceRequest(rootId: NodeId, request: RParseRequest } else { guard(textRequest !== undefined, `Expected text request to be defined for sourced file ${JSON.stringify(request)}`); } - const parsed = (!data.parser.async ? data.parser : new RShellExecutor()).parse(textRequest.r); + const parsed = (!data.parser.async ? data.parser : new RShellExecutor()).parse(textRequest.r, data.ctx); const normalized = (typeof parsed !== 'string' ? normalizeTreeSitter({ files: [{ parsed, filePath: textRequest.path }] }, getId, data.ctx.config) : normalize({ files: [{ parsed, filePath: textRequest.path }] }, getId)) as NormalizedAst; diff --git a/src/documentation/wiki-analyzer.ts b/src/documentation/wiki-analyzer.ts index 160fe363cf3..f1427c3e582 100644 --- a/src/documentation/wiki-analyzer.ts +++ b/src/documentation/wiki-analyzer.ts @@ -39,7 +39,9 @@ import { FlowrAnalyzerPlugin } from '../project/plugins/flowr-analyzer-plugin'; import { FlowrAnalyzerEnvironmentContext } from '../project/context/flowr-analyzer-environment-context'; import { FlowrAnalyzerFunctionsContext } from '../project/context/flowr-analyzer-functions-context'; import { FlowrAnalyzerMetaContext } from '../project/context/flowr-analyzer-meta-context'; +import { FlowrAnalyzerIncrementalAnalysisContext } from '../project/context/flowr-analyzer-incremental-analysis-context'; import { FlowrConfig } from '../config'; +import { FlowrInlineTextFile } from '../project/context/flowr-file'; async function analyzerQuickExample() { const analyzer = await new FlowrAnalyzerBuilder() @@ -99,11 +101,12 @@ ${ 'How to add a new plugin': undefined, }, 'Context Information': { - 'Files Context': undefined, - 'Loading Order Context': undefined, - 'Dependencies Context': undefined, - 'Environment Context': undefined, - 'Meta Context': undefined, + 'Files Context': undefined, + 'Loading Order Context': undefined, + 'Dependencies Context': undefined, + 'Environment Context': undefined, + 'Meta Context': undefined, + 'Incremental Analysis Context': undefined, }, 'Caching': undefined }) @@ -478,6 +481,50 @@ and the project namespace via ${ctx.linkM(FlowrAnalyzerMetaContext, 'getNamespace', { codeFont: true, realNameWrapper: 'i' })}. +${section('Incremental Analysis Context', 3)} + +The ${ctx.link(FlowrAnalyzerIncrementalAnalysisContext)} is a context that stores analysis information needed for making the next analysis run incremental by reusing the previous analysis results: + +${ctx.hierarchy(FlowrAnalyzerIncrementalAnalysisContext, { showImplSnippet: false })} + +This context is not an analysis-result cache by itself. +Instead, it carries forward the minimal state needed by future incremental phases after an invalidation happened. +At the moment, it is used for incremental parsing with Tree-sitter, but it is intended to become the shared context for additional incremental analysis stages as well. + +If the analyzer or context is reset, the incremental information is discarded via +${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'reset', { codeFont: true, realNameWrapper: 'i' })}. +In other words, this context only transports incremental handoff state between analysis runs. + +${section('Incremental Parsing', 4)} + +Currently, the implemented use of this context is Tree-sitter's incremental parsing support. +When a file is represented by a mutable file provider such as ${ctx.link('FlowrInlineTextFile')} and its content is invalidated via +${ctx.linkM(FlowrInlineTextFile, 'invalidate', { codeFont: true, realNameWrapper: 'i' })}, +the analyzer receives a file invalidation event. +At that point, the incremental context only records the file path together with the old source text. +No edit region is computed eagerly during invalidation. + +After a successful parse-oriented analysis run, the analyzer cache stores the latest Tree-sitter parse trees in this context via +${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'storeOldParseResults', { codeFont: true, realNameWrapper: 'i' })}. +This gives the next parse run access to the last completed parse snapshot for each file path. + +On the next parse run, Tree-sitter combines both pieces of information lazily: + +* the previous parse tree obtained from + ${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'getOldParseResultOf', { codeFont: true, realNameWrapper: 'i' })} +* the old source text obtained from + ${ctx.linkM(FlowrAnalyzerIncrementalAnalysisContext, 'getAndRemoveOldContentOf', { codeFont: true, realNameWrapper: 'i' })} + +Using these together with the current file content, flowR computes a minimal ${ctx.link('Parser.Edit')} only when a new parse is actually requested. +If the file content did not change, the previous tree can be reused directly. +Otherwise, the edit is applied to the previous tree and Tree-sitter reparses incrementally instead of starting from scratch. +The stored old-content entry is consumed when it is used, so invalidation state only survives until the next relevant parse. + +${section('Incremental Dataflow', 4)} + +This context is planned to also support future incremental dataflow graph computation. + + ${section('Caching', 2)} To speed up analyses, flowR provides a caching mechanism that stores intermediate results of the analysis. diff --git a/src/project/cache/flowr-analyzer-cache.ts b/src/project/cache/flowr-analyzer-cache.ts index 478e498a8d4..34f0ddd06dc 100644 --- a/src/project/cache/flowr-analyzer-cache.ts +++ b/src/project/cache/flowr-analyzer-cache.ts @@ -1,5 +1,5 @@ -import type { KnownParser } from '../../r-bridge/parser'; -import { type CacheInvalidationEvent, CacheInvalidationEventType, FlowrCache } from './flowr-cache'; +import type { KnownParser, ParseStepOutput } from '../../r-bridge/parser'; +import { type InvalidationEvent, InvalidationEventType, FlowrCache } from './flowr-cache'; import { createDataflowPipeline, type DEFAULT_DATAFLOW_PIPELINE, @@ -18,7 +18,7 @@ import type { FlowrAnalyzerContext } from '../context/flowr-analyzer-context'; import { FlowrAnalyzerControlFlowCache } from './flowr-analyzer-controlflow-cache'; import type { CallGraph } from '../../dataflow/graph/call-graph'; import { computeCallGraph } from '../../dataflow/graph/call-graph'; - +import type { Tree } from 'web-tree-sitter'; interface FlowrAnalyzerCacheOptions { parser: Parser; context: FlowrAnalyzerContext; @@ -56,30 +56,33 @@ export class FlowrAnalyzerCache extends FlowrCache; this.controlFlowCache = new FlowrAnalyzerControlFlowCache(); this.callGraphCache = undefined; + this.computeIfAbsent(true, () => this.pipeline?.getResults(true)); } public static create(data: FlowrAnalyzerCacheOptions): FlowrAnalyzerCache { return new FlowrAnalyzerCache(data); } - public override receive(event: CacheInvalidationEvent): void { + public override receive(event: InvalidationEvent): void { super.receive(event); - switch(event.type) { - case CacheInvalidationEventType.Full: + const type = event.type; + switch(type) { + case InvalidationEventType.Full: + case InvalidationEventType.FileInvalidate: this.initCacheProviders(); break; default: - assertUnreachable(event.type); + assertUnreachable(type); } } private get(): AnalyzerCacheType { /* this will do a ref assignment, so indirect force */ - return this.computeIfAbsent(false, () => this.pipeline.getResults(true)); + return this.computeIfAbsent(false, () => this.pipeline?.getResults(true)); } public reset() { - this.receive({ type: CacheInvalidationEventType.Full }); + this.receive({ type: InvalidationEventType.Full }); } private async runTapeUntil(force: boolean | undefined, until: () => T | undefined): Promise { @@ -92,10 +95,26 @@ export class FlowrAnalyzerCache extends FlowrCache // cast needed because of TypeScript's limited narrowing capabilities + ); + } + } + /** * Get the parse output for the request, parsing if necessary. * @param force - Do not use the cache, instead force a new parse. @@ -112,7 +131,7 @@ export class FlowrAnalyzerCache extends FlowrCache['parse']> | undefined { - return this.get().parse; + return this.get()?.parse; } /** @@ -131,7 +150,7 @@ export class FlowrAnalyzerCache extends FlowrCache['normalize']> | undefined { - return this.get().normalize; + return this.get()?.normalize; } /** @@ -150,7 +169,7 @@ export class FlowrAnalyzerCache extends FlowrCache['dataflow']> | undefined { - return this.get().dataflow; + return this.get()?.dataflow; } /** diff --git a/src/project/cache/flowr-cache.ts b/src/project/cache/flowr-cache.ts index ef10fcd4839..fdf58a5bbbf 100644 --- a/src/project/cache/flowr-cache.ts +++ b/src/project/cache/flowr-cache.ts @@ -1,37 +1,52 @@ import { assertUnreachable } from '../../util/assert'; +import type { StringableContent } from '../context/flowr-file'; -export const enum CacheInvalidationEventType { - Full = 'full' +export const enum InvalidationEventType { + Full = 'full', + FileInvalidate = 'file-invalidate', } -export type CacheInvalidationEvent = - { type: CacheInvalidationEventType.Full }; -export interface CacheInvalidationEventReceiver { - receive(event: CacheInvalidationEvent): void +export interface FileContentInvalidateEvent { + readonly type: InvalidationEventType.FileInvalidate; + readonly oldContent: Content | undefined; + readonly filePath: string; +} + +export type InvalidationEvent = + { type: InvalidationEventType.Full } + | FileContentInvalidateEvent; + + +export type InvalidationEventHandler = (event: InvalidationEvent) => void; + +export interface InvalidationEventReceiver { + receive: InvalidationEventHandler } /** * Central class for caching analysis results in FlowR. */ -export abstract class FlowrCache implements CacheInvalidationEventReceiver { +export abstract class FlowrCache implements InvalidationEventReceiver { private value: Cache | undefined = undefined; - private dependents: CacheInvalidationEventReceiver[] = []; + private dependents: InvalidationEventReceiver[] = []; - public registerDependent(dependent: CacheInvalidationEventReceiver) { + public registerDependent(dependent: InvalidationEventReceiver) { this.dependents.push(dependent); } - public removeDependent(dependent: CacheInvalidationEventReceiver) { + public removeDependent(dependent: InvalidationEventReceiver) { this.dependents = this.dependents.filter(d => d !== dependent); } - receive(event: CacheInvalidationEvent): void { + receive(event: InvalidationEvent): void { + const type = event.type; /* we will update this as soon as we support incremental update patterns */ - switch(event.type) { - case CacheInvalidationEventType.Full: + switch(type) { + case InvalidationEventType.Full: + case InvalidationEventType.FileInvalidate: this.value = undefined; break; default: - assertUnreachable(event.type); + assertUnreachable(type); } /* in the future we want to defer this *after* the dataflow is re-computed, then all receivers can decide whether they need to update */ this.notifyDependents(event); @@ -40,7 +55,7 @@ export abstract class FlowrCache implements CacheInvalidationEventReceive /** * Notify all dependents of a cache invalidation event. */ - public notifyDependents(event: CacheInvalidationEvent) { + public notifyDependents(event: InvalidationEvent) { for(const dependent of this.dependents) { dependent.receive(event); } diff --git a/src/project/context/flowr-analyzer-context.ts b/src/project/context/flowr-analyzer-context.ts index fe0a029d698..922629fd765 100644 --- a/src/project/context/flowr-analyzer-context.ts +++ b/src/project/context/flowr-analyzer-context.ts @@ -31,6 +31,18 @@ import { FlowrAnalyzerEnvironmentContext } from './flowr-analyzer-environment-co import type { ReadOnlyFlowrAnalyzerMetaContext } from './flowr-analyzer-meta-context'; import { FlowrAnalyzerMetaContext } from './flowr-analyzer-meta-context'; import type { FlowrAnalyzer } from '../flowr-analyzer'; +import type { + ReadOnlyFlowrAnalyzerIncrementalAnalysisContext +} from './flowr-analyzer-incremental-analysis-context'; +import { + FlowrAnalyzerIncrementalAnalysisContext +} from './flowr-analyzer-incremental-analysis-context'; +import type { + InvalidationEvent, + InvalidationEventReceiver } from '../cache/flowr-cache'; +import { + InvalidationEventType +} from '../cache/flowr-cache'; /** * This is a read-only interface to the {@link FlowrAnalyzerContext}. @@ -54,6 +66,10 @@ export interface ReadOnlyFlowrAnalyzerContext { * The environment context provides access to the environment information used during analysis. */ readonly env: ReadOnlyFlowrAnalyzerEnvironmentContext; + /** + * The incremental context provides potential information for the next incremental analysis run + */ + readonly inc: ReadOnlyFlowrAnalyzerIncrementalAnalysisContext; /** * The configuration options used by the analyzer. */ @@ -76,11 +92,12 @@ export interface ReadOnlyFlowrAnalyzerContext { * {@link deps.getDependency}. * If you are just interested in inspecting the context, you can use {@link ReadOnlyFlowrAnalyzerContext} instead (e.g., via {@link inspect}). */ -export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext { +export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext, InvalidationEventReceiver { public readonly meta: FlowrAnalyzerMetaContext; public readonly files: FlowrAnalyzerFilesContext; public readonly deps: FlowrAnalyzerDependenciesContext; public readonly env: FlowrAnalyzerEnvironmentContext; + public readonly inc: FlowrAnalyzerIncrementalAnalysisContext; private _analyzer: FlowrAnalyzer | undefined; public readonly config: FlowrConfig; @@ -88,11 +105,12 @@ export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext { constructor(config: FlowrConfig, plugins: ReadonlyMap) { this.config = config; const loadingOrder = new FlowrAnalyzerLoadingOrderContext(this, plugins.get(PluginType.LoadingOrder) as FlowrAnalyzerLoadingOrderPlugin[]); - this.files = new FlowrAnalyzerFilesContext(loadingOrder, (plugins.get(PluginType.ProjectDiscovery) ?? []) as FlowrAnalyzerProjectDiscoveryPlugin[], + this.files = new FlowrAnalyzerFilesContext(this, loadingOrder, (plugins.get(PluginType.ProjectDiscovery) ?? []) as FlowrAnalyzerProjectDiscoveryPlugin[], (plugins.get(PluginType.FileLoad) ?? []) as FlowrAnalyzerFilePlugin[]); - this.env = new FlowrAnalyzerEnvironmentContext(this); + this.env = new FlowrAnalyzerEnvironmentContext(this); + this.inc = new FlowrAnalyzerIncrementalAnalysisContext(this); const functions = new FlowrAnalyzerFunctionsContext(this); - this.deps = new FlowrAnalyzerDependenciesContext(functions, (plugins.get(PluginType.DependencyIdentification) ?? []) as FlowrAnalyzerPackageVersionsPlugin[]); + this.deps = new FlowrAnalyzerDependenciesContext(functions, (plugins.get(PluginType.DependencyIdentification) ?? []) as FlowrAnalyzerPackageVersionsPlugin[]); this.meta = new FlowrAnalyzerMetaContext(); } @@ -141,9 +159,14 @@ export class FlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerContext { * Reset the context to its initial state, e.g., removing all files, dependencies, and loading orders. */ public reset(): void { - this.files.reset(); - this.deps.reset(); - this.meta.reset(); + this.receive( { type: InvalidationEventType.Full }); + } + + receive(event: InvalidationEvent): void { + this.meta.receive(event); + this.files.receive(event); + this.deps.receive(event); + this.inc.receive(event); } } diff --git a/src/project/context/flowr-analyzer-dependencies-context.ts b/src/project/context/flowr-analyzer-dependencies-context.ts index 6470b33e0de..34c9a0a647b 100644 --- a/src/project/context/flowr-analyzer-dependencies-context.ts +++ b/src/project/context/flowr-analyzer-dependencies-context.ts @@ -4,6 +4,9 @@ import { } from '../plugins/package-version-plugins/flowr-analyzer-package-versions-plugin'; import type { Package } from '../plugins/package-version-plugins/package'; import type { FlowrAnalyzerFunctionsContext, ReadOnlyFlowrAnalyzerFunctionsContext } from './flowr-analyzer-functions-context'; +import type { InvalidationEvent, InvalidationEventReceiver } from '../cache/flowr-cache'; +import { InvalidationEventType } from '../cache/flowr-cache'; +import { assertUnreachable } from '../../util/assert'; /** * This is a read-only interface to the {@link FlowrAnalyzerDependenciesContext}. @@ -39,7 +42,7 @@ export interface ReadOnlyFlowrAnalyzerDependenciesContext { * * If you are interested in inspecting these dependencies, refer to {@link ReadOnlyFlowrAnalyzerDependenciesContext}. */ -export class FlowrAnalyzerDependenciesContext extends AbstractFlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerDependenciesContext { +export class FlowrAnalyzerDependenciesContext extends AbstractFlowrAnalyzerContext implements ReadOnlyFlowrAnalyzerDependenciesContext, InvalidationEventReceiver { public readonly name = 'flowr-analyzer-dependencies-context'; public readonly functionsContext: FlowrAnalyzerFunctionsContext; @@ -52,6 +55,20 @@ export class FlowrAnalyzerDependenciesContext extends AbstractFlowrAnalyzerConte this.staticsLoaded = false; } + receive(event: InvalidationEvent): void { + const type = event.type; + switch(type) { + case InvalidationEventType.Full: + this.reset(); + break; + case InvalidationEventType.FileInvalidate: + // nothing to do + break; + default: + assertUnreachable(type); + } + } + public constructor(functionsContext: FlowrAnalyzerFunctionsContext, plugins?: readonly FlowrAnalyzerPackageVersionsPlugin[]) { super(functionsContext.getAttachedContext(), FlowrAnalyzerPackageVersionsPlugin.defaultPlugin(), plugins); this.functionsContext = functionsContext; diff --git a/src/project/context/flowr-analyzer-files-context.ts b/src/project/context/flowr-analyzer-files-context.ts index 0476ab141c6..aae1af13fe2 100644 --- a/src/project/context/flowr-analyzer-files-context.ts +++ b/src/project/context/flowr-analyzer-files-context.ts @@ -4,7 +4,7 @@ import type { RParseRequest, RParseRequestFromFile } from '../../r-bridge/retriever'; import { isParseRequest } from '../../r-bridge/retriever'; -import { guard } from '../../util/assert'; +import { assertUnreachable, guard } from '../../util/assert'; import type { FlowrAnalyzerLoadingOrderContext, ReadOnlyFlowrAnalyzerLoadingOrderContext @@ -20,6 +20,11 @@ import fs from 'fs'; import path from 'path'; import type { FlowrNewsFile } from '../plugins/file-plugins/files/flowr-news-file'; import type { FlowrNamespaceFile } from '../plugins/file-plugins/files/flowr-namespace-file'; +import { FlowrAnalyzer } from '../flowr-analyzer'; +import type { FlowrAnalyzerContext } from './flowr-analyzer-context'; +import type { InvalidationEvent, InvalidationEventReceiver } from '../cache/flowr-cache'; +import { InvalidationEventType } from '../cache/flowr-cache'; + const fileLog = log.getSubLogger({ name: 'flowr-analyzer-files-context' }); @@ -121,7 +126,7 @@ export interface ReadOnlyFlowrAnalyzerFilesContext { * If you are interested in inspecting these files, refer to {@link ReadOnlyFlowrAnalyzerFilesContext}. * Plugins, however, can use this context directly to modify files. */ -export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext)[], FlowrAnalyzerProjectDiscoveryPlugin> implements ReadOnlyFlowrAnalyzerFilesContext { +export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext)[], FlowrAnalyzerProjectDiscoveryPlugin> implements ReadOnlyFlowrAnalyzerFilesContext, InvalidationEventReceiver { public readonly name = 'flowr-analyzer-files-context'; public readonly loadingOrder: FlowrAnalyzerLoadingOrderContext; @@ -129,6 +134,7 @@ export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext = new Map(); private inlineFiles: FlowrFileProvider[] = []; private readonly fileLoaders: readonly FlowrAnalyzerFilePlugin[]; + private readonly context: FlowrAnalyzerContext; /** these are all the paths of files that have been considered by the dataflow graph (even if not added) */ private readonly consideredFiles: string[] = []; @@ -136,11 +142,13 @@ export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext(Object.values(FileRole).map(k => [k, []])) as RoleBasedFiles; constructor( + context: FlowrAnalyzerContext, loadingOrder: FlowrAnalyzerLoadingOrderContext, plugins: readonly FlowrAnalyzerProjectDiscoveryPlugin[], fileLoaders: readonly FlowrAnalyzerFilePlugin[] ) { super(loadingOrder.getAttachedContext(), FlowrAnalyzerProjectDiscoveryPlugin.defaultPlugin(), plugins); + this.context = context; this.fileLoaders = [...fileLoaders, FlowrAnalyzerFilePlugin.defaultPlugin()]; this.loadingOrder = loadingOrder; } @@ -153,6 +161,20 @@ export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext(Object.values(FileRole).map(k => [k, []])) as RoleBasedFiles; } + receive(event: InvalidationEvent): void { + const type = event.type; + switch(type) { + case InvalidationEventType.Full: + this.reset(); + break; + case InvalidationEventType.FileInvalidate: + // nothing to do + break; + default: + assertUnreachable(type); + } + } + /** * Record that a file has been considered during dataflow analysis. */ @@ -211,6 +233,10 @@ export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext { + this.context.analyzer?.receive(c); + }); + if(f.path() === FlowrFile.INLINE_PATH) { this.inlineFiles.push(f); } else { @@ -348,4 +374,8 @@ export class FlowrAnalyzerFilesContext extends AbstractFlowrAnalyzerContext = new Map(); + private oldParseResults: Map = new Map(); + + + constructor(context: FlowrAnalyzerContext) { + this.context = context; + } + + public reset(): void { + this.changedFilesWithOldContent = new Map(); + this.oldParseResults = new Map(); + } + + handleFileInvalidate(filePath: FilePath, oldContent: string): void { + if(this.changedFilesWithOldContent.has(filePath)) { + // If a file is changed multiple times since the last analysis, we only want to store the original old content as the old analysis results were computed with that. + return; + } + + this.changedFilesWithOldContent.set(filePath, oldContent); + } + + receive(event: InvalidationEvent): void { + const type = event.type; + switch(type) { + case InvalidationEventType.Full: + this.reset(); + break; + case InvalidationEventType.FileInvalidate: + this.handleFileInvalidate(event.filePath, event.oldContent?.toString() ?? ''); + break; + default: + assertUnreachable(type); + } + } + + public storeOldParseResults(parseStepOutput: ParseStepOutput): void { + for(const parsedStepSingleOutput of parseStepOutput.files) { + if(parsedStepSingleOutput.filePath === undefined) { + // there could be multiple files without a file path, making a distinction impossible + continue; + } + + this.oldParseResults.set(parsedStepSingleOutput.filePath, parsedStepSingleOutput.parsed); + } + } + + public getOldParseResultOf(filePath: FilePath): Parser.Tree | undefined { + return this.oldParseResults.get(filePath); + } + + public getAndRemoveOldContentOf(filePath: FilePath): string | undefined { + const oldContent = this.changedFilesWithOldContent.get(filePath); + this.changedFilesWithOldContent.delete(filePath); + return oldContent; + } +} diff --git a/src/project/context/flowr-analyzer-meta-context.ts b/src/project/context/flowr-analyzer-meta-context.ts index 01913b05a81..38eb9a4d0bb 100644 --- a/src/project/context/flowr-analyzer-meta-context.ts +++ b/src/project/context/flowr-analyzer-meta-context.ts @@ -1,4 +1,7 @@ import type { SemVer } from 'semver'; +import type { InvalidationEvent, InvalidationEventReceiver } from '../cache/flowr-cache'; +import { InvalidationEventType } from '../cache/flowr-cache'; +import { assertUnreachable } from '../../util/assert'; export interface ReadOnlyFlowrAnalyzerMetaContext { @@ -30,7 +33,7 @@ export interface ReadOnlyFlowrAnalyzerMetaContext { * * If you are interested in inspecting this metadata, refer to {@link ReadOnlyFlowrAnalyzerMetaContext}. */ -export class FlowrAnalyzerMetaContext implements ReadOnlyFlowrAnalyzerMetaContext { +export class FlowrAnalyzerMetaContext implements ReadOnlyFlowrAnalyzerMetaContext, InvalidationEventReceiver { public readonly name = 'flowr-analyzer-meta-context'; private projectName: string | undefined; private projectTitle: string | undefined; @@ -44,6 +47,20 @@ export class FlowrAnalyzerMetaContext implements ReadOnlyFlowrAnalyzerMetaContex this.namespace = undefined; } + receive(event: InvalidationEvent): void { + const type = event.type; + switch(type) { + case InvalidationEventType.Full: + this.reset(); + break; + case InvalidationEventType.FileInvalidate: + // nothing to do + break; + default: + assertUnreachable(type); + } + } + public setProjectName(name: string): void { this.projectName = name; } diff --git a/src/project/context/flowr-file.ts b/src/project/context/flowr-file.ts index bd0aacb5fdb..137dc12473f 100644 --- a/src/project/context/flowr-file.ts +++ b/src/project/context/flowr-file.ts @@ -1,6 +1,8 @@ import type { PathLike } from 'fs'; import fs from 'fs'; import type { RParseRequest } from '../../r-bridge/retriever'; +import type { InvalidationEventHandler } from '../cache/flowr-cache'; +import { InvalidationEventType } from '../cache/flowr-cache'; /** * Just a readable alias for file paths, mostly for documentation purposes. @@ -41,6 +43,8 @@ export enum FileRole { export type StringableContent = { toString(): string }; + + /** * This is the basic interface for all files known to the FlowrAnalyzer. * You can implement this interface to provide custom file loading mechanisms. @@ -79,6 +83,21 @@ export interface FlowrFileProvider): void; + + /** + * Remove a previously added callback + */ + removeOnInvalidate(callback: InvalidationEventHandler): void; + + /** + * Reload the file content because something has changed. + */ + invalidate(): void; } /** @@ -90,6 +109,7 @@ export abstract class FlowrFile[] = []; public static readonly INLINE_PATH = '@inline'; public constructor(filePath: PathLike, roles?: readonly FileRole[]) { @@ -145,6 +165,22 @@ export abstract class FlowrFile): void { + this.onInvalidate.push(callback); + } + + public removeOnInvalidate(callback: InvalidationEventHandler): void { + this.onInvalidate = this.onInvalidate.filter(cb => cb !== callback); + } + + public invalidate(): void { + const oldContent = this.contentCache; + this.contentCache = undefined; + for(const invalidator of this.onInvalidate) { + invalidator({ type: InvalidationEventType.FileInvalidate, oldContent, filePath: this.path() }); + } + } } /** @@ -162,7 +198,7 @@ export class FlowrTextFile extends FlowrFile { * These will be handled by the {@link FlowrAnalyzerDescriptionFilePlugin} (e.g., by using the {@link FlowrDescriptionFile#from} method decorator). */ export class FlowrInlineTextFile extends FlowrFile { - private readonly contentStr: string; + private contentStr: string; constructor(path: PathLike, content: string) { super(path); @@ -172,4 +208,13 @@ export class FlowrInlineTextFile extends FlowrFile { protected loadContent(): string { return this.contentStr; } + + /** + * Update the content of this inline file and invalidate the cache to trigger updates in the analysis. + * @see {@link FlowrFile#invalidate} + */ + public updateInlineContent(newContent: string): void { + this.contentStr = newContent; + this.invalidate(); + } } diff --git a/src/project/flowr-analyzer-builder.ts b/src/project/flowr-analyzer-builder.ts index 46dc0609b76..6c8f56e0889 100644 --- a/src/project/flowr-analyzer-builder.ts +++ b/src/project/flowr-analyzer-builder.ts @@ -79,6 +79,9 @@ export class FlowrAnalyzerBuilder { } // we have a type safe export to ease auto-completion + /** + * Set a specific value in the configuration used by the resulting analyzer. + */ public configure>( key: K, value: PathValue diff --git a/src/project/flowr-analyzer.ts b/src/project/flowr-analyzer.ts index a5536824274..b73c7b83e1d 100644 --- a/src/project/flowr-analyzer.ts +++ b/src/project/flowr-analyzer.ts @@ -21,6 +21,7 @@ import type { CallGraph } from '../dataflow/graph/call-graph'; import type { Tree } from 'web-tree-sitter'; import { normalizeTreeSitterTreeToAst } from '../r-bridge/lang-4.x/tree-sitter/tree-sitter-normalize'; import { TreeSitterExecutor } from '../r-bridge/lang-4.x/tree-sitter/tree-sitter-executor'; +import type { InvalidationEvent } from './cache/flowr-cache'; /** * Extends the {@link ReadonlyFlowrAnalysisProvider} with methods that allow modifying the analyzer state. @@ -49,6 +50,11 @@ export interface FlowrAnalysisProvider * Reset the analyzer state, including the context and the cache. */ reset(): void; + + /** + * Receive cache invalidation events from the cache and propagate them to the context and other relevant components. + */ + receive(event: InvalidationEvent): void } /** @@ -203,13 +209,19 @@ export class FlowrAnalyzer implements this.cache.reset(); } + public receive(event: InvalidationEvent): void { + this.ctx.receive(event); + this.cache.receive(event); + } + public parseStandalone(data: `${typeof fileProtocol}${string}` | string | RParseRequest): Tree { + console.log('parseStandalone'); const request = isParseRequest(data) ? data : requestFromInput(data); if(this.parser.name === 'tree-sitter') { - return this.parser.parse(request); + return this.parser.parse(request, this.ctx); } else { const ts = new TreeSitterExecutor(); - return ts.parse(request); + return ts.parse(request, this.ctx); } } diff --git a/src/project/incremental/incremental-parse/edit-computation.ts b/src/project/incremental/incremental-parse/edit-computation.ts new file mode 100644 index 00000000000..2ad92b406b7 --- /dev/null +++ b/src/project/incremental/incremental-parse/edit-computation.ts @@ -0,0 +1,63 @@ +import type Parser from 'web-tree-sitter'; + + +/** + * Computes a single minimal change region ({@link Parser.Edit}) that contains all modifications. + * @param oldContent - The original content. + * @param newContent - The changed content. + */ +export function computeEditRegion(oldContent: string, newContent: string): Parser.Edit { + const oldLen = oldContent.length; + const newLen = newContent.length; + + // 1) Longest common prefix + let startIndex = 0; + while( + startIndex < oldLen && + startIndex < newLen && + oldContent[startIndex] === newContent[startIndex] + ) { + startIndex++; + } + + // 2) Longest common suffix, without overlapping the prefix + let oldSuffixIndex = oldLen; + let newSuffixIndex = newLen; + while( + oldSuffixIndex > startIndex && + newSuffixIndex > startIndex && + oldContent[oldSuffixIndex - 1] === newContent[newSuffixIndex - 1] + ) { + oldSuffixIndex--; + newSuffixIndex--; + } + + const oldEndIndex = oldSuffixIndex; + const newEndIndex = newSuffixIndex; + + return { + startIndex, + oldEndIndex, + newEndIndex, + startPosition: indexToPoint(oldContent, startIndex), + oldEndPosition: indexToPoint(oldContent, oldEndIndex), + newEndPosition: indexToPoint(newContent, newEndIndex), + }; +} + + +function indexToPoint(text: string, index: number): Parser.Point { + let row = 0; + let column = 0; + + for(let i = 0; i < index; i++) { + if(text[i] === '\n') { + row++; + column = 0; + } else { + column++; + } + } + + return { row, column }; +} \ No newline at end of file diff --git a/src/project/incremental/incremental-parse/incremental-parse.ts b/src/project/incremental/incremental-parse/incremental-parse.ts new file mode 100644 index 00000000000..2ca1738c418 --- /dev/null +++ b/src/project/incremental/incremental-parse/incremental-parse.ts @@ -0,0 +1,47 @@ +import type { FlowrAnalyzerContext } from '../../context/flowr-analyzer-context'; +import type Parser from 'web-tree-sitter'; +import type { FilePath } from '../../context/flowr-file'; +import { computeEditRegion } from './edit-computation'; + + +export interface ReparseInfo { + readonly previousTree: Parser.Tree; + readonly editRegion: Parser.Edit | undefined; +} + + +/** + * Computes the information needed to reparse a file incrementally with tree-sitter. + * Returns `undefined` if incremental reparsing is not possible. + */ +export function computeReparseInfo(ctx: FlowrAnalyzerContext, filePath: FilePath): ReparseInfo | undefined { + const previousTree = ctx.inc.getOldParseResultOf(filePath); + if(!previousTree) { + // this file was not parsed before + return undefined; + } + + const oldContent = ctx.inc.getAndRemoveOldContentOf(filePath); + if(oldContent === undefined) { + // this file has not been invalidated since the last parse, no reparse needed + return { + previousTree, + editRegion: undefined + }; + } + + const newContent = ctx.files.getFile(filePath)?.content().toString() ?? ''; + if(newContent === oldContent) { + // this file was invalidated, but the content did not change, no reparse needed + return { + previousTree, + editRegion: undefined + }; + } + + const editRegion = computeEditRegion(oldContent, newContent); + return { + previousTree, + editRegion + }; +} \ No newline at end of file diff --git a/src/project/plugins/file-plugins/files/flowr-jupyter-file.ts b/src/project/plugins/file-plugins/files/flowr-jupyter-file.ts index 57f04dbf9b8..fb31b6ce38a 100644 --- a/src/project/plugins/file-plugins/files/flowr-jupyter-file.ts +++ b/src/project/plugins/file-plugins/files/flowr-jupyter-file.ts @@ -7,7 +7,7 @@ import { FileRole, FlowrFile } from '../../../context/flowr-file'; * This decorates a text file and parses its contents as a Jupyter file. * Finnaly, it provides access to the single cells, and all cells fused together as one R file. */ -export class FlowrJupyterFile extends FlowrFile { +export class FlowrJupyterFile extends FlowrFile { private readonly wrapped: FlowrFileProvider; /** diff --git a/src/project/plugins/file-plugins/files/flowr-rmarkdown-file.ts b/src/project/plugins/file-plugins/files/flowr-rmarkdown-file.ts index f201afcef6f..531e5566aae 100644 --- a/src/project/plugins/file-plugins/files/flowr-rmarkdown-file.ts +++ b/src/project/plugins/file-plugins/files/flowr-rmarkdown-file.ts @@ -10,7 +10,7 @@ import { log } from '../../../../util/log'; * This decorates a text file and parses its contents as a R Markdown file. * Finnaly, it provides access to the single cells, and all cells fused together as one R file. */ -export class FlowrRMarkdownFile extends FlowrFile { +export class FlowrRMarkdownFile extends FlowrFile { private data?: RmdInfo; private readonly wrapped: FlowrFileProvider; diff --git a/src/project/plugins/file-plugins/files/flowr-sweave-file.ts b/src/project/plugins/file-plugins/files/flowr-sweave-file.ts index 9f03459a83e..4af0d895a47 100644 --- a/src/project/plugins/file-plugins/files/flowr-sweave-file.ts +++ b/src/project/plugins/file-plugins/files/flowr-sweave-file.ts @@ -7,7 +7,7 @@ import { guard } from '../../../../util/assert'; * Finally, it provides access to the single cells, and all cells fused together as one R file. * So far, this does *not* support `\Sexpr` calls. */ -export class FlowrSweaveFile extends FlowrFile { +export class FlowrSweaveFile extends FlowrFile { private readonly wrapped: FlowrFileProvider; private data?: SweaveInfo; diff --git a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts index 946150a52c3..57aae3e13d1 100644 --- a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts +++ b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts @@ -6,6 +6,8 @@ import type { TreeSitterEngineConfig } from '../../../config'; import { log } from '../../../util/log'; import fs from 'fs'; import type { ReadonlyFlowrAnalysisProvider } from '../../../project/flowr-analyzer'; +import type { FlowrAnalyzerContext } from '../../../project/context/flowr-analyzer-context'; +import { computeReparseInfo } from '../../../project/incremental/incremental-parse/incremental-parse'; export const DEFAULT_TREE_SITTER_R_WASM_PATH = './node_modules/@eagleoutice/tree-sitter-r/tree-sitter-r.wasm'; export const DEFAULT_TREE_SITTER_WASM_PATH = './node_modules/web-tree-sitter/tree-sitter.wasm'; @@ -16,10 +18,10 @@ const wasmLog = log.getSubLogger({ name: 'tree-sitter-wasm' }); * Synchronous and (way) faster alternative to the {@link RShell} using tree-sitter. */ export class TreeSitterExecutor implements SyncParser { - public readonly name = 'tree-sitter'; private readonly parser: Parser; private static language: Parser.Language; + public incremental = true; /** * Initializes the underlying tree-sitter parser. This only needs to be called once globally. @@ -69,14 +71,31 @@ export class TreeSitterExecutor implements SyncParser { return this.parser.getLanguage().version; } - public parse(request: RParseRequest): Parser.Tree { + public parse(request: RParseRequest & { filePath?: string }, ctx: FlowrAnalyzerContext): Parser.Tree { let sourceCode: string; if(request.request === 'file') { sourceCode = fs.readFileSync(request.content, 'utf8'); } else { sourceCode = request.content; } - return this.parser.parse(sourceCode); + + if(request.filePath === undefined) { + return this.parser.parse(sourceCode); + } + + const reparseInfo = computeReparseInfo(ctx, request.filePath); + if(!reparseInfo) { + // incremental parsing not possible + return this.parser.parse(sourceCode); + } + + if(!reparseInfo.editRegion) { + return reparseInfo.previousTree; + } + + const previousTree = reparseInfo.previousTree; + previousTree.edit(reparseInfo.editRegion); + return this.parser.parse(sourceCode, previousTree); } public createQuery(source: string): Query { diff --git a/src/r-bridge/parser.ts b/src/r-bridge/parser.ts index 1750e437800..fa8f4733c6f 100644 --- a/src/r-bridge/parser.ts +++ b/src/r-bridge/parser.ts @@ -7,9 +7,18 @@ import type { FlowrAnalysisProvider } from '../project/flowr-analyzer'; import type { FlowrAnalyzerContext } from '../project/context/flowr-analyzer-context'; interface ParserContent { - readonly name: string; + readonly name: string; + /** + * Whether the parser has incremental parsing capabilities + */ + readonly incremental: boolean; information(analyzer: FlowrAnalysisProvider): BaseParserInformation; - parse(request: RParseRequestFromText): T; + + /** + * Parses the given request and uses the provided context (only if the parser + * itself supports incrementality {@link ParserContent#incremental}). + */ + parse(request: RParseRequestFromText & { filePath?: string }, inc: FlowrAnalyzerContext | undefined): T; close(): void; } @@ -93,7 +102,8 @@ function countChildren(node: SyntaxNode): number { */ export async function parseRequests(_results: unknown, input: Partial>): Promise> { - const loadingOrder = (input.context as FlowrAnalyzerContext).files.loadingOrder.getLoadingOrder(); + const ctx = input.context as FlowrAnalyzerContext; + const loadingOrder = ctx.files.loadingOrder.getLoadingOrder(); /* in the future, we want to expose all cases */ const translatedRequests = loadingOrder.map(r => (input.context as FlowrAnalyzerContext).files.resolveRequest(r)); @@ -101,7 +111,7 @@ Promise> { /* sadly we cannot Promise.all with the Rshell as it has to process commands in order and is not thread safe */ const files: ParseStepOutputSingleFile[] = []; for(const req of translatedRequests) { - const parsed = await (input.parser).parse(req.r); + const parsed = await (input.parser).parse(req.r, ctx); files.push({ parsed, filePath: req.path, @@ -115,7 +125,9 @@ Promise> { const p = input.parser as SyncParser; return { files: translatedRequests.map(r => { - const parsed = p.parse(r.r); + const withPath: RParseRequestFromText & { filePath?: string } = r.r; + withPath.filePath = r.path; + const parsed = p.parse(withPath, ctx); return { parsed, filePath: r.path, diff --git a/src/r-bridge/shell-executor.ts b/src/r-bridge/shell-executor.ts index 625c352f2d9..d19824b45b1 100644 --- a/src/r-bridge/shell-executor.ts +++ b/src/r-bridge/shell-executor.ts @@ -23,6 +23,7 @@ const executorLog = log.getSubLogger({ name: 'RShellExecutor' }); */ export class RShellExecutor implements SyncParser { public readonly name = 'r-shell'; + public readonly incremental = false; public readonly options: Readonly; private readonly prerequisites: string[]; diff --git a/src/r-bridge/shell.ts b/src/r-bridge/shell.ts index 46fe5352d45..e2864253d15 100644 --- a/src/r-bridge/shell.ts +++ b/src/r-bridge/shell.ts @@ -141,10 +141,10 @@ export function getDefaultRShellOptions(config?: RShellEngineConfig): RShellOpti * (leaving this as a legacy mode :D) */ export class RShell implements AsyncParser { - public readonly name = 'r-shell'; public readonly async = true; public readonly options: Readonly; + public readonly incremental = false; private session: RShellSession; private readonly log: Logger; private versionCache: SemVer | null = null; diff --git a/test/functionality/incremental/edit-computation.test.ts b/test/functionality/incremental/edit-computation.test.ts new file mode 100644 index 00000000000..167343a4372 --- /dev/null +++ b/test/functionality/incremental/edit-computation.test.ts @@ -0,0 +1,91 @@ +import { describe, expect, it } from 'vitest'; +import { computeEditRegion } from '../../../src/project/incremental/incremental-parse/edit-computation'; + +describe('computeEdit', () => { + it('returns a no-op edit for identical content', () => { + const result = computeEditRegion('abc', 'abc'); + + expect(result.startIndex).toEqual(result.oldEndIndex); + expect(result.startIndex).toEqual(result.newEndIndex); + + expect(result.startPosition).toEqual(result.oldEndPosition); + expect(result.startPosition).toEqual(result.newEndPosition); + }); + + it('detects an insertion in the middle', () => { + expect(computeEditRegion('abef', 'abcdef')).toEqual({ + startIndex: 2, + oldEndIndex: 2, + newEndIndex: 4, + startPosition: { row: 0, column: 2 }, + oldEndPosition: { row: 0, column: 2 }, + newEndPosition: { row: 0, column: 4 }, + }); + }); + + it('detects a deletion in the middle', () => { + expect(computeEditRegion('abcdef', 'abef')).toEqual({ + startIndex: 2, + oldEndIndex: 4, + newEndIndex: 2, + startPosition: { row: 0, column: 2 }, + oldEndPosition: { row: 0, column: 4 }, + newEndPosition: { row: 0, column: 2 }, + }); + }); + + it('detects a replacement in the middle', () => { + expect(computeEditRegion('abcdef', 'abXYef')).toEqual({ + startIndex: 2, + oldEndIndex: 4, + newEndIndex: 4, + startPosition: { row: 0, column: 2 }, + oldEndPosition: { row: 0, column: 4 }, + newEndPosition: { row: 0, column: 4 }, + }); + }); + + it('detects an insertion at the beginning', () => { + expect(computeEditRegion('world', 'hello world')).toEqual({ + startIndex: 0, + oldEndIndex: 0, + newEndIndex: 6, + startPosition: { row: 0, column: 0 }, + oldEndPosition: { row: 0, column: 0 }, + newEndPosition: { row: 0, column: 6 }, + }); + }); + + it('detects a replacement of the whole content', () => { + expect(computeEditRegion('abc', 'xyz')).toEqual({ + startIndex: 0, + oldEndIndex: 3, + newEndIndex: 3, + startPosition: { row: 0, column: 0 }, + oldEndPosition: { row: 0, column: 3 }, + newEndPosition: { row: 0, column: 3 }, + }); + }); + + it('computes row/column positions correctly for multi-line edits', () => { + expect(computeEditRegion('a\nbc\ndef', 'a\nXY\ndef')).toEqual({ + startIndex: 2, + oldEndIndex: 4, + newEndIndex: 4, + startPosition: { row: 1, column: 0 }, + oldEndPosition: { row: 1, column: 2 }, + newEndPosition: { row: 1, column: 2 }, + }); + }); + + it('does not let suffix matching overlap with the prefix', () => { + expect(computeEditRegion('aaa', 'aa')).toEqual({ + startIndex: 2, + oldEndIndex: 3, + newEndIndex: 2, + startPosition: { row: 0, column: 2 }, + oldEndPosition: { row: 0, column: 3 }, + newEndPosition: { row: 0, column: 2 }, + }); + }); +}); \ No newline at end of file diff --git a/test/functionality/incremental/incremental-parsing.test.ts b/test/functionality/incremental/incremental-parsing.test.ts new file mode 100644 index 00000000000..18689a4d8fc --- /dev/null +++ b/test/functionality/incremental/incremental-parsing.test.ts @@ -0,0 +1,872 @@ +import { assert, describe, expect, it, vi } from 'vitest'; +import { FlowrAnalyzerBuilder } from '../../../src/project/flowr-analyzer-builder'; +import { FlowrInlineTextFile } from '../../../src/project/context/flowr-file'; +import type { NormalizedAst } from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; +import { printNormalizedAstToMermaid } from '../../../src/core/print/normalize-printer'; +import type { TreeSitterExecutor } from '../../../src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor'; +import type { FlowrAnalyzer } from '../../../src/project/flowr-analyzer'; +import type { Tree } from 'web-tree-sitter'; +import type { ParseStepOutput, ParseStepOutputSingleFile } from '../../../src/r-bridge/parser'; + + +interface FileState { + path: string; + content: string; +} + +interface IncrementalParsingScenario { + initialFiles: readonly FileState[]; + fileUpdates: readonly (readonly FileState[])[]; +} + +interface IncrementalParseCall { + filePath: string; + previousTree: Tree | undefined; +} + +function applyUpdateStepToFileStates( + fileStates: Map, + updateStep: readonly FileState[] +): void { + for(const update of updateStep) { + assert(fileStates.has(update.path), `All paths must be present in initialFiles, missing ${update.path}`); + fileStates.set(update.path, update.content); + } +} + +function changedPathsBetween( + beforeStep: ReadonlyMap, + afterStep: ReadonlyMap +): string[] { + return Array.from(beforeStep.keys()).filter(path => beforeStep.get(path) !== afterStep.get(path)); +} + +function unchangedPathsBetween( + beforeStep: ReadonlyMap, + afterStep: ReadonlyMap +): string[] { + return Array.from(beforeStep.keys()).filter(path => beforeStep.get(path) === afterStep.get(path)); +} + +async function createAnalyzerForFiles( + initialFiles: readonly FileState[] +): Promise<{ analyzer: FlowrAnalyzer; files: Map }> { + const analyzer = await new FlowrAnalyzerBuilder() + .setEngine('tree-sitter') + .build(); + const files = new Map(); + + for(const initialFile of initialFiles) { + const file = new FlowrInlineTextFile(initialFile.path, initialFile.content); + analyzer.addFile(file); + analyzer.addRequest({ request: 'file', content: initialFile.path }); + files.set(initialFile.path, file); + } + + return { analyzer, files }; +} + +function applyUpdateStepToAnalyzer( + files: ReadonlyMap, + updateStep: readonly FileState[] +): void { + for(const update of updateStep) { + const file = files.get(update.path); + assert(file !== undefined, `All paths must be present in initialFiles, missing ${update.path}`); + file.updateInlineContent(update.content); + } +} + +function createIncrementalParseTracer(analyzer: FlowrAnalyzer): { + trace(run: () => Promise): Promise<{ result: T; incrementalParseCalls: IncrementalParseCall[] }>; + restore(): void; +} { + const executor = analyzer['parser'] as TreeSitterExecutor; + const parser = executor['parser']; + const originalExecutorParse = executor.parse.bind(executor); + const originalParserParse = parser.parse.bind(parser); + let currentFilePath: string | undefined; + const incrementalParseCalls: IncrementalParseCall[] = []; + + // The outer executor still knows which file is being parsed, so we capture that path for the nested Tree-sitter call. + const executorSpy = vi.spyOn(executor, 'parse').mockImplementation((request, ctx) => { + currentFilePath = request.filePath; + try { + return originalExecutorParse(request, ctx); + } finally { + currentFilePath = undefined; + } + }); + + // The inner Tree-sitter parser sees the reused previous tree but not the file path, so we pair it with the path captured above. + const parserSpy = vi.spyOn(parser, 'parse').mockImplementation((sourceCode, previousTree) => { + assert(currentFilePath !== undefined, 'inner Tree-sitter parse should only be called while handling a file-backed parse request'); + incrementalParseCalls.push({ + filePath: currentFilePath, + previousTree: previousTree + }); + return originalParserParse(sourceCode, previousTree); + }); + + return { + async trace(run: () => Promise): Promise<{ result: T; incrementalParseCalls: IncrementalParseCall[] }> { + incrementalParseCalls.length = 0; + return { + result: await run(), + incrementalParseCalls: [...incrementalParseCalls] + }; + }, + restore(): void { + executorSpy.mockRestore(); + parserSpy.mockRestore(); + } + }; +} + +function capturePreviousTrees(analyzer: FlowrAnalyzer): Map { + const initialParse = analyzer.peekParse(); + assert(initialParse !== undefined); + + const previousTrees = new Map(); + for(const parsedFile of initialParse.files) { + assert(parsedFile.filePath !== undefined); + previousTrees.set(parsedFile.filePath, parsedFile.parsed as Tree); + } + return previousTrees; +} + +function assertChangedFilesUseIncrementalParse( + changedPaths: readonly string[], + previousTrees: ReadonlyMap, + incrementalParseCalls: readonly IncrementalParseCall[] +): void { + expect(incrementalParseCalls).toHaveLength(changedPaths.length); + + for(const changedPath of changedPaths) { + const previousTree = previousTrees.get(changedPath); + assert(previousTree !== undefined, `Missing previous tree for ${changedPath}`); + + const parseCall = incrementalParseCalls.find(call => call.filePath === changedPath); + assert(parseCall !== undefined, `Missing incremental parse call for ${changedPath}`); + expect(parseCall.previousTree).toBe(previousTree); + } +} + +function assertUnchangedFilesReusePreviousTrees( + unchangedPaths: readonly string[], + previousTrees: ReadonlyMap, + reparsedTrees: ParseStepOutputSingleFile[] +): void { + for(const unchangedPath of unchangedPaths) { + const previousTree = previousTrees.get(unchangedPath); + const reparsedTree = reparsedTrees.find(file => file.filePath === unchangedPath)?.parsed; + assert(previousTree !== undefined, `Missing previous tree for ${unchangedPath}`); + assert(reparsedTree !== undefined, `Missing reparsed tree for ${unchangedPath}`); + expect(reparsedTree, `no-op invalidation should reuse the previous tree for ${unchangedPath}`).toBe(previousTree); + } +} + +function assertPipelineStateAfterUpdateStep( + analyzer: FlowrAnalyzer, + updateStep: readonly FileState[] +): void { + if(updateStep.length === 0) { + assert(analyzer.peekParse() !== undefined, 'without any file updates, the previous pipeline should remain available'); + return; + } + + assert(analyzer.peekParse() === undefined, 'changing the content of parsed files should reset the previous pipeline'); +} + +async function executeFullParse(fileStates: readonly FileState[]): Promise { + const { analyzer } = await createAnalyzerForFiles(fileStates); + return await analyzer.normalize(); +} + +async function executeAndCompareScenario(scenario: IncrementalParsingScenario): Promise { + const { analyzer, files } = await createAnalyzerForFiles(scenario.initialFiles); + const incrementalParseTracer = createIncrementalParseTracer(analyzer); + await analyzer.normalize(); + const currentFileStates: Map = new Map(scenario.initialFiles.map(fileState => [fileState.path, fileState.content])); + + try { + for(const updateStep of scenario.fileUpdates) { + const previousTrees = capturePreviousTrees(analyzer); + const previousFileStates = new Map(currentFileStates); + + applyUpdateStepToAnalyzer(files, updateStep); + applyUpdateStepToFileStates(currentFileStates, updateStep); + assertPipelineStateAfterUpdateStep(analyzer, updateStep); + + const changedPaths = changedPathsBetween(previousFileStates, currentFileStates); + const unchangedPaths = unchangedPathsBetween(previousFileStates, currentFileStates); + const { result: incrementalResult, incrementalParseCalls } = await incrementalParseTracer.trace( + async() => await analyzer.normalize() + ); + const reparsed = analyzer.peekParse() as ParseStepOutput; + assert(reparsed !== undefined, 'after parsing once more, the pipeline must contain the analysis results again'); + + assertChangedFilesUseIncrementalParse(changedPaths, previousTrees, incrementalParseCalls); + assertUnchangedFilesReusePreviousTrees(unchangedPaths, previousTrees, reparsed.files); + + const fileStatesFromMap = Array.from(currentFileStates, ([path, content]) => ({ path, content })); + const fullReparseResult = await executeFullParse(fileStatesFromMap); + assert.equal( + printNormalizedAstToMermaid(fullReparseResult), + printNormalizedAstToMermaid(incrementalResult), + 'The incremental parse result does not match the full parse result' + ); + } + } finally { + incrementalParseTracer.restore(); + } +} + +const lines = (...xs: string[]): string => xs.join('\n'); + +const file = ( + path: string, + content: string +): FileState => ({ + path, + content +}); + +const step = (...files: FileState[]): readonly FileState[] => files; + +const scenario = ( + initialFiles: readonly FileState[], + ...fileUpdates: readonly (readonly FileState[])[] +): IncrementalParsingScenario => ({ + initialFiles, + fileUpdates +}); + + +describe('Incremental Parsing produces same results as Full Parsing', () => { + describe('one update set', () => { + describe('single-file', () => { + describe('no-op', () => { + it('empty file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', '')], + step(file('a.R', '')) + )); + }); + + it('file with top-level content', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('file with nested content', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))) + )); + }); + + it('syntactically invalid file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(')], + step(file('a.R', 'print(')) + )); + }); + }); + + describe('insert', () => { + it('one full line into an empty file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', '')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('one full line at the start of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', lines('y <- 21', 'x <- 42'))) + )); + }); + + it('one full line in the middle of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))) + )); + }); + + it('one full line at the end of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', lines('x <- 42', 'print(x)'))) + )); + }); + + it('multiple lines into an empty file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', '')], + step(file('a.R', lines('x <- 42', 'y <- 21', 'z <- 10'))) + )); + }); + + it('multiple lines at different positions', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))) + )); + }); + + it('a single character inside a number', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 420')) + )); + }); + + it('a single character inside an identifier', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'xy <- 42')) + )); + }); + + it('a token inside an expression', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1 + 2')], + step(file('a.R', 'x <- 1 + 2 + 3')) + )); + }); + + it('a token inside a nested argument list', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(sum(1, 3))')], + step(file('a.R', 'print(sum(1, 2, 3))')) + )); + }); + + it('a trailing newline at end of file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 42\n')) + )); + }); + }); + + describe('remove', () => { + it('one full line such that the file becomes empty', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', '')) + )); + }); + + it('one full line at the start of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('y <- 21', 'x <- 42'))], + step(file('a.R', 'x <- 42')) + )); + }); + + it('one full line in the middle of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))], + step(file('a.R', lines('x <- 42', 'print(x)'))) + )); + }); + + it('one full line at the end of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', 'x <- 42')) + )); + }); + + it('multiple lines such that the file becomes empty', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'y <- 21', 'z <- 10'))], + step(file('a.R', '')) + )); + }); + + it('multiple lines at different positions', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], + step(file('a.R', lines('x <- 42', 'print(x)'))) + )); + }); + + it('a single character from a number', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 420')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('a single character from an identifier', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'xy <- 42')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('a token from an expression', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1 + 2 + 3')], + step(file('a.R', 'x <- 1 + 2')) + )); + }); + + it('a token from a nested argument list', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(sum(1, 2, 3))')], + step(file('a.R', 'print(sum(1, 3))')) + )); + }); + + it('a trailing newline at end of file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42\n')], + step(file('a.R', 'x <- 42')) + )); + }); + }); + + describe('replace', () => { + it('one full line at the start of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('y <- 21', 'x <- 42'))], + step(file('a.R', lines('x <- 84', 'x <- 42'))) + )); + }); + + it('one full line in the middle of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'x <- 2 * x', 'print(x)'))], + step(file('a.R', lines('x <- 42', 'y <- 21', 'print(x)'))) + )); + }); + + it('one full line at the end of a file', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('x <- 42', 'x <- x * x'))) + )); + }); + + it('a partially replaced multi-line region', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], + step(file('a.R', lines('y <- 21', 'x <- 21', 'y <- y * y', 'print(x)', 'print(y)'))) + )); + }); + + it('a fully replaced content', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('y <- 21', 'x <- 42', 'y <- y * 2', 'print(x)', 'print(y)'))], + step(file('a.R', lines('z <- 10', 'z <- z + 32', 'print(z)'))) + )); + }); + + it('a single character in a number', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 43')) + )); + }); + + it('an operator token', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1 + 2')], + step(file('a.R', 'x <- 1 * 2')) + )); + }); + + it('an identifier token', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('x <- 42', 'print(x)'))], + step(file('a.R', lines('value <- 42', 'print(value)'))) + )); + }); + + it('part of a single line expression', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- (1 + 2) * 3')], + step(file('a.R', 'x <- (1 + 20) * 3')) + )); + }); + + it('whitespace only on a single line', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('whitespace only across multiple lines', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\t', '\ty <- x + 1', '\tprint(y)', '}'))) + )); + }); + + it('comment text', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42 # old comment')], + step(file('a.R', 'x <- 42 # new comment')) + )); + }); + + it('a string literal', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'msg <- "abc"')], + step(file('a.R', 'msg <- "abcd"')) + )); + }); + + it('a UTF-8 string literal', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'msg <- "äöü"')], + step(file('a.R', 'msg <- "äöü€"')) + )); + }); + + it('a UTF-8 comment', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42 # gruß')], + step(file('a.R', 'x <- 42 # grüße €')) + )); + }); + }); + + describe('syntax transitions', () => { + it('valid to invalid by removing the right-hand side of an assignment', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 42')], + step(file('a.R', 'x <-')) + )); + }); + + it('valid to invalid by removing a closing brace', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('f <- function(x) {', '\tprint(x)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\tprint(x)'))) + )); + }); + + it('valid to invalid by removing a closing parenthesis', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(sum(1, 2))')], + step(file('a.R', 'print(sum(1, 2)')) + )); + }); + + it('invalid to valid by completing an assignment', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <-')], + step(file('a.R', 'x <- 42')) + )); + }); + + it('invalid to valid by restoring a closing brace', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('f <- function(x) {', '\tprint(x)'))], + step(file('a.R', lines('f <- function(x) {', '\tprint(x)', '}'))) + )); + }); + + it('invalid to valid by restoring a closing parenthesis', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(sum(1, 2)')], + step(file('a.R', 'print(sum(1, 2))')) + )); + }); + + it('invalid to invalid across different incomplete forms', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(')], + step(file('a.R', 'function(,')) + )); + }); + }); + + describe('nested structures', () => { + it('inside a function body', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}'))], + step(file('a.R', lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}'))) + )); + }); + + it('inside an if branch', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('if (x > 0) {', '\ty <- 1', '}'))], + step(file('a.R', lines('if (x > 0) {', '\ty <- 1', '\tz <- 2', '}'))) + )); + }); + + it('inside a for loop body', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', lines('for (i in 1:3) {', '\tprint(i)', '}'))], + step(file('a.R', lines('for (i in 1:3) {', '\ttotal <- i + 1', '\tprint(total)', '}'))) + )); + }); + + it('inside a nested argument list', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'print(sum(1, 2, 3))')], + step(file('a.R', 'print(sum(1, 20, 3))')) + )); + }); + + it('inside nested brackets and subexpressions', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- list(a = list(b = 1))')], + step(file('a.R', 'x <- list(a = list(b = 2))')) + )); + }); + }); + + }); + + describe('multi-file', () => { + it('editing only the first file while the second file stays unchanged', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)')) + ], + step( + file('a.R', lines('x <- 42', 'x <- x + 1', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)')) + ) + )); + }); + + it('editing only the second file while the first file stays unchanged', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)')) + ], + step( + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('y <- 21', 'y <- y * 2', 'print(y)')) + ) + )); + }); + + it('editing both files independently in the same run', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 1', 'print(x)')), + file('b.R', lines('y <- 10', 'print(y)')) + ], + step( + file('a.R', lines('x <- 2', 'x <- x * 3', 'print(x)')), + file('b.R', lines('z <- 10', 'print(z + 1)')) + ) + )); + }); + + it('adding a new file while another file stays unchanged', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', '') + ], + step( + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('helper <- function(x) {', '\tx * 2', '}', 'print(helper(21))')) + ) + )); + }); + + it('removing one file while another file stays unchanged', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', lines('tmp <- 1', 'print(tmp)')) + ], + step( + file('a.R', lines('x <- 42', 'print(x)')), + file('b.R', '') + ) + )); + }); + + it('mixing file modification, file addition, and file removal in one run', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 1', 'print(x)')), + file('b.R', ''), + file('c.R', lines('obsolete <- TRUE', 'print(obsolete)')) + ], + step( + file('a.R', lines('x <- 1', 'x <- x + 1', 'print(x)')), + file('b.R', lines('y <- 21', 'print(y)')), + file('c.R', '') + ) + )); + }); + + it('making one file invalid while another file remains unchanged and valid', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('f <- function(x) {', '\tprint(x)', '}')), + file('b.R', lines('y <- 21', 'print(y)')) + ], + step( + file('a.R', lines('f <- function(x) {', '\tprint(x)')), + file('b.R', lines('y <- 21', 'print(y)')) + ) + )); + }); + + it('editing UTF-8 content in one file while another file stays unchanged', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('msg <- "äöü"', 'print(msg)')), + file('b.R', lines('x <- 42', 'print(x)')) + ], + step( + file('a.R', lines('msg <- "äöü€"', 'print(msg)')), + file('b.R', lines('x <- 42', 'print(x)')) + ) + )); + }); + + it('editing inside a nested construct in one file and at top level in another', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('f <- function(x) {', '\ty <- x + 1', '\tprint(y)', '}')), + file('b.R', lines('z <- 3', 'print(z)')) + ], + step( + file('a.R', lines('f <- function(x) {', '\ty <- x * 2', '\tprint(y)', '}')), + file('b.R', lines('z <- 3', 'z <- z + 1', 'print(z)')) + ) + )); + }); + + it('editing only one of two syntactically invalid files', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', 'print('), + file('b.R', 'x <-') + ], + step( + file('a.R', 'print(1)'), + file('b.R', 'x <-') + ) + )); + }); + + }); + }); + + describe('multiple update sets', () => { + describe('single-file', () => { + it('keeps the cached pipeline across an empty update step before a later real edit', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1')], + step(), + step(file('a.R', 'x <- 2')) + )); + }); + + it('reuses the previous tree for a no-op invalidation after a prior real edit', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1')], + step(file('a.R', 'x <- 10')), + step(file('a.R', 'x <- 10')) + )); + }); + + it('handles multiple updates in one step whose final content matches the original content', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1')], + step( + file('a.R', 'x <- 10'), + file('a.R', 'x <- 1') + ) + )); + }); + + it('recovers across valid, invalid, cached, and valid states on the same analyzer instance', async() => { + await executeAndCompareScenario(scenario( + [file('a.R', 'x <- 1')], + step(file('a.R', 'x <-')), + step(), + step(file('a.R', 'x <- 1')) + )); + }); + }); + + describe('multi-file', () => { + it('keeps the cached pipeline on an empty step before changing only one file', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', 'x <- 1'), + file('b.R', 'y <- 2') + ], + step(), + step( + file('a.R', 'x <- 10'), + file('b.R', 'y <- 2') + ) + )); + }); + + it('handles a no-op invalidation for one file while another file changes in the next step', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', 'x <- 1'), + file('b.R', 'y <- 2') + ], + step( + file('a.R', 'x <- 10'), + file('b.R', 'y <- 2') + ), + step( + file('a.R', 'x <- 10'), + file('b.R', 'y <- 20') + ) + )); + }); + + it('handles repeated updates to one file in a step while another file ends up truly changed', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', 'x <- 1'), + file('b.R', 'y <- 2') + ], + step( + file('a.R', 'x <- 10'), + file('a.R', 'x <- 1'), + file('b.R', 'y <- 20') + ) + )); + }); + + it('switches which file changes across successive steps while the other is reused', async() => { + await executeAndCompareScenario(scenario( + [ + file('a.R', lines('x <- 1', 'print(x)')), + file('b.R', lines('y <- 2', 'print(y)')) + ], + step( + file('a.R', lines('x <- 10', 'print(x)')), + file('b.R', lines('y <- 2', 'print(y)')) + ), + step( + file('a.R', lines('x <- 10', 'print(x)')), + file('b.R', lines('y <- 20', 'print(y)')) + ) + )); + }); + }); + }); +});