Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 23 additions & 2 deletions src/node/db/Pad.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,17 @@ exports.cleanText = (txt:string): string => txt.replace(/\r\n/g, '\n')
.replace(/\t/g, ' ');

class Pad {
/**
* Stable author id used to attribute inserts coming from internal callers
* (HTTP API setText/appendText with no authorId, plugins like ep_post_data,
* server-side import flows). Without ANY author attribute, pad.atext.text
* and pad.atext.attribs drift out of sync — clients then fail
* setDocAText reconciliation in ace2_inner.ts when loading the pad. Using
* a fixed system author keeps the AText well-formed without requiring
* every plugin to allocate its own author up-front.
*/
static readonly SYSTEM_AUTHOR_ID = 'a.etherpad-system';
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can that be abused? If I name myself a.etherpad-system as author. Can this have consequences?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

patch incoming


private db: Database;
private atext: AText;
private pool: AttributePool;
Expand Down Expand Up @@ -415,9 +426,19 @@ class Pad {
(!ins && start > 0 && orig[start - 1] === '\n');
if (!willEndWithNewline) ins += '\n';
if (ndel === 0 && ins.length === 0) return;
const attribs = authorId ? [['author', authorId] as [string, string]] : undefined;
// An unattributed insert (empty authorId + non-empty ins) would produce
// an AText where `text` and `attribs` disagree on length — clients fail
// setDocAText reconciliation on load. Backward-compat fix: if the caller
// didn't provide an authorId, attribute the insert to a stable system
// author. ep_post_data and other plugins that want named attribution
// should still pass an explicit authorId.
const effectiveAuthorId =
(ins.length > 0 && !authorId) ? Pad.SYSTEM_AUTHOR_ID : authorId;
const attribs = effectiveAuthorId
? [['author', effectiveAuthorId] as [string, string]]
: undefined;
const changeset = makeSplice(orig, start, ndel, ins, attribs, this.pool);
await this.appendRevision(changeset, authorId);
await this.appendRevision(changeset, effectiveAuthorId);
}

/**
Expand Down
65 changes: 56 additions & 9 deletions src/node/handler/PadMessageHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import {MapArrayType} from "../types/MapType";
import AttributeMap from '../../static/js/AttributeMap';
const padManager = require('../db/PadManager');
const padDeletionManager = require('../db/PadDeletionManager');
import {checkRep, cloneAText, compose, deserializeOps, follow, identity, inverse, makeAText, makeSplice, moveOpsToNewPool, mutateAttributionLines, mutateTextLines, oldLen, prepareForWire, splitAttributionLines, splitTextLines, unpack} from '../../static/js/Changeset';
import {applyToText, checkRep, cloneAText, compose, deserializeOps, follow, identity, inverse, makeAText, moveOpsToNewPool, mutateAttributionLines, mutateTextLines, oldLen, prepareForWire, splitAttributionLines, splitTextLines, unpack} from '../../static/js/Changeset';
import ChatMessage from '../../static/js/ChatMessage';
import AttributePool from '../../static/js/AttributePool';
const AttributeManager = require('../../static/js/AttributeManager');
Expand Down Expand Up @@ -875,12 +875,49 @@ const handleUserChanges = async (socket:any, message: {
`${opAuthorId} in changeset ${changeset}`);
}
}
// Reject '+' ops that do not carry the author attribute. The standard
// JS client always tags inserts with the author; rejecting unattributed
// inserts here keeps pad.atext.text and pad.atext.attribs in lock-step.
// Without this check, an insert op with empty attribs grows the text
// without contributing matching markers to the attribs string, leaving
// the stored AText in a state where the two iterables disagree on
// length — applyToAText then desyncs and breaks reconciliation in
// every client that later loads the pad.
if (op.opcode === '+' && !opAuthorId) {
throw new Error(`Author ${thisSession.author} submitted an insert without an ` +
`author attribute in changeset ${changeset}`);
}
// Defense-in-depth: reject any wire-borne `*N` that resolves to the
// system author. The session-author equality check above already
// catches the case where `*N` claims a different real user as
// author, but `Pad.SYSTEM_AUTHOR_ID` is server-internal — it's
// only used when `spliceText` / `setText` are called with an empty
// authorId from HTTP API or plugin paths. No legitimate
// socket.io session ever writes as the system author, so a wire
// op that names it is either a confused client or an attempt to
// launder writes through a reserved attribution slot. Either way,
// refuse.
// Hardcoded mirror of `Pad.SYSTEM_AUTHOR_ID` from src/node/db/Pad.ts.
// A `const {Pad} = require('../db/Pad')` at module scope returned
// a partially-initialized class here (circular load via padManager),
// so the static-field access ended up undefined and short-circuited
// the check at runtime. Inline literal is the simplest fix.
if (opAuthorId === 'a.etherpad-system') {
throw new Error(`Author ${thisSession.author} attempted to submit changes as the ` +
`reserved system author ${opAuthorId} in changeset ${changeset}`);
}
Comment on lines +905 to +908
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

2. System-author undo rejected 🐞 Bug ≡ Correctness

handleUserChanges() rejects any op whose resolved authorId is 'a.etherpad-system', including '=' ops
that restore existing authorship. Pads can now legitimately contain system-attributed text via
server-side spliceText()/setText() without an authorId, so undoing “clear authorship colors” can
disconnect clients with badChangeset when the undo attempts to restore the system author.
Agent Prompt
### Issue description
`handleUserChanges()` rejects any op whose `author` attribute resolves to the reserved system author (`a.etherpad-system`). This breaks legitimate client flows (notably undo of “clear authorship colors”) on pads that contain system-authored text, because undo restores author attributes via `'='` ops.

### Issue Context
* Server-side internal callers now attribute inserts to `Pad.SYSTEM_AUTHOR_ID` when no `authorId` is provided, so pads can contain system-authored segments.
* The editor’s “clearauthorship” command sets author to `''` across the doc; undo restores prior author attributes (including non-self authors) via `'='` ops.
* The new unconditional rejection of `a.etherpad-system` blocks that restoration.

### Fix Focus Areas
- src/node/handler/PadMessageHandler.ts[878-908]
- src/node/db/Pad.ts[416-442]
- src/static/js/ace2_inner.ts[610-623]
- src/tests/backend/specs/undo_clear_authorship.ts[168-195]

### Proposed fix approach
1. Narrow the reserved-system-author rejection so it only blocks actual attempts to *write as* the system author (for example: reject if `thisSession.author === 'a.etherpad-system'`, and/or reject only `'+'` ops whose `opAuthorId` is the system author).
2. Do **not** reject `'='` ops solely because they restore the system author; those are needed to undo/redo attribution changes on existing system-authored content.
3. Add/adjust a regression test that creates system-authored content (via `pad.setText(..., '')` or `spliceText(..., '')`), then performs clear-authorship + undo via USER_CHANGES and asserts it is accepted.

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

}

// ex. adoptChangesetAttribs

// Afaik, it copies the new attributes from the changeset, to the global Attribute Pool
let rebasedChangeset = moveOpsToNewPool(changeset, wireApool, pad.pool);
// Snapshot the post-pool-mapping form so the retransmission check below
// can recognise our changeset against the stored revision form. Comparing
// the raw client `changeset` against `c` would miss legitimate
// retransmissions whenever moveOpsToNewPool renumbered an attribute
// (e.g. `*0` -> `*1` because the pad pool already had something at slot 0).
const canonicalCs = rebasedChangeset;

// ex. applyUserChanges
let r = baseRev;
Expand All @@ -891,9 +928,9 @@ const handleUserChanges = async (socket:any, message: {
while (r < pad.getHeadRevisionNumber()) {
r++;
const {changeset: c, meta: {author: authorId}} = await pad.getRevision(r);
if (changeset === c && thisSession.author === authorId) {
if (canonicalCs === c && thisSession.author === authorId) {
// Assume this is a retransmission of an already applied changeset.
rebasedChangeset = identity(unpack(changeset).oldLen);
rebasedChangeset = identity(unpack(canonicalCs).oldLen);
}
// At this point, both "c" (from the pad) and "changeset" (from the
// client) are relative to revision r - 1. The follow function
Expand All @@ -909,6 +946,22 @@ const handleUserChanges = async (socket:any, message: {
`Can't apply changeset ${rebasedChangeset} with oldLen ` +
`${oldLen(rebasedChangeset)} to document of length ${prevText.length}`);
}
// Defensive: reject any rebased changeset whose application would leave
// the pad text not ending with '\n'. Previously the server silently
// appended a separate `nlChangeset` correction revision; that worked
// for the stored pad but the FIRST broadcast (the malformed user
// revision) reached browsers BEFORE the correction did, and the
// browser's line assembler asserts "line assembler not finished" on
// a doc that doesn't end with '\n', taking the session out. Refuse to
// accept such changesets — clients must always preserve the
// trailing-newline invariant.
const projectedText = applyToText(rebasedChangeset, prevText);
if (!projectedText.endsWith('\n')) {
throw new Error(
`Rejected USER_CHANGES whose application would leave the pad ` +
`without a trailing '\\n' (length ${projectedText.length}). ` +
`Every USER_CHANGES must preserve the "doc ends with \\n" invariant.`);
}
Comment on lines +949 to +964
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. Trailing-newline auto-fix removed 📘 Rule violation ☼ Reliability

handleUserChanges now rejects any USER_CHANGES changeset that would leave the pad text not
ending with \n, instead of tolerating and correcting it. This removes a previously supported
behavior without a WARN-level deprecation period, which can break older/third-party clients that
rely on server-side correction.
Agent Prompt
## Issue description
The server now hard-rejects USER_CHANGES that would violate the trailing newline invariant, which is a breaking behavior change and effectively removes the prior tolerance/correction behavior without a deprecation path.

## Issue Context
Per compliance, feature/behavior removals must be deprecated first with a WARN log, then removed/enforced in a later version. Current code throws an error immediately for these changesets.

## Fix Focus Areas
- src/node/handler/PadMessageHandler.ts[949-964]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools


const newRev = await pad.appendRevision(rebasedChangeset, thisSession.author);
// The head revision will either stay the same or increase by 1 depending on whether the
Expand All @@ -920,12 +973,6 @@ const handleUserChanges = async (socket:any, message: {
await pad.appendRevision(correctionChangeset, thisSession.author);
}

// Make sure the pad always ends with an empty line.
if (pad.text().lastIndexOf('\n') !== pad.text().length - 1) {
const nlChangeset = makeSplice(pad.text(), pad.text().length - 1, 0, '\n');
await pad.appendRevision(nlChangeset, thisSession.author);
}

// The client assumes that ACCEPT_COMMIT and NEW_CHANGES messages arrive in order. Make sure we
// have already sent any previous ACCEPT_COMMIT and NEW_CHANGES messages.
assert.equal(thisSession.rev, r);
Expand Down
2 changes: 1 addition & 1 deletion src/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@
"chokidar": "^5.0.0",
"eslint": "^10.4.0",
"eslint-config-etherpad": "^4.0.5",
"etherpad-cli-client": "^4.0.2",
"etherpad-cli-client": "^4.0.3",
"mocha": "^11.7.5",
"mocha-froth": "^0.2.10",
"nodeify": "^1.0.1",
Expand Down
25 changes: 23 additions & 2 deletions src/tests/backend/specs/Pad.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,36 @@ describe(__filename, function () {
pad = await padManager.getPad(padId, '');
// spliceText is an existing runtime Pad method; cast avoids
// adding a type-only declaration to PadType in this PR.
await (pad as any).spliceText(0, 0, '100\u00a0km');
await (pad as any).spliceText(0, 0, '100\u00a0km', 'a.test');
assert.equal(pad!.text(), '100\u00a0km\n');
});

it('setText round-trips U+00A0', async function () {
pad = await padManager.getPad(padId, '');
await pad!.setText('a\u00a0b\n');
await pad!.setText('a\u00a0b\n', 'a.test');
assert.equal(pad!.text(), 'a\u00a0b\n');
});

it('spliceText with empty authorId attributes to the system author', async function () {
pad = await padManager.getPad(padId, '');
// An unattributed insert (empty authorId, non-empty ins) used to
// produce an AText where text and attribs disagreed on length \u2014
// clients then failed setDocAText reconciliation on load. The
// server now substitutes a stable system author so the AText stays
// well-formed without forcing every caller to allocate one up-front.
await (pad as any).spliceText(0, 0, 'plugin-text', '');
assert.equal(pad!.text(), 'plugin-text\n');
const pool: any = (pad as any).pool;
let sawSystemAuthor = false;
for (const k of Object.keys(pool.numToAttrib || {})) {
const a = pool.numToAttrib[k];
if (a[0] === 'author' && a[1] === 'a.etherpad-system') {
sawSystemAuthor = true;
break;
}
}
assert(sawSystemAuthor, 'expected system-author binding in pad pool');
});
});

describe('padDefaultContent hook', function () {
Expand Down
Loading
Loading