Skip to content

Commit 7fa3f60

Browse files
authored
add chunkText helper (#35)
1 parent 2c9fcda commit 7fa3f60

6 files changed

Lines changed: 95 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# deverything
22

3+
## 4.10.0
4+
5+
### Minor Changes
6+
7+
- chunkText
8+
39
## 4.9.0
410

511
### Minor Changes

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ Contributions always welcome!
9696
- `arrayIntersection()` get the intersection of two arrays
9797
- `capitalize()` word => Word
9898
- `chunkArray()` split array into chunks
99+
- `chunkText()` split text into chunks of a given maximum character size, optionally preserving sentence or word boundaries via `Intl.Segmenter`
99100
- `chunkedAll()` process all items in chunks
100101
- `chunkedAsync()` process async operations in chunks
101102
- `chunkedDynamic()` process with dynamic chunk sizes

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "deverything",
3-
"version": "4.9.0",
3+
"version": "4.10.0",
44
"description": "Everything you need for Dev",
55
"main": "./dist/index.js",
66
"module": "./dist/index.mjs",

src/helpers/chunkText.test.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import { describe, expect, test } from "vitest";
2+
import { chunkText } from "./chunkText";
3+
4+
// Test suite for chunkText: plain fixed-size splitting plus the
// `preserveOnBreak` modes ("sentence" and "word").
describe("chunkText", () => {
  test("splits text into chunks of the given size", () => {
    expect(chunkText("abcdefgh", 3)).toEqual(["abc", "def", "gh"]);
    expect(chunkText("hello", 5)).toEqual(["hello"]);
    expect(chunkText("hello", 10)).toEqual(["hello"]);
    expect(chunkText("", 3)).toEqual([]);
  });

  // Three sentences of 13, 13 and 10 characters (trailing spaces included).
  const text = "Hello world. How are you? I am fine.";

  // Every sentence is longer than the limit, so each one is cut into
  // pieces of at most 5 characters and never merged with its neighbours.
  test("splits sentences longer than the chunk size", () => {
    const chunks = chunkText(
      text,
      5, // shorter than every sentence
      { preserveOnBreak: "sentence" }
    );
    expect(chunks).toEqual([
      "Hello",
      " worl",
      "d. ",
      "How a",
      "re yo",
      "u? ",
      "I am ",
      "fine.",
    ]);
    expect(chunks.some((chunk) => chunk.length > 5)).toBe(false);
  });

  // Sentences fit within the limit, so they are kept whole and packed
  // together while the running chunk stays within 25 characters.
  test("preserves sentences shorter than the chunk size", () => {
    const chunks = chunkText(text, 25, { preserveOnBreak: "sentence" });
    expect(chunks).toEqual(["Hello world. ", "How are you? I am fine."]);
    expect(chunks.some((chunk) => chunk.length > 25)).toBe(false);
  });

  // Word mode may break inside a sentence, but never inside a word.
  test("preserves words shorter than the chunk size", () => {
    const chunks = chunkText(text, 25, { preserveOnBreak: "word" });
    expect(chunks).toEqual(["Hello world. How are you?", " I am fine."]);
    expect(chunks.some((chunk) => chunk.length > 25)).toBe(false);
  });
});

src/helpers/chunkText.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import { chunkArray } from "./chunkArray";
2+
3+
/** Options accepted by {@link chunkText}. */
type ChunkTextOptions = {
  /**
   * Unit that should be kept intact whenever it fits in a chunk.
   * When omitted, the text is segmented by grapheme cluster.
   */
  preserveOnBreak?: "sentence" | "word";
};

/**
 * Splits a segment that is longer than `chunkMaxSize` into pieces, packing
 * whole grapheme clusters so that combining marks, surrogate pairs and ZWJ
 * emoji sequences are never torn apart.
 */
const splitOversizedSegment = (
  segment: string,
  chunkMaxSize: number,
  graphemeSegmenter: Intl.Segmenter
): string[] => {
  const pieces: string[] = [];
  let piece = "";

  for (const { segment: grapheme } of graphemeSegmenter.segment(segment)) {
    // Close the running piece before it would exceed the limit. A single
    // grapheme that is itself longer than the limit ends up alone in a piece.
    if (piece && piece.length + grapheme.length > chunkMaxSize) {
      pieces.push(piece);
      piece = "";
    }
    piece += grapheme;
  }

  if (piece) {
    pieces.push(piece);
  }

  return pieces;
};

/**
 * Splits `text` into consecutive chunks whose `length` (UTF-16 code units)
 * does not exceed `chunkMaxSize`. Joining the returned chunks yields `text`.
 *
 * Segments (sentences, words, or grapheme clusters by default) are packed
 * greedily into a chunk. A segment longer than `chunkMaxSize` is split on
 * grapheme boundaries into its own chunks and is not merged with the
 * segments around it. A single grapheme cluster longer than `chunkMaxSize`
 * is emitted whole, as it cannot be split without corrupting the character.
 *
 * @param text - The text to split.
 * @param chunkMaxSize - Maximum chunk length; must be greater than 0.
 * @param options - Optional boundary-preservation settings.
 * @returns The chunks in order; an empty array for empty text.
 * @throws RangeError if `chunkMaxSize` is not a number greater than 0.
 */
export const chunkText = (
  text: string,
  chunkMaxSize: number,
  options?: ChunkTextOptions
): string[] => {
  // Written as a negated comparison so that NaN is rejected as well.
  if (!(chunkMaxSize > 0)) {
    throw new RangeError(
      `chunkMaxSize must be a number greater than 0, received ${chunkMaxSize}`
    );
  }

  // Segmenters are created per call so that importing this module does not
  // require Intl.Segmenter to exist.
  const graphemeSegmenter = new Intl.Segmenter(undefined, {
    granularity: "grapheme",
  });
  const segmenter = options?.preserveOnBreak
    ? new Intl.Segmenter(undefined, { granularity: options.preserveOnBreak })
    : graphemeSegmenter;

  const chunks: string[] = [];
  let current = "";

  for (const { segment } of segmenter.segment(text)) {
    if (segment.length > chunkMaxSize) {
      // The segment cannot fit in any chunk: flush what has accumulated,
      // then emit the segment's pieces as standalone chunks.
      if (current) {
        chunks.push(current);
        current = "";
      }
      chunks.push(
        ...splitOversizedSegment(segment, chunkMaxSize, graphemeSegmenter)
      );
      continue;
    }

    if (current.length + segment.length > chunkMaxSize) {
      // The segment fits in a chunk, but not in the current one.
      chunks.push(current);
      current = segment;
    } else {
      current += segment;
    }
  }

  if (current) {
    chunks.push(current);
  }

  return chunks;
};

src/helpers/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ export * from "./arrayDiff";
33
export * from "./arrayIntersection";
44
export * from "./capitalize";
55
export * from "./chunkArray";
6+
export * from "./chunkText";
67
export * from "./chunkedAll";
78
export * from "./chunkedAsync";
89
export * from "./chunkedDynamic";

0 commit comments

Comments
 (0)