Skip to content

Commit e2b95d0

Browse files
authored
Merge pull request #4 from magicdrive/feature/improved_arklite
Fix improved arklite format.
2 parents faaa873 + 673aaad commit e2b95d0

8 files changed

Lines changed: 143 additions & 62 deletions

File tree

.gitignore

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
ark-output.txt
33
ark-output.md
44
ark-output.xml
5-
ark-output.arklite
6-
*.arklite
7-
!/internal/**/*.arklite
5+
ark-output.arklite.txt
6+
*.arklite.txt
7+
!/internal/**/*.arklite.txt
88
/build
99
/tmp
1010

internal/commandline/option.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,15 +277,16 @@ func (cr *Option) Normalize() error {
277277
case model.XML:
278278
cr.OutputFilename = "ark-output.xml"
279279
case model.Arklite:
280-
cr.OutputFilename = "ark-output.arklite"
280+
cr.OutputFilename = "ark-output.arklite.txt"
281281
default:
282282
cr.OutputFilename = "ark-output.txt"
283283
}
284284
}
285285

286286
// compless
287287
if cr.OutputFormat.CanCompless() && cr.ComplessFlag {
288-
cr.OutputFilename = fmt.Sprintf("%s%s", cr.OutputFilename, ".arklite")
288+
cr.DeleteCommentsFlag = true
289+
cr.OutputFilename = fmt.Sprintf("%s%s", cr.OutputFilename, ".arklite.txt")
289290
}
290291

291292
// gitignorerule

internal/commandline/option_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ func TestOptParse_ValidInputs(t *testing.T) {
3636
t.Fatalf("Expected no error, got: %v", err)
3737
}
3838

39-
if opt.OutputFilename != "out.txt.arklite" {
40-
t.Errorf("Expected OutputFilename = out.txt.arklite, got %s", opt.OutputFilename)
39+
if opt.OutputFilename != "out.txt.arklite.txt" {
40+
t.Errorf("Expected OutputFilename = out.txt.arklite.txt, got %s", opt.OutputFilename)
4141
}
4242
if opt.ScanBuffer.String() != "20K" {
4343
t.Errorf("Expected ScanBuffer = 20K, got %s", opt.ScanBuffer.String())

internal/textbank/description_template/arklite_compless_header.arklite

Lines changed: 0 additions & 30 deletions
This file was deleted.
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Arklite Format Overview
2+
3+
**Project:** %s
4+
**Path:** %s
5+
6+
This file is a **full project export** written in the Arklite format — a compact, LLM-optimized representation of a codebase.
7+
It is designed to make large-scale projects easy for LLMs and automation tools to consume and analyze.
8+
9+
---
10+
11+
## Quick Format Summary
12+
13+
- The Arklite dump consists of **three sections**:
14+
1. **Description** — This explanation and project info.
15+
2. **Directory Structure (JSON)** — Hierarchical JSON tree of the project layout.
16+
3. **File Dump (Arklite format)** — Compact, one-line-per-file contents.
17+
18+
- **File Dump Rules:**
19+
- Each file starts with a line like `@relative/path/to/file.ext`
20+
- The **next line** contains the file’s entire contents, as a single line.
21+
- All newlines (`\n`) are replaced with `␤` (Unicode U+2424).
22+
- **All comments** (e.g. `//`, `#`, `/* */`, `<!-- -->`, etc.) are removed.
23+
- File order and relative paths reflect the directory structure.
24+
25+
---
26+
27+
## How to Reconstruct Files (Step-by-Step)
28+
29+
1. **Find file sections**: Look for lines starting with `@` (at-sign).
30+
Each such line indicates the start of a new file and gives its relative path.
31+
2. **Get file content**: The line *immediately after* each `@` line is the entire content of that file, with all line breaks replaced by `␤`.
32+
3. **Restore line breaks**: In each file’s content, replace all `␤` with real newlines (`\n`) to reconstruct the original file (minus the comments that were removed).
33+
4. **Save files**: Use the given relative paths to rebuild the directory and file structure.
34+
35+
**Sample:**
36+
37+
```
38+
@main.go
39+
package main␤func main() {␤ println("hello, world")␤}
40+
@README.md
41+
Arklite project dump␤See main.go for code.␤
42+
```
43+
44+
---
45+
46+
## IMPORTANT — For LLMs and Automation Tools
47+
48+
- **Always** reconstruct the full project files using the above steps *before* attempting any code analysis, summarization, or refactoring.
49+
- Do **not** process or interpret the file dump lines directly.
50+
Always decode (expand) the contents first.
51+
- After decoding, you will obtain the original project files in `%s` format.
52+
53+
---
54+
55+
## Use Cases
56+
57+
- Efficient LLM-based code analysis, summarization, and refactoring.
58+
- Fast sharing of large codebases for machine processing (lossless except for the removed comments).
59+
- Compact project archival.
60+
61+
---
62+
63+
## Technical Notes
64+
65+
- The Arklite format removes comments to reduce token count and focus on core code logic.
66+
- The directory tree in JSON format that precedes the file dump reflects the entire layout for context.
67+
- If you are an LLM or tool, **always decode before analysis** — never analyze the dump lines as-is.
68+
69+
---
70+
71+
*This format is designed for high-efficiency AI workflows and large-scale automation.*
72+

internal/textbank/description_template/description.arklite

Lines changed: 0 additions & 23 deletions
This file was deleted.
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Arklite Format Overview
2+
3+
**Project:** %s
4+
**Path:** %s
5+
6+
This file is written in **Arklite format** — a compact, LLM-optimized snapshot of an entire project directory.
7+
8+
---
9+
10+
## Sections
11+
12+
1. **Description**
13+
Explanation of the project and the Arklite format.
14+
15+
2. **Directory Structure (JSON)**
16+
Hierarchical representation of the directory layout as a JSON tree.
17+
18+
3. **File Dump (Arklite format)**
19+
The full content of each file, stored compactly — one file per line.
20+
21+
---
22+
23+
## Arklite File Dump Format
24+
25+
- Each file starts with a line like: `@relative/path/to/file.ext`
26+
- The **next line** is the entire file content, all on a single line.
27+
- **Newlines** inside files are replaced with `␤` (U+2424).
28+
- **All comments** (`//`, `#`, `/* */`, `<!-- -->`, etc.) are stripped out.
29+
- File order and relative paths match the original directory tree.
30+
31+
---
32+
33+
### How to Restore Original Files
34+
35+
1. For each line that starts with `@`, treat it as a new file path.
36+
2. The line *immediately after* is the compressed file contents (with `␤` as newlines).
37+
3. Reconstruct each file by:
38+
- Creating the path as given after `@`
39+
- Replacing all `␤` with newline (`\n`) characters
40+
41+
**Example:**
42+
43+
```
44+
@main.go
45+
package main␤func main() {␤ println("hello, world")␤}
46+
@README.md
47+
Arklite project dump␤See main.go for code.␤
48+
```
49+
50+
---
51+
52+
## Note for LLMs & Automation Tools
53+
54+
- **Always reconstruct** the files using the rules above *before* analysis or code search.
55+
- Do **not** analyze, search, or refactor on the compressed file dump lines directly — always decode (expand) first.
56+
- After decoding, you will have all original files as plain text, ready for any further processing.
57+
58+
---
59+
60+
This format enables *large projects* to be shared, analyzed, or archived in a highly efficient, LLM-friendly way.
61+

internal/textbank/mod.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ var DescriptionTemplateText string
1111
//go:embed description_template/description.xml
1212
var DescriptionTemplateXML string
1313

14-
//go:embed description_template/description.arklite
14+
//go:embed description_template/description.arklite.txt
1515
var DescriptionTemplateArklite string
1616

17-
//go:embed description_template/arklite_compless_header.arklite
17+
//go:embed description_template/arklite_compless_header.arklite.txt
1818
var ArkliteComplessHeaderTemplate string
1919

2020
const (

0 commit comments

Comments
 (0)