Skip to content

Commit ca1dfa1

Browse files
committed
feat(knowledge): enhance document update functionality with docType handling
- Introduced a new `docType` field on `UpdateKnowledgeDocumentInput` to manage document type changes.
- Updated the `doUpdate` method to clear subtype fields (GuideType, AnswerType, CodeLang) when the document type changes.
- Added comprehensive tests for various document type transitions to ensure correct behavior and state preservation.
- Updated the GraphQL schema and generated models to accommodate the new `docType` field.
1 parent 4675cf8 commit ca1dfa1

5 files changed

Lines changed: 396 additions & 3 deletions

File tree

backend/pkg/database/knowledge/knowledge.go

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,7 @@ func (ks *knowledgeStore) doUpdate(ctx context.Context, userID int64, id string,
533533
if input.Description != nil {
534534
meta.Description = *input.Description
535535
}
536+
536537
if input.GuideType != nil {
537538
meta.GuideType = string(*input.GuideType)
538539
}
@@ -542,8 +543,35 @@ func (ks *knowledgeStore) doUpdate(ctx context.Context, userID int64, id string,
542543
if input.CodeLang != nil {
543544
meta.CodeLang = *input.CodeLang
544545
}
545-
meta.PartSize = len(content)
546-
meta.TotalSize = len(content)
546+
547+
// Map of sub-type fields to their pointers to be cleared when DocType changes.
548+
subTypeFields := map[string]*string{
549+
"guide": &meta.GuideType,
550+
"answer": &meta.AnswerType,
551+
"code": &meta.CodeLang,
552+
}
553+
554+
if input.DocType != nil {
555+
newDocType := string(*input.DocType)
556+
for subType, field := range subTypeFields {
557+
if newDocType != subType {
558+
*field = ""
559+
}
560+
}
561+
meta.DocType = newDocType
562+
}
563+
564+
deltaContentLen := len(content) - len(existing.Content)
565+
if existing.PartSize <= 0 {
566+
meta.PartSize = len(content)
567+
} else {
568+
meta.PartSize = existing.PartSize + deltaContentLen
569+
}
570+
if existing.TotalSize <= 0 {
571+
meta.TotalSize = len(content)
572+
} else {
573+
meta.TotalSize = existing.TotalSize + deltaContentLen
574+
}
547575

548576
// Compute new embedding.
549577
vecs, err := ks.embedder.EmbedDocuments(ctx, []string{content})

backend/pkg/database/knowledge/knowledge_test.go

Lines changed: 340 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,6 +1346,346 @@ func TestUpdateDocument(t *testing.T) {
13461346
})
13471347
}
13481348

1349+
func TestUpdateDocumentDocTypeChange(t *testing.T) {
1350+
ctx := context.Background()
1351+
const userID = int64(20)
1352+
1353+
// buildDB returns a mockDB whose updateKnowledge echoes back the stored
1354+
// cmetadata so that the returned document reflects what was actually written.
1355+
buildDB := func(initialMeta string) (*mockDB, *string) {
1356+
stored := new(string)
1357+
return &mockDB{
1358+
getKnowledge: func(_ context.Context, uuid string) (database.GetKnowledgeDocumentRow, error) {
1359+
return makeRow(uuid, "old", initialMeta), nil
1360+
},
1361+
updateKnowledge: func(_ context.Context, arg database.UpdateKnowledgeDocumentParams) (database.UpdateKnowledgeDocumentRow, error) {
1362+
*stored = string(arg.Column4.RawMessage)
1363+
return database.UpdateKnowledgeDocumentRow{
1364+
ID: arg.Column1.String,
1365+
Document: arg.Column3.String,
1366+
Cmetadata: sql.NullString{
1367+
String: string(arg.Column4.RawMessage),
1368+
Valid: true,
1369+
},
1370+
}, nil
1371+
},
1372+
}, stored
1373+
}
1374+
newKS := func(db *mockDB) *knowledgeStore {
1375+
return &knowledgeStore{
1376+
db: db,
1377+
embedder: &mockEmbedder{available: true},
1378+
newKnp: newPublisherFactory(&mockPublisher{}),
1379+
}
1380+
}
1381+
1382+
t.Run("guide→answer: clears GuideType, sets AnswerType from input", func(t *testing.T) {
1383+
db, stored := buildDB(`{"doc_type":"guide","guide_type":"pentest","question":"q"}`)
1384+
ks := newKS(db)
1385+
1386+
doc, err := ks.UpdateDocument(ctx, userID, "id1", model.UpdateKnowledgeDocumentInput{
1387+
Content: "new content",
1388+
DocType: ptr(model.KnowledgeDocTypeAnswer),
1389+
AnswerType: ptr(model.KnowledgeAnswerTypeVulnerability),
1390+
})
1391+
if err != nil {
1392+
t.Fatal(err)
1393+
}
1394+
if doc.DocType != model.KnowledgeDocTypeAnswer {
1395+
t.Fatalf("DocType: want answer, got %s", doc.DocType)
1396+
}
1397+
if doc.GuideType != nil {
1398+
t.Fatalf("GuideType must be cleared after doc_type change, got %v", *doc.GuideType)
1399+
}
1400+
if doc.AnswerType == nil || *doc.AnswerType != model.KnowledgeAnswerTypeVulnerability {
1401+
t.Fatal("AnswerType mismatch")
1402+
}
1403+
1404+
meta := parseMeta(*stored)
1405+
if meta.GuideType != "" {
1406+
t.Fatalf("stored guide_type must be empty, got %q", meta.GuideType)
1407+
}
1408+
if meta.AnswerType != "vulnerability" {
1409+
t.Fatalf("stored answer_type: want vulnerability, got %q", meta.AnswerType)
1410+
}
1411+
})
1412+
1413+
t.Run("answer→code: clears AnswerType, sets CodeLang from input", func(t *testing.T) {
1414+
db, stored := buildDB(`{"doc_type":"answer","answer_type":"vulnerability","question":"q"}`)
1415+
ks := newKS(db)
1416+
1417+
doc, err := ks.UpdateDocument(ctx, userID, "id2", model.UpdateKnowledgeDocumentInput{
1418+
Content: "code here",
1419+
DocType: ptr(model.KnowledgeDocTypeCode),
1420+
CodeLang: ptr("python"),
1421+
})
1422+
if err != nil {
1423+
t.Fatal(err)
1424+
}
1425+
if doc.DocType != model.KnowledgeDocTypeCode {
1426+
t.Fatalf("DocType: want code, got %s", doc.DocType)
1427+
}
1428+
if doc.AnswerType != nil {
1429+
t.Fatalf("AnswerType must be cleared after doc_type change, got %v", *doc.AnswerType)
1430+
}
1431+
if doc.CodeLang == nil || *doc.CodeLang != "python" {
1432+
t.Fatal("CodeLang mismatch")
1433+
}
1434+
1435+
meta := parseMeta(*stored)
1436+
if meta.AnswerType != "" {
1437+
t.Fatalf("stored answer_type must be empty, got %q", meta.AnswerType)
1438+
}
1439+
if meta.CodeLang != "python" {
1440+
t.Fatalf("stored code_lang: want python, got %q", meta.CodeLang)
1441+
}
1442+
})
1443+
1444+
t.Run("code→guide: clears CodeLang, sets GuideType from input", func(t *testing.T) {
1445+
db, stored := buildDB(`{"doc_type":"code","code_lang":"go","question":"q"}`)
1446+
ks := newKS(db)
1447+
1448+
doc, err := ks.UpdateDocument(ctx, userID, "id3", model.UpdateKnowledgeDocumentInput{
1449+
Content: "guide text",
1450+
DocType: ptr(model.KnowledgeDocTypeGuide),
1451+
GuideType: ptr(model.KnowledgeGuideTypePentest),
1452+
})
1453+
if err != nil {
1454+
t.Fatal(err)
1455+
}
1456+
if doc.DocType != model.KnowledgeDocTypeGuide {
1457+
t.Fatalf("DocType: want guide, got %s", doc.DocType)
1458+
}
1459+
if doc.CodeLang != nil {
1460+
t.Fatalf("CodeLang must be cleared after doc_type change, got %v", *doc.CodeLang)
1461+
}
1462+
if doc.GuideType == nil || *doc.GuideType != model.KnowledgeGuideTypePentest {
1463+
t.Fatal("GuideType mismatch")
1464+
}
1465+
1466+
meta := parseMeta(*stored)
1467+
if meta.CodeLang != "" {
1468+
t.Fatalf("stored code_lang must be empty, got %q", meta.CodeLang)
1469+
}
1470+
if meta.GuideType != "pentest" {
1471+
t.Fatalf("stored guide_type: want pentest, got %q", meta.GuideType)
1472+
}
1473+
})
1474+
1475+
t.Run("guide→answer: clears GuideType even without AnswerType in input", func(t *testing.T) {
1476+
db, stored := buildDB(`{"doc_type":"guide","guide_type":"install","question":"q"}`)
1477+
ks := newKS(db)
1478+
1479+
doc, err := ks.UpdateDocument(ctx, userID, "id4", model.UpdateKnowledgeDocumentInput{
1480+
Content: "new",
1481+
DocType: ptr(model.KnowledgeDocTypeAnswer),
1482+
// AnswerType intentionally omitted
1483+
})
1484+
if err != nil {
1485+
t.Fatal(err)
1486+
}
1487+
if doc.GuideType != nil {
1488+
t.Fatalf("GuideType must be nil after switching away from guide, got %v", *doc.GuideType)
1489+
}
1490+
if doc.AnswerType != nil {
1491+
t.Fatalf("AnswerType should be nil when not supplied, got %v", *doc.AnswerType)
1492+
}
1493+
1494+
meta := parseMeta(*stored)
1495+
if meta.GuideType != "" {
1496+
t.Fatalf("stored guide_type must be empty, got %q", meta.GuideType)
1497+
}
1498+
})
1499+
1500+
t.Run("same DocType: sub-type fields preserved without clearing", func(t *testing.T) {
1501+
db, stored := buildDB(`{"doc_type":"guide","guide_type":"pentest","question":"q"}`)
1502+
ks := newKS(db)
1503+
1504+
doc, err := ks.UpdateDocument(ctx, userID, "id5", model.UpdateKnowledgeDocumentInput{
1505+
Content: "updated guide",
1506+
DocType: ptr(model.KnowledgeDocTypeGuide), // same type
1507+
// GuideType not passed — should remain "pentest" from existing
1508+
})
1509+
if err != nil {
1510+
t.Fatal(err)
1511+
}
1512+
if doc.GuideType == nil || *doc.GuideType != model.KnowledgeGuideTypePentest {
1513+
t.Fatal("GuideType must be preserved when DocType is unchanged and no new GuideType supplied")
1514+
}
1515+
1516+
meta := parseMeta(*stored)
1517+
if meta.GuideType != "pentest" {
1518+
t.Fatalf("stored guide_type must remain pentest, got %q", meta.GuideType)
1519+
}
1520+
})
1521+
1522+
t.Run("DocType nil: existing sub-type fields preserved", func(t *testing.T) {
1523+
db, stored := buildDB(`{"doc_type":"code","code_lang":"rust","question":"q"}`)
1524+
ks := newKS(db)
1525+
1526+
doc, err := ks.UpdateDocument(ctx, userID, "id6", model.UpdateKnowledgeDocumentInput{
1527+
Content: "updated code",
1528+
// DocType nil — no type change
1529+
})
1530+
if err != nil {
1531+
t.Fatal(err)
1532+
}
1533+
if doc.DocType != model.KnowledgeDocTypeCode {
1534+
t.Fatalf("DocType should stay code, got %s", doc.DocType)
1535+
}
1536+
if doc.CodeLang == nil || *doc.CodeLang != "rust" {
1537+
t.Fatal("CodeLang must be preserved when DocType is not provided")
1538+
}
1539+
1540+
meta := parseMeta(*stored)
1541+
if meta.CodeLang != "rust" {
1542+
t.Fatalf("stored code_lang must remain rust, got %q", meta.CodeLang)
1543+
}
1544+
})
1545+
1546+
t.Run("same DocType with new sub-type: updates sub-type", func(t *testing.T) {
1547+
db, stored := buildDB(`{"doc_type":"answer","answer_type":"vulnerability","question":"q"}`)
1548+
ks := newKS(db)
1549+
1550+
doc, err := ks.UpdateDocument(ctx, userID, "id7", model.UpdateKnowledgeDocumentInput{
1551+
Content: "updated",
1552+
DocType: ptr(model.KnowledgeDocTypeAnswer),
1553+
AnswerType: ptr(model.KnowledgeAnswerTypeCode),
1554+
})
1555+
if err != nil {
1556+
t.Fatal(err)
1557+
}
1558+
if doc.AnswerType == nil || *doc.AnswerType != model.KnowledgeAnswerTypeCode {
1559+
t.Fatal("AnswerType should be updated")
1560+
}
1561+
1562+
meta := parseMeta(*stored)
1563+
if meta.AnswerType != "code" {
1564+
t.Fatalf("stored answer_type: want code, got %q", meta.AnswerType)
1565+
}
1566+
})
1567+
}
1568+
1569+
func TestUpdateDocumentSizeCalculation(t *testing.T) {
1570+
ctx := context.Background()
1571+
const userID = int64(20)
1572+
1573+
buildDB := func(existingContent, existingMeta string) (*mockDB, *string) {
1574+
stored := new(string)
1575+
return &mockDB{
1576+
getKnowledge: func(_ context.Context, uuid string) (database.GetKnowledgeDocumentRow, error) {
1577+
row := makeRow(uuid, existingContent, existingMeta)
1578+
return row, nil
1579+
},
1580+
updateKnowledge: func(_ context.Context, arg database.UpdateKnowledgeDocumentParams) (database.UpdateKnowledgeDocumentRow, error) {
1581+
*stored = string(arg.Column4.RawMessage)
1582+
return database.UpdateKnowledgeDocumentRow{
1583+
ID: arg.Column1.String,
1584+
Document: arg.Column3.String,
1585+
Cmetadata: sql.NullString{
1586+
String: string(arg.Column4.RawMessage),
1587+
Valid: true,
1588+
},
1589+
}, nil
1590+
},
1591+
}, stored
1592+
}
1593+
newKS := func(db *mockDB) *knowledgeStore {
1594+
return &knowledgeStore{
1595+
db: db,
1596+
embedder: &mockEmbedder{available: true},
1597+
newKnp: newPublisherFactory(&mockPublisher{}),
1598+
}
1599+
}
1600+
1601+
t.Run("single-chunk doc: sizes equal new content length", func(t *testing.T) {
1602+
// part_size == total_size == len(old content) = 10
1603+
db, stored := buildDB("0123456789", `{"doc_type":"answer","part_size":10,"total_size":10}`)
1604+
ks := newKS(db)
1605+
1606+
_, err := ks.UpdateDocument(ctx, userID, "id", model.UpdateKnowledgeDocumentInput{
1607+
Content: "hello", // 5 chars, delta = -5
1608+
})
1609+
if err != nil {
1610+
t.Fatal(err)
1611+
}
1612+
meta := parseMeta(*stored)
1613+
if meta.PartSize != 5 {
1614+
t.Fatalf("PartSize: want 5, got %d", meta.PartSize)
1615+
}
1616+
if meta.TotalSize != 5 {
1617+
t.Fatalf("TotalSize: want 5, got %d", meta.TotalSize)
1618+
}
1619+
})
1620+
1621+
t.Run("multi-chunk doc: TotalSize adjusted by delta, PartSize adjusted independently", func(t *testing.T) {
1622+
// 3 chunks: this chunk is 100 chars, total document is 300 chars
1623+
db, stored := buildDB(
1624+
string(make([]byte, 100)),
1625+
`{"doc_type":"guide","part_size":100,"total_size":300}`,
1626+
)
1627+
ks := newKS(db)
1628+
1629+
newContent := string(make([]byte, 80)) // 80 chars, delta = -20
1630+
_, err := ks.UpdateDocument(ctx, userID, "id", model.UpdateKnowledgeDocumentInput{
1631+
Content: newContent,
1632+
})
1633+
if err != nil {
1634+
t.Fatal(err)
1635+
}
1636+
meta := parseMeta(*stored)
1637+
if meta.PartSize != 80 {
1638+
t.Fatalf("PartSize: want 80 (100-20), got %d", meta.PartSize)
1639+
}
1640+
if meta.TotalSize != 280 {
1641+
t.Fatalf("TotalSize: want 280 (300-20), got %d", meta.TotalSize)
1642+
}
1643+
})
1644+
1645+
t.Run("multi-chunk doc: content grows, TotalSize increases", func(t *testing.T) {
1646+
db, stored := buildDB(
1647+
string(make([]byte, 50)),
1648+
`{"doc_type":"code","part_size":50,"total_size":150}`,
1649+
)
1650+
ks := newKS(db)
1651+
1652+
newContent := string(make([]byte, 70)) // delta = +20
1653+
_, err := ks.UpdateDocument(ctx, userID, "id", model.UpdateKnowledgeDocumentInput{
1654+
Content: newContent,
1655+
})
1656+
if err != nil {
1657+
t.Fatal(err)
1658+
}
1659+
meta := parseMeta(*stored)
1660+
if meta.PartSize != 70 {
1661+
t.Fatalf("PartSize: want 70, got %d", meta.PartSize)
1662+
}
1663+
if meta.TotalSize != 170 {
1664+
t.Fatalf("TotalSize: want 170 (150+20), got %d", meta.TotalSize)
1665+
}
1666+
})
1667+
1668+
t.Run("zero existing sizes fall back to new content length", func(t *testing.T) {
1669+
// legacy doc without size metadata
1670+
db, stored := buildDB("old", `{"doc_type":"answer"}`)
1671+
ks := newKS(db)
1672+
1673+
_, err := ks.UpdateDocument(ctx, userID, "id", model.UpdateKnowledgeDocumentInput{
1674+
Content: "new content",
1675+
})
1676+
if err != nil {
1677+
t.Fatal(err)
1678+
}
1679+
meta := parseMeta(*stored)
1680+
if meta.PartSize != len("new content") {
1681+
t.Fatalf("PartSize: want %d, got %d", len("new content"), meta.PartSize)
1682+
}
1683+
if meta.TotalSize != len("new content") {
1684+
t.Fatalf("TotalSize: want %d, got %d", len("new content"), meta.TotalSize)
1685+
}
1686+
})
1687+
}
1688+
13491689
func TestUpdateDocumentPreservesOriginalOwner(t *testing.T) {
13501690
// SECURITY: when an admin (userID=1) updates a document that belongs to
13511691
// user 99, the stored user_id in cmetadata must remain 99, not be replaced

0 commit comments

Comments
 (0)