Skip to content

Commit 36bc39c

Browse files
authored
Feature/merge nodes suggestion (#1090)
* feat: update readme kg * feat: update readme kg * feat: update readme kg * feat: update readme kg * feat: update readme kg * feat: update readme kg * feat: update readme kg * feat: update readme kg * feat: update readme kg * feat: update readme kg * feat: update readme kg * feat: update readme kg
1 parent 3fe0ef1 commit 36bc39c

20 files changed

Lines changed: 1286 additions & 745 deletions

aperag/api/components/schemas/graph.yaml

Lines changed: 201 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -142,43 +142,85 @@ graphLabelsResponse:
142142
required:
143143
- labels
144144

145+
targetEntityDataRequest:
146+
type: object
147+
description: Optional target entity configuration. If not specified, auto-select entity with highest degree.
148+
properties:
149+
entity_name:
150+
type: string
151+
description: Target entity name. If not specified, auto-select entity with highest degree
152+
entity_type:
153+
type: string
154+
description: Entity type for the target entity
155+
description:
156+
type: string
157+
description: Description for the target entity
158+
source_id:
159+
type: string
160+
description: Source ID for the target entity
161+
file_path:
162+
type: string
163+
description: File path for the target entity
164+
nullable: true
165+
166+
targetEntityDataResponse:
167+
type: object
168+
description: Complete data of the target entity after merge
169+
properties:
170+
entity_name:
171+
type: string
172+
description: The entity name that was kept (merge target)
173+
example: "墨香居"
174+
entity_type:
175+
type: string
176+
description: Entity type of the target entity
177+
example: "ORGANIZATION"
178+
description:
179+
type: string
180+
description: Merged description of the target entity
181+
example: "墨香居是这条老巷子里唯一的旧书店,经营着各种书籍,承载了老板李明华的情怀。"
182+
source_id:
183+
type: string
184+
description: Source ID information
185+
example: "chunk-001,chunk-002"
186+
file_path:
187+
type: string
188+
description: File path information
189+
example: "story.txt,book.txt"
190+
required:
191+
- entity_name
192+
- entity_type
193+
- description
194+
145195
nodeMergeRequest:
146196
type: object
147-
description: Request to merge multiple graph nodes
197+
description: |
198+
Request to merge multiple graph nodes.
199+
200+
You can provide either:
201+
- entity_ids: List of entity IDs to merge directly
202+
- suggestion_id: ID of a suggestion to merge
203+
204+
If both are provided, suggestion_id takes precedence and entity_ids will be ignored.
148205
properties:
149206
entity_ids:
150207
type: array
151208
items:
152209
type: string
153-
description: List of entity IDs to merge (supports 1 or more entities)
210+
description: List of entity IDs to merge (supports 1 or more entities). Ignored if suggestion_id is also provided.
154211
example: ["墨香居", "书店", "旧书店"]
155212
minItems: 1
156-
target_entity_data:
157-
type: object
158-
description: Optional target entity configuration. If not specified or empty, auto-select entity with highest degree
159-
properties:
160-
entity_name:
161-
type: string
162-
description: Target entity name. If not specified, auto-select entity with highest degree
163-
example: "墨香居"
164-
entity_type:
165-
type: string
166-
description: Entity type for the target entity
167-
example: "ORGANIZATION"
168-
description:
169-
type: string
170-
description: Description for the target entity
171-
example: "A comprehensive bookstore and cultural space"
172-
source_id:
173-
type: string
174-
description: Source ID for the target entity
175-
file_path:
176-
type: string
177-
description: File path for the target entity
178213
nullable: true
179-
required:
180-
- entity_ids
214+
suggestion_id:
215+
type: string
216+
description: Single suggestion ID to merge. If provided, takes precedence over entity_ids.
217+
example: "msug123"
218+
nullable: true
219+
target_entity_data:
220+
$ref: '#/targetEntityDataRequest'
181221
additionalProperties: false
222+
example:
223+
suggestion_id: "msug123"
182224

183225
nodeMergeResponse:
184226
type: object
@@ -193,35 +235,14 @@ nodeMergeResponse:
193235
type: string
194236
description: Detailed message about the merge operation
195237
example: "Successfully merged 2 entities into 墨香居"
238+
entity_ids:
239+
type: array
240+
items:
241+
type: string
242+
description: Entity IDs that were merged
243+
example: ["墨香居", "书店", "旧书店"]
196244
target_entity_data:
197-
type: object
198-
description: Complete data of the target entity after merge
199-
properties:
200-
entity_name:
201-
type: string
202-
description: The entity name that was kept (merge target)
203-
example: "墨香居"
204-
entity_type:
205-
type: string
206-
description: Entity type of the target entity
207-
example: "ORGANIZATION"
208-
description:
209-
type: string
210-
description: Merged description of the target entity
211-
example: "墨香居是这条老巷子里唯一的旧书店,经营着各种书籍,承载了老板李明华的情怀。"
212-
source_id:
213-
type: string
214-
description: Source ID information
215-
example: "chunk-001,chunk-002"
216-
file_path:
217-
type: string
218-
description: File path information
219-
example: "story.txt,book.txt"
220-
required:
221-
- entity_name
222-
- entity_type
223-
- description
224-
nullable: true
245+
$ref: '#/targetEntityDataResponse'
225246
source_entities:
226247
type: array
227248
items:
@@ -238,9 +259,15 @@ nodeMergeResponse:
238259
description: Length of the merged description
239260
example: 512
240261
minimum: 0
262+
suggestion_id:
263+
type: string
264+
description: Suggestion ID if this merge was based on a suggestion
265+
example: "msug123"
266+
nullable: true
241267
required:
242268
- status
243269
- message
270+
- entity_ids
244271
- target_entity_data
245272
- source_entities
246273
- redirected_edges
@@ -329,29 +356,90 @@ mergeSuggestionsRequest:
329356
max_suggestions:
330357
type: integer
331358
minimum: 1
332-
maximum: 50
333-
default: 10
359+
maximum: 100
360+
default: 50
334361
description: Maximum number of merge suggestions to return
335-
example: 10
336-
entity_types:
337-
type: array
338-
items:
339-
type: string
340-
description: Only consider entities of specified types (empty means all types)
341-
example: ["PERSON", "ORGANIZATION"]
342-
nullable: true
343-
debug_mode:
344-
type: boolean
345-
default: false
346-
description: Enable debug mode with lower confidence threshold (0.3 vs 0.6) and verbose logging
347-
example: false
362+
example: 50
348363
max_concurrent_llm_calls:
349364
type: integer
350365
minimum: 1
351366
maximum: 16
352367
default: 4
353368
description: Maximum number of concurrent LLM calls for batch analysis
354369
example: 4
370+
force_refresh:
371+
type: boolean
372+
default: false
373+
description: Force regeneration of suggestions even if valid cached suggestions exist
374+
example: false
375+
376+
mergeSuggestionItem:
377+
type: object
378+
description: Individual merge suggestion item
379+
properties:
380+
id:
381+
type: string
382+
description: Suggestion ID
383+
example: "msug123"
384+
collection_id:
385+
type: string
386+
description: Collection ID
387+
example: "col123"
388+
suggestion_batch_id:
389+
type: string
390+
description: Suggestion batch ID
391+
example: "batch456"
392+
entity_ids:
393+
type: array
394+
items:
395+
type: string
396+
description: Entity IDs suggested for merging
397+
example: ["墨香居", "书店", "旧书店"]
398+
confidence_score:
399+
type: number
400+
format: float
401+
minimum: 0.0
402+
maximum: 1.0
403+
description: LLM confidence score for this merge suggestion
404+
example: 0.85
405+
merge_reason:
406+
type: string
407+
description: LLM-generated reason for suggesting this merge
408+
example: "两个实体都描述同一个书店,'墨香居'是具体名称,'旧书店'是通用描述,应该合并为同一实体"
409+
suggested_target_entity:
410+
$ref: '#/mergeSuggestionTargetEntity'
411+
status:
412+
type: string
413+
enum: ["PENDING", "ACCEPTED", "REJECTED", "EXPIRED"]
414+
description: Status of the suggestion
415+
example: "PENDING"
416+
created:
417+
type: string
418+
format: date-time
419+
description: Creation timestamp
420+
example: "2025-01-07T10:00:00Z"
421+
expires_at:
422+
type: string
423+
format: date-time
424+
description: Expiration timestamp
425+
example: "2025-01-14T10:00:00Z"
426+
operated_at:
427+
type: string
428+
format: date-time
429+
description: User operation timestamp
430+
example: "2025-01-08T15:30:00Z"
431+
nullable: true
432+
required:
433+
- id
434+
- collection_id
435+
- suggestion_batch_id
436+
- entity_ids
437+
- confidence_score
438+
- merge_reason
439+
- suggested_target_entity
440+
- status
441+
- created
442+
- expires_at
355443

356444
mergeSuggestionsResponse:
357445
type: object
@@ -360,7 +448,7 @@ mergeSuggestionsResponse:
360448
suggestions:
361449
type: array
362450
items:
363-
$ref: '#/mergeSuggestion'
451+
$ref: '#/mergeSuggestionItem'
364452
description: List of merge suggestions ordered by confidence score (highest first)
365453
total_analyzed_nodes:
366454
type: integer
@@ -373,7 +461,49 @@ mergeSuggestionsResponse:
373461
description: Processing time in seconds
374462
example: 12.5
375463
minimum: 0.0
464+
from_cache:
465+
type: boolean
466+
description: Whether suggestions were loaded from cache
467+
example: false
468+
default: false
469+
generated_at:
470+
type: string
471+
format: date-time
472+
description: Generation timestamp
473+
example: "2025-01-07T10:00:00Z"
474+
total_suggestions:
475+
type: integer
476+
description: Total number of suggestions
477+
example: 5
478+
minimum: 0
479+
pending_count:
480+
type: integer
481+
description: Number of pending suggestions
482+
example: 3
483+
minimum: 0
484+
accepted_count:
485+
type: integer
486+
description: Number of accepted suggestions
487+
example: 1
488+
minimum: 0
489+
rejected_count:
490+
type: integer
491+
description: Number of rejected suggestions
492+
example: 1
493+
minimum: 0
494+
expired_count:
495+
type: integer
496+
description: Number of expired suggestions
497+
example: 0
498+
minimum: 0
376499
required:
377500
- suggestions
378501
- total_analyzed_nodes
379-
- processing_time_seconds
502+
- processing_time_seconds
503+
- from_cache
504+
- generated_at
505+
- total_suggestions
506+
- pending_count
507+
- accepted_count
508+
- rejected_count
509+
- expired_count

0 commit comments

Comments
 (0)