-
Notifications
You must be signed in to change notification settings - Fork 43
Expand file tree
/
Copy pathDocumentationDocument.cs
More file actions
197 lines (160 loc) · 7.53 KB
/
DocumentationDocument.cs
File metadata and controls
197 lines (160 loc) · 7.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information
using System.Text.Json.Serialization;
using Elastic.Mapping;
namespace Elastic.Documentation.Search;
/// <summary>
/// Title/URL pair describing a parent entry of a document; used as the element type of
/// <see cref="DocumentationDocument.Parents"/>.
/// </summary>
public record ParentDocument
{
    /// <summary>Title of the parent entry, serialized as <c>title</c>.</summary>
    [JsonPropertyName("title")]
    public required string Title { get; set; }

    /// <summary>URL of the parent entry, serialized as <c>url</c> and mapped as a keyword.</summary>
    [Keyword, JsonPropertyName("url")]
    public required string Url { get; set; }
}
/// <summary>
/// Search document for a single documentation page. Each property is mapped to its JSON field
/// through <see cref="JsonPropertyNameAttribute"/>; the mapping attributes (<c>Keyword</c>, <c>Text</c>,
/// <c>Object</c>, <c>Nested</c>, ...) come from <c>Elastic.Mapping</c>.
/// </summary>
public record DocumentationDocument
{
    /// <summary>Page title, serialized as <c>title</c>.</summary>
    [AiInput, JsonPropertyName("title")]
    public required string Title { get; set; }

    /// <summary>
    /// Combination of the title and the url components.
    /// Queried instead of the plain title so that documents with short titles are not
    /// rewarded too heavily during scoring.
    /// </summary>
    [JsonPropertyName("search_title")]
    public required string SearchTitle { get; set; }

    /// <summary>Document type, serialized as <c>type</c>; initialized to <c>doc</c>.</summary>
    [Keyword(Normalizer = "keyword_normalizer"), JsonPropertyName("type")]
    public required string Type { get; set; } = "doc";

#pragma warning disable IDE0032 // Backing field: ContentType getter/setter normalizes away values equal to Type for stable JSON.
    private string? _contentType;
#pragma warning restore IDE0032

    /// <summary>
    /// Document kind used for filtering, persisted as <c>content_type</c> next to <see cref="Type"/> (<c>type</c>).
    /// The two fields coexist for a future shared <c>_source</c> with website-ai-search, where the CLR type may stay
    /// JSON-ignored to avoid System.Text.Json polymorphic <c>$type</c> clashes with a serialized <c>type</c> property;
    /// the persisted filter value then comes from <c>content_type</c>.
    /// Reading falls back from the explicitly stored value to <see cref="Type"/> and finally to <c>doc</c>
    /// when both are absent (sparse hits).
    /// </summary>
    /// <remarks>
    /// Assigning a null/blank value, or a value ordinally equal to the current <see cref="Type"/>, clears the
    /// stored override so the getter follows <see cref="Type"/>.
    /// NOTE(review): the comparison uses the value of <see cref="Type"/> at assignment time, so the outcome depends
    /// on whether <see cref="Type"/> was assigned before this property - confirm this matches the intended
    /// deserialization order.
    /// </remarks>
    [Keyword, JsonPropertyName("content_type")]
    public string ContentType
    {
        get => _contentType ?? Type ?? "doc";
        set => _contentType =
            string.IsNullOrWhiteSpace(value) || string.Equals(value, Type, StringComparison.Ordinal)
                ? null
                : value;
    }

    /// <summary>
    /// Canonical/primary product of this document (nested object with id and repository).
    /// Name and version are resolved dynamically from the product id. Omitted from JSON when null.
    /// </summary>
    [Object, JsonPropertyName("product"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public IndexedProduct? Product { get; set; }

    /// <summary>
    /// Every related product discovered during inference (legacy mappings, applicability, etc.).
    /// Omitted from JSON when null.
    /// </summary>
    [Object, JsonPropertyName("related_products"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public IndexedProduct[]? RelatedProducts { get; set; }

    /// <summary>Document URL, serialized as <c>url</c>; carries the <c>Id</c> attribute.</summary>
    [Id, Keyword, JsonPropertyName("url")]
    public required string Url { get; set; } = string.Empty;

    /// <summary>Hash of the document, serialized as <c>hash</c>; carries the <c>ContentHash</c> attribute.</summary>
    [ContentHash, Keyword, JsonPropertyName("hash")]
    public string Hash { get; set; } = string.Empty;

    /// <summary>Serialized as <c>navigation_depth</c>.</summary>
    [JsonPropertyName("navigation_depth")]
    public int NavigationDepth { get; set; } = 50; // high default on purpose: a missing value gets penalized.

    /// <summary>Serialized as <c>navigation_table_of_contents</c>.</summary>
    [JsonPropertyName("navigation_table_of_contents")]
    public int NavigationTableOfContents { get; set; } = 50; // high default on purpose: a missing value gets penalized.

    /// <summary>Serialized as <c>navigation_section</c>; may be null.</summary>
    [Keyword(Normalizer = "keyword_normalizer"), JsonPropertyName("navigation_section")]
    public string? NavigationSection { get; set; }

    /// <summary>
    /// Date of the most recent batch update this document was part of.
    /// May be later than <see cref="LastUpdated"/>.
    /// </summary>
    [BatchIndexDate, JsonPropertyName("batch_index_date")]
    public DateTimeOffset BatchIndexDate { get; set; }

    /// <summary>Date this document was last updated.</summary>
    [LastUpdated, Timestamp, JsonPropertyName("last_updated")]
    public DateTimeOffset LastUpdated { get; set; }

    /// <summary>
    /// Date the content (stripped_body) of this document was last updated.
    /// Advances only when the whitespace-normalized content hash changes.
    /// </summary>
    [JsonPropertyName("content_last_updated")]
    public DateTimeOffset ContentLastUpdated { get; set; }

    /// <summary>
    /// Hash of the whitespace-normalized stripped_body, used to detect content-only changes.
    /// Serialized as <c>content_hash</c>.
    /// </summary>
    [Keyword, JsonPropertyName("content_hash")]
    public string ContentBodyHash { get; set; } = string.Empty;

    /// <summary>Serialized as <c>description</c>; may be null.</summary>
    [JsonPropertyName("description")]
    public string? Description { get; set; }

    /// <summary>Serialized as <c>headings</c>; empty by default.</summary>
    [Text, JsonPropertyName("headings")]
    public string[] Headings { get; set; } = [];

    /// <summary>Serialized as <c>links</c>; empty by default.</summary>
    [JsonPropertyName("links")]
    public string[] Links { get; set; } = [];

    /// <summary>Applicability entries, serialized as the nested field <c>applies_to</c>; may be null.</summary>
    [Nested, JsonPropertyName("applies_to")]
    public IReadOnlyCollection<AppliesToEntry>? Applies { get; set; }

    /// <summary>Serialized as <c>body</c>; may be null.</summary>
    [JsonPropertyName("body")]
    public string? Body { get; set; }

    /// <summary>The body with Markdown removed, suitable for search indexing.</summary>
    [AiInput, JsonPropertyName("stripped_body")]
    public string? StrippedBody { get; set; }

    /// <summary>Serialized as <c>abstract</c>; may be null.</summary>
    [JsonPropertyName("abstract")]
    public string? Abstract { get; set; }

    /// <summary>Parent entries of this document, serialized as <c>parents</c>; empty by default.</summary>
    [Object, JsonPropertyName("parents")]
    public ParentDocument[] Parents { get; set; } = [];

    /// <summary>Serialized as <c>hidden</c>; left out of the JSON while it holds the default (<c>false</c>).</summary>
    [JsonPropertyName("hidden"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
    public bool Hidden { get; set; }

    // AI enrichment fields - filled in after indexing by AiEnrichmentOrchestrator.
    // Each one is skipped during serialization while it is null.

    /// <summary>AI-generated summary, serialized as <c>ai_rag_optimized_summary</c>.</summary>
    [AiField("3-5 sentences densely packed with technical entities for semantic vector matching. Include: API endpoint names, method names, parameter names, configuration options, data types, and core functionality. Write for RAG retrieval - someone asking 'how do I configure X' should match this text.")]
    [Text, JsonPropertyName("ai_rag_optimized_summary"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public string? AiRagOptimizedSummary { get; set; }

    /// <summary>AI-generated short summary, serialized as <c>ai_short_summary</c>.</summary>
    [AiField("Exactly 5-10 words for UI tooltip or search snippet. Action-oriented, starts with a verb. Example: 'Configure index lifecycle policies for data retention'")]
    [Text, JsonPropertyName("ai_short_summary"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public string? AiShortSummary { get; set; }

    /// <summary>AI-generated search query, serialized as <c>ai_search_query</c>.</summary>
    [AiField("3-8 keywords representing a realistic search query a developer would type. Include product name and key technical terms. Example: 'elasticsearch bulk api batch indexing'")]
    [Keyword, JsonPropertyName("ai_search_query"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public string? AiSearchQuery { get; set; }

    /// <summary>AI-generated questions, serialized as <c>ai_questions</c>.</summary>
    [AiField(
        "Natural questions a dev would ask (6-15 words). Not too short, not too verbose. Examples: 'How do I bulk index documents?', 'What format does the bulk API use?', 'Why is my bulk request failing?'",
        MinItems = 3,
        MaxItems = 5)]
    [Text, JsonPropertyName("ai_questions"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public string[]? AiQuestions { get; set; }

    /// <summary>AI-generated autocomplete-style questions, serialized as <c>ai_autocomplete_questions</c>.</summary>
    [AiField(
        "Short, simple questions a user would type into a search bar (3-10 words). Think autocomplete — what someone types when they're starting to explore a topic, not deep technical questions. Use the feature or product name but keep the question simple. Avoid jargon or implementation details. Examples: 'What is the bulk API?', 'How do I index documents?', 'Why is indexing slow?'",
        MinItems = 3,
        MaxItems = 5)]
    [Text, JsonPropertyName("ai_autocomplete_questions"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public string[]? AiAutocompleteQuestions { get; set; }

    /// <summary>AI-generated use cases, serialized as <c>ai_use_cases</c>.</summary>
    [AiField(
        "Simple 2-4 word tasks a dev wants to do. Examples: 'index documents', 'check cluster health', 'enable TLS', 'fix slow queries', 'backup data'",
        MinItems = 2,
        MaxItems = 4)]
    [Text, JsonPropertyName("ai_use_cases"), JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public string[]? AiUseCases { get; set; }
}