Skip to content

Commit 4f1b22c

Browse files
committed
kv snapshots save and load last logits for correctness. added some text for musicui, updated docs
1 parent 54cf43a commit 4f1b22c

4 files changed

Lines changed: 178 additions & 14 deletions

File tree

embd_res/kcpp_docs.embd

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,144 @@
16441644
},
16451645
}
16461646
},
1647+
"/api/extra/music/prepare": {
1648+
"post": {
1649+
"summary": "Creates song generation parameters such as caption, lyrics, BPM and duration",
1650+
"description": "Creates song generation parameters such as caption, lyrics, BPM and duration. This should be called to produce the generation input for /api/extra/music/generate",
1651+
"requestBody": {
1652+
"content": {
1653+
"application/json": {
1654+
"example": {
1655+
"caption": "An emotional rap song about the kobold war.",
1656+
},
1657+
"schema": {
1658+
"properties": {
1659+
"caption": {
1660+
"type": "string",
1661+
"description": "A short description of the song to create"
1662+
}
1663+
},
1664+
"type": "object"
1665+
}
1666+
}
1667+
},
1668+
"required": true
1669+
},
1670+
"tags": [
1671+
"api/extra"
1672+
],
1673+
"responses": {
1674+
"200": {
1675+
"content": {
1676+
"application/json": {
1677+
"example":
1678+
{
1679+
"caption": "A melancholic and narrative-driven rap track built around a heavy beat",
1680+
"lyrics": "\n[Verse 1]\nKobold is love, yeah, kobold is life\nKobold is the only way we survive\nShe said okay, yo, keep it that way\nIn kobold we trust, just kobold today\n\n[Chorus]\nKobold, kobold, we carry on now\nKobold, kobold, each way and how\nKobold, kobold, right here we go\nKobold, kobold, all that I know.\n\n[Outro]",
1681+
"bpm": 120,
1682+
"duration": 64.0,
1683+
"keyscale": "G minor",
1684+
"timesignature": "2",
1685+
"vocal_language": "en",
1686+
"task_type": "text2music",
1687+
"seed": 622315,
1688+
"thinking": false,
1689+
"lm_temperature": 0.85,
1690+
"lm_cfg_scale": 2.0,
1691+
"lm_top_p": 0.9,
1692+
"lm_negative_prompt": "",
1693+
"inference_steps": 8,
1694+
"guidance_scale": 1.0,
1695+
"shift": 3.0,
1696+
"audio_codes": ""
1697+
},
1698+
"schema": {
1699+
"properties": {},
1700+
"type": "object"
1701+
}
1702+
}
1703+
},
1704+
"description": "Successful request"
1705+
}
1706+
},
1707+
}
1708+
},
1709+
"/api/extra/music/generate": {
1710+
"post": {
1711+
"summary": "Generates music based on provided captions, lyrics and configurations",
1712+
"description": "Generates music based on provided captions, lyrics and configurations. The config can be generated using /api/extra/music/prepare or crafted manually",
1713+
"requestBody": {
1714+
"content": {
1715+
"application/json": {
1716+
"example": {
1717+
"caption": "A melancholic and narrative-driven rap track built around a heavy beat",
1718+
"lyrics": "\n[Verse 1]\nKobold is love, yeah, kobold is life\nKobold is the only way we survive\nShe said okay, yo, keep it that way\nIn kobold we trust, just kobold today\n\n[Chorus]\nKobold, kobold, we carry on now\nKobold, kobold, each way and how\nKobold, kobold, right here we go\nKobold, kobold, all that I know.\n\n[Outro]",
1719+
"bpm": 120,
1720+
"duration": 64.0,
1721+
"keyscale": "G minor",
1722+
"timesignature": "2",
1723+
"vocal_language": "en",
1724+
"inference_steps": 8
1725+
},
1726+
"schema": {
1727+
"properties": {
1728+
"caption": {
1729+
"type": "string",
1730+
"description": "A short description of the song to create"
1731+
},
1732+
"lyrics": {
1733+
"type": "string",
1734+
"description": "The full lyrics of the song to generate"
1735+
},
1736+
"bpm": {
1737+
"type": "number",
1738+
"description": "The song Beats Per Minute"
1739+
},
1740+
"duration": {
1741+
"type": "number",
1742+
"description": "The length of the song, in seconds."
1743+
},
1744+
"keyscale": {
1745+
"type": "string",
1746+
"description": "The musical key of the song."
1747+
},
1748+
"timesignature": {
1749+
"type": "string",
1750+
"description": "The musical time signature of the song."
1751+
},
1752+
"vocal_language": {
1753+
"type": "string",
1754+
"description": "The language of the song lyrics."
1755+
},
1756+
"inference_steps": {
1757+
"type": "number",
1758+
"description": "How many diffusion steps to use."
1759+
}
1760+
},
1761+
"type": "object"
1762+
}
1763+
}
1764+
},
1765+
"required": true
1766+
},
1767+
"tags": [
1768+
"api/extra"
1769+
],
1770+
"responses": {
1771+
"200": {
1772+
"content": {
1773+
"audio/wav": {
1774+
"schema": {
1775+
"type": "string",
1776+
"format": "binary"
1777+
}
1778+
}
1779+
},
1780+
"description": "Successful request"
1781+
}
1782+
},
1783+
}
1784+
},
16471785
"/api/extra/json_to_grammar": {
16481786
"post": {
16491787
"summary": "Converts a provided JSON schema into GBNF grammar.",

embd_res/kcpp_musicui.embd

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ body{
2323
color:var(--text);
2424
}
2525
header{
26-
padding:16px 20px;
26+
padding:16px 12px;
2727
font-size:20px;
2828
font-weight:600;
2929
background:rgba(0,0,0,0.3);
@@ -32,8 +32,8 @@ header{
3232
.wrapper{
3333
display:grid;
3434
grid-template-columns:minmax(340px,500px) 1fr;
35-
gap:20px;
36-
padding:20px;
35+
gap:14px;
36+
padding:10px;
3737
}
3838
@media(max-width:1100px){
3939
.wrapper{grid-template-columns:1fr;}
@@ -45,7 +45,7 @@ header{
4545
box-shadow:0 10px 40px rgba(0,0,0,.4);
4646
}
4747
h2{
48-
margin:0 0 14px 0;
48+
margin:0 0 10px 0;
4949
font-size:16px;
5050
color:var(--accent2);
5151
}
@@ -112,7 +112,7 @@ button{
112112
audio{width:100%;margin-top:6px;}
113113
.advanced-toggle{
114114
margin-top:8px;
115-
font-size:12px;
115+
font-size:14px;
116116
cursor:pointer;
117117
color:var(--accent2);
118118
}
@@ -174,11 +174,11 @@ input[type="checkbox"] {
174174
<h2>Song Setup</h2>
175175

176176
<label>Caption</label>
177-
<input id="caption">
177+
<input id="caption" placeholder="Describe the song">
178178

179179
<div style="margin-top:10px">
180180
<label>Lyrics</label>
181-
<textarea id="lyrics"></textarea>
181+
<textarea id="lyrics" placeholder="Enter song lyrics, or press 'Plan' to generate them."></textarea>
182182
</div>
183183

184184
<div class="form-grid" style="margin-top:12px">
@@ -217,7 +217,7 @@ input[type="checkbox"] {
217217

218218
<div class="actions" id="actionContainer">
219219
<div id="normalActions" style="display:flex; gap:10px; flex-wrap:wrap;">
220-
<button class="secondary" onclick="planSong()">Plan</button>
220+
<button class="primary" onclick="planSong()">Plan</button>
221221
<button class="primary" onclick="generateSong()">Generate</button>
222222
<button class="danger" onclick="clearFields()">Clear</button>
223223
<button onclick="exportPlan()">Export JSON</button>
@@ -229,6 +229,10 @@ input[type="checkbox"] {
229229

230230
<input type="file" id="importFile" hidden accept="application/json" onchange="importPlan(event)">
231231
</div>
232+
<div>
233+
<p style="font-size:14px">Click 'Plan' first to generate lyrics, BPM and duration. Edit as needed.
234+
<br>When satisfied, click 'Generate' to make the music.</p>
235+
</div>
232236

233237
</div>
234238

gpttype_adapter.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ static int debugmode = 0; //-1 = hide all, 0 = normal, 1 = showall
127127
static bool is_quiet = false;
128128
static std::vector<gpt_vocab::id> last_n_tokens;
129129
static std::vector<gpt_vocab::id> current_context_tokens;
130+
static std::vector<float> loaded_latest_logits; //do not use normally, this is only required when loading state happens and we need to override logits
130131
static size_t mem_per_token = 0;
131132
static std::vector<float> logits;
132133
static std::vector<int> smartcontext;
@@ -4668,12 +4669,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
46684669
}
46694670
while(logits_sampled<logits_to_sample && remaining_tokens>0 && !abort_draft && !early_abort)
46704671
{
4671-
if(!firstdecodedone && current_context_tokens.size()>0)
4672-
{
4673-
embd.clear();
4674-
embd.push_back(current_context_tokens[current_context_tokens.size()-1]);
4675-
break;
4676-
}
46774672
if(logits_sampled>0)
46784673
{
46794674
//this is not the first loop, so we need to increment some things
@@ -4708,6 +4703,28 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
47084703
lowestLogit = LowestLogit(logits);
47094704
}
47104705

4706+
if(!firstdecodedone && current_context_tokens.size()>0)
4707+
{
4708+
if(loaded_latest_logits.size()>0)
4709+
{
4710+
if(debugmode==1 && !is_quiet)
4711+
{
4712+
//size() yields a size_t, which must be printed with %zu; %d expects an int and is a format mismatch
printf("\nLoading %zu saved logits...\n",loaded_latest_logits.size());
4713+
}
4714+
//first decode was not done. this can happen when reloading from a perfectly matched state.
4715+
//to prevent a catastrophic failure, we must prepare emergency logits for usage
4716+
logitsPtr = loaded_latest_logits.data();
4717+
lowestLogit = LowestLogit(logitsPtr,n_vocab);
4718+
}
4719+
else
4720+
{
4721+
printf("\nNo cached logits and we need them, emergency fallback with degraded quality...\n");
4722+
embd.clear();
4723+
embd.push_back(current_context_tokens[current_context_tokens.size()-1]);
4724+
break;
4725+
}
4726+
}
4727+
47114728
//if adaptive p sampling is used, we need to cache the original probabilities
47124729
std::vector<llama_token_data> original_candidates;
47134730
if(adaptive_target > 0.0f)
@@ -5237,6 +5254,7 @@ size_t gpttype_save_state_kv(int slot)
52375254
savestates[slot].current_savestate_buffer.clear();
52385255
savestates[slot].current_draft_savestate_buffer.clear();
52395256
savestates[slot].savestate_context_tokens.clear();
5257+
savestates[slot].latest_logits.clear();
52405258
savestates[slot].current_savestate_size = 0;
52415259
savestates[slot].current_draft_savestate_size = 0;
52425260
savestates[slot].media_signature = "";
@@ -5258,6 +5276,8 @@ size_t gpttype_save_state_kv(int slot)
52585276
savestates[slot].current_savestate_size = newsize;
52595277
savestates[slot].savestate_context_tokens = current_context_tokens;
52605278
savestates[slot].media_signature = media_composite_image_signature;
5279+
float * lgptr = llama_get_logits(llama_ctx_v4);
5280+
savestates[slot].latest_logits.assign(lgptr,lgptr+n_vocab);
52615281
int maxedpos = llama_memory_seq_pos_max(llama_get_memory(llama_ctx_v4),0);
52625282
//kcpp: so maxedpos appears to always be equal to ctx tokens - 2, if savestate_ctx_tokens > maxedpos + 2 then trim excess
52635283
if(maxedpos > 0 && savestates[slot].savestate_context_tokens.size() > maxedpos + 2)
@@ -5316,6 +5336,7 @@ bool gpttype_load_state_kv(int slot)
53165336
if(res > 0)
53175337
{
53185338
current_context_tokens = savestates[slot].savestate_context_tokens;
5339+
loaded_latest_logits = savestates[slot].latest_logits;
53195340
printf("\nKV Load SaveState %d: Restored KV with %zu tokens.\n", slot,current_context_tokens.size());
53205341
if(draft_ctx && savestates[slot].current_draft_savestate_size>0)
53215342
{

otherarch/otherarch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,7 @@ struct savestate_data
537537
size_t current_draft_savestate_size = 0;
538538
std::vector<uint8_t> current_draft_savestate_buffer;
539539
std::vector<gpt_vocab::id> savestate_context_tokens; //for context clones
540+
std::vector<float> latest_logits;
540541
int64_t last_used = 0; //unix timestamp, updated on save or load
541542
std::string media_signature = "";
542543
};

0 commit comments

Comments
 (0)