@@ -147,12 +147,14 @@ def generate_embeddings(payload: dict) -> list:
147147 Returns a list of floats (the embedding vector).
148148 """
149149 text = payload ["text" ]
150- model = os .environ .get ("EMBEDDING_MODEL " , "text-embedding-3-large" )
150+ model = os .environ .get ("AI_FOUNDRY_EMBEDDING_MODEL " , "text-embedding-3-large" )
151151
152+ dimensions = int (os .environ .get ("EMBEDDING_DIMENSION" , "1536" ))
152153 client = _get_embeddings_client ()
153154 response = client .embeddings .create (
154155 input = [text ],
155156 model = model ,
157+ dimensions = dimensions ,
156158 )
157159 return response .data [0 ].embedding
158160
@@ -222,7 +224,7 @@ def generate_thread_summary(payload: dict) -> dict:
222224 user_id = payload ["user_id" ]
223225 thread_id = payload ["thread_id" ]
224226 recent_k = payload .get ("recent_k" )
225- model = os .environ .get ("LLM_MODEL" , "gpt-4o " )
227+ model = os .environ .get ("LLM_MODEL" , "gpt-5-nano " )
226228 container = _get_cosmos_container ()
227229
228230 # ---- 1. Check for an existing thread summary ----
@@ -309,11 +311,13 @@ def generate_thread_summary(payload: dict) -> dict:
309311 summary_text = response .choices [0 ].message .content
310312
311313 # ---- 6. Generate embedding ----
312- embedding_model = os .environ .get ("EMBEDDING_MODEL" , "text-embedding-3-large" )
314+ embedding_model = os .environ .get ("AI_FOUNDRY_EMBEDDING_MODEL" , "text-embedding-3-large" )
315+ dimensions = int (os .environ .get ("EMBEDDING_DIMENSION" , "1536" ))
313316 emb_client = _get_embeddings_client ()
314317 emb_response = emb_client .embeddings .create (
315318 input = [summary_text ],
316319 model = embedding_model ,
320+ dimensions = dimensions ,
317321 )
318322 summary_embedding = emb_response .data [0 ].embedding
319323
@@ -370,7 +374,7 @@ def extract_facts(payload: dict) -> dict:
370374 user_id = payload ["user_id" ]
371375 thread_id = payload ["thread_id" ]
372376 recent_k = payload .get ("recent_k" )
373- model = os .environ .get ("LLM_MODEL " , "gpt-4o " )
377+ model = os .environ .get ("AI_FOUNDRY_LLM " , "gpt-5-nano " )
374378
375379 # ---- 1. Query Cosmos DB ----
376380 container = _get_cosmos_container ()
@@ -434,7 +438,8 @@ def extract_facts(payload: dict) -> dict:
434438 fact_lines = [facts_text .strip ()]
435439
436440 # ---- 5. Generate embeddings and store each fact ----
437- embedding_model = os .environ .get ("EMBEDDING_MODEL" , "text-embedding-3-large" )
441+ embedding_model = os .environ .get ("AI_FOUNDRY_EMBEDDING_MODEL" , "text-embedding-3-large" )
442+ dimensions = int (os .environ .get ("EMBEDDING_DIMENSION" , "1536" ))
438443 emb_client = _get_embeddings_client ()
439444 now = datetime .now (timezone .utc ).isoformat ()
440445 facts_docs = []
@@ -443,6 +448,7 @@ def extract_facts(payload: dict) -> dict:
443448 emb_response = emb_client .embeddings .create (
444449 input = [fact ],
445450 model = embedding_model ,
451+ dimensions = dimensions ,
446452 )
447453 fact_doc = {
448454 "id" : str (uuid .uuid4 ()),
@@ -490,7 +496,7 @@ def generate_user_summary(payload: dict) -> dict:
490496 user_id = payload ["user_id" ]
491497 thread_ids = payload .get ("thread_ids" )
492498 recent_k = payload .get ("recent_k" )
493- model = os .environ .get ("LLM_MODEL " , "gpt-4o " )
499+ model = os .environ .get ("AI_FOUNDRY_LLM " , "gpt-5-nano " )
494500 container = _get_cosmos_container ()
495501
496502 # ---- 1. Check for an existing user summary ----
@@ -598,11 +604,13 @@ def generate_user_summary(payload: dict) -> dict:
598604 summary_text = response .choices [0 ].message .content
599605
600606 # ---- 6. Generate embedding ----
601- embedding_model = os .environ .get ("EMBEDDING_MODEL" , "text-embedding-3-large" )
607+ embedding_model = os .environ .get ("AI_FOUNDRY_EMBEDDING_MODEL" , "text-embedding-3-large" )
608+ dimensions = int (os .environ .get ("EMBEDDING_DIMENSION" , "1536" ))
602609 emb_client = _get_embeddings_client ()
603610 emb_response = emb_client .embeddings .create (
604611 input = [summary_text ],
605612 model = embedding_model ,
613+ dimensions = dimensions ,
606614 )
607615 summary_embedding = emb_response .data [0 ].embedding
608616
0 commit comments