|
| 1 | +import time |
| 2 | +from datetime import datetime, timedelta |
| 3 | +from llama_index.core import get_response_synthesizer |
| 4 | +from llama_index.core.response_synthesizers.type import ResponseMode |
| 5 | +from llama_index.core.tools import QueryEngineTool, ToolMetadata, FunctionTool |
| 6 | +from llama_index.core.agent import ReActAgent |
| 7 | +from llama_index.core.workflow import ( |
| 8 | + StartEvent, |
| 9 | + StopEvent, |
| 10 | + Workflow, |
| 11 | + step, |
| 12 | + Event, |
| 13 | + Context, |
| 14 | +) |
| 15 | +from langchain_community.tools.tavily_search import TavilySearchResults |
| 16 | + |
| 17 | +from uipath_llamaindex.llms import UiPathOpenAI |
| 18 | +from uipath_llamaindex.query_engines import ContextGroundingQueryEngine |
| 19 | +from uipath import UiPath |
| 20 | + |
# Name of the context-grounding index that stores the aggregated news, and the
# folder it is looked up in.
INDEX_NAME: str = "News-Index"
FOLDER_PATH: str = "Shared"
# Maximum age, in hours, of the last ingestion for the index to count as fresh.
FRESHNESS_HOURS: int = 24

# Module-level clients shared by the helper functions and the workflow steps.
uipath = UiPath()
tavily_tool = TavilySearchResults(max_results=5)
| 27 | + |
| 28 | + |
class CheckIndexEvent(Event):
    """Event asking the workflow to check whether the index is fresh."""

    # Topic extracted from the user's query.
    topic: str
| 31 | + |
| 32 | + |
class SearchWebEvent(Event):
    """Event asking the workflow to search the web for news on a topic."""

    # Topic to search the web for.
    topic: str
| 35 | + |
| 36 | + |
class AddToIndexEvent(Event):
    """Event carrying formatted web results to be uploaded to the index."""

    # Topic the web results are about.
    topic: str
    # Web search results already formatted as a single text document.
    web_results: str
| 40 | + |
| 41 | + |
class WaitForIngestionEvent(Event):
    """Payload-free event emitted after new content was added to the index."""
| 44 | + |
| 45 | + |
class QueryIndexEvent(Event):
    """Event asking the workflow to answer the query from the index."""

    # Topic used to describe the index search tool.
    topic: str
| 48 | + |
| 49 | + |
async def check_index_freshness(topic: str) -> bool:
    """Report whether the news index was ingested recently enough.

    Retrieves the ``INDEX_NAME`` index from ``FOLDER_PATH`` and compares its
    ``last_ingested`` timestamp with the current time.

    Args:
        topic: Topic the caller is interested in. Accepted for interface
            compatibility but not consulted: freshness is judged for the
            index as a whole, not per topic.

    Returns:
        ``True`` when the last ingestion happened less than
        ``FRESHNESS_HOURS`` hours ago; ``False`` when the index has never
        been ingested, is older than that, or could not be inspected.
    """
    try:
        index = await uipath.context_grounding.retrieve_async(
            INDEX_NAME, folder_path=FOLDER_PATH
        )

        last_ingested_time = index.last_ingested
        if last_ingested_time is None:
            print("Index has never been ingested")
            return False

        # Build "now" with the timestamp's own tzinfo so the subtraction never
        # mixes naive and aware datetimes.
        # NOTE(review): if tzinfo is None this is naive local time; confirm
        # the service never returns naive UTC timestamps.
        current_time = datetime.now(last_ingested_time.tzinfo)
        time_diff = current_time - last_ingested_time

        print(f"Last ingested: {last_ingested_time}")
        print(f"Time since last ingestion: {time_diff}")

        is_fresh = time_diff < timedelta(hours=FRESHNESS_HOURS)
        if is_fresh:
            print(f"Data is fresh (less than {FRESHNESS_HOURS} hours old)")
        else:
            print(f"Data is stale (more than {FRESHNESS_HOURS} hours old)")
        return is_fresh
    except Exception as exc:
        # Best effort: an index that cannot be inspected is treated as stale,
        # but the failure is reported instead of being silently swallowed.
        print(f"Could not determine index freshness: {exc!r}")
        return False
| 75 | + |
| 76 | + |
async def in_progress_ingestion() -> bool:
    """Report whether the news index currently has an ingestion under way.

    Retrieves the ``INDEX_NAME`` index from ``FOLDER_PATH`` and inspects its
    ``last_ingestion_status``.

    Returns:
        ``True`` when the status is ``'Queued'``, ``'InProgress'`` or
        ``'Running'``; ``False`` for any other status, and also when the
        index could not be retrieved (the error is printed in that case).
    """
    try:
        index = await uipath.context_grounding.retrieve_async(
            INDEX_NAME, folder_path=FOLDER_PATH
        )
    except Exception as exc:
        # Best effort: keep the original "not in progress" answer on failure,
        # but make the failure visible.
        # NOTE(review): callers cannot tell "finished" from "status unknown".
        print(f"Could not read ingestion status: {exc!r}")
        return False

    return index.last_ingestion_status in {"Queued", "InProgress", "Running"}
| 86 | + |
| 87 | + |
class NewsAggregatorWorkflow(Workflow):
    """Workflow that answers a news query from a context-grounding index.

    The steps extract a topic from the query and check whether the index was
    ingested recently. If it was, the index is queried directly; otherwise the
    web is searched, the results are uploaded to the index, and the workflow
    waits for ingestion before querying.
    """

    def __init__(self, **kwargs):
        """Create the workflow and the LLM shared by all steps.

        Args:
            **kwargs: Forwarded unchanged to ``Workflow.__init__``.
        """
        super().__init__(**kwargs)
        self.llm = UiPathOpenAI(model="gpt-4o-2024-11-20")

    @staticmethod
    def _safe_file_stem(raw: str) -> str:
        """Turn LLM output into a file-name stem safe to embed in a blob path.

        Runs of characters other than word characters, ``.`` and ``-`` become a
        single underscore; leading/trailing ``.`` and ``_`` are dropped so the
        result cannot form a relative path segment. Falls back to ``"news"``
        when nothing usable is left.
        """
        import re

        stem = re.sub(r"[^\w.-]+", "_", raw.strip()).strip("._")
        return stem or "news"

    @step
    async def start(self, ctx: Context, ev: StartEvent) -> CheckIndexEvent | StopEvent:
        """Read the query, extract its topic with the LLM and store both.

        Returns:
            ``StopEvent`` with an explanatory message when no query was given,
            otherwise ``CheckIndexEvent`` carrying the extracted topic.
        """
        query = ev.get("query", "")
        if not query:
            return StopEvent(result="No query provided")

        await ctx.store.set("original_query", query)

        topic_response = await self.llm.acomplete(
            f"Extract the main topic/subject from this query. Return only the topic name, nothing else: {query}"
        )
        topic = str(topic_response).strip()
        await ctx.store.set("topic", topic)

        return CheckIndexEvent(topic=topic)

    @step
    async def check_index(self, ctx: Context, ev: CheckIndexEvent) -> SearchWebEvent | QueryIndexEvent:
        """Route to the index query or to a web search based on freshness."""
        if await check_index_freshness(ev.topic):
            print(f"Found fresh data about {ev.topic} in index")
            return QueryIndexEvent(topic=ev.topic)

        print(f"No fresh data found, searching web for {ev.topic}")
        return SearchWebEvent(topic=ev.topic)

    @step
    async def search_web(self, ctx: Context, ev: SearchWebEvent) -> AddToIndexEvent:
        """Search the web for the topic and format the hits as one document."""
        print(f"Searching web for: {ev.topic}")

        # Use the tool's async entry point so the search does not block the
        # event loop the other workflow steps run on.
        results = await tavily_tool.ainvoke({"query": f"latest news about {ev.topic}"})

        header = f"News about {ev.topic} (Retrieved: {datetime.now().isoformat()})\n\n"
        entries = [
            f"{i}. {result.get('content', '')}\n"
            f" Source: {result.get('url', 'N/A')}\n\n"
            for i, result in enumerate(results, 1)
        ]
        formatted_results = header + "".join(entries)

        print(f"Found {len(results)} results")
        return AddToIndexEvent(topic=ev.topic, web_results=formatted_results)

    @step
    async def add_to_index(self, ctx: Context, ev: AddToIndexEvent) -> WaitForIngestionEvent:
        """Upload the formatted web results to the index as a text file.

        The file name is an LLM-generated stem derived from the topic,
        suffixed with the current Unix timestamp.
        """
        timestamp = int(time.time())
        file_name_response = await self.llm.acomplete(
            f"""Generate a file name from this topic, replacing spaces with underscores.
            For instance, 'Tesla news' should be 'tesla_news'.
            Topic: {ev.topic}
            Return only the filename without extension."""
        )
        # The LLM reply is free text; sanitize it before using it in a path.
        file_name = self._safe_file_stem(str(file_name_response))
        blob_file_path = f"{file_name}-{timestamp}.txt"

        print(f"Adding data to index with filename: {blob_file_path}")
        await uipath.context_grounding.add_to_index_async(
            name=INDEX_NAME,
            blob_file_path=blob_file_path,
            content_type="application/txt",
            content=ev.web_results,
            folder_path=FOLDER_PATH,
        )

        return WaitForIngestionEvent()

    @step
    async def wait_for_ingestion(self, ctx: Context, ev: WaitForIngestionEvent) -> QueryIndexEvent | StopEvent:
        """Poll the index until no ingestion is in progress, then query it.

        Checks up to 10 times, 5 seconds apart.

        Returns:
            ``QueryIndexEvent`` for the stored topic once no ingestion is in
            progress, or ``StopEvent`` with an explanatory message when every
            check still reported an ingestion in progress.
        """
        # Local import keeps this block self-contained.
        import asyncio

        max_tries = 10
        wait_seconds = 5

        for tries_left in range(max_tries - 1, -1, -1):
            if not await in_progress_ingestion():
                print("Ingestion complete!")
                topic = await ctx.store.get("topic")
                return QueryIndexEvent(topic=topic)

            print(f"Waiting for ingestion... Retrying {tries_left} more time(s)")
            if tries_left:
                # Non-blocking sleep: time.sleep() would stall the event loop.
                # No sleep after the last check, since nothing follows it.
                await asyncio.sleep(wait_seconds)

        return StopEvent(result="Index ingestion took too long. Please try again later.")

    @step
    async def query_index(self, ctx: Context, ev: QueryIndexEvent) -> StopEvent:
        """Answer the original query with a ReAct agent backed by the index.

        Returns:
            ``StopEvent`` whose result is the agent's response as a string.
        """
        print(f"Querying index for: {ev.topic}")

        response_synthesizer = get_response_synthesizer(
            response_mode=ResponseMode.SIMPLE_SUMMARIZE,
            llm=self.llm,
        )
        query_engine = ContextGroundingQueryEngine(
            index_name=INDEX_NAME,
            folder_path=FOLDER_PATH,
            response_synthesizer=response_synthesizer,
        )
        news_tool = QueryEngineTool(
            query_engine=query_engine,
            metadata=ToolMetadata(
                name="news_search",
                description=f"Search through indexed news articles about {ev.topic}",
            ),
        )

        react_agent = ReActAgent(tools=[news_tool], llm=self.llm, verbose=True)
        original_query = await ctx.store.get("original_query")
        response = await react_agent.run(user_msg=original_query)

        return StopEvent(result=str(response))
| 203 | + |
| 204 | + |
# Module-level workflow instance, created with a 180-second timeout and
# verbose output enabled.
agent = NewsAggregatorWorkflow(
    timeout=180,
    verbose=True,
)
0 commit comments