llm-research-engine/chain_4_1.py at master · bdeva1975/llm-research-engine · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from llm_models import get_llm
from web_scraping import web_scrape_many
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnableParallel
from prompts import SUMMARY_PROMPT_TEMPLATE

# Maximum number of characters of scraped text kept per search result;
# _batch_scrape slices every scraped text to this length.
RESULT_TEXT_MAX_CHARACTERS = 5_000  # A reduced from 10000


def _batch_scrape(url_dicts: list) -> list:  # B
    """
    Scrape every URL in url_dicts with one web_scrape_many call.

    Each input dict must provide the keys result_url, search_query and
    user_question. The return value is a list of new dicts carrying those
    three keys plus search_result_text: the scraped text for that URL,
    cut to at most RESULT_TEXT_MAX_CHARACTERS characters.
    """
    # C the whole URL list is handed to web_scrape_many in a single call
    scraped_texts = web_scrape_many([entry['result_url'] for entry in url_dicts])

    # NOTE(review): zip stops at the shorter sequence, so this assumes
    # web_scrape_many returns exactly one text per URL, in input order — confirm.
    enriched = []
    for entry, page_text in zip(url_dicts, scraped_texts):
        enriched.append({
            'search_result_text': page_text[:RESULT_TEXT_MAX_CHARACTERS],
            'result_url': entry['result_url'],
            'search_query': entry['search_query'],
            'user_question': entry['user_question'],
        })
    return enriched


def _format_summary(x: dict) -> dict:  # D
    """
    Build the final per-source summary dict.

    Reads result_url, text_summary and user_question from x and returns a
    dict with two keys: summary (the source URL and the summary text joined
    into one labelled string) and user_question (passed through unchanged).
    """
    source_url = x['result_url']
    text_summary = x['text_summary']
    labelled_summary = f"Source Url: {source_url}\nSummary: {text_summary}"
    return {'summary': labelled_summary, 'user_question': x['user_question']}


# E Per-item summarization chain — operates on a single enriched dict.
#   The parallel step feeds the same input dict to three branches: one runs
#   the summary prompt through the LLM and parses the reply to a string, the
#   other two copy result_url and user_question through so that
#   _format_summary can read them from the combined output.
_per_item_summary_chain = RunnableParallel(
    text_summary=SUMMARY_PROMPT_TEMPLATE | get_llm() | StrOutputParser(),
    result_url=lambda x: x['result_url'],
    user_question=lambda x: x['user_question'],
).pipe(RunnableLambda(_format_summary))

# F Full chain: _batch_scrape handles the whole input list in one step, then
#   the per-item summary chain is applied to every enriched dict it returns.
search_result_text_and_summary_chain = RunnableLambda(_batch_scrape).pipe(  # G scrape step runs first, on the full list
    _per_item_summary_chain.map()  # H per-item chain mapped over the scraped items
)

# A Halved the per-result character cap (a rough proxy for the token budget of
#   each LLM summarization call) — speeds up each call, reduces cost, and
#   smarter content extraction means less boilerplate anyway
# B Takes the full list of URL dicts from chain_3_1 in one shot
# C web_scrape_many uses httpx + asyncio.gather — all URLs fetched in parallel,
#   not sequentially — this is the biggest performance win in the entire pipeline
# D Helper to format the final summary dict cleanly
# E Summarization sub-chain — unchanged from original logic
# F The new combined chain replaces the old per-URL RunnableLambda approach
# G _batch_scrape runs before .map() so all HTTP I/O is done before LLM calls start
# H .map() parallelizes the LLM summarization across the scraped items