from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
# Earlier commented-out example (remote HuggingFaceHub + LLMChain, also non-functional):
# hub_llm = HuggingFaceHub(repo_id="mrm8488/t5-base-finetuned-wikiSQL")
# prompt = PromptTemplate(
#     input_variables=["question"],
#     template="Translate English to SQL: {question}"
# )
# hub_chain = LLMChain(prompt=prompt, llm=hub_llm, verbose=True)
# print(hub_chain.run("What is the average age of the respondents using a mobile device?"))
# Note: Original code used remote HuggingFace Inference API, which is incompatible with
# current package versions. Switched to local execution using HuggingFacePipeline.
# See commented-out code below for original remote API approach (non-functional).
# ============================================================================
# ORIGINAL APPROACH (REMOTE API) - COMMENTED OUT DUE TO COMPATIBILITY ISSUES
# ============================================================================
# from langchain_community.llms import HuggingFaceHub
#
# # Original remote API approach using deprecated HuggingFaceHub
# # Issues:
# # - HuggingFaceHub is deprecated and broken with huggingface_hub>=1.0
# # - HuggingFaceEndpoint (recommended replacement) has compatibility issues
# # - Requires HUGGINGFACEHUB_API_TOKEN in .env
#
# hub_llm = HuggingFaceHub(
#     repo_id='gpt2',
#     model_kwargs={'temperature': 0.7, 'max_length': 100}
# )
# ============================================================================
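
# If remote inference is still wanted, langchain_huggingface also ships a
# HuggingFaceEndpoint class. Untested here (see the compatibility note above);
# the repo_id and parameters below are illustrative assumptions, not verified
# settings, so the sketch stays commented out:
#
# from langchain_huggingface import HuggingFaceEndpoint
# hub_llm = HuggingFaceEndpoint(
#     repo_id='gpt2',
#     max_new_tokens=100,
#     temperature=0.7,
# )  # Requires HUGGINGFACEHUB_API_TOKEN in the environment / .env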
# ============================================================================
# WORKING APPROACH (LOCAL EXECUTION)
# ============================================================================
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
load_dotenv()
# Local execution using HuggingFacePipeline (downloads model on first run ~500MB)
model_id = 'gpt2'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100,  # Total length (prompt + generation), not new tokens
    do_sample=True,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id  # Suppress pad_token warning
)
hub_llm = HuggingFacePipeline(pipeline=pipe)
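
# Optional smoke test of the raw transformers pipeline before wiring it into a
# chain (illustrative prompt; commented out so the script's output is unchanged):
# print(pipe("The quick brown fox")[0]["generated_text"])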
# ============================================================================
prompt = PromptTemplate(
    input_variables=["profession"],
    template="You had one job 😡! You're the {profession} and you didn't have to be sarcastic"
)
# LCEL (pipe syntax) for LangChain 1.x compatibility
hub_chain = prompt | hub_llm
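# Note: gpt2 is a plain completion model, so each result echoes the prompt and
# then continues it; passing return_full_text=False in the pipeline(...) call
# above would return only the generated continuation.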
print(hub_chain.invoke({"profession": "customer service agent"}))
print(hub_chain.invoke({"profession": "politician"}))
print(hub_chain.invoke({"profession": "Fintech CEO"}))
print(hub_chain.invoke({"profession": "insurance agent"}))