# csvdata_to_vec.py
# Reads question/answer rows from a CSV file, embeds them with the Qianfan
# embeddings endpoint, and stores the result in a persistent Chroma vector store.
import json
import codecs
import csv
import os
from langchain_community.embeddings import QianfanEmbeddingsEndpoint
from langchain_community.vectorstores.chroma import Chroma
# Qianfan credentials are read from the environment (QIANFAN_AK / QIANFAN_SK).
# SECURITY: earlier revisions of this script assigned literal key values to
# os.environ here. Keys that were committed to source control should be
# treated as leaked and rotated; export fresh ones in the shell instead.
REQUIRED_ENV_VARS = ("QIANFAN_AK", "QIANFAN_SK")

# Input CSV with `ask` / `answer` columns and the on-disk Chroma location.
CSV_PATH = './file/内科.csv'
PERSIST_DIRECTORY = "./chroma_db"

# None keeps the platform-default text encoding, which is what the original
# `codecs.open(path)` call used.
# NOTE(review): the file's real encoding is not visible from this script;
# set it explicitly (e.g. "utf-8" or "gb18030") once confirmed.
CSV_ENCODING = None

# The original loop appended a row and only afterwards tested `num > 1000`,
# so it kept 1001 rows. That count is preserved here.
# NOTE(review): if 1000 was intended, change this constant.
MAX_ROWS = 1001


def rows_to_texts(rows, max_rows: int = MAX_ROWS) -> list:
    """Convert CSV rows into the text documents that get embedded.

    Args:
        rows: Iterable of mappings that each have ``ask`` and ``answer`` keys
            (for example a ``csv.DictReader``).
        max_rows: Maximum number of rows to convert. Values <= 0 yield an
            empty list.

    Returns:
        A list of strings, each the ``str()`` of a
        ``{'question': ..., 'answer': ...}`` dict, in input order.

    Raises:
        KeyError: If a row lacks the ``ask`` or ``answer`` key.
    """
    texts = []
    if max_rows <= 0:
        return texts
    for row in rows:
        # str(dict) (not JSON) is kept on purpose so stored documents have
        # the same textual form as those produced by earlier runs.
        texts.append(str({'question': row['ask'], 'answer': row['answer']}))
        # Test after appending so no extra row is pulled from the reader.
        if len(texts) >= max_rows:
            break
    return texts


def main() -> None:
    """Read the CSV, embed the rows with Qianfan, and persist them to Chroma."""
    missing = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]
    if missing:
        raise SystemExit(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    embeddings = QianfanEmbeddingsEndpoint(model='bge-large-zh')

    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(CSV_PATH, encoding=CSV_ENCODING, newline="") as f:
        new_json = rows_to_texts(csv.DictReader(f, skipinitialspace=True))

    if not new_json:
        # Stop before calling the embedding service with nothing to embed.
        raise SystemExit("No rows read from " + CSV_PATH + "; nothing to embed.")

    print("开始编码")
    vector_db = Chroma.from_texts(
        new_json,
        embedding=embeddings,
        persist_directory=PERSIST_DIRECTORY,
    )
    vector_db.persist()
    print("医疗数据已加载进知识库")


if __name__ == "__main__":
    main()