Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"description": "Fix deprecated imports in example notebooks (ModelType, LanguageModelConfig, ModelManager).",
"type": "patch"
}
111 changes: 3 additions & 108 deletions docs/examples_notebooks/drift_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,119 +15,14 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd\n",
"from graphrag.config.enums import ModelType\n",
"from graphrag.config.models.drift_search_config import DRIFTSearchConfig\n",
"from graphrag.config.models.language_model_config import LanguageModelConfig\n",
"from graphrag.language_model.manager import ModelManager\n",
"from graphrag.query.indexer_adapters import (\n",
" read_indexer_entities,\n",
" read_indexer_relationships,\n",
" read_indexer_report_embeddings,\n",
" read_indexer_reports,\n",
" read_indexer_text_units,\n",
")\n",
"from graphrag.query.structured_search.drift_search.drift_context import (\n",
" DRIFTSearchContextBuilder,\n",
")\n",
"from graphrag.query.structured_search.drift_search.search import DRIFTSearch\n",
"from graphrag.tokenizer.get_tokenizer import get_tokenizer\n",
"from graphrag_vectors.lancedb import LanceDBVectorStore\n",
"\n",
"INPUT_DIR = \"./inputs/operation dulce\"\n",
"LANCEDB_URI = f\"{INPUT_DIR}/lancedb\"\n",
"\n",
"COMMUNITY_REPORT_TABLE = \"community_reports\"\n",
"COMMUNITY_TABLE = \"communities\"\n",
"ENTITY_TABLE = \"entities\"\n",
"RELATIONSHIP_TABLE = \"relationships\"\n",
"COVARIATE_TABLE = \"covariates\"\n",
"TEXT_UNIT_TABLE = \"text_units\"\n",
"COMMUNITY_LEVEL = 2\n",
"\n",
"\n",
"# read nodes table to get community and degree data\n",
"entity_df = pd.read_parquet(f\"{INPUT_DIR}/{ENTITY_TABLE}.parquet\")\n",
"community_df = pd.read_parquet(f\"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet\")\n",
"\n",
"print(f\"Entity df columns: {entity_df.columns}\")\n",
"\n",
"entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)\n",
"\n",
"# load description embeddings to an in-memory lancedb vectorstore\n",
"# to connect to a remote db, specify url and port values.\n",
"description_embedding_store = LanceDBVectorStore(\n",
" db_uri=LANCEDB_URI,\n",
" index_name=\"entity_description\",\n",
")\n",
"description_embedding_store.connect()\n",
"\n",
"full_content_embedding_store = LanceDBVectorStore(\n",
" db_uri=LANCEDB_URI,\n",
" index_name=\"community_full_content\",\n",
")\n",
"full_content_embedding_store.connect()\n",
"\n",
"print(f\"Entity count: {len(entity_df)}\")\n",
"entity_df.head()\n",
"\n",
"relationship_df = pd.read_parquet(f\"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet\")\n",
"relationships = read_indexer_relationships(relationship_df)\n",
"\n",
"print(f\"Relationship count: {len(relationship_df)}\")\n",
"relationship_df.head()\n",
"\n",
"text_unit_df = pd.read_parquet(f\"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet\")\n",
"text_units = read_indexer_text_units(text_unit_df)\n",
"\n",
"print(f\"Text unit records: {len(text_unit_df)}\")\n",
"text_unit_df.head()\n",
"\n",
"report_df = pd.read_parquet(f\"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet\")\n",
"reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)\n",
"read_indexer_report_embeddings(reports, full_content_embedding_store)"
]
"source": "import os\n\nimport pandas as pd\nfrom graphrag.config.models.drift_search_config import DRIFTSearchConfig\nfrom graphrag.query.indexer_adapters import (\n read_indexer_entities,\n read_indexer_relationships,\n read_indexer_report_embeddings,\n read_indexer_reports,\n read_indexer_text_units,\n)\nfrom graphrag.query.structured_search.drift_search.drift_context import (\n DRIFTSearchContextBuilder,\n)\nfrom graphrag.query.structured_search.drift_search.search import DRIFTSearch\nfrom graphrag_llm.completion import create_completion\nfrom graphrag_llm.config import ModelConfig\nfrom graphrag_llm.embedding import create_embedding\nfrom graphrag_vectors.lancedb import LanceDBVectorStore\n\nINPUT_DIR = \"./inputs/operation dulce\"\nLANCEDB_URI = f\"{INPUT_DIR}/lancedb\"\n\nCOMMUNITY_REPORT_TABLE = \"community_reports\"\nCOMMUNITY_TABLE = \"communities\"\nENTITY_TABLE = \"entities\"\nRELATIONSHIP_TABLE = \"relationships\"\nCOVARIATE_TABLE = \"covariates\"\nTEXT_UNIT_TABLE = \"text_units\"\nCOMMUNITY_LEVEL = 2\n\n\n# read nodes table to get community and degree data\nentity_df = pd.read_parquet(f\"{INPUT_DIR}/{ENTITY_TABLE}.parquet\")\ncommunity_df = pd.read_parquet(f\"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet\")\n\nprint(f\"Entity df columns: {entity_df.columns}\")\n\nentities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)\n\n# load description embeddings to an in-memory lancedb vectorstore\n# to connect to a remote db, specify url and port values.\ndescription_embedding_store = LanceDBVectorStore(\n db_uri=LANCEDB_URI,\n index_name=\"entity_description\",\n)\ndescription_embedding_store.connect()\n\nfull_content_embedding_store = LanceDBVectorStore(\n db_uri=LANCEDB_URI,\n index_name=\"community_full_content\",\n)\nfull_content_embedding_store.connect()\n\nprint(f\"Entity count: {len(entity_df)}\")\nentity_df.head()\n\nrelationship_df = pd.read_parquet(f\"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet\")\nrelationships = read_indexer_relationships(relationship_df)\n\nprint(f\"Relationship count: {len(relationship_df)}\")\nrelationship_df.head()\n\ntext_unit_df = pd.read_parquet(f\"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet\")\ntext_units = read_indexer_text_units(text_unit_df)\n\nprint(f\"Text unit records: {len(text_unit_df)}\")\ntext_unit_df.head()\n\nreport_df = pd.read_parquet(f\"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet\")\nreports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)\nread_indexer_report_embeddings(reports, full_content_embedding_store)"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n",
"\n",
"chat_config = LanguageModelConfig(\n",
" api_key=api_key,\n",
" type=ModelType.Chat,\n",
" model_provider=\"openai\",\n",
" model=\"gpt-4.1\",\n",
" max_retries=20,\n",
")\n",
"chat_model = ModelManager().get_or_create_chat_model(\n",
" name=\"local_search\",\n",
" model_type=ModelType.Chat,\n",
" config=chat_config,\n",
")\n",
"\n",
"tokenizer = get_tokenizer(chat_config)\n",
"\n",
"embedding_config = LanguageModelConfig(\n",
" api_key=api_key,\n",
" type=ModelType.Embedding,\n",
" model_provider=\"openai\",\n",
" model=\"text-embedding-3-large\",\n",
" max_retries=20,\n",
")\n",
"\n",
"text_embedder = ModelManager().get_or_create_embedding_model(\n",
" name=\"local_search_embedding\",\n",
" model_type=ModelType.Embedding,\n",
" config=embedding_config,\n",
")"
]
"source": "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n\nchat_config = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"gpt-4.1\",\n)\nchat_model = create_completion(chat_config)\ntokenizer = chat_model.tokenizer\n\nembedding_config = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"text-embedding-3-large\",\n)\ntext_embedder = create_embedding(embedding_config)"
},
{
"cell_type": "code",
Expand Down Expand Up @@ -207,4 +102,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
40 changes: 3 additions & 37 deletions docs/examples_notebooks/global_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd\n",
"from graphrag.config.enums import ModelType\n",
"from graphrag.config.models.language_model_config import LanguageModelConfig\n",
"from graphrag.language_model.manager import ModelManager\n",
"from graphrag.query.indexer_adapters import (\n",
" read_indexer_communities,\n",
" read_indexer_entities,\n",
" read_indexer_reports,\n",
")\n",
"from graphrag.query.structured_search.global_search.community_context import (\n",
" GlobalCommunityContext,\n",
")\n",
"from graphrag.query.structured_search.global_search.search import GlobalSearch\n",
"from graphrag.tokenizer.get_tokenizer import get_tokenizer"
]
"source": "import os\n\nimport pandas as pd\nfrom graphrag.query.indexer_adapters import (\n read_indexer_communities,\n read_indexer_entities,\n read_indexer_reports,\n)\nfrom graphrag.query.structured_search.global_search.community_context import (\n GlobalCommunityContext,\n)\nfrom graphrag.query.structured_search.global_search.search import GlobalSearch\nfrom graphrag_llm.completion import create_completion\nfrom graphrag_llm.config import ModelConfig"
},
{
"cell_type": "markdown",
Expand All @@ -55,24 +38,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n",
"\n",
"config = LanguageModelConfig(\n",
" api_key=api_key,\n",
" type=ModelType.Chat,\n",
" model_provider=\"openai\",\n",
" model=\"gpt-4.1\",\n",
" max_retries=20,\n",
")\n",
"model = ModelManager().get_or_create_chat_model(\n",
" name=\"global_search\",\n",
" model_type=ModelType.Chat,\n",
" config=config,\n",
")\n",
"\n",
"tokenizer = get_tokenizer(config)"
]
"source": "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n\nconfig = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"gpt-4.1\",\n)\nmodel = create_completion(config)\ntokenizer = model.tokenizer"
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -259,4 +225,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd\n",
"from graphrag.config.enums import ModelType\n",
"from graphrag.config.models.language_model_config import LanguageModelConfig\n",
"from graphrag.language_model.manager import ModelManager\n",
"from graphrag.query.indexer_adapters import (\n",
" read_indexer_communities,\n",
" read_indexer_entities,\n",
" read_indexer_reports,\n",
")\n",
"from graphrag.query.structured_search.global_search.community_context import (\n",
" GlobalCommunityContext,\n",
")\n",
"from graphrag.query.structured_search.global_search.search import GlobalSearch"
]
"source": "import os\n\nimport pandas as pd\nfrom graphrag.query.indexer_adapters import (\n read_indexer_communities,\n read_indexer_entities,\n read_indexer_reports,\n)\nfrom graphrag.query.structured_search.global_search.community_context import (\n GlobalCommunityContext,\n)\nfrom graphrag.query.structured_search.global_search.search import GlobalSearch\nfrom graphrag_llm.completion import create_completion\nfrom graphrag_llm.config import ModelConfig"
},
{
"cell_type": "markdown",
Expand All @@ -54,26 +38,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from graphrag.tokenizer.get_tokenizer import get_tokenizer\n",
"\n",
"api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n",
"\n",
"config = LanguageModelConfig(\n",
" api_key=api_key,\n",
" type=ModelType.Chat,\n",
" model_provider=\"openai\",\n",
" model=\"gpt-4.1\",\n",
" max_retries=20,\n",
")\n",
"model = ModelManager().get_or_create_chat_model(\n",
" name=\"global_search\",\n",
" model_type=ModelType.Chat,\n",
" config=config,\n",
")\n",
"\n",
"tokenizer = get_tokenizer(config)"
]
"source": "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n\nconfig = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"gpt-4.1\",\n)\nmodel = create_completion(config)\ntokenizer = model.tokenizer"
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -292,4 +257,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
60 changes: 3 additions & 57 deletions docs/examples_notebooks/local_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd\n",
"from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey\n",
"from graphrag.query.indexer_adapters import (\n",
" read_indexer_covariates,\n",
" read_indexer_entities,\n",
" read_indexer_relationships,\n",
" read_indexer_reports,\n",
" read_indexer_text_units,\n",
")\n",
"from graphrag.query.question_gen.local_gen import LocalQuestionGen\n",
"from graphrag.query.structured_search.local_search.mixed_context import (\n",
" LocalSearchMixedContext,\n",
")\n",
"from graphrag.query.structured_search.local_search.search import LocalSearch\n",
"from graphrag_vectors import IndexSchema, LanceDBVectorStore"
]
"source": "import os\n\nimport pandas as pd\nfrom graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey\nfrom graphrag.query.indexer_adapters import (\n read_indexer_covariates,\n read_indexer_entities,\n read_indexer_relationships,\n read_indexer_reports,\n read_indexer_text_units,\n)\nfrom graphrag.query.question_gen.local_gen import LocalQuestionGen\nfrom graphrag.query.structured_search.local_search.mixed_context import (\n LocalSearchMixedContext,\n)\nfrom graphrag.query.structured_search.local_search.search import LocalSearch\nfrom graphrag_llm.completion import create_completion\nfrom graphrag_llm.config import ModelConfig\nfrom graphrag_llm.embedding import create_embedding\nfrom graphrag_vectors import IndexSchema\nfrom graphrag_vectors.lancedb import LanceDBVectorStore"
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -189,43 +171,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from graphrag.config.enums import ModelType\n",
"from graphrag.config.models.language_model_config import LanguageModelConfig\n",
"from graphrag.language_model.manager import ModelManager\n",
"from graphrag.tokenizer.get_tokenizer import get_tokenizer\n",
"\n",
"api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n",
"\n",
"chat_config = LanguageModelConfig(\n",
" api_key=api_key,\n",
" type=ModelType.Chat,\n",
" model_provider=\"openai\",\n",
" model=\"gpt-4.1\",\n",
" max_retries=20,\n",
")\n",
"chat_model = ModelManager().get_or_create_chat_model(\n",
" name=\"local_search\",\n",
" model_type=ModelType.Chat,\n",
" config=chat_config,\n",
")\n",
"\n",
"embedding_config = LanguageModelConfig(\n",
" api_key=api_key,\n",
" type=ModelType.Embedding,\n",
" model_provider=\"openai\",\n",
" model=\"text-embedding-3-small\",\n",
" max_retries=20,\n",
")\n",
"\n",
"text_embedder = ModelManager().get_or_create_embedding_model(\n",
" name=\"local_search_embedding\",\n",
" model_type=ModelType.Embedding,\n",
" config=embedding_config,\n",
")\n",
"\n",
"tokenizer = get_tokenizer(chat_config)"
]
"source": "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n\nchat_config = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"gpt-4.1\",\n)\nchat_model = create_completion(chat_config)\ntokenizer = chat_model.tokenizer\n\nembedding_config = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"text-embedding-3-small\",\n)\ntext_embedder = create_embedding(embedding_config)"
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -469,4 +415,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}