diff --git a/.semversioner/next-release/6788f527-7aea-4338-9a55-dd2fb0b31851.json b/.semversioner/next-release/6788f527-7aea-4338-9a55-dd2fb0b31851.json new file mode 100644 index 0000000000..c752b65e71 --- /dev/null +++ b/.semversioner/next-release/6788f527-7aea-4338-9a55-dd2fb0b31851.json @@ -0,0 +1,4 @@ +{ + "description": "Fix deprecated imports in example notebooks (ModelType, LanguageModelConfig, ModelManager).", + "type": "patch" +} diff --git a/docs/examples_notebooks/drift_search.ipynb b/docs/examples_notebooks/drift_search.ipynb index 8d53c7d9cc..b47aa20d2e 100644 --- a/docs/examples_notebooks/drift_search.ipynb +++ b/docs/examples_notebooks/drift_search.ipynb @@ -15,119 +15,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import os\n", - "\n", - "import pandas as pd\n", - "from graphrag.config.enums import ModelType\n", - "from graphrag.config.models.drift_search_config import DRIFTSearchConfig\n", - "from graphrag.config.models.language_model_config import LanguageModelConfig\n", - "from graphrag.language_model.manager import ModelManager\n", - "from graphrag.query.indexer_adapters import (\n", - " read_indexer_entities,\n", - " read_indexer_relationships,\n", - " read_indexer_report_embeddings,\n", - " read_indexer_reports,\n", - " read_indexer_text_units,\n", - ")\n", - "from graphrag.query.structured_search.drift_search.drift_context import (\n", - " DRIFTSearchContextBuilder,\n", - ")\n", - "from graphrag.query.structured_search.drift_search.search import DRIFTSearch\n", - "from graphrag.tokenizer.get_tokenizer import get_tokenizer\n", - "from graphrag_vectors.lancedb import LanceDBVectorStore\n", - "\n", - "INPUT_DIR = \"./inputs/operation dulce\"\n", - "LANCEDB_URI = f\"{INPUT_DIR}/lancedb\"\n", - "\n", - "COMMUNITY_REPORT_TABLE = \"community_reports\"\n", - "COMMUNITY_TABLE = \"communities\"\n", - "ENTITY_TABLE = \"entities\"\n", - "RELATIONSHIP_TABLE = \"relationships\"\n", - "COVARIATE_TABLE = \"covariates\"\n", - "TEXT_UNIT_TABLE = \"text_units\"\n", - "COMMUNITY_LEVEL = 2\n", - "\n", - "\n", - "# read nodes table to get community and degree data\n", - "entity_df = pd.read_parquet(f\"{INPUT_DIR}/{ENTITY_TABLE}.parquet\")\n", - "community_df = pd.read_parquet(f\"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet\")\n", - "\n", - "print(f\"Entity df columns: {entity_df.columns}\")\n", - "\n", - "entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)\n", - "\n", - "# load description embeddings to an in-memory lancedb vectorstore\n", - "# to connect to a remote db, specify url and port values.\n", - "description_embedding_store = LanceDBVectorStore(\n", - " db_uri=LANCEDB_URI,\n", - " index_name=\"entity_description\",\n", - ")\n", - "description_embedding_store.connect()\n", - "\n", - "full_content_embedding_store = LanceDBVectorStore(\n", - " db_uri=LANCEDB_URI,\n", - " index_name=\"community_full_content\",\n", - ")\n", - "full_content_embedding_store.connect()\n", - "\n", - "print(f\"Entity count: {len(entity_df)}\")\n", - "entity_df.head()\n", - "\n", - "relationship_df = pd.read_parquet(f\"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet\")\n", - "relationships = read_indexer_relationships(relationship_df)\n", - "\n", - "print(f\"Relationship count: {len(relationship_df)}\")\n", - "relationship_df.head()\n", - "\n", - "text_unit_df = pd.read_parquet(f\"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet\")\n", - "text_units = read_indexer_text_units(text_unit_df)\n", - "\n", - "print(f\"Text unit records: {len(text_unit_df)}\")\n", - "text_unit_df.head()\n", - "\n", - "report_df = pd.read_parquet(f\"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet\")\n", - "reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)\n", - "read_indexer_report_embeddings(reports, full_content_embedding_store)" - ] + "source": "import os\n\nimport pandas as pd\nfrom graphrag.config.models.drift_search_config import DRIFTSearchConfig\nfrom graphrag.query.indexer_adapters import (\n read_indexer_entities,\n read_indexer_relationships,\n read_indexer_report_embeddings,\n read_indexer_reports,\n read_indexer_text_units,\n)\nfrom graphrag.query.structured_search.drift_search.drift_context import (\n DRIFTSearchContextBuilder,\n)\nfrom graphrag.query.structured_search.drift_search.search import DRIFTSearch\nfrom graphrag_llm.completion import create_completion\nfrom graphrag_llm.config import ModelConfig\nfrom graphrag_llm.embedding import create_embedding\nfrom graphrag_vectors.lancedb import LanceDBVectorStore\n\nINPUT_DIR = \"./inputs/operation dulce\"\nLANCEDB_URI = f\"{INPUT_DIR}/lancedb\"\n\nCOMMUNITY_REPORT_TABLE = \"community_reports\"\nCOMMUNITY_TABLE = \"communities\"\nENTITY_TABLE = \"entities\"\nRELATIONSHIP_TABLE = \"relationships\"\nCOVARIATE_TABLE = \"covariates\"\nTEXT_UNIT_TABLE = \"text_units\"\nCOMMUNITY_LEVEL = 2\n\n\n# read nodes table to get community and degree data\nentity_df = pd.read_parquet(f\"{INPUT_DIR}/{ENTITY_TABLE}.parquet\")\ncommunity_df = pd.read_parquet(f\"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet\")\n\nprint(f\"Entity df columns: {entity_df.columns}\")\n\nentities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)\n\n# load description embeddings to an in-memory lancedb vectorstore\n# to connect to a remote db, specify url and port values.\ndescription_embedding_store = LanceDBVectorStore(\n db_uri=LANCEDB_URI,\n index_name=\"entity_description\",\n)\ndescription_embedding_store.connect()\n\nfull_content_embedding_store = LanceDBVectorStore(\n db_uri=LANCEDB_URI,\n index_name=\"community_full_content\",\n)\nfull_content_embedding_store.connect()\n\nprint(f\"Entity count: {len(entity_df)}\")\nentity_df.head()\n\nrelationship_df = pd.read_parquet(f\"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet\")\nrelationships = read_indexer_relationships(relationship_df)\n\nprint(f\"Relationship count: {len(relationship_df)}\")\nrelationship_df.head()\n\ntext_unit_df = pd.read_parquet(f\"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet\")\ntext_units = read_indexer_text_units(text_unit_df)\n\nprint(f\"Text unit records: {len(text_unit_df)}\")\ntext_unit_df.head()\n\nreport_df = pd.read_parquet(f\"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet\")\nreports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)\nread_indexer_report_embeddings(reports, full_content_embedding_store)" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n", - "\n", - "chat_config = LanguageModelConfig(\n", - " api_key=api_key,\n", - " type=ModelType.Chat,\n", - " model_provider=\"openai\",\n", - " model=\"gpt-4.1\",\n", - " max_retries=20,\n", - ")\n", - "chat_model = ModelManager().get_or_create_chat_model(\n", - " name=\"local_search\",\n", - " model_type=ModelType.Chat,\n", - " config=chat_config,\n", - ")\n", - "\n", - "tokenizer = get_tokenizer(chat_config)\n", - "\n", - "embedding_config = LanguageModelConfig(\n", - " api_key=api_key,\n", - " type=ModelType.Embedding,\n", - " model_provider=\"openai\",\n", - " model=\"text-embedding-3-large\",\n", - " max_retries=20,\n", - ")\n", - "\n", - "text_embedder = ModelManager().get_or_create_embedding_model(\n", - " name=\"local_search_embedding\",\n", - " model_type=ModelType.Embedding,\n", - " config=embedding_config,\n", - ")" - ] + "source": "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n\nchat_config = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"gpt-4.1\",\n)\nchat_model = create_completion(chat_config)\ntokenizer = chat_model.tokenizer\n\nembedding_config = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"text-embedding-3-large\",\n)\ntext_embedder = create_embedding(embedding_config)" }, { "cell_type": "code", @@ -207,4 +102,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/docs/examples_notebooks/global_search.ipynb b/docs/examples_notebooks/global_search.ipynb index 605f704bd2..2ada2c2f23 100644 --- a/docs/examples_notebooks/global_search.ipynb +++ b/docs/examples_notebooks/global_search.ipynb @@ -15,24 +15,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import os\n", - "\n", - "import pandas as pd\n", - "from graphrag.config.enums import ModelType\n", - "from graphrag.config.models.language_model_config import LanguageModelConfig\n", - "from graphrag.language_model.manager import ModelManager\n", - "from graphrag.query.indexer_adapters import (\n", - " read_indexer_communities,\n", - " read_indexer_entities,\n", - " read_indexer_reports,\n", - ")\n", - "from graphrag.query.structured_search.global_search.community_context import (\n", - " GlobalCommunityContext,\n", - ")\n", - "from graphrag.query.structured_search.global_search.search import GlobalSearch\n", - "from graphrag.tokenizer.get_tokenizer import get_tokenizer" - ] + "source": "import os\n\nimport pandas as pd\nfrom graphrag.query.indexer_adapters import (\n read_indexer_communities,\n read_indexer_entities,\n read_indexer_reports,\n)\nfrom graphrag.query.structured_search.global_search.community_context import (\n GlobalCommunityContext,\n)\nfrom graphrag.query.structured_search.global_search.search import GlobalSearch\nfrom graphrag_llm.completion import create_completion\nfrom graphrag_llm.config import ModelConfig" }, { "cell_type": "markdown", @@ -55,24 +38,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n", - "\n", - "config = LanguageModelConfig(\n", - " api_key=api_key,\n", - " type=ModelType.Chat,\n", - " model_provider=\"openai\",\n", - " model=\"gpt-4.1\",\n", - " max_retries=20,\n", - ")\n", - "model = ModelManager().get_or_create_chat_model(\n", - " name=\"global_search\",\n", - " model_type=ModelType.Chat,\n", - " config=config,\n", - ")\n", - "\n", - "tokenizer = get_tokenizer(config)" - ] + "source": "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n\nconfig = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"gpt-4.1\",\n)\nmodel = create_completion(config)\ntokenizer = model.tokenizer" }, { "cell_type": "markdown", @@ -259,4 +225,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/docs/examples_notebooks/global_search_with_dynamic_community_selection.ipynb b/docs/examples_notebooks/global_search_with_dynamic_community_selection.ipynb index 6b3763d73b..95616cf3a0 100644 --- a/docs/examples_notebooks/global_search_with_dynamic_community_selection.ipynb +++ b/docs/examples_notebooks/global_search_with_dynamic_community_selection.ipynb @@ -15,23 +15,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import os\n", - "\n", - "import pandas as pd\n", - "from graphrag.config.enums import ModelType\n", - "from graphrag.config.models.language_model_config import LanguageModelConfig\n", - "from graphrag.language_model.manager import ModelManager\n", - "from graphrag.query.indexer_adapters import (\n", - " read_indexer_communities,\n", - " read_indexer_entities,\n", - " read_indexer_reports,\n", - ")\n", - "from graphrag.query.structured_search.global_search.community_context import (\n", - " GlobalCommunityContext,\n", - ")\n", - "from graphrag.query.structured_search.global_search.search import GlobalSearch" - ] + "source": "import os\n\nimport pandas as pd\nfrom graphrag.query.indexer_adapters import (\n read_indexer_communities,\n read_indexer_entities,\n read_indexer_reports,\n)\nfrom graphrag.query.structured_search.global_search.community_context import (\n GlobalCommunityContext,\n)\nfrom graphrag.query.structured_search.global_search.search import GlobalSearch\nfrom graphrag_llm.completion import create_completion\nfrom graphrag_llm.config import ModelConfig" }, { "cell_type": "markdown", @@ -54,26 +38,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from graphrag.tokenizer.get_tokenizer import get_tokenizer\n", - "\n", - "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n", - "\n", - "config = LanguageModelConfig(\n", - " api_key=api_key,\n", - " type=ModelType.Chat,\n", - " model_provider=\"openai\",\n", - " model=\"gpt-4.1\",\n", - " max_retries=20,\n", - ")\n", - "model = ModelManager().get_or_create_chat_model(\n", - " name=\"global_search\",\n", - " model_type=ModelType.Chat,\n", - " config=config,\n", - ")\n", - "\n", - "tokenizer = get_tokenizer(config)" - ] + "source": "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n\nconfig = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"gpt-4.1\",\n)\nmodel = create_completion(config)\ntokenizer = model.tokenizer" }, { "cell_type": "markdown", @@ -292,4 +257,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/docs/examples_notebooks/local_search.ipynb b/docs/examples_notebooks/local_search.ipynb index f7f0c5a54b..339bdcd009 100644 --- a/docs/examples_notebooks/local_search.ipynb +++ b/docs/examples_notebooks/local_search.ipynb @@ -15,25 +15,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import os\n", - "\n", - "import pandas as pd\n", - "from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey\n", - "from graphrag.query.indexer_adapters import (\n", - " read_indexer_covariates,\n", - " read_indexer_entities,\n", - " read_indexer_relationships,\n", - " read_indexer_reports,\n", - " read_indexer_text_units,\n", - ")\n", - "from graphrag.query.question_gen.local_gen import LocalQuestionGen\n", - "from graphrag.query.structured_search.local_search.mixed_context import (\n", - " LocalSearchMixedContext,\n", - ")\n", - "from graphrag.query.structured_search.local_search.search import LocalSearch\n", - "from graphrag_vectors import IndexSchema, LanceDBVectorStore" - ] + "source": "import os\n\nimport pandas as pd\nfrom graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey\nfrom graphrag.query.indexer_adapters import (\n read_indexer_covariates,\n read_indexer_entities,\n read_indexer_relationships,\n read_indexer_reports,\n read_indexer_text_units,\n)\nfrom graphrag.query.question_gen.local_gen import LocalQuestionGen\nfrom graphrag.query.structured_search.local_search.mixed_context import (\n LocalSearchMixedContext,\n)\nfrom graphrag.query.structured_search.local_search.search import LocalSearch\nfrom graphrag_llm.completion import create_completion\nfrom graphrag_llm.config import ModelConfig\nfrom graphrag_llm.embedding import create_embedding\nfrom graphrag_vectors import IndexSchema\nfrom graphrag_vectors.lancedb import LanceDBVectorStore" }, { "cell_type": "markdown", @@ -189,43 +171,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from graphrag.config.enums import ModelType\n", - "from graphrag.config.models.language_model_config import LanguageModelConfig\n", - "from graphrag.language_model.manager import ModelManager\n", - "from graphrag.tokenizer.get_tokenizer import get_tokenizer\n", - "\n", - "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n", - "\n", - "chat_config = LanguageModelConfig(\n", - " api_key=api_key,\n", - " type=ModelType.Chat,\n", - " model_provider=\"openai\",\n", - " model=\"gpt-4.1\",\n", - " max_retries=20,\n", - ")\n", - "chat_model = ModelManager().get_or_create_chat_model(\n", - " name=\"local_search\",\n", - " model_type=ModelType.Chat,\n", - " config=chat_config,\n", - ")\n", - "\n", - "embedding_config = LanguageModelConfig(\n", - " api_key=api_key,\n", - " type=ModelType.Embedding,\n", - " model_provider=\"openai\",\n", - " model=\"text-embedding-3-small\",\n", - " max_retries=20,\n", - ")\n", - "\n", - "text_embedder = ModelManager().get_or_create_embedding_model(\n", - " name=\"local_search_embedding\",\n", - " model_type=ModelType.Embedding,\n", - " config=embedding_config,\n", - ")\n", - "\n", - "tokenizer = get_tokenizer(chat_config)" - ] + "source": "api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n\nchat_config = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"gpt-4.1\",\n)\nchat_model = create_completion(chat_config)\ntokenizer = chat_model.tokenizer\n\nembedding_config = ModelConfig(\n api_key=api_key,\n model_provider=\"openai\",\n model=\"text-embedding-3-small\",\n)\ntext_embedder = create_embedding(embedding_config)" }, { "cell_type": "markdown", @@ -469,4 +415,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file