diff --git a/README.md b/README.md index d6d2836b..daa91df5 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Through this LinkedIn MCP server, AI assistants like Claude can connect to your | `get_company_profile` | Extract company information with explicit section selection (posts, jobs); about-section references may include a `company_urn` entry carrying the numeric id used by LinkedIn's people-search `currentCompany` URL facet | working | | `get_company_posts` | Get recent posts from a company's LinkedIn feed | working | | `search_jobs` | Search for jobs with keywords and location filters | working | -| `search_people` | Search for people by keywords and location | working | +| `search_people` | Search for people by keywords, location, connection degree (1st/2nd/3rd), and current company | working | | `get_job_details` | Get detailed information about a specific job posting | working | | `close_session` | Close browser session and clean up resources | working | diff --git a/linkedin_mcp_server/scraping/extractor.py b/linkedin_mcp_server/scraping/extractor.py index 0e35e5be..15bb0244 100644 --- a/linkedin_mcp_server/scraping/extractor.py +++ b/linkedin_mcp_server/scraping/extractor.py @@ -4,6 +4,7 @@ import asyncio from dataclasses import dataclass +import json import logging import re from typing import TYPE_CHECKING, Any, Literal @@ -88,6 +89,10 @@ _SORT_BY_MAP = {"date": "DD", "relevance": "R"} +# Valid tokens for the people-search ``network`` facet. +# LinkedIn accepts "F" (1st-degree), "S" (2nd-degree), "O" (3rd-degree and beyond). +_NETWORK_TOKENS = ("F", "S", "O") + _DIALOG_SELECTOR = 'dialog[open], [role="dialog"]' _DIALOG_TEXTAREA_SELECTOR = '[role="dialog"] textarea, dialog textarea' @@ -268,6 +273,16 @@ def _normalize_csv(value: str, mapping: dict[str, str]) -> str: return ",".join(mapping.get(p, p) for p in parts) +def _encode_list_facet(values: list[str]) -> str: + """Encode a list of string values for a LinkedIn people-search list facet. 
+ + LinkedIn's people-search URL uses JSON-list encoded facets of the form + ``["A","B"]``. This helper URL-encodes the rendered JSON so the final URL + contains e.g. ``%5B%22F%22%5D`` for ``["F"]``. + """ + return quote_plus(json.dumps(values, separators=(",", ":"))) + + # Patterns that mark the start of LinkedIn page chrome (sidebar/footer). # Everything from the earliest match onwards is stripped. _NOISE_MARKERS: list[re.Pattern[str]] = [ @@ -304,6 +319,16 @@ class ExtractedSection: error: dict[str, Any] | None = None +class FilterValidationError(ValueError): + """Invalid ``search_people`` filter input (network token / URN shape). + + Subclassing ``ValueError`` keeps backward-compatible behaviour for + direct extractor callers (``pytest.raises(ValueError)`` matches), while + letting the MCP tool wrapper catch this case precisely and surface the + actionable message past ``mask_error_details``. + """ + + def strip_linkedin_noise(text: str) -> str: """Remove LinkedIn page chrome (footer, sidebar recommendations) from innerText. @@ -2346,15 +2371,52 @@ async def search_people( self, keywords: str, location: str | None = None, + network: list[str] | None = None, + current_company: str | None = None, ) -> dict[str, Any]: """Search for people and extract the results page. + Args: + keywords: Free-text query ("software engineer", "recruiter at Google"). + location: Optional location filter ("New York", "Remote"). + network: Optional connection-degree filter. Each element is one of + ``"F"`` (1st-degree), ``"S"`` (2nd-degree), ``"O"`` (3rd-degree + and beyond). Example: ``["F"]`` to only return 1st-degree + connections. Invalid tokens raise ``ValueError``. + current_company: Optional current-employer filter. LinkedIn's + ``currentCompany`` facet only filters on the numeric company + URN id (e.g. ``"1115"`` for SAP); plain company names are + accepted by the URL but ignored by LinkedIn and return the + unfiltered result set. 
Look up a company's URN via + ``get_company_profile`` -- it is exposed under + ``references["about"]``. Returns: {url, sections: {name: text}} """ + if network is not None: + invalid = [t for t in network if t not in _NETWORK_TOKENS] + if invalid: + raise FilterValidationError( + "Invalid network token(s) " + f"{invalid!r}; expected any of {list(_NETWORK_TOKENS)!r}" + ) + + if current_company and not re.fullmatch(r"[0-9]+", current_company): + raise FilterValidationError( + f"current_company must be a numeric LinkedIn company URN id " + f"(e.g. '1115' for SAP); got {current_company!r}. Plain-text " + f"company names are silently ignored by LinkedIn. Look up the " + f'URN via get_company_profile -> references["about"].' + ) + params = f"keywords={quote_plus(keywords)}" if location: params += f"&location={quote_plus(location)}" + if network: + params += f"&network={_encode_list_facet(network)}" + if current_company: + params += f"&currentCompany={_encode_list_facet([current_company])}" url = f"https://www.linkedin.com/search/results/people/?{params}" extracted = await self.extract_page(url, section_name="search_results") diff --git a/linkedin_mcp_server/tools/person.py b/linkedin_mcp_server/tools/person.py index 66160ddd..6e1737d3 100644 --- a/linkedin_mcp_server/tools/person.py +++ b/linkedin_mcp_server/tools/person.py @@ -9,6 +9,7 @@ from typing import Annotated, Any from fastmcp import Context, FastMCP +from fastmcp.exceptions import ToolError from pydantic import Field from linkedin_mcp_server.callbacks import MCPContextProgressCallback @@ -17,6 +18,7 @@ from linkedin_mcp_server.dependencies import get_ready_extractor, handle_auth_error from linkedin_mcp_server.error_handler import raise_tool_error from linkedin_mcp_server.scraping import parse_person_sections +from linkedin_mcp_server.scraping.extractor import FilterValidationError logger = logging.getLogger(__name__) @@ -110,6 +112,8 @@ async def search_people( keywords: str, ctx: Context, location: str | None = 
None, + network: list[str] | None = None, + current_company: str | None = None, extractor: Any | None = None, ) -> dict[str, Any]: """ @@ -119,6 +123,15 @@ async def search_people( keywords: Search keywords (e.g., "software engineer", "recruiter at Google") ctx: FastMCP context for progress reporting location: Optional location filter (e.g., "New York", "Remote") + network: Optional connection-degree filter. Each element is one of + "F" (1st-degree), "S" (2nd-degree), "O" (3rd-degree and beyond). + Example: ["F"] to only return 1st-degree connections. + current_company: Optional current-employer filter. LinkedIn's + currentCompany facet only filters on the numeric company URN id + (e.g. "1115" for SAP); plain company names are accepted by the + URL but ignored by LinkedIn and return the unfiltered result + set. Look up a company's URN via get_company_profile -- it is + exposed under references["about"]. Returns: Dict with url, sections (name -> raw text), and optional references. @@ -129,21 +142,38 @@ async def search_people( ctx, tool_name="search_people" ) logger.info( - "Searching people: keywords='%s', location='%s'", + "Searching people: keywords='%s', location='%s', network=%s, current_company='%s'", keywords, location, + network, + current_company, ) await ctx.report_progress( progress=0, total=100, message="Starting people search" ) - result = await extractor.search_people(keywords, location) + try: + result = await extractor.search_people( + keywords, + location, + network=network, + current_company=current_company, + ) + except FilterValidationError as e: + # Validation messages carry actionable detail; surface + # them as ToolError so mask_error_details doesn't reduce + # them to "Error calling tool 'search_people'". 
+ raise ToolError(str(e)) from e await ctx.report_progress(progress=100, total=100, message="Complete") return result + except ToolError: + # Already a properly formatted client-facing error; do not + # log it as "Unexpected error" via raise_tool_error. + raise except AuthenticationError as e: try: await handle_auth_error(e, ctx) diff --git a/manifest.json b/manifest.json index 3d3513a0..1b87a5cd 100644 --- a/manifest.json +++ b/manifest.json @@ -75,7 +75,7 @@ }, { "name": "search_people", - "description": "Search for people on LinkedIn by keywords and location" + "description": "Search for people on LinkedIn by keywords, location, connection degree (1st/2nd/3rd), and current company" }, { "name": "get_inbox", diff --git a/tests/test_scraping.py b/tests/test_scraping.py index e3f19258..ba71574b 100644 --- a/tests/test_scraping.py +++ b/tests/test_scraping.py @@ -2227,6 +2227,91 @@ async def test_search_people_omits_orphaned_references(self, mock_page): assert result["sections"] == {} assert "references" not in result + async def test_search_people_network_filter_first_degree(self, mock_page): + extractor = LinkedInExtractor(mock_page) + with patch.object( + extractor, + "extract_page", + new_callable=AsyncMock, + return_value=extracted("Jane Doe"), + ): + result = await extractor.search_people("engineer", network=["F"]) + + assert "network=%5B%22F%22%5D" in result["url"] + + async def test_search_people_network_filter_multi_degree(self, mock_page): + extractor = LinkedInExtractor(mock_page) + with patch.object( + extractor, + "extract_page", + new_callable=AsyncMock, + return_value=extracted("Jane Doe"), + ): + result = await extractor.search_people("engineer", network=["F", "S"]) + + assert "network=%5B%22F%22%2C%22S%22%5D" in result["url"] + + async def test_search_people_current_company_filter(self, mock_page): + extractor = LinkedInExtractor(mock_page) + with patch.object( + extractor, + "extract_page", + new_callable=AsyncMock, + return_value=extracted("Jane 
Doe"), + ): + result = await extractor.search_people("engineer", current_company="1115") + + assert "currentCompany=%5B%221115%22%5D" in result["url"] + + async def test_search_people_invalid_network_token_raises(self, mock_page): + extractor = LinkedInExtractor(mock_page) + with pytest.raises(ValueError, match="Invalid network token"): + await extractor.search_people("engineer", network=["X"]) + + async def test_search_people_rejects_plain_company_name(self, mock_page): + extractor = LinkedInExtractor(mock_page) + with pytest.raises(ValueError, match="must be a numeric"): + await extractor.search_people("engineer", current_company="SAP") + + async def test_search_people_rejects_unicode_digit_company(self, mock_page): + """LinkedIn URN ids are ASCII decimal; reject Unicode digits even + though ``str.isdigit()`` would accept them.""" + extractor = LinkedInExtractor(mock_page) + with pytest.raises(ValueError, match="must be a numeric"): + await extractor.search_people("engineer", current_company="١١١٥") + + async def test_search_people_empty_current_company_is_noop(self, mock_page): + extractor = LinkedInExtractor(mock_page) + with patch.object( + extractor, + "extract_page", + new_callable=AsyncMock, + return_value=extracted("Jane Doe"), + ): + result = await extractor.search_people("engineer", current_company="") + + assert "currentCompany" not in result["url"] + + async def test_search_people_combines_all_filters(self, mock_page): + extractor = LinkedInExtractor(mock_page) + with patch.object( + extractor, + "extract_page", + new_callable=AsyncMock, + return_value=extracted("Jane Doe"), + ): + result = await extractor.search_people( + "engineer", + location="Seattle", + network=["F"], + current_company="1115", + ) + + assert "keywords=engineer" in result["url"] + assert "location=Seattle" in result["url"] + assert "network=%5B%22F%22%5D" in result["url"] + assert "currentCompany=%5B%221115%22%5D" in result["url"] + class TestStripLinkedInNoise: def 
test_strips_footer(self): diff --git a/tests/test_tools.py b/tests/test_tools.py index 75ac2576..352efe1f 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -254,7 +254,74 @@ async def test_search_people(self, mock_context): ) assert "search_results" in result["sections"] assert "pages_visited" not in result - mock_extractor.search_people.assert_awaited_once_with("AI engineer", "New York") + mock_extractor.search_people.assert_awaited_once_with( + "AI engineer", + "New York", + network=None, + current_company=None, + ) + + async def test_search_people_with_network_and_company_filters(self, mock_context): + expected = { + "url": ( + "https://www.linkedin.com/search/results/people/" + "?keywords=engineer&network=%5B%22F%22%5D" + "&currentCompany=%5B%221115%22%5D" + ), + "sections": { + "search_results": "Jennifer Bonuso\nPresident Americas at SAP" + }, + } + mock_extractor = _make_mock_extractor(expected) + + from linkedin_mcp_server.tools.person import register_person_tools + + mcp = FastMCP("test") + register_person_tools(mcp) + + tool_fn = await get_tool_fn(mcp, "search_people") + result = await tool_fn( + "engineer", + mock_context, + network=["F"], + current_company="1115", + extractor=mock_extractor, + ) + assert "search_results" in result["sections"] + mock_extractor.search_people.assert_awaited_once_with( + "engineer", + None, + network=["F"], + current_company="1115", + ) + + async def test_search_people_validation_error_surfaced_as_tool_error( + self, mock_context + ): + """A FilterValidationError raised by the extractor should surface to + the MCP client as a ToolError carrying the same message, rather than + being collapsed to the generic "Error calling tool" mask.""" + from fastmcp.exceptions import ToolError + + from linkedin_mcp_server.scraping.extractor import FilterValidationError + from linkedin_mcp_server.tools.person import register_person_tools + + mock_extractor = MagicMock() + mock_extractor.search_people = AsyncMock( + 
side_effect=FilterValidationError("must be a numeric URN") + ) + + mcp = FastMCP("test") + register_person_tools(mcp) + tool_fn = await get_tool_fn(mcp, "search_people") + + with pytest.raises(ToolError, match="must be a numeric URN"): + await tool_fn( + "engineer", + mock_context, + current_company="SAP", + extractor=mock_extractor, + ) async def test_connect_with_person(self, mock_context): expected = {