Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Through this LinkedIn MCP server, AI assistants like Claude can connect to your
| `get_company_profile` | Extract company information with explicit section selection (posts, jobs); about-section references may include a `company_urn` entry carrying the numeric id used by LinkedIn's people-search `currentCompany` URL facet | working |
| `get_company_posts` | Get recent posts from a company's LinkedIn feed | working |
| `search_jobs` | Search for jobs with keywords and location filters | working |
| `search_people` | Search for people by keywords and location | working |
| `search_people` | Search for people by keywords, location, connection degree (1st/2nd/3rd), and current company | working |
| `get_job_details` | Get detailed information about a specific job posting | working |
| `close_session` | Close browser session and clean up resources | working |

Expand Down
62 changes: 62 additions & 0 deletions linkedin_mcp_server/scraping/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import asyncio
from dataclasses import dataclass
import json
import logging
import re
from typing import TYPE_CHECKING, Any, Literal
Expand Down Expand Up @@ -88,6 +89,10 @@

_SORT_BY_MAP = {"date": "DD", "relevance": "R"}

# Valid tokens for the people-search ``network`` facet.
# LinkedIn accepts "F" (1st-degree), "S" (2nd-degree), "O" (3rd-degree and beyond).
_NETWORK_TOKENS = ("F", "S", "O")

_DIALOG_SELECTOR = 'dialog[open], [role="dialog"]'
_DIALOG_TEXTAREA_SELECTOR = '[role="dialog"] textarea, dialog textarea'

Expand Down Expand Up @@ -268,6 +273,16 @@ def _normalize_csv(value: str, mapping: dict[str, str]) -> str:
return ",".join(mapping.get(p, p) for p in parts)


def _encode_list_facet(values: list[str]) -> str:
"""Encode a list of string values for a LinkedIn people-search list facet.

LinkedIn's people-search URL uses JSON-list encoded facets of the form
``["A","B"]``. This helper URL-encodes the rendered JSON so the final URL
contains e.g. ``%5B%22F%22%5D`` for ``["F"]``.
"""
return quote_plus(json.dumps(values, separators=(",", ":")))


# Patterns that mark the start of LinkedIn page chrome (sidebar/footer).
# Everything from the earliest match onwards is stripped.
_NOISE_MARKERS: list[re.Pattern[str]] = [
Expand Down Expand Up @@ -304,6 +319,16 @@ class ExtractedSection:
error: dict[str, Any] | None = None


class FilterValidationError(ValueError):
    """Raised for invalid ``search_people`` filter input (network token / URN shape).

    Deriving from ``ValueError`` keeps behaviour backward-compatible for
    direct extractor callers (``pytest.raises(ValueError)`` still matches),
    while giving the MCP tool wrapper a precise type to catch so the
    actionable message survives ``mask_error_details``.
    """


def strip_linkedin_noise(text: str) -> str:
"""Remove LinkedIn page chrome (footer, sidebar recommendations) from innerText.

Expand Down Expand Up @@ -2346,15 +2371,52 @@ async def search_people(
self,
keywords: str,
location: str | None = None,
network: list[str] | None = None,
current_company: str | None = None,
) -> dict[str, Any]:
"""Search for people and extract the results page.

Args:
keywords: Free-text query ("software engineer", "recruiter at Google").
location: Optional location filter ("New York", "Remote").
network: Optional connection-degree filter. Each element is one of
``"F"`` (1st-degree), ``"S"`` (2nd-degree), ``"O"`` (3rd-degree
and beyond). Example: ``["F"]`` to only return 1st-degree
connections. Invalid tokens raise ``ValueError``.
current_company: Optional current-employer filter. LinkedIn's
``currentCompany`` facet only filters on the numeric company
URN id (e.g. ``"1115"`` for SAP); plain company names are
accepted by the URL but ignored by LinkedIn and return the
unfiltered result set. Look up a company's URN via
``get_company_profile`` -- it is exposed under
``references["about"]``.

Returns:
{url, sections: {name: text}}
"""
if network is not None:
invalid = [t for t in network if t not in _NETWORK_TOKENS]
if invalid:
raise FilterValidationError(
"Invalid network token(s) "
f"{invalid!r}; expected any of {list(_NETWORK_TOKENS)!r}"
)

if current_company and not re.fullmatch(r"[0-9]+", current_company):
raise FilterValidationError(
f"current_company must be a numeric LinkedIn company URN id "
f"(e.g. '1115' for SAP); got {current_company!r}. Plain-text "
f"company names are silently ignored by LinkedIn. Look up the "
f'URN via get_company_profile -> references["about"].'
)

params = f"keywords={quote_plus(keywords)}"
if location:
params += f"&location={quote_plus(location)}"
if network:
params += f"&network={_encode_list_facet(network)}"
if current_company:
params += f"&currentCompany={_encode_list_facet([current_company])}"

url = f"https://www.linkedin.com/search/results/people/?{params}"
extracted = await self.extract_page(url, section_name="search_results")
Expand Down
34 changes: 32 additions & 2 deletions linkedin_mcp_server/tools/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import Annotated, Any

from fastmcp import Context, FastMCP
from fastmcp.exceptions import ToolError
from pydantic import Field

from linkedin_mcp_server.callbacks import MCPContextProgressCallback
Expand All @@ -17,6 +18,7 @@
from linkedin_mcp_server.dependencies import get_ready_extractor, handle_auth_error
from linkedin_mcp_server.error_handler import raise_tool_error
from linkedin_mcp_server.scraping import parse_person_sections
from linkedin_mcp_server.scraping.extractor import FilterValidationError

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -110,6 +112,8 @@ async def search_people(
keywords: str,
ctx: Context,
location: str | None = None,
network: list[str] | None = None,
current_company: str | None = None,
extractor: Any | None = None,
) -> dict[str, Any]:
"""
Expand All @@ -119,6 +123,15 @@ async def search_people(
keywords: Search keywords (e.g., "software engineer", "recruiter at Google")
ctx: FastMCP context for progress reporting
location: Optional location filter (e.g., "New York", "Remote")
network: Optional connection-degree filter. Each element is one of
"F" (1st-degree), "S" (2nd-degree), "O" (3rd-degree and beyond).
Example: ["F"] to only return 1st-degree connections.
current_company: Optional current-employer filter. LinkedIn's
currentCompany facet only filters on the numeric company URN id
(e.g. "1115" for SAP); plain company names are accepted by the
URL but ignored by LinkedIn and return the unfiltered result
set. Look up a company's URN via get_company_profile -- it is
exposed under references["about"].

Returns:
Dict with url, sections (name -> raw text), and optional references.
Expand All @@ -129,21 +142,38 @@ async def search_people(
ctx, tool_name="search_people"
)
logger.info(
"Searching people: keywords='%s', location='%s'",
"Searching people: keywords='%s', location='%s', network=%s, current_company='%s'",
keywords,
location,
network,
current_company,
)

await ctx.report_progress(
progress=0, total=100, message="Starting people search"
)

result = await extractor.search_people(keywords, location)
try:
result = await extractor.search_people(
keywords,
location,
network=network,
current_company=current_company,
)
except FilterValidationError as e:
# Validation messages carry actionable detail; surface
# them as ToolError so mask_error_details doesn't reduce
# them to "Error calling tool 'search_people'".
raise ToolError(str(e)) from e

await ctx.report_progress(progress=100, total=100, message="Complete")

return result

except ToolError:
# Already a properly formatted client-facing error; do not
# log it as "Unexpected error" via raise_tool_error.
raise
except AuthenticationError as e:
try:
await handle_auth_error(e, ctx)
Expand Down
2 changes: 1 addition & 1 deletion manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
},
{
"name": "search_people",
"description": "Search for people on LinkedIn by keywords and location"
"description": "Search for people on LinkedIn by keywords, location, connection degree (1st/2nd/3rd), and current company"
},
{
"name": "get_inbox",
Expand Down
85 changes: 85 additions & 0 deletions tests/test_scraping.py
Original file line number Diff line number Diff line change
Expand Up @@ -2227,6 +2227,91 @@ async def test_search_people_omits_orphaned_references(self, mock_page):
assert result["sections"] == {}
assert "references" not in result

async def test_search_people_network_filter_first_degree(self, mock_page):
extractor = LinkedInExtractor(mock_page)
with patch.object(
extractor,
"extract_page",
new_callable=AsyncMock,
return_value=extracted("Jane Doe"),
):
result = await extractor.search_people("engineer", network=["F"])

assert "network=%5B%22F%22%5D" in result["url"]

async def test_search_people_network_filter_multi_degree(self, mock_page):
extractor = LinkedInExtractor(mock_page)
with patch.object(
extractor,
"extract_page",
new_callable=AsyncMock,
return_value=extracted("Jane Doe"),
):
result = await extractor.search_people("engineer", network=["F", "S"])

assert "network=%5B%22F%22%2C%22S%22%5D" in result["url"]

async def test_search_people_current_company_filter(self, mock_page):
extractor = LinkedInExtractor(mock_page)
with patch.object(
extractor,
"extract_page",
new_callable=AsyncMock,
return_value=extracted("Jane Doe"),
):
result = await extractor.search_people("engineer", current_company="1115")

assert "currentCompany=%5B%221115%22%5D" in result["url"]

async def test_search_people_invalid_network_token_raises(self, mock_page):
extractor = LinkedInExtractor(mock_page)
with pytest.raises(ValueError, match="Invalid network token"):
await extractor.search_people("engineer", network=["X"])

async def test_search_people_rejects_plain_company_name(self, mock_page):
extractor = LinkedInExtractor(mock_page)
with pytest.raises(ValueError, match="must be a numeric"):
await extractor.search_people("engineer", current_company="SAP")

async def test_search_people_rejects_unicode_digit_company(self, mock_page):
"""LinkedIn URN ids are ASCII decimal; reject Unicode digits even
though ``str.isdigit()`` would accept them."""
extractor = LinkedInExtractor(mock_page)
with pytest.raises(ValueError, match="must be a numeric"):
await extractor.search_people("engineer", current_company="١١١٥")

async def test_search_people_empty_current_company_is_noop(self, mock_page):
extractor = LinkedInExtractor(mock_page)
with patch.object(
extractor,
"extract_page",
new_callable=AsyncMock,
return_value=extracted("Jane Doe"),
):
result = await extractor.search_people("engineer", current_company="")

assert "currentCompany" not in result["url"]

async def test_search_people_combines_all_filters(self, mock_page):
extractor = LinkedInExtractor(mock_page)
with patch.object(
extractor,
"extract_page",
new_callable=AsyncMock,
return_value=extracted("Jane Doe"),
):
result = await extractor.search_people(
"engineer",
location="Seattle",
network=["F"],
current_company="1115",
)

assert "keywords=engineer" in result["url"]
assert "location=Seattle" in result["url"]
assert "network=%5B%22F%22%5D" in result["url"]
assert "currentCompany=%5B%221115%22%5D" in result["url"]


class TestStripLinkedInNoise:
def test_strips_footer(self):
Expand Down
69 changes: 68 additions & 1 deletion tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,74 @@ async def test_search_people(self, mock_context):
)
assert "search_results" in result["sections"]
assert "pages_visited" not in result
mock_extractor.search_people.assert_awaited_once_with("AI engineer", "New York")
mock_extractor.search_people.assert_awaited_once_with(
"AI engineer",
"New York",
network=None,
current_company=None,
)

async def test_search_people_with_network_and_company_filters(self, mock_context):
expected = {
"url": (
"https://www.linkedin.com/search/results/people/"
"?keywords=engineer&network=%5B%22F%22%5D"
"&currentCompany=%5B%221115%22%5D"
),
"sections": {
"search_results": "Jennifer Bonuso\nPresident Americas at SAP"
},
}
mock_extractor = _make_mock_extractor(expected)

from linkedin_mcp_server.tools.person import register_person_tools

mcp = FastMCP("test")
register_person_tools(mcp)

tool_fn = await get_tool_fn(mcp, "search_people")
result = await tool_fn(
"engineer",
mock_context,
network=["F"],
current_company="1115",
extractor=mock_extractor,
)
assert "search_results" in result["sections"]
mock_extractor.search_people.assert_awaited_once_with(
"engineer",
None,
network=["F"],
current_company="1115",
)

async def test_search_people_validation_error_surfaced_as_tool_error(
self, mock_context
):
"""A FilterValidationError raised by the extractor should surface to
the MCP client as a ToolError carrying the same message, rather than
being collapsed to the generic "Error calling tool" mask."""
from fastmcp.exceptions import ToolError

from linkedin_mcp_server.scraping.extractor import FilterValidationError
from linkedin_mcp_server.tools.person import register_person_tools

mock_extractor = MagicMock()
mock_extractor.search_people = AsyncMock(
side_effect=FilterValidationError("must be a numeric URN")
)

mcp = FastMCP("test")
register_person_tools(mcp)
tool_fn = await get_tool_fn(mcp, "search_people")

with pytest.raises(ToolError, match="must be a numeric URN"):
await tool_fn(
"engineer",
mock_context,
current_company="SAP",
extractor=mock_extractor,
)

async def test_connect_with_person(self, mock_context):
expected = {
Expand Down
Loading