Skip to content

Commit 361ace8

Browse files
committed
More robust parsing of PHPDoc types
1 parent f3c416c commit 361ace8

4 files changed

Lines changed: 848 additions & 2 deletions

File tree

example.php

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1507,3 +1507,95 @@ function isRegularUser(mixed $value): bool
15071507
{
15081508
return !$value instanceof AdminUser;
15091509
}
1510+
1511+
// ─── Multi-line @return & Broken Docblock Recovery ──────────────────────────
1512+
//
1513+
// PHPantomLSP handles @return types that span multiple docblock lines
1514+
// (common in Laravel's Collection, Eloquent Builder, etc.). When a
1515+
// multi-line @return cannot be fully parsed, the base type is recovered
1516+
// (e.g. `static<…broken` → `static`) so resolution still works.
1517+
1518+
// Generic collection stub used by the completion examples that follow.
// None of the methods declares a native return type and every body is empty,
// so the docblocks are the only source of return-type information here.
/**
1519+
* @template TKey of array-key
1520+
* @template TValue
1521+
*/
1522+
class FluentCollection
1523+
{
1524+
/**
1525+
* Multi-line @return with conditionals inside generics.
1526+
* PHPantomLSP joins the lines and parses the full type.
1527+
*
1528+
* @template TGroupKey of array-key
1529+
*
1530+
* @param (callable(TValue, TKey): TGroupKey)|array|string $groupBy
1531+
* @param bool $preserveKeys
1532+
* @return static<
1533+
* ($groupBy is (array|string)
1534+
* ? array-key
1535+
* : TGroupKey),
1536+
* static<($preserveKeys is true ? TKey : int), TValue>
1537+
* >
1538+
*/
1539+
public function groupBy($groupBy, $preserveKeys = false)
1540+
{
// Stub: empty body, nothing is returned.
1541+
}
1542+
1543+
/**
1544+
* Single-line @return — works as before.
1545+
*
1546+
* @template TMapValue
1547+
*
1548+
* @param callable(TValue, TKey): TMapValue $callback
1549+
* @return static<TKey, TMapValue>
1550+
*/
1551+
public function map(callable $callback)
1552+
{
// Stub: empty body, nothing is returned.
1553+
}
1554+
1555+
/**
1556+
* Multi-line @return with nested generics spanning lines.
1557+
*
1558+
* @return array<
1559+
* string,
1560+
* FluentCollection<int, TValue>
1561+
* >
1562+
*/
1563+
public function toGroupedArray()
1564+
{
// Stub: empty body, nothing is returned.
1565+
}
1566+
1567+
/**
1568+
* @return static<TKey, TValue>
1569+
*/
1570+
public function values()
1571+
{
// Stub: empty body, nothing is returned.
1572+
}
1573+
}
1574+
1575+
// Entry point for the "Try:" examples below. The $value argument is not
// read; a fresh, empty FluentCollection is returned on every call.
/** @return FluentCollection */
1576+
function collect(mixed $value = []): FluentCollection
1577+
{
1578+
return new FluentCollection();
1579+
}
1580+
1581+
// Try: collect([])-> ← shows map, groupBy, values, toGroupedArray
1582+
// Try: collect([])->map( ← map() resolves correctly despite groupBy's complex @return
1583+
1584+
// Fixture for docblock recovery. broken() pairs an unterminated
// `@return static<` docblock with a native `static` return type; working()
// has no docblock at all and relies on its native `string` return type.
class BrokenDocRecovery
1585+
{
1586+
/**
1587+
* Broken multi-line @return — base `static` is recovered.
1588+
* @return static<
1589+
*/
1590+
public function broken(): static
1591+
{
1592+
return $this;
1593+
}
1594+
1595+
public function working(): string
1596+
{
1597+
return 'hello';
1598+
}
1599+
}
1600+
1601+
// Try: (new BrokenDocRecovery())->broken()-> ← recovers `static`, shows broken() and working()

src/docblock/tags.rs

Lines changed: 166 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,25 @@ pub fn resolve_effective_type(
11301130
native_type: Option<&str>,
11311131
docblock_type: Option<&str>,
11321132
) -> Option<String> {
1133-
match (native_type, docblock_type) {
1133+
// When the docblock type has unclosed brackets (e.g. a multi-line
1134+
// `@return` that couldn't be fully joined), treat it as broken and
1135+
// attempt partial recovery. If recovery yields nothing useful, fall
1136+
// back to the native type so that resolution is never blocked by a
1137+
// malformed PHPDoc annotation.
1138+
let sanitised_doc = docblock_type.and_then(|doc| {
1139+
if has_unclosed_brackets(doc) {
1140+
let base = recover_base_type(doc);
1141+
if base.is_empty() {
1142+
None
1143+
} else {
1144+
Some(base.to_string())
1145+
}
1146+
} else {
1147+
Some(doc.to_string())
1148+
}
1149+
});
1150+
1151+
match (native_type, sanitised_doc.as_deref()) {
11341152
// Docblock provided, no native hint → use docblock.
11351153
(None, Some(doc)) => Some(doc.to_string()),
11361154
// Both present → override only if compatible.
@@ -1165,7 +1183,10 @@ fn extract_tag_type(docblock: &str, tag: &str) -> Option<String> {
11651183
.strip_suffix("*/")
11661184
.unwrap_or(docblock);
11671185

1168-
for line in inner.lines() {
1186+
let lines: Vec<&str> = inner.lines().collect();
1187+
let mut i = 0;
1188+
while i < lines.len() {
1189+
let line = lines[i];
11691190
// Strip leading whitespace and the `*` gutter common in docblocks.
11701191
let trimmed = line.trim().trim_start_matches('*').trim();
11711192

@@ -1174,6 +1195,7 @@ fn extract_tag_type(docblock: &str, tag: &str) -> Option<String> {
11741195
// at end-of-line, which is invalid and we skip).
11751196
let rest = rest.trim_start();
11761197
if rest.is_empty() {
1198+
i += 1;
11771199
continue;
11781200
}
11791201

@@ -1185,14 +1207,156 @@ fn extract_tag_type(docblock: &str, tag: &str) -> Option<String> {
11851207

11861208
// Extract the type token, respecting `<…>` nesting so that
11871209
// generics like `Collection<int, User>` are treated as one unit.
1210+
//
1211+
// When the type spans multiple docblock lines (e.g.
1212+
// `@return static<\n * int,\n * string\n * >`), the
1213+
// single-line `split_type_token` will hit end-of-line with
1214+
// unclosed brackets. In that case, collect continuation
1215+
// lines until brackets are balanced, then re-parse.
11881216
let (type_str, _remainder) = split_type_token(rest);
1217+
let needs_continuation = has_unclosed_brackets(type_str);
1218+
1219+
if !needs_continuation {
1220+
return Some(clean_type(type_str));
1221+
}
1222+
1223+
// ── Multi-line type: join continuation lines ────────
1224+
let mut joined = rest.to_string();
1225+
let mut j = i + 1;
1226+
while j < lines.len() {
1227+
let cont = lines[j].trim().trim_start_matches('*').trim();
1228+
// Stop if we hit another tag (blank continuation lines are joined as-is).
1229+
if cont.starts_with('@') {
1230+
break;
1231+
}
1232+
joined.push(' ');
1233+
joined.push_str(cont);
1234+
// Check whether brackets are now balanced.
1235+
if !has_unclosed_brackets(&joined) {
1236+
break;
1237+
}
1238+
j += 1;
1239+
}
1240+
1241+
let joined = normalize_bracket_whitespace(&joined);
1242+
let (type_str, _) = split_type_token(&joined);
1243+
let type_str = if has_unclosed_brackets(type_str) {
1244+
// Brackets still unclosed — partially recover by
1245+
// stripping the unclosed generic/brace suffix to get
1246+
// the base type (e.g. `static<…broken` → `static`).
1247+
recover_base_type(type_str)
1248+
} else {
1249+
type_str
1250+
};
11891251

1252+
if type_str.is_empty() {
1253+
return None;
1254+
}
11901255
return Some(clean_type(type_str));
11911256
}
1257+
i += 1;
11921258
}
11931259
None
11941260
}
11951261

1262+
/// Remove whitespace that sits directly inside `<…>` / `{…}` delimiters.
///
/// Whitespace immediately after an opening `<` or `{` is dropped, and
/// whitespace immediately before a closing `>` or `}` is trimmed, so that a
/// type joined from several docblock lines such as `array< string, int >`
/// becomes `array<string, int>`. All other whitespace (for example the space
/// after a comma) is left untouched.
///
/// Returns a newly allocated `String`; the input is not modified.
fn normalize_bracket_whitespace(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // True while we are directly behind an opener and still swallowing
    // the whitespace that follows it.
    let mut after_opener = false;

    for ch in s.chars() {
        if after_opener && ch.is_whitespace() {
            continue;
        }
        after_opener = false;

        match ch {
            '<' | '{' => {
                out.push(ch);
                after_opener = true;
            }
            '>' | '}' => {
                // Drop whatever whitespace was emitted just before the closer.
                let kept = out.trim_end().len();
                out.truncate(kept);
                out.push(ch);
            }
            _ => out.push(ch),
        }
    }

    out
}
1294+
1295+
/// Report whether `s` still has an open `<` or `{` at its end.
///
/// Angle brackets and braces are counted independently. A closing bracket
/// with no matching opener before it is ignored rather than counted as
/// negative depth, so `Foo>` is considered balanced.
fn has_unclosed_brackets(s: &str) -> bool {
    let depths = s.chars().fold((0i32, 0i32), |(angles, braces), ch| match ch {
        '<' => (angles + 1, braces),
        '>' if angles > 0 => (angles - 1, braces),
        '{' => (angles, braces + 1),
        '}' if braces > 0 => (angles, braces - 1),
        _ => (angles, braces),
    });
    depths != (0, 0)
}
1310+
1311+
/// Salvage the usable prefix of a type string whose brackets never close.
///
/// Scans `s` tracking `<…>` and `{…}` depth. If the string ends while a
/// bracket group is still open, the result is everything before the bracket
/// that opened that outermost group, with surrounding whitespace trimmed
/// (e.g. `static<…broken` → `static`, `Collection<int, User` →
/// `Collection`). Groups that were opened and fully closed earlier in the
/// string are kept (`array<int>|Foo<int` → `array<int>|Foo`).
///
/// Returns `s` unchanged when every bracket is balanced, and an empty string
/// when nothing precedes the unclosed bracket. Closing brackets without a
/// matching opener are ignored.
fn recover_base_type(s: &str) -> &str {
    let mut angle_depth = 0usize;
    let mut brace_depth = 0usize;
    // Byte offset of the bracket that opened the group we are currently
    // inside, measured from top level; `None` while at top level.
    let mut group_start: Option<usize> = None;

    for (idx, ch) in s.char_indices() {
        let was_top_level = angle_depth == 0 && brace_depth == 0;

        match ch {
            '<' => angle_depth += 1,
            '{' => brace_depth += 1,
            '>' if angle_depth > 0 => angle_depth -= 1,
            '}' if brace_depth > 0 => brace_depth -= 1,
            // Non-bracket characters and stray closers do not affect depth.
            _ => continue,
        }

        let is_top_level = angle_depth == 0 && brace_depth == 0;
        if was_top_level && !is_top_level {
            // Just stepped from top level into a new group.
            group_start = Some(idx);
        } else if is_top_level {
            // The outermost group closed cleanly; forget where it began.
            group_start = None;
        }
    }

    // `idx` comes from `char_indices`, so slicing at it is always on a
    // character boundary.
    group_start.map_or(s, |pos| s[..pos].trim())
}
1359+
11961360
/// Parse the parameter list from a `@method` tag.
11971361
///
11981362
/// Handles formats like:

0 commit comments

Comments
 (0)