import { CommandExecutionError } from '../../errors.js';
import { cli, Strategy } from '../../registry.js';
import {
  forceEnglishUrl,
  getCurrentImdbId,
  isChallengePage,
  normalizeImdbId,
  waitForImdbPath,
} from './utils.js';

/**
 * Read IMDb person details from public profile pages.
 *
 * Registers the `imdb person` command. The handler:
 *   1. normalizes the `id` argument with `normalizeImdbId(..., 'nm')` and opens
 *      `https://www.imdb.com/name/<id>/` (passed through `forceEnglishUrl`);
 *   2. fails fast if the page is a challenge page, never reached the expected
 *      `/name/<id>/` path, or reports a different person ID;
 *   3. runs an in-page script that reads the JSON-LD `Person` block and the
 *      `__NEXT_DATA__` payload to collect name, description, birth date and
 *      filmography;
 *   4. flattens the result into `{ field, value }` rows.
 *
 * Output rows: one row per non-empty scalar field (plus a canonical `url`),
 * then, when credits exist, a `filmography` header row followed by up to
 * `limit` rows whose `field` is the title and whose `value` is
 * `(year) [role]` (each part omitted when empty).
 *
 * Throws `CommandExecutionError` when IMDb serves a challenge page, the person
 * page does not load, a different person is shown/returned, or no name could
 * be extracted.
 */
cli({
  site: 'imdb',
  name: 'person',
  description: 'Get actor or director info',
  domain: 'www.imdb.com',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'id', positional: true, required: true, help: 'IMDb person ID (nm0634240) or URL' },
    { name: 'limit', type: 'int', default: 10, help: 'Max filmography entries' },
  ],
  columns: ['field', 'value'],
  func: async (page, args) => {
    const id = normalizeImdbId(String(args.id), 'nm');

    // Clamp to [1, 30]; 30 matches the cap used inside the evaluate script.
    // Only a missing or non-numeric value falls back to the default of 10.
    // An explicit 0 is a real (too small) request and is clamped up to 1,
    // the same way negative values are, instead of silently becoming 10.
    const rawLimit: unknown = args.limit;
    const requestedLimit = rawLimit == null || rawLimit === '' ? Number.NaN : Number(rawLimit);
    const limit = Number.isNaN(requestedLimit)
      ? 10
      : Math.max(1, Math.min(Math.trunc(requestedLimit), 30));

    const url = forceEnglishUrl(`https://www.imdb.com/name/${id}/`);

    await page.goto(url);
    const onPersonPage = await waitForImdbPath(page, `^/name/${id}/`);

    // Check for a challenge page first so a block is reported as a block,
    // not as a generic "did not finish loading" failure.
    if (await isChallengePage(page)) {
      throw new CommandExecutionError(
        'IMDb blocked this request',
        'Try again with a normal browser session or extension mode',
      );
    }
    if (!onPersonPage) {
      throw new CommandExecutionError(
        `Person page did not finish loading: ${id}`,
        'Retry the command; if it persists, IMDb may have changed their navigation flow',
      );
    }

    // Guard against landing on another person's page. The check is skipped
    // when no current ID could be determined.
    const currentId = await getCurrentImdbId(page, 'nm');
    if (currentId && currentId !== id) {
      throw new CommandExecutionError(
        `IMDb redirected to a different person: ${currentId}`,
        'Retry the command; if it persists, the person page may have changed',
      );
    }

    // The script below runs inside the page. It is passed as a string, so it
    // sticks to ES5-style syntax and escapes regex backslashes twice.
    // Parsing failures are deliberately swallowed: extraction is best effort
    // and a missing name is reported by the "Person not found" check below.
    const data = await page.evaluate(`
      (function() {
        var result = {
          nameId: '',
          name: '',
          description: '',
          birthDate: '',
          filmography: []
        };

        // Pass 1: JSON-LD. Use the first block whose @type is Person.
        var scripts = document.querySelectorAll('script[type="application/ld+json"]');
        for (var i = 0; i < scripts.length; i++) {
          try {
            var ld = JSON.parse(scripts[i].textContent || 'null');
            if (ld && ld['@type'] === 'Person') {
              if (typeof ld.url === 'string') {
                var ldMatch = ld.url.match(/(nm\\d{7,8})/);
                if (ldMatch) {
                  result.nameId = ldMatch[1];
                }
              }
              result.name = result.name || ld.name || '';
              result.description = result.description || ld.description || '';
              break;
            }
          } catch (error) {
            void error;
          }
        }

        // Pass 2: __NEXT_DATA__. Without it, return whatever JSON-LD provided.
        var nextDataEl = document.getElementById('__NEXT_DATA__');
        if (!nextDataEl) {
          return result;
        }

        try {
          var nextData = JSON.parse(nextDataEl.textContent || 'null');
          var pageProps = nextData && nextData.props && nextData.props.pageProps;
          var above = pageProps && (pageProps.aboveTheFold || pageProps.aboveTheFoldData);
          var main = pageProps && (pageProps.mainColumnData || pageProps.belowTheFold);

          if (above) {
            if (!result.nameId && above.id) {
              result.nameId = String(above.id);
            }
            if (!result.name && above.nameText && above.nameText.text) {
              result.name = above.nameText.text;
            }

            if (above.birthDate) {
              // Prefer the display text; fall back to year-month-day parts.
              if (above.birthDate.displayableProperty && above.birthDate.displayableProperty.value) {
                result.birthDate = above.birthDate.displayableProperty.value.plainText || '';
              }
              if (!result.birthDate && above.birthDate.dateComponents) {
                var dc = above.birthDate.dateComponents;
                result.birthDate = [dc.year, dc.month, dc.day].filter(Boolean).join('-');
              }
            }

            // A bio, when present, replaces the JSON-LD description (first 300 chars).
            if (above.bio && above.bio.text && above.bio.text.plainText) {
              result.description = above.bio.text.plainText.substring(0, 300);
            }
          }

          // Append one credit; entries without a title are skipped.
          var pushFilmography = function(title, year, role) {
            if (!title) {
              return;
            }
            result.filmography.push({
              title: title,
              year: year || '',
              role: role || ''
            });
          };

          // Preferred source: the "known for" credits.
          var knownFor = main && main.knownForFeatureV2;
          if (knownFor && Array.isArray(knownFor.credits)) {
            for (var j = 0; j < knownFor.credits.length; j++) {
              var knownNode = knownFor.credits[j];
              if (!knownNode || !knownNode.title) {
                continue;
              }
              var knownRole = '';
              var knownRoleEdge = knownNode.creditedRoles && Array.isArray(knownNode.creditedRoles.edges)
                ? knownNode.creditedRoles.edges[0]
                : null;
              if (knownRoleEdge && knownRoleEdge.node) {
                knownRole = knownRoleEdge.node.text
                  || (knownRoleEdge.node.category ? knownRoleEdge.node.category.text || '' : '');
              }
              pushFilmography(
                // Same original-title fallback as the credit groupings below.
                knownNode.title.titleText ? knownNode.title.titleText.text : (knownNode.title.originalTitleText ? knownNode.title.originalTitleText.text : ''),
                knownNode.title.releaseYear ? String(knownNode.title.releaseYear.year || '') : '',
                knownRole
              );
            }
          }

          // Fallback source: grouped credits, capped at 30 entries in total.
          if (result.filmography.length === 0) {
            var creditSources = [];
            if (main && main.released && Array.isArray(main.released.edges)) {
              creditSources.push(main.released.edges);
            }
            if (main && main.groupings && Array.isArray(main.groupings.edges)) {
              creditSources.push(main.groupings.edges);
            }

            for (var k = 0; k < creditSources.length && result.filmography.length < 30; k++) {
              var groups = creditSources[k];
              for (var m = 0; m < groups.length && result.filmography.length < 30; m++) {
                var groupNode = groups[m] && groups[m].node;
                if (!groupNode) {
                  continue;
                }

                var roleName = groupNode.grouping ? groupNode.grouping.text || '' : '';
                var credits = groupNode.credits && Array.isArray(groupNode.credits.edges)
                  ? groupNode.credits.edges
                  : [];
                for (var n = 0; n < credits.length && result.filmography.length < 30; n++) {
                  var creditNode = credits[n] && credits[n].node;
                  if (!creditNode || !creditNode.title) {
                    continue;
                  }
                  pushFilmography(
                    creditNode.title.titleText ? creditNode.title.titleText.text : (creditNode.title.originalTitleText ? creditNode.title.originalTitleText.text : ''),
                    creditNode.title.releaseYear ? String(creditNode.title.releaseYear.year || '') : '',
                    roleName
                  );
                }
              }
            }
          }
        } catch (error) {
          void error;
        }

        return result;
      })()
    `);

    // No usable name means the page did not contain a person record.
    if (!data || typeof data !== 'object' || !('name' in data) || !(data as Record<string, unknown>).name) {
      throw new CommandExecutionError(`Person not found: ${id}`, 'Check the person ID and try again');
    }

    const result = data as Record<string, any>;
    // The payload's own ID must agree with the requested one (when present).
    if (result.nameId && result.nameId !== id) {
      throw new CommandExecutionError(
        `IMDb returned a different person payload: ${result.nameId}`,
        'Retry the command; if it persists, the person parser may need updating',
      );
    }
    const filmography = Array.isArray(result.filmography) ? result.filmography : [];

    // Override url with a clean canonical URL (no query params like ?language=en-US)
    result.url = `https://www.imdb.com/name/${id}/`;

    // Scalar fields become rows; filmography is rendered separately below and
    // nameId is internal (used only for the consistency check above).
    const rows = Object.entries(result)
      .filter(([field, value]) => field !== 'filmography' && field !== 'nameId' && value !== '' && value != null)
      .map(([field, value]) => ({ field, value: String(value) }));

    if (filmography.length > 0) {
      // Header row, then one row per credit: title in `field`, "(year) [role]" in `value`.
      rows.push({ field: 'filmography', value: '' });
      for (const entry of filmography.slice(0, limit)) {
        const suffix = [entry.year ? `(${entry.year})` : '', entry.role ? `[${entry.role}]` : '']
          .filter(Boolean)
          .join(' ');
        rows.push({
          field: String(entry.title || ''),
          value: suffix,
        });
      }
    }

    return rows;
  },
});