Skip to content

Commit c1aa45f

Browse files
committed
implement scrapers for elite and lambrou
1 parent 40fdc20 commit c1aa45f

3 files changed

Lines changed: 439 additions & 1 deletion

File tree

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
import axios from 'axios';
2+
import { ScrapedProperty } from '../types';
3+
4+
// eslint-disable-next-line @typescript-eslint/no-var-requires
5+
const cheerio: any = require('cheerio');
6+
7+
/** Origin of the Elite College Rentals website. */
const BASE_URL = 'https://www.elitecollegerentals.com';

/** The single page on which every listing is published. */
const PROPERTIES_URL = `${BASE_URL}/housing-options`;

/** Agency identifier stamped on every scraped record. */
const AGENCY = 'elitecollegerentals';

/** Request headers sent when fetching the listings page (identifies the scraper). */
const HEADERS = { 'User-Agent': 'CUApts-scraper/1.0 (+https://cuapts.org)' };
12+
13+
/**
 * Splits a bullet like "Bedrooms: 7" into { key: "bedrooms", value: "7" }.
 *
 * Only the FIRST colon separates key from value, so a value that itself
 * contains colons (e.g. "Hours: 9:00-5:00") is kept intact.
 *
 * @param text - Raw text of a single bullet item.
 * @returns The lower-cased, trimmed key and the trimmed value, or `null`
 *   when the text contains no colon.
 */
function parseBullet(text: string): { key: string; value: string } | null {
  const idx = text.indexOf(':');
  if (idx === -1) return null;
  return {
    key: text.slice(0, idx).trim().toLowerCase(),
    value: text.slice(idx + 1).trim(),
  };
}
25+
26+
/**
 * Spelled-out counts recognised by parseNumber, keyed by lower-case word.
 * A Map is used instead of an object literal so that inherited property
 * names such as "constructor" or "__proto__" can never be mistaken for a
 * number word.
 */
const NUMBER_WORDS: ReadonlyMap<string, number> = new Map([
  ['one', 1],
  ['two', 2],
  ['three', 3],
  ['four', 4],
  ['five', 5],
  ['six', 6],
  ['seven', 7],
  ['eight', 8],
  ['nine', 9],
  ['ten', 10],
  ['eleven', 11],
  ['twelve', 12],
]);

/**
 * Converts a bullet value into a number: "7" → 7, "2.5" → 2.5, "two" → 2.
 *
 * Numeric parsing uses `parseFloat`, so a leading number followed by other
 * text ("3 full") still yields the leading number. Surrounding whitespace
 * is ignored and word matching is case-insensitive.
 *
 * @param raw - The text to interpret.
 * @returns The parsed finite number, or `null` when the text is neither a
 *   number nor a recognised number word.
 */
function parseNumber(raw: string): number | null {
  const text = raw.trim();

  const numeric = parseFloat(text);
  // isFinite rejects both NaN and "Infinity", neither of which is a usable count.
  if (Number.isFinite(numeric)) return numeric;

  return NUMBER_WORDS.get(text.toLowerCase()) ?? null;
}
42+
43+
/**
 * Scrapes every Elite College Rentals property from the single
 * /housing-options page.
 *
 * A property is recognised as a <ul> containing a "Bedrooms:" bullet; lists
 * without one (e.g. parking-only lots) are skipped. The property's address is
 * taken from the nearest preceding sibling <h3>/<h2>, and the first non-empty
 * <p>/<div> met while walking back to that heading becomes the description.
 * Lists for which no heading text can be found are skipped.
 *
 * Fields this page does not provide (price, utilities, internet, trash,
 * snow removal, available date) are emitted as `null`.
 *
 * @returns One record per identified property, in document order.
 * @throws Any error raised by `axios.get` propagates to the caller; nothing
 *   is caught here.
 */
async function scrapeEliteCollegeRentals(): Promise<ScrapedProperty[]> {
  console.log('[elitecollegerentals] Fetching listings page…');

  const { data: html } = await axios.get<string>(PROPERTIES_URL, {
    headers: HEADERS,
    timeout: 15_000,
  });

  const $ = cheerio.load(html);
  const results: ScrapedProperty[] = [];

  // Collect every <ul> that has at least one "Bedrooms:" item
  $('ul').each((_: unknown, ulEl: unknown) => {
    const items = $(ulEl).find('li');

    let numBeds: number | null = null;
    let numBaths: number | null = null;
    let laundry: string | null = null;
    let parking: string | null = null;
    let porch: boolean | null = null;

    let hasBedroomBullet = false;

    items.each((_i: unknown, liEl: unknown) => {
      const text: string = $(liEl).text().trim();
      const parsed = parseBullet(text);
      if (!parsed) return; // not a "Key: value" bullet
      const { key, value } = parsed;

      if (key === 'bedrooms') {
        hasBedroomBullet = true;
        numBeds = parseNumber(value);
      } else if (key === 'bathrooms') {
        numBaths = parseNumber(value);
      } else if (key === 'laundry room access' || key === 'laundry') {
        laundry = value;
      } else if (key === 'parking spots') {
        parking = `${value} spots included`;
      } else if (key === 'private patio' || key === 'private balcony' || key === 'porch') {
        // These bullets are treated as synonyms for one flag. Once any of
        // them says "yes" the flag stays true, so the result no longer
        // depends on the order in which the bullets appear.
        porch = porch === true || /yes/i.test(value);
      }
    });

    // Skip non-property sections (e.g. parking-only rows)
    if (!hasBedroomBullet) return;

    // Walk backwards from the <ul> to find the nearest preceding <h3>/<h2>
    // title, remembering the first non-empty <p>/<div> passed on the way.
    let title = '';
    let description: string | null = null;
    let cursor = $(ulEl).prev();

    while (cursor.length > 0) {
      const tag: string = cursor.prop('tagName')?.toLowerCase() || '';
      if (tag === 'h3' || tag === 'h2') {
        title = cursor.text().trim();
        break;
      }
      if ((tag === 'p' || tag === 'div') && !description) {
        const t = cursor.text().trim();
        if (t) description = t;
      }
      cursor = cursor.prev();
    }

    // Fallback: look for a heading whose first following sibling <ul> is
    // this list. Later matches overwrite earlier ones.
    if (!title) {
      $('h3, h2').each((_j: unknown, hEl: unknown) => {
        if ($(hEl).nextAll('ul').first().is(ulEl)) {
          title = $(hEl).text().trim();
        }
      });
    }

    if (!title) return; // if can't identify the property skip

    results.push({
      address: title,
      sourceUrl: PROPERTIES_URL,
      agency: AGENCY,
      numBeds,
      numBaths,
      price: null,
      priceRaw: null,
      utilities: null,
      parking,
      laundry,
      porch,
      internet: null,
      trash: null,
      snowRemoval: null,
      availableDate: null,
      description,
    });
  });

  console.log(`[elitecollegerentals] Done. Scraped ${results.length} properties.`);
  return results;
}
145+
146+
// Expose the Elite College Rentals scraper as this module's default export.
export default scrapeEliteCollegeRentals;

0 commit comments

Comments
 (0)