Skip to content

Commit d2bb645

Browse files
committed
Implement travis hyde, ithaca renting, and urban ithaca scrapers
1 parent 36eb47e commit d2bb645

4 files changed

Lines changed: 499 additions & 1 deletion

File tree

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
import axios from 'axios';
2+
import { ScrapedProperty } from '../types';
3+
4+
// eslint-disable-next-line @typescript-eslint/no-var-requires
5+
const cheerio: any = require('cheerio');
6+
7+
// Site origin; unit-detail page URLs are built from it.
const BASE_URL = 'https://ithacarenting.com';
8+
// Homepage URL, fetched to discover the unit UIDs.
const HOME_URL = `${BASE_URL}/`;
9+
// Value written to the `agency` field of every property this scraper returns.
const AGENCY = 'ithacarenting';
10+
// Stagger, in milliseconds, between the start of successive unit-detail requests.
const REQUEST_DELAY_MS = 500;
11+
12+
// Headers sent with each axios request made by this scraper.
const HEADERS = { 'User-Agent': 'CUApts-scraper/1.0 (+https://cuapts.org)' };
13+
14+
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
17+
18+
/**
 * Extracts an integer bedroom count from free text.
 *
 * Any mention of "studio" yields 0. Otherwise the first number followed by
 * "bed", "beds", "bedroom", "bedrooms" or "BR" is used; the number and the
 * unit may be separated by whitespace, a hyphen, or nothing at all
 * ("4 Bedrooms", "4-Bedroom", "2BR").
 *
 * @param text Heading or page text to search.
 * @returns The bedroom count, or `null` when no count is found.
 */
function parseBeds(text: string): number | null {
  if (/\bstudio\b/i.test(text)) return 0;
  // `[\s-]*` (rather than `\s*`) so hyphenated forms such as "4-bedroom" count.
  const m = text.match(/(\d+)[\s-]*(?:bed(?:room)?s?|BR)\b/i);
  return m ? parseInt(m[1], 10) : null;
}
24+
25+
/**
 * Extracts a bathroom count from free text such as "1 Bathroom", "3.5 Baths",
 * "2-bath" or "1BA".
 *
 * The first number followed by "bath", "baths", "bathroom", "bathrooms" or
 * "BA" is used; whitespace or a hyphen between the number and the unit is
 * optional. Values outside the range 0.5–10 are rejected as implausible.
 *
 * @param text Heading or page text to search.
 * @returns The bathroom count (may be fractional), or `null` when absent or
 *          out of range.
 */
function parseBaths(text: string): number | null {
  const m = text.match(/\b(\d+(?:\.\d+)?)[\s-]*(?:bath(?:room)?s?|BA)\b/i);
  if (!m) return null;
  const n = parseFloat(m[1]);
  return n >= 0.5 && n <= 10 ? n : null;
}
34+
35+
/**
 * Parses a per-bed price from page text.
 *
 * A range such as "$955 – $1,075" (hyphen, en dash or em dash) is preferred:
 * the lower bound becomes `price` and both bounds appear in `priceRaw`.
 * If no valid ascending range is present, the first dollar amount greater
 * than 100 is used. Amounts may be written with or without thousands
 * separators ("$1,075" or "$1075") and with optional cents, which are ignored.
 *
 * @param text Text to search.
 * @returns `price` as a whole-dollar number and `priceRaw` as a display
 *          string ending in "per bed"; both `null` when nothing usable is found.
 */
function parsePrice(text: string): { price: number | null; priceRaw: string | null } {
  // Either comma-grouped digits or a plain digit run; a bare `\d{1,3}` would
  // truncate "$1075" to 107.
  const amountPat = /\$(\d{1,3}(?:,\d{3})+|\d+)(?:\.\d{1,2})?/;
  const toDollars = (digits: string): number => parseInt(digits.replace(/,/g, ''), 10);
  // Fixed locale so the output does not depend on the host's regional settings.
  const show = (n: number): string => n.toLocaleString('en-US');

  const rangePat = new RegExp(`${amountPat.source}\\s*[–—-]\\s*${amountPat.source}`);
  const rangeMatch = text.match(rangePat);
  if (rangeMatch) {
    const lo = toDollars(rangeMatch[1]);
    const hi = toDollars(rangeMatch[2]);
    if (lo > 0 && hi > lo) {
      return { price: lo, priceRaw: `$${show(lo)} – $${show(hi)} per bed` };
    }
  }

  // Scan every amount so that a small leading figure (e.g. a fee) does not
  // hide a real rent that appears later in the text.
  const scanner = new RegExp(amountPat.source, 'g');
  let hit: RegExpExecArray | null = scanner.exec(text);
  while (hit !== null) {
    const val = toDollars(hit[1]);
    if (val > 100) return { price: val, priceRaw: `$${show(val)} per bed` };
    hit = scanner.exec(text);
  }
  return { price: null, priceRaw: null };
}
56+
57+
/**
 * Maps a lease description to an ISO `YYYY-MM-DD` date.
 *
 * - A year range ("2026-2027", also with an en or em dash) or a fall term
 *   ("Fall Semester 2026", "fall 2026") maps to August 1 of the (first) year.
 * - A spring term ("Spring 2027") maps to January 1 of that year.
 *
 * @param text Text containing the year range or semester.
 * @returns The ISO date string, or `null` if none of the forms is present.
 */
function parseLeaseDate(text: string): string | null {
  // Accept typographic dashes too; scraped pages often render ranges with "–".
  const yearRangeMatch = text.match(/(\d{4})\s*[-–—]\s*\d{4}/);
  if (yearRangeMatch) return `${yearRangeMatch[1]}-08-01`;
  const fallMatch = text.match(/fall\s+(?:semester\s+)?(\d{4})/i);
  if (fallMatch) return `${fallMatch[1]}-08-01`;
  const springMatch = text.match(/spring\s+(?:semester\s+)?(\d{4})/i);
  if (springMatch) return `${springMatch[1]}-01-01`;
  return null;
}
70+
71+
/**
 * Downloads the homepage and collects the distinct numeric `uid` values found
 * in the `href` attributes of its anchor tags.
 *
 * @returns The unique UIDs, in first-seen order.
 */
async function scrapeUids(): Promise<number[]> {
  const response = await axios.get<string>(HOME_URL, { headers: HEADERS, timeout: 15_000 });
  const $ = cheerio.load(response.data);

  const seen = new Set<number>();
  $('a[href*="uid="]').each((_: any, anchor: any) => {
    const href = $(anchor).attr('href') || '';
    const found = href.match(/uid=(\d+)/);
    if (!found) return;
    seen.add(parseInt(found[1], 10));
  });

  return Array.from(seen);
}
85+
86+
/**
 * Best-effort address lookup in whitespace-collapsed page text.
 *
 * Tries, in order: "Collegetown <Name>, <street address>",
 * "<Name> Apartment(s), <street address>", and "<street address>, Ithaca".
 *
 * @returns The formatted address, or an empty string when nothing matches.
 */
function extractUnitAddress(bodyText: string): string {
  // The street name is matched lazily and the street-type word must end on a
  // word boundary. A greedy name with no boundary lets the match run on through
  // following words until the last "st"/"dr"/"rd" substring (e.g. "... first").
  const street = String.raw`\d+\s+[A-Za-z ]+?(?:Road|Rd|Avenue|Ave|Street|St|Drive|Dr|Blvd|Boulevard)\b`;

  // NOTE(review): the `i` flag makes `[A-Z]` in the building-name pattern match
  // lowercase letters too, so the name can absorb preceding words. Kept as-is
  // to avoid changing which pages match; confirm the intended behaviour.
  const namedPatterns = [
    new RegExp(String.raw`\b(Collegetown\s+\w+)\s*,\s*(${street})`, 'i'),
    new RegExp(
      String.raw`\b([A-Z][a-zA-Z]+(?: [A-Z][a-zA-Z]+)* Apartments?)\s*,\s*(${street})`,
      'i'
    ),
  ];
  for (const pattern of namedPatterns) {
    const m = bodyText.match(pattern);
    if (m) return `${m[1].trim()}, ${m[2].trim()}`;
  }

  const plain = bodyText.match(new RegExp(String.raw`\b(${street})\s*,\s*Ithaca`, 'i'));
  return plain ? `${plain[1].trim()}, Ithaca` : '';
}

/** Classifies laundry mentions in page text as 'In Unit', 'In Building', 'Available' or null. */
function detectLaundry(bodyText: string): string | null {
  if (
    /in[-\s]?unit\s+(?:washer|laundry)/i.test(bodyText) ||
    /washer(?:\/dryer)?\s+in\s+(?:unit|apartment)/i.test(bodyText)
  ) {
    return 'In Unit';
  }
  if (
    /laundry\s+(?:on\s+the|located|in\s+(?:the\s+)?building|facilities|room)/i.test(bodyText) ||
    /on[-\s]?site\s+laundry/i.test(bodyText)
  ) {
    return 'In Building';
  }
  return /laundry/i.test(bodyText) ? 'Available' : null;
}

/**
 * Fetches one unit-detail page and extracts its listing fields heuristically
 * from headings, list items, paragraphs and the page's visible text.
 *
 * Fields that cannot be detected are returned as `null` (or an empty string
 * for `address`). Any rejection from the HTTP request propagates to the caller.
 *
 * @param uid Unit identifier used in the `?uid=` query parameter.
 * @returns The scraped property record for that unit.
 */
async function scrapeUnitDetail(uid: number): Promise<ScrapedProperty> {
  const url = `${BASE_URL}/unit-details/?uid=${uid}`;
  const { data: html } = await axios.get<string>(url, {
    headers: HEADERS,
    timeout: 15_000,
  });
  const $ = cheerio.load(html);

  // First heading that mentions a bedroom count or "studio"; it is tried for
  // beds/baths before falling back to the whole page text.
  let bedsHeading = '';
  $('h1, h2, h3, h4').each((_: any, el: any) => {
    if (bedsHeading) return;
    const t: string = $(el).text().trim();
    if (/\d[\s-]*(?:bed(?:room)?s?|BR)\b/i.test(t) || /\bstudio\b/i.test(t)) bedsHeading = t;
  });

  // Collapse all whitespace so the regex heuristics below can assume single spaces.
  const bodyText: string = $('body').text().replace(/\s+/g, ' ');
  const numBeds = parseBeds(bedsHeading) ?? parseBeds(bodyText);
  const numBaths = parseBaths(bedsHeading) ?? parseBaths(bodyText);

  const address = extractUnitAddress(bodyText);
  const { price, priceRaw } = parsePrice(bodyText);

  // Lease year range first (hyphen or typographic dash), then a semester mention.
  let availableDate: string | null = null;
  const leaseMatch = bodyText.match(/\b(20\d{2})\s*[-–—]\s*(20\d{2})\b/);
  // Re-join with a plain hyphen so parseLeaseDate sees its documented input form.
  if (leaseMatch) availableDate = parseLeaseDate(`${leaseMatch[1]}-${leaseMatch[2]}`);
  if (!availableDate) {
    const semMatch = bodyText.match(/(fall|spring)\s+(?:semester\s+)?20\d{2}/i);
    if (semMatch) availableDate = parseLeaseDate(semMatch[0]);
  }

  // Short list items that say a utility is "included"; at most four are kept.
  const includedItems: string[] = [];
  $('li').each((_: any, el: any) => {
    const t: string = $(el).text().trim();
    if (
      t.length < 80 &&
      /\bincluded\b/i.test(t) &&
      /heat|water|trash|utilities?|electricity|electric\b/i.test(t)
    ) {
      includedItems.push(t);
    }
  });
  const utilities: string | null =
    includedItems.length > 0 ? includedItems.slice(0, 4).join(', ') : null;

  const laundry = detectLaundry(bodyText);

  // The qualifier must appear within 80 characters and the same sentence as
  // "parking"; an unbounded `.*` over the collapsed page would match any later
  // "available" or "fee" anywhere in the document.
  const parking: string | null =
    /parking[^.!?]{0,80}?\b(?:available|additional|fees?)\b/i.test(bodyText)
      ? 'Available (additional fee)'
      : null;

  const internet: string | null = /internet/i.test(bodyText) ? 'Available' : null;

  const trash: string | null = /trash\s+(?:removal\s+)?included/i.test(bodyText)
    ? 'Included'
    : null;

  const porch: boolean | null = /balcon|porch/i.test(bodyText) ? true : null;

  const snowRemoval: string | null = /snow\s+removal/i.test(bodyText) ? 'Available' : null;

  // First paragraph longer than 60 characters serves as the description.
  let description: string | null = null;
  $('p').each((_: any, el: any) => {
    if (description) return;
    const t: string = $(el).text().trim();
    if (t.length > 60) description = t;
  });

  return {
    address,
    sourceUrl: url,
    agency: AGENCY,
    numBeds,
    numBaths,
    price,
    priceRaw,
    utilities,
    parking,
    laundry,
    porch,
    internet,
    trash,
    snowRemoval,
    availableDate,
    description,
  };
}
209+
210+
/**
 * Scrapes every unit listed on the homepage.
 *
 * Unit-detail requests are started `REQUEST_DELAY_MS` apart (the i-th request
 * starts after `i * REQUEST_DELAY_MS`). A unit whose scrape fails is logged and
 * skipped; it does not abort the run. Results are returned in the same order
 * as the discovered UIDs, regardless of which request finishes first.
 *
 * @returns The successfully scraped properties.
 */
async function scrapeIthacarenting(): Promise<ScrapedProperty[]> {
  console.log('[ithacarenting] Fetching unit list from homepage…');
  const uids = await scrapeUids();
  console.log(`[ithacarenting] Found ${uids.length} unit UIDs`);

  const settled = await Promise.all(
    uids.map(async (uid, i): Promise<ScrapedProperty | null> => {
      await sleep(REQUEST_DELAY_MS * i);
      try {
        return await scrapeUnitDetail(uid);
      } catch (err: unknown) {
        const reason = err instanceof Error ? err.message : err;
        console.error(`[ithacarenting] Failed to scrape uid=${uid}:`, reason);
        return null;
      }
    })
  );

  // Drop the failed units; the remaining entries keep UID order.
  const results = settled.filter((p): p is ScrapedProperty => p !== null);
  console.log(`[ithacarenting] Done. Scraped ${results.length}/${uids.length} units.`);
  return results;
}
236+
237+
export default scrapeIthacarenting;
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import axios from 'axios';
2+
import { ScrapedProperty } from '../types';
3+
4+
// Listings collection endpoint; the query string requests page 0 with a page size of 200.
const API_URL =
5+
'https://www.travishyde.com/rts/collections/public/46d82ae2/runtime/collection/appfolio-listings/query-data?pageSize=200&pageNumber=0&query=%28%29&language=ENGLISH';
6+
// Site origin used to build each listing's source URL.
const BASE_URL = 'https://www.travishyde.com';
7+
// Value written to the `agency` field of every property this scraper returns.
const AGENCY = 'travishyde';
8+
9+
// Headers sent with the API request.
const HEADERS = { 'User-Agent': 'CUApts-scraper/1.0 (+https://cuapts.org)' };
10+
11+
/**
 * Finds the first occurrence of `pattern` in `haystack` and returns the
 * matched text with surrounding whitespace trimmed, or `null` if there is no match.
 */
function extractAmenitySnippet(haystack: string, pattern: RegExp): string | null {
  const found = haystack.match(pattern);
  if (found === null) {
    return null;
  }
  const [snippet] = found;
  return snippet.trim();
}
16+
17+
/**
 * Reports whether any of the keywords occurs in `haystack`, ignoring case.
 *
 * Both the haystack and each keyword are lower-cased before comparison, so
 * callers may pass keywords in any casing.
 *
 * @param haystack Text to search.
 * @param keywords Substrings to look for.
 * @returns `true` if at least one keyword is found; `false` otherwise
 *          (including when no keywords are given).
 */
function contains(haystack: string, ...keywords: string[]): boolean {
  const lower = haystack.toLowerCase();
  return keywords.some((kw) => lower.includes(kw.toLowerCase()));
}
22+
23+
/**
 * Converts one item of the listings API response into a `ScrapedProperty`.
 *
 * Reads `item.data` (address, bedrooms, bathrooms, market rent, availability,
 * amenities, utilities, listing UID). Numeric fields that are missing, blank
 * or not parseable as a number become `null`; they are never `NaN`.
 * Amenity-derived fields are `null` when the corresponding keyword is absent.
 *
 * @param item Raw API item; expected to carry a `data` object.
 * @returns The mapped property, or `null` when the item has no `full_address`.
 */
function mapListing(item: any): ScrapedProperty | null {
  const data = item?.data;
  if (!data?.full_address) return null;

  // Missing, blank or unparseable values map to null so NaN never leaks out.
  const toNumber = (raw: unknown): number | null => {
    if (raw == null || raw === '') return null;
    const n = typeof raw === 'number' ? raw : parseFloat(String(raw));
    return Number.isFinite(n) ? n : null;
  };
  // Keyword matching below needs a string; tolerate an array or a missing value.
  const toText = (raw: unknown): string => {
    if (typeof raw === 'string') return raw;
    return Array.isArray(raw) ? raw.join(', ') : '';
  };

  const address: string = data.full_address;
  const numBeds = toNumber(data.bedrooms);
  const numBaths = toNumber(data.bathrooms);
  const rent = toNumber(data.market_rent);
  const price: number | null = rent != null ? Math.round(rent) : null;
  const priceRaw: string | null = price != null ? `$${price}/month` : null;

  const availableDate: string | null = data.available_date || null;

  const amenitiesStr = toText(data.amenities);
  const utilitiesStr = toText(data.utilities);
  const combined = `${amenitiesStr} ${utilitiesStr}`;

  // The comma-delimited amenity entry that mentions laundry, if there is one.
  const laundry = extractAmenitySnippet(amenitiesStr, /[^,]*laundry[^,]*/i);

  const internet: string | null = contains(combined, 'internet', 'wifi', 'wi-fi', 'fiber')
    ? 'Available'
    : null;

  const snowRemoval: string | null = contains(amenitiesStr, 'snow removal') ? 'Included' : null;

  const porch: boolean | null = contains(amenitiesStr, 'balcony', 'porch') ? true : null;

  const parking: string | null = contains(amenitiesStr, 'parking') ? 'Available' : null;

  const trash: string | null = contains(combined, 'trash', 'recycling') ? 'Included' : null;

  // NOTE(review): `page_item_url` is appended after "/listings/detail/" exactly
  // like a UID — confirm it is a bare identifier rather than a path or full URL.
  const listableUid: string = data.listable_uid || item.page_item_url || '';
  const sourceUrl = listableUid
    ? `${BASE_URL}/listings/detail/${listableUid}`
    : `${BASE_URL}/availability`;

  return {
    address,
    sourceUrl,
    agency: AGENCY,
    numBeds,
    numBaths,
    price,
    priceRaw,
    utilities: utilitiesStr || null,
    parking,
    laundry,
    porch,
    internet,
    trash,
    snowRemoval,
    availableDate,
    description: null,
  };
}
80+
81+
/**
 * Requests the listings collection from the API and maps each returned item
 * to a `ScrapedProperty`, discarding items that `mapListing` rejects.
 *
 * @returns The mapped properties, in API order.
 */
async function scrapeTravisHyde(): Promise<ScrapedProperty[]> {
  console.log('[travishyde] Fetching listings from collection API…');

  const response = await axios.get<any>(API_URL, { headers: HEADERS, timeout: 20_000 });
  const items: any[] = response.data?.values || [];
  console.log(`[travishyde] API returned ${items.length} items`);

  const results = items.reduce<ScrapedProperty[]>((kept, item) => {
    const mapped = mapListing(item);
    if (mapped !== null) {
      kept.push(mapped);
    }
    return kept;
  }, []);

  console.log(`[travishyde] Done. Scraped ${results.length} properties.`);
  return results;
}
99+
100+
export default scrapeTravisHyde;

0 commit comments

Comments
 (0)