88import logging
99import os
1010import re
11- from collections .abc import Sequence
1211from typing import Iterator
1312
1413import requests
1716
1817from abstractions import AdoptablePet , PetSource
1918from adoption_sources .pet_links import reconstruct_adoption_url
20- from config import CITY_NAME , CITY_STATE , PET_SPECIES , POSTAL_CODE , RESCUEGROUPS_LIMIT
19+ from config import CITY_NAME , CITY_STATE , POSTAL_CODE
2120
2221logger = logging .getLogger (__name__ )
2322
2423# Some rescues publish entries like "More Dogs Soon!" to point users at their
2524# website; those should never be posted. Add new names here as we encounter them.
26- PLACEHOLDER_NAMES : tuple [str , ...] = ("more dogs soon!" , "more cats soon!" )
27-
28- SPECIES_SINGULAR = {"dogs" : "dog" , "cats" : "cat" }
25+ PLACEHOLDER_NAMES : tuple [str , ...] = ("more dogs soon!" ,)
2926
3027# The RescueGroups API occasionally times out or returns a transient 5xx. A
3128# single hiccup shouldn't fail the whole run, so retry a few times with
@@ -49,18 +46,6 @@ def _session_with_retries() -> requests.Session:
4946 return session
5047
5148
52- def _build_species_filters (species : Sequence [str ]) -> tuple [list [dict ], str ]:
53- """Build RescueGroups filters and filterProcessing for an OR species search."""
54- filters = [
55- {"fieldName" : "species.plural" , "operation" : "equal" , "criteria" : plural }
56- for plural in species
57- ]
58- if not filters :
59- raise ValueError ("At least one species is required" )
60- filter_processing = " OR " .join (str (index ) for index in range (1 , len (filters ) + 1 ))
61- return filters , filter_processing
62-
63-
6449class SourceRescueGroups (PetSource ):
6550 """
6651 Fetches adoptable pets from RescueGroups.org API.
@@ -75,20 +60,20 @@ def __init__(
7560 api_key : str | None = None ,
7661 postal_code : str = POSTAL_CODE ,
7762 radius_miles : int = 50 ,
78- species : Sequence [ str ] | None = None ,
79- limit : int = RESCUEGROUPS_LIMIT ,
63+ species : str = "dogs" , # "dogs" or "cats"
64+ limit : int = 25 ,
8065 location_label : str = f"{ CITY_NAME } , { CITY_STATE } " ,
8166 ):
8267 self ._api_key = api_key or os .environ .get ("CUTEPETSBOSTON_RESCUEGROUPS_API_KEY" )
8368 self .postal_code = postal_code
8469 self .radius_miles = radius_miles
85- self .species = tuple ( species if species is not None else PET_SPECIES )
70+ self .species = species
8671 self .limit = limit
8772 self .location_label = location_label
8873
8974 @property
9075 def source_name (self ) -> str :
91- return f"RescueGroups ({ ', ' . join ( self .species ) } )"
76+ return f"RescueGroups ({ self .species } )"
9277
9378 def fetch_pets (self ) -> Iterator [AdoptablePet ]:
9479 """
@@ -106,34 +91,29 @@ def fetch_pets(self) -> Iterator[AdoptablePet]:
10691 "RescueGroups API key not configured. "
10792 "Set CUTEPETSBOSTON_RESCUEGROUPS_API_KEY environment variable."
10893 )
109-
94+
11095 url = (
111- f"{ self .BASE_URL } /available/haspic"
112- f"?include=orgs,breeds,locations,species "
96+ f"{ self .BASE_URL } /available/{ self . species } / haspic"
97+ f"?include=orgs,breeds,locations"
11398 f"&sort=random"
11499 f"&limit={ self .limit } "
115100 )
116101 headers = {
117102 "Content-Type" : "application/vnd.api+json" ,
118103 "Authorization" : self ._api_key ,
119104 }
120- species_filters , filter_processing = _build_species_filters (self .species )
121105 payload = {
122106 "data" : {
123107 "filterRadius" : {
124108 "miles" : self .radius_miles ,
125109 "postalcode" : self .postal_code ,
126- },
127- "filters" : species_filters ,
128- "filterProcessing" : filter_processing ,
110+ }
129111 }
130112 }
131113
114+
132115 logger .info (
133- "Fetching %s from RescueGroups within %s miles of %s" ,
134- ", " .join (self .species ),
135- self .radius_miles ,
136- self .postal_code ,
116+ f"Fetching { self .species } from RescueGroups within { self .radius_miles } miles of { self .postal_code } "
137117 )
138118
139119 session = _session_with_retries ()
@@ -142,61 +122,42 @@ def fetch_pets(self) -> Iterator[AdoptablePet]:
142122
143123 body = response .json ()
144124 data = body .get ("data" , [])
145- logger .info ("Received %s pets from RescueGroups" , len ( data ) )
125+ logger .info (f "Received { len ( data ) } pets from RescueGroups" )
146126
147127 orgs_by_id = {
148128 item ["id" ]: item .get ("attributes" , {})
149129 for item in body .get ("included" , [])
150130 if item .get ("type" ) == "orgs"
151131 }
152- species_by_id = {
153- item ["id" ]: item .get ("attributes" , {})
154- for item in body .get ("included" , [])
155- if item .get ("type" ) == "species"
156- }
157132
158133 for animal in data :
159- pet = self ._parse_animal (animal , orgs_by_id , species_by_id )
134+ pet = self ._parse_animal (animal , orgs_by_id )
160135 if not pet :
161136 continue
162137 if self ._is_placeholder_name (pet .name ):
163- logger .info ("Skipping placeholder record: %r" , pet .name )
138+ logger .info (f "Skipping placeholder record: { pet .name !r } " )
164139 continue
165140 yield pet
166141
167- def _parse_animal (
168- self ,
169- animal : dict ,
170- orgs_by_id : dict ,
171- species_by_id : dict ,
172- ) -> AdoptablePet | None :
142+ def _parse_animal (self , animal : dict , orgs_by_id : dict ) -> AdoptablePet | None :
173143 """Parse a single animal record from the API response."""
174144 try :
175145 attrs = animal .get ("attributes" , {})
176146 animal_id = animal .get ("id" , "" )
177147
148+ # Extract and clean the name
178149 name = self ._clean_name (attrs .get ("name" , "Unknown" ))
179150
180- species_id = (
181- animal .get ("relationships" , {})
182- .get ("species" , {})
183- .get ("data" , [{}])[0 ]
184- .get ("id" )
185- )
186- if not species_id :
187- logger .warning ("Skipping animal %s with no species relationship" , animal_id )
188- return None
189-
190- plural = species_by_id .get (species_id , {}).get ("plural" )
191- if plural not in self .species :
192- logger .info ("Skipping animal %s with unconfigured species: %r" , animal_id , plural )
193- return None
194-
195- species = SPECIES_SINGULAR [plural ]
151+ # Determine species from the endpoint we queried
152+ species = "dog" if self .species == "dogs" else "cat"
196153
154+ # Get breed info
197155 breed = attrs .get ("breedString" , attrs .get ("breedPrimary" , "Mixed" ))
156+
157+ # Clean up description (use text version, not HTML)
198158 description = self ._clean_description (attrs .get ("descriptionText" , "" ))
199159
160+ # Get adoption_url
200161 org_id = (
201162 animal .get ("relationships" , {})
202163 .get ("orgs" , {})
@@ -215,15 +176,24 @@ def _parse_animal(
215176 None
216177 )
217178
179+ # Shelter's own animal id (e.g. MSPCA's "A468573"); some orgs' deep
180+ # links are keyed on this rather than the RescueGroups id.
218181 rescue_id = attrs .get ("rescueId" )
182+
183+ # For shelters we have a template for, rebuild a deep link to this
184+ # specific pet; otherwise keep the org landing page from above.
219185 adoption_url = (
220186 reconstruct_adoption_url (url_candidates , animal_id , rescue_id )
221187 or adoption_url
222188 )
223189
190+ # Get best available image
224191 image_url = self ._get_image_url (attrs )
192+
193+ # Location of the adoption org
225194 location = f"{ org_attrs .get ('city' )} , { org_attrs .get ('state' )} "
226195
196+
227197 return AdoptablePet (
228198 name = name ,
229199 species = species ,
@@ -239,7 +209,7 @@ def _parse_animal(
239209 rescue_id = rescue_id ,
240210 )
241211 except Exception as e :
242- logger .warning ("Failed to parse animal %s: %s" , animal .get ("id" , " unknown" ), e )
212+ logger .warning (f "Failed to parse animal { animal .get ('id' , ' unknown' ) } : { e } " )
243213 return None
244214
245215 def _is_placeholder_name (self , name : str ) -> bool :
@@ -253,6 +223,8 @@ def _clean_name(self, name: str) -> str:
253223 "Doli ***Home for the Holidays 1/2 price!" -> "Doli"
254224 "Kathy" -> "Kathy"
255225 """
226+ # Remove common promotional suffixes
227+ # Split on common delimiters and take the first part
256228 cleaned = re .split (r"\s*[\*\-\|]+\s*" , name )[0 ]
257229 return cleaned .strip ()
258230
@@ -261,13 +233,19 @@ def _clean_description(self, description: str) -> str:
261233 if not description :
262234 return ""
263235
236+ # Decode HTML entities
264237 text = html .unescape (description )
238+
239+ # Remove and normalize whitespace
265240 text = text .replace (" " , " " )
266241 text = re .sub (r"\s+" , " " , text )
242+
243+ # Remove promotional headers
267244 text = re .sub (
268245 r"\*\*Home for the Holidays.*?\*\*" , "" , text , flags = re .IGNORECASE
269246 )
270247
248+ # Trim to reasonable length for social posts
271249 text = text .strip ()
272250 if len (text ) > 500 :
273251 text = text [:497 ] + "..."
@@ -278,5 +256,6 @@ def _get_image_url(self, attrs: dict) -> str | None:
278256 """Get the best available image URL."""
279257 thumbnail = attrs .get ("pictureThumbnailUrl" )
280258 if thumbnail :
259+ # Request a larger image instead of the 100px thumbnail
281260 return re .sub (r"\?width=\d+" , "?width=800" , thumbnail )
282261 return None
0 commit comments