diff --git a/adoption_sources/rescue_groups.py b/adoption_sources/rescue_groups.py index d5e8e32..ea944ea 100644 --- a/adoption_sources/rescue_groups.py +++ b/adoption_sources/rescue_groups.py @@ -8,6 +8,7 @@ import logging import os import re +from collections.abc import Sequence from typing import Iterator import requests @@ -16,13 +17,15 @@ from abstractions import AdoptablePet, PetSource from adoption_sources.pet_links import reconstruct_adoption_url -from config import CITY_NAME, CITY_STATE, POSTAL_CODE +from config import CITY_NAME, CITY_STATE, PET_SPECIES, POSTAL_CODE, RESCUEGROUPS_LIMIT logger = logging.getLogger(__name__) # Some rescues publish entries like "More Dogs Soon!" to point users at their # website; those should never be posted. Add new names here as we encounter them. -PLACEHOLDER_NAMES: tuple[str, ...] = ("more dogs soon!",) +PLACEHOLDER_NAMES: tuple[str, ...] = ("more dogs soon!", "more cats soon!") + +SPECIES_SINGULAR = {"dogs": "dog", "cats": "cat"} # The RescueGroups API occasionally times out or returns a transient 5xx. A # single hiccup shouldn't fail the whole run, so retry a few times with @@ -46,6 +49,18 @@ def _session_with_retries() -> requests.Session: return session +def _build_species_filters(species: Sequence[str]) -> tuple[list[dict], str]: + """Build RescueGroups filters and filterProcessing for an OR species search.""" + filters = [ + {"fieldName": "species.plural", "operation": "equal", "criteria": plural} + for plural in species + ] + if not filters: + raise ValueError("At least one species is required") + filter_processing = " OR ".join(str(index) for index in range(1, len(filters) + 1)) + return filters, filter_processing + + class SourceRescueGroups(PetSource): """ Fetches adoptable pets from RescueGroups.org API. @@ -60,20 +75,20 @@ def __init__( api_key: str | None = None, postal_code: str = POSTAL_CODE, radius_miles: int = 50, - species: str = "dogs", # "dogs" or "cats" - limit: int = 25, + species: Sequence[str] | None = None, + limit: int = RESCUEGROUPS_LIMIT, location_label: str = f"{CITY_NAME}, {CITY_STATE}", ): self._api_key = api_key or os.environ.get("CUTEPETSBOSTON_RESCUEGROUPS_API_KEY") self.postal_code = postal_code self.radius_miles = radius_miles - self.species = species + self.species = tuple(species if species is not None else PET_SPECIES) self.limit = limit self.location_label = location_label @property def source_name(self) -> str: - return f"RescueGroups ({self.species})" + return f"RescueGroups ({', '.join(self.species)})" def fetch_pets(self) -> Iterator[AdoptablePet]: """ @@ -91,10 +106,10 @@ def fetch_pets(self) -> Iterator[AdoptablePet]: "RescueGroups API key not configured. " "Set CUTEPETSBOSTON_RESCUEGROUPS_API_KEY environment variable." ) - + url = ( - f"{self.BASE_URL}/available/{self.species}/haspic" - f"?include=orgs,breeds,locations" + f"{self.BASE_URL}/available/haspic" + f"?include=orgs,breeds,locations,species" f"&sort=random" f"&limit={self.limit}" ) @@ -102,18 +117,23 @@ def fetch_pets(self) -> Iterator[AdoptablePet]: "Content-Type": "application/vnd.api+json", "Authorization": self._api_key, } + species_filters, filter_processing = _build_species_filters(self.species) payload = { "data": { "filterRadius": { "miles": self.radius_miles, "postalcode": self.postal_code, - } + }, + "filters": species_filters, + "filterProcessing": filter_processing, } } - logger.info( - f"Fetching {self.species} from RescueGroups within {self.radius_miles} miles of {self.postal_code}" + "Fetching %s from RescueGroups within %s miles of %s", + ", ".join(self.species), + self.radius_miles, + self.postal_code, ) session = _session_with_retries() @@ -122,42 +142,61 @@ def fetch_pets(self) -> Iterator[AdoptablePet]: body = response.json() data = body.get("data", []) - logger.info(f"Received {len(data)} pets from RescueGroups") + logger.info("Received %s pets from RescueGroups", len(data)) orgs_by_id = { item["id"]: item.get("attributes", {}) for item in body.get("included", []) if item.get("type") == "orgs" } + species_by_id = { + item["id"]: item.get("attributes", {}) + for item in body.get("included", []) + if item.get("type") == "species" + } for animal in data: - pet = self._parse_animal(animal, orgs_by_id) + pet = self._parse_animal(animal, orgs_by_id, species_by_id) if not pet: continue if self._is_placeholder_name(pet.name): - logger.info(f"Skipping placeholder record: {pet.name!r}") + logger.info("Skipping placeholder record: %r", pet.name) continue yield pet - def _parse_animal(self, animal: dict, orgs_by_id: dict) -> AdoptablePet | None: + def _parse_animal( + self, + animal: dict, + orgs_by_id: dict, + species_by_id: dict, + ) -> AdoptablePet | None: """Parse a single animal record from the API response.""" try: attrs = animal.get("attributes", {}) animal_id = animal.get("id", "") - # Extract and clean the name name = self._clean_name(attrs.get("name", "Unknown")) - # Determine species from the endpoint we queried - species = "dog" if self.species == "dogs" else "cat" + species_id = ( + animal.get("relationships", {}) + .get("species", {}) + .get("data", [{}])[0] + .get("id") + ) + if not species_id: + logger.warning("Skipping animal %s with no species relationship", animal_id) + return None - # Get breed info - breed = attrs.get("breedString", attrs.get("breedPrimary", "Mixed")) + plural = species_by_id.get(species_id, {}).get("plural") + if plural not in self.species: + logger.info("Skipping animal %s with unconfigured species: %r", animal_id, plural) + return None - # Clean up description (use text version, not HTML) + species = SPECIES_SINGULAR[plural] + + breed = attrs.get("breedString", attrs.get("breedPrimary", "Mixed")) description = self._clean_description(attrs.get("descriptionText", "")) - # Get adoption_url org_id = ( animal.get("relationships", {}) .get("orgs", {}) @@ -176,24 +215,15 @@ def _parse_animal(self, animal: dict, orgs_by_id: dict) -> AdoptablePet | None: None ) - # Shelter's own animal id (e.g. MSPCA's "A468573"); some orgs' deep - # links are keyed on this rather than the RescueGroups id. rescue_id = attrs.get("rescueId") - - # For shelters we have a template for, rebuild a deep link to this - # specific pet; otherwise keep the org landing page from above. adoption_url = ( reconstruct_adoption_url(url_candidates, animal_id, rescue_id) or adoption_url ) - # Get best available image image_url = self._get_image_url(attrs) - - # Location of the adoption org location = f"{org_attrs.get('city')}, {org_attrs.get('state')}" - return AdoptablePet( name=name, species=species, @@ -209,7 +239,7 @@ def _parse_animal(self, animal: dict, orgs_by_id: dict) -> AdoptablePet | None: rescue_id=rescue_id, ) except Exception as e: - logger.warning(f"Failed to parse animal {animal.get('id', 'unknown')}: {e}") + logger.warning("Failed to parse animal %s: %s", animal.get("id", "unknown"), e) return None def _is_placeholder_name(self, name: str) -> bool: @@ -223,8 +253,6 @@ def _clean_name(self, name: str) -> str: "Doli ***Home for the Holidays 1/2 price!" -> "Doli" "Kathy" -> "Kathy" """ - # Remove common promotional suffixes - # Split on common delimiters and take the first part cleaned = re.split(r"\s*[\*\-\|]+\s*", name)[0] return cleaned.strip() @@ -233,19 +261,13 @@ def _clean_description(self, description: str) -> str: if not description: return "" - # Decode HTML entities text = html.unescape(description) - - # Remove   and normalize whitespace text = text.replace(" ", " ") text = re.sub(r"\s+", " ", text) - - # Remove promotional headers text = re.sub( r"\*\*Home for the Holidays.*?\*\*", "", text, flags=re.IGNORECASE ) - # Trim to reasonable length for social posts text = text.strip() if len(text) > 500: text = text[:497] + "..." @@ -256,6 +278,5 @@ def _get_image_url(self, attrs: dict) -> str | None: """Get the best available image URL.""" thumbnail = attrs.get("pictureThumbnailUrl") if thumbnail: - # Request a larger image instead of the 100px thumbnail return re.sub(r"\?width=\d+", "?width=800", thumbnail) return None diff --git a/config.py b/config.py index 0178730..3b21506 100644 --- a/config.py +++ b/config.py @@ -2,3 +2,9 @@ CITY_STATE = "MA" CITY_HASHTAGS = ["Boston"] POSTAL_CODE = "02108" + +# RescueGroups API plural endpoint names for species to fetch. +PET_SPECIES = ("dogs", "cats") + +# Single-call limit; roughly matches two per-species calls at 25 each. +RESCUEGROUPS_LIMIT = 50 diff --git a/main.py b/main.py index 6a05c40..6c522f6 100644 --- a/main.py +++ b/main.py @@ -1,14 +1,20 @@ -import os -import random import argparse import json +import os +import random import sys import traceback +from datetime import datetime, timedelta, timezone from pathlib import Path -from datetime import datetime, timezone, timedelta import requests +from adoption_sources import SourceManual, SourceRescueGroups +from social_posters.bluesky import PosterBluesky +from social_posters.debug import PosterDebug +from social_posters.instagram import PosterInstagram +from social_posters.mastodon import PosterMastodon + def main(): parser = argparse.ArgumentParser() @@ -28,14 +34,9 @@ def main(): def create_posters(debug=False): - from social_posters.debug import PosterDebug - if debug: return [PosterDebug()] - from social_posters.instagram import PosterInstagram - from social_posters.bluesky import PosterBluesky - from social_posters.mastodon import PosterMastodon posters = [] posters.append(PosterMastodon()) @@ -47,15 +48,17 @@ def create_posters(debug=False): def create_sources(debug=False): - from adoption_sources import SourceRescueGroups, SourceManual - if debug: - return [SourceManual()] + cat_fixture_path = Path(__file__).parent / "tests" / "fixtures" / "sample_cats.json" + with open(cat_fixture_path) as f: + cat_animals = json.load(f) + return [ + SourceManual(species="dog"), + SourceManual(species="cat", animals=cat_animals), + ] sources = [] - sources.append(SourceRescueGroups()) - return sources diff --git a/tests/fixtures/sample_cats.json b/tests/fixtures/sample_cats.json new file mode 100644 index 0000000..5b1be59 --- /dev/null +++ b/tests/fixtures/sample_cats.json @@ -0,0 +1,32 @@ +[ + { + "type": "animals", + "id": "99001001", + "attributes": { + "name": "Whiskers", + "breedString": "Domestic Shorthair", + "breedPrimary": "Domestic Shorthair", + "descriptionText": "Whiskers is a friendly tabby who loves sunny windowsills.", + "pictureThumbnailUrl": "https://cdn.rescuegroups.org/example/pictures/whiskers.jpg?width=100", + "slug": "adopt-whiskers-domestic-shorthair-cat", + "sex": "Female", + "sizeGroup": "Medium" + }, + "relationships": {} + }, + { + "type": "animals", + "id": "99001002", + "attributes": { + "name": "Mittens", + "breedString": "Siamese / Mixed", + "breedPrimary": "Siamese", + "descriptionText": "Mittens is a vocal cuddle bug looking for a quiet home.", + "pictureThumbnailUrl": "https://cdn.rescuegroups.org/example/pictures/mittens.jpg?width=100", + "slug": "adopt-mittens-siamese-cat", + "sex": "Male", + "sizeGroup": "Small" + }, + "relationships": {} + } +] diff --git a/tests/test_main.py b/tests/test_main.py index af19a2f..9d1b93a 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,7 +1,10 @@ import unittest +import uuid from abstractions import AdoptablePet, Post, PostResult -from main import create_posters, run +from adoption_sources import SourceManual +from adoption_sources.rescue_groups import SourceRescueGroups +from main import create_posters, create_sources, run class FakeSource: @@ -34,6 +37,7 @@ def publish(self, post): class RunFlowTests(unittest.TestCase): def test_run_calls_source_and_posters(self): + pet_id = f"test-poppy-{uuid.uuid4()}" pet = AdoptablePet( name="Poppy", species="dog", @@ -41,6 +45,7 @@ def test_run_calls_source_and_posters(self): location="Boston, MA", image_url="https://example.com/poppy.jpg", adoption_url="https://example.com/adopt/poppy", + pet_id=pet_id, ) source = FakeSource([pet]) poster_one = FakePoster() @@ -55,6 +60,52 @@ def test_run_calls_source_and_posters(self): self.assertTrue(poster_two.publish_called) self.assertEqual(len(results), 2) + def test_run_with_mixed_species_pool(self): + dog = AdoptablePet( + name="Rex", + species="dog", + breed="mutt", + location="Boston, MA", + image_url="https://example.com/rex.jpg", + adoption_url="https://example.com/adopt/rex", + pet_id=f"test-dog-{uuid.uuid4()}", + ) + cat = AdoptablePet( + name="Luna", + species="cat", + breed="tabby", + location="Boston, MA", + image_url="https://example.com/luna.jpg", + adoption_url="https://example.com/adopt/luna", + pet_id=f"test-cat-{uuid.uuid4()}", + ) + source = FakeSource([dog, cat]) + poster = FakePoster() + + results = run([source], [poster]) + + self.assertTrue(poster.format_called) + self.assertTrue(poster.publish_called) + self.assertEqual(len(results), 1) + + +class CreateSourcesTests(unittest.TestCase): + def test_prod_returns_single_rescuegroups_source(self): + sources = create_sources(debug=False) + + self.assertEqual(len(sources), 1) + self.assertIsInstance(sources[0], SourceRescueGroups) + self.assertEqual(sources[0].species, ("dogs", "cats")) + + def test_debug_returns_manual_sources_for_dogs_and_cats(self): + sources = create_sources(debug=True) + + self.assertEqual(len(sources), 2) + self.assertIsInstance(sources[0], SourceManual) + self.assertIsInstance(sources[1], SourceManual) + self.assertEqual(sources[0].species, "dog") + self.assertEqual(sources[1].species, "cat") + class CreatePostersTests(unittest.TestCase): def test_debug_returns_debug_poster(self): @@ -64,6 +115,5 @@ def test_debug_returns_debug_poster(self): self.assertEqual(posters[0].platform_name, "Debug") - if __name__ == "__main__": unittest.main() diff --git a/tests/test_pet_links.py b/tests/test_pet_links.py index 0348f54..3a35649 100644 --- a/tests/test_pet_links.py +++ b/tests/test_pet_links.py @@ -71,18 +71,22 @@ class ParseAnimalIntegrationTests(unittest.TestCase): def setUp(self): self.source = SourceRescueGroups(api_key="dummy") + self.species_by_id = {"8": {"plural": "dogs"}} def _animal(self): return { "type": "animals", "id": "22506352", "attributes": {"name": "Ketchup", "breedString": "Lab Mix"}, - "relationships": {"orgs": {"data": [{"type": "orgs", "id": "org1"}]}}, + "relationships": { + "orgs": {"data": [{"type": "orgs", "id": "org1"}]}, + "species": {"data": [{"type": "species", "id": "8"}]}, + }, } def test_toolkit_org_gets_deep_link(self): orgs = {"org1": {"city": "Sterling", "state": "MA", "url": "https://sterlingshelter.org/"}} - pet = self.source._parse_animal(self._animal(), orgs) + pet = self.source._parse_animal(self._animal(), orgs, self.species_by_id) self.assertEqual( pet.adoption_url, "https://sterlingshelter.org/pet-finder/#action_0=pet&animalID_0=22506352&petIndex_0=-1", @@ -90,7 +94,7 @@ def test_toolkit_org_gets_deep_link(self): def test_non_toolkit_org_keeps_landing_url(self): orgs = {"org1": {"city": "Boston", "state": "MA", "url": "https://www.mspca.org/"}} - pet = self.source._parse_animal(self._animal(), orgs) + pet = self.source._parse_animal(self._animal(), orgs, self.species_by_id) self.assertEqual(pet.adoption_url, "https://www.mspca.org/") diff --git a/tests/test_rescue_groups.py b/tests/test_rescue_groups.py index 8f0995a..7f630da 100644 --- a/tests/test_rescue_groups.py +++ b/tests/test_rescue_groups.py @@ -1,9 +1,13 @@ import unittest +from unittest.mock import MagicMock, patch -from adoption_sources.rescue_groups import SourceRescueGroups +from adoption_sources.rescue_groups import ( + SourceRescueGroups, + _build_species_filters, +) -def _make_animal(adoption_url=None, **extra_attrs): +def _make_animal(adoption_url=None, species_id="8", **extra_attrs): attrs = { "name": "Buddy", "breedString": "Lab Mix", @@ -16,7 +20,10 @@ def _make_animal(adoption_url=None, **extra_attrs): "type": "animals", "id": "12345", "attributes": attrs, - "relationships": {"orgs": {"data": [{"type": "orgs", "id": "org1"}]}}, + "relationships": { + "orgs": {"data": [{"type": "orgs", "id": "org1"}]}, + "species": {"data": [{"type": "species", "id": species_id}]}, + }, } @@ -29,15 +36,40 @@ def _make_org(adoption_url=None, url=None): return attrs +def _make_species_by_id(plural="dogs", species_id="8"): + return {species_id: {"plural": plural}} + + +class BuildSpeciesFiltersTests(unittest.TestCase): + def test_two_species_uses_or_filter_processing(self): + filters, filter_processing = _build_species_filters(("dogs", "cats")) + + self.assertEqual( + filters, + [ + {"fieldName": "species.plural", "operation": "equal", "criteria": "dogs"}, + {"fieldName": "species.plural", "operation": "equal", "criteria": "cats"}, + ], + ) + self.assertEqual(filter_processing, "1 OR 2") + + def test_single_species(self): + filters, filter_processing = _build_species_filters(("dogs",)) + + self.assertEqual(len(filters), 1) + self.assertEqual(filter_processing, "1") + + class AdoptionUrlTests(unittest.TestCase): def setUp(self): self.source = SourceRescueGroups(api_key="dummy") + self.species_by_id = _make_species_by_id() def test_uses_pet_adoption_url_when_present(self): animal = _make_animal(adoption_url="https://pet.example.com/buddy") orgs = {"org1": _make_org(adoption_url="https://org.example.com", url="https://org.example.com/fallback")} - pet = self.source._parse_animal(animal, orgs) + pet = self.source._parse_animal(animal, orgs, self.species_by_id) self.assertEqual(pet.adoption_url, "https://pet.example.com/buddy") @@ -45,7 +77,7 @@ def test_falls_back_to_org_adoption_url_when_pet_has_none(self): animal = _make_animal() orgs = {"org1": _make_org(adoption_url="https://org.example.com/adopt", url="https://org.example.com")} - pet = self.source._parse_animal(animal, orgs) + pet = self.source._parse_animal(animal, orgs, self.species_by_id) self.assertEqual(pet.adoption_url, "https://org.example.com/adopt") @@ -53,11 +85,41 @@ def test_falls_back_to_org_url_when_neither_pet_nor_org_has_adoption_url(self): animal = _make_animal() orgs = {"org1": _make_org(url="https://org.example.com")} - pet = self.source._parse_animal(animal, orgs) + pet = self.source._parse_animal(animal, orgs, self.species_by_id) self.assertEqual(pet.adoption_url, "https://org.example.com") +class SpeciesParsingTests(unittest.TestCase): + def setUp(self): + self.source = SourceRescueGroups(api_key="dummy") + self.orgs = {"org1": _make_org(url="https://org.example.com")} + + def test_dog_species_from_included(self): + animal = _make_animal(species_id="8") + species_by_id = _make_species_by_id(plural="dogs", species_id="8") + + pet = self.source._parse_animal(animal, self.orgs, species_by_id) + + self.assertEqual(pet.species, "dog") + + def test_cat_species_from_included(self): + animal = _make_animal(species_id="3") + species_by_id = _make_species_by_id(plural="cats", species_id="3") + + pet = self.source._parse_animal(animal, self.orgs, species_by_id) + + self.assertEqual(pet.species, "cat") + + def test_skips_unconfigured_species(self): + animal = _make_animal(species_id="99") + species_by_id = _make_species_by_id(plural="rabbits", species_id="99") + + pet = self.source._parse_animal(animal, self.orgs, species_by_id) + + self.assertIsNone(pet) + + class PlaceholderNameTests(unittest.TestCase): def setUp(self): self.source = SourceRescueGroups(api_key="dummy") @@ -66,10 +128,37 @@ def test_more_dogs_soon_is_placeholder(self): self.assertTrue(self.source._is_placeholder_name("More Dogs Soon!")) self.assertTrue(self.source._is_placeholder_name("MORE DOGS SOON!")) + def test_more_cats_soon_is_placeholder(self): + self.assertTrue(self.source._is_placeholder_name("More Cats Soon!")) + self.assertTrue(self.source._is_placeholder_name("MORE CATS SOON!")) + def test_real_pet_name_is_not_placeholder(self): self.assertFalse(self.source._is_placeholder_name("Pippin")) self.assertFalse(self.source._is_placeholder_name("Buddy")) +class FetchPetsRequestTests(unittest.TestCase): + @patch("adoption_sources.rescue_groups._session_with_retries") + def test_posts_single_multi_species_request(self, mock_session_factory): + mock_session = MagicMock() + mock_session_factory.return_value = mock_session + mock_response = MagicMock() + mock_response.json.return_value = {"data": [], "included": []} + mock_session.post.return_value = mock_response + + source = SourceRescueGroups(api_key="dummy") + list(source.fetch_pets()) + + mock_session.post.assert_called_once() + url = mock_session.post.call_args.args[0] + payload = mock_session.post.call_args.kwargs["json"] + + self.assertIn("/available/haspic", url) + self.assertNotIn("/dogs/", url) + self.assertIn("include=orgs,breeds,locations,species", url) + self.assertEqual(payload["data"]["filterProcessing"], "1 OR 2") + self.assertEqual(len(payload["data"]["filters"]), 2) + + if __name__ == "__main__": unittest.main()