diff --git a/clear_tidal_account.py b/clear_tidal_account.py new file mode 100755 index 0000000..51fe78c --- /dev/null +++ b/clear_tidal_account.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python3 +""" +Test script to clear all saved albums, playlists, and favorites from Tidal account. + +WARNING: This will permanently delete all your Tidal favorites, saved albums, and user-created playlists! +Use this only for testing purposes on a test account. + +This script integrates with the spotify_to_tidal configuration system: +- Uses the same Tidal authentication flow as the main application +- Loads config.yml (or custom config file) for consistency +- Reuses existing .session.yml file for Tidal authentication +- Does not require Spotify credentials since it only touches Tidal + +Usage: + ./clear_tidal_account.py # Use default config.yml + ./clear_tidal_account.py --config test.yml # Use custom config file + +The script will: +1. Load the config file (optional, mainly for consistency) +2. Authenticate with Tidal using existing session or OAuth flow +3. Clear all favorite tracks from your Tidal library +4. Clear all saved albums from your Tidal collection +5. Delete all user-created playlists (not system playlists like "My Mix") + +Multiple confirmation prompts ensure you don't accidentally delete everything. 
async def clear_favorites(session: tidalapi.Session):
    """Remove every favorite track from the logged-in user's Tidal library.

    Tracks are removed one API call at a time. A failure on one track is
    reported and counted, and the remaining tracks are still processed
    (best effort).

    Args:
        session: Authenticated Tidal session whose ``user.favorites`` is cleared.
    """
    print("Fetching all favorite tracks...")
    favorites = await get_all_favorites(session.user.favorites)

    if not favorites:
        print("No favorite tracks to clear.")
        return

    print(f"Found {len(favorites)} favorite tracks. Clearing...")

    # The previous 20-item "chunks" did not batch or throttle anything: every
    # track was still removed individually. Iterate directly instead.
    failed = 0
    with tqdm(desc="Removing favorite tracks", total=len(favorites)) as progress:
        for track in favorites:
            try:
                session.user.favorites.remove_track(track.id)
            except Exception as e:
                failed += 1
                # tqdm.write keeps the message from corrupting the progress bar
                tqdm.write(f"Error removing track {track.id}: {e}")
            progress.update(1)

    # Only claim success when every removal actually succeeded.
    if failed:
        print(f"✗ {failed} of {len(favorites)} favorite tracks could not be removed.")
    else:
        print("✓ All favorite tracks cleared.")
Clearing...") + + # Remove albums in chunks + chunk_size = 20 + with tqdm(desc="Removing saved albums", total=len(albums)) as progress: + for i in range(0, len(albums), chunk_size): + chunk = albums[i:i + chunk_size] + + for album in chunk: + try: + session.user.favorites.remove_album(album.id) + except Exception as e: + print(f"Error removing album {album.id} ({album.name}): {e}") + + progress.update(len(chunk)) + + print("✓ All saved albums cleared.") + + +async def clear_user_playlists(session: tidalapi.Session): + """Clear all user-created playlists (not system playlists)""" + print("Fetching all playlists...") + playlists = await get_all_playlists(session.user) + + # Filter to only user-created playlists (not system ones like "My Mix") + user_playlists = [p for p in playlists if isinstance(p, tidalapi.UserPlaylist) and p.creator.id == session.user.id] + + if not user_playlists: + print("No user-created playlists to clear.") + return + + print(f"Found {len(user_playlists)} user-created playlists:") + for playlist in user_playlists: + print(f" - {playlist.name} ({playlist.num_tracks} tracks)") + + # Ask for confirmation since this is destructive + response = input(f"\nAre you sure you want to DELETE all {len(user_playlists)} user playlists? 
async def main():
    """Interactively wipe favorites, saved albums and user playlists from Tidal.

    Asks for a typed ``DELETE`` confirmation, optionally loads the config
    file, authenticates with Tidal and then runs the three clearing steps.

    Returns:
        int: Process exit code. 0 when the run finished or the user
        cancelled; 1 when login failed or an unexpected error occurred.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Clear all data from Tidal account")
    parser.add_argument('--config', default='config.yml', help='location of the config file')
    args = parser.parse_args()

    print("🚨 WARNING: This script will permanently delete ALL of the following from your Tidal account:")
    print("   - All favorite tracks")
    print("   - All saved albums")
    print("   - All user-created playlists")
    print("\nThis action CANNOT be undone!")

    response = input("\nAre you absolutely sure you want to proceed? (type 'DELETE' to confirm): ")
    if response != 'DELETE':
        print("Operation cancelled. Your Tidal account is unchanged.")
        # Return an explicit code so every path yields an int for sys.exit()
        return 0

    try:
        # Load config if available (mainly for consistency with main app)
        config = None
        try:
            with open(args.config, 'r') as f:
                config = yaml.safe_load(f)
            print(f"✓ Loaded config from {args.config}")
        except FileNotFoundError:
            print(f"Config file {args.config} not found, proceeding without it...")
        except Exception as e:
            print(f"Warning: Could not load config: {e}")

        # A YAML document whose root is not a mapping (empty file, list,
        # scalar) has no 'tidal' section; treat it like a missing config
        # instead of failing on .get().
        tidal_config = config.get('tidal') if isinstance(config, dict) else None

        # Get Tidal session (same way as main app)
        print("\nAuthenticating with Tidal...")
        session = open_tidal_session(tidal_config)

        if not session.check_login():
            print("❌ Could not connect to Tidal")
            return 1

        print(f"✓ Authenticated as: {session.user.first_name} {session.user.last_name}")

        # Clear favorites
        print("\n" + "="*50)
        await clear_favorites(session)

        # Clear saved albums
        print("\n" + "="*50)
        await clear_saved_albums(session)

        # Clear user playlists
        print("\n" + "="*50)
        await clear_user_playlists(session)

        print("\n" + "="*50)
        # The individual steps swallow per-item errors and the playlist step
        # can be declined by the user, so do not claim the account is empty.
        print("🎉 Tidal account clearing finished.")
        print("Review the output above for any items that were skipped or could not be removed.")

    except Exception as e:
        print(f"\n❌ Error: {e}")
        return 1

    return 0


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)
+sync_playlists_default: true # default to sync playlists +sync_favorites_default: true # default to sync favorites +sync_albums_default: true # default to sync albums # increasing these parameters should increase the search speed, while decreasing reduces likelihood of 429 errors max_concurrency: 10 # max concurrent connections at any given time rate_limit: 10 # max sustained connections per second + +# fuzzy matching settings - helps find tracks with slightly different names/spellings +enable_fuzzy_matching: true # set to false to disable fuzzy matching (uses only exact matching) +fuzzy_name_threshold: 0.80 # similarity threshold for track names (0.0-1.0, higher = more strict) +fuzzy_artist_threshold: 0.75 # similarity threshold for artist names (0.0-1.0, higher = more strict) diff --git a/readme.md b/readme.md index 38f9d2d..8547225 100644 --- a/readme.md +++ b/readme.md @@ -18,23 +18,37 @@ Setup Usage ---- -To synchronize all of your Spotify playlists with your Tidal account run the following from the project root directory +To synchronize all of your Spotify playlists, favorites and albums with your Tidal account run the following from the project root directory Windows ignores python module paths by default, but you can run them using `python3 -m spotify_to_tidal` ```bash spotify_to_tidal ``` -You can also just synchronize a specific playlist by doing the following: +Use `--sync-playlists`, `--sync-favorites` and/or `--sync-albums` to limit the sync to one or more types. 
def _resolve_sync_targets(args, config) -> tuple[bool, bool, bool]:
    """Decide which of (playlists, favorites, albums) should be synchronized.

    Rules, in order:
      * ``--uri`` always implies a playlist sync of that URI.
      * If any type is explicitly enabled (or implied by ``--uri``), only the
        explicitly enabled types are synced.
      * Otherwise each type follows its ``sync_<type>_default`` config value
        (default True), except types disabled with a ``--no-sync-*`` flag.

    Args:
        args: Parsed argparse namespace with ``uri``, ``sync_playlists``,
            ``sync_favorites`` and ``sync_albums`` (each flag True/False/None).
        config: Loaded configuration mapping.

    Returns:
        Tuple ``(sync_playlists, sync_favorites, sync_albums)``.

    Raises:
        ValueError: If ``--uri`` is combined with ``--no-sync-playlists``.
    """
    requested = {
        'playlists': args.sync_playlists,
        'favorites': args.sync_favorites,
        'albums': args.sync_albums,
    }
    if args.uri:
        if requested['playlists'] is False:
            raise ValueError('--uri cannot be combined with --no-sync-playlists')
        # An explicit URI is an explicit request to sync that playlist
        requested['playlists'] = True

    if any(flag is True for flag in requested.values()):
        # Whitelist mode: sync only what was explicitly asked for
        enabled = {kind: flag is True for kind, flag in requested.items()}
    else:
        # No positive request: fall back to config defaults, minus anything
        # the user switched off with a --no-sync-* flag
        enabled = {
            kind: flag is None and bool(config.get(f'sync_{kind}_default', True))
            for kind, flag in requested.items()
        }
    return enabled['playlists'], enabled['favorites'], enabled['albums']


def main():
    """Command line entry point: sync playlists, favorites and/or saved albums."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default='config.yml', help='location of the config file')
    parser.add_argument('--uri', help='synchronize a specific URI instead of the one in the config')
    parser.add_argument('--sync-playlists', action=argparse.BooleanOptionalAction, help='synchronize the playlists')
    parser.add_argument('--sync-favorites', action=argparse.BooleanOptionalAction, help='synchronize the favorites')
    parser.add_argument('--sync-albums', action=argparse.BooleanOptionalAction, help='synchronize saved albums')
    args = parser.parse_args()

    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)

    # Determine what to sync based on arguments and config
    try:
        sync_playlists, sync_favorites, sync_albums = _resolve_sync_targets(args, config)
    except ValueError as e:
        parser.error(str(e))

    print("Opening Spotify session")
    spotify_session = _auth.open_spotify_session(config['spotify'])
    print("Opening Tidal session")
    tidal_session = _auth.open_tidal_session()
    if not tidal_session.check_login():
        sys.exit("Could not connect to Tidal")

    if sync_playlists:
        if args.uri:
            # if a playlist ID is explicitly provided as a command line argument then use that
            spotify_playlist = spotify_session.playlist(args.uri)
            tidal_playlists = _sync.get_tidal_playlists_wrapper(tidal_session)
            playlist_mapping = _sync.pick_tidal_playlist_for_spotify_playlist(spotify_playlist, tidal_playlists)
            _sync.sync_playlists_wrapper(spotify_session, tidal_session, [playlist_mapping], config)
        elif config.get('sync_playlists', None):
            # if the config contains a sync_playlists list of mappings then use that
            _sync.sync_playlists_wrapper(spotify_session, tidal_session, _sync.get_playlists_from_config(spotify_session, tidal_session, config), config)
        else:
            # otherwise sync all the user playlists in the Spotify account
            _sync.sync_playlists_wrapper(spotify_session, tidal_session, _sync.get_user_playlist_mappings(spotify_session, tidal_session, config), config)

    if sync_favorites:
        _sync.sync_favorites_wrapper(spotify_session, tidal_session, config)

    if sync_albums:
        _sync.sync_albums_wrapper(spotify_session, tidal_session, config)


if __name__ == '__main__':
    main()
# Dash look-alikes (en dash, em dash, minus sign) folded to an ASCII hyphen.
_DASH_TRANSLATION = str.maketrans({'\u2013': '-', '\u2014': '-', '\u2212': '-'})


def simple(input_string: str) -> list[str]:
    """
    Simple progressive text normalization for matching across platforms.

    Produces up to two variations, strictest first:
      * exact: whitespace collapsed and dash look-alikes folded to '-'
      * simplified: the same, with everything from the first '(' or '['
        onwards removed

    The simplified variation is omitted when it equals the exact one or
    when it would be empty (the name starts with a bracket). Callers use the
    variations in substring tests, and an empty string is a substring of
    every name, which would make the title match anything.

    Args:
        input_string: Text to simplify

    Returns:
        List with [exact] or [exact, simplified]; [""] for empty input
    """
    if not input_string:
        return [""]

    def _clean(text: str) -> str:
        # str.split() with no argument also strips leading/trailing whitespace
        return ' '.join(text.split()).translate(_DASH_TRANSLATION)

    exact = _clean(input_string)
    simplified = _clean(input_string.split('(')[0].split('[')[0])

    if not simplified or simplified == exact:
        return [exact]
    return [exact, simplified]
def test_album_similarity(spotify_album, tidal_album, threshold=0.6):
    """Return True when two albums have similar titles and share an artist.

    Titles are compared on their most simplified form (edition/version
    suffixes in brackets removed), so "Album (Deluxe Edition)" and "Album"
    are still considered similar under the default threshold.

    Args:
        spotify_album: Spotify album dict (reads ``['name']``).
        tidal_album: Tidal album object (reads ``.name``).
        threshold: Minimum SequenceMatcher ratio for the titles.
    """
    def _most_simplified(name):
        # simple() orders variations strictest first; take the last
        # non-empty one so a bracket-leading title never compares as ''.
        variations = simple(name)
        return next((v for v in reversed(variations) if v), variations[0])

    spotify_simple = _most_simplified(spotify_album['name'])
    tidal_simple = _most_simplified(tidal_album.name)
    return SequenceMatcher(None, spotify_simple, tidal_simple).ratio() >= threshold and artist_match(tidal_album, spotify_album)
name and first artist - query = simple(spotify_track['name']) + ' ' + simple(spotify_track['artists'][0]['name']) + track_simple = simple(spotify_track['name'])[0] + artist_simple = simple(spotify_track['artists'][0]['name'])[0] + query = f"{track_simple} {artist_simple}" for track in tidal_session.search(query, models=[tidalapi.media.Track])['tracks']: if match(track, spotify_track): failure_cache.remove_match_failure(spotify_track['id']) @@ -257,7 +290,7 @@ async def _run_rate_limiter(semaphore): dt = (t - t0).total_seconds() new_items = round(config.get('rate_limit', 10)*dt) t0 = t - [semaphore.release() for i in range(new_items)] # leak new_items from the 'bucket' + [semaphore.release() for _ in range(new_items)] # leak new_items from the 'bucket' # Extract the new tracks that do not already exist in the old tidal tracklist tracks_to_search = get_new_spotify_tracks(spotify_tracks) @@ -356,6 +389,338 @@ def sync_playlists_wrapper(spotify_session: spotipy.Spotify, tidal_session: tida def sync_favorites_wrapper(spotify_session: spotipy.Spotify, tidal_session: tidalapi.Session, config): asyncio.run(main=sync_favorites(spotify_session=spotify_session, tidal_session=tidal_session, config=config)) +async def sync_albums(spotify_session: spotipy.Spotify, tidal_session: tidalapi.Session, config: dict): + """ sync saved albums from Spotify to Tidal """ + async def get_albums_from_spotify_saved() -> List[dict]: + async def _fetch_all_albums_from_spotify_in_chunks(fetch_function: Callable) -> List[dict]: + output = [] + results = fetch_function(0) + output.extend([item['album'] for item in results['items'] if item['album'] is not None]) + + # Get all the remaining albums in parallel + if results['next']: + offsets = [results['limit'] * n for n in range(1, math.ceil(results['total'] / results['limit']))] + extra_results = await atqdm.gather( + *[asyncio.to_thread(fetch_function, offset) for offset in offsets], + desc="Fetching additional data chunks" + ) + for extra_result 
def album_match(tidal_album: tidalapi.Album, spotify_album: dict, config: dict = None) -> bool:
    """ Check if a Tidal album matches a Spotify album using progressive matching

    An album matches when its title matches AND at least one artist matches.
    Both checks try every combination of the variations returned by
    ``simple()`` using, in order: substring containment, containment or
    equality after ``normalize()``, and (only when ``enable_fuzzy_matching``
    is set in ``config``) a SequenceMatcher ratio against the configured
    threshold.

    Empty strings never count as a match. ``normalize()`` drops all non-ASCII
    characters, so a fully non-ASCII title or artist normalizes to ``''``,
    and the empty string is both a substring of everything and equal to every
    other empty string. Without this guard such albums match any album.

    Args:
        tidal_album: Tidal album (reads ``.name`` and ``.artists[*].name``).
        spotify_album: Spotify album dict (reads ``['name']`` and
            ``['artists'][*]['name']``).
        config: Optional settings; reads ``enable_fuzzy_matching``,
            ``fuzzy_name_threshold`` (default 0.80) and
            ``fuzzy_artist_threshold`` (default 0.75).

    Returns:
        True when both the title and an artist match.
    """
    settings = config or {}
    fuzzy_enabled = bool(settings.get('enable_fuzzy_matching', False))
    fuzzy_threshold = settings.get('fuzzy_name_threshold', 0.80)
    fuzzy_artist_threshold = settings.get('fuzzy_artist_threshold', 0.75)

    def contains(a: str, b: str) -> bool:
        """Substring test in either direction, ignoring empty strings"""
        return bool(a) and bool(b) and (a in b or b in a)

    def fuzzy(a: str, b: str, threshold: float) -> bool:
        """Similarity test, only when fuzzy matching is enabled"""
        return fuzzy_enabled and bool(a) and bool(b) and SequenceMatcher(None, a, b).ratio() >= threshold

    def names_match(spotify_name: str, tidal_name: str) -> bool:
        spotify_lower, tidal_lower = spotify_name.lower(), tidal_name.lower()
        norm_spotify, norm_tidal = normalize(spotify_lower), normalize(tidal_lower)
        return (
            contains(spotify_lower, tidal_lower)
            or contains(norm_spotify, norm_tidal)
            or fuzzy(spotify_lower, tidal_lower, fuzzy_threshold)
            or fuzzy(norm_spotify, norm_tidal, fuzzy_threshold)
        )

    def artists_match(tidal_artist: str, spotify_artist: str) -> bool:
        tidal_lower, spotify_lower = tidal_artist.lower(), spotify_artist.lower()
        norm_tidal, norm_spotify = normalize(tidal_lower), normalize(spotify_lower)
        return (
            contains(tidal_lower, spotify_lower)  # also covers exact equality
            or (bool(norm_tidal) and norm_tidal == norm_spotify)
            or fuzzy(tidal_lower, spotify_lower, fuzzy_artist_threshold)
            or fuzzy(norm_tidal, norm_spotify, fuzzy_artist_threshold)
        )

    def get_artists(album):
        """Extract artist names from an album"""
        if hasattr(album, 'artists'):  # Tidal album
            return [artist.name for artist in album.artists]
        else:  # Spotify album
            return [artist['name'] for artist in album['artists']]

    def split_artists(artist_names):
        """Split artist names on common separators"""
        result = []
        for artist_name in artist_names:
            if '&' in artist_name:
                result.extend(artist_name.split('&'))
            elif ',' in artist_name:
                result.extend(artist_name.split(','))
            elif ' and ' in artist_name.lower():
                result.extend(artist_name.lower().split(' and '))
            else:
                result.append(artist_name)
        return [name.strip() for name in result]

    # Album title: try each combination of variations (strictest first)
    album_name_matches = any(
        names_match(spotify_name, tidal_name)
        for spotify_name in simple(spotify_album['name'])
        for tidal_name in simple(tidal_album.name)
    )
    if not album_name_matches:
        return False

    # Artists: one matching pair across both albums is enough
    tidal_artists = split_artists(get_artists(tidal_album))
    spotify_artists = split_artists(get_artists(spotify_album))
    return any(
        artists_match(tidal_var, spotify_var)
        for tidal_artist in tidal_artists
        for spotify_artist in spotify_artists
        for tidal_var in simple(tidal_artist)
        for spotify_var in simple(spotify_artist)
    )
+ """ + # Track which albums have already been matched to avoid duplicates + matched_spotify_ids = set() + matched_tidal_ids = set() + + # First pass: match tidal albums to spotify albums + for tidal_album in tidal_albums: + if tidal_album.id in matched_tidal_ids: + continue + + for spotify_album in spotify_albums: + if spotify_album['id'] in matched_spotify_ids: + continue + + if album_match(tidal_album, spotify_album, config): + album_match_cache.insert((spotify_album['id'], tidal_album.id)) + matched_spotify_ids.add(spotify_album['id']) + matched_tidal_ids.add(tidal_album.id) + break + + # Second pass: match remaining spotify albums to remaining tidal albums + for spotify_album in spotify_albums: + if spotify_album['id'] in matched_spotify_ids: + continue + + for tidal_album in tidal_albums: + if tidal_album.id in matched_tidal_ids: + continue + + if album_match(tidal_album, spotify_album, config): + album_match_cache.insert((spotify_album['id'], tidal_album.id)) + matched_spotify_ids.add(spotify_album['id']) + matched_tidal_ids.add(tidal_album.id) + break + +async def search_new_albums_on_tidal(tidal_session: tidalapi.Session, spotify_albums: Sequence[dict], config: dict): + """ Search for Spotify albums on Tidal and cache the results """ + def get_new_spotify_albums(spotify_albums: Sequence[dict]) -> List[dict]: + results = [] + for spotify_album in spotify_albums: + if not spotify_album['id']: continue + if not album_match_cache.get(spotify_album['id']): + results.append(spotify_album) + return results + + async def tidal_album_search(spotify_album, rate_limiter, tidal_session: tidalapi.Session) -> tidalapi.Album | None: + if not ('artists' in spotify_album and len(spotify_album['artists'])): + return None + + # Progressive search strategy - try stronger matches first, then loosen + search_queries = [] + album_name = spotify_album['name'] + artist_name = spotify_album['artists'][0]['name'] + + # Get progressive variations for both album and artist + 
album_variations = simple(album_name) + artist_variations = simple(artist_name) + + # Create search queries from combinations of variations + for album_var in album_variations: + for artist_var in artist_variations: + # Full search (album + artist) + search_queries.append(f"{album_var} {artist_var}") + + # Album + simplified artist (first part only) + artist_first_part = artist_var.split('&')[0].strip().split(' and ')[0].strip() + if artist_first_part != artist_var: + search_queries.append(f"{album_var} {artist_first_part}") + + # Album only search with the most simplified version + if album_variations: + search_queries.append(album_variations[-1]) # Most simplified version + + # Special case for apostrophes + if "'" in album_name: + no_apostrophe_album = simple(album_name.replace("'", "")) + if no_apostrophe_album and artist_variations: + search_queries.append(f"{no_apostrophe_album[0]} {artist_variations[0]}") + + # Remove duplicates while preserving order + unique_queries = [] + seen = set() + for query in search_queries: + if query not in seen: + unique_queries.append(query) + seen.add(query) + search_queries = unique_queries + + # Try each search query until we find a match + for i, query in enumerate(search_queries): + await rate_limiter.acquire() + try: + album_result = tidal_session.search(query, models=[tidalapi.album.Album]) + if album_result and 'albums' in album_result and len(album_result['albums']) > 0: + print(f" Search query {i+1}/6 '{query}' found {len(album_result['albums'])} results") + for tidal_album in album_result['albums']: + if album_match(tidal_album, spotify_album, config): + print(f" ✓ Match found using query: '{query}'") + return tidal_album + else: + print(f" Search query {i+1}/6 '{query}' found no results") + except Exception as e: + # Continue to next query if this one fails + print(f" Search query {i+1}/6 '{query}' failed: {e}") + continue + + # 6. 
Last resort: search by artist name only and check all albums + # This handles cases where Tidal search doesn't return albums that exist + await rate_limiter.acquire() + artist_simple = simple(artist_name)[-1] # Most simplified + print(f" Final search: artist-only '{artist_simple}'") + try: + artist_result = tidal_session.search(artist_simple, models=[tidalapi.album.Album]) + if artist_result and 'albums' in artist_result: + print(f" Artist-only search found {len(artist_result['albums'])} albums") + for tidal_album in artist_result['albums']: + if album_match(tidal_album, spotify_album, config): + print(f" ✓ Match found using artist-only search") + return tidal_album + else: + print(f" Artist-only search found no results") + except Exception as e: + print(f" Artist-only search for '{artist_simple}' failed: {e}") + + return None + + # Rate limiter setup similar to track search + async def _run_rate_limiter(semaphore): + _sleep_time = config.get('max_concurrency', 10)/config.get('rate_limit', 10)/4 + t0 = datetime.datetime.now() + while True: + await asyncio.sleep(_sleep_time) + t = datetime.datetime.now() + dt = (t - t0).total_seconds() + new_items = round(config.get('rate_limit', 10)*dt) + t0 = t + [semaphore.release() for _ in range(new_items)] + + albums_to_search = get_new_spotify_albums(spotify_albums) + if not albums_to_search: + return + + # Search for each album on Tidal concurrently + task_description = f"Searching Tidal for {len(albums_to_search)}/{len(spotify_albums)} albums" + semaphore = asyncio.Semaphore(config.get('max_concurrency', 10)) + rate_limiter_task = asyncio.create_task(_run_rate_limiter(semaphore)) + search_results = await atqdm.gather(*[repeat_on_request_error(tidal_album_search, a, semaphore, tidal_session) for a in albums_to_search], desc=task_description) + rate_limiter_task.cancel() + + # Add search results to cache + albums_not_found = [] + for idx, spotify_album in enumerate(albums_to_search): + if search_results[idx]: + 
async def get_all_saved_albums(user: tidalapi.User, chunk_size: int=20) -> List[tidalapi.Album]:
    """Fetch the albums saved in ``user``'s Tidal favorites.

    The synchronous ``user.favorites.albums_paginated`` call is handed to
    ``asyncio.to_thread`` so it runs in a worker thread while this coroutine
    awaits the result.

    Args:
        user: Tidal user whose favorite albums are loaded.
        chunk_size: Accepted but not used by this function.

    Returns:
        Whatever ``albums_paginated`` returns (annotated as a list of albums).
    """
    print("Loading saved albums from Tidal")
    fetch_saved_albums = user.favorites.albums_paginated
    saved_albums = await asyncio.to_thread(fetch_saved_albums)
    return saved_albums
class TestAlbumMatching:
    """Checks ``album_match`` on single Tidal/Spotify album pairs."""

    @staticmethod
    def _spotify_album(title, *artist_names):
        """Build the minimal Spotify album payload passed to ``album_match``."""
        return {"name": title, "artists": [{"name": artist} for artist in artist_names]}

    @pytest.fixture
    def config_with_fuzzy(self):
        """Config with fuzzy matching on and a 0.80 name threshold."""
        return dict(enable_fuzzy_matching=True, fuzzy_name_threshold=0.80)

    @pytest.fixture
    def config_without_fuzzy(self):
        """Config with fuzzy matching switched off."""
        return dict(enable_fuzzy_matching=False)

    def test_exact_album_match(self, config_with_fuzzy, config_without_fuzzy):
        """Identical name and artist must match under either config."""
        tidal = MockTidalAlbum("OK Computer", ["Radiohead"])
        spotify = self._spotify_album("OK Computer", "Radiohead")

        for cfg in (config_with_fuzzy, config_without_fuzzy):
            assert album_match(tidal, spotify, cfg)

    def test_fuzzy_album_name_match(self, config_with_fuzzy, config_without_fuzzy):
        """A one-letter typo in the name matches only when fuzzy matching is on."""
        tidal = MockTidalAlbum("OK Computer", ["Radiohead"])
        # "Komputer" is the deliberate typo.
        spotify = self._spotify_album("OK Komputer", "Radiohead")

        assert album_match(tidal, spotify, config_with_fuzzy)
        assert not album_match(tidal, spotify, config_without_fuzzy)

    def test_album_artist_mismatch(self, config_with_fuzzy):
        """The same album name by a different artist must not match."""
        tidal = MockTidalAlbum("OK Computer", ["Radiohead"])
        spotify = self._spotify_album("OK Computer", "Coldplay")

        assert not album_match(tidal, spotify, config_with_fuzzy)

    def test_album_name_normalization(self, config_with_fuzzy):
        """Accented Tidal names match their unaccented Spotify spellings."""
        tidal = MockTidalAlbum("Ágætis byrjun", ["Sigur Rós"])
        spotify = self._spotify_album("Agaetis byrjun", "Sigur Ros")

        assert album_match(tidal, spotify, config_with_fuzzy)

    def test_album_substring_match(self, config_with_fuzzy):
        """A Tidal name contained in the longer Spotify name matches.

        Only the fuzzy-enabled config is exercised here.
        """
        tidal = MockTidalAlbum("The Wall", ["Pink Floyd"])
        spotify = self._spotify_album("The Wall - Remastered", "Pink Floyd")

        assert album_match(tidal, spotify, config_with_fuzzy)
class TestAlbumSyncIntegration:
    """Integration-style checks for ``album_match_cache`` and ``album_match``.

    Each ``album_match`` scenario pairs one Spotify album payload (a dict with
    ``name`` and ``artists``) with one or more mocked Tidal albums and pins
    whether the two are expected to be treated as the same release.
    """

    def setup_method(self):
        """Reset the album cache before each test."""
        album_match_cache.data = {}

    @pytest.fixture
    def artist_fuzzy_config(self):
        """Fuzzy matching enabled with only the artist threshold set."""
        return {'enable_fuzzy_matching': True, 'fuzzy_artist_threshold': 0.75}

    @pytest.fixture
    def fuzzy_config(self):
        """Fuzzy matching enabled with explicit name and artist thresholds."""
        return {
            'enable_fuzzy_matching': True,
            'fuzzy_name_threshold': 0.80,
            'fuzzy_artist_threshold': 0.75,
        }

    def test_album_cache_insert_and_retrieve(self):
        """Inserted pairs are retrievable; unknown keys yield ``None``."""
        album_match_cache.insert(("spotify_test", "tidal_test"))

        assert album_match_cache.get("spotify_test") == "tidal_test"
        assert album_match_cache.get("nonexistent") is None

    def test_album_match_with_multiple_artists(self, artist_fuzzy_config):
        """Albums credited to the same two artists on both services match."""
        tidal_album = MockTidalAlbum("Collaboration Album", ["Artist One", "Artist Two"])
        spotify_album = {
            "name": "Collaboration Album",
            "artists": [{"name": "Artist One"}, {"name": "Artist Two"}],
        }

        assert album_match(tidal_album, spotify_album, artist_fuzzy_config)

    def test_album_match_partial_artist_overlap(self, artist_fuzzy_config):
        """One overlapping artist is enough for a match."""
        tidal_album = MockTidalAlbum("Various Artists Album", ["Artist One", "Artist Two"])
        spotify_album = {
            "name": "Various Artists Album",
            "artists": [{"name": "Artist One"}],  # Only one of the artists
        }

        assert album_match(tidal_album, spotify_album, artist_fuzzy_config)

    def test_smashing_pumpkins_case(self, fuzzy_config):
        """Regression: "The" prefix, casing and a "(Deluxe Edition)" suffix."""
        tidal_album = MockTidalAlbum("Mellon Collie and the Infinite Sadness", ["Smashing Pumpkins"])
        spotify_album = {
            "name": "Mellon Collie And The Infinite Sadness (Deluxe Edition)",
            "artists": [{"name": "The Smashing Pumpkins"}],
        }

        assert album_match(tidal_album, spotify_album, fuzzy_config)

    def test_artist_fuzzy_matching_with_the_prefix(self, artist_fuzzy_config):
        """Artist names differing only by a leading "The" match."""
        tidal_album = MockTidalAlbum("Test Album", ["Beatles"])
        spotify_album = {
            "name": "Test Album",
            "artists": [{"name": "The Beatles"}],
        }

        assert album_match(tidal_album, spotify_album, artist_fuzzy_config)

    def test_run_dmc_case(self, fuzzy_config):
        """Regression: Run-DMC with an en-dash and varying punctuation."""
        spotify_album = {
            "name": "RUN-DMC (Expanded Edition)",
            # Spotify spells the artist with an en-dash (U+2013); escaped so
            # it cannot be confused with an ASCII hyphen.
            "artists": [{"name": "Run\u2013D.M.C."}],
        }

        # ASCII hyphen on the Tidal side, periods kept.
        tidal_album = MockTidalAlbum("RUN-DMC", ["Run-D.M.C."])
        assert album_match(tidal_album, spotify_album, fuzzy_config)

        # ASCII hyphen and no periods.
        tidal_album2 = MockTidalAlbum("RUN-DMC", ["Run-DMC"])
        assert album_match(tidal_album2, spotify_album, fuzzy_config)

    def test_stephen_malkmus_case(self, fuzzy_config):
        """Regression: "&" in the artist name, and a main-artist-only credit."""
        spotify_album = {
            "name": "Wig Out at Jagbags",
            "artists": [{"name": "Stephen Malkmus & The Jicks"}],
        }

        # Exact match with "&".
        tidal_album = MockTidalAlbum("Wig Out at Jagbags", ["Stephen Malkmus & The Jicks"])
        assert album_match(tidal_album, spotify_album, fuzzy_config)

        # Tidal credits only the main artist.
        tidal_album_main_artist = MockTidalAlbum("Wig Out at Jagbags", ["Stephen Malkmus"])
        assert album_match(tidal_album_main_artist, spotify_album, fuzzy_config)

    def test_king_gizzard_case(self, fuzzy_config):
        """Regression: apostrophe in the title and "&" vs "and" in the artist."""
        spotify_album = {
            "name": "I'm In Your Mind Fuzz",
            "artists": [{"name": "King Gizzard & The Lizard Wizard"}],
        }

        # Exact match with apostrophe and "&".
        tidal_album1 = MockTidalAlbum("I'm In Your Mind Fuzz", ["King Gizzard & The Lizard Wizard"])
        assert album_match(tidal_album1, spotify_album, fuzzy_config)

        # Title without the apostrophe.
        tidal_album2 = MockTidalAlbum("Im In Your Mind Fuzz", ["King Gizzard & The Lizard Wizard"])
        assert album_match(tidal_album2, spotify_album, fuzzy_config)

        # "and" instead of "&".
        tidal_album3 = MockTidalAlbum("I'm In Your Mind Fuzz", ["King Gizzard and The Lizard Wizard"])
        assert album_match(tidal_album3, spotify_album, fuzzy_config)

        # Shortened artist name.
        tidal_album4 = MockTidalAlbum("I'm In Your Mind Fuzz", ["King Gizzard"])
        assert album_match(tidal_album4, spotify_album, fuzzy_config)

    def test_ichiko_aoba_case(self, fuzzy_config):
        """Regression: Ichiko Aoba album that previously failed in search."""
        spotify_album = {
            "name": "Asleep Among Endives",
            "artists": [{"name": "Ichiko Aoba"}],
        }

        # Exact match.
        tidal_album1 = MockTidalAlbum("Asleep Among Endives", ["Ichiko Aoba"])
        assert album_match(tidal_album1, spotify_album, fuzzy_config)

        # Different casing.
        tidal_album2 = MockTidalAlbum("asleep among endives", ["ichiko aoba"])
        assert album_match(tidal_album2, spotify_album, fuzzy_config)

        # NOTE(review): an earlier third assertion was labelled "album name
        # shortened" but reused the exact-match inputs, so it was dropped as a
        # duplicate. A genuinely shortened title is not covered here.

    # One row per (spotify_name, spotify_artist, tidal_name, tidal_artists,
    # should_match). Running the rows as separate parametrized tests means a
    # failing variant no longer hides the variants that follow it.
    _MATCH_CASES = [
        # Nirvana "Bleach": colon vs. parentheses punctuation
        pytest.param(
            "Bleach: Deluxe Edition", "Nirvana",
            "Bleach (Deluxe Edition)", ["Nirvana"], True,
            id="nirvana-parenthesised-edition",
        ),
        pytest.param(
            "Bleach: Deluxe Edition", "Nirvana",
            "Bleach: Deluxe Edition", ["Nirvana"], True,
            id="nirvana-exact",
        ),
        # Smashing Pumpkins: "The" prefix handling
        pytest.param(
            "Mellon Collie And The Infinite Sadness (Deluxe Edition)", "The Smashing Pumpkins",
            "Mellon Collie and the Infinite Sadness", ["Smashing Pumpkins"], True,
            id="smashing-pumpkins-no-the-no-edition",
        ),
        pytest.param(
            "Mellon Collie And The Infinite Sadness (Deluxe Edition)", "The Smashing Pumpkins",
            "Mellon Collie And The Infinite Sadness (Deluxe Edition)", ["The Smashing Pumpkins"], True,
            id="smashing-pumpkins-exact",
        ),
        # Run-DMC: en-dash (U+2013) vs. hyphen and punctuation variations
        pytest.param(
            "RUN-DMC (Expanded Edition)", "Run\u2013D.M.C.",
            "RUN-DMC", ["Run-D.M.C."], True,
            id="run-dmc-hyphen-with-periods",
        ),
        pytest.param(
            "RUN-DMC (Expanded Edition)", "Run\u2013D.M.C.",
            "RUN-DMC", ["Run-DMC"], True,
            id="run-dmc-hyphen-no-periods",
        ),
        pytest.param(
            "RUN-DMC (Expanded Edition)", "Run\u2013D.M.C.",
            "RUN-DMC (Expanded Edition)", ["Run\u2013D.M.C."], True,
            id="run-dmc-exact",
        ),
        # Stephen Malkmus: "&" vs. "and" in artist names
        pytest.param(
            "Wig Out at Jagbags", "Stephen Malkmus & The Jicks",
            "Wig Out at Jagbags", ["Stephen Malkmus & The Jicks"], True,
            id="malkmus-exact",
        ),
        pytest.param(
            "Wig Out at Jagbags", "Stephen Malkmus & The Jicks",
            "Wig Out at Jagbags", ["Stephen Malkmus and The Jicks"], True,
            id="malkmus-and-for-ampersand",
        ),
        pytest.param(
            "Wig Out at Jagbags", "Stephen Malkmus & The Jicks",
            "Wig Out at Jagbags", ["Stephen Malkmus"], True,
            id="malkmus-main-artist-only",
        ),
        # King Gizzard: apostrophe and "&" vs. "and"
        pytest.param(
            "I'm In Your Mind Fuzz", "King Gizzard & The Lizard Wizard",
            "I'm In Your Mind Fuzz", ["King Gizzard & The Lizard Wizard"], True,
            id="king-gizzard-exact",
        ),
        pytest.param(
            "I'm In Your Mind Fuzz", "King Gizzard & The Lizard Wizard",
            "Im In Your Mind Fuzz", ["King Gizzard & The Lizard Wizard"], True,
            id="king-gizzard-no-apostrophe",
        ),
        pytest.param(
            "I'm In Your Mind Fuzz", "King Gizzard & The Lizard Wizard",
            "I'm In Your Mind Fuzz", ["King Gizzard and The Lizard Wizard"], True,
            id="king-gizzard-and-for-ampersand",
        ),
        pytest.param(
            "I'm In Your Mind Fuzz", "King Gizzard & The Lizard Wizard",
            "I'm In Your Mind Fuzz", ["King Gizzard"], True,
            id="king-gizzard-shortened-artist",
        ),
        # Ichiko Aoba: failed in search, but must match once found
        pytest.param(
            "Asleep Among Endives", "Ichiko Aoba",
            "Asleep Among Endives", ["Ichiko Aoba"], True,
            id="ichiko-aoba-exact",
        ),
        pytest.param(
            "Asleep Among Endives", "Ichiko Aoba",
            "asleep among endives", ["ichiko aoba"], True,
            id="ichiko-aoba-lowercase",
        ),
        # Fuzzy threshold edge cases. The similarity figures are the original
        # author's, quoted against the 0.80 name threshold.
        pytest.param(
            "OK Computer", "Radiohead",
            "OK Komputer", ["Radiohead"], True,  # 0.909 > 0.80
            id="ok-computer-typo",
        ),
        pytest.param(
            "OK Computer", "Radiohead",
            "Computer OK", ["Radiohead"], False,  # 0.727 < 0.80
            id="ok-computer-reordered",
        ),
        pytest.param(
            "OK Computer", "Radiohead",
            "The OK Computer", ["Radiohead"], True,  # substring
            id="ok-computer-substring",
        ),
    ]

    @pytest.mark.parametrize(
        "spotify_name, spotify_artist, tidal_name, tidal_artists, should_match",
        _MATCH_CASES,
    )
    def test_comprehensive_matching_cases(
        self, fuzzy_config, spotify_name, spotify_artist,
        tidal_name, tidal_artists, should_match,
    ):
        """Run one row of the album-matching edge-case table."""
        spotify_album = {"name": spotify_name, "artists": [{"name": spotify_artist}]}
        tidal_album = MockTidalAlbum(tidal_name, tidal_artists)

        result = album_match(tidal_album, spotify_album, fuzzy_config)

        assert result == should_match, (
            f"Expected {should_match}, got {result} for {tidal_name} by {tidal_artists}"
        )
class TestSimpleFunction:
    """Tests for the ``simple()`` text normalization function.

    As pinned by these assertions, ``simple()`` returns a list whose first
    item is the cleaned-up input and whose optional second item is a
    simplified variant with bracketed/parenthesised parts removed.
    """

    def test_empty_string(self):
        """Empty and whitespace-only input both return ``[""]``."""
        assert simple("") == [""]
        assert simple(" ") == [""]
        # A run of spaces is whitespace-only too.
        assert simple(" " * 3) == [""]

    def test_simple_strings_without_parentheses(self):
        """Strings without parentheses return a single-item list."""
        assert simple("OK Computer") == ["OK Computer"]
        assert simple("The Beatles") == ["The Beatles"]
        assert simple("Dark Side of the Moon") == ["Dark Side of the Moon"]

    def test_strings_with_parentheses(self):
        """Strings with parentheses return the exact and simplified versions."""
        assert simple("OK Computer (Deluxe Edition)") == ["OK Computer (Deluxe Edition)", "OK Computer"]
        assert simple("The Wall (Remastered)") == ["The Wall (Remastered)", "The Wall"]
        assert simple("Album Title (Special Edition) (Bonus Tracks)") == [
            "Album Title (Special Edition) (Bonus Tracks)",
            "Album Title",
        ]

    def test_strings_with_brackets(self):
        """Strings with square brackets return the exact and simplified versions."""
        assert simple("OK Computer [Deluxe Edition]") == ["OK Computer [Deluxe Edition]", "OK Computer"]
        assert simple("The Wall [Remastered]") == ["The Wall [Remastered]", "The Wall"]
        assert simple("Album [Special Edition] [Bonus]") == ["Album [Special Edition] [Bonus]", "Album"]

    def test_mixed_brackets_and_parentheses(self):
        """Brackets and parentheses in the same string are both simplified away."""
        assert simple("Album Title [Deluxe] (Remastered)") == ["Album Title [Deluxe] (Remastered)", "Album Title"]
        assert simple("Song (Radio Edit) [Single]") == ["Song (Radio Edit) [Single]", "Song"]

    def test_dash_normalization(self):
        """Different dash types are normalized to regular hyphens.

        The inputs use escapes because the glyphs look alike in most fonts.
        """
        # en-dash (U+2013)
        assert simple("Test\u2013Album") == ["Test-Album"]
        # em-dash (U+2014)
        assert simple("Test\u2014Album") == ["Test-Album"]
        # minus sign (U+2212)
        assert simple("Test\u2212Album") == ["Test-Album"]
        # all three in one string
        assert simple("Test\u2013Album\u2014Song\u2212Mix") == ["Test-Album-Song-Mix"]

    def test_dash_normalization_with_parentheses(self):
        """Dash normalization applies to both the exact and simplified versions."""
        assert simple("Test\u2013Album (Deluxe Edition)") == ["Test-Album (Deluxe Edition)", "Test-Album"]
        assert simple("Artist\u2014Album (Remastered)") == ["Artist-Album (Remastered)", "Artist-Album"]

    def test_whitespace_normalization(self):
        """Runs of whitespace collapse to single spaces; the ends are trimmed."""
        # Space runs are built with multiplication so they survive tools that
        # squeeze repeated spaces in source text.
        gap = " " * 4
        assert simple("OK" + gap + "Computer") == ["OK Computer"]
        assert simple(gap + "OK Computer" + gap) == ["OK Computer"]
        assert simple(" OK Computer ") == ["OK Computer"]
        assert simple("OK\tComputer\nAlbum") == ["OK Computer Album"]

    def test_whitespace_with_parentheses(self):
        """Whitespace normalization combined with parentheses handling."""
        gap = " " * 2
        assert simple("OK" + gap + "Computer" + gap + "(Deluxe" + gap + "Edition)") == [
            "OK Computer (Deluxe Edition)",
            "OK Computer",
        ]
        assert simple(" Album Title ( Special ) ") == ["Album Title ( Special )", "Album Title"]
        assert simple(gap + "Album" + gap + "Title" + gap + "(" + gap + "Special" + gap + ")" + gap) == [
            "Album Title ( Special )",
            "Album Title",
        ]

    def test_duplicate_detection(self):
        """Only one version is returned when exact and simplified coincide."""
        # No parentheses means exact == simplified, so a single item.
        assert simple("Simple Album") == ["Simple Album"]
        # Empty parentheses still differ from the stripped form: two items.
        assert simple("Album ()") == ["Album ()", "Album"]
        # The same holds for whitespace-only parentheses.
        assert simple("Album ( )") == ["Album ( )", "Album"]

    def test_edge_cases(self):
        """Inputs where the simplified version ends up empty or truncated."""
        # Only parentheses
        assert simple("(Deluxe Edition)") == ["(Deluxe Edition)", ""]
        # Only brackets
        assert simple("[Remastered]") == ["[Remastered]", ""]
        # Parentheses at the beginning: nothing precedes them
        assert simple("(Special) Album Title") == ["(Special) Album Title", ""]
        # Multiple groups: only the text before the first group is kept
        assert simple("Album (Part 1) Title (Part 2)") == ["Album (Part 1) Title (Part 2)", "Album"]

    def test_real_world_examples(self):
        """Real-world album/track names that have caused issues."""
        # Smashing Pumpkins case
        assert simple("Mellon Collie And The Infinite Sadness (Deluxe Edition)") == [
            "Mellon Collie And The Infinite Sadness (Deluxe Edition)",
            "Mellon Collie And The Infinite Sadness",
        ]

        # Run-DMC with en-dash (U+2013)
        assert simple("RUN\u2013DMC (Expanded Edition)") == [
            "RUN-DMC (Expanded Edition)",
            "RUN-DMC",
        ]

        # King Gizzard with apostrophe
        assert simple("I'm In Your Mind Fuzz") == ["I'm In Your Mind Fuzz"]

        # Nirvana with colon
        assert simple("Bleach: Deluxe Edition") == ["Bleach: Deluxe Edition"]

        # Multiple format indicators
        assert simple("Album Title (Deluxe) (Remastered) (Bonus Tracks)") == [
            "Album Title (Deluxe) (Remastered) (Bonus Tracks)",
            "Album Title",
        ]

    def test_unicode_normalization(self):
        """Accented characters are preserved, not stripped or transliterated."""
        assert simple("Ágætis byrjun") == ["Ágætis byrjun"]
        assert simple("Ágætis byrjun (Deluxe)") == ["Ágætis byrjun (Deluxe)", "Ágætis byrjun"]
        assert simple("Sigur Rós") == ["Sigur Rós"]

    def test_special_punctuation_preservation(self):
        """Colons, periods and apostrophes are preserved."""
        assert simple("OK Computer: OKNOTOK 1997 2017") == ["OK Computer: OKNOTOK 1997 2017"]
        assert simple("Don't Look Back") == ["Don't Look Back"]
        assert simple("U.S.A.") == ["U.S.A."]
        assert simple("Ph.D.") == ["Ph.D."]

    def test_function_return_type(self):
        """The function always returns a non-empty list."""
        result = simple("Test")
        assert isinstance(result, list)
        assert len(result) >= 1  # Always at least one item

        result = simple("Test (Deluxe)")
        assert isinstance(result, list)
        assert len(result) == 2  # Exactly two items when the versions differ