"""Heuristic prefilter that narrows a movie library down to mood-relevant candidates.

A free-text "mood" string is scanned for known keywords (see ``MOOD_SIGNALS``)
and for a decade hint; every movie row is then filtered and scored, and the
best-scoring rows are returned as ``Movie`` models.
"""

import json
import re

from app.models import Movie

# Mood signal → genre boosts and filters.
# Each keyword maps to:
#   "boost":      genres that raise a movie's score when the keyword is present
#   "penalize":   genres that lower a movie's score when the keyword is present
#   "max_rating": strictest content rating allowed (None = no rating filter)
MOOD_SIGNALS = {
    "kids": {
        "boost": ["Family", "Animation", "Comedy", "Adventure"],
        "penalize": ["Horror", "Thriller"],
        "max_rating": "PG-13",
    },
    "children": {
        "boost": ["Family", "Animation", "Comedy", "Adventure"],
        "penalize": ["Horror", "Thriller"],
        "max_rating": "PG",
    },
    "family": {
        "boost": ["Family", "Animation", "Comedy", "Adventure"],
        "penalize": ["Horror", "Thriller"],
        "max_rating": "PG-13",
    },
    "scary": {
        "boost": ["Horror", "Thriller", "Mystery"],
        "penalize": [],
        "max_rating": None,
    },
    "horror": {
        "boost": ["Horror", "Thriller"],
        "penalize": [],
        "max_rating": None,
    },
    "spooky": {
        "boost": ["Horror", "Thriller", "Mystery", "Fantasy"],
        "penalize": [],
        "max_rating": None,
    },
    "creepy": {
        "boost": ["Horror", "Thriller", "Mystery"],
        "penalize": [],
        "max_rating": None,
    },
    "funny": {
        "boost": ["Comedy"],
        "penalize": ["Horror", "War"],
        "max_rating": None,
    },
    "comedy": {
        "boost": ["Comedy"],
        "penalize": [],
        "max_rating": None,
    },
    "laugh": {
        "boost": ["Comedy"],
        "penalize": [],
        "max_rating": None,
    },
    "light": {
        "boost": ["Comedy", "Romance", "Animation", "Family"],
        "penalize": ["Horror", "Thriller", "War"],
        "max_rating": None,
    },
    "fun": {
        "boost": ["Comedy", "Adventure", "Animation", "Action"],
        "penalize": ["Horror", "War"],
        "max_rating": None,
    },
    "feel-good": {
        "boost": ["Comedy", "Romance", "Family", "Animation"],
        "penalize": ["Horror", "Thriller", "War"],
        "max_rating": None,
    },
    "relax": {
        "boost": ["Comedy", "Romance", "Drama"],
        "penalize": ["Horror", "Thriller", "Action"],
        "max_rating": None,
    },
    "action": {
        "boost": ["Action", "Adventure", "Sci-Fi", "Thriller"],
        "penalize": [],
        "max_rating": None,
    },
    "exciting": {
        "boost": ["Action", "Adventure", "Thriller"],
        "penalize": [],
        "max_rating": None,
    },
    "adventure": {
        "boost": ["Adventure", "Action", "Fantasy", "Sci-Fi"],
        "penalize": [],
        "max_rating": None,
    },
    "intense": {
        "boost": ["Action", "Thriller", "Drama", "War"],
        "penalize": [],
        "max_rating": None,
    },
    "romantic": {
        "boost": ["Romance", "Comedy", "Drama"],
        "penalize": ["Horror", "War"],
        "max_rating": None,
    },
    "romance": {
        "boost": ["Romance", "Comedy", "Drama"],
        "penalize": [],
        "max_rating": None,
    },
    "date night": {
        "boost": ["Romance", "Comedy", "Drama", "Thriller"],
        "penalize": [],
        "max_rating": None,
    },
    "date": {
        "boost": ["Romance", "Comedy", "Drama"],
        "penalize": [],
        "max_rating": None,
    },
    "sad": {
        "boost": ["Drama", "Romance"],
        "penalize": ["Comedy", "Animation"],
        "max_rating": None,
    },
    "cry": {
        "boost": ["Drama", "Romance", "War"],
        "penalize": [],
        "max_rating": None,
    },
    "drama": {
        "boost": ["Drama"],
        "penalize": [],
        "max_rating": None,
    },
    "sci-fi": {
        "boost": ["Science Fiction", "Sci-Fi", "Fantasy"],
        "penalize": [],
        "max_rating": None,
    },
    "space": {
        "boost": ["Science Fiction", "Sci-Fi"],
        "penalize": [],
        "max_rating": None,
    },
    "fantasy": {
        "boost": ["Fantasy", "Adventure"],
        "penalize": [],
        "max_rating": None,
    },
    "mystery": {
        "boost": ["Mystery", "Thriller", "Crime"],
        "penalize": [],
        "max_rating": None,
    },
    "crime": {
        "boost": ["Crime", "Thriller", "Mystery"],
        "penalize": [],
        "max_rating": None,
    },
    "documentary": {
        "boost": ["Documentary"],
        "penalize": [],
        "max_rating": None,
    },
    "war": {
        "boost": ["War", "History", "Drama"],
        "penalize": [],
        "max_rating": None,
    },
    "classic": {
        "boost": [],
        "penalize": [],
        "max_rating": None,
    },
    "animated": {
        "boost": ["Animation"],
        "penalize": [],
        "max_rating": None,
    },
    "anime": {
        "boost": ["Animation"],
        "penalize": [],
        "max_rating": None,
    },
    "music": {
        "boost": ["Music", "Musical"],
        "penalize": [],
        "max_rating": None,
    },
    "musical": {
        "boost": ["Music", "Musical"],
        "penalize": [],
        "max_rating": None,
    },
    "western": {
        "boost": ["Western"],
        "penalize": [],
        "max_rating": None,
    },
    "superhero": {
        "boost": ["Action", "Adventure", "Science Fiction"],
        "penalize": [],
        "max_rating": None,
    },
}

# Content rating hierarchy for family filtering, from most to least permissive
# for young audiences. Unrated labels sort after every real rating, so they are
# excluded whenever a max rating is in force.
RATING_ORDER = ["G", "PG", "PG-13", "R", "NC-17", "NR", "Not Rated", None]

# MOOD_SIGNALS spells the same genre both "Sci-Fi" and "Science Fiction".
# Genre names are mapped to one canonical spelling before comparison so a
# boost for either spelling applies to movies tagged with the other.
_GENRE_ALIASES = {"Sci-Fi": "Science Fiction"}

# Explicit decade such as "1980s" or "1980's" (straight or curly apostrophe).
_DECADE_RE = re.compile(r"\b(19|20)(\d)0['’]?s\b")

# Words that imply "older movies" without naming a decade.
_VINTAGE_RE = re.compile(r"\b(old|classic|vintage|retro)\b")

# A mood token: a run of word characters, optionally joined by "-" or "'"
# (keeps "feel-good" and "sci-fi" whole while dropping surrounding punctuation).
_WORD_RE = re.compile(r"\w+(?:['-]\w+)*")


def _canonical_genres(genres) -> set[str]:
    """Return *genres* as a set with aliased spellings mapped to one name."""
    return {_GENRE_ALIASES.get(genre, genre) for genre in genres}


def _parse_decade(mood: str) -> tuple[int, int] | None:
    """Extract an inclusive ``(first_year, last_year)`` filter from mood text.

    An explicit decade ("1990s", "1990's") yields that decade. Otherwise the
    words "old", "classic", "vintage" or "retro" yield the fixed range
    1920-1989. Returns ``None`` when the mood carries no era hint.
    """
    text = mood.lower()

    explicit = _DECADE_RE.search(text)
    if explicit:
        decade_start = int(f"{explicit.group(1)}{explicit.group(2)}0")
        return (decade_start, decade_start + 9)

    if _VINTAGE_RE.search(text):
        return (1920, 1989)

    return None


def _is_rating_appropriate(content_rating: str | None, max_rating: str | None) -> bool:
    """Check if a movie's content rating is at or below the max allowed.

    Returns ``True`` when no limit is set, when the movie has no rating at
    all, or when either rating is not listed in ``RATING_ORDER`` (unknown
    formats are deliberately let through rather than hidden).
    """
    if max_rating is None or content_rating is None:
        return True
    try:
        return RATING_ORDER.index(content_rating) <= RATING_ORDER.index(max_rating)
    except ValueError:
        # Rating label not in RATING_ORDER: let it through.
        return True


def _parse_movie(raw: dict) -> Movie:
    """Convert a raw DB row dict into a Movie model.

    ``genres``, ``studios``, ``people`` and ``tags`` are decoded from JSON
    text; a missing or empty value becomes an empty list.

    Raises:
        KeyError: if ``jellyfin_id`` or ``title`` is missing from *raw*.
    """
    return Movie(
        jellyfin_id=raw["jellyfin_id"],
        title=raw["title"],
        sort_title=raw.get("sort_title"),
        year=raw.get("year"),
        genres=json.loads(raw.get("genres") or "[]"),
        overview=raw.get("overview"),
        community_rating=raw.get("community_rating"),
        critic_rating=raw.get("critic_rating"),
        runtime_minutes=raw.get("runtime_minutes"),
        content_rating=raw.get("content_rating"),
        studios=json.loads(raw.get("studios") or "[]"),
        people=json.loads(raw.get("people") or "[]"),
        tags=json.loads(raw.get("tags") or "[]"),
    )


def _collect_signals(mood_lower: str) -> tuple[set[str], set[str], str | None]:
    """Merge the signals of every MOOD_SIGNALS keyword found in *mood_lower*.

    Returns ``(boost_genres, penalize_genres, max_rating)``. The strictest
    ``max_rating`` among the matched keywords wins, and a genre that is both
    boosted and penalized is treated as boosted only.
    """
    boost: set[str] = set()
    penalize: set[str] = set()
    max_rating: str | None = None

    for keyword, signals in MOOD_SIGNALS.items():
        # Match only at the start of a word: "funny" and "laughing" still hit
        # "fun"/"laugh", but "award" no longer hits "war" and "update" no
        # longer hits "date".
        if not re.search(r"\b" + re.escape(keyword), mood_lower):
            continue

        boost.update(signals["boost"])
        penalize.update(signals["penalize"])

        candidate = signals["max_rating"]
        if candidate and (
            max_rating is None
            or RATING_ORDER.index(candidate) < RATING_ORDER.index(max_rating)
        ):
            max_rating = candidate

    boost = _canonical_genres(boost)
    # Remove any genres that appear in both boost and penalize.
    penalize = _canonical_genres(penalize) - boost
    return boost, penalize, max_rating


def _score_movie(
    raw: dict,
    movie_genres: set[str],
    boost_genres: set[str],
    penalize_genres: set[str],
    mood_words: list[str],
) -> float:
    """Compute the relevance score of one movie row for the active signals."""
    # +3 per boosted genre, -2 per penalized genre.
    score = 0.0
    score += len(movie_genres & boost_genres) * 3.0
    score -= len(movie_genres & penalize_genres) * 2.0

    # Higher community ratings get a small boost.
    rating = raw.get("community_rating")
    if rating:
        score += rating * 0.3

    # +1 for every distinct mood word that occurs in the overview text.
    overview = (raw.get("overview") or "").lower()
    for word in mood_words:
        if word in overview:
            score += 1.0

    return score


def prefilter_candidates(movies_raw: list[dict], mood: str, max_candidates: int = 200) -> list[Movie]:
    """Score and filter movies based on mood signals.

    Args:
        movies_raw: Raw DB rows; each row is read with the keys ``genres``
            (JSON text), ``content_rating``, ``year``, ``community_rating``
            and ``overview``, plus whatever ``_parse_movie`` requires.
        mood: Free-text description of what the user wants to watch.
        max_candidates: Upper bound on the number of movies returned;
            values below zero are treated as zero.

    Returns:
        Up to ``max_candidates`` movies as ``Movie`` models, best score
        first. Rows with equal scores keep their input order.
    """
    mood_lower = mood.lower()

    boost_genres, penalize_genres, max_rating = _collect_signals(mood_lower)
    decade = _parse_decade(mood)

    # Loop-invariant: distinct mood words longer than three characters, with
    # punctuation stripped so "scary," can still match an overview.
    mood_words = list(
        dict.fromkeys(w for w in _WORD_RE.findall(mood_lower) if len(w) > 3)
    )

    scored: list[tuple[float, dict]] = []
    for raw in movies_raw:
        # Filter by content rating.
        if not _is_rating_appropriate(raw.get("content_rating"), max_rating):
            continue

        # Filter by decade; rows without a year are kept.
        year = raw.get("year")
        if decade and year and not (decade[0] <= year <= decade[1]):
            continue

        movie_genres = _canonical_genres(json.loads(raw.get("genres") or "[]"))
        score = _score_movie(raw, movie_genres, boost_genres, penalize_genres, mood_words)
        scored.append((score, raw))

    # Sort by score descending (stable, so ties keep input order).
    scored.sort(key=lambda item: item[0], reverse=True)

    # A negative limit would otherwise slice from the tail of the list.
    limit = max(max_candidates, 0)
    return [_parse_movie(raw) for _, raw in scored[:limit]]