Files
movie-night/app/services/prefilter.py
T
kbondelie 3d5de06b44 Initial commit — Movie Night media discovery app
AI-powered web app that recommends unwatched movies from a Jellyfin
library based on natural language mood input. Jellyfin auth, modular
LLM backend (Claude/OpenAI/Ollama), two-tier pre-filter + AI ranking,
mobile-responsive dark theme UI with poster cards and deep links.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 19:20:56 -07:00

169 lines
7.9 KiB
Python

import json
import re
from app.models import Movie
# Mood signal → genre boosts and filters.
#
# Each key is a keyword looked for in the lower-cased mood text; every keyword
# that is present contributes its entry:
#   "boost"      - genre names that raise a movie's score when it has them
#   "penalize"   - genre names that lower a movie's score when it has them
#   "max_rating" - highest content rating to keep (None = no cap)
#
# "Science Fiction" and "Sci-Fi" are treated as aliases for the same genre
# (see the "sci-fi" / "space" entries), so every entry that boosts one of them
# lists both; otherwise the boost would depend on which spelling a movie uses.
MOOD_SIGNALS = {
    # Family viewing: boosts family-friendly genres and caps the content rating.
    "kids": {"boost": ["Family", "Animation", "Comedy", "Adventure"], "penalize": ["Horror", "Thriller"], "max_rating": "PG-13"},
    "children": {"boost": ["Family", "Animation", "Comedy", "Adventure"], "penalize": ["Horror", "Thriller"], "max_rating": "PG"},
    "family": {"boost": ["Family", "Animation", "Comedy", "Adventure"], "penalize": ["Horror", "Thriller"], "max_rating": "PG-13"},
    # Scary moods
    "scary": {"boost": ["Horror", "Thriller", "Mystery"], "penalize": [], "max_rating": None},
    "horror": {"boost": ["Horror", "Thriller"], "penalize": [], "max_rating": None},
    "spooky": {"boost": ["Horror", "Thriller", "Mystery", "Fantasy"], "penalize": [], "max_rating": None},
    "creepy": {"boost": ["Horror", "Thriller", "Mystery"], "penalize": [], "max_rating": None},
    # Light / funny moods
    "funny": {"boost": ["Comedy"], "penalize": ["Horror", "War"], "max_rating": None},
    "comedy": {"boost": ["Comedy"], "penalize": [], "max_rating": None},
    "laugh": {"boost": ["Comedy"], "penalize": [], "max_rating": None},
    "light": {"boost": ["Comedy", "Romance", "Animation", "Family"], "penalize": ["Horror", "Thriller", "War"], "max_rating": None},
    "fun": {"boost": ["Comedy", "Adventure", "Animation", "Action"], "penalize": ["Horror", "War"], "max_rating": None},
    "feel-good": {"boost": ["Comedy", "Romance", "Family", "Animation"], "penalize": ["Horror", "Thriller", "War"], "max_rating": None},
    "relax": {"boost": ["Comedy", "Romance", "Drama"], "penalize": ["Horror", "Thriller", "Action"], "max_rating": None},
    # Action / excitement
    "action": {"boost": ["Action", "Adventure", "Science Fiction", "Sci-Fi", "Thriller"], "penalize": [], "max_rating": None},
    "exciting": {"boost": ["Action", "Adventure", "Thriller"], "penalize": [], "max_rating": None},
    "adventure": {"boost": ["Adventure", "Action", "Fantasy", "Science Fiction", "Sci-Fi"], "penalize": [], "max_rating": None},
    "intense": {"boost": ["Action", "Thriller", "Drama", "War"], "penalize": [], "max_rating": None},
    # Romance
    "romantic": {"boost": ["Romance", "Comedy", "Drama"], "penalize": ["Horror", "War"], "max_rating": None},
    "romance": {"boost": ["Romance", "Comedy", "Drama"], "penalize": [], "max_rating": None},
    "date night": {"boost": ["Romance", "Comedy", "Drama", "Thriller"], "penalize": [], "max_rating": None},
    "date": {"boost": ["Romance", "Comedy", "Drama"], "penalize": [], "max_rating": None},
    # Emotional
    "sad": {"boost": ["Drama", "Romance"], "penalize": ["Comedy", "Animation"], "max_rating": None},
    "cry": {"boost": ["Drama", "Romance", "War"], "penalize": [], "max_rating": None},
    "drama": {"boost": ["Drama"], "penalize": [], "max_rating": None},
    # Direct genre names
    "sci-fi": {"boost": ["Science Fiction", "Sci-Fi", "Fantasy"], "penalize": [], "max_rating": None},
    "space": {"boost": ["Science Fiction", "Sci-Fi"], "penalize": [], "max_rating": None},
    "fantasy": {"boost": ["Fantasy", "Adventure"], "penalize": [], "max_rating": None},
    "mystery": {"boost": ["Mystery", "Thriller", "Crime"], "penalize": [], "max_rating": None},
    "crime": {"boost": ["Crime", "Thriller", "Mystery"], "penalize": [], "max_rating": None},
    "documentary": {"boost": ["Documentary"], "penalize": [], "max_rating": None},
    "war": {"boost": ["War", "History", "Drama"], "penalize": [], "max_rating": None},
    # "classic" carries no genre signal of its own.
    "classic": {"boost": [], "penalize": [], "max_rating": None},
    "animated": {"boost": ["Animation"], "penalize": [], "max_rating": None},
    "anime": {"boost": ["Animation"], "penalize": [], "max_rating": None},
    "music": {"boost": ["Music", "Musical"], "penalize": [], "max_rating": None},
    "musical": {"boost": ["Music", "Musical"], "penalize": [], "max_rating": None},
    "western": {"boost": ["Western"], "penalize": [], "max_rating": None},
    "superhero": {"boost": ["Action", "Adventure", "Science Fiction", "Sci-Fi"], "penalize": [], "max_rating": None},
}
# Content rating hierarchy for family filtering.
# Ratings are compared by their position in this list: a movie passes a cap
# when its rating's index is <= the cap's index, so earlier entries are the
# milder ratings. "NR", "Not Rated" and None are placed after "NC-17", which
# means they rank above every regular rating in that index comparison.
RATING_ORDER = ["G", "PG", "PG-13", "R", "NC-17", "NR", "Not Rated", None]
def _parse_decade(mood: str) -> tuple[int, int] | None:
"""Extract decade filter from mood text."""
match = re.search(r"\b(19|20)(\d)0s\b", mood.lower())
if match:
decade_start = int(match.group(1) + match.group(2) + "0")
return (decade_start, decade_start + 9)
match = re.search(r"\b(old|classic|vintage|retro)\b", mood.lower())
if match:
return (1920, 1989)
return None
def _is_rating_appropriate(content_rating: str | None, max_rating: str | None) -> bool:
"""Check if a movie's content rating is at or below the max allowed."""
if max_rating is None:
return True
if content_rating is None:
return True # Unknown rating, let it through
try:
movie_idx = RATING_ORDER.index(content_rating)
max_idx = RATING_ORDER.index(max_rating)
return movie_idx <= max_idx
except ValueError:
return True # Unknown rating format, let it through
def _parse_movie(raw: dict) -> Movie:
    """Build a Movie model from one raw DB row dict.

    ``jellyfin_id`` and ``title`` must be present in ``raw``; every other
    column is read with ``.get`` and may be absent. The ``genres``,
    ``studios``, ``people`` and ``tags`` columns hold JSON text and are
    decoded here.
    """

    def json_column(name: str):
        # A missing, None or empty-string column decodes as an empty list.
        return json.loads(raw.get(name) or "[]")

    fields = {
        "jellyfin_id": raw["jellyfin_id"],
        "title": raw["title"],
        "sort_title": raw.get("sort_title"),
        "year": raw.get("year"),
        "genres": json_column("genres"),
        "overview": raw.get("overview"),
        "community_rating": raw.get("community_rating"),
        "critic_rating": raw.get("critic_rating"),
        "runtime_minutes": raw.get("runtime_minutes"),
        "content_rating": raw.get("content_rating"),
        "studios": json_column("studios"),
        "people": json_column("people"),
        "tags": json_column("tags"),
    }
    return Movie(**fields)
def prefilter_candidates(movies_raw: list[dict], mood: str, max_candidates: int = 200) -> list[Movie]:
    """Score and filter movies based on mood signals. Returns top candidates as Movie models.

    Args:
        movies_raw: Raw movie rows as dicts. "content_rating" and "year" are
            used for filtering; "genres" (JSON text), "community_rating" and
            "overview" are used for scoring.
        mood: Free-text mood description.
        max_candidates: Upper bound on the number of movies returned. Values
            below zero are treated as zero.

    Returns:
        At most ``max_candidates`` Movie models, highest score first. Rows
        with equal scores keep their input order (the sort is stable).
    """
    mood_lower = mood.lower()

    # Collect all active signals
    boost_genres: set[str] = set()
    penalize_genres: set[str] = set()
    max_rating: str | None = None
    decade = _parse_decade(mood)

    # NOTE(review): keywords are matched as plain substrings, so "war" also
    # fires on "heartwarming" and "date" on "update". Stricter word matching
    # would in turn stop "grandchildren" from triggering "children", so the
    # matching rule is left as is here.
    for keyword, signals in MOOD_SIGNALS.items():
        if keyword not in mood_lower:
            continue
        boost_genres.update(signals["boost"])
        penalize_genres.update(signals["penalize"])
        cap = signals["max_rating"]
        # Keep the most restrictive cap among all matching keywords.
        if cap and (max_rating is None or RATING_ORDER.index(cap) < RATING_ORDER.index(max_rating)):
            max_rating = cap

    # Remove any genres that appear in both boost and penalize
    penalize_genres -= boost_genres

    # Words from the mood text to look for in each overview. Computed once
    # (it does not depend on the movie). Surrounding punctuation is stripped
    # so that "scary," can still match "scary"; short words (<= 3 chars) are
    # skipped and repeated words count only once.
    punctuation = ".,!?;:'\"()[]{}"
    stripped_words = (word.strip(punctuation) for word in mood_lower.split())
    mood_words = list(dict.fromkeys(word for word in stripped_words if len(word) > 3))

    scored: list[tuple[float, dict]] = []
    for raw in movies_raw:
        # Filter by content rating
        if not _is_rating_appropriate(raw.get("content_rating"), max_rating):
            continue

        # Filter by decade (rows without a year are kept)
        year = raw.get("year")
        if decade and year:
            if year < decade[0] or year > decade[1]:
                continue

        # Decode genres only for rows that survived the filters.
        movie_genres = set(json.loads(raw.get("genres") or "[]"))

        score = 0.0
        # Genre match bonus / penalty (both overlaps are 0 for empty sets)
        score += len(movie_genres & boost_genres) * 3.0
        score -= len(movie_genres & penalize_genres) * 2.0

        # Rating bonus (higher rated movies get a small boost)
        rating = raw.get("community_rating")
        if rating:
            score += rating * 0.3

        # Keyword match in overview: +1.0 per mood word found
        overview = (raw.get("overview") or "").lower()
        score += 1.0 * sum(word in overview for word in mood_words)

        scored.append((score, raw))

    # Sort by score descending
    scored.sort(key=lambda pair: pair[0], reverse=True)

    # A negative limit would slice from the end of the list; clamp it to 0.
    limit = max(max_candidates, 0)

    # Return top candidates as Movie models
    return [_parse_movie(raw) for _, raw in scored[:limit]]