Update NPO/__init__.py

user experience improvements and some stability fixes
This commit is contained in:
adef 2026-01-11 11:19:31 +00:00
parent 7709ebbf87
commit b796a820b0

View File

@ -1,5 +1,8 @@
import json import json
import re import re
import time
import base64
import hashlib
from http.cookiejar import CookieJar from http.cookiejar import CookieJar
from typing import Optional from typing import Optional
from langcodes import Language from langcodes import Language
@ -12,7 +15,7 @@ from unshackle.core.credential import Credential
from unshackle.core.manifests import DASH from unshackle.core.manifests import DASH
from unshackle.core.service import Service from unshackle.core.service import Service
from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T
from unshackle.core.tracks import Chapter, Tracks, Subtitle from unshackle.core.tracks import Chapter, Tracks, Subtitle, Chapters
class NPO(Service): class NPO(Service):
@ -27,19 +30,18 @@ class NPO(Service):
Supports: Supports:
Series https://npo.nl/start/serie/{slug} Series https://npo.nl/start/serie/{slug}
Movies https://npo.nl/start/video/{slug} Movies https://npo.nl/start/video/{slug}
Note: Movie inside a series can be downloaded as movie by converting URL to: Note: Movie inside a series can be downloaded as movie by converting URL to:
https://npo.nl/start/video/slug https://npo.nl/start/video/slug
To change between Widevine and Playready, you need to change the DrmType in config.yaml to either widevine or playready To change between Widevine and Playready, you need to change the DrmType in config.yaml to either widevine or playready
""" """
TITLE_RE = ( TITLE_RE = (
r"^(?:https?://(?:www\.)?npo\.nl/start/)?" r"^(?:https?://(?:www\.)?npo\.nl/start/)?"
r"(?:(?P<type>video|serie)/(?P<slug>[^/]+)" r"(?:(?P<type>video|serie|afspelen)/(?P<slug>[^/]+)"
r"(?:/afleveringen)?" r"(?:/(?P<path>.*))?)?$"
r"(?:/seizoen-(?P<season>[^/]+)/(?P<episode>[^/]+)/afspelen)?)?$"
) )
GEOFENCE = ("NL",) GEOFENCE = ("NL",)
NO_SUBTITLES = False NO_SUBTITLES = False
@ -54,6 +56,11 @@ class NPO(Service):
def __init__(self, ctx, title: str): def __init__(self, ctx, title: str):
super().__init__(ctx) super().__init__(ctx)
self.slug = None
self.kind = None
self.season_slug = None
self.episode_slug = None
m = re.match(self.TITLE_RE, title) m = re.match(self.TITLE_RE, title)
if not m: if not m:
self.search_term = title self.search_term = title
@ -61,12 +68,35 @@ class NPO(Service):
self.slug = m.group("slug") self.slug = m.group("slug")
self.kind = m.group("type") or "video" self.kind = m.group("type") or "video"
self.season_slug = m.group("season") path = m.group("path") or ""
self.episode_slug = m.group("episode")
if self.kind == "afspelen":
self.kind = "video"
if "afleveringen" in path:
self.kind = "serie"
season_match = re.search(r"seizoen-([^/]+)", path)
if season_match:
self.season_slug = season_match.group(1)
episode_match = re.search(r"seizoen-([^/]+)/([^/]+)/afspelen", path)
if episode_match:
self.season_slug = episode_match.group(1)
self.episode_slug = episode_match.group(2)
self.original_title_url = title # Store the original URL for later use
if self.config is None: if self.config is None:
raise EnvironmentError("Missing service config.") raise EnvironmentError("Missing service config.")
# Construct X-Nos header
salt = int(time.time())
user_agent = f"nos;{salt};Google/Nexus;Android/6.0;nl.nos.app/5.1.1"
string_to_hash = f";UB}}7Gaji==JPHtjX3@c{user_agent}"
md5_hash = hashlib.md5(string_to_hash.encode('utf-8')).hexdigest()
xnos = md5_hash + base64.b64encode(user_agent.encode('utf-8')).decode('utf-8')
self.session.headers['X-Nos'] = xnos
# Store CDM reference # Store CDM reference
self.cdm = ctx.obj.cdm self.cdm = ctx.obj.cdm
@ -93,8 +123,11 @@ class NPO(Service):
else: else:
self.log.warning("NPO auth check failed.") self.log.warning("NPO auth check failed.")
def _fetch_next_data(self, slug: str) -> dict: def _fetch_next_data(self, slug: str, full_url: Optional[str] = None) -> dict:
"""Fetch and parse __NEXT_DATA__ from video/series page.""" """Fetch and parse __NEXT_DATA__ from video/series page."""
if full_url:
url = full_url
else:
url = f"https://npo.nl/start/{'video' if self.kind == 'video' else 'serie'}/{slug}" url = f"https://npo.nl/start/{'video' if self.kind == 'video' else 'serie'}/{slug}"
r = self.session.get(url) r = self.session.get(url)
r.raise_for_status() r.raise_for_status()
@ -103,58 +136,256 @@ class NPO(Service):
raise RuntimeError("Failed to extract __NEXT_DATA__") raise RuntimeError("Failed to extract __NEXT_DATA__")
return json.loads(match.group(1)) return json.loads(match.group(1))
def get_widevine_license(self, *, challenge: bytes, title: Title_T, track: AnyTrack) -> bytes:
    """Request a Widevine license for ``track`` and return the raw response body.

    The DRM token is read from ``track.data["npo_stream_data"]["stream"]``
    (``drmToken`` first, then ``token``) and sent to the configured license
    endpoint as the ``custom_data`` query parameter.

    Args:
        challenge: Raw license challenge bytes, posted unmodified as the body.
        title: Title the track belongs to (not used by this method).
        track: Track whose ``data`` mapping carries the stream response.

    Returns:
        The license response body as bytes.

    Raises:
        ValueError: If no DRM token is present in the track data.
        Exception: Whatever ``raise_for_status()`` raises on an HTTP error.
    """
    license_url = self.config["endpoints"]["license"]

    # Tolerate missing or null intermediate keys; only the token is mandatory.
    npo_stream_data = track.data.get("npo_stream_data") or {}
    stream_details = npo_stream_data.get("stream") or {}
    drm_token = stream_details.get("drmToken") or stream_details.get("token")
    if not drm_token:
        raise ValueError("DRM token not found in title data for license request.")

    # The token is deliberately kept out of the log message: it is a credential.
    self.log.debug(f"Requesting Widevine license from {license_url} (with custom_data) using minimal headers...")

    # Passing the token through `params` lets the HTTP client percent-encode it
    # and merge it correctly with any query string already on the endpoint.
    r = self.session.post(
        license_url,
        params={"custom_data": drm_token},
        data=challenge,
        headers={"Content-Type": "application/octet-stream"},
    )
    r.raise_for_status()

    self.log.debug(f"Received Widevine license response (status: {r.status_code}, size: {len(r.content)} bytes)")
    return r.content
def get_titles(self) -> Titles_T: def get_titles(self) -> Titles_T:
next_data = self._fetch_next_data(self.slug) # Handle 'afspelen' URLs directly for specific episodes
build_id = next_data["buildId"] # keep if needed elsewhere if self.kind == "video" and not self.season_slug and not self.episode_slug and self.original_title_url:
try:
# Use the original URL to fetch __NEXT_DATA__
next_data = self._fetch_next_data(self.slug, full_url=self.original_title_url)
page_props = next_data["props"]["pageProps"] product_info = None
queries = page_props["dehydratedState"]["queries"] # Check the main program data in pageProps
page_props = next_data.get("props", {}).get("pageProps", {})
if page_props:
program_data = page_props.get("program", {})
if program_data and program_data.get("productId"):
product_info = program_data
else:
# Fallback for video data, if not found in program
video_data = page_props.get("video", {})
if video_data and video_data.get("productId"):
product_info = video_data
def get_data(fragment: str): # Fallback to dehydrated state queries if not found in pageProps directly
return next((q["state"]["data"] for q in queries if fragment in str(q.get("queryKey", ""))), None) if product_info is None:
queries = next_data.get("props", {}).get("pageProps", {}).get("dehydratedState", {}).get("queries", [])
for item in queries:
state = item.get("state", {})
if state:
episode_data = state.get('data', {})
if isinstance(episode_data, dict) and episode_data.get('productId'):
product_info = episode_data
break
if self.kind == "serie": if product_info and product_info.get("productId"):
series_data = get_data("series:detail-") # Check if it's part of a series
if not series_data: if product_info.get("series"):
raise ValueError("Series metadata not found") season_number = product_info.get("season", {}).get("seasonKey")
if season_number is None and product_info.get("season", {}).get("slug"):
season_match = re.search(r"seizoen-(\d+)", product_info["season"]["slug"])
if season_match:
season_number = int(season_match.group(1))
episodes = [] return Series([
seasons = get_data("series:seasons-") or []
for season in seasons:
eps = get_data(f"programs:season-{season['guid']}") or []
for e in eps:
episodes.append(
Episode( Episode(
id_=e["guid"], id_=product_info["productId"],
service=self.__class__, service=self.__class__,
title=series_data["title"], title=product_info["series"]["title"],
season=int(season["seasonKey"]), season=season_number,
number=int(e["programKey"]), number=product_info.get("programKey"),
name=e["title"], name=product_info["title"],
description=(e.get("synopsis", {}) or {}).get("long", ""), description=(product_info.get("synopsis", {}) or {}).get("long", ""),
language=Language.get("nl"), language=Language.get("nl"),
data=e, data=product_info,
)
)
return Series(episodes)
# Movie
item = get_data("program:detail-") or queries[0]["state"]["data"]
synopsis = item.get("synopsis", {})
desc = synopsis.get("long") or synopsis.get("short", "") if isinstance(synopsis, dict) else str(synopsis)
year = (int(item["firstBroadcastDate"]) // 31536000 + 1970) if item.get("firstBroadcastDate") else None
return Movies([
Movie(
id_=item["guid"],
service=self.__class__,
name=item["title"],
description=desc,
year=year,
language=Language.get("nl"),
data=item,
) )
]) ])
else:
# It's a standalone movie/video
return Movies([
Movie(
id_=product_info["productId"],
service=self.__class__,
name=product_info["title"],
description=(product_info.get("synopsis", {}) or {}).get("long", ""),
year=(int(product_info["firstBroadcastDate"]) // 31536000 + 1970) if product_info.get("firstBroadcastDate") else None,
language=Language.get("nl"),
data=product_info,
)
])
except Exception as e:
self.log.debug(f"Direct __NEXT_DATA__ fetch for afspelen URL failed: {e}")
# Prioritize broadcast search for /afspelen/ URLs
if self.kind != 'serie' and not self.season_slug and not self.episode_slug:
search_url_broadcasts = f"https://npo.nl/start/api/domain/search-collection-items?searchType=broadcasts&searchQuery={self.slug}&subscriptionType=anonymous"
broadcast_data = self.session.get(search_url_broadcasts).json()
if broadcast_data.get("items"):
item_data = broadcast_data["items"][0]
# If the item has a 'series' key, it's an episode of a series
if item_data.get("series"):
season_number = item_data.get("season", {}).get("seasonKey")
if season_number is None and item_data.get("season", {}).get("slug"):
# Fallback: Extract season number from slug like "seizoen-5"
season_match = re.search(r"seizoen-(\d+)", item_data["season"]["slug"])
if season_match:
season_number = int(season_match.group(1))
return Series([
Episode(
id_=item_data["productId"],
service=self.__class__,
title=item_data["series"]["title"], # Use series title as main title
season=season_number,
number=item_data.get("programKey"),
name=item_data["title"], # Use episode title as episode name
description=(item_data.get("synopsis", {}) or {}).get("long", ""),
language=Language.get("nl"),
data=item_data,
)
])
else:
# Otherwise, it's a standalone movie
return Movies([
Movie(
id_=item_data["productId"],
service=self.__class__,
name=item_data["title"],
description=(item_data.get("synopsis", {}) or {}).get("long", ""),
year=(int(item_data["firstBroadcastDate"]) // 31536000 + 1970) if item_data.get("firstBroadcastDate") else None,
language=Language.get("nl"),
data=item_data,
)
])
# Fallback to series search if not an /afspelen/ single item or if season/episode slugs are present
search_url_series = f"https://npo.nl/start/api/domain/search-collection-items?searchType=series&searchQuery={self.slug}&subscriptionType=anonymous"
series_data = self.session.get(search_url_series).json()
if series_data.get("items"):
# It's a series
series_info = series_data["items"][0]
series_slug = series_info["slug"]
series_type = series_info["type"]
series_guid = series_info["guid"]
seasons_url = f"https://npo.nl/start/api/domain/series-seasons?slug={series_slug}&type={series_type}"
seasons_data = self.session.get(seasons_url).json()
episodes = []
for season in seasons_data:
if self.season_slug and str(season.get("seasonKey")) != self.season_slug and season.get('slug') != f'seizoen-{self.season_slug}':
continue
season_guid = season["guid"]
episodes_url = f"https://npo.nl/start/api/domain/programs-by-season?guid={season_guid}"
episodes_data = self.session.get(episodes_url).json()
for episode_data in episodes_data:
episodes.append(
Episode(
id_=episode_data["productId"],
service=self.__class__,
title=series_info["title"],
season=episode_data.get("season", {}).get("seasonKey"),
number=episode_data.get("programKey"),
name=episode_data["title"],
description=(episode_data.get("synopsis", {}) or {}).get("long", ""),
language=Language.get("nl"),
data=episode_data,
)
)
if self.episode_slug:
# Filter for the specific episode requested
filtered_episodes = [ep for ep in episodes if ep.data.get("slug") == self.episode_slug]
return Series(filtered_episodes)
else:
return Series(episodes)
# Fallback: If neither broadcast nor series search returned items,
# try to fetch __NEXT_DATA__ for the video page (assuming it's a movie/standalone video)
try:
# Ensure self.kind is set to 'video' for _fetch_next_data to construct the correct URL
original_kind = self.kind
self.kind = "video"
next_data = self._fetch_next_data(self.slug)
self.kind = original_kind # Restore original kind
# Try to find the product info in the dehydrated state
product_info = None
queries = next_data.get("props", {}).get("pageProps", {}).get("dehydratedState", {}).get("queries", [])
for item in queries:
state = item.get("state", {})
if state:
episode_data = state.get('data', {})
if isinstance(episode_data, dict):
# NPO.py uses slug to find, let's use it as well
if episode_data.get('slug') == self.slug:
product_info = episode_data
break
# Fallback if not found in dehydratedState queries (different Next.js version or structure)
if product_info is None:
page_props = next_data.get("props", {}).get("pageProps", {})
if page_props:
# Check for program data
program_data = page_props.get("program", {})
if program_data and program_data.get("slug") == self.slug:
product_info = program_data
else:
# Check for direct video data
video_data = page_props.get("video", {})
if video_data and video_data.get("slug") == self.slug:
product_info = video_data
if product_info and product_info.get("productId"):
# If it has 'series' key, it's likely a series episode, not a standalone movie
if not product_info.get("series"):
return Movies([
Movie(
id_=product_info["productId"],
service=self.__class__,
name=product_info.get("title", self.slug), # Use slug as fallback title
description=product_info.get("synopsis", {}).get("long", ""),
year=(int(product_info["firstBroadcastDate"]) // 31536000 + 1970) if product_info.get("firstBroadcastDate") else None,
language=Language.get("nl"), # NPO is Dutch
data=product_info,
)
])
else:
self.log.debug(f"Content for {self.slug} identified as a series episode via __NEXT_DATA__ fallback, not a standalone movie.")
# If it's a series episode, we don't want to treat it as a movie here.
# The series search path should handle it, or this fallback should be for strict movies.
# For now, let's return empty if it's a series episode.
return []
except Exception as e:
self.log.debug(f"Fallback to __NEXT_DATA__ for video failed: {e}")
# If neither broadcast, series, nor __NEXT_DATA__ fallback returned items, return an empty list
return []
def get_chapters(self, title: Title_T) -> Chapters:
    """Return chapter markers for ``title``.

    No chapter data is produced here: the result is always an empty list
    (a plain ``list``, even though the annotation names ``Chapters``).
    """
    no_chapters = []
    return no_chapters
def get_tracks(self, title: Title_T) -> Tracks: def get_tracks(self, title: Title_T) -> Tracks:
product_id = title.data.get("productId") product_id = title.data.get("productId")
@ -193,119 +424,30 @@ class NPO(Service):
if not manifest_url: if not manifest_url:
raise ValueError("No stream URL in response") raise ValueError("No stream URL in response")
is_unencrypted = "unencrypted" in manifest_url.lower() or not any(k in stream for k in ["drmToken", "token"])
# Parse DASH # Parse DASH
tracks = DASH.from_url(manifest_url, session=self.session).to_tracks(language=title.language) tracks = DASH.from_url(manifest_url, session=self.session).to_tracks(language=title.language)
# Store the entire stream response data into track.data so it's accessible later by get_widevine_license
for tr in tracks:
tr.data["npo_stream_data"] = data # Always store stream data for all tracks
# HACK: NPO reports some Dutch audio tracks as English for older content.
# If the title language is Dutch, assume any English audio tracks are also Dutch.
if title.language == Language.get("nl"):
for track in tracks.audio:
if track.language == Language.get("en"):
self.log.debug("Correcting 'en' audio track to 'nl' for Dutch title.")
track.language = Language.get("nl")
# Subtitles # Subtitles
subtitles = [] subtitles = []
for sub in (data.get("assets", {}) or {}).get("subtitles", []) or []: for sub in (data.get("assets", {}) or {}).get("subtitle", []):
if not isinstance(sub, dict): if sub["format"] == "webvtt":
continue subtitles.append(Subtitle(url=sub["url"], language=Language.get(sub["lang"])))
lang = sub.get("iso", "und")
location = sub.get("location")
if not location:
continue # skip if no URL provided
subtitles.append(
Subtitle(
id_=sub.get("name", lang),
url=location.strip(),
language=Language.get(lang),
is_original_lang=lang == "nl",
codec=Subtitle.Codec.WebVTT,
name=sub.get("name", "Unknown"),
forced=False,
sdh=False,
)
)
tracks.subtitles = subtitles
# DRM
if is_unencrypted:
for tr in tracks.videos + tracks.audio:
if hasattr(tr, "drm") and tr.drm:
tr.drm.clear()
else: else:
self.drm_token = stream.get("drmToken") or stream.get("token") or stream.get("drm_token") self.log.warning(f"Unsupported subtitle format: {sub['format']}")
if not self.drm_token:
raise ValueError(f"No DRM token found. Available keys: {list(stream.keys())}")
for tr in tracks.videos + tracks.audio: if not self.NO_SUBTITLES:
if getattr(tr, "drm", None): tracks.subtitles.extend(subtitles)
if drm_type == "playready":
tr.drm.license = lambda challenge, **kw: self.get_playready_license(
challenge=challenge, title=title, track=tr
)
else:
tr.drm.license = lambda challenge, **kw: self.get_widevine_license(
challenge=challenge, title=title, track=tr
)
return tracks return tracks
def get_chapters(self, title: Title_T) -> list[Chapter]:
    """Return chapter markers for ``title``; this is always an empty list."""
    chapters = list()
    return chapters
def get_widevine_license(self, challenge: bytes, title: Title_T, track: AnyTrack) -> bytes:
    """Post ``challenge`` to the configured license endpoint and return the reply body.

    The token stored on ``self.drm_token`` is sent as the ``custom_data``
    query parameter. ``title`` and ``track`` are accepted but not used.

    Raises:
        ValueError: If ``self.drm_token`` is empty or unset to a falsy value.
    """
    if self.drm_token:
        endpoint = self.config["endpoints"]["license"]
        query = {"custom_data": self.drm_token}
        response = self.session.post(endpoint, params=query, data=challenge)
        response.raise_for_status()
        return response.content

    raise ValueError("DRM token not set, login or paid content may be required.")
def get_playready_license(self, challenge: bytes, title: Title_T, track: AnyTrack) -> bytes:
    """Post a PlayReady ``challenge`` to the license endpoint and return the reply body.

    The request carries SOAP ``AcquireLicense`` headers plus browser-like
    ``Origin``/``Referer``/``User-Agent`` values, and passes ``self.drm_token``
    as the ``custom_data`` query parameter. ``title`` and ``track`` are unused.

    Raises:
        ValueError: If ``self.drm_token`` is falsy.
    """
    token = self.drm_token
    if not token:
        raise ValueError("DRM token not set, login or paid content may be required.")

    # Same User-Agent text as before, assembled from its three segments.
    browser_ua = " ".join(
        (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/141.0.0.0 Safari/537.36 Edg/141.0.0.0",
        )
    )
    soap_headers = {
        "Content-Type": "text/xml; charset=utf-8",
        "SOAPAction": "http://schemas.microsoft.com/DRM/2007/03/protocols/AcquireLicense",
        "Origin": "https://npo.nl",
        "Referer": "https://npo.nl/",
        "User-Agent": browser_ua,
    }

    endpoint = self.config["endpoints"]["license"]
    response = self.session.post(
        endpoint,
        params={"custom_data": token},
        data=challenge,
        headers=soap_headers,
    )
    response.raise_for_status()
    return response.content
def search(self) -> Generator[SearchResult, None, None]:
    """Query the configured search endpoint and yield one ``SearchResult`` per item.

    The search term is ``self.search_term`` when set and truthy, otherwise
    ``self.title``. Results typed ``timeless_series`` get a ``/start/serie/``
    URL; every other type gets a ``/start/video/`` URL.

    Yields:
        SearchResult: Built from each entry of the response's ``items`` list.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    query = getattr(self, "search_term", None) or getattr(self, "title", None)

    # The term is percent-encoded before going into the Referer: a raw value
    # with spaces or non-latin-1 characters is not a valid header value.
    referer = f"https://npo.nl/start/zoeken?zoekTerm={quote(str(query or ''))}"

    payload = self.session.get(
        url=self.config["endpoints"]["search"],
        params={
            "searchQuery": query,
            "searchType": "series",
            "subscriptionType": "premium",
            "includePremiumContent": "true",
        },
        headers={
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Gecko/20100101 Firefox/143.0",
            "Accept": "application/json, text/plain, */*",
            "Origin": "https://npo.nl",
            "Referer": referer,
        },
    ).json()

    # `or []` also covers an explicit `"items": null` in the response.
    for result in payload.get("items") or []:
        kind = result.get("type")
        section = "serie" if kind == "timeless_series" else "video"
        yield SearchResult(
            id_=result.get("guid"),
            title=result.get("title"),
            label=kind.upper() if kind else "SERIES",
            url=f"https://npo.nl/start/{section}/{result.get('slug')}",
        )