New fetch methods for build id

This commit is contained in:
FairTrade 2025-12-31 12:36:21 +01:00
parent 4f27e67d51
commit c23e37a73d

View File

@ -5,7 +5,8 @@ from typing import Optional
from langcodes import Language from langcodes import Language
import click import click
from collections.abc import Generator
from unshackle.core.search_result import SearchResult
from unshackle.core.constants import AnyTrack from unshackle.core.constants import AnyTrack
from unshackle.core.credential import Credential from unshackle.core.credential import Credential
from unshackle.core.manifests import DASH from unshackle.core.manifests import DASH
@ -55,10 +56,8 @@ class NPO(Service):
m = re.match(self.TITLE_RE, title) m = re.match(self.TITLE_RE, title)
if not m: if not m:
raise ValueError( self.search_term = title
f"Unsupported NPO URL: {title}\n" return
"Use /video/slug for movies or /serie/slug for series."
)
self.slug = m.group("slug") self.slug = m.group("slug")
self.kind = m.group("type") or "video" self.kind = m.group("type") or "video"
@ -94,28 +93,22 @@ class NPO(Service):
else: else:
self.log.warning("NPO auth check failed.") self.log.warning("NPO auth check failed.")
def _get_build_id(self, slug: str) -> str: def _fetch_next_data(self, slug: str) -> dict:
"""Fetch buildId from the actual video/series page.""" """Fetch and parse __NEXT_DATA__ from video/series page."""
url = f"https://npo.nl/start/{'video' if self.kind == 'video' else 'serie'}/{slug}" url = f"https://npo.nl/start/{'video' if self.kind == 'video' else 'serie'}/{slug}"
r = self.session.get(url) r = self.session.get(url)
r.raise_for_status() r.raise_for_status()
match = re.search(r'<script id="__NEXT_DATA__" type="application/json">({.*?})</script>', r.text, re.DOTALL) match = re.search(r'<script id="__NEXT_DATA__" type="application/json">({.*?})</script>', r.text, re.DOTALL)
if not match: if not match:
raise RuntimeError("Failed to extract __NEXT_DATA__") raise RuntimeError("Failed to extract __NEXT_DATA__")
data = json.loads(match.group(1)) return json.loads(match.group(1))
return data["buildId"]
def get_titles(self) -> Titles_T: def get_titles(self) -> Titles_T:
build_id = self._get_build_id(self.slug) next_data = self._fetch_next_data(self.slug)
build_id = next_data["buildId"] # keep if needed elsewhere
if self.kind == "serie": page_props = next_data["props"]["pageProps"]
url = self.config["endpoints"]["metadata_series"].format(build_id=build_id, slug=self.slug) queries = page_props["dehydratedState"]["queries"]
else:
url = self.config["endpoints"]["metadata"].format(build_id=build_id, slug=self.slug)
resp = self.session.get(url)
resp.raise_for_status()
queries = resp.json()["pageProps"]["dehydratedState"]["queries"]
def get_data(fragment: str): def get_data(fragment: str):
return next((q["state"]["data"] for q in queries if fragment in str(q.get("queryKey", ""))), None) return next((q["state"]["data"] for q in queries if fragment in str(q.get("queryKey", ""))), None)
@ -287,3 +280,32 @@ class NPO(Service):
) )
r.raise_for_status() r.raise_for_status()
return r.content return r.content
def search(self) -> Generator[SearchResult, None, None]:
    """Query the configured search endpoint and yield one SearchResult per hit.

    The search text is ``self.search_term`` when that attribute is set and
    non-empty, otherwise ``self.title``. Nothing is requested or yielded
    when neither attribute holds a value.

    Yields:
        SearchResult: built from each entry of the response's ``items``
        list, using its ``guid``, ``title``, ``type`` and ``slug`` fields.

    Raises:
        Whatever ``raise_for_status()`` raises when the endpoint answers
        with an HTTP error status.
    """
    # Function-scope import keeps this method independent of the file's import block.
    from urllib.parse import quote

    query = getattr(self, "search_term", None) or getattr(self, "title", None)
    if not query:
        # Without a search text the request (and its Referer) would be meaningless.
        return

    response = self.session.get(
        url=self.config["endpoints"]["search"],
        params={
            "searchQuery": query,
            "searchType": "series",
            "subscriptionType": "premium",
            "includePremiumContent": "true",
        },
        headers={
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Gecko/20100101 Firefox/143.0",
            "Accept": "application/json, text/plain, */*",
            "Origin": "https://npo.nl",
            # Percent-encode the query: spaces, reserved characters or
            # non-ASCII text must not be placed raw into a URL / header value.
            "Referer": f"https://npo.nl/start/zoeken?zoekTerm={quote(str(query), safe='')}",
        },
    )
    # Fail loudly on HTTP errors instead of trying to decode an error page as JSON.
    response.raise_for_status()
    payload = response.json()

    # "items" may be absent or null; treat both as "no results".
    for result in payload.get("items") or []:
        result_type = result.get("type")
        # Only "timeless_series" results live under /serie/; everything else under /video/.
        kind = "serie" if result_type == "timeless_series" else "video"
        yield SearchResult(
            id_=result.get("guid"),
            title=result.get("title"),
            # Missing or empty type falls back to the "SERIES" label.
            label=(result_type or "SERIES").upper(),
            url=f"https://npo.nl/start/{kind}/{result.get('slug')}",
        )