New fetch methods for build id

2025-12-31 12:36:21 +01:00 · 2025-12-31 12:36:21 +01:00 · c23e37a73d
commit c23e37a73d
parent 4f27e67d51
1 changed file with 40 additions and 18 deletions
--- a/NPO/init.py
+++ b/NPO/init.py
@@ -5,7 +5,8 @@ from typing import Optional
 from langcodes import Language
 import click
-
+from collections.abc import Generator
 from unshackle.core.search_result import SearchResult
 from unshackle.core.constants import AnyTrack
 from unshackle.core.credential import Credential
 from unshackle.core.manifests import DASH
@@ -55,10 +56,8 @@ class NPO(Service):
        m = re.match(self.TITLE_RE, title)
        if not m:
-            raise ValueError(
+            self.search_term = title
-                f"Unsupported NPO URL: {title}\n"
+            return
                "Use /video/slug for movies or /serie/slug for series."
            )
        self.slug = m.group("slug")
        self.kind = m.group("type") or "video"
@@ -94,28 +93,22 @@ class NPO(Service):
        else:
            self.log.warning("NPO auth check failed.")
-    def _get_build_id(self, slug: str) -> str:
+    def _fetch_next_data(self, slug: str) -> dict:
-        """Fetch buildId from the actual video/series page."""
+        """Fetch and parse __NEXT_DATA__ from video/series page."""
        url = f"https://npo.nl/start/{'video' if self.kind == 'video' else 'serie'}/{slug}"
        r = self.session.get(url)
        r.raise_for_status()
        match = re.search(r'<script id="__NEXT_DATA__" type="application/json">({.*?})</script>', r.text, re.DOTALL)
        if not match:
            raise RuntimeError("Failed to extract __NEXT_DATA__")
-        data = json.loads(match.group(1))
+        return json.loads(match.group(1))
        return data["buildId"]
    def get_titles(self) -> Titles_T:
-        build_id = self._get_build_id(self.slug)
+        next_data = self._fetch_next_data(self.slug)
        build_id = next_data["buildId"]  # keep if needed elsewhere
-        if self.kind == "serie":
+        page_props = next_data["props"]["pageProps"]
-            url = self.config["endpoints"]["metadata_series"].format(build_id=build_id, slug=self.slug)
+        queries = page_props["dehydratedState"]["queries"]
        else:
            url = self.config["endpoints"]["metadata"].format(build_id=build_id, slug=self.slug)
        resp = self.session.get(url)
        resp.raise_for_status()
        queries = resp.json()["pageProps"]["dehydratedState"]["queries"]
        def get_data(fragment: str):
            return next((q["state"]["data"] for q in queries if fragment in str(q.get("queryKey", ""))), None)
@@ -287,3 +280,32 @@ class NPO(Service):
        )
        r.raise_for_status()
        return r.content
    def search(self) -> Generator[SearchResult, None, None]:
        """Query the configured search endpoint and yield one result per hit.

        The search text is ``self.search_term`` when it is set and non-empty,
        otherwise ``self.title``. If neither is available nothing is requested
        and the generator finishes immediately.

        Yields:
            A ``SearchResult`` for every entry in the response's ``items``
            list, built from the entry's ``guid``, ``title``, ``type`` and
            ``slug`` fields. Entries whose ``type`` is ``"timeless_series"``
            link to ``/start/serie/<slug>``; every other entry links to
            ``/start/video/<slug>``.

        Raises:
            requests.HTTPError: if the search endpoint answers with an error
                status (assumes ``self.session`` is a requests-style session,
                as the other requests in this class do).
        """
        # Function-scope import so this method does not depend on the
        # module's import block.
        from urllib.parse import quote

        query = getattr(self, "search_term", None) or getattr(self, "title", None)
        if not query:
            # Without a query there is nothing meaningful to send.
            return

        response = self.session.get(
            url=self.config["endpoints"]["search"],
            params={
                "searchQuery": query,
                "searchType": "series",
                "subscriptionType": "premium",
                "includePremiumContent": "true",
            },
            headers={
                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Gecko/20100101 Firefox/143.0",
                "Accept": "application/json, text/plain, */*",
                "Origin": "https://npo.nl",
                # Percent-encode the query: raw spaces or non-ASCII characters
                # would make this an invalid URL / unencodable header value.
                "Referer": f"https://npo.nl/start/zoeken?zoekTerm={quote(str(query), safe='')}",
            },
        )
        # Fail loudly on HTTP errors, consistent with the other requests here,
        # instead of trying to decode an error page as JSON.
        response.raise_for_status()
        payload = response.json()

        # `or []` also covers a response where "items" is present but null.
        for result in payload.get("items") or []:
            kind = result.get("type")
            section = "serie" if kind == "timeless_series" else "video"
            yield SearchResult(
                id_=result.get("guid"),
                title=result.get("title"),
                # Fall back to "SERIES" when the entry carries no type.
                label=kind.upper() if kind else "SERIES",
                url=f"https://npo.nl/start/{section}/{result.get('slug')}",
            )