From dcbbdc287a0dda3c7ddaad252c7c960ffc40e5d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Zieli=C5=84ski?= <bimbej@icloud.com>
Date: Thu, 18 Sep 2025 12:11:14 +0200
Subject: [PATCH] release

---
 .gitea/scripts/getLatestRunWithArtifacts.js | 241 +++++++++++++-------
 1 file changed, 164 insertions(+), 77 deletions(-)
diff --git a/.gitea/scripts/getLatestRunWithArtifacts.js b/.gitea/scripts/getLatestRunWithArtifacts.js
index 6523eac..71e282c 100644
--- a/.gitea/scripts/getLatestRunWithArtifacts.js
+++ b/.gitea/scripts/getLatestRunWithArtifacts.js
@@ -1,176 +1,263 @@
 // .gitea/scripts/getLatestRunWithArtifacts.js
-// Robust finder for the latest successful *workflow run* that exposes required artifacts.
-// Works against Gitea's Actions API only (no scraping of UI URLs).
+// Finds the latest successful *workflow run* that exposes all required artifacts.
+// Works against Gitea. Uses API if available; otherwise falls back to HTML scraping
+// of the Actions pages (because many Gitea versions don’t expose runs/artifacts via API).
 
 /* ENVIRONMENT:
    - GITEA_BASE_URL   e.g. https://code.example.com
-   - OWNER            repository owner/user, e.g. mz
-   - REPO             repository name,  e.g. DiunaBI
-   - GITEA_PAT        personal access token with repo read permissions
-   - SCAN_LIMIT       optional; max number of runs to scan across pages (default: 100)
-   - REQUIRED_ARTIFACTS  comma-separated list, e.g. "frontend,webapi" (default: as left)
+   - OWNER            repo owner, e.g. mz
+   - REPO             repo name, e.g. DiunaBI
+   - GITEA_PAT        PAT with repo read permissions (used as "Authorization: token <PAT>")
+   - SCAN_LIMIT       optional; max number of runs to scan (default: 100)
+   - REQUIRED_ARTIFACTS  comma-separated list, default "frontend,webapi"
    - FILTER_BRANCH     optional; if set, only consider runs from this branch (e.g. "main")
-   - INCLUDE_TAGS      optional; "true" to allow refs/tags (default: true)
+   - INCLUDE_TAGS      optional; "true" (default) to allow tag refs
 */
 
 const fs = require("fs");
 const path = require("path");
 
-// ---- Config & defaults ------------------------------------------------------
 const BASE   = process.env.GITEA_BASE_URL;
 const OWNER  = process.env.OWNER;
 const REPO   = process.env.REPO;
 const TOKEN  = process.env.GITEA_PAT;
 
 const SCAN_LIMIT = Number(process.env.SCAN_LIMIT || "100");
-
 const REQUIRED_ARTIFACTS = (process.env.REQUIRED_ARTIFACTS || "frontend,webapi")
-    .split(",")
-    .map(s => s.trim())
-    .filter(Boolean);
-
-const FILTER_BRANCH = (process.env.FILTER_BRANCH || "").trim(); // e.g. "main"
+    .split(",").map(s => s.trim()).filter(Boolean);
+const FILTER_BRANCH = (process.env.FILTER_BRANCH || "").trim();
 const INCLUDE_TAGS  = String(process.env.INCLUDE_TAGS ?? "true").toLowerCase() !== "false";
 
 if (!BASE || !OWNER || !REPO) {
-    console.error("Missing one of: GITEA_BASE_URL, OWNER, REPO");
-    process.exit(1);
-}
-if (!TOKEN) {
-    console.error("Missing GITEA_PAT");
-    process.exit(1);
+    console.error("Missing one of: GITEA_BASE_URL, OWNER, REPO"); process.exit(1);
 }
+if (!TOKEN) { console.error("Missing GITEA_PAT"); process.exit(1); }
 
-// Output dir for passing values between steps
 const cacheDir = path.join(".gitea", ".cache");
 fs.mkdirSync(cacheDir, { recursive: true });
 
-// ---- Small HTTP helper ------------------------------------------------------
+// Authenticated GET helper used for both JSON API calls and HTML page fetches.
+// Sends the PAT as "Authorization: token <PAT>" plus the given Accept header.
+// Resolves to the raw fetch Response; checking res.ok is left to the caller.
+async function http(url, accept = "application/json") {
+    const requestHeaders = { Authorization: `token ${TOKEN}`, Accept: accept };
+    const requestOptions = { headers: requestHeaders };
+    const response = await fetch(url, requestOptions);
+    return response;
+}
+
 async function apiJSON(url) {
-    const res = await fetch(url, { headers: { Authorization: `token ${TOKEN}` } });
+    const res = await http(url, "application/json");
     if (!res.ok) {
         const t = await res.text().catch(() => "");
-        throw new Error(`API ${res.status} for ${url}\n${t}`);
+        const err = new Error(`API ${res.status} for ${url}\n${t}`);
+        err.status = res.status;
+        throw err;
     }
     return res.json();
 }
 
-// Normalize different shapes of list responses across Gitea versions
 function normalizeRunList(resp) {
     if (Array.isArray(resp)) return resp;
     return resp?.runs || resp?.workflow_runs || resp?.data || resp?.items || [];
 }
 
-// Success predicate at the RUN level
 function isSuccessfulRun(r) {
-    const status = String(r.status || "").toLowerCase();      // "completed", "success" (varies)
-    const concl  = String(r.conclusion || "").toLowerCase();  // "success" (may be empty)
-    return status === "success" || (status === "completed" && concl === "success");
+    const s = String(r.status || "").toLowerCase();     // "completed", "success" (varies)
+    const c = String(r.conclusion || "").toLowerCase(); // "success" (sometimes empty)
+    return s === "success" || (s === "completed" && c === "success");
 }
 
-// Optional ref filter
 function passesRefFilter(run) {
-    // Gitea usually provides head_branch plus ref. We accept:
-    // - FILTER_BRANCH (if set)
-    // - tags (refs/tags/*) when INCLUDE_TAGS=true
     const headBranch = run.head_branch || "";
     const ref = run.ref || "";
     if (FILTER_BRANCH && headBranch && headBranch !== FILTER_BRANCH) {
-        // If FILTER_BRANCH is set, allow tags too if INCLUDE_TAGS=true
         if (!(INCLUDE_TAGS && ref.startsWith("refs/tags/"))) return false;
     }
-    if (!FILTER_BRANCH && !INCLUDE_TAGS) {
-        // No branch required and tags disabled → accept anything on branches only
-        if (ref.startsWith("refs/tags/")) return false;
-    }
+    if (!FILTER_BRANCH && !INCLUDE_TAGS && ref.startsWith("refs/tags/")) return false;
     return true;
 }
 
-// Fetch paginated runs up to SCAN_LIMIT
-async function fetchRunsPaginated() {
-    const pageSize = Math.min(50, SCAN_LIMIT); // be nice to the API
+/* ---------------------------- API STRATEGY ---------------------------- */
+async function tryApiListRuns() {
+    // If the endpoint exists on this Gitea version, this will succeed,
+    // otherwise we’ll get a 404 and fall back to HTML.
+    const pageSize = Math.min(50, SCAN_LIMIT);
     const out = [];
     let page = 1;
 
     while (out.length < SCAN_LIMIT) {
         const url = `${BASE}/api/v1/repos/${OWNER}/${REPO}/actions/runs?limit=${pageSize}&page=${page}`;
-        const resp = await apiJSON(url);
+        let resp;
+        try {
+            resp = await apiJSON(url);
+        } catch (e) {
+            if (e.status === 404) return null; // endpoint not available
+            throw e; // real error
+        }
         const chunk = normalizeRunList(resp);
         if (!Array.isArray(chunk) || chunk.length === 0) break;
         out.push(...chunk);
-        if (chunk.length < pageSize) break; // last page
+        if (chunk.length < pageSize) break;
         page += 1;
     }
-
-    // Trim to SCAN_LIMIT if we over-fetched
     return out.slice(0, SCAN_LIMIT);
 }
 
-// Read artifact names for a given run id
-async function readArtifactNames(runId) {
+async function apiArtifactNames(runId) {
     const url = `${BASE}/api/v1/repos/${OWNER}/${REPO}/actions/runs/${runId}/artifacts`;
-    const resp = await apiJSON(url);
-    const list = Array.isArray(resp?.artifacts) ? resp.artifacts : (Array.isArray(resp) ? resp : []);
+    const res = await http(url, "application/json");
+    if (!res.ok) {
+        const t = await res.text().catch(() => "");
+        const err = new Error(`API ${res.status} for ${url}\n${t}`);
+        err.status = res.status;
+        throw err;
+    }
+    const json = await res.json();
+    const list = Array.isArray(json?.artifacts) ? json.artifacts : (Array.isArray(json) ? json : []);
     return list.map(a => a?.name).filter(Boolean);
 }
 
-// ---- Main -------------------------------------------------------------------
+/* --------------------------- HTML STRATEGY ---------------------------- */
+// Very light HTML parsing using regexes; robust enough for Gitea’s Actions pages.
+// Scrapes the Actions overview page; resolves to [{ id }], highest id first, capped at SCAN_LIMIT.
+async function listRunsFromHtml() {
+    const url = `${BASE}/${OWNER}/${REPO}/actions`;
+    const res = await http(url, "text/html");
+    if (!res.ok) {
+        const body = await res.text().catch(() => "");
+        throw new Error(`HTML ${res.status} for ${url}\n${body}`);
+    }
+    const page = await res.text();
+    // Gather each distinct numeric id that appears in a /actions/runs/<id> link.
+    const seen = new Set();
+    for (const [, digits] of page.matchAll(/\/actions\/runs\/(\d+)/g)) {
+        const id = Number(digits);
+        if (Number.isFinite(id)) seen.add(id);
+    }
+    return [...seen].sort((a, b) => b - a).slice(0, SCAN_LIMIT).map(id => ({ id }));
+}
+
+// Scrapes artifact names for a run: tries the run's /artifacts page, then the run page itself.
+// Resolves to an array of names; throws when both pages answer with a non-OK status.
+async function htmlArtifactNames(runId) {
+    const url = `${BASE}/${OWNER}/${REPO}/actions/runs/${runId}/artifacts`;
+    const res = await http(url, "text/html");
+    if (res.ok) return extractArtifactNamesFromHtml(await res.text());
+
+    // Some Gitea versions show artifacts inline on the run page; try that as fallback.
+    const runUrl = `${BASE}/${OWNER}/${REPO}/actions/runs/${runId}`;
+    const res2 = await http(runUrl, "text/html");
+    if (!res2.ok) {
+        // Report the fallback request: it is the response that actually failed at this point.
+        const t = await res2.text().catch(() => "");
+        throw new Error(`HTML ${res2.status} for ${runUrl}\n${t}`);
+    }
+    return extractArtifactNamesFromHtml(await res2.text());
+}
+
+function extractArtifactNamesFromHtml(html) {
+    // Collects candidate artifact names from an HTML string using three regex heuristics.
+    // Intentionally permissive: returns the distinct, trimmed, non-empty captures as an array.
+    const names = new Set();
+
+    // 1) Text right after a tag containing "actions/artifacts/NNN", e.g. <a href="/.../actions/artifacts/NNN">NAME</a>
+    for (const m of html.matchAll(/actions\/artifacts\/\d+[^>]*>([^<]+)</g)) {
+        const name = m[1].trim();
+        if (name) names.add(name);
+    }
+    // 2) Attribute values: data-name="NAME" or data-artifact-name="NAME"
+    for (const m of html.matchAll(/data-(?:artifact-)?name="([^"]+)"/g)) {
+        const name = m[1].trim();
+        if (name) names.add(name);
+    }
+    // 3) Loose, case-insensitive: 2-80 chars of text following the closing tag that comes after an "artifact" mention
+    for (const m of html.matchAll(/artifact[^<>\n]*<\/[^>]*>\s*([^<>\n]{2,80})</gi)) {
+        const guess = m[1].trim();
+        if (guess) names.add(guess);
+    }
+    // Convert the Set to an array (first-seen order across the three passes)
+    return Array.from(names);
+}
+
+/* ------------------------------ MAIN --------------------------------- */
 (async () => {
-    const runs = await fetchRunsPaginated();
+    // Strategy A: official API (if present)
+    let runs = await tryApiListRuns();
+
+    // Strategy B: HTML scraping fallback
+    if (!runs) {
+        console.log("Runs API not available on this Gitea – falling back to HTML scraping.");
+        runs = await listRunsFromHtml(); // {id} only
+    }
+
     if (!runs.length) {
-        console.error("No workflow runs returned by API.");
+        console.error("No workflow runs found.");
         process.exit(1);
     }
 
-    // Most recent first by id (Gitea ids are monotonic; if needed, also compare created_at)
+    // Order newest first by id; API runs may include extra fields, HTML gives only id
     const candidates = runs
         .filter(r => r && r.id != null)
-        .sort((a, b) => (b.id ?? 0) - (a.id ?? 0))
-        .filter(isSuccessfulRun)
-        .filter(passesRefFilter);
+        .sort((a, b) => (b.id ?? 0) - (a.id ?? 0));
 
-    if (!candidates.length) {
-        console.error("No successful workflow runs found (after filtering).");
-        process.exit(1);
-    }
-
-    console.log(
-        `Scanning ${candidates.length} successful runs for artifacts: ${REQUIRED_ARTIFACTS.join(", ")}`
-    );
+    console.log(`Scanning ${Math.min(candidates.length, SCAN_LIMIT)} runs for artifacts: ${REQUIRED_ARTIFACTS.join(", ")}`);
 
     let pickedId = null;
 
-    for (const r of candidates) {
+    for (const r of candidates.slice(0, SCAN_LIMIT)) {
         const runId = r.id;
+
+        // Optional API-only filtering (status/branch). If runs came from HTML, we can’t pre-filter reliably,
+        // so we just check artifacts presence (which is what we ultimately need).
+        if (r.status && r.conclusion) {
+            if (!isSuccessfulRun(r)) {
+                console.log(`Run ${runId}: not successful (status=${r.status}, conclusion=${r.conclusion})`);
+                continue;
+            }
+            if (!passesRefFilter(r)) {
+                console.log(`Run ${runId}: skipped by ref filter (branch=${r.head_branch || ""}, ref=${r.ref || ""})`);
+                continue;
+            }
+        }
+
+        let names = [];
         try {
-            const names = await readArtifactNames(runId);
+            // Prefer API artifacts; if 404, use HTML extraction.
+            try {
+                names = await apiArtifactNames(runId);
+            } catch (e) {
+                if (e.status === 404) {
+                    names = await htmlArtifactNames(runId);
+                } else {
+                    throw e;
+                }
+            }
+
+            // Normalize artifact names (trim & case-sensitive match as in UI/API)
+            names = names.map(n => n.trim()).filter(Boolean);
+
             const ok = REQUIRED_ARTIFACTS.every(req => names.includes(req));
             if (ok) {
                 pickedId = runId;
                 break;
             }
-            console.log(
-                `Run ${runId}: lacks required artifacts (has: ${names.join(", ") || "none"})`
-            );
+            console.log(`Run ${runId}: lacks required artifacts (has: ${names.join(", ") || "none"})`);
         } catch (e) {
-            // If artifacts endpoint fails (permissions, transient), keep scanning
-            console.log(
-                `Run ${runId}: cannot read artifacts via API -> ${String(e.message).split("\n")[0]}`
-            );
+            console.log(`Run ${runId}: cannot read artifacts -> ${String(e.message).split("\n")[0]}`);
+            continue;
         }
     }
 
     if (!pickedId) {
-        console.error("No run exposes all required artifacts via API.");
+        console.error("No run exposes all required artifacts.");
         process.exit(1);
     }
 
-    // Write outputs for downstream steps
     fs.writeFileSync(path.join(cacheDir, "run_id"), String(pickedId), "utf8");
     if (process.env.GITHUB_OUTPUT) {
         fs.appendFileSync(process.env.GITHUB_OUTPUT, `run_id=${pickedId}\n`);
     }
-
     console.log(`Picked run_id=${pickedId}`);
 })().catch(err => {
     console.error(err.stack || err.message || String(err));