// DiunaBI/.gitea/scripts/getLatestRunWithArtifacts.js

// .gitea/scripts/getLatestRunWithArtifacts.js
// Finds the latest successful *workflow run* that exposes all required artifacts.
// Works against Gitea. Uses API if available; otherwise falls back to HTML scraping
// of the Actions pages (because many Gitea versions don’t expose runs/artifacts via API).

/* ENVIRONMENT:
   - GITEA_BASE_URL   e.g. https://code.example.com
   - OWNER            repo owner, e.g. mz
   - REPO             repo name, e.g. DiunaBI
   - GITEA_PAT        PAT with repo read permissions (used as "Authorization: token <PAT>")
   - SCAN_LIMIT       optional; max number of runs to scan (default: 100)
   - REQUIRED_ARTIFACTS  comma-separated list, default "frontend,webapi"
   - FILTER_BRANCH     optional; if set, only consider runs from this branch (e.g. "main")
   - INCLUDE_TAGS      optional; "true" (default) to allow tag refs
*/

const fs = require("fs");
const path = require("path");

// ---- Configuration: read once from the environment at startup ----

// Trailing slashes are stripped so `${BASE}/api/...` never yields a "//" path.
const BASE   = (process.env.GITEA_BASE_URL || "").trim().replace(/\/+$/, "");
const OWNER  = process.env.OWNER;
const REPO   = process.env.REPO;
const TOKEN  = process.env.GITEA_PAT;

// Upper bound on how many runs are inspected (validated below).
const SCAN_LIMIT = Number(process.env.SCAN_LIMIT || "100");
// Artifact names that a run must expose to be picked; blanks are dropped.
const REQUIRED_ARTIFACTS = (process.env.REQUIRED_ARTIFACTS || "frontend,webapi")
    .split(",").map(s => s.trim()).filter(Boolean);
const FILTER_BRANCH = (process.env.FILTER_BRANCH || "").trim();
// Anything except the literal "false" (case-insensitive) keeps tag runs allowed.
const INCLUDE_TAGS  = String(process.env.INCLUDE_TAGS ?? "true").toLowerCase() !== "false";

if (!BASE || !OWNER || !REPO) {
    console.error("Missing one of: GITEA_BASE_URL, OWNER, REPO"); process.exit(1);
}
if (!TOKEN) { console.error("Missing GITEA_PAT"); process.exit(1); }
// A NaN, zero, negative or fractional limit would silently turn every scan loop
// into a no-op and end in a misleading "No workflow runs found." – fail loudly instead.
if (!Number.isInteger(SCAN_LIMIT) || SCAN_LIMIT < 1) {
    console.error(`Invalid SCAN_LIMIT "${process.env.SCAN_LIMIT}": expected a positive integer`); process.exit(1);
}

// Directory where the picked run id is persisted for later workflow steps.
const cacheDir = path.join(".gitea", ".cache");
fs.mkdirSync(cacheDir, { recursive: true });

/**
 * Sends a GET request to Gitea, authenticated with the configured PAT.
 * The HTTP status is NOT inspected here; callers decide how to treat failures.
 *
 * @param {string} url - Absolute URL to request.
 * @param {string} [accept="application/json"] - Value for the Accept header.
 * @returns {Promise<Response>} The raw fetch response.
 */
async function http(url, accept = "application/json") {
    const headers = { Authorization: `token ${TOKEN}`, Accept: accept };
    return fetch(url, { headers });
}

/**
 * Fetches `url` as JSON and returns the parsed body.
 *
 * @param {string} url - Absolute API URL.
 * @returns {Promise<any>} Parsed JSON payload.
 * @throws {Error} On any non-2xx response; the error carries the HTTP code in
 *   `.status` so callers can tell "endpoint missing" (404) from real failures.
 */
async function apiJSON(url) {
    const response = await http(url, "application/json");
    if (response.ok) {
        return response.json();
    }
    // Body is best-effort diagnostics only; a read failure must not mask the status.
    const body = await response.text().catch(() => "");
    const failure = new Error(`API ${response.status} for ${url}\n${body}`);
    throw Object.assign(failure, { status: response.status });
}

/**
 * Unwraps the list of runs from a "list runs" API payload.
 * Accepts either a bare array or an envelope object; envelope keys are probed
 * in a fixed order and the first truthy value wins.
 *
 * @param {any} resp - Parsed API response.
 * @returns {any} The array itself, the first truthy envelope value, or [].
 */
function normalizeRunList(resp) {
    if (Array.isArray(resp)) return resp;

    const envelopeKeys = ["runs", "workflow_runs", "data", "items"];
    for (const key of envelopeKeys) {
        const value = resp?.[key];
        if (value) return value;
    }
    return [];
}

/**
 * Decides whether a run record describes a successful run.
 * Two shapes are accepted (compared case-insensitively):
 *   - status "success"
 *   - status "completed" together with conclusion "success"
 *
 * @param {{status?: any, conclusion?: any}} r - Run record from the API.
 * @returns {boolean}
 */
function isSuccessfulRun(r) {
    const lower = (value) => String(value || "").toLowerCase();
    const status = lower(r.status);
    const conclusion = lower(r.conclusion);

    if (status === "success") return true;
    return status === "completed" && conclusion === "success";
}

/**
 * Applies the FILTER_BRANCH / INCLUDE_TAGS settings to a run record.
 *
 * Rules:
 *   - A tag run (ref starts with "refs/tags/") is accepted exactly when
 *     INCLUDE_TAGS is on; the branch filter does not apply to it.
 *   - Any other run is rejected only when FILTER_BRANCH is set, the run
 *     reports a head branch, and the two differ. Runs without branch
 *     information cannot be judged and are let through.
 *
 * @param {{head_branch?: string, ref?: string}} run - Run record from the API.
 * @returns {boolean} true when the run may be considered.
 */
function passesRefFilter(run) {
    const headBranch = run.head_branch || "";
    const ref = run.ref || "";

    // Tag exclusion must hold no matter how FILTER_BRANCH is configured;
    // previously a tag run slipped through when its head_branch was empty
    // or happened to equal FILTER_BRANCH.
    if (ref.startsWith("refs/tags/")) return INCLUDE_TAGS;

    if (FILTER_BRANCH && headBranch && headBranch !== FILTER_BRANCH) return false;
    return true;
}

/* ---------------------------- API STRATEGY ---------------------------- */
/**
 * Lists workflow runs through the REST API, paging until SCAN_LIMIT runs have
 * been collected or the server runs out of results.
 *
 * @returns {Promise<Array<object>|null>} Up to SCAN_LIMIT run records, or
 *   null when the endpoint answers 404 (interpreted as "this Gitea has no
 *   runs API", which makes the caller fall back to HTML scraping).
 * @throws {Error} Any non-404 failure from the API is propagated.
 */
async function tryApiListRuns() {
    const perPage = Math.min(50, SCAN_LIMIT);
    const collected = [];

    for (let pageNo = 1; collected.length < SCAN_LIMIT; pageNo += 1) {
        const url = `${BASE}/api/v1/repos/${OWNER}/${REPO}/actions/runs?limit=${perPage}&page=${pageNo}`;

        let payload;
        try {
            payload = await apiJSON(url);
        } catch (err) {
            if (err.status === 404) return null; // endpoint not available
            throw err; // real error
        }

        const batch = normalizeRunList(payload);
        const hasRuns = Array.isArray(batch) && batch.length > 0;
        if (!hasRuns) break;

        collected.push(...batch);

        // A short page means the server has nothing further to offer.
        if (batch.length < perPage) break;
    }

    return collected.slice(0, SCAN_LIMIT);
}

/**
 * Returns the artifact names of one run as reported by the REST API.
 *
 * The request and error handling are delegated to apiJSON so that both API
 * calls in this script fail the same way (an Error whose `.status` holds the
 * HTTP code; the caller treats 404 as "use the HTML fallback").
 *
 * @param {number|string} runId - Identifier of the workflow run.
 * @returns {Promise<string[]>} Non-empty artifact names, in API order.
 * @throws {Error} On any non-2xx response (with `.status` set).
 */
async function apiArtifactNames(runId) {
    const url = `${BASE}/api/v1/repos/${OWNER}/${REPO}/actions/runs/${runId}/artifacts`;
    const json = await apiJSON(url);

    // Accept both an `{ artifacts: [...] }` envelope and a bare array.
    const list = Array.isArray(json?.artifacts) ? json.artifacts : (Array.isArray(json) ? json : []);
    return list.map(a => a?.name).filter(Boolean);
}

/* --------------------------- HTML STRATEGY ---------------------------- */
// Very light HTML parsing using regexes; robust enough for Gitea’s Actions pages.
/**
 * Discovers run ids by scraping the repository's Actions overview page for
 * links of the form ".../actions/runs/<number>".
 *
 * @returns {Promise<Array<{id: number}>>} Distinct run ids, highest first,
 *   capped at SCAN_LIMIT. Only `id` is known for scraped runs.
 * @throws {Error} When the page cannot be fetched (non-2xx response).
 */
async function listRunsFromHtml() {
    const pageUrl = `${BASE}/${OWNER}/${REPO}/actions`;
    const response = await http(pageUrl, "text/html");
    if (!response.ok) {
        const body = await response.text().catch(() => "");
        throw new Error(`HTML ${response.status} for ${pageUrl}\n${body}`);
    }

    const markup = await response.text();

    // Collect every linked run number once; a Set handles the de-duplication.
    const seenIds = new Set();
    for (const [, digits] of markup.matchAll(/\/actions\/runs\/(\d+)/g)) {
        const id = Number(digits);
        if (Number.isFinite(id)) seenIds.add(id);
    }

    // Higher id is treated as more recent.
    return Array.from(seenIds)
        .sort((left, right) => right - left)
        .slice(0, SCAN_LIMIT)
        .map(id => ({ id }));
}

/**
 * Scrapes the artifact names of one run from Gitea's HTML pages.
 *
 * The dedicated ".../runs/<id>/artifacts" page is tried first; if that request
 * fails, the run page itself is used, since some Gitea versions list
 * artifacts inline there.
 *
 * @param {number|string} runId - Identifier of the workflow run.
 * @returns {Promise<string[]>} Candidate artifact names found in the page.
 * @throws {Error} When both pages fail. The first line of the message names
 *   both requests and their HTTP statuses (the caller logs only that line).
 */
async function htmlArtifactNames(runId) {
    const url = `${BASE}/${OWNER}/${REPO}/actions/runs/${runId}/artifacts`;
    const res = await http(url, "text/html");
    if (res.ok) {
        return extractArtifactNamesFromHtml(await res.text());
    }

    // Read the failed body right away: it is kept for diagnostics and the
    // response is not left unconsumed while the fallback request runs.
    const firstBody = await res.text().catch(() => "");

    // Some Gitea versions show artifacts inline on the run page; try that as fallback.
    const runUrl = `${BASE}/${OWNER}/${REPO}/actions/runs/${runId}`;
    const res2 = await http(runUrl, "text/html");
    if (!res2.ok) {
        const secondBody = await res2.text().catch(() => "");
        // Report BOTH failures; previously the fallback's status/URL were lost.
        throw new Error(
            `HTML ${res.status} for ${url}; fallback HTML ${res2.status} for ${runUrl}\n${firstBody}\n${secondBody}`
        );
    }
    return extractArtifactNamesFromHtml(await res2.text());
}

/**
 * Pulls candidate artifact names out of an Actions HTML page.
 *
 * Deliberately permissive: it may return strings that are not artifact names.
 * That is acceptable because the caller only checks that every REQUIRED
 * name is present among the candidates.
 *
 * @param {string} html - Raw page markup.
 * @returns {string[]} Distinct, trimmed, non-empty candidates in first-seen order
 *   (all matches of pattern 1, then pattern 2, then pattern 3).
 */
function extractArtifactNamesFromHtml(html) {
    const patterns = [
        // 1) Link text: <a href="/.../actions/artifacts/NNN">NAME</a>
        /actions\/artifacts\/\d+[^>]*>([^<]+)</g,
        // 2) Attribute: data-name="NAME" or data-artifact-name="NAME"
        /data-(?:artifact-)?name="([^"]+)"/g,
        // 3) Loose: the text that FOLLOWS a closing tag whose element content
        //    mentions "artifact" (e.g. "<h3>Artifacts</h3> NAME<").
        /artifact[^<>\n]*<\/[^>]*>\s*([^<>\n]{2,80})</gi,
    ];

    const found = new Set();
    for (const pattern of patterns) {
        for (const [, captured] of html.matchAll(pattern)) {
            const candidate = captured.trim();
            if (candidate) found.add(candidate);
        }
    }
    return [...found];
}

/* ------------------------------ MAIN --------------------------------- */
// Entry point: pick the newest run that exposes every required artifact,
// persist its id to <cacheDir>/run_id and, when GITHUB_OUTPUT is set, also
// append it there as `run_id=<id>`. Exits with code 1 when nothing qualifies.
(async () => {
    // Strategy A: official API (if present); null means the endpoint answered 404.
    let runs = await tryApiListRuns();

    // Strategy B: HTML scraping fallback
    if (!runs) {
        console.log("Runs API not available on this Gitea – falling back to HTML scraping.");
        runs = await listRunsFromHtml(); // {id} only
    }

    if (!runs.length) {
        console.error("No workflow runs found.");
        process.exit(1);
    }

    // Order newest first by id; API runs may include extra fields, HTML gives only id
    const candidates = runs
        .filter(r => r && r.id != null)
        .sort((a, b) => (b.id ?? 0) - (a.id ?? 0));

    console.log(`Scanning ${Math.min(candidates.length, SCAN_LIMIT)} runs for artifacts: ${REQUIRED_ARTIFACTS.join(", ")}`);

    let pickedId = null;

    for (const r of candidates.slice(0, SCAN_LIMIT)) {
        const runId = r.id;

        // Status pre-filter. It applies whenever the record carries a status;
        // HTML-scraped runs have only `id`, so they go straight to the artifact check.
        // `conclusion` must NOT be required here: it is empty for queued/in-progress
        // runs (and on some versions even for finished ones), and requiring it let
        // such runs bypass both filters.
        if (r.status && !isSuccessfulRun(r)) {
            console.log(`Run ${runId}: not successful (status=${r.status}, conclusion=${r.conclusion})`);
            continue;
        }
        // Ref filter. Safe to call for every record: without head_branch/ref
        // information it accepts the run.
        if (!passesRefFilter(r)) {
            console.log(`Run ${runId}: skipped by ref filter (branch=${r.head_branch || ""}, ref=${r.ref || ""})`);
            continue;
        }

        let names = [];
        try {
            // Prefer API artifacts; if 404, use HTML extraction.
            try {
                names = await apiArtifactNames(runId);
            } catch (e) {
                if (e.status === 404) {
                    names = await htmlArtifactNames(runId);
                } else {
                    throw e;
                }
            }

            // Normalize artifact names (trim & case-sensitive match as in UI/API)
            names = names.map(n => n.trim()).filter(Boolean);

            const ok = REQUIRED_ARTIFACTS.every(req => names.includes(req));
            if (ok) {
                pickedId = runId;
                break;
            }
            console.log(`Run ${runId}: lacks required artifacts (has: ${names.join(", ") || "none"})`);
        } catch (e) {
            // Best effort: a run whose artifacts cannot be read is skipped, not fatal.
            console.log(`Run ${runId}: cannot read artifacts -> ${String(e.message).split("\n")[0]}`);
            continue;
        }
    }

    // Compare against null explicitly so that only "nothing picked" ends up here.
    if (pickedId == null) {
        console.error("No run exposes all required artifacts.");
        process.exit(1);
    }

    fs.writeFileSync(path.join(cacheDir, "run_id"), String(pickedId), "utf8");
    if (process.env.GITHUB_OUTPUT) {
        fs.appendFileSync(process.env.GITHUB_OUTPUT, `run_id=${pickedId}\n`);
    }
    console.log(`Picked run_id=${pickedId}`);
})().catch(err => {
    console.error(err.stack || err.message || String(err));
    process.exit(1);
});