From 7b5258d5490b42a3f88cf0bac5251e7397cefa36 Mon Sep 17 00:00:00 2001
From: platane
Date: Mon, 9 Jan 2023 15:51:09 +0100
Subject: [PATCH] .

---
Reviewer notes (free text between the three-dash line and the first
"diff --git" header is not part of the commit):

What this patch does
  * httpGet.ts (new): GET helper that returns the response body as text.
    Responses are cached on disk under <__dirname>/cache/http, keyed by the
    sanitized URL. Non-cached requests are serialized through a small FIFO
    mutex. On HTTP 429 it waits for "retry-after" seconds (300 when the header
    is absent) and calls itself again; any other non-ok response throws.
  * getDependentInfo.ts (new): scrapes https://github.com/<repo>/actions with
    cheerio, walks the listed runs, and returns the first run whose workflow
    file contains a "uses: Platane/snk[/svg-only]@..." step, together with its
    success flag, date, cron expression and workflow text.
  * getDependentInfo-api.ts (new): same lookup through the Octokit REST API
    (exported as getLastRunInfo); errors are logged and swallowed.
  * dependents.ts -> getDependents.ts: node-fetch calls replaced by httpGet,
    the fixed 1000-1500 ms wait and the JSON dump files removed, the
    self-invoking entry point removed, getDependentByPackage now returns
    { repos, pages }.
  * index.ts (new): entry point; loads dependents, shuffles them with a
    seeded ParkMiller generator, keeps at most the last 5000 and queries
    getDependentInfo sequentially.
  * package.json: adds park-miller 1.1.0, "start" now runs index.ts.

Reconstruction notes
  * This file had all line breaks and indentation collapsed. Lines were
    re-split so that every hunk matches its "@@" line counts and the diffstat
    (260 insertions, 27 deletions). Indentation follows the 2-space Prettier
    layout and is an assumption; confirm against the repository before
    relying on context-line matching.
  * Type arguments that had been stripped were restored:
    "Promise<string>" on httpGet and "<T>" on toChunk / shuffle.
  * The author e-mail address on the "From:" line is missing and was not
    guessed.

Review observations (not changed here)
  * index.ts fills `infos` but never writes or returns it, and `toChunk` is
    only referenced from commented-out code.
  * The httpGet cache key maps every character outside [\w=&?.] to "_", so
    distinct URLs can share one cache file.

 packages/usage-stats/getDependentInfo-api.ts  | 53 ++++++++++++
 packages/usage-stats/getDependentInfo.ts      | 56 +++++++++++++
 .../{dependents.ts => getDependents.ts}       | 38 +++------
 .../{getRunInfo.ts => getRunInfo-api-copy.ts} |  0
 packages/usage-stats/httpGet.ts               | 84 +++++++++++++++++++
 packages/usage-stats/index.ts                 | 51 +++++++++++
 packages/usage-stats/package.json             |  5 +-
 7 files changed, 260 insertions(+), 27 deletions(-)
 create mode 100644 packages/usage-stats/getDependentInfo-api.ts
 create mode 100644 packages/usage-stats/getDependentInfo.ts
 rename packages/usage-stats/{dependents.ts => getDependents.ts} (60%)
 rename packages/usage-stats/{getRunInfo.ts => getRunInfo-api-copy.ts} (100%)
 create mode 100644 packages/usage-stats/httpGet.ts
 create mode 100644 packages/usage-stats/index.ts

diff --git a/packages/usage-stats/getDependentInfo-api.ts b/packages/usage-stats/getDependentInfo-api.ts
new file mode 100644
index 0000000..1a7a24b
--- /dev/null
+++ b/packages/usage-stats/getDependentInfo-api.ts
@@ -0,0 +1,53 @@
+import { Octokit } from "octokit";
+import { httpGet } from "./httpGet";
+
+require("dotenv").config();
+
+const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN });
+
+export const getLastRunInfo = async (repo_: string) => {
+  const [owner, repo] = repo_.split("/");
+
+  try {
+    const {
+      data: { workflow_runs },
+    } = await octokit.request(
+      "GET /repos/{owner}/{repo}/actions/runs{?actor,branch,event,status,per_page,page,created,exclude_pull_requests,check_suite_id,head_sha}",
+      { owner, repo }
+    );
+
+    for (const r of workflow_runs) {
+      const {
+        run_started_at: date,
+        head_sha,
+        path,
+        conclusion,
+      } = r as {
+        run_started_at: string;
+        head_sha: string;
+        path: string;
+        conclusion: "failure" | "success";
+      };
+
+      const workflow_url = `https://raw.githubusercontent.com/${owner}/${repo}/${head_sha}/${path}`;
+
+      const workflow_code = await httpGet(workflow_url);
+
+      const [_, dependency] =
+        workflow_code.match(/uses\s*:\s*(Platane\/snk(\/svg-only)?@\w*)/) ?? [];
+
+      const cronMatch = workflow_code.match(/cron\s*:([^\n]*)/);
+
+      if (dependency)
+        return {
+          dependency,
+          success: conclusion === "success",
+          date,
+          cron: cronMatch?.[1].replace(/["|']/g, "").trim(),
+          workflow_code,
+        };
+    }
+  } catch (err) {
+    console.error(err);
+  }
+};
diff --git a/packages/usage-stats/getDependentInfo.ts b/packages/usage-stats/getDependentInfo.ts
new file mode 100644
index 0000000..7737802
--- /dev/null
+++ b/packages/usage-stats/getDependentInfo.ts
@@ -0,0 +1,56 @@
+import { load as CheerioLoad } from "cheerio";
+import { httpGet } from "./httpGet";
+
+export const getDependentInfo = async (repo: string) => {
+  const pageText = await httpGet(`https://github.com/${repo}/actions`).catch(
+    () => null
+  );
+
+  if (!pageText) return;
+
+  const $ = CheerioLoad(pageText);
+
+  const runs = $("#partial-actions-workflow-runs [data-url]")
+    .toArray()
+    .map((el) => {
+      const success =
+        $(el).find('[aria-label="completed successfully"]').toArray().length ===
+        1;
+
+      const workflow_file_href = $(el)
+        .find("a")
+        .toArray()
+        .map((el) => $(el).attr("href")!)
+        .find((href) => href.match(/\/actions\/runs\/\d+\/workflow/))!;
+
+      const workflow_file_url = workflow_file_href
+        ? new URL(workflow_file_href, "https://github.com").toString()
+        : null;
+
+      const date = $(el).find("relative-time").attr("datetime");
+
+      return { success, workflow_file_url, date };
+    });
+
+  for (const { workflow_file_url, success, date } of runs) {
+    if (!workflow_file_url) continue;
+
+    const $ = CheerioLoad(await httpGet(workflow_file_url));
+
+    const workflow_code = $("table[data-hpc]").text();
+
+    const [_, dependency] =
+      workflow_code.match(/uses\s*:\s*(Platane\/snk(\/svg-only)?@\w*)/) ?? [];
+
+    const cronMatch = workflow_code.match(/cron\s*:([^\n]*)/);
+
+    if (dependency)
+      return {
+        dependency,
+        success,
+        date,
+        cron: cronMatch?.[1].replace(/["|']/g, "").trim(),
+        workflow_code,
+      };
+  }
+};
diff --git a/packages/usage-stats/dependents.ts b/packages/usage-stats/getDependents.ts
similarity index 60%
rename from packages/usage-stats/dependents.ts
rename to packages/usage-stats/getDependents.ts
index 99d9ac2..f530d4f 100644
--- a/packages/usage-stats/dependents.ts
+++ b/packages/usage-stats/getDependents.ts
@@ -1,11 +1,10 @@
-import * as fs from "fs";
-import fetch from "node-fetch";
 import { load as CheerioLoad } from "cheerio";
+import { httpGet } from "./httpGet";
 
 const getPackages = async (repo: string) => {
-  const pageText = await fetch(
+  const pageText = await httpGet(
     `https://github.com/${repo}/network/dependents`
-  ).then((res) => res.text());
+  );
   const $ = CheerioLoad(pageText);
 
   return $("#dependents .select-menu-list a")
@@ -29,17 +28,15 @@ const getDependentByPackage = async (repo: string, packageId: string) => {
     | null = `https://github.com/${repo}/network/dependents?package_id=${packageId}`;
 
   while (url) {
-    console.log(url, repos.length);
+    const $ = CheerioLoad(await httpGet(url));
 
-    await wait(1000 + Math.floor(Math.random() * 500));
+    console.log(repos.length);
 
-    const $ = CheerioLoad(await fetch(url).then((res) => res.text()));
-
-    const rs = $(`#dependents [data-hovercard-type="repository"]`)
+    const reposOnPage = $(`#dependents [data-hovercard-type="repository"]`)
       .toArray()
       .map((el) => $(el).attr("href")!.slice(1));
 
-    repos.push(...rs);
+    repos.push(...reposOnPage);
 
     const nextButton = $(`#dependents a`)
       .filter((_, el) => $(el).text().trim().toLowerCase() === "next")
@@ -47,16 +44,12 @@ const getDependentByPackage = async (repo: string, packageId: string) => {
 
     const href = nextButton ? nextButton.attr("href") : null;
 
-    pages.push({ url, rs, next: href });
-    fs.writeFileSync(
-      __dirname + `/out-${packageId}.json`,
-      JSON.stringify(pages)
-    );
+    pages.push({ url, reposOnPage, next: href });
 
     url = href ? new URL(href, "https://github.com").toString() : null;
   }
 
-  return repos;
+  return { repos, pages };
 };
 
 export const getDependents = async (repo: string) => {
@@ -65,15 +58,10 @@ export const getDependents = async (repo: string) => {
   const ps: (typeof packages[number] & { dependents: string[] })[] = [];
 
   for (const p of packages)
-    ps.push({ ...p, dependents: await getDependentByPackage(repo, p.id) });
+    ps.push({
+      ...p,
+      dependents: (await getDependentByPackage(repo, p.id)).repos,
+    });
 
   return ps;
 };
-
-const wait = (delay = 0) => new Promise((r) => setTimeout(r, delay));
-
-(async () => {
-  const res = await getDependents("platane/snk");
-
-  fs.writeFileSync(__dirname + "/cache/out.json", JSON.stringify(res));
-})();
diff --git a/packages/usage-stats/getRunInfo.ts b/packages/usage-stats/getRunInfo-api-copy.ts
similarity index 100%
rename from packages/usage-stats/getRunInfo.ts
rename to packages/usage-stats/getRunInfo-api-copy.ts
diff --git a/packages/usage-stats/httpGet.ts b/packages/usage-stats/httpGet.ts
new file mode 100644
index 0000000..062cd82
--- /dev/null
+++ b/packages/usage-stats/httpGet.ts
@@ -0,0 +1,84 @@
+import fetch from "node-fetch";
+import * as path from "path";
+import * as fs from "fs";
+
+const CACHE_DIR = path.join(__dirname, "cache", "http");
+fs.mkdirSync(CACHE_DIR, { recursive: true });
+
+const createMutex = () => {
+  let locked = false;
+  const q: any[] = [];
+
+  const update = () => {
+    if (locked) return;
+
+    if (q[0]) {
+      locked = true;
+      q.shift()(() => {
+        locked = false;
+        update();
+      });
+    }
+  };
+
+  const request = () =>
+    new Promise<() => void>((resolve) => {
+      q.push(resolve);
+      update();
+    });
+
+  return request;
+};
+
+const mutex = createMutex();
+
+export const httpGet = async (url: string | URL): Promise<string> => {
+  const cacheKey = url
+    .toString()
+    .replace(/https?:\/\//, "")
+    .replace(/[^\w=&\?\.]/g, "_");
+
+  const cacheFilename = path.join(CACHE_DIR, cacheKey);
+
+  if (fs.existsSync(cacheFilename))
+    return new Promise((resolve, reject) =>
+      fs.readFile(cacheFilename, (err, data) =>
+        err ? reject(err) : resolve(data.toString())
+      )
+    );
+
+  const release = await mutex();
+
+  try {
+    const res = await fetch(url);
+
+    if (!res.ok) {
+      if (res.status === 429 || res.statusText === "Too Many Requests") {
+        const delay = +(res.headers.get("retry-after") ?? 300) * 1000;
+
+        console.log("Too Many Requests", delay);
+
+        await wait(delay);
+
+        console.log("waited long enough");
+
+        return httpGet(url);
+      }
+
+      console.error(url, res.status, res.statusText);
+      throw new Error("res not ok");
+    }
+
+    const text = await res.text();
+
+    fs.writeFileSync(cacheFilename, text);
+
+    // await wait(Math.random() * 200 + 100);
+
+    return text;
+  } finally {
+    release();
+  }
+};
+
+const wait = (delay = 0) => new Promise((r) => setTimeout(r, delay));
diff --git a/packages/usage-stats/index.ts b/packages/usage-stats/index.ts
new file mode 100644
index 0000000..9cb367e
--- /dev/null
+++ b/packages/usage-stats/index.ts
@@ -0,0 +1,51 @@
+import { getDependentInfo } from "./getDependentInfo";
+import { getDependents } from "./getDependents";
+import ParkMiller from "park-miller";
+
+const toChunk = <T>(arr: T[], n = 1) =>
+  Array.from({ length: Math.ceil(arr.length / n) }, (_, i) =>
+    arr.slice(i * n, (i + 1) * n)
+  );
+
+const random = new ParkMiller(10);
+
+const shuffle = <T>(array: T[]) => {
+  for (let i = array.length - 1; i > 0; i--) {
+    const j = Math.floor(random.float() * (i + 1));
+    const temp = array[i];
+    array[i] = array[j];
+    array[j] = temp;
+  }
+};
+
+(async () => {
+  const packages = await getDependents("Platane/snk");
+
+  const repos = packages.map((p) => p.dependents).flat();
+
+  shuffle(repos);
+  repos.splice(0, repos.length - 5000);
+
+  console.log(repos);
+
+  const infos: any[] = [];
+
+  // for (const chunk of toChunk(repos, 10))
+  //   await Promise.all(
+  //     chunk.map(async (repo) => {
+  //       console.log(
+  //         infos.length.toString().padStart(5, " "),
+  //         "/",
+  //         repos.length
+  //       );
+
+  //       infos.push({ repo, ...(await getDependentInfo(repo)) });
+  //     })
+  //   );
+
+  for (const repo of repos) {
+    console.log(infos.length.toString().padStart(5, " "), "/", repos.length);
+
+    infos.push({ repo, ...(await getDependentInfo(repo)) });
+  }
+})();
diff --git a/packages/usage-stats/package.json b/packages/usage-stats/package.json
index 3178d60..b713a3e 100644
--- a/packages/usage-stats/package.json
+++ b/packages/usage-stats/package.json
@@ -7,9 +7,10 @@
     "cheerio": "1.0.0-rc.12",
     "node-fetch": "2.6.7",
     "octokit": "2.0.11",
-    "dotenv": "16.0.3"
+    "dotenv": "16.0.3",
+    "park-miller": "1.1.0"
   },
   "scripts": {
-    "start": "sucrase-node stats.ts"
+    "start": "sucrase-node index.ts"
   }
 }