This commit is contained in:
platane
2023-01-09 15:51:09 +01:00
parent f3820e8edc
commit 7b5258d549
7 changed files with 260 additions and 27 deletions

View File

@@ -0,0 +1,53 @@
import { Octokit } from "octokit";
import { httpGet } from "./httpGet";
require("dotenv").config();
const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN });
/**
 * Query the GitHub API for the workflow runs of `repo_` ("owner/name") and
 * return info about the first run whose workflow file uses Platane/snk:
 * the dependency ref, whether the run succeeded, its start date, the cron
 * schedule, and the raw workflow source. Resolves to undefined when no run
 * references snk or when the API call fails.
 */
export const getLastRunInfo = async (repo_: string) => {
  const [owner, repo] = repo_.split("/");
  try {
    const {
      data: { workflow_runs },
    } = await octokit.request(
      "GET /repos/{owner}/{repo}/actions/runs{?actor,branch,event,status,per_page,page,created,exclude_pull_requests,check_suite_id,head_sha}",
      { owner, repo }
    );
    // NOTE(review): assumes runs come most-recent first (GitHub API default
    // ordering) — confirm if "last run" semantics matter to callers.
    for (const r of workflow_runs) {
      const {
        run_started_at: date,
        head_sha,
        path,
        conclusion,
      } = r as {
        run_started_at: string;
        head_sha: string;
        path: string;
        conclusion: "failure" | "success";
      };
      // Fetch the workflow file exactly as it was at the run's commit.
      const workflow_url = `https://raw.githubusercontent.com/${owner}/${repo}/${head_sha}/${path}`;
      const workflow_code = await httpGet(workflow_url);
      const dependency = workflow_code.match(
        /uses\s*:\s*(Platane\/snk(\/svg-only)?@\w*)/
      )?.[1];
      const cronMatch = workflow_code.match(/cron\s*:([^\n]*)/);
      if (dependency)
        return {
          dependency,
          success: conclusion === "success",
          date,
          // Strip surrounding quotes from the cron expression.
          // Fixed: the original class /["|']/ also removed literal "|"
          // characters; "|" inside [] is a literal, not alternation.
          cron: cronMatch?.[1].replace(/["']/g, "").trim(),
          workflow_code,
        };
    }
  } catch (err) {
    // Best-effort: log and fall through to undefined.
    console.error(err);
  }
};

View File

@@ -0,0 +1,56 @@
import { load as CheerioLoad } from "cheerio";
import { httpGet } from "./httpGet";
/**
 * Scrape a repo's GitHub Actions page for the first listed run whose
 * workflow uses Platane/snk. Returns the dependency ref, run outcome,
 * run date, cron schedule, and workflow source, or undefined when the
 * page is unreachable or no run references snk.
 */
export const getDependentInfo = async (repo: string) => {
  const pageText = await httpGet(`https://github.com/${repo}/actions`).catch(
    () => null
  );
  if (!pageText) return;
  const $ = CheerioLoad(pageText);

  // One entry per run row: outcome flag, link to the rendered workflow
  // file (may be absent), and the run date.
  const runs = $("#partial-actions-workflow-runs [data-url]")
    .toArray()
    .map((el) => {
      const success =
        $(el).find('[aria-label="completed successfully"]').toArray().length ===
        1;
      // Among the row's links, the /actions/runs/<id>/workflow one leads
      // to the rendered workflow source. Fixed: dropped the non-null
      // assertions that contradicted the null check below — attr/find can
      // legitimately yield undefined here.
      const workflow_file_href = $(el)
        .find("a")
        .toArray()
        .map((el) => $(el).attr("href"))
        .find((href) => href && href.match(/\/actions\/runs\/\d+\/workflow/));
      const workflow_file_url = workflow_file_href
        ? new URL(workflow_file_href, "https://github.com").toString()
        : null;
      const date = $(el).find("relative-time").attr("datetime");
      return { success, workflow_file_url, date };
    });

  for (const { workflow_file_url, success, date } of runs) {
    if (!workflow_file_url) continue;
    // The rendered workflow page holds the file content in a data-hpc table.
    const $ = CheerioLoad(await httpGet(workflow_file_url));
    const workflow_code = $("table[data-hpc]").text();
    const dependency = workflow_code.match(
      /uses\s*:\s*(Platane\/snk(\/svg-only)?@\w*)/
    )?.[1];
    const cronMatch = workflow_code.match(/cron\s*:([^\n]*)/);
    if (dependency)
      return {
        dependency,
        success,
        date,
        // Strip surrounding quotes. Fixed: the original class /["|']/
        // also removed literal "|" characters from the expression.
        cron: cronMatch?.[1].replace(/["']/g, "").trim(),
        workflow_code,
      };
  }
};

View File

@@ -1,11 +1,10 @@
import * as fs from "fs";
import fetch from "node-fetch";
import { load as CheerioLoad } from "cheerio"; import { load as CheerioLoad } from "cheerio";
import { httpGet } from "./httpGet";
const getPackages = async (repo: string) => { const getPackages = async (repo: string) => {
const pageText = await fetch( const pageText = await httpGet(
`https://github.com/${repo}/network/dependents` `https://github.com/${repo}/network/dependents`
).then((res) => res.text()); );
const $ = CheerioLoad(pageText); const $ = CheerioLoad(pageText);
return $("#dependents .select-menu-list a") return $("#dependents .select-menu-list a")
@@ -29,17 +28,15 @@ const getDependentByPackage = async (repo: string, packageId: string) => {
| null = `https://github.com/${repo}/network/dependents?package_id=${packageId}`; | null = `https://github.com/${repo}/network/dependents?package_id=${packageId}`;
while (url) { while (url) {
console.log(url, repos.length); const $ = CheerioLoad(await httpGet(url));
await wait(1000 + Math.floor(Math.random() * 500)); console.log(repos.length);
const $ = CheerioLoad(await fetch(url).then((res) => res.text())); const reposOnPage = $(`#dependents [data-hovercard-type="repository"]`)
const rs = $(`#dependents [data-hovercard-type="repository"]`)
.toArray() .toArray()
.map((el) => $(el).attr("href")!.slice(1)); .map((el) => $(el).attr("href")!.slice(1));
repos.push(...rs); repos.push(...reposOnPage);
const nextButton = $(`#dependents a`) const nextButton = $(`#dependents a`)
.filter((_, el) => $(el).text().trim().toLowerCase() === "next") .filter((_, el) => $(el).text().trim().toLowerCase() === "next")
@@ -47,16 +44,12 @@ const getDependentByPackage = async (repo: string, packageId: string) => {
const href = nextButton ? nextButton.attr("href") : null; const href = nextButton ? nextButton.attr("href") : null;
pages.push({ url, rs, next: href }); pages.push({ url, reposOnPage, next: href });
fs.writeFileSync(
__dirname + `/out-${packageId}.json`,
JSON.stringify(pages)
);
url = href ? new URL(href, "https://github.com").toString() : null; url = href ? new URL(href, "https://github.com").toString() : null;
} }
return repos; return { repos, pages };
}; };
export const getDependents = async (repo: string) => { export const getDependents = async (repo: string) => {
@@ -65,15 +58,10 @@ export const getDependents = async (repo: string) => {
const ps: (typeof packages[number] & { dependents: string[] })[] = []; const ps: (typeof packages[number] & { dependents: string[] })[] = [];
for (const p of packages) for (const p of packages)
ps.push({ ...p, dependents: await getDependentByPackage(repo, p.id) }); ps.push({
...p,
dependents: (await getDependentByPackage(repo, p.id)).repos,
});
return ps; return ps;
}; };
const wait = (delay = 0) => new Promise((r) => setTimeout(r, delay));
(async () => {
const res = await getDependents("platane/snk");
fs.writeFileSync(__dirname + "/cache/out.json", JSON.stringify(res));
})();

View File

@@ -0,0 +1,84 @@
import fetch from "node-fetch";
import * as path from "path";
import * as fs from "fs";
const CACHE_DIR = path.join(__dirname, "cache", "http");
fs.mkdirSync(CACHE_DIR, { recursive: true });
/**
 * Create an async mutex. The returned function is awaited to acquire the
 * lock and resolves with a release callback; waiters are granted the lock
 * in FIFO order, one at a time.
 */
const createMutex = () => {
  type Release = () => void;
  const waiters: ((release: Release) => void)[] = [];
  let held = false;

  // Hand the lock to the next waiter, if any, when it is free.
  const grantNext = () => {
    if (held) return;
    const next = waiters.shift();
    if (!next) return;
    held = true;
    next(() => {
      held = false;
      grantNext();
    });
  };

  return () =>
    new Promise<Release>((resolve) => {
      waiters.push(resolve);
      grantNext();
    });
};
const mutex = createMutex();
/**
 * Fetch a URL as text, with a filesystem cache and a global mutex so at
 * most one request is in flight at a time. On 429 it waits for the
 * retry-after delay and retries.
 *
 * Fixed: the original retried by recursing while still holding the mutex
 * (release only ran in `finally`, after the recursion returned), so the
 * nested call's `await mutex()` could never resolve — a guaranteed
 * deadlock on the first 429. The mutex is now released before waiting.
 */
export const httpGet = async (url: string | URL): Promise<string> => {
  // Derive a filesystem-safe cache key from the URL.
  const cacheKey = url
    .toString()
    .replace(/https?:\/\//, "")
    .replace(/[^\w=&\?\.]/g, "_");
  const cacheFilename = path.join(CACHE_DIR, cacheKey);

  // Cache hit: serve from disk without touching the network or the mutex.
  if (fs.existsSync(cacheFilename))
    return fs.promises.readFile(cacheFilename, "utf8");

  const release = await mutex();
  let retryDelay: number | null = null;
  try {
    const res = await fetch(url);
    if (res.ok) {
      const text = await res.text();
      fs.writeFileSync(cacheFilename, text);
      return text;
    }
    if (res.status === 429 || res.statusText === "Too Many Requests") {
      // NOTE(review): retry-after may also be an HTTP date; assumed to be
      // seconds here — confirm against GitHub's rate-limit responses.
      retryDelay = +(res.headers.get("retry-after") ?? 300) * 1000;
    } else {
      console.error(url, res.status, res.statusText);
      throw new Error("res not ok");
    }
  } finally {
    // Release before any retry so the recursive call can acquire the lock.
    release();
  }
  console.log("Too Many Requests", retryDelay);
  await wait(retryDelay ?? 0);
  console.log("waited long enough");
  return httpGet(url);
};
const wait = (delay = 0) => new Promise((r) => setTimeout(r, delay));

View File

@@ -0,0 +1,51 @@
import { getDependentInfo } from "./getDependentInfo";
import { getDependents } from "./getDependents";
import ParkMiller from "park-miller";
/** Split `arr` into consecutive chunks of at most `n` elements each. */
const toChunk = <T>(arr: T[], n = 1) => {
  const chunks: T[][] = [];
  for (let start = 0; start < arr.length; start += n)
    chunks.push(arr.slice(start, start + n));
  return chunks;
};
const random = new ParkMiller(10);
/**
 * In-place Fisher–Yates shuffle driven by the module-level `random`
 * generator (seeded at construction, so the permutation is repeatable
 * given the same seed). Returns nothing; mutates `array`.
 */
const shuffle = <T>(array: T[]) => {
  for (let end = array.length - 1; end > 0; end--) {
    // assumes random.float() yields a value in [0, 1) — TODO confirm
    const pick = Math.floor(random.float() * (end + 1));
    [array[end], array[pick]] = [array[pick], array[end]];
  }
};
// Entry point: gather all dependents of Platane/snk, deterministically
// sample up to 5000 of them (seeded shuffle above), and collect each
// repo's workflow info one at a time.
(async () => {
  const packages = await getDependents("Platane/snk");
  // Flatten every package's dependent list into a single repo list.
  const repos = packages.map((p) => p.dependents).flat();
  shuffle(repos);
  // Drop everything but the last 5000 entries (no-op when fewer).
  repos.splice(0, repos.length - 5000);
  console.log(repos);
  const infos: any[] = [];
  // Alternative kept for reference: fetch in parallel batches of 10.
  // for (const chunk of toChunk(repos, 10))
  //   await Promise.all(
  //     chunk.map(async (repo) => {
  //       console.log(
  //         infos.length.toString().padStart(5, " "),
  //         "/",
  //         repos.length
  //       );
  //       infos.push({ repo, ...(await getDependentInfo(repo)) });
  //     })
  //   );
  // Sequential fetch with a simple "done / total" progress log.
  for (const repo of repos) {
    console.log(infos.length.toString().padStart(5, " "), "/", repos.length);
    infos.push({ repo, ...(await getDependentInfo(repo)) });
  }
})();

View File

@@ -7,9 +7,10 @@
"cheerio": "1.0.0-rc.12", "cheerio": "1.0.0-rc.12",
"node-fetch": "2.6.7", "node-fetch": "2.6.7",
"octokit": "2.0.11", "octokit": "2.0.11",
"dotenv": "16.0.3" "dotenv": "16.0.3",
"park-miller": "1.1.0"
}, },
"scripts": { "scripts": {
"start": "sucrase-node stats.ts" "start": "sucrase-node index.ts"
} }
} }