.
This commit is contained in:
53
packages/usage-stats/getDependentInfo-api.ts
Normal file
53
packages/usage-stats/getDependentInfo-api.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
import { Octokit } from "octokit";
|
||||
import { httpGet } from "./httpGet";
|
||||
|
||||
// dotenv's config() runs first so that the token lookup below sees any
// variables it sets (presumably read from a local .env file — confirm against
// the dotenv documentation).
require("dotenv").config();

// GitHub API client shared by this module, authenticated with GITHUB_TOKEN.
const { GITHUB_TOKEN } = process.env;
const octokit = new Octokit({ auth: GITHUB_TOKEN });
|
||||
|
||||
/**
 * Looks for a GitHub Actions run of the given repository whose workflow file
 * uses the `Platane/snk` action (optionally its `/svg-only` variant).
 *
 * Runs are inspected in the order returned by the "list workflow runs" API
 * request. For each run the workflow file is downloaded from
 * raw.githubusercontent.com at the run's commit and searched for a
 * `uses: Platane/snk...@ref` line; the first run that matches wins.
 *
 * @param repo_ Repository in `owner/name` form.
 * @returns For the first matching run, an object with
 *   - `dependency`: the matched action reference, e.g. `Platane/snk@v2.1.0`,
 *   - `success`: whether the run's conclusion is `"success"`,
 *   - `date`: the run's `run_started_at` value,
 *   - `cron`: the text following the first `cron:` key (quotes stripped), if any,
 *   - `workflow_code`: the full workflow file content.
 *   Resolves to `undefined` when no inspected run matches, or when listing the
 *   runs fails (the error is logged, not rethrown).
 */
export const getLastRunInfo = async (repo_: string) => {
  const [owner, repo] = repo_.split("/");

  try {
    const {
      data: { workflow_runs },
    } = await octokit.request(
      "GET /repos/{owner}/{repo}/actions/runs{?actor,branch,event,status,per_page,page,created,exclude_pull_requests,check_suite_id,head_sha}",
      { owner, repo }
    );

    for (const r of workflow_runs) {
      const {
        run_started_at: date,
        head_sha,
        path,
        conclusion,
      } = r as {
        run_started_at: string;
        head_sha: string;
        path: string;
        conclusion: "failure" | "success";
      };

      const workflow_url = `https://raw.githubusercontent.com/${owner}/${repo}/${head_sha}/${path}`;

      let workflow_code: string;
      try {
        workflow_code = await httpGet(workflow_url);
      } catch (err) {
        // One unreadable workflow file must not abort the scan of the
        // remaining runs; report it and move on.
        console.error(`could not read ${workflow_url}`, err);
        continue;
      }

      // The ref after "@" may contain dots and dashes (e.g. "v2.1.0"), so it
      // is matched with [\w.-]* rather than \w* which would stop at "v2".
      const dependency = workflow_code.match(
        /uses\s*:\s*(Platane\/snk(\/svg-only)?@[\w.-]*)/
      )?.[1];

      if (!dependency) continue;

      const cronMatch = workflow_code.match(/cron\s*:([^\n]*)/);

      return {
        dependency,
        success: conclusion === "success",
        date,
        // Strip the YAML quoting around the cron expression.
        cron: cronMatch?.[1]?.replace(/["']/g, "").trim(),
        workflow_code,
      };
    }
  } catch (err) {
    console.error(err);
  }
};
|
||||
56
packages/usage-stats/getDependentInfo.ts
Normal file
56
packages/usage-stats/getDependentInfo.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { load as CheerioLoad } from "cheerio";
|
||||
import { httpGet } from "./httpGet";
|
||||
|
||||
/**
 * Scrapes the public "Actions" page of a repository to find a workflow run
 * whose workflow file uses the `Platane/snk` action (optionally `/svg-only`).
 *
 * The runs listed on the page are inspected in page order; for each one the
 * linked workflow-file page is downloaded and its code table is searched for
 * a `uses: Platane/snk...@ref` line. The first run that matches wins.
 *
 * @param repo Repository in `owner/name` form.
 * @returns For the first matching run, an object with
 *   - `dependency`: the matched action reference, e.g. `Platane/snk@v2.1.0`,
 *   - `success`: whether the run row contains exactly one element labelled
 *     "completed successfully",
 *   - `date`: the `datetime` attribute of the row's `<relative-time>` element,
 *   - `cron`: the text following the first `cron:` key (quotes stripped), if any,
 *   - `workflow_code`: the text of the workflow file's code table.
 *   Resolves to `undefined` when the Actions page cannot be fetched or when no
 *   inspected run matches.
 */
export const getDependentInfo = async (repo: string) => {
  const pageText = await httpGet(`https://github.com/${repo}/actions`).catch(
    () => null
  );

  if (!pageText) return;

  const $ = CheerioLoad(pageText);

  const runs = $("#partial-actions-workflow-runs [data-url]")
    .toArray()
    .map((el) => {
      const success =
        $(el).find('[aria-label="completed successfully"]').toArray().length ===
        1;

      // Anchors without an href yield undefined here, so the predicate has to
      // guard before testing the pattern.
      const workflow_file_href = $(el)
        .find("a")
        .toArray()
        .map((a) => $(a).attr("href"))
        .find(
          (href): href is string =>
            href !== undefined && /\/actions\/runs\/\d+\/workflow/.test(href)
        );

      const workflow_file_url = workflow_file_href
        ? new URL(workflow_file_href, "https://github.com").toString()
        : null;

      const date = $(el).find("relative-time").attr("datetime");

      return { success, workflow_file_url, date };
    });

  for (const { workflow_file_url, success, date } of runs) {
    if (!workflow_file_url) continue;

    // Same best-effort policy as for the Actions page above: a workflow page
    // that cannot be fetched is skipped instead of rejecting the whole call.
    const workflowPage = await httpGet(workflow_file_url).catch((err) => {
      console.error(`could not read ${workflow_file_url}`, err);
      return null;
    });

    if (!workflowPage) continue;

    const workflow_code = CheerioLoad(workflowPage)("table[data-hpc]").text();

    // The ref after "@" may contain dots and dashes (e.g. "v2.1.0"), so it is
    // matched with [\w.-]* rather than \w* which would stop at "v2".
    const dependency = workflow_code.match(
      /uses\s*:\s*(Platane\/snk(\/svg-only)?@[\w.-]*)/
    )?.[1];

    if (!dependency) continue;

    const cronMatch = workflow_code.match(/cron\s*:([^\n]*)/);

    return {
      dependency,
      success,
      date,
      // Strip the YAML quoting around the cron expression.
      cron: cronMatch?.[1]?.replace(/["']/g, "").trim(),
      workflow_code,
    };
  }
};
|
||||
@@ -1,11 +1,10 @@
|
||||
import * as fs from "fs";
|
||||
import fetch from "node-fetch";
|
||||
import { load as CheerioLoad } from "cheerio";
|
||||
import { httpGet } from "./httpGet";
|
||||
|
||||
const getPackages = async (repo: string) => {
|
||||
const pageText = await fetch(
|
||||
const pageText = await httpGet(
|
||||
`https://github.com/${repo}/network/dependents`
|
||||
).then((res) => res.text());
|
||||
);
|
||||
const $ = CheerioLoad(pageText);
|
||||
|
||||
return $("#dependents .select-menu-list a")
|
||||
@@ -29,17 +28,15 @@ const getDependentByPackage = async (repo: string, packageId: string) => {
|
||||
| null = `https://github.com/${repo}/network/dependents?package_id=${packageId}`;
|
||||
|
||||
while (url) {
|
||||
console.log(url, repos.length);
|
||||
const $ = CheerioLoad(await httpGet(url));
|
||||
|
||||
await wait(1000 + Math.floor(Math.random() * 500));
|
||||
console.log(repos.length);
|
||||
|
||||
const $ = CheerioLoad(await fetch(url).then((res) => res.text()));
|
||||
|
||||
const rs = $(`#dependents [data-hovercard-type="repository"]`)
|
||||
const reposOnPage = $(`#dependents [data-hovercard-type="repository"]`)
|
||||
.toArray()
|
||||
.map((el) => $(el).attr("href")!.slice(1));
|
||||
|
||||
repos.push(...rs);
|
||||
repos.push(...reposOnPage);
|
||||
|
||||
const nextButton = $(`#dependents a`)
|
||||
.filter((_, el) => $(el).text().trim().toLowerCase() === "next")
|
||||
@@ -47,16 +44,12 @@ const getDependentByPackage = async (repo: string, packageId: string) => {
|
||||
|
||||
const href = nextButton ? nextButton.attr("href") : null;
|
||||
|
||||
pages.push({ url, rs, next: href });
|
||||
fs.writeFileSync(
|
||||
__dirname + `/out-${packageId}.json`,
|
||||
JSON.stringify(pages)
|
||||
);
|
||||
pages.push({ url, reposOnPage, next: href });
|
||||
|
||||
url = href ? new URL(href, "https://github.com").toString() : null;
|
||||
}
|
||||
|
||||
return repos;
|
||||
return { repos, pages };
|
||||
};
|
||||
|
||||
export const getDependents = async (repo: string) => {
|
||||
@@ -65,15 +58,10 @@ export const getDependents = async (repo: string) => {
|
||||
const ps: (typeof packages[number] & { dependents: string[] })[] = [];
|
||||
|
||||
for (const p of packages)
|
||||
ps.push({ ...p, dependents: await getDependentByPackage(repo, p.id) });
|
||||
ps.push({
|
||||
...p,
|
||||
dependents: (await getDependentByPackage(repo, p.id)).repos,
|
||||
});
|
||||
|
||||
return ps;
|
||||
};
|
||||
|
||||
/** Returns a promise that settles once `delay` milliseconds (default 0) have elapsed. */
const wait = (delay = 0) =>
  new Promise((done) => {
    setTimeout(done, delay);
  });
|
||||
|
||||
// Executed as soon as this module is evaluated: collects the dependents of
// platane/snk and dumps the result as JSON into ./cache/out.json.
(async () => {
  const dependentsByPackage = await getDependents("platane/snk");
  const outputFile = `${__dirname}/cache/out.json`;

  fs.writeFileSync(outputFile, JSON.stringify(dependentsByPackage));
})();
|
||||
84
packages/usage-stats/httpGet.ts
Normal file
84
packages/usage-stats/httpGet.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
import fetch from "node-fetch";
|
||||
import * as path from "path";
|
||||
import * as fs from "fs";
|
||||
|
||||
// Folder (<this directory>/cache/http) in which httpGet stores one file per
// fetched URL. It is created, parents included, when the module is loaded.
const CACHE_DIR = path.resolve(__dirname, "cache", "http");

fs.mkdirSync(CACHE_DIR, { recursive: true });
|
||||
|
||||
/**
 * Creates a FIFO lock.
 *
 * The returned `request` function gives back a promise that resolves, once
 * the lock is free and every earlier request has been served, with a
 * `release` callback. The holder must call `release` to hand the lock to the
 * next waiter. Calling the same `release` more than once is harmless: only
 * the first call has an effect, so a stale callback can never unlock the
 * mutex while another holder owns it.
 *
 * @returns `request`, a function returning `Promise<() => void>`.
 */
const createMutex = () => {
  let locked = false;
  // Resolvers of the pending `request()` promises, oldest first.
  const waiters: Array<(release: () => void) => void> = [];

  // Grants the lock to the oldest waiter if nobody currently holds it.
  const update = () => {
    if (locked) return;

    const grant = waiters.shift();
    if (!grant) return;

    locked = true;

    let released = false;
    grant(() => {
      // Ignore repeated calls of the same release callback.
      if (released) return;
      released = true;

      locked = false;
      update();
    });
  };

  const request = () =>
    new Promise<() => void>((resolve) => {
      waiters.push(resolve);
      update();
    });

  return request;
};
|
||||
|
||||
// Module-wide lock: httpGet awaits it before fetching and releases it when
// done, so network requests made through httpGet run one at a time.
const mutex = createMutex();
|
||||
|
||||
/**
 * Converts a `Retry-After` header value into a delay in milliseconds.
 * Accepts both forms of the header: a number of seconds or an HTTP date.
 * Falls back to `fallbackMs` when the header is absent, empty or unparsable.
 */
const retryAfterToMs = (
  header: string | null,
  fallbackMs = 300 * 1000
): number => {
  if (header === null || header.trim() === "") return fallbackMs;

  const seconds = Number(header);
  if (Number.isFinite(seconds)) return Math.max(0, seconds) * 1000;

  const date = Date.parse(header);
  if (!Number.isNaN(date)) return Math.max(0, date - Date.now());

  return fallbackMs;
};

/**
 * Fetches a URL as text, with an on-disk cache and serialized network access.
 *
 * - If a cache file for the URL exists in `CACHE_DIR`, its content is
 *   returned and no request is made.
 * - Otherwise the call waits for the module-wide lock, fetches the URL,
 *   writes the body to the cache and returns it.
 * - On HTTP 429 the call sleeps for the duration given by `Retry-After`
 *   (300 s when the header is missing or unparsable) while still holding the
 *   lock, then retries.
 *
 * @param url Address to fetch.
 * @returns The response body (or cached copy) as a string.
 * @throws Error("res not ok") for any other non-2xx response; errors raised
 *   by `fetch` or by the file system propagate unchanged.
 */
export const httpGet = async (url: string | URL): Promise<string> => {
  // File name derived from the URL: scheme dropped, every character outside
  // [A-Za-z0-9_=&?.] replaced by "_".
  const cacheKey = url
    .toString()
    .replace(/https?:\/\//, "")
    .replace(/[^\w=&\?\.]/g, "_");

  const cacheFilename = path.join(CACHE_DIR, cacheKey);

  if (fs.existsSync(cacheFilename))
    return fs.promises.readFile(cacheFilename, "utf8");

  const release = await mutex();

  try {
    // Another caller may have fetched and cached the same URL while this one
    // was queued for the lock.
    if (fs.existsSync(cacheFilename))
      return await fs.promises.readFile(cacheFilename, "utf8");

    const res = await fetch(url);

    if (!res.ok) {
      if (res.status === 429 || res.statusText === "Too Many Requests") {
        const delay = retryAfterToMs(res.headers.get("retry-after"));

        console.log("Too Many Requests", delay);

        // The lock stays held during the pause so that no other request is
        // sent while the server asks us to back off.
        await wait(delay);

        console.log("waited long enough");

        // Deliberately NOT awaited: the `finally` below must release the lock
        // before the retry can acquire it; awaiting here would deadlock.
        return httpGet(url);
      }

      console.error(url, res.status, res.statusText);
      throw new Error("res not ok");
    }

    const text = await res.text();

    fs.writeFileSync(cacheFilename, text);

    return text;
  } finally {
    release();
  }
};
|
||||
|
||||
/** Returns a promise that settles once `delay` milliseconds (default 0) have elapsed. */
const wait = (delay = 0) =>
  new Promise((done) => {
    setTimeout(done, delay);
  });
|
||||
51
packages/usage-stats/index.ts
Normal file
51
packages/usage-stats/index.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import { getDependentInfo } from "./getDependentInfo";
|
||||
import { getDependents } from "./getDependents";
|
||||
import ParkMiller from "park-miller";
|
||||
|
||||
/**
 * Splits an array into consecutive chunks of at most `n` elements.
 * The input is not modified; the last chunk may be shorter than `n`.
 *
 * @param arr Items to split.
 * @param n Chunk size, a positive integer (default 1).
 * @returns The chunks in order; an empty array for an empty input.
 * @throws RangeError when `n` is not a positive integer. Without this check
 *   a zero size fails with an unrelated "Invalid array length" error, a
 *   negative size silently yields no chunks and a fractional size yields
 *   uneven chunks.
 */
const toChunk = <T>(arr: readonly T[], n = 1): T[][] => {
  if (!Number.isInteger(n) || n < 1)
    throw new RangeError(
      `toChunk: chunk size must be a positive integer, got ${n}`
    );

  return Array.from({ length: Math.ceil(arr.length / n) }, (_, i) =>
    arr.slice(i * n, (i + 1) * n)
  );
};
|
||||
|
||||
// Random source used by shuffle() below via random.float().
// NOTE(review): 10 is presumably a fixed seed, which would make the shuffle
// reproducible between runs — confirm against the park-miller documentation.
const random = new ParkMiller(10);
|
||||
|
||||
/**
 * Shuffles `array` in place, walking from the last index down to 1 and
 * swapping each element with one at a random index at or before it. The
 * random index comes from the module-level `random` generator.
 * Nothing is returned.
 */
const shuffle = <T>(array: T[]) => {
  let i = array.length;

  while (--i > 0) {
    const pick = Math.floor(random.float() * (i + 1));

    [array[i], array[pick]] = [array[pick], array[i]];
  }
};
|
||||
|
||||
// Script entry point: gathers every dependent of Platane/snk, keeps a
// shuffled sample of at most 5000 repositories and looks each one up in turn.
(async () => {
  const packages = await getDependents("Platane/snk");

  const repos = packages.flatMap((pkg) => pkg.dependents);

  shuffle(repos);

  // Drop entries from the front until 5000 remain. With 5000 or fewer
  // entries the delete count is not positive and nothing is removed.
  const surplus = repos.length - 5000;
  repos.splice(0, surplus);

  console.log(repos);

  const infos: any[] = [];

  // Alternative kept for reference: process the repositories ten at a time.
  //
  // for (const chunk of toChunk(repos, 10))
  //   await Promise.all(
  //     chunk.map(async (repo) => {
  //       console.log(
  //         infos.length.toString().padStart(5, " "),
  //         "/",
  //         repos.length
  //       );
  //
  //       infos.push({ repo, ...(await getDependentInfo(repo)) });
  //     })
  //   );

  for (const repo of repos) {
    const progress = infos.length.toString().padStart(5, " ");
    console.log(progress, "/", repos.length);

    const info = await getDependentInfo(repo);
    infos.push({ repo, ...info });
  }
})();
|
||||
@@ -7,9 +7,10 @@
|
||||
"cheerio": "1.0.0-rc.12",
|
||||
"node-fetch": "2.6.7",
|
||||
"octokit": "2.0.11",
|
||||
"dotenv": "16.0.3"
|
||||
"dotenv": "16.0.3",
|
||||
"park-miller": "1.1.0"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "sucrase-node stats.ts"
|
||||
"start": "sucrase-node index.ts"
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user