packages/usage-stats/getDependentInfo-api.ts (new file, 53 lines)
@@ -0,0 +1,53 @@
import { Octokit } from "octokit";
import { httpGet } from "./httpGet";

require("dotenv").config();

const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN });

export const getLastRunInfo = async (repo_: string) => {
  const [owner, repo] = repo_.split("/");

  try {
    const {
      data: { workflow_runs },
    } = await octokit.request(
      "GET /repos/{owner}/{repo}/actions/runs{?actor,branch,event,status,per_page,page,created,exclude_pull_requests,check_suite_id,head_sha}",
      { owner, repo }
    );

    for (const r of workflow_runs) {
      const {
        run_started_at: date,
        head_sha,
        path,
        conclusion,
      } = r as {
        run_started_at: string;
        head_sha: string;
        path: string;
        conclusion: "failure" | "success";
      };

      const workflow_url = `https://raw.githubusercontent.com/${owner}/${repo}/${head_sha}/${path}`;

      const workflow_code = await httpGet(workflow_url);

      const [_, dependency] =
        workflow_code.match(/uses\s*:\s*(Platane\/snk(\/svg-only)?@\w*)/) ?? [];

      const cronMatch = workflow_code.match(/cron\s*:([^\n]*)/);

      if (dependency)
        return {
          dependency,
          success: conclusion === "success",
          date,
          cron: cronMatch?.[1].replace(/["|']/g, "").trim(),
          workflow_code,
        };
    }
  } catch (err) {
    console.error(err);
  }
};
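A note on the extraction logic above (not part of the commit): String.prototype.match returns the full match at index 0 and the first capture group at index 1, so the destructuring const [_, dependency] picks up the matched Platane/snk action reference, and the cron value is cleaned of quotes before being returned. A standalone sketch, using an invented workflow snippet:

// Illustrative only: the workflow text below is made up, not from a real repository.
const sampleWorkflow = [
  "on:",
  "  schedule:",
  '    - cron: "0 0 * * *"',
  "jobs:",
  "  draw:",
  "    steps:",
  "      - uses: Platane/snk/svg-only@v3",
].join("\n");

const [, dependency] =
  sampleWorkflow.match(/uses\s*:\s*(Platane\/snk(\/svg-only)?@\w*)/) ?? [];
const cronMatch = sampleWorkflow.match(/cron\s*:([^\n]*)/);

console.log(dependency); // "Platane/snk/svg-only@v3"
console.log(cronMatch?.[1].replace(/["|']/g, "").trim()); // "0 0 * * *"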
packages/usage-stats/getDependentInfo.ts (new file, 56 lines)
@@ -0,0 +1,56 @@
import { load as CheerioLoad } from "cheerio";
import { httpGet } from "./httpGet";

export const getDependentInfo = async (repo: string) => {
  const pageText = await httpGet(`https://github.com/${repo}/actions`).catch(
    () => null
  );

  if (!pageText) return;

  const $ = CheerioLoad(pageText);

  const runs = $("#partial-actions-workflow-runs [data-url]")
    .toArray()
    .map((el) => {
      const success =
        $(el).find('[aria-label="completed successfully"]').toArray().length ===
        1;

      const workflow_file_href = $(el)
        .find("a")
        .toArray()
        .map((el) => $(el).attr("href")!)
        .find((href) => href.match(/\/actions\/runs\/\d+\/workflow/))!;

      const workflow_file_url = workflow_file_href
        ? new URL(workflow_file_href, "https://github.com").toString()
        : null;

      const date = $(el).find("relative-time").attr("datetime");

      return { success, workflow_file_url, date };
    });

  for (const { workflow_file_url, success, date } of runs) {
    if (!workflow_file_url) continue;

    const $ = CheerioLoad(await httpGet(workflow_file_url));

    const workflow_code = $("table[data-hpc]").text();

    const [_, dependency] =
      workflow_code.match(/uses\s*:\s*(Platane\/snk(\/svg-only)?@\w*)/) ?? [];

    const cronMatch = workflow_code.match(/cron\s*:([^\n]*)/);

    if (dependency)
      return {
        dependency,
        success,
        date,
        cron: cronMatch?.[1].replace(/["|']/g, "").trim(),
        workflow_code,
      };
  }
};
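For reference (outside the commit itself): workflow_file_href is a path-relative href scraped from the Actions page, and new URL(href, base) resolves it against the github.com origin, which is how workflow_file_url is built above. A minimal sketch with an invented href:

// Illustrative only: the href below is made up.
const href = "/some-user/some-repo/actions/runs/123/workflow";
console.log(new URL(href, "https://github.com").toString());
// "https://github.com/some-user/some-repo/actions/runs/123/workflow"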
@@ -1,11 +1,10 @@
-import * as fs from "fs";
-import fetch from "node-fetch";
 import { load as CheerioLoad } from "cheerio";
+import { httpGet } from "./httpGet";

 const getPackages = async (repo: string) => {
-  const pageText = await fetch(
+  const pageText = await httpGet(
     `https://github.com/${repo}/network/dependents`
-  ).then((res) => res.text());
+  );
   const $ = CheerioLoad(pageText);

   return $("#dependents .select-menu-list a")
@@ -29,17 +28,15 @@ const getDependentByPackage = async (repo: string, packageId: string) => {
     | null = `https://github.com/${repo}/network/dependents?package_id=${packageId}`;

   while (url) {
-    console.log(url, repos.length);
+    const $ = CheerioLoad(await httpGet(url));

-    await wait(1000 + Math.floor(Math.random() * 500));
+    console.log(repos.length);

-    const $ = CheerioLoad(await fetch(url).then((res) => res.text()));
-
-    const rs = $(`#dependents [data-hovercard-type="repository"]`)
+    const reposOnPage = $(`#dependents [data-hovercard-type="repository"]`)
       .toArray()
       .map((el) => $(el).attr("href")!.slice(1));

-    repos.push(...rs);
+    repos.push(...reposOnPage);

     const nextButton = $(`#dependents a`)
       .filter((_, el) => $(el).text().trim().toLowerCase() === "next")
@@ -47,16 +44,12 @@

     const href = nextButton ? nextButton.attr("href") : null;

-    pages.push({ url, rs, next: href });
-    fs.writeFileSync(
-      __dirname + `/out-${packageId}.json`,
-      JSON.stringify(pages)
-    );
+    pages.push({ url, reposOnPage, next: href });

     url = href ? new URL(href, "https://github.com").toString() : null;
   }

-  return repos;
+  return { repos, pages };
 };

 export const getDependents = async (repo: string) => {
@@ -65,15 +58,10 @@ export const getDependents = async (repo: string) => {
   const ps: (typeof packages[number] & { dependents: string[] })[] = [];

   for (const p of packages)
-    ps.push({ ...p, dependents: await getDependentByPackage(repo, p.id) });
+    ps.push({
+      ...p,
+      dependents: (await getDependentByPackage(repo, p.id)).repos,
+    });

   return ps;
 };
-
-const wait = (delay = 0) => new Promise((r) => setTimeout(r, delay));
-
-(async () => {
-  const res = await getDependents("platane/snk");
-
-  fs.writeFileSync(__dirname + "/cache/out.json", JSON.stringify(res));
-})();
packages/usage-stats/httpGet.ts (new file, 84 lines)
@@ -0,0 +1,84 @@
import fetch from "node-fetch";
import * as path from "path";
import * as fs from "fs";

const CACHE_DIR = path.join(__dirname, "cache", "http");
fs.mkdirSync(CACHE_DIR, { recursive: true });

const createMutex = () => {
  let locked = false;
  const q: any[] = [];

  const update = () => {
    if (locked) return;

    if (q[0]) {
      locked = true;
      q.shift()(() => {
        locked = false;
        update();
      });
    }
  };

  const request = () =>
    new Promise<() => void>((resolve) => {
      q.push(resolve);
      update();
    });

  return request;
};

const mutex = createMutex();

export const httpGet = async (url: string | URL): Promise<string> => {
  const cacheKey = url
    .toString()
    .replace(/https?:\/\//, "")
    .replace(/[^\w=&\?\.]/g, "_");

  const cacheFilename = path.join(CACHE_DIR, cacheKey);

  if (fs.existsSync(cacheFilename))
    return new Promise((resolve, reject) =>
      fs.readFile(cacheFilename, (err, data) =>
        err ? reject(err) : resolve(data.toString())
      )
    );

  const release = await mutex();

  try {
    const res = await fetch(url);

    if (!res.ok) {
      if (res.status === 429 || res.statusText === "Too Many Requests") {
        const delay = +(res.headers.get("retry-after") ?? 300) * 1000;

        console.log("Too Many Requests", delay);

        await wait(delay);

        console.log("waited long enough");

        return httpGet(url);
      }

      console.error(url, res.status, res.statusText);
      throw new Error("res not ok");
    }

    const text = await res.text();

    fs.writeFileSync(cacheFilename, text);

    // await wait(Math.random() * 200 + 100);

    return text;
  } finally {
    release();
  }
};

const wait = (delay = 0) => new Promise((r) => setTimeout(r, delay));
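A minimal usage sketch for httpGet, not part of the commit and with a made-up URL: calls are serialized through the in-process mutex, the response body is written under cache/http, a later call for the same URL is answered from that on-disk cache without another fetch, and a 429 response is retried after the advertised retry-after delay.

// Illustrative only: the URL is invented.
import { httpGet } from "./httpGet";

(async () => {
  // First call goes over the network and writes the body into cache/http.
  const first = await httpGet("https://example.com/some-page");

  // Second call for the same URL is served from the disk cache.
  const second = await httpGet("https://example.com/some-page");

  console.log(first === second); // true, same cached text
})();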
packages/usage-stats/index.ts (new file, 51 lines)
@@ -0,0 +1,51 @@
import { getDependentInfo } from "./getDependentInfo";
import { getDependents } from "./getDependents";
import ParkMiller from "park-miller";

const toChunk = <T>(arr: T[], n = 1) =>
  Array.from({ length: Math.ceil(arr.length / n) }, (_, i) =>
    arr.slice(i * n, (i + 1) * n)
  );

const random = new ParkMiller(10);

const shuffle = <T>(array: T[]) => {
  for (let i = array.length - 1; i > 0; i--) {
    const j = Math.floor(random.float() * (i + 1));
    const temp = array[i];
    array[i] = array[j];
    array[j] = temp;
  }
};

(async () => {
  const packages = await getDependents("Platane/snk");

  const repos = packages.map((p) => p.dependents).flat();

  shuffle(repos);
  repos.splice(0, repos.length - 5000);

  console.log(repos);

  const infos: any[] = [];

  // for (const chunk of toChunk(repos, 10))
  //   await Promise.all(
  //     chunk.map(async (repo) => {
  //       console.log(
  //         infos.length.toString().padStart(5, " "),
  //         "/",
  //         repos.length
  //       );

  //       infos.push({ repo, ...(await getDependentInfo(repo)) });
  //     })
  //   );

  for (const repo of repos) {
    console.log(infos.length.toString().padStart(5, " "), "/", repos.length);

    infos.push({ repo, ...(await getDependentInfo(repo)) });
  }
})();
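One observation on the sampling in index.ts (not part of the commit): ParkMiller is seeded with a constant (10), so shuffle applies the same permutation on every run, and repos.splice(0, repos.length - 5000) then drops everything except the last 5000 entries (it removes nothing when there are fewer than 5000), giving a reproducible sample. A tiny sketch of that splice pattern with invented values:

// Illustrative only: keep the last N entries of an array, in place.
const sample = ["a", "b", "c", "d", "e"];
const N = 2;
sample.splice(0, sample.length - N); // removes the first length - N items
console.log(sample); // ["d", "e"]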
@@ -7,9 +7,10 @@
     "cheerio": "1.0.0-rc.12",
     "node-fetch": "2.6.7",
     "octokit": "2.0.11",
-    "dotenv": "16.0.3"
+    "dotenv": "16.0.3",
+    "park-miller": "1.1.0"
   },
   "scripts": {
-    "start": "sucrase-node stats.ts"
+    "start": "sucrase-node index.ts"
   }
 }