Skip to content

Commit dcb0c79

Browse files
committed
Refactor and add skeleton tests
1 parent 57089cb commit dcb0c79

9 files changed

+223
-132
lines changed

babel.config.js

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module.exports = {
33
["@babel/plugin-transform-private-methods", { loose: "false" }],
44
["@babel/plugin-transform-private-property-in-object", { loose: "false" }],
55
["@babel/plugin-transform-class-properties", { loose: "false" }],
6+
"babel-plugin-transform-import-meta"
67
],
78
"presets": [
89
[

jest.config.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ module.exports = {
1818
testPathIgnorePatterns: [`node_modules`, `\\.cache`, `<rootDir>.*/public`],
1919
// Add ESM dependencies into the group in this pattern
2020
transformIgnorePatterns: [
21-
`<rootDir>/node_modules/(?!(rehype-react|url-exist|is-url-superb|ky-universal|ky|gatsby)/)`,
21+
`<rootDir>/node_modules/(?!(rehype-react|url-exist|is-url-superb|ky-universal|ky|gatsby|lmdb|msgpackr)/)`,
2222
],
2323
globals: {
2424
__PATH_PREFIX__: ``,

package-lock.json

+49-34
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
"@testing-library/user-event": "^14.5.2",
6868
"babel-jest": "^29.6.2",
6969
"babel-plugin-styled-components": "^2.1.4",
70+
"babel-plugin-transform-import-meta": "^2.3.2",
7071
"babel-preset-gatsby": "^3.14.0",
7172
"eslint": "^8.57.1",
7273
"eslint-config-react-app": "^7.0.1",

plugins/github-enricher/gatsby-node.js

+2-97
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
1-
const promiseRetry = require("promise-retry")
2-
3-
const followRedirect = require("follow-redirect-url")
4-
51
const gh = require("parse-github-url")
62
const path = require("path")
7-
const encodeUrl = require("encodeurl")
83
const { createRepository, getResolvers } = require("./repository-creator")
94

105
const { getCache } = require("gatsby/dist/utils/get-cache")
@@ -21,14 +16,13 @@ const {
2116
} = require("./sponsorFinder")
2217
const { getRawFileContents, queryGraphQl } = require("./github-helper")
2318
const yaml = require("js-yaml")
19+
const { getIssueInformationNoCache } = require("./issue-count-helper")
20+
const { normaliseUrl } = require("./url-helper")
2421

2522
const defaultOptions = {
2623
nodeType: "Extension",
2724
}
2825

29-
// Retry policy passed to promiseRetry by isRedirectToPulls: up to 5 retries, backoff factor 5.
// NOTE(review): minTimeout is presumably in milliseconds (75 seconds) — confirm against the promise-retry docs.
const RETRY_OPTIONS = { retries: 5, minTimeout: 75 * 1000, factor: 5 }
30-
31-
3226
// To avoid hitting the git rate limiter retrieving information we already know, cache what we can
3327
const DAY_IN_SECONDS = 24 * 60 * 60
3428

@@ -706,95 +700,6 @@ const getIssueInformation = async (coords, labels, scmUrl) => {
706700

707701
}
708702

709-
/**
 * Tidies an issues URL: first collapses duplicated slashes, then upgrades a plain-http github.com prefix to https.
 *
 * @param {string} issuesUrl - the URL to tidy
 * @returns {string} the tidied URL
 */
function normaliseUrl(issuesUrl) {
  const withoutDoubleSlashes = removeDoubleSlashes(issuesUrl)
  return removePlainHttp(withoutDoubleSlashes)
}
712-
713-
/**
 * Rewrites the first "http://github.com" in a URL to "https://github.com".
 *
 * @param {string|null|undefined} url - the URL to rewrite
 * @returns {string|undefined} the rewritten URL, or undefined when url is null or undefined
 */
function removePlainHttp(url) {
  if (url === undefined || url === null) {
    return undefined
  }
  return url.replace("http://github.com", "https://github.com")
}
716-
717-
/**
 * Collapses every run of two or more slashes into a single slash, leaving the "//" that follows a scheme colon intact.
 *
 * @param {string|null|undefined} issuesUrl - the URL to tidy
 * @returns {string|undefined} the tidied URL, or undefined when issuesUrl is null or undefined
 */
function removeDoubleSlashes(issuesUrl) {
  // The lookbehind skips "://"; the g flag is needed so that every run is collapsed, not just the first one
  return issuesUrl?.replace(/(?<!:)\/{2,}/g, "/")
}
720-
721-
/**
 * Fetches the open-issue count for a repository and builds the matching issues URL, without consulting any cache.
 *
 * @param {{owner: string, name: string}} coords - repository owner and name, interpolated into the GraphQL query
 * @param {string[]} [labels] - when truthy, restricts both the count and the URL to these labels
 * @param {string} scmUrl - repository URL; a trailing ".git" is stripped before "/issues" is appended
 * @returns {Promise<{issues: *, issuesUrl: (string|undefined)}>} the totalCount read from the response (undefined when
 *   any level of the response is missing) and the issues URL, or undefined when maybeIssuesUrl rejects it
 */
const getIssueInformationNoCache = async (coords, labels, scmUrl) => {
  // Quote values as string literals so an embedded quote or backslash cannot break out of the query
  const quote = value => JSON.stringify(String(value))

  // TODO we can just treat label as an array, almost
  const labelFilterString = labels
    ? `, filterBy: { labels: [${labels.map(label => quote(label)).join()}] }`
    : ""

  // Tolerate scm urls ending in .git, but don't try and turn them into issues urls without patching.
  // The patched base is shared by the labelled and the top-level url.
  const repoUrl = `${scmUrl}`.replace(/\.git(?=\/*$)/, "")
  const unnormalisedUrl = labels
    ? encodeUrl(
      repoUrl +
      "/issues?q=is%3Aopen+is%3Aissue+label%3A" +
      // Escape every slash in a label, not only the first one
      labels.map(label => label.replace(/\//g, "%2F")).join(",")
    )
    : repoUrl + "/issues"

  // Tidy double slashes
  const candidateUrl = normaliseUrl(unnormalisedUrl)

  // Batching this with other queries is not needed because rate limits are done on query complexity and cost,
  // not the number of actual http calls; see https://docs.github.com/en/graphql/overview/resource-limitations
  const query = `query {
    repository(owner:${quote(coords.owner)}, name:${quote(coords.name)}) {
      issues(states:OPEN, ${labelFilterString}) {
        totalCount
      }
    }
  }`

  const body = await queryGraphQl(query)

  // The parent objects may be undefined and destructuring nested undefineds is not good
  const issues = body?.data?.repository?.issues?.totalCount

  const issuesUrl = await maybeIssuesUrl(issues, candidateUrl)

  return { issues, issuesUrl }
}
761-
762-
/**
 * Decides whether an issues URL is safe to publish.
 *
 * @param {number|undefined} issues - issue count obtained for the repository, if any
 * @param {string} issuesUrl - candidate issues URL
 * @returns {Promise<string|undefined>} issuesUrl when it is trusted or passes validation, otherwise undefined
 */
const maybeIssuesUrl = async (issues, issuesUrl) => {
  // If we got an issue count we can be pretty confident our url will be ok, so skip the network checks.
  // We don't check every url because otherwise we start getting 429s and dropping good URLs
  if (issues > 0) {
    return issuesUrl
  }

  // url-exist has to be loaded with a dynamic import (because CJS); the import yields a promise, so await it
  // and then destructure the default export
  const { default: urlExist } = await import("url-exist")

  console.log("Validating issue url for", issuesUrl, "because issues is", issues)

  const exists = await urlExist(issuesUrl)
  if (!exists) {
    return undefined
  }

  // Being reachable is not enough; a url that ends up on /pulls is not a real issues page
  const redirectsToPulls = await isRedirectToPulls(issuesUrl)
  return redirectsToPulls ? undefined : issuesUrl
}
783-
784-
/**
 * Follows the redirect chain of an issues URL and reports whether it ends on a "/pulls" URL.
 * A 429 status on the final hop hands an error to promiseRetry's retry callback, governed by RETRY_OPTIONS.
 *
 * @param {string} issuesUrl - the URL whose redirects are followed
 * @returns {Promise<boolean>} true when the final URL in the chain contains "/pulls"
 */
const isRedirectToPulls = async (issuesUrl) => {
  const inspectFinalHop = async (retry, attempt) => {
    // Being a valid url may not be enough, we also want to check for redirects to /pulls
    const hops = await followRedirect.startFollowing(issuesUrl)
    console.log("URL chain for", issuesUrl, "is", hops)

    const lastHop = hops[hops.length - 1]
    if (lastHop.status === 429) {
      retry(new Error("Issues URL reports 429 on attempt " + attempt))
    }

    return lastHop.url.includes("/pulls")
  }

  const endsOnPulls = await promiseRetry(inspectFinalHop, RETRY_OPTIONS)
  return endsOnPulls
}
797-
798703
// This combines the sponsor opt-in information (which we only fully have after processing all nodes) with the companies and sponsor information for individual nodes,
799704
// to get a sanitised list
800705
exports.createResolvers = ({ createResolvers }) => {

plugins/github-enricher/github-helper.js

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
const promiseRetry = require("promise-retry")
2+
const { source } = require("puppeteer-core/internal/generated/injected")
23
const RETRY_OPTIONS = { retries: 3, minTimeout: 75 * 1000, factor: 3 }
34
const PAGE_INFO_SUBQUERY = "pageInfo {\n" +
45
" hasNextPage\n" +
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import encodeUrl from "encodeurl"
2+
import { normaliseUrl } from "./url-helper"
3+
4+
const promiseRetry = require("promise-retry")
5+
6+
const followRedirect = require("follow-redirect-url")
7+
8+
const { queryGraphQl } = require("./github-helper")
9+
10+
// Retry policy passed to promiseRetry by isRedirectToPulls: up to 5 retries, backoff factor 5.
// NOTE(review): minTimeout is presumably in milliseconds (75 seconds) — confirm against the promise-retry docs.
const RETRY_OPTIONS = { retries: 5, minTimeout: 75 * 1000, factor: 5 }
11+
12+
/**
 * Fetches the open-issue count for a repository and builds the matching issues URL, without consulting any cache.
 *
 * @param {{owner: string, name: string}} coords - repository owner and name, interpolated into the GraphQL query
 * @param {string[]} [labels] - when truthy, restricts both the count and the URL to these labels
 * @param {string} scmUrl - repository URL; a trailing ".git" is stripped before "/issues" is appended
 * @returns {Promise<{issues: *, issuesUrl: (string|undefined)}>} the totalCount read from the response (undefined when
 *   any level of the response is missing) and the issues URL, or undefined when maybeIssuesUrl rejects it
 */
const getIssueInformationNoCache = async (coords, labels, scmUrl) => {
  // Quote values as string literals so an embedded quote or backslash cannot break out of the query
  const quote = value => JSON.stringify(String(value))

  // TODO we can just treat label as an array, almost
  const labelFilterString = labels
    ? `, filterBy: { labels: [${labels.map(label => quote(label)).join()}] }`
    : ""

  // Tolerate scm urls ending in .git, but don't try and turn them into issues urls without patching.
  // The patched base is shared by the labelled and the top-level url.
  const repoUrl = `${scmUrl}`.replace(/\.git(?=\/*$)/, "")
  const unnormalisedUrl = labels
    ? encodeUrl(
      repoUrl +
      "/issues?q=is%3Aopen+is%3Aissue+label%3A" +
      // Escape every slash in a label, not only the first one
      labels.map(label => label.replace(/\//g, "%2F")).join(",")
    )
    : repoUrl + "/issues"

  // Tidy double slashes
  const candidateUrl = normaliseUrl(unnormalisedUrl)

  // Batching this with other queries is not needed because rate limits are done on query complexity and cost,
  // not the number of actual http calls; see https://docs.github.com/en/graphql/overview/resource-limitations
  const query = `query {
    repository(owner:${quote(coords.owner)}, name:${quote(coords.name)}) {
      issues(states:OPEN, ${labelFilterString}) {
        totalCount
      }
    }
  }`

  const body = await queryGraphQl(query)

  // The parent objects may be undefined and destructuring nested undefineds is not good
  const issues = body?.data?.repository?.issues?.totalCount

  const issuesUrl = await maybeIssuesUrl(issues, candidateUrl)

  return { issues, issuesUrl }
}
56+
57+
/**
 * Decides whether an issues URL is safe to publish.
 *
 * @param {number|undefined} issues - issue count obtained for the repository, if any
 * @param {string} issuesUrl - candidate issues URL
 * @returns {Promise<string|undefined>} issuesUrl when it is trusted or passes validation, otherwise undefined
 */
const maybeIssuesUrl = async (issues, issuesUrl) => {
  // If we got an issue count we can be pretty confident our url will be ok, so skip the network checks.
  // We don't check every url because otherwise we start getting 429s and dropping good URLs
  if (issues > 0) {
    return issuesUrl
  }

  // url-exist has to be loaded with a dynamic import (because CJS); the import yields a promise, so await it
  // and then destructure the default export
  const { default: urlExist } = await import("url-exist")

  console.log("Validating issue url for", issuesUrl, "because issues is", issues)

  const exists = await urlExist(issuesUrl)
  if (!exists) {
    return undefined
  }

  // Being reachable is not enough; a url that ends up on /pulls is not a real issues page
  const redirectsToPulls = await isRedirectToPulls(issuesUrl)
  return redirectsToPulls ? undefined : issuesUrl
}
78+
79+
/**
 * Follows the redirect chain of an issues URL and reports whether it ends on a "/pulls" URL.
 * A 429 status on the final hop hands an error to promiseRetry's retry callback, governed by RETRY_OPTIONS.
 *
 * @param {string} issuesUrl - the URL whose redirects are followed
 * @returns {Promise<boolean>} true when the final URL in the chain contains "/pulls"
 */
const isRedirectToPulls = async (issuesUrl) => {
  const inspectFinalHop = async (retry, attempt) => {
    // Being a valid url may not be enough, we also want to check for redirects to /pulls
    const hops = await followRedirect.startFollowing(issuesUrl)

    const lastHop = hops[hops.length - 1]
    if (lastHop.status === 429) {
      retry(new Error("Issues URL reports 429 on attempt " + attempt))
    }

    return lastHop.url.includes("/pulls")
  }

  const endsOnPulls = await promiseRetry(inspectFinalHop, RETRY_OPTIONS)
  return endsOnPulls
}
91+
92+
93+
// Only the uncached lookup is exported; maybeIssuesUrl and isRedirectToPulls are not part of the module's interface
module.exports = { getIssueInformationNoCache }

0 commit comments

Comments
 (0)