From 1a0e05d07335f5ce3316bf26847ee6128434d43b Mon Sep 17 00:00:00 2001 From: Waldemar Reusch Date: Mon, 16 Jan 2023 14:42:20 +0100 Subject: [PATCH] rewrite download and unzip to use nodejs native methods --- src/bin/download-builtin-keycloak-theme.ts | 6 +- .../generateKeycloakThemeResources.ts | 10 +- src/bin/keycloakify/keycloakify.ts | 4 +- src/bin/tools/downloadAndUnzip.ts | 345 ++++++++++++++---- 4 files changed, 286 insertions(+), 79 deletions(-) diff --git a/src/bin/download-builtin-keycloak-theme.ts b/src/bin/download-builtin-keycloak-theme.ts index e98eaddc..e1e1515f 100644 --- a/src/bin/download-builtin-keycloak-theme.ts +++ b/src/bin/download-builtin-keycloak-theme.ts @@ -7,11 +7,11 @@ import { promptKeycloakVersion } from "./promptKeycloakVersion"; import { getCliOptions } from "./tools/cliOptions"; import { getLogger } from "./tools/logger"; -export function downloadBuiltinKeycloakTheme(params: { keycloakVersion: string; destDirPath: string; isSilent: boolean }) { +export async function downloadBuiltinKeycloakTheme(params: { keycloakVersion: string; destDirPath: string; isSilent: boolean }) { const { keycloakVersion, destDirPath, isSilent } = params; for (const ext of ["", "-community"]) { - downloadAndUnzip({ + await downloadAndUnzip({ "destDirPath": destDirPath, "url": `https://github.com/keycloak/keycloak/archive/refs/tags/${keycloakVersion}.zip`, "pathOfDirToExtractInArchive": `keycloak-${keycloakVersion}/themes/src/main/resources${ext}/theme`, @@ -31,7 +31,7 @@ if (require.main === module) { logger.log(`Downloading builtins theme of Keycloak ${keycloakVersion} here ${destDirPath}`); - downloadBuiltinKeycloakTheme({ + await downloadBuiltinKeycloakTheme({ keycloakVersion, destDirPath, isSilent diff --git a/src/bin/keycloakify/generateKeycloakThemeResources.ts b/src/bin/keycloakify/generateKeycloakThemeResources.ts index 2f645fc2..8c948d01 100644 --- a/src/bin/keycloakify/generateKeycloakThemeResources.ts +++ 
b/src/bin/keycloakify/generateKeycloakThemeResources.ts @@ -5,7 +5,6 @@ import { replaceImportsFromStaticInJsCode } from "./replacers/replaceImportsFrom import { replaceImportsInCssCode } from "./replacers/replaceImportsInCssCode"; import { generateFtlFilesCodeFactory, pageIds } from "./generateFtl"; import { downloadBuiltinKeycloakTheme } from "../download-builtin-keycloak-theme"; -import * as child_process from "child_process"; import { mockTestingResourcesCommonPath, mockTestingResourcesPath, mockTestingSubDirOfPublicDirBasename } from "../mockTestingResourcesPath"; import { isInside } from "../tools/isInside"; import type { BuildOptions } from "./BuildOptions"; @@ -53,13 +52,13 @@ export namespace BuildOptionsLike { assert(); } -export function generateKeycloakThemeResources(params: { +export async function generateKeycloakThemeResources(params: { reactAppBuildDirPath: string; keycloakThemeBuildingDirPath: string; keycloakThemeEmailDirPath: string; keycloakVersion: string; buildOptions: BuildOptionsLike; -}): { doBundlesEmailTemplate: boolean } { +}): Promise<{ doBundlesEmailTemplate: boolean }> { const { reactAppBuildDirPath, keycloakThemeBuildingDirPath, keycloakThemeEmailDirPath, keycloakVersion, buildOptions } = params; const logger = getLogger({ isSilent: buildOptions.isSilent }); @@ -155,7 +154,7 @@ export function generateKeycloakThemeResources(params: { { const tmpDirPath = pathJoin(themeDirPath, "..", "tmp_xxKdLpdIdLd"); - downloadBuiltinKeycloakTheme({ + await downloadBuiltinKeycloakTheme({ keycloakVersion, "destDirPath": tmpDirPath, isSilent: buildOptions.isSilent @@ -190,8 +189,7 @@ export function generateKeycloakThemeResources(params: { ); fs.writeFileSync(pathJoin(keycloakResourcesWithinPublicDirPath, ".gitignore"), Buffer.from("*", "utf8")); - - child_process.execSync(`rm -r ${tmpDirPath}`); + fs.rmSync(tmpDirPath, { recursive: true, force: true }); } fs.writeFileSync( diff --git a/src/bin/keycloakify/keycloakify.ts 
b/src/bin/keycloakify/keycloakify.ts index ab893c18..235934c0 100644 --- a/src/bin/keycloakify/keycloakify.ts +++ b/src/bin/keycloakify/keycloakify.ts @@ -13,7 +13,7 @@ const reactProjectDirPath = process.cwd(); export const keycloakThemeBuildingDirPath = pathJoin(reactProjectDirPath, "build_keycloak"); export const keycloakThemeEmailDirPath = pathJoin(keycloakThemeBuildingDirPath, "..", "keycloak_email"); -export function main() { +export async function main() { const { isSilent, hasExternalAssets } = getCliOptions(process.argv.slice(2)); const logger = getLogger({ isSilent }); logger.log("🔏 Building the keycloak theme...⌚"); @@ -33,7 +33,7 @@ export function main() { "isSilent": isSilent }); - const { doBundlesEmailTemplate } = generateKeycloakThemeResources({ + const { doBundlesEmailTemplate } = await generateKeycloakThemeResources({ keycloakThemeBuildingDirPath, keycloakThemeEmailDirPath, "reactAppBuildDirPath": pathJoin(reactProjectDirPath, "build"), diff --git a/src/bin/tools/downloadAndUnzip.ts b/src/bin/tools/downloadAndUnzip.ts index 9c87fe2e..6e646b9f 100644 --- a/src/bin/tools/downloadAndUnzip.ts +++ b/src/bin/tools/downloadAndUnzip.ts @@ -1,80 +1,289 @@ -import { basename as pathBasename, join as pathJoin } from "path"; -import { execSync } from "child_process"; -import * as fs from "fs"; +import { dirname as pathDirname, basename as pathBasename, join as pathJoin } from "path"; +import { createReadStream, createWriteStream, unlinkSync } from "fs"; +import { stat, mkdir, unlink, readFile, writeFile } from "fs/promises"; import { transformCodebase } from "./transformCodebase"; -import * as crypto from "crypto"; +import { createHash } from "crypto"; +import http from "http"; +import https from "https"; +import { createInflateRaw } from "zlib"; -/** assert url ends with .zip */ -export function downloadAndUnzip(params: { +import type { Readable } from "stream"; + +function hash(s: string) { + return createHash("sha256").update(s).digest("hex"); +} + +async 
function maybeReadFile(path: string) { + try { + return await readFile(path, "utf-8"); + } catch (error) { + if ((error as Error & { code: string }).code === "ENOENT") return undefined; + throw error; + } +} + +async function maybeStat(path: string) { + try { + return await stat(path); + } catch (error) { + if ((error as Error & { code: string }).code === "ENOENT") return undefined; + throw error; + } +} + +/** + * Download a file from `url` to `dir`. Will try to avoid downloading existing + * files by using an `{hash(url)}.etag` file. If this file exists, we add an + * `If-None-Match` header with the stored etag, so server can tell us if file changed and we should re-download + * or if our file is up-to-date. + * + * Warning, this method assumes that the target filename can be extracted from + * url, content-disposition headers are ignored. + * + * If the target directory does not exist, it will be created. + * + * If the target file exists and is out of date, it will be overwritten. + * If the target file exists and there is no etag file, the target file will + * be overwritten. + * + * The returned promise rejects if the request fails, if the server answers with + * an unexpected status, or if the target file or the etag file cannot be written. + * + * @param url download url + * @param dir target directory + * @returns promise for the full path of the downloaded file + */ +async function download(url: string, dir: string): Promise<string> { + await mkdir(dir, { recursive: true }); + const filename = pathBasename(url); + const filepath = pathJoin(dir, filename); + // If downloaded file exists already and has an `.etag` companion file, + // read the etag from that file. This will avoid re-downloading the file + // if it is up to date. + const exists = await maybeStat(filepath); + const etagFilepath = pathJoin(dir, "_" + hash(url).substring(0, 15) + ".etag"); + const etag = !exists ?
undefined : await maybeReadFile(etagFilepath); + + return new Promise((resolve, reject) => { + // use inner method to allow following redirects + function request(url1: URL) { + const headers: Record<string, string> = {}; + if (etag) headers["If-None-Match"] = etag; + const req = (url1.protocol === "https:" ? https : http).get(url1, { headers }, response => { + if (response.statusCode === 301 || response.statusCode === 302) { + // follow redirects; `Location` may be relative, so resolve it against the current url + const { location } = response.headers; + // discard the body of the redirect response so that its socket is released + response.resume(); + if (location) request(new URL(location, url1)); + else reject(new Error(`Request to ${url1} returned status ${response.statusCode} without a Location header.`)); + } else if (response.statusCode === 304) { + // up-to-date, resolve now + resolve(filepath); + } else if (response.statusCode !== 200) { + // discard the body of the error response so that its socket is released + response.resume(); + reject(new Error(`Request to ${url1} returned status ${response.statusCode}.`)); + } else { + const fp = createWriteStream(filepath, { autoClose: true }); + // the stream event is named "error"; a listener on "err" would never be called + fp.on("error", e => { + fp.close(); + // best effort: the file does not exist if it could not even be opened + try { + unlinkSync(filepath); + } catch { + // keep reporting the original error + } + reject(e); + }); + fp.on("finish", async () => { + // when targetfile has been written, write etag file so that + // next time around we don't need to re-download + const responseEtag = response.headers.etag; + try { + if (responseEtag) await writeFile(etagFilepath, responseEtag, "utf-8"); + resolve(filepath); + } catch (e) { + // a rejection inside this listener would be unhandled and the promise would never settle + reject(e); + } + }); + // an interrupted transfer fails the write stream, which cleans up and rejects above + response.on("error", e => fp.destroy(e)); + response.pipe(fp); + } + }); + // network level failures (DNS, refused or reset connection) are emitted on the request + req.on("error", reject); + } + request(new URL(url)); + }); +} + +/** + * @typedef + * @type MultiError = Error & { cause: Error[] } + */ + +/** + * Extract the archive `zipFile` into the directory `dir`. If `archiveDir` is given, + * only entries whose path starts with it are extracted. Entry paths are kept as stored + * in the archive, no leading path components are stripped. + * + * If dir does not exist, it will be created. + * + * If any archive file exists, it will be overwritten. + * + * Entries are extracted concurrently, each one is streamed to disk without waiting for the previous one. + * + * Will try to clean up extracted files on failure. + * + * If unpacking fails, will either throw a regular error, or + * possibly a `MultiError`, which contains a `cause` field with + * a number of root cause errors.
+ * + * Warning this method is not optimized for continuous reading of the zip + * archive, but is a trade-off between simplicity and allowing extraction + * of a single directory from the archive. + * + * @param zipFile the file to unzip + * @param dir the target directory + * @param archiveDir if given, unpack only files from this archive directory + * @throws {MultiError} error + * @returns Promise for a list of full file paths pointing to actually extracted files + */ +async function unzip(zipFile: string, dir: string, archiveDir?: string): Promise<string[]> { + await mkdir(dir, { recursive: true }); + const promises: Promise<string>[] = []; + + // Iterate over all files in the zip, skip files which are not in archiveDir, + // if given. + for await (const record of iterateZipArchive(zipFile)) { + const { path: recordPath, createReadStream: createRecordReadStream } = record; + const filePath = pathJoin(dir, recordPath); + const parent = pathDirname(filePath); + if (archiveDir && !recordPath.startsWith(archiveDir)) continue; + // An async function is used instead of an async promise executor, so that a failing + // mkdir rejects the extraction instead of leaving it pending forever + const extraction = (async () => { + // never write outside of `dir` ("zip slip"): refuse entries containing `..` segments + if (recordPath.split(/[\\/]/).includes("..")) throw new Error(`Refusing to extract ${recordPath}: path contains ".."`); + await mkdir(parent, { recursive: true }); + // Pull the file out of the archive, write it to the target directory + return new Promise<string>((resolve, reject) => { + const input = createRecordReadStream(); + const output = createWriteStream(filePath); + // pipe() does not forward errors, so read or inflate failures are handled here + input.on("error", e => { + output.destroy(); + reject(Object.assign(e, { filePath })); + }); + output.on("error", e => reject(Object.assign(e, { filePath }))); + output.on("finish", () => resolve(filePath)); + input.pipe(output); + }); + })(); + // The outcome is only inspected after the loop (Promise.allSettled); attach a handler + // now so that an early failure is not reported as an unhandled rejection + extraction.catch(() => undefined); + promises.push(extraction); + } + + // Wait until _all_ files are either extracted or failed + const results = await Promise.allSettled(promises); + const success = results.filter(r => r.status === "fulfilled").map(r => (r as PromiseFulfilledResult<string>).value); + const failure = results.filter(r => r.status === "rejected").map(r => (r as PromiseRejectedResult).reason); + + // If any extraction failed, try to clean up, then throw a MultiError, + // which has a `cause` field, containing a list of root cause errors.
+ if (failure.length) { + // Cleanup is best effort: a file which cannot be removed must not hide the extraction errors. + // Only `filePath`, which is set above for files this function tried to write, is removed. + await Promise.allSettled(success.map(path => unlink(path))); + await Promise.allSettled(failure.map(e => e && e.filePath && unlink(e.filePath as string))); + const e = new Error("Failed to extract: " + failure.map(e => e.message).join(";")); + (e as any).cause = failure; + throw e; + } + + return success; +} + +/** + * Read the bytes `start` to `end` (both inclusive) of a file into memory. + * + * @param file file to read + * @param start first byte to read + * @param end last byte to read + * @returns Promise of a buffer of read bytes + */ +async function readFileChunk(file: string, start: number, end: number): Promise<Buffer> { + const chunks: Buffer[] = []; + return new Promise((resolve, reject) => { + const stream = createReadStream(file, { start, end }); + stream.on("error", e => reject(e)); + stream.on("end", () => resolve(Buffer.concat(chunks))); + stream.on("data", chunk => chunks.push(chunk as Buffer)); + }); +} + +type ZipRecord = { + path: string; + createReadStream: () => Readable; + compressionMethod: "deflate" | undefined; +}; + +type ZipRecordGenerator = AsyncGenerator<ZipRecord, void, undefined>; + +/** + * Iterate over all records of a zipfile, and yield a ZipRecord. + * Use `record.createReadStream()` to actually read the file. + * + * Warning this method will only work with single-disk zip files. + * Warning this method does not support zip64 archives. + * + * @param zipFile + * @returns AsyncGenerator which will yield ZipRecords + */ +async function* iterateZipArchive(zipFile: string): ZipRecordGenerator { + // Need to know zip file size before we can do anything else + const { size } = await stat(zipFile); + const chunkSize = 65_535 + 22 + 1; // max comment size + end header size + wiggle + // Read last ~65k bytes. Zip files have a comment up to 65_535 bytes at the very end, + // before that comes the zip central directory end header.
+ // Clamp the start: for archives smaller than `chunkSize` the offset would be negative, which createReadStream rejects + let chunk = await readFileChunk(zipFile, Math.max(0, size - chunkSize), size); + const unread = size - chunk.length; + let i = chunk.length - 4; + let found = false; + // Find central directory end header, reading backwards from the end + while (!found && i-- > 0) if (chunk[i] === 0x50 && chunk.readUInt32LE(i) === 0x06054b50) found = true; + if (!found) throw new Error("Not a zip file"); + // This method will fail on a multi-disk zip, so bail early. + if (chunk.readUInt16LE(i + 4) !== 0) throw new Error("Multi-disk zip not supported"); + let nFiles = chunk.readUint16LE(i + 10); + // Get the position of the central directory + const directorySize = chunk.readUint32LE(i + 12); + const directoryOffset = chunk.readUint32LE(i + 16); + if (directoryOffset === 0xffff_ffff) throw new Error("zip64 not supported"); + if (directoryOffset > size) throw new Error(`Central directory offset ${directoryOffset} is outside file`); + i = directoryOffset - unread; + // If i < 0, it means that the central directory is not contained within `chunk` + if (i < 0) { + chunk = await readFileChunk(zipFile, directoryOffset, directoryOffset + directorySize); + i = 0; + } + // Now iterate the central directory records, yield a `ZipRecord` for every entry + while (nFiles-- > 0) { + // Check for marker bytes + if (chunk.readUInt32LE(i) !== 0x02014b50) throw new Error("No central directory record at position " + (unread + i)); + const compressionMethod = ({ 8: "deflate" } as const)[chunk.readUint16LE(i + 10)]; + const compressedFileSize = chunk.readUint32LE(i + 20); + const filenameLength = chunk.readUint16LE(i + 28); + const extraLength = chunk.readUint16LE(i + 30); + const commentLength = chunk.readUint16LE(i + 32); + // Start of the actual content byte stream is after the 'local' record header, + // which is 30 bytes long plus filename and extra field + // NOTE(review): this uses the extra field length of the central directory record; the local + // header stores its own length, which the zip format does not require to be equal - TODO confirm + const start = chunk.readUint32LE(i + 42) + 30 + filenameLength + extraLength; + const end = start + compressedFileSize; + const filename = chunk.slice(i
+ 46, i + 46 + filenameLength).toString("utf-8"); + const createRecordReadStream = () => { + // `end` is one past the last content byte, but the `end` option of createReadStream is inclusive + const input = createReadStream(zipFile, { start, end: end - 1 }); + if (compressionMethod === "deflate") { + const inflate = createInflateRaw(); + // pipe() does not forward errors; hand read failures to the stream the caller consumes + input.on("error", e => inflate.destroy(e)); + input.pipe(inflate); + return inflate; + } + return input; + }; + if (end > start) yield { path: filename, createReadStream: createRecordReadStream, compressionMethod }; + // advance pointer to next central directory entry + i += 46 + filenameLength + extraLength + commentLength; + } +} + +export async function downloadAndUnzip({ + url, + destDirPath, + pathOfDirToExtractInArchive, + cacheDirPath +}: { isSilent: boolean; url: string; destDirPath: string; pathOfDirToExtractInArchive?: string; cacheDirPath: string; }) { - const { url, destDirPath, pathOfDirToExtractInArchive, cacheDirPath } = params; + const downloadHash = hash(JSON.stringify({ url, pathOfDirToExtractInArchive })).substring(0, 15); + const extractDirPath = pathJoin(cacheDirPath, `_${downloadHash}`); - const extractDirPath = pathJoin( - cacheDirPath, - `_${crypto.createHash("sha256").update(JSON.stringify({ url, pathOfDirToExtractInArchive })).digest("hex").substring(0, 15)}` - ); + const zipFilepath = await download(url, cacheDirPath); + const zipMtime = (await stat(zipFilepath)).mtimeMs; + const unzipMtime = (await maybeStat(extractDirPath))?.mtimeMs; - fs.mkdirSync(cacheDirPath, { "recursive": true }); + if (!unzipMtime || zipMtime > unzipMtime) await unzip(zipFilepath, extractDirPath, pathOfDirToExtractInArchive); - const { readIsSuccessByExtractDirPath, writeIsSuccessByExtractDirPath } = (() => { - const filePath = pathJoin(cacheDirPath, "isSuccessByExtractDirPath.json"); - - type IsSuccessByExtractDirPath = Record; - - function readIsSuccessByExtractDirPath(): IsSuccessByExtractDirPath { - if (!fs.existsSync(filePath)) { - return {}; - } - - return JSON.parse(fs.readFileSync(filePath).toString("utf8")); - } - - function
writeIsSuccessByExtractDirPath(isSuccessByExtractDirPath: IsSuccessByExtractDirPath): void { - fs.writeFileSync(filePath, Buffer.from(JSON.stringify(isSuccessByExtractDirPath, null, 2), "utf8")); - } - - return { readIsSuccessByExtractDirPath, writeIsSuccessByExtractDirPath }; - })(); - - downloadAndUnzip: { - const isSuccessByExtractDirPath = readIsSuccessByExtractDirPath(); - - if (isSuccessByExtractDirPath[extractDirPath]) { - break downloadAndUnzip; - } - - writeIsSuccessByExtractDirPath({ - ...isSuccessByExtractDirPath, - [extractDirPath]: false - }); - - fs.rmSync(extractDirPath, { "recursive": true, "force": true }); - - fs.mkdirSync(extractDirPath); - - const zipFileBasename = pathBasename(url); - - execSync(`curl -L ${url} -o ${zipFileBasename} ${params.isSilent ? "-s" : ""}`, { "cwd": extractDirPath }); - - execSync(`unzip -o ${zipFileBasename}${pathOfDirToExtractInArchive === undefined ? "" : ` "${pathOfDirToExtractInArchive}/**/*"`}`, { - "cwd": extractDirPath - }); - - fs.rmSync(pathJoin(extractDirPath, zipFileBasename), { "recursive": true, "force": true }); - - writeIsSuccessByExtractDirPath({ - ...isSuccessByExtractDirPath, - [extractDirPath]: true - }); - } - - transformCodebase({ - "srcDirPath": pathOfDirToExtractInArchive === undefined ? extractDirPath : pathJoin(extractDirPath, pathOfDirToExtractInArchive), - destDirPath - }); + const srcDirPath = pathOfDirToExtractInArchive === undefined ? extractDirPath : pathJoin(extractDirPath, pathOfDirToExtractInArchive); + transformCodebase({ srcDirPath, destDirPath }); }