Files
anyclip-video-manager/scripts/extract-sources.ts
Varun Shah e32d475aa9 Add AnyClip integration tools and extracted source code
- Add authentication scripts with SubtleCrypto password encryption
- Add sourcemap extraction pipeline (update-urls, download-sourcemaps, extract-sources)
- Add Playwright API interception script for monetization endpoints
- Document two-step auth flow with JWT tokens and dual cookies
- Move extracted source from root to anyclip/ directory
- Add project configuration (.env.example, .gitignore, CLAUDE.md)
2026-01-21 10:36:51 +08:00

198 lines
5.6 KiB
TypeScript

#!/usr/bin/env bun
/**
* Extract source files from sourcemaps into proper directory structure
*
* Usage:
* bun scripts/extract-sources.ts [options]
*
* Options:
* --output, -o <dir> Output directory (default: anyclip)
* --input, -i <dir> Sourcemaps directory (default: sourcemaps)
* --verbose, -v Verbose output
* --no-clean Don't delete output directory first
*/
import { mkdir, rm, stat, unlink } from "fs/promises";
import { dirname, join, normalize, basename } from "path";
import { parseArgs } from "util";
// Command-line options, parsed from Bun.argv with its first two entries dropped
const { values: args } = parseArgs({
  options: {
    input: { type: "string", short: "i", default: "sourcemaps" },
    output: { type: "string", short: "o", default: "anyclip" },
    "no-clean": { type: "boolean", default: false },
    verbose: { type: "boolean", short: "v", default: false },
  },
  args: Bun.argv.slice(2),
});

// Each option declares a default above, hence the non-null assertions
const INPUT_DIR = args.input!;
const OUTPUT_DIR = args.output!;
const NO_CLEAN = args["no-clean"]!;
const VERBOSE = args.verbose!;
/**
 * Shape this script expects from a parsed sourcemap file.
 * Only `sources` and `sourcesContent` are read by the extraction code.
 */
interface SourceMap {
  version: number;
  /** Original source paths; entry `i` pairs with `sourcesContent[i]` */
  sources: Array<string>;
  /** Embedded source text per entry, or null when none is embedded */
  sourcesContent: Array<string | null>;
  mappings?: string;
  names?: Array<string>;
}
// Staging area (output path -> file content). Everything is gathered here
// before anything is written, so conflicting paths can be resolved first.
const filesToWrite: Map<string, string> = new Map();
/**
 * Sanitize a source path from a sourcemap
 * Inspired by sourcemapper and shuji
 *
 * @param source Raw entry from a sourcemap's `sources` array
 * @returns A relative path that cannot escape the directory it is joined
 *   under, or `null` when the entry should be skipped (empty path,
 *   node_modules, webpack externals, or no file extension)
 */
function sanitizePath(source: string): string | null {
  const stripped = source
    // Remove webpack:// prefix (webpack://package-name/path or webpack:///path)
    .replace(/^webpack:\/\/[^/]*\//, "")
    // Remove leading ./
    .replace(/^\.\//, "")
    // Remove query strings (?v=123, ?module, etc)
    .replace(/\?[^/]*$/, "")
    // Remove Windows-illegal characters
    .replace(/[?%*|:"<>]/g, "");

  // Skip external dependencies (e.g. `external "react"`). This must be tested
  // before spaces are rewritten below; afterwards the "external " prefix no
  // longer exists and the check could never match.
  if (stripped.startsWith("external ")) return null;

  // Replace spaces with hyphens
  let cleanPath = stripped.replace(/ /g, "-");

  // Skip empty paths
  if (cleanPath === "") return null;

  // Skip node_modules
  if (cleanPath.includes("node_modules")) return null;

  // Handle (webpack) paths - put in webpack/ directory
  if (cleanPath.includes("(webpack)")) {
    cleanPath = "webpack/" + cleanPath.replace(/\(webpack\)\//g, "");
  }

  // Normalize path: resolve ../ sequences safely
  // Prepend a fake root to prevent escaping, then normalize and strip it
  const normalized = normalize("/" + cleanPath).slice(1);

  const hasExtension = basename(normalized).includes(".");

  // Handle webpack runtime files (no extension) - add .js
  if (normalized.startsWith("webpack/") && !hasExtension) {
    return normalized + ".js";
  }

  // Skip paths without file extension (likely directories)
  return hasExtension ? normalized : null;
}
/**
 * Read one sourcemap file and stage its embedded sources in `filesToWrite`.
 *
 * Files that are not valid JSON, or that lack `sources`/`sourcesContent`
 * arrays, are skipped and contribute nothing. When two entries map to the
 * same output path, the longer content wins.
 *
 * @param mapFile Path of the sourcemap file to read
 * @returns How many entries were staged by this call (new paths plus
 *   replacements of a shorter, previously staged version)
 */
async function extractSourcemap(mapFile: string): Promise<number> {
  if (VERBOSE) console.log(`Processing ${mapFile}...`);
  const fileContent = await Bun.file(mapFile).text();

  // Treat the parsed JSON as unknown until its shape has been checked
  let parsed: unknown;
  try {
    parsed = JSON.parse(fileContent);
  } catch (e) {
    if (VERBOSE) console.log(` Failed to parse: ${e}`);
    return 0;
  }

  // Valid JSON is not necessarily an object (`null`, a number, a string, ...)
  const { sources, sourcesContent }: Partial<SourceMap> =
    typeof parsed === "object" && parsed !== null ? parsed : {};
  if (!Array.isArray(sources) || !Array.isArray(sourcesContent)) {
    if (VERBOSE) console.log(` No sources found`);
    return 0;
  }

  let collected = 0;
  for (const [i, source] of sources.entries()) {
    const content: unknown = sourcesContent[i];
    // Skip entries without usable data: a non-string source name, or missing,
    // empty, or non-string content
    if (typeof source !== "string" || typeof content !== "string" || content === "") {
      continue;
    }

    const cleanPath = sanitizePath(source);
    if (!cleanPath) continue;

    const outputPath = join(OUTPUT_DIR, cleanPath);
    // Keep the larger file if there's a conflict (more complete source)
    const existing = filesToWrite.get(outputPath);
    if (existing === undefined || content.length > existing.length) {
      filesToWrite.set(outputPath, content);
      collected++;
    }
  }

  if (VERBOSE) console.log(` Collected ${collected} files`);
  return collected;
}
// Remove whatever is already in the output directory, unless --no-clean was passed
if (!NO_CLEAN) {
  console.log(`Cleaning ${OUTPUT_DIR}/...`);
  await rm(OUTPUT_DIR, { force: true, recursive: true });
}

// Gather the names of all *.map entries found by scanning the input directory
const sourcemaps: string[] = [];
for await (const entry of new Bun.Glob("*.map").scan(INPUT_DIR)) {
  sourcemaps.push(entry);
}
console.log(`Found ${sourcemaps.length} sourcemaps in ${INPUT_DIR}/`);

// Stage the contents of each sourcemap, one file at a time
for (const mapName of sourcemaps) {
  const mapPath = join(INPUT_DIR, mapName);
  await extractSourcemap(mapPath);
}
// Write all files
console.log(`\nWriting ${filesToWrite.size} files to ${OUTPUT_DIR}/...`);
let written = 0;
let errors = 0;
for (const [outputPath, content] of filesToWrite) {
  try {
    const dir = dirname(outputPath);

    // Handle file/directory conflicts: if any ancestor of the target already
    // exists as a regular file, remove it so mkdir can create a directory there.
    // Ancestors are derived with dirname() rather than by splitting on "/", so
    // an absolute output directory keeps its root and the platform's own path
    // separator is respected.
    const ancestors: string[] = [];
    for (let p = dir; p !== "." && p !== dirname(p); p = dirname(p)) {
      ancestors.unshift(p); // shallowest first
    }
    for (const ancestor of ancestors) {
      try {
        const s = await stat(ancestor);
        if (s.isFile()) {
          await unlink(ancestor);
        }
      } catch {
        // Path doesn't exist yet
      }
    }

    await mkdir(dir, { recursive: true, mode: 0o755 });
    await Bun.write(outputPath, content);
    written++;
    if (VERBOSE) console.log(`${outputPath}`);
  } catch (e) {
    errors++;
    // Without --verbose only the first three failures are printed in full
    if (VERBOSE || errors <= 3) {
      console.log(`${outputPath}: ${e}`);
    }
  }
}
if (!VERBOSE && errors > 3) {
  console.log(` ... and ${errors - 3} more errors`);
}
console.log(`\n✅ Extracted ${written} files to ${OUTPUT_DIR}/`);
if (errors > 0) console.log(`⚠️ ${errors} errors`);

// Show directory summary: the first two segments of every staged path
const dirs = new Set([...filesToWrite.keys()].map(p => p.split("/").slice(0, 2).join("/")));
console.log(`\nDirectories: ${[...dirs].sort().join(", ")}`);