diff --git a/utils/src/ast-grep/shebang.test.ts b/utils/src/ast-grep/shebang.test.ts
new file mode 100644
index 00000000..030585c8
--- /dev/null
+++ b/utils/src/ast-grep/shebang.test.ts
@@ -0,0 +1,222 @@
+import assert from 'node:assert/strict';
+import { describe, it } from 'node:test';
+import astGrep from '@ast-grep/napi';
+import dedent from 'dedent';
+import { getShebang, replaceNodeJsArgs } from './shebang.ts';
+
+describe('shebang', () => {
+  describe('getShebang', () => {
+    it('should get the shebang line', () => {
+      const code = dedent`
+        #!/usr/bin/env node
+        console.log("Hello, world!");
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+
+      const shebang = getShebang(ast);
+
+      assert.equal(shebang?.text(), '#!/usr/bin/env node');
+    });
+
+    it('should throw an error if multiple shebangs exist on top of the code', () => {
+      const code = dedent`
+        #!/usr/bin/env node 1
+        #!/usr/bin/env node 2
+        console.log("Hello, world!");
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+
+      assert.throws(() => getShebang(ast), {
+        message: 'Multiple shebang lines found',
+      });
+    });
+
+    it('should return null if no shebang line', () => {
+      const code = dedent`
+        console.log("Hello, world!");
+      `;
+
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+
+      const shebang = getShebang(ast);
+
+      assert.strictEqual(shebang, null);
+    });
+
+    it("shouldn't catch shebangs in comments", () => {
+      const code = dedent`
+        // #!/usr/bin/env node
+        console.log("Hello, world!");
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+
+      const shebang = getShebang(ast);
+
+      assert.strictEqual(shebang, null);
+    });
+
+    it("shouldn't catch shebang in middle of code", () => {
+      const code = dedent`
+        console.log("Hello, world!");
+        #!/usr/bin/env node
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+
+      const shebang = getShebang(ast);
+
+      assert.strictEqual(shebang, null);
+    });
+  });
+
+  describe('replaceNodeJsArgs', () => {
+    it('should replace multiple different arguments in shebang with overlapping names', () => {
+      const code = dedent`
+        #!/usr/bin/env node --foo --foobar --bar
+        console.log("Hello, world!");
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+      const edits = replaceNodeJsArgs(ast, {
+        '--foo': '--baz',
+        '--bar': '--qux',
+      });
+
+      assert.strictEqual(edits.length, 2);
+      assert.strictEqual(
+        edits[0].insertedText,
+        '#!/usr/bin/env node --baz --foobar --bar',
+      );
+      assert.strictEqual(
+        edits[1].insertedText,
+        '#!/usr/bin/env node --baz --foobar --qux',
+      );
+    });
+
+    it('should not replace arguments that are substrings of other args', () => {
+      const code = dedent`
+        #!/usr/bin/env node --foo --foo-bar --bar
+        console.log("Hello, world!");
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+      const edits = replaceNodeJsArgs(ast, {
+        '--foo': '--baz',
+        '--bar': '--qux',
+      });
+
+      assert.strictEqual(edits.length, 2);
+      assert.strictEqual(
+        edits[0].insertedText,
+        '#!/usr/bin/env node --baz --foo-bar --bar',
+      );
+      assert.strictEqual(
+        edits[1].insertedText,
+        '#!/usr/bin/env node --baz --foo-bar --qux',
+      );
+    });
+
+    it('should handle shebang with multiple spaces between args', () => {
+      const code = dedent`
+        #!/usr/bin/env node  --foo   --bar
+        console.log("Hello, world!");
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+      const edits = replaceNodeJsArgs(ast, {
+        '--foo': '--baz',
+        '--bar': '--qux',
+      });
+
+      assert.strictEqual(edits.length, 2);
+      assert.strictEqual(
+        edits[0].insertedText,
+        '#!/usr/bin/env node  --baz   --bar',
+      );
+      assert.strictEqual(
+        edits[1].insertedText,
+        '#!/usr/bin/env node  --baz   --qux',
+      );
+    });
+
+    it('should not replace if argument is at the start of the shebang', () => {
+      const code = dedent`
+        #!/usr/bin/env --foo node --bar
+        console.log("Hello, world!");
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+      const edits = replaceNodeJsArgs(ast, { '--foo': '--baz' });
+
+      // Should not replace: only arguments after node are rewritten, what precedes it belongs to env
+      assert.strictEqual(edits.length, 0);
+    });
+
+    it('should replace argument with special characters', () => {
+      const code = dedent`
+        #!/usr/bin/env node --foo-bar --bar_foo
+        console.log("Hello, world!");
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+
+      /**
+       * replace --foo-bar to --baz-bar
+       * replace --bar_foo to --qux_foo
+       */
+      const edits = replaceNodeJsArgs(ast, {
+        '--foo-bar': '--baz-bar',
+        '--bar_foo': '--qux_foo',
+      });
+
+      assert.strictEqual(edits.length, 2);
+      assert.strictEqual(
+        edits[0].insertedText,
+        '#!/usr/bin/env node --baz-bar --bar_foo',
+      );
+      assert.strictEqual(
+        edits[1].insertedText,
+        '#!/usr/bin/env node --baz-bar --qux_foo',
+      );
+    });
+
+    it('should not replace anything if argsToValues is empty', () => {
+      const code = dedent`
+        #!/usr/bin/env node --foo --bar
+        console.log("Hello, world!");
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+
+      const edits = replaceNodeJsArgs(ast, {});
+
+      assert.strictEqual(edits.length, 0);
+    });
+
+    it('should return an empty list if the shebang does not invoke node', () => {
+      const code = dedent`
+        #!/usr/bin/env deno --foo
+        console.log("Hello, world!");
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+      const edits = replaceNodeJsArgs(ast, { '--foo': '--baz' });
+
+      assert.deepStrictEqual(edits, []);
+    });
+
+    it('should handle shebang with quoted arguments', () => {
+      const code = dedent`
+        #!/usr/bin/env node "--foo" '--bar'
+        console.log("Hello, world!");
+      `;
+      const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
+      const edits = replaceNodeJsArgs(ast, {
+        '"--foo"': '"--baz"',
+        "'--bar'": "'--qux'",
+      });
+
+      assert.strictEqual(edits.length, 2);
+      assert.strictEqual(
+        edits[0].insertedText,
+        '#!/usr/bin/env node "--baz" \'--bar\'',
+      );
+      assert.strictEqual(
+        edits[1].insertedText,
+        '#!/usr/bin/env node "--baz" \'--qux\'',
+      );
+    });
+  });
+});
diff --git a/utils/src/ast-grep/shebang.ts b/utils/src/ast-grep/shebang.ts
new file mode 100644
index 00000000..8af592ad
--- /dev/null
+++ b/utils/src/ast-grep/shebang.ts
@@ -0,0 +1,100 @@
+import type { SgRoot, Edit } from '@codemod.com/jssg-types/main';
+
+const REGEX_ESCAPE_PATTERN = /[.*+?^${}()|[\]\\]/g;
+
+// Matches the Node.js binary name (`node` or `node.exe`) as a whole word
+const NODE_BINARY_PATTERN = /\bnode(\.exe)?\b/;
+
+/**
+ * Get the Node.js shebang line from the root.
+ *
+ * According to the ECMAScript spec, a hashbang comment is only valid at the
+ * very start of a Script or Module, so a match that is not on the first line
+ * is ignored. Only shebangs that invoke `node` (or `node.exe`) are considered.
+ * @param root The root node to search.
+ * @returns The shebang node if one is found on the first line, otherwise null.
+ * @throws If more than one matching shebang line is found.
+ */
+export const getShebang = (root: SgRoot) => {
+  const shebangs = root.root().findAll({
+    rule: {
+      kind: 'hash_bang_line',
+      regex: NODE_BINARY_PATTERN.source,
+    },
+  });
+
+  const [firstShebang] = shebangs;
+
+  if (!firstShebang) return null;
+
+  // A shebang is only meaningful on the very first line of the file
+  if (firstShebang.range().start.line !== 0) return null;
+
+  if (shebangs.length > 1) {
+    throw new Error('Multiple shebang lines found');
+  }
+
+  return firstShebang;
+};
+
+/**
+ * Replace Node.js arguments in the shebang line.
+ *
+ * Only the part of the shebang that follows the `node` binary is rewritten;
+ * everything before it belongs to `env` and is left untouched. Arguments are
+ * matched as whole, whitespace-delimited tokens (optionally wrapped in
+ * quotes), so `--foo` never matches inside `--foobar` or `--foo-bar`.
+ *
+ * One edit is produced per argument that changed something. Each edit replaces
+ * the whole shebang node and includes all earlier replacements, so the last
+ * edit carries the final shebang text.
+ * @param root The root node to search.
+ * @param argsToValues The mapping of argument names to their new values.
+ * @returns The edits to apply; empty when there is no Node.js shebang or no
+ * argument matched.
+ */
+export const replaceNodeJsArgs = (
+  root: SgRoot,
+  argsToValues: Record<string, string>,
+): Edit[] => {
+  const shebang = getShebang(root);
+
+  if (!shebang) return [];
+
+  const text = shebang.text();
+  const nodeMatch = NODE_BINARY_PATTERN.exec(text);
+
+  // Always return an array so callers can rely on `.length` and iteration
+  if (!nodeMatch) return [];
+
+  // Only touch what comes after node; what precedes it belongs to env
+  const nodeEnd = nodeMatch.index + nodeMatch[0].length;
+  const beforeNode = text.slice(0, nodeEnd);
+  let afterNode = text.slice(nodeEnd);
+
+  const edits: Edit[] = [];
+
+  for (const [arg, replacement] of Object.entries(argsToValues)) {
+    // Escape special regex characters so the argument is matched literally
+    const escapedArg = arg.replace(REGEX_ESCAPE_PATTERN, '\\$&');
+    const argPattern = new RegExp(
+      `(\\s+)(["']?)${escapedArg}(["']?)(?=\\s|$)`,
+      'g',
+    );
+
+    // A replacer function preserves the original whitespace and quotes, and
+    // keeps `$` sequences in the new value from being interpreted specially
+    const newAfterNode = afterNode.replace(
+      argPattern,
+      (_match, ws: string, openQuote: string, closeQuote: string) =>
+        `${ws}${openQuote}${replacement}${closeQuote}`,
+    );
+
+    if (newAfterNode !== afterNode) {
+      edits.push(shebang.replace(beforeNode + newAfterNode));
+      afterNode = newAfterNode;
+    }
+  }
+
+  return edits;
+};