Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 211 additions & 0 deletions utils/src/ast-grep/shebang.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
import assert from 'node:assert/strict';
import { describe, it } from 'node:test';
import astGrep from '@ast-grep/napi';
import dedent from 'dedent';
import { getShebang, replaceNodeJsArgs } from './shebang.ts';

describe('shebang', () => {
// Tests for getShebang: which `#!` lines are returned, rejected, or ignored.
describe('getShebang', () => {
// A single shebang on the first line is returned as a node whose text is the whole line.
it('should get the shebang line', () => {
const code = dedent`
#!/usr/bin/env node
console.log("Hello, world!");
`;
const ast = astGrep.parse(astGrep.Lang.JavaScript, code);

const shebang = getShebang(ast);

assert.equal(shebang?.text(), '#!/usr/bin/env node');
});

// Two shebang lines stacked at the top are rejected with an error instead of one being picked.
it('should throw an error if multiple shebangs exist on top of the code', () => {
const code = dedent`
#!/usr/bin/env node 1
#!/usr/bin/env node 2
console.log("Hello, world!");
`;
const ast = astGrep.parse(astGrep.Lang.JavaScript, code);

assert.throws(() => getShebang(ast), {
message: 'Multiple shebang lines found',
});
});

// A file without any shebang yields null (not undefined, and no error).
it('should return null if no shebang line', () => {
const code = dedent`
console.log("Hello, world!");
`;

const ast = astGrep.parse(astGrep.Lang.JavaScript, code);

const shebang = getShebang(ast);

assert.strictEqual(shebang, null);
});

// Shebang-looking text inside a `//` comment must not be reported as a shebang.
it("shouldn't catch shebangs in comments", () => {
const code = dedent`
// #!/usr/bin/env node
console.log("Hello, world!");
`;
const ast = astGrep.parse(astGrep.Lang.JavaScript, code);

const shebang = getShebang(ast);

assert.strictEqual(shebang, null);
});

// A `#!` line that follows a statement is not at the start of the file, so null is expected.
it("shouldn't catch shebang in middle of code", () => {
const code = dedent`
console.log("Hello, world!");
#!/usr/bin/env node
`;
const ast = astGrep.parse(astGrep.Lang.JavaScript, code);

const shebang = getShebang(ast);

assert.strictEqual(shebang, null);
});
});

// Tests for replaceNodeJsArgs. The assertions show that each mapped argument that
// matches yields its own edit, and that edits are cumulative: the text of a later
// edit already contains the replacements made by the earlier ones.
describe('replaceNodeJsArgs', () => {
// `--foobar` merely starts with `--foo`; only the exact `--foo` token may be rewritten.
it('should replace multiple different arguments in shebang with overlapping names', () => {
const code = dedent`
#!/usr/bin/env node --foo --foobar --bar
console.log("Hello, world!");
`;
const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
const edits = replaceNodeJsArgs(ast, {
'--foo': '--baz',
'--bar': '--qux',
});

assert.strictEqual(edits.length, 2);
assert.strictEqual(
edits[0].insertedText,
'#!/usr/bin/env node --baz --foobar --bar',
);
assert.strictEqual(
edits[1].insertedText,
'#!/usr/bin/env node --baz --foobar --qux',
);
});

// Same idea with a hyphenated neighbour: `--foo-bar` must survive a `--foo` replacement.
it('should not replace arguments that are substrings of other args', () => {
const code = dedent`
#!/usr/bin/env node --foo --foo-bar --bar
console.log("Hello, world!");
`;
const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
const edits = replaceNodeJsArgs(ast, {
'--foo': '--baz',
'--bar': '--qux',
});

assert.strictEqual(edits.length, 2);
assert.strictEqual(
edits[0].insertedText,
'#!/usr/bin/env node --baz --foo-bar --bar',
);
assert.strictEqual(
edits[1].insertedText,
'#!/usr/bin/env node --baz --foo-bar --qux',
);
});

// NOTE(review): the fixture and expectations below show single spaces between
// arguments in this view; confirm the real fixture contains runs of spaces, as
// the test name implies, and that the expected strings preserve them.
it('should handle shebang with multiple spaces between args', () => {
const code = dedent`
#!/usr/bin/env node --foo --bar
console.log("Hello, world!");
`;
const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
const edits = replaceNodeJsArgs(ast, {
'--foo': '--baz',
'--bar': '--qux',
});

assert.strictEqual(edits.length, 2);
assert.strictEqual(
edits[0].insertedText,
'#!/usr/bin/env node --baz --bar',
);
assert.strictEqual(
edits[1].insertedText,
'#!/usr/bin/env node --baz --qux',
);
});

// Here `--foo` appears before `node`, and `--bar` (after `node`) has no mapping.
it('should not replace if argument is at the start of the shebang', () => {
const code = dedent`
#!/usr/bin/env --foo node --bar
console.log("Hello, world!");
`;
const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
const edits = replaceNodeJsArgs(ast, { '--foo': '--baz' });

// Should not replace: `--foo` comes before `node`, so it is not treated as a Node.js argument
assert.strictEqual(edits.length, 0);
});

// Hyphens and underscores inside an argument name are matched as part of the name.
it('should replace argument with special characters', () => {
const code = dedent`
#!/usr/bin/env node --foo-bar --bar_foo
console.log("Hello, world!");
`;
const ast = astGrep.parse(astGrep.Lang.JavaScript, code);

/**
* Replace --foo-bar with --baz-bar,
* then replace --bar_foo with --qux_foo.
*/
const edits = replaceNodeJsArgs(ast, {
'--foo-bar': '--baz-bar',
'--bar_foo': '--qux_foo',
});

assert.strictEqual(edits.length, 2);
assert.strictEqual(
edits[0].insertedText,
'#!/usr/bin/env node --baz-bar --bar_foo',
);
assert.strictEqual(
edits[1].insertedText,
'#!/usr/bin/env node --baz-bar --qux_foo',
);
});

// An empty mapping is a no-op: no edits are produced even though a shebang exists.
it('should not replace anything if argsToValues is empty', () => {
const code = dedent`
#!/usr/bin/env node --foo --bar
console.log("Hello, world!");
`;
const ast = astGrep.parse(astGrep.Lang.JavaScript, code);

const edits = replaceNodeJsArgs(ast, {});

assert.strictEqual(edits.length, 0);
});

// The mapping keys and values here include the quote characters themselves,
// and the expected output keeps each argument wrapped in its original quotes.
it('should handle shebang with quoted arguments', () => {
const code = dedent`
#!/usr/bin/env node "--foo" '--bar'
console.log("Hello, world!");
`;
const ast = astGrep.parse(astGrep.Lang.JavaScript, code);
const edits = replaceNodeJsArgs(ast, {
'"--foo"': '"--baz"',
"'--bar'": "'--qux'",
});

assert.strictEqual(edits.length, 2);
assert.strictEqual(
edits[0].insertedText,
'#!/usr/bin/env node "--baz" \'--bar\'',
);
assert.strictEqual(
edits[1].insertedText,
'#!/usr/bin/env node "--baz" \'--qux\'',
);
});
});
});
87 changes: 87 additions & 0 deletions utils/src/ast-grep/shebang.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import type { SgRoot, Edit } from '@codemod.com/jssg-types/main';

// Matches each regular-expression metacharacter so it can be backslash-escaped
// (via `'\\$&'`) before the text is embedded in a `new RegExp(...)` source string.
const REGEX_ESCAPE_PATTERN = /[.*+?^${}()|[\]\\]/g;

/**
* Get the shebang line from the root.
* According to the ECMAScript spec, shebangs (InputElementHashbangOrRegExp) are only
* valid at the start of a Script or Module. We therefore only accept a hash_bang_line
* that invokes `node` (or `node.exe`) and starts on the first line of the file.
* If more than one such hash_bang_line is matched, an error is thrown instead of picking one.
* @param root The root node to search.
* @returns The shebang line if found, otherwise null.
*/
export const getShebang = (root: SgRoot) => {
const allShebangs = root.root().findAll({
rule: {
Copy link

@HerringtonDarkholme HerringtonDarkholme Jan 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a reasonable fix here would be to find the first hash_bang_line in the tree.

According to the grammar spec

There are several situations where the identification of lexical input elements is sensitive to the syntactic grammar context that is consuming the input elements. This requires multiple goal symbols for the lexical grammar. The InputElementHashbangOrRegExp goal is used at the start of a Script or Module.

kind: 'hash_bang_line',
regex: '\\bnode(\\.exe)?\\b',
},
});

// Find the last consecutive shebang from the start of the file
let lastValidShebang = null;

if (allShebangs.length === 0) return null;

const firstShebang = allShebangs[0];

if (firstShebang.range().start.line !== 0) return null;

if (allShebangs.length > 1) {
throw new Error('Multiple shebang lines found');
}

lastValidShebang = firstShebang;

return lastValidShebang;
};

/**
 * Replace Node.js arguments in the shebang line.
 *
 * Only the part of the shebang after the `node` (or `node.exe`) token is
 * rewritten; everything up to and including that token is kept verbatim.
 * The keys of `argsToValues` are processed in insertion order. Every key that
 * changes the text produces one edit that replaces the whole shebang node with
 * the text containing all replacements made so far, so later edits are
 * cumulative over earlier ones.
 *
 * An argument only matches as a whole token: it must be preceded by whitespace
 * and followed by whitespace or the end of the line. One optional quote
 * character on either side of the argument is preserved.
 *
 * @param root The root node to search.
 * @param argsToValues The mapping of argument names to their new values.
 * @returns The list of edits, one per mapped argument that matched. The list is
 * empty when there is no shebang, no `node` token, or nothing matched.
 * @throws Propagates the error raised by `getShebang` when multiple shebang lines are found.
 */
export const replaceNodeJsArgs = (
  root: SgRoot,
  argsToValues: Record<string, string>,
): Edit[] => {
  const shebang = getShebang(root);

  if (!shebang) return [];

  const edits: Edit[] = [];
  const text = shebang.text();

  // Find the "node" token in the shebang. `exec` is used instead of `match`
  // because its result always carries a numeric `index`, so no `!` is needed.
  const nodeMatch = /\bnode(\.exe)?\b/.exec(text);

  // Always return an array so callers can rely on `.length` / iteration.
  if (!nodeMatch) return edits;

  // Only text after `node` is touched; what precedes it belongs to `env`.
  const nodeEnd = nodeMatch.index + nodeMatch[0].length;
  const beforeNode = text.slice(0, nodeEnd);
  let afterNode = text.slice(nodeEnd);

  for (const [arg, replacement] of Object.entries(argsToValues)) {
    // Escape special regex characters so the argument is matched literally.
    const escaped = arg.replace(REGEX_ESCAPE_PATTERN, '\\$&');
    const regex = new RegExp(`(\\s+)(["']?)${escaped}(["']?)(?=\\s|$)`, 'g');

    // A replacer function (rather than a replacement string) keeps `$` sequences
    // in the new value literal and re-emits the captured whitespace and quotes.
    const newAfterNode = afterNode.replace(
      regex,
      (_match: string, ws: string, q1: string, q2: string) =>
        `${ws}${q1}${replacement}${q2}`,
    );

    if (newAfterNode !== afterNode) {
      edits.push(shebang.replace(beforeNode + newAfterNode));
      afterNode = newAfterNode;
    }
  }

  return edits;
};
Loading