Skip to content

rm-comments program to remove comments from many different languages dynamically. #172

@connerohnesorge

Description

@connerohnesorge

Basically, we need a new module at ./modules/programs/rm-comments/rm-comments.nix, along these lines:

{
  delib,
  pkgs,
  ...
}: let
  inherit (delib) singleEnableOption;

  # Go backend.
  # NOTE: `rev` and the hash below are placeholders. builtins.fetchGit expects
  # `rev` to be a full commit hash, so pin the release commit before building.
  goProgram = pkgs.buildGoModule {
    name = "rm-comments";
    src = builtins.fetchGit {
      url = "https://github.com/connero/rm-comments";
      rev = "v0.0.1";
    };
    # `vendorHash` is the current name of the former `vendorSha256` attribute.
    vendorHash = "sha256-0g/4+1/0+0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0";
  };

  # Rust backend. buildRustPackage lives under `rustPlatform`, not directly
  # under `pkgs`; `cargoHash` is the current name of the former `cargoSha256`.
  rustProgram = pkgs.rustPlatform.buildRustPackage {
    name = "rm-comments";
    src = builtins.fetchGit {
      url = "https://github.com/connero/rm-comments";
      rev = "v0.0.1";
    };
    cargoHash = "sha256-0g/4+1/0+0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0";
  };

  # etc ...

  # Dispatcher: `rm-comments <language> [args...]`.
  # The language selector is consumed here so that each backend only receives
  # its own arguments (the Go tool, for example, stops parsing flags at the
  # first non-flag argument).
  program = pkgs.writeShellScriptBin "rm-comments" ''
    if [ "$#" -eq 0 ]; then
      echo "usage: rm-comments <language> [args...]" >&2
      exit 2
    fi

    lang="$1"
    shift

    case "$lang" in
      # If first argument is rust, use rust program
      rust)
        exec ${rustProgram}/bin/rm-comments "$@"
        ;;
      # If first argument is go, use go program
      go)
        exec ${goProgram}/bin/rm-comments "$@"
        ;;
      # etc ...
      *)
        # Fail loudly instead of silently doing nothing.
        echo "rm-comments: unsupported language: $lang" >&2
        exit 2
        ;;
    esac
  '';
in
  delib.module {
    name = "programs.rm-comments";

    options = singleEnableOption false;

    nixos.ifEnabled = {myconfig, ...}: {
      environment.systemPackages = [
        program
      ];
    };

    darwin.ifEnabled = {myconfig, ...}: {
      environment.systemPackages = [
        program
      ];
    };
  }

Go program to inline using nix:

package main

import (
	"bytes"
	"errors"
	"flag"
	"fmt"
	"go/ast"
	"go/format"
	"go/parser"
	"go/token"
	"io/fs"
	"os"
	"path/filepath"
	"regexp"
	"strings"
)

// Command-line settings. Each variable is bound to a flag in init and holds
// that flag's value once flag.Parse has run.
var (
	rootDir          string // directory tree to scan
	writeChanges     bool   // write results back instead of doing a dry run
	removeDirectives bool   // also strip build/go/cgo directive comments
	skipTests        bool   // leave *_test.go files alone
	includeVendor    bool   // descend into vendor/ directories
	quiet            bool   // suppress per-file and summary output
)

// init binds the package-level settings to their command-line flags.
func init() {
	flag.StringVar(&rootDir, "root", ".", "Root directory to scan recursively for .go files")

	// Every remaining flag is a boolean that defaults to false, so they are
	// registered from a table.
	boolFlags := []struct {
		target *bool
		name   string
		usage  string
	}{
		{&writeChanges, "write", "Write changes back to files (default is dry-run)"},
		{&removeDirectives, "remove-directives", "Also remove //go:, //go:build, // +build, and cgo preambles (#cgo/#include). Dangerous; may break builds"},
		{&skipTests, "skip-tests", "Skip files matching *_test.go"},
		{&includeVendor, "include-vendor", "Include vendor/ directories"},
		{&quiet, "quiet", "Suppress per-file logs; only print errors"},
	}
	for _, f := range boolFlags {
		flag.BoolVar(f.target, f.name, false, f.usage)
	}
}

// main parses the command line, runs the scan, and exits with status 1 after
// printing the error to stderr if the scan fails.
func main() {
	flag.Parse()

	err := run()
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "error: %v\n", err)
	os.Exit(1)
}

// run walks rootDir, processes every eligible .go file with
// stripCommentsInFile, and prints per-file results plus a summary unless quiet
// is set. It returns the first error encountered, which aborts the walk.
func run() error {
	rootInfo, statErr := os.Stat(rootDir)
	if statErr != nil {
		return fmt.Errorf("stat root: %w", statErr)
	}
	if !rootInfo.IsDir() {
		return errors.New("root must be a directory")
	}

	var (
		scanned int
		changed int
		skipped int
	)

	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		name := entry.Name()

		// Directories: prune .direnv always, vendor unless requested.
		if entry.IsDir() {
			switch {
			case name == ".direnv":
				return filepath.SkipDir
			case name == "vendor" && !includeVendor:
				return filepath.SkipDir
			}
			return nil
		}

		// Files: only Go sources, optionally excluding tests.
		switch {
		case !strings.HasSuffix(name, ".go"):
			return nil
		case skipTests && strings.HasSuffix(name, "_test.go"):
			skipped++
			return nil
		}

		scanned++
		modified, procErr := stripCommentsInFile(path, writeChanges, removeDirectives)
		if procErr != nil {
			return fmt.Errorf("process %s: %w", path, procErr)
		}
		if modified {
			changed++
		}
		if quiet {
			return nil
		}

		switch {
		case !modified:
			fmt.Printf("no change: %s\n", path)
		case writeChanges:
			fmt.Printf("updated: %s\n", path)
		default:
			fmt.Printf("would update: %s\n", path)
		}
		return nil
	}

	if err := filepath.WalkDir(rootDir, visit); err != nil {
		return err
	}
	if quiet {
		return nil
	}

	fmt.Printf("\nScanned: %d, Changed: %d, Skipped: %d, Mode: %s, Directives kept: %t\n",
		scanned, changed, skipped, ternary(writeChanges, "write", "dry-run"), !removeDirectives)
	return nil
}

// stripCommentsInFile parses the Go file at path, drops its comments, and
// re-prints it with go/format.
//
// When removeDirectives is false, comment groups accepted by
// filterDirectiveCommentGroups are kept; otherwise every comment is removed.
// The file is rewritten only when write is true and the formatted result
// differs from the original (ignoring CRLF vs LF).
//
// It reports whether the file content changed (or would change in a dry run).
func stripCommentsInFile(path string, write bool, removeDirectives bool) (bool, error) {
	src, err := os.ReadFile(path)
	if err != nil {
		return false, err
	}

	fset := token.NewFileSet()
	// Parse with comments so directive groups can optionally be kept.
	file, err := parser.ParseFile(fset, path, src, parser.ParseComments)
	if err != nil {
		return false, err
	}

	// The comment list must be empty but non-nil. With a nil File.Comments the
	// printer falls back to the comments attached to individual nodes (Doc and
	// Comment fields), so doc comments would survive the "remove everything"
	// mode.
	kept := []*ast.CommentGroup{}
	if !removeDirectives {
		kept = append(kept, filterDirectiveCommentGroups(file.Comments)...)
	}
	file.Comments = kept

	// Re-print formatted source from the AST, without the removed comments.
	var buf bytes.Buffer
	if err := format.Node(&buf, fset, file); err != nil {
		return false, fmt.Errorf("format: %w", err)
	}

	newSrc := buf.Bytes()
	if bytes.Equal(normalizeNL(src), normalizeNL(newSrc)) {
		return false, nil
	}
	if !write {
		return true, nil
	}

	// Reuse the existing permission bits. Only the permission part of the mode
	// is meaningful to WriteFile.
	info, err := os.Stat(path)
	if err != nil {
		return false, fmt.Errorf("stat: %w", err)
	}
	if err := os.WriteFile(path, newSrc, info.Mode().Perm()); err != nil {
		return false, err
	}
	return true, nil
}

// filterDirectiveCommentGroups keeps only groups that contain build/go/cgo-significant content.
// This helps avoid breaking builds while stripping “ordinary” comments.
func filterDirectiveCommentGroups(groups []*ast.CommentGroup) []*ast.CommentGroup {
	keep := make([]*ast.CommentGroup, 0, len(groups))
	for _, g := range groups {
		text := groupText(g)

		if isSignificantDirective(text) {
			keep = append(keep, g)
		}
	}
	return keep
}

// groupText concatenates the raw text of every comment in g, including the
// comment markers, terminating each one with a newline.
func groupText(g *ast.CommentGroup) string {
	var sb strings.Builder
	for i := range g.List {
		sb.WriteString(g.List[i].Text)
		sb.WriteString("\n")
	}
	return sb.String()
}

// Patterns for comments that are kept by default because removing them can
// change how the package builds: go:build, +build (legacy), any other //go:
// directive (generate, linkname, noinline, embed, ...), line directives, cgo
// preamble lines (#cgo, #include, #define) and cgo //export markers.
//
// The cgo preamble pattern accepts the directive at the start of a line inside
// a block comment as well as after a leading "//" or "/*", since preambles are
// written in either comment style.
var (
	reAnyGoDirective = regexp.MustCompile(`(?m)^\s*//\s*go:[A-Za-z0-9_]+`)
	reGoBuild        = regexp.MustCompile(`(?m)^\s*//\s*go:build\b`)
	rePlusBuild      = regexp.MustCompile(`(?m)^\s*//\s*\+build\b`)
	reLineDirective  = regexp.MustCompile(`(?m)^\s*//\s*line\b`)
	reCGOPreamble    = regexp.MustCompile(`(?m)^\s*(?://|/\*)?[ \t]*#[ \t]*(cgo|include|define)\b`)
	reCGOExport      = regexp.MustCompile(`(?m)^\s*//export[ \t]+\S`)
)

// isSignificantDirective reports whether the comment text (as produced by
// groupText, markers included) contains a directive that should be preserved.
func isSignificantDirective(text string) bool {
	// Fast path: every pattern below requires one of these substrings, so
	// text without any of them cannot match.
	if !strings.Contains(text, "go") &&
		!strings.Contains(text, "#") &&
		!strings.Contains(text, "+build") &&
		!strings.Contains(text, "line") &&
		!strings.Contains(text, "export") {
		return false
	}
	return reGoBuild.MatchString(text) ||
		rePlusBuild.MatchString(text) ||
		reAnyGoDirective.MatchString(text) ||
		reLineDirective.MatchString(text) ||
		reCGOPreamble.MatchString(text) ||
		reCGOExport.MatchString(text)
}

// normalizeNL returns a copy of b with every CRLF pair replaced by a single
// LF, so that line-ending differences alone do not count as a change.
func normalizeNL(b []byte) []byte {
	crlf, lf := []byte("\r\n"), []byte("\n")
	return bytes.Replace(b, crlf, lf, -1)
}

// ternary returns a when cond is true and b otherwise. Both values are
// evaluated by the caller before the call.
func ternary[T any](cond bool, a, b T) T {
	chosen := b
	if cond {
		chosen = a
	}
	return chosen
}

Python file to inline using nix:

#!/usr/bin/env python3
from __future__ import annotations
import tokenize
from io import BytesIO
from pathlib import Path


def remove_comments_and_docstrings(source: str) -> str:
    """
    Remove Python comments and docstrings from source code,
    but preserve a first-line shebang (#!...).

    The source is tokenized only to locate the spans to drop; everything else
    is copied from ``source`` unchanged, so indentation (including tabs), line
    continuations and string contents are preserved exactly.

    A string counts as a docstring when it is a whole statement that is either
    the first statement of the module or the first statement of an indented
    block. If removing it would leave the block empty, it is replaced with
    ``pass`` so the result stays valid Python.

    Errors raised by ``tokenize`` for malformed source propagate to the caller.
    """
    from io import StringIO

    # generate_tokens works on text directly and yields no ENCODING token,
    # which would otherwise leak its "utf-8" string into the output.
    tokens = list(tokenize.generate_tokens(StringIO(source).readline))

    # Absolute offset of the first character of each physical line, used to
    # turn (line, column) token positions into indexes into ``source``.
    line_starts = [0]
    newline_at = source.find("\n")
    while newline_at != -1:
        line_starts.append(newline_at + 1)
        newline_at = source.find("\n", newline_at + 1)

    edits: list[tuple[int, int, str]] = []  # (start, end, replacement), ascending
    seen_code = False  # True once anything other than comments/blank lines was seen
    prev_type: int | None = None

    for index, tok in enumerate(tokens):
        replacement: str | None = None

        if tok.type == tokenize.COMMENT:
            is_shebang = tok.start[0] == 1 and tok.string.startswith("#!")
            if not is_shebang:
                replacement = ""
        elif tok.type == tokenize.STRING:
            opens_block = prev_type == tokenize.INDENT
            if index + 1 < len(tokens):
                following = tokens[index + 1].type
            else:
                following = tokenize.ENDMARKER
            # Only a string that is a complete statement is a docstring; this
            # leaves expressions such as `", ".join(xs)` untouched.
            if (opens_block or not seen_code) and following in (
                tokenize.NEWLINE,
                tokenize.ENDMARKER,
            ):
                replacement = ""
                if opens_block:
                    # Look past the statement's NEWLINE and any comments or
                    # blank lines: a DEDENT next means the block has no other
                    # statement and needs a placeholder.
                    probe = index + 2
                    while probe < len(tokens) and tokens[probe].type in (
                        tokenize.NL,
                        tokenize.COMMENT,
                    ):
                        probe += 1
                    if probe >= len(tokens) or tokens[probe].type in (
                        tokenize.DEDENT,
                        tokenize.ENDMARKER,
                    ):
                        replacement = "pass"

        if tok.type not in (tokenize.COMMENT, tokenize.NL):
            seen_code = True
        prev_type = tok.type

        if replacement is None:
            continue

        line_start = line_starts[tok.start[0] - 1]
        start = line_start + tok.start[1]
        end = line_starts[tok.end[0] - 1] + tok.end[1]
        if replacement == "":
            # Also drop the whitespace in front of the removed span so no
            # trailing blanks are left behind, without crossing the line start
            # or the previous edit.
            floor = max(line_start, edits[-1][1] if edits else 0)
            while start > floor and source[start - 1] in " \t\f":
                start -= 1
        edits.append((start, end, replacement))

    pieces: list[str] = []
    cursor = 0
    for start, end, replacement in edits:
        pieces.append(source[cursor:start])
        pieces.append(replacement)
        cursor = end
    pieces.append(source[cursor:])
    return "".join(pieces)


def strip_file(path: Path) -> None:
    """Remove comments/docstrings from a Python file in place.

    The file is read and written as UTF-8. It is only rewritten when the
    cleaned text differs from what was read, so untouched files keep their
    modification time.
    """
    source = path.read_text(encoding="utf-8")
    cleaned = remove_comments_and_docstrings(source)
    if cleaned != source:
        path.write_text(cleaned, encoding="utf-8")


def strip_directory(directory: str | Path) -> None:
    """Strip every ``*.py`` entry found anywhere under ``directory``.

    Each path is printed before it is handed to ``strip_file``.
    """
    for pyfile in Path(directory).rglob("*.py"):
        print(f"Stripping {pyfile}")
        strip_file(pyfile)


if __name__ == "__main__":
    import sys

    # Exactly one positional argument is expected: the directory to process.
    argv = sys.argv
    if len(argv) == 2:
        strip_directory(argv[1])
    else:
        print(f"Usage: {Path(sys.argv[0]).name} <directory>")
        raise SystemExit(1)

Rust program to inline using nix (the listing below is cut off partway through `remove_comments`):

use std::env;
use std::fs::{self, File};
use std::io::{Read, Write};
use std::path::{Path, PathBuf};

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum State {
    Normal,
    LineComment,
    BlockComment(usize), // nesting level
    String,              // "..."
    ByteString,          // b"..."
    Char,                // 'x' or escape forms
    RawString(usize),    // r###"..."###  (count is number of #)
    RawByteString(usize) // br###"..."###
}

/// Returns `true` when `c` is an underscore or an ASCII letter.
fn is_ident_start(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z')
}

fn remove_comments(src: &str) -> String {
    let mut out = String::with_capacity(src.len());
    let bytes = src.as_bytes();
    let mut i = 0usize;
    let mut state = State::Normal;

    while i < bytes.len() {
        let c = bytes[i] as char;

        match state {
            State::Normal => {
                if c == '/' && i + 1 < bytes.len() {
                    let n = bytes[i + 1] as char;
                    if n == '/' {
                        // Start line comment: //
                        state = State::LineComment;
                        i += 2;
                        continue;
                    } else if n == '*' {
                        // Start (possibly nested) block comment: /* */
                        state = State::BlockComment(1);
                        i += 2;
                        continue;
                    }
                }

                // Raw or byte/raw strings: r#"..."#, br#"..."#
                if c == 'r' {
                    // r##?" start?
                    let mut j = i + 1;
                    let mut hash_count = 0usize;
                    while j < bytes.len() && bytes[j] as char == '#' {
                        hash_count += 1;
                        j += 1;
                    }
                    if j < bytes.len() && bytes[j] as char == '"' {
                        // It's a raw string start
                        state = State::RawString(hash_count);
                        // Copy the opener
                        out.push('r');
                        for _ in 0..hash_count { out.push('#'); }
                        out.push('"');
                        i = j + 1;
                        continue;
                    }
                }
                if c == 'b' && i + 1 < bytes.len() {
                    let n = bytes[i + 1] as char;
                    if n == '"' {
                        // Byte string b"..."
                        state = State::ByteString;
                        out.push('b');

TSX program to inline using nix:

#!/usr/bin/env -S node --enable-source-maps
// remove-comments.ts
//
// Usage:
//   npx tsx remove-comments.ts [path]
//   # or compile with tsc and run `node dist/remove-comments.js [path]`
//
// Notes:
// - Defaults to current working directory if no path is provided
// - Recursively processes .tsx files
// - Skips node_modules and .direnv
// - In-place edit (only writes if file changes)

import fs from "node:fs/promises";
import path from "node:path";
import * as ts from "typescript";

// Directory names that processDirectory never descends into.
const IGNORED = new Set(["node_modules", ".direnv"]);
// File extensions (including the leading dot) that are eligible for processing.
const VALID_EXT = new Set([".tsx"]);

/**
 * Entry point: resolves the optional path argument (defaulting to the current
 * working directory) and processes it as a directory tree or a single file.
 * Anything else is reported on stderr as having nothing to do.
 */
async function main(): Promise<void> {
  const arg = process.argv[2];
  const inputPath = arg ? path.resolve(arg) : process.cwd();
  const info = await fs.stat(inputPath);

  if (info.isDirectory()) {
    await processDirectory(inputPath);
    return;
  }
  if (info.isFile() && VALID_EXT.has(path.extname(inputPath))) {
    await processFile(inputPath);
    return;
  }
  console.error(`Nothing to do: ${inputPath}`);
}

/**
 * Recursively visits `dir`, one entry at a time: sub-directories are descended
 * into unless their name is in IGNORED, and regular files with an extension in
 * VALID_EXT are passed to processFile.
 */
async function processDirectory(dir: string): Promise<void> {
  const children = await fs.readdir(dir, { withFileTypes: true });

  for (const child of children) {
    const childPath = path.join(dir, child.name);

    if (child.isDirectory()) {
      if (!IGNORED.has(child.name)) {
        await processDirectory(childPath);
      }
      continue;
    }

    if (child.isFile() && VALID_EXT.has(path.extname(child.name))) {
      await processFile(childPath);
    }
  }
}

/**
 * Strips comments from a single file in place.
 *
 * The file is rewritten (and an "Updated" line logged) only when the stripped
 * text differs from the original. Failures are reported on stderr and do not
 * propagate, so one bad file does not stop the rest of the run.
 */
async function processFile(filePath: string): Promise<void> {
  try {
    const original = await fs.readFile(filePath, "utf8");
    const stripped = stripCommentsTsx(original, filePath);

    if (stripped !== original) {
      await fs.writeFile(filePath, stripped, "utf8");
      console.log(`Updated: ${relativeCwd(filePath)}`);
    }
  } catch (err) {
    // Separate the path from the message, and cope with non-Error throwables.
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`Failed: ${relativeCwd(filePath)}: ${reason}`);
  }
}

/**
 * Returns `code` re-printed by the TypeScript printer with comments removed,
 * followed by two textual clean-up passes.
 *
 * @param code     Source text of the file.
 * @param fileName Name passed to the parser for the source file.
 */
function stripCommentsTsx(code: string, fileName: string): string {
  // Parse as TSX and print with removeComments so JS/TS comments are dropped.
  const sourceFile = ts.createSourceFile(
    fileName,
    code,
    ts.ScriptTarget.Latest,
    /*setParentNodes*/ true,
    ts.ScriptKind.TSX
  );
  const printed = ts
    .createPrinter({ removeComments: true, newLine: ts.NewLineKind.LineFeed })
    .printFile(sourceFile);

  // Drop JSX comment expressions such as `{/* ... */}` (non-greedy, may span
  // lines) so no empty `{}` is left behind.
  // NOTE(review): this pattern runs over the whole printed text, so a matching
  // sequence inside a string or template literal would be removed too.
  const withoutJsxComments = printed.replace(/\{\s*\/\*[\s\S]*?\*\/\s*\}/g, "");

  // Remove trailing spaces/tabs that deletions may have left at line ends.
  return withoutJsxComments.replace(/[ \t]+$/gm, "");
}

/** Returns `p` relative to the current working directory, or "." when they are the same. */
function relativeCwd(p: string): string {
  const rel = path.relative(process.cwd(), p);
  return rel || ".";
}

// Last-resort handler: print the stack (or message) of any error that escapes
// main and exit with a non-zero status.
main().catch((failure) => {
  const error = failure as Error;
  console.error(error.stack || error.message);
  process.exit(1);
});

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions