From 10cb5b3e54f41bf4d8d323226fb7895cfc98ab7d Mon Sep 17 00:00:00 2001 From: J23 Date: Sat, 23 Aug 2025 13:52:23 +0800 Subject: [PATCH] Introduced sha256 support for git-sizer --- git/git.go | 27 ++++++++++++-- git/obj_iter.go | 6 ++-- git/obj_resolver.go | 4 +-- git/oid.go | 63 +++++++++++++++++++++++++++------ git/tree.go | 19 +++++----- git_sizer_test.go | 37 +++++++++++++++++++ internal/testutils/repoutils.go | 2 +- sizes/graph.go | 2 +- sizes/output.go | 4 +-- 9 files changed, 135 insertions(+), 29 deletions(-) diff --git a/git/git.go b/git/git.go index 096ce81..ef3cbc6 100644 --- a/git/git.go +++ b/git/git.go @@ -24,6 +24,8 @@ type Repository struct { // gitBin is the path of the `git` executable that should be used // when running commands in this repository. gitBin string + // hashAlgo is the repository's hash algorithm + hashAlgo HashAlgo } // smartJoin returns `relPath` if it is an absolute path. If not, it @@ -49,9 +51,18 @@ func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { ) } + hashAlgo := HashSHA1 + cmd := exec.Command(gitBin, "--git-dir", gitDir, "rev-parse", "--show-object-format") //nolint:gosec + if out, err := cmd.Output(); err == nil { + if string(bytes.TrimSpace(out)) == "sha256" { + hashAlgo = HashSHA256 + } + } + repo := Repository{ - gitDir: gitDir, - gitBin: gitBin, + gitDir: gitDir, + gitBin: gitBin, + hashAlgo: hashAlgo, } full, err := repo.IsFull() @@ -170,3 +181,15 @@ func (repo *Repository) GitPath(relPath string) (string, error) { // current directory, we can use it as-is: return string(bytes.TrimSpace(out)), nil } + +func (repo *Repository) HashAlgo() HashAlgo { + return repo.hashAlgo +} + +func (repo *Repository) HashSize() int { + return repo.hashAlgo.HashSize() +} + +func (repo *Repository) NullOID() OID { + return repo.hashAlgo.NullOID() +} diff --git a/git/obj_iter.go b/git/obj_iter.go index cecdc2a..c367f11 100644 --- a/git/obj_iter.go +++ b/git/obj_iter.go @@ -30,7 +30,7 @@ func (repo *Repository) NewObjectIter(ctx 
context.Context) (*ObjectIter, error) errCh: make(chan error), headerCh: make(chan BatchHeader), } - + hashHexSize := repo.HashSize() * 2 iter.p.Add( // Read OIDs from `iter.oidCh` and write them to `git // rev-list`: @@ -68,10 +68,10 @@ func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error) pipe.LinewiseFunction( "copy-oids", func(_ context.Context, _ pipe.Env, line []byte, stdout *bufio.Writer) error { - if len(line) < 40 { + if len(line) < hashHexSize { return fmt.Errorf("line too short: '%s'", line) } - if _, err := stdout.Write(line[:40]); err != nil { + if _, err := stdout.Write(line[:hashHexSize]); err != nil { return fmt.Errorf("writing OID to 'git cat-file': %w", err) } if err := stdout.WriteByte('\n'); err != nil { diff --git a/git/obj_resolver.go b/git/obj_resolver.go index 418e293..fbeb246 100644 --- a/git/obj_resolver.go +++ b/git/obj_resolver.go @@ -9,12 +9,12 @@ func (repo *Repository) ResolveObject(name string) (OID, error) { cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name) output, err := cmd.Output() if err != nil { - return NullOID, fmt.Errorf("resolving object %q: %w", name, err) + return repo.NullOID(), fmt.Errorf("resolving object %q: %w", name, err) } oidString := string(bytes.TrimSpace(output)) oid, err := NewOID(oidString) if err != nil { - return NullOID, fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) + return repo.NullOID(), fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) } return oid, nil } diff --git a/git/oid.go b/git/oid.go index 2aefbcb..2a2bdfc 100644 --- a/git/oid.go +++ b/git/oid.go @@ -1,32 +1,75 @@ package git import ( + "bytes" + "crypto/sha1" //nolint:gosec + "crypto/sha256" "encoding/hex" "errors" ) +const ( + HashSizeSHA256 = sha256.Size + HashSizeSHA1 = sha1.Size + HashSizeMax = HashSizeSHA256 +) + +type HashAlgo int + +const ( + HashUnknown HashAlgo = iota + HashSHA1 + HashSHA256 +) + // OID represents the SHA-1 object ID of a Git 
object, in binary // format. type OID struct { - v [20]byte + v [HashSizeMax]byte + hashSize int } -// NullOID is the null object ID; i.e., all zeros. -var NullOID OID +func (h HashAlgo) NullOID() OID { + switch h { + case HashSHA1: + return OID{hashSize: HashSizeSHA1} + case HashSHA256: + return OID{hashSize: HashSizeSHA256} + } + return OID{} +} + +func (h HashAlgo) HashSize() int { + switch h { + case HashSHA1: + return HashSizeSHA1 + case HashSHA256: + return HashSizeSHA256 + } + return 0 +} + +// defaultNullOID is the null object ID; i.e., all zeros. +var defaultNullOID OID + +func IsNullOID(o OID) bool { + return bytes.Equal(o.v[:], defaultNullOID.v[:]) +} // OIDFromBytes converts a byte slice containing an object ID in // binary format into an `OID`. func OIDFromBytes(oidBytes []byte) (OID, error) { var oid OID - if len(oidBytes) != len(oid.v) { + oidSize := len(oidBytes) + if oidSize != HashSizeSHA1 && oidSize != HashSizeSHA256 { return OID{}, errors.New("bytes oid has the wrong length") } - copy(oid.v[0:20], oidBytes) + oid.hashSize = oidSize + copy(oid.v[0:oidSize], oidBytes) return oid, nil } -// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40}`) -// into an `OID`. +// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40}` or `[0-9a-f]{64}`) into an `OID`. func NewOID(s string) (OID, error) { oidBytes, err := hex.DecodeString(s) if err != nil { @@ -37,18 +80,18 @@ func NewOID(s string) (OID, error) { // String formats `oid` as a string in hex format. func (oid OID) String() string { - return hex.EncodeToString(oid.v[:]) + return hex.EncodeToString(oid.v[:oid.hashSize]) } // Bytes returns a byte slice view of `oid`, in binary format. func (oid OID) Bytes() []byte { - return oid.v[:] + return oid.v[:oid.hashSize] } // MarshalJSON expresses `oid` as a JSON string with its enclosing // quotation marks. 
func (oid OID) MarshalJSON() ([]byte, error) { - src := oid.v[:] + src := oid.v[:oid.hashSize] dst := make([]byte, hex.EncodedLen(len(src))+2) dst[0] = '"' dst[len(dst)-1] = '"' diff --git a/git/tree.go b/git/tree.go index c31fa78..18cb3ee 100644 --- a/git/tree.go +++ b/git/tree.go @@ -10,13 +10,14 @@ import ( // Tree represents a Git tree object. type Tree struct { - data string + data string + hashSize int } // ParseTree parses the tree object whose contents are contained in // `data`. `oid` is currently unused. func ParseTree(oid OID, data []byte) (*Tree, error) { - return &Tree{string(data)}, nil + return &Tree{string(data), oid.hashSize}, nil } // Size returns the size of the tree object. @@ -36,13 +37,15 @@ type TreeEntry struct { // TreeIter is an iterator over the entries in a Git tree object. type TreeIter struct { // The as-yet-unread part of the tree's data. - data string + data string + hashSize int } // Iter returns an iterator over the entries in `tree`. func (tree *Tree) Iter() *TreeIter { return &TreeIter{ - data: tree.data, + data: tree.data, + hashSize: tree.hashSize, } } @@ -74,12 +77,12 @@ func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) { entry.Name = iter.data[:nulAt] iter.data = iter.data[nulAt+1:] - if len(iter.data) < 20 { + if len(iter.data) < iter.hashSize { return TreeEntry{}, false, errors.New("tree entry ends unexpectedly") } - - copy(entry.OID.v[0:20], iter.data[0:20]) - iter.data = iter.data[20:] + entry.OID.hashSize = iter.hashSize + copy(entry.OID.v[0:iter.hashSize], iter.data[0:iter.hashSize]) + iter.data = iter.data[iter.hashSize:] return entry, true, nil } diff --git a/git_sizer_test.go b/git_sizer_test.go index 8a7a2d2..c74b459 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -849,3 +849,40 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") assert.Equal(t, counts.Count32(3), h.MaxExpandedBlobCount, "max expanded blob count") } + +func TestSHA256(t 
*testing.T) { + t.Parallel() + + ctx := context.Background() + + t.Helper() + + path, err := os.MkdirTemp("", "sha256") + require.NoError(t, err) + + testRepo := testutils.TestRepo{Path: path} + defer testRepo.Remove(t) + + // Don't use `GitCommand()` because the directory might not + // exist yet: + cmd := exec.Command("git", "init", "--object-format", "sha256", testRepo.Path) + cmd.Env = testutils.CleanGitEnv() + err = cmd.Run() + require.NoError(t, err) + + timestamp := time.Unix(1112911993, 0) + + testRepo.AddFile(t, "hello.txt", "Hello, world!\n") + cmd = testRepo.GitCommand(t, "commit", "-m", "initial") + testutils.AddAuthorInfo(cmd, &timestamp) + require.NoError(t, cmd.Run(), "creating initial commit") + + cmd = testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + testutils.AddAuthorInfo(cmd, &timestamp) + require.NoError(t, cmd.Run(), "creating commit") + + repo := testRepo.Repository(t) + + _, err = sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) +} diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 48a8759..e14e487 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -165,7 +165,7 @@ func (repo *TestRepo) UpdateRef(t *testing.T, refname string, oid git.OID) { var cmd *exec.Cmd - if oid == git.NullOID { + if git.IsNullOID(oid) { cmd = repo.GitCommand(t, "update-ref", "-d", refname) } else { cmd = repo.GitCommand(t, "update-ref", refname, oid.String()) diff --git a/sizes/graph.go b/sizes/graph.go index 0fb1c8a..2101a00 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -134,7 +134,7 @@ func ScanRepositoryUsingGraph( case "tree": trees = append(trees, ObjectHeader{obj.OID, obj.ObjectSize}) case "commit": - commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, git.NullOID}) + commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, repo.NullOID()}) case "tag": tags = append(tags, ObjectHeader{obj.OID, 
obj.ObjectSize}) default: diff --git a/sizes/output.go b/sizes/output.go index 933cc05..037f905 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -155,7 +155,7 @@ func (i *item) Emit(t *table) { } func (i *item) Footnote(nameStyle NameStyle) string { - if i.path == nil || i.path.OID == git.NullOID { + if i.path == nil || git.IsNullOID(i.path.OID) { return "" } switch nameStyle { @@ -214,7 +214,7 @@ func (i *item) MarshalJSON() ([]byte, error) { LevelOfConcern: float64(value) / i.scale, } - if i.path != nil && i.path.OID != git.NullOID { + if i.path != nil && !git.IsNullOID(i.path.OID) { stat.ObjectName = i.path.OID.String() stat.ObjectDescription = i.path.Path() }