From 072898d0eac5be7a7e701db726c34770a93b59c6 Mon Sep 17 00:00:00 2001
From: 1seal
Date: Mon, 26 Jan 2026 17:19:59 +0000
Subject: [PATCH] docker/tarfile: Fix layer size for symlink entries

When a layer path in a docker-archive points at a symlink header,
prepareLayerData recorded the symlink header size (often 0) while
GetBlob followed the symlink and returned the target file bytes.
Resolve a single symlink when determining the layer size so the
reported size matches the bytes returned by GetBlob, and add a
regression test.

Signed-off-by: 1seal
---
 docker/internal/tarfile/reader.go   | 38 ++++++++-------
 docker/internal/tarfile/src.go      | 30 ++++++++++--
 docker/internal/tarfile/src_test.go | 71 +++++++++++++++++++++++++++++
 3 files changed, 119 insertions(+), 20 deletions(-)

diff --git a/docker/internal/tarfile/reader.go b/docker/internal/tarfile/reader.go
index 362657596e..de79a64216 100644
--- a/docker/internal/tarfile/reader.go
+++ b/docker/internal/tarfile/reader.go
@@ -184,21 +184,16 @@ func (t *tarReadCloser) Close() error {
 	return t.backingFile.Close()
 }
 
-// openTarComponent returns a ReadCloser for the specific file within the archive.
-// This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers),
-// and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough.
-// It is safe to call this method from multiple goroutines simultaneously.
-// The caller should call .Close() on the returned stream.
-func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) {
+func (r *Reader) openTarComponentWithHeader(componentPath string) (io.ReadCloser, *tar.Header, error) {
 	// This is only a sanity check; if anyone did concurrently close ra, this access is technically
 	// racy against the write in .Close().
 	if r.path == "" {
-		return nil, errors.New("Internal error: trying to read an already closed tarfile.Reader")
+		return nil, nil, errors.New("Internal error: trying to read an already closed tarfile.Reader")
 	}
 	f, err := os.Open(r.path)
 	if err != nil {
-		return nil, err
+		return nil, nil, err
 	}
 	succeeded := false
 	defer func() {
@@ -209,32 +204,41 @@ func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) {
 
 	tarReader, header, err := findTarComponent(f, componentPath)
 	if err != nil {
-		return nil, err
+		return nil, nil, err
 	}
 	if header == nil {
-		return nil, os.ErrNotExist
+		return nil, nil, os.ErrNotExist
 	}
-	if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // FIXME: untested
-		// We follow only one symlink; so no loops are possible.
+	if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // We follow only one symlink; so no loops are possible.
 		if _, err := f.Seek(0, io.SeekStart); err != nil {
-			return nil, err
+			return nil, nil, err
 		}
 		// The new path could easily point "outside" the archive, but we only compare it to existing tar headers without extracting the archive,
 		// so we don't care.
 		tarReader, header, err = findTarComponent(f, path.Join(path.Dir(componentPath), header.Linkname))
 		if err != nil {
-			return nil, err
+			return nil, nil, err
 		}
 		if header == nil {
-			return nil, os.ErrNotExist
+			return nil, nil, os.ErrNotExist
 		}
 	}
 	if !header.FileInfo().Mode().IsRegular() {
-		return nil, fmt.Errorf("Error reading tar archive component %q: not a regular file", header.Name)
+		return nil, nil, fmt.Errorf("Error reading tar archive component %q: not a regular file", header.Name)
 	}
 	succeeded = true
-	return &tarReadCloser{Reader: tarReader, backingFile: f}, nil
+	return &tarReadCloser{Reader: tarReader, backingFile: f}, header, nil
+}
+
+// openTarComponent returns a ReadCloser for the specific file within the archive.
+// This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers),
+// and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough.
+// It is safe to call this method from multiple goroutines simultaneously.
+// The caller should call .Close() on the returned stream.
+func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) {
+	rc, _, err := r.openTarComponentWithHeader(componentPath)
+	return rc, err
 }
 
 // findTarComponent returns a header and a reader matching componentPath within inputFile,
diff --git a/docker/internal/tarfile/src.go b/docker/internal/tarfile/src.go
index 3364e6c9f6..ec9a0e7334 100644
--- a/docker/internal/tarfile/src.go
+++ b/docker/internal/tarfile/src.go
@@ -172,24 +172,48 @@ func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manif
 		layerPath := path.Clean(h.Name)
 		// FIXME: Cache this data across images in Reader.
 		if li, ok := unknownLayerSizes[layerPath]; ok {
+			underlyingReader := io.Reader(t)
+			underlyingSize := h.Size
+			var underlyingCloser io.Closer
+
+			if h.FileInfo().Mode()&os.ModeType == os.ModeSymlink {
+				rc, resolvedHeader, err := s.archive.openTarComponentWithHeader(layerPath)
+				if err != nil {
+					return nil, fmt.Errorf("opening %q to determine its size: %w", layerPath, err)
+				}
+				underlyingReader = rc
+				underlyingSize = resolvedHeader.Size
+				underlyingCloser = rc
+			}
+
 			// Since GetBlob will decompress layers that are compressed we need
 			// to do the decompression here as well, otherwise we will
 			// incorrectly report the size. Pretty critical, since tools like
 			// umoci always compress layer blobs. Obviously we only bother with
 			// the slower method of checking if it's compressed.
-			uncompressedStream, isCompressed, err := compression.AutoDecompress(t)
+			uncompressedStream, isCompressed, err := compression.AutoDecompress(underlyingReader)
 			if err != nil {
+				if underlyingCloser != nil {
+					_ = underlyingCloser.Close()
+				}
 				return nil, fmt.Errorf("auto-decompressing %q to determine its size: %w", layerPath, err)
 			}
-			defer uncompressedStream.Close()
-			uncompressedSize := h.Size
+			uncompressedSize := underlyingSize
 			if isCompressed {
 				uncompressedSize, err = io.Copy(io.Discard, uncompressedStream)
 				if err != nil {
+					_ = uncompressedStream.Close()
+					if underlyingCloser != nil {
+						_ = underlyingCloser.Close()
+					}
 					return nil, fmt.Errorf("reading %q to find its size: %w", layerPath, err)
 				}
 			}
+			_ = uncompressedStream.Close()
+			if underlyingCloser != nil {
+				_ = underlyingCloser.Close()
+			}
 			li.size = uncompressedSize
 			delete(unknownLayerSizes, layerPath)
 		}
diff --git a/docker/internal/tarfile/src_test.go b/docker/internal/tarfile/src_test.go
index 97c312311b..bf3df0b3ea 100644
--- a/docker/internal/tarfile/src_test.go
+++ b/docker/internal/tarfile/src_test.go
@@ -1,8 +1,10 @@
 package tarfile
 
 import (
+	"archive/tar"
 	"bytes"
 	"context"
+	"encoding/json"
 	"io"
 	"strings"
 	"testing"
@@ -10,6 +12,7 @@ import (
 	"github.com/containers/image/v5/manifest"
 	"github.com/containers/image/v5/pkg/blobinfocache/memory"
 	"github.com/containers/image/v5/types"
+	digest "github.com/opencontainers/go-digest"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
@@ -64,3 +67,71 @@ func TestSourcePrepareLayerData(t *testing.T) {
 		}
 	}
 }
+
+func TestSourceGetBlobSymlinkLayerSizeMatchesBytesReturned(t *testing.T) {
+	ctx := context.Background()
+	cache := memory.New()
+
+	layerBytes := []byte("not empty")
+	diffID := digest.FromBytes(layerBytes)
+	configBytes := []byte(`{"rootfs":{"type":"layers","diff_ids":["` + diffID.String() + `"]}}`)
+
+	manifestBytes, err := json.Marshal([]ManifestItem{
+		{
+			Config: "config.json",
+			Layers: []string{"layer-link.tar"},
+		},
+	})
+	require.NoError(t, err)
+
+	var tarfileBuffer bytes.Buffer
+	tw := tar.NewWriter(&tarfileBuffer)
+	require.NoError(t, tw.WriteHeader(&tar.Header{
+		Name: "manifest.json",
+		Mode: 0o644,
+		Size: int64(len(manifestBytes)),
+	}))
+	_, err = tw.Write(manifestBytes)
+	require.NoError(t, err)
+
+	require.NoError(t, tw.WriteHeader(&tar.Header{
+		Name: "config.json",
+		Mode: 0o644,
+		Size: int64(len(configBytes)),
+	}))
+	_, err = tw.Write(configBytes)
+	require.NoError(t, err)
+
+	require.NoError(t, tw.WriteHeader(&tar.Header{
+		Name: "layer.tar",
+		Mode: 0o644,
+		Size: int64(len(layerBytes)),
+	}))
+	_, err = tw.Write(layerBytes)
+	require.NoError(t, err)
+
+	require.NoError(t, tw.WriteHeader(&tar.Header{
+		Name:     "layer-link.tar",
+		Typeflag: tar.TypeSymlink,
+		Linkname: "layer.tar",
+		Mode:     0o777,
+	}))
+	require.NoError(t, tw.Close())
+
+	reader, err := NewReaderFromStream(nil, &tarfileBuffer)
+	require.NoError(t, err)
+	src := NewSource(reader, true, "transport name", nil, -1)
+	t.Cleanup(func() { _ = src.Close() })
+
+	layerStream, reportedSize, err := src.GetBlob(ctx, types.BlobInfo{
+		Digest: diffID,
+		Size:   -1,
+	}, cache)
+	require.NoError(t, err)
+	defer layerStream.Close()
+
+	readBytes, err := io.ReadAll(layerStream)
+	require.NoError(t, err)
+	assert.Equal(t, layerBytes, readBytes)
+	assert.Equal(t, int64(len(layerBytes)), reportedSize)
+}
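
Not part of the patch: below is a minimal, standalone sketch of the single-symlink
resolution the change relies on, using only Go's standard library. The helper names
(findHeader, resolveOneSymlink) and the sample entry names are invented for
illustration; the real code paths are openTarComponentWithHeader and prepareLayerData
above. It shows why a symlink entry's own header cannot be used for sizing: it reports
Size 0, while the resolved target's header carries the size of the bytes that GetBlob
actually streams.

package main

import (
	"archive/tar"
	"bytes"
	"errors"
	"fmt"
	"io"
	"path"
)

// findHeader scans the archive from the start and returns the header whose
// cleaned name matches name, or an error if no such entry exists.
func findHeader(archive []byte, name string) (*tar.Header, error) {
	tr := tar.NewReader(bytes.NewReader(archive))
	for {
		h, err := tr.Next()
		if err == io.EOF {
			return nil, errors.New("component not found: " + name)
		}
		if err != nil {
			return nil, err
		}
		if path.Clean(h.Name) == name {
			return h, nil
		}
	}
}

// resolveOneSymlink returns the header that actually describes the bytes a
// reader following componentPath would get: if the entry is a symlink, the
// target's header (resolved relative to the entry's directory) is returned
// instead. Exactly one level of indirection is followed.
func resolveOneSymlink(archive []byte, componentPath string) (*tar.Header, error) {
	h, err := findHeader(archive, componentPath)
	if err != nil {
		return nil, err
	}
	if h.Typeflag != tar.TypeSymlink {
		return h, nil
	}
	return findHeader(archive, path.Join(path.Dir(componentPath), h.Linkname))
}

func main() {
	// Build a tiny archive: a regular file plus a symlink entry pointing at it.
	// Error handling on the writer is elided for brevity.
	var buf bytes.Buffer
	tw := tar.NewWriter(&buf)
	payload := []byte("not empty")
	_ = tw.WriteHeader(&tar.Header{Name: "layer.tar", Typeflag: tar.TypeReg, Mode: 0o644, Size: int64(len(payload))})
	_, _ = tw.Write(payload)
	_ = tw.WriteHeader(&tar.Header{Name: "layer-link.tar", Typeflag: tar.TypeSymlink, Linkname: "layer.tar", Mode: 0o777})
	_ = tw.Close()

	// Errors ignored for brevity; both entries exist in the archive built above.
	link, _ := findHeader(buf.Bytes(), "layer-link.tar")
	resolved, _ := resolveOneSymlink(buf.Bytes(), "layer-link.tar")
	fmt.Println(link.Size, resolved.Size) // prints: 0 9 (symlink header size vs. the target's real size)
}

Only one level of indirection is followed, matching openTarComponent's existing
behaviour, so no loop detection is needed; GetBlob resolves the same single symlink
when it later streams the layer, which is why the sizes now agree.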