Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 21 additions & 17 deletions docker/internal/tarfile/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,21 +184,16 @@ func (t *tarReadCloser) Close() error {
return t.backingFile.Close()
}

// openTarComponent returns a ReadCloser for the specific file within the archive.
// This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers),
// and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough.
// It is safe to call this method from multiple goroutines simultaneously.
// The caller should call .Close() on the returned stream.
func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) {
func (r *Reader) openTarComponentWithHeader(componentPath string) (io.ReadCloser, *tar.Header, error) {
// This is only a sanity check; if anyone did concurrently close ra, this access is technically
// racy against the write in .Close().
if r.path == "" {
return nil, errors.New("Internal error: trying to read an already closed tarfile.Reader")
return nil, nil, errors.New("Internal error: trying to read an already closed tarfile.Reader")
}

f, err := os.Open(r.path)
if err != nil {
return nil, err
return nil, nil, err
}
succeeded := false
defer func() {
Expand All @@ -209,32 +204,41 @@ func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) {

tarReader, header, err := findTarComponent(f, componentPath)
if err != nil {
return nil, err
return nil, nil, err
}
if header == nil {
return nil, os.ErrNotExist
return nil, nil, os.ErrNotExist
}
if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // FIXME: untested
// We follow only one symlink; so no loops are possible.
if header.FileInfo().Mode()&os.ModeType == os.ModeSymlink { // We follow only one symlink; so no loops are possible.
if _, err := f.Seek(0, io.SeekStart); err != nil {
return nil, err
return nil, nil, err
}
// The new path could easily point "outside" the archive, but we only compare it to existing tar headers without extracting the archive,
// so we don't care.
tarReader, header, err = findTarComponent(f, path.Join(path.Dir(componentPath), header.Linkname))
if err != nil {
return nil, err
return nil, nil, err
}
if header == nil {
return nil, os.ErrNotExist
return nil, nil, os.ErrNotExist
}
}

if !header.FileInfo().Mode().IsRegular() {
return nil, fmt.Errorf("Error reading tar archive component %q: not a regular file", header.Name)
return nil, nil, fmt.Errorf("Error reading tar archive component %q: not a regular file", header.Name)
}
succeeded = true
return &tarReadCloser{Reader: tarReader, backingFile: f}, nil
return &tarReadCloser{Reader: tarReader, backingFile: f}, header, nil
}

// openTarComponent returns a ReadCloser for the specific file within the archive.
// This is linear scan; we assume that the tar file will have a fairly small amount of files (~layers),
// and that filesystem caching will make the repeated seeking over the (uncompressed) tarPath cheap enough.
// It is safe to call this method from multiple goroutines simultaneously.
// The caller should call .Close() on the returned stream.
func (r *Reader) openTarComponent(componentPath string) (io.ReadCloser, error) {
	// Delegate to the header-returning variant and discard the header.
	stream, _, err := r.openTarComponentWithHeader(componentPath)
	if err != nil {
		return nil, err
	}
	return stream, nil
}

// findTarComponent returns a header and a reader matching componentPath within inputFile,
Expand Down
30 changes: 27 additions & 3 deletions docker/internal/tarfile/src.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,24 +172,48 @@ func (s *Source) prepareLayerData(tarManifest *ManifestItem, parsedConfig *manif
layerPath := path.Clean(h.Name)
// FIXME: Cache this data across images in Reader.
if li, ok := unknownLayerSizes[layerPath]; ok {
underlyingReader := io.Reader(t)
underlyingSize := h.Size
var underlyingCloser io.Closer

if h.FileInfo().Mode()&os.ModeType == os.ModeSymlink {
rc, resolvedHeader, err := s.archive.openTarComponentWithHeader(layerPath)
if err != nil {
return nil, fmt.Errorf("opening %q to determine its size: %w", layerPath, err)
}
underlyingReader = rc
underlyingSize = resolvedHeader.Size
underlyingCloser = rc
}

// Since GetBlob will decompress layers that are compressed we need
// to do the decompression here as well, otherwise we will
// incorrectly report the size. Pretty critical, since tools like
// umoci always compress layer blobs. Obviously we only bother with
// the slower method of checking if it's compressed.
uncompressedStream, isCompressed, err := compression.AutoDecompress(t)
uncompressedStream, isCompressed, err := compression.AutoDecompress(underlyingReader)
if err != nil {
if underlyingCloser != nil {
_ = underlyingCloser.Close()
}
return nil, fmt.Errorf("auto-decompressing %q to determine its size: %w", layerPath, err)
}
defer uncompressedStream.Close()

uncompressedSize := h.Size
uncompressedSize := underlyingSize
if isCompressed {
uncompressedSize, err = io.Copy(io.Discard, uncompressedStream)
if err != nil {
_ = uncompressedStream.Close()
if underlyingCloser != nil {
_ = underlyingCloser.Close()
}
return nil, fmt.Errorf("reading %q to find its size: %w", layerPath, err)
}
}
_ = uncompressedStream.Close()
if underlyingCloser != nil {
_ = underlyingCloser.Close()
}
li.size = uncompressedSize
delete(unknownLayerSizes, layerPath)
}
Expand Down
71 changes: 71 additions & 0 deletions docker/internal/tarfile/src_test.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
package tarfile

import (
"archive/tar"
"bytes"
"context"
"encoding/json"
"io"
"strings"
"testing"

"github.com/containers/image/v5/manifest"
"github.com/containers/image/v5/pkg/blobinfocache/memory"
"github.com/containers/image/v5/types"
digest "github.com/opencontainers/go-digest"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -64,3 +67,71 @@ func TestSourcePrepareLayerData(t *testing.T) {
}
}
}

// TestSourceGetBlobSymlinkLayerSizeMatchesBytesReturned builds a docker-save-style
// archive whose manifest references a layer through a tar symlink entry, and
// verifies that GetBlob reports a size equal to the number of bytes it actually
// returns for that layer.
func TestSourceGetBlobSymlinkLayerSizeMatchesBytesReturned(t *testing.T) {
	ctx := context.Background()
	cache := memory.New()

	layerBytes := []byte("not empty")
	diffID := digest.FromBytes(layerBytes)
	configBytes := []byte(`{"rootfs":{"type":"layers","diff_ids":["` + diffID.String() + `"]}}`)

	manifestBytes, err := json.Marshal([]ManifestItem{
		{
			Config: "config.json",
			Layers: []string{"layer-link.tar"},
		},
	})
	require.NoError(t, err)

	var archive bytes.Buffer
	tw := tar.NewWriter(&archive)
	// writeRegular appends one regular-file entry with the given contents.
	writeRegular := func(name string, contents []byte) {
		require.NoError(t, tw.WriteHeader(&tar.Header{
			Name: name,
			Mode: 0o644,
			Size: int64(len(contents)),
		}))
		_, writeErr := tw.Write(contents)
		require.NoError(t, writeErr)
	}
	writeRegular("manifest.json", manifestBytes)
	writeRegular("config.json", configBytes)
	writeRegular("layer.tar", layerBytes)

	// The manifest names this symlink, not layer.tar itself, so the size
	// computation must resolve the link before measuring the layer.
	require.NoError(t, tw.WriteHeader(&tar.Header{
		Name:     "layer-link.tar",
		Typeflag: tar.TypeSymlink,
		Linkname: "layer.tar",
		Mode:     0o777,
	}))
	require.NoError(t, tw.Close())

	reader, err := NewReaderFromStream(nil, &archive)
	require.NoError(t, err)
	src := NewSource(reader, true, "transport name", nil, -1)
	t.Cleanup(func() { _ = src.Close() })

	layerStream, reportedSize, err := src.GetBlob(ctx, types.BlobInfo{
		Digest: diffID,
		Size:   -1,
	}, cache)
	require.NoError(t, err)
	defer layerStream.Close()

	readBytes, err := io.ReadAll(layerStream)
	require.NoError(t, err)
	assert.Equal(t, layerBytes, readBytes)
	assert.Equal(t, int64(len(layerBytes)), reportedSize)
}