diff --git a/CHANGELOG.md b/CHANGELOG.md index a4cb29e5f..6f38e6a42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,21 +16,29 @@ The following emojis are used to highlight certain changes: ### Added +- `ipld/unixfs/io`: added `SizeEstimationMode` for configurable HAMT sharding threshold decisions. Supports legacy link-based estimation (`SizeEstimationLinks`), accurate block-based estimation (`SizeEstimationBlock`), or disabling size-based thresholds (`SizeEstimationDisabled`). [#1088](https://github.com/ipfs/boxo/pull/1088), [IPIP-499](https://github.com/ipfs/specs/pull/499) +- `ipld/unixfs/io`: added `UnixFSProfile` with `UnixFS_v0_2015` and `UnixFS_v1_2025` presets for CID-deterministic file and directory DAG construction. [#1088](https://github.com/ipfs/boxo/pull/1088), [IPIP-499](https://github.com/ipfs/specs/pull/499) +- `files`: `NewSerialFileWithOptions` now supports controlling whether symlinks are preserved or dereferenced before being added to IPFS. See `SerialFileOptions.DereferenceSymlinks`. [#1088](https://github.com/ipfs/boxo/pull/1088), [IPIP-499](https://github.com/ipfs/specs/pull/499) + ### Changed +- 🛠 `chunker`: `DefaultBlockSize` changed from `const` to `var` to allow runtime configuration via global profiles. [#1088](https://github.com/ipfs/boxo/pull/1088), [IPIP-499](https://github.com/ipfs/specs/pull/499) - `gateway`: ✨ [IPIP-523](https://github.com/ipfs/specs/pull/523) `?format=` URL query parameter now takes precedence over `Accept` HTTP header, ensuring deterministic HTTP cache behavior and allowing browsers to use `?format=` even when they send `Accept` headers with specific content types. [#1074](https://github.com/ipfs/boxo/pull/1074) ### Removed ### Fixed +- 🛠 `ipld/unixfs/io`: fixed HAMT sharding threshold comparison to use `>` instead of `>=`. A directory exactly at the threshold now stays as a basic (flat) directory, aligning behavior with code documentation and the JS implementation. 
This is a theoretical breaking change, but unlikely to impact real-world users as it requires a directory to be exactly at the threshold boundary. If you depend on the old behavior, adjust `HAMTShardingSize` to be 1 byte lower. [#1088](https://github.com/ipfs/boxo/pull/1088), [IPIP-499](https://github.com/ipfs/specs/pull/499) +- `ipld/unixfs/mod`: fixed sparse file writes in MFS. Writing past the end of a file (e.g., `ipfs files write --offset 1000 /file` on a smaller file) would lose data because `expandSparse` created the zero-padding node but didn't update the internal pointer. Subsequent writes went to the old unexpanded node. [#1088](https://github.com/ipfs/boxo/pull/1088), [IPIP-499](https://github.com/ipfs/specs/pull/499) +- `ipld/unixfs/io`: fixed mode/mtime metadata loss during Basic<->HAMT directory conversions. Previously, directories with `WithStat(mode, mtime)` would lose this metadata when converting between basic and sharded formats, or when reloading a HAMT directory from disk. [#1088](https://github.com/ipfs/boxo/pull/1088), [IPIP-499](https://github.com/ipfs/specs/pull/499) + ### Security ## [v0.36.0] ### Added - - `routing/http`: `GET /routing/v1/dht/closest/peers/{key}` per [IPIP-476](https://github.com/ipfs/specs/pull/476) - `ipld/merkledag`: Added fetched node size reporting to the progress tracker. See [kubo#8915](https://github.com/ipfs/kubo/issues/8915) - `gateway`: Added a configurable fallback timeout for the gateway handler, defaulting to 1 hour. Configurable via `MaxRequestDuration` in the gateway config. diff --git a/chunker/parse.go b/chunker/parse.go index 486cd14ad..0ac17d88e 100644 --- a/chunker/parse.go +++ b/chunker/parse.go @@ -8,13 +8,16 @@ import ( "strings" ) -const ( - // DefaultBlockSize is the chunk size that splitters produce (or aim to). - DefaultBlockSize int64 = 1024 * 256 +// DefaultBlockSize is the chunk size that splitters produce (or aim to). +// Can be modified to change the default for all subsequent chunker operations. +// For CID-deterministic imports, prefer using UnixFSProfile presets from +// ipld/unixfs/io/profile.go which set this and other related globals. 
+var DefaultBlockSize int64 = 1024 * 256 - // No leaf block should contain more than 1MiB of payload data ( wrapping overhead aside ) - // This effectively mandates the maximum chunk size - // See discussion at https://github.com/ipfs/boxo/chunker/pull/21#discussion_r369124879 for background +const ( + // ChunkSizeLimit is the maximum allowed chunk size. + // No leaf block should contain more than 1MiB of payload data (wrapping overhead aside). + // See discussion at https://github.com/ipfs/go-ipfs-chunker/pull/21#discussion_r369124879 ChunkSizeLimit int = 1048576 ) diff --git a/files/serialfile.go b/files/serialfile.go index a05aabfd7..e2764727b 100644 --- a/files/serialfile.go +++ b/files/serialfile.go @@ -9,19 +9,37 @@ import ( "time" ) +// SerialFileOptions configures file traversal behavior for NewSerialFileWithOptions. +type SerialFileOptions struct { + // Filter determines which files to include or exclude during traversal. + // If nil, all files are included. + Filter *Filter + + // DereferenceSymlinks controls symlink handling during file traversal. + // When false (default), symlinks are stored as UnixFS nodes with + // Data.Type=symlink (4) containing the target path as specified in + // https://specs.ipfs.tech/unixfs/ + // When true, symlinks are dereferenced and replaced with their target: + // symlinks to files become regular file nodes, symlinks to directories + // are traversed recursively. + DereferenceSymlinks bool +} + // serialFile implements Node, and reads from a path on the OS filesystem. // No more than one file will be opened at a time. 
type serialFile struct { - path string - files []os.FileInfo - stat os.FileInfo - filter *Filter + path string + files []os.FileInfo + stat os.FileInfo + filter *Filter + dereferenceSymlinks bool } type serialIterator struct { - files []os.FileInfo - path string - filter *Filter + files []os.FileInfo + path string + filter *Filter + dereferenceSymlinks bool curName string curFile Node @@ -44,10 +62,25 @@ func NewSerialFile(path string, includeHidden bool, stat os.FileInfo) (Node, err return NewSerialFileWithFilter(path, filter, stat) } -// NewSerialFileWith takes a filepath, a filter for determining which files should be +// NewSerialFileWithFilter takes a filepath, a filter for determining which files should be // operated upon if the filepath is a directory, and a fileInfo and returns a // Node representing file, directory or special file. func NewSerialFileWithFilter(path string, filter *Filter, stat os.FileInfo) (Node, error) { + return NewSerialFileWithOptions(path, stat, SerialFileOptions{Filter: filter}) +} + +// NewSerialFileWithOptions creates a Node from a filesystem path with configurable options. +// The stat parameter should be obtained via os.Lstat (not os.Stat) to correctly detect symlinks. 
+func NewSerialFileWithOptions(path string, stat os.FileInfo, opts SerialFileOptions) (Node, error) { + // If dereferencing symlinks and this is a symlink, stat the target instead + if opts.DereferenceSymlinks && stat.Mode()&os.ModeSymlink != 0 { + targetStat, err := os.Stat(path) // follows symlink + if err != nil { + return nil, err + } + stat = targetStat + } + switch mode := stat.Mode(); { case mode.IsRegular(): file, err := os.Open(path) @@ -70,8 +103,15 @@ func NewSerialFileWithFilter(path string, filter *Filter, stat os.FileInfo) (Nod } contents = append(contents, content) } - return &serialFile{path, contents, stat, filter}, nil + return &serialFile{ + path: path, + files: contents, + stat: stat, + filter: opts.Filter, + dereferenceSymlinks: opts.DereferenceSymlinks, + }, nil case mode&os.ModeSymlink != 0: + // Only reached if DereferenceSymlinks is false target, err := os.Readlink(path) if err != nil { return nil, err @@ -98,7 +138,7 @@ func (it *serialIterator) Next() bool { stat := it.files[0] it.files = it.files[1:] - for it.filter.ShouldExclude(stat) { + for it.filter != nil && it.filter.ShouldExclude(stat) { if len(it.files) == 0 { return false } @@ -113,7 +153,10 @@ func (it *serialIterator) Next() bool { // recursively call the constructor on the next file // if it's a regular file, we will open it as a ReaderFile // if it's a directory, files in it will be opened serially - sf, err := NewSerialFileWithFilter(filePath, it.filter, stat) + sf, err := NewSerialFileWithOptions(filePath, stat, SerialFileOptions{ + Filter: it.filter, + DereferenceSymlinks: it.dereferenceSymlinks, + }) if err != nil { it.err = err return false @@ -130,9 +173,10 @@ func (it *serialIterator) Err() error { func (f *serialFile) Entries() DirIterator { return &serialIterator{ - path: f.path, - files: f.files, - filter: f.filter, + path: f.path, + files: f.files, + filter: f.filter, + dereferenceSymlinks: f.dereferenceSymlinks, } } @@ -156,7 +200,7 @@ func (f *serialFile) Size() 
(int64, error) { return err } - if f.filter.ShouldExclude(fi) { + if f.filter != nil && f.filter.ShouldExclude(fi) { if fi.Mode().IsDir() { return filepath.SkipDir } diff --git a/files/serialfile_test.go b/files/serialfile_test.go index 214625432..2c2725c86 100644 --- a/files/serialfile_test.go +++ b/files/serialfile_test.go @@ -6,6 +6,7 @@ import ( "io" "os" "path/filepath" + "runtime" "slices" "strings" "testing" @@ -189,3 +190,326 @@ testInputs: } } } + +func TestSerialFileSymlinks(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("skipping symlink test on Windows") + } + + tmpdir := t.TempDir() + + // Create test structure: + // tmpdir/ + // realfile.txt -> "real content" + // realdir/ + // nested.txt -> "nested content" + // linktofile -> symlink to realfile.txt + // linktodir -> symlink to realdir/ + + realfile := filepath.Join(tmpdir, "realfile.txt") + if err := os.WriteFile(realfile, []byte("real content"), 0o644); err != nil { + t.Fatal(err) + } + + realdir := filepath.Join(tmpdir, "realdir") + if err := os.Mkdir(realdir, 0o755); err != nil { + t.Fatal(err) + } + + nested := filepath.Join(realdir, "nested.txt") + if err := os.WriteFile(nested, []byte("nested content"), 0o644); err != nil { + t.Fatal(err) + } + + linktofile := filepath.Join(tmpdir, "linktofile") + if err := os.Symlink(realfile, linktofile); err != nil { + t.Fatal(err) + } + + linktodir := filepath.Join(tmpdir, "linktodir") + if err := os.Symlink(realdir, linktodir); err != nil { + t.Fatal(err) + } + + t.Run("preserve symlinks (default)", func(t *testing.T) { + stat, err := os.Lstat(tmpdir) + if err != nil { + t.Fatal(err) + } + + sf, err := NewSerialFileWithOptions(tmpdir, stat, SerialFileOptions{}) + if err != nil { + t.Fatal(err) + } + defer sf.Close() + + symlinkCount := 0 + err = Walk(sf, func(path string, nd Node) error { + defer nd.Close() + if _, ok := nd.(*Symlink); ok { + symlinkCount++ + } + return nil + }) + if err != nil { + t.Fatal(err) + } + + if symlinkCount != 2 { + 
t.Errorf("expected 2 symlinks, got %d", symlinkCount) + } + }) + + t.Run("dereference symlinks", func(t *testing.T) { + stat, err := os.Lstat(tmpdir) + if err != nil { + t.Fatal(err) + } + + sf, err := NewSerialFileWithOptions(tmpdir, stat, SerialFileOptions{ + DereferenceSymlinks: true, + }) + if err != nil { + t.Fatal(err) + } + defer sf.Close() + + symlinkCount := 0 + fileContents := make(map[string]string) + dirCount := 0 + + err = Walk(sf, func(path string, nd Node) error { + defer nd.Close() + switch n := nd.(type) { + case *Symlink: + symlinkCount++ + case File: + data, err := io.ReadAll(n) + if err != nil { + return err + } + fileContents[path] = string(data) + case Directory: + if path != "" { // skip root + dirCount++ + } + } + return nil + }) + if err != nil { + t.Fatal(err) + } + + if symlinkCount != 0 { + t.Errorf("expected 0 symlinks with dereferencing, got %d", symlinkCount) + } + + // linktofile should now be a regular file with same content as realfile + if content, ok := fileContents["linktofile"]; !ok { + t.Error("linktofile not found as file") + } else if content != "real content" { + t.Errorf("linktofile content = %q, want %q", content, "real content") + } + + // linktodir should now be a directory containing nested.txt + if content, ok := fileContents["linktodir/nested.txt"]; !ok { + t.Error("linktodir/nested.txt not found") + } else if content != "nested content" { + t.Errorf("linktodir/nested.txt content = %q, want %q", content, "nested content") + } + }) + + t.Run("dereference single symlink to file", func(t *testing.T) { + stat, err := os.Lstat(linktofile) + if err != nil { + t.Fatal(err) + } + + sf, err := NewSerialFileWithOptions(linktofile, stat, SerialFileOptions{ + DereferenceSymlinks: true, + }) + if err != nil { + t.Fatal(err) + } + defer sf.Close() + + // Should be a File, not a Symlink + file, ok := sf.(File) + if !ok { + t.Fatalf("expected File, got %T", sf) + } + + data, err := io.ReadAll(file) + if err != nil { + t.Fatal(err) + 
} + + if string(data) != "real content" { + t.Errorf("content = %q, want %q", string(data), "real content") + } + }) + + t.Run("dereference single symlink to directory", func(t *testing.T) { + stat, err := os.Lstat(linktodir) + if err != nil { + t.Fatal(err) + } + + sf, err := NewSerialFileWithOptions(linktodir, stat, SerialFileOptions{ + DereferenceSymlinks: true, + }) + if err != nil { + t.Fatal(err) + } + defer sf.Close() + + // Should be a Directory, not a Symlink + dir, ok := sf.(Directory) + if !ok { + t.Fatalf("expected Directory, got %T", sf) + } + + // Should contain nested.txt + entries := dir.Entries() + found := false + for entries.Next() { + if entries.Name() == "nested.txt" { + found = true + break + } + } + if err := entries.Err(); err != nil { + t.Fatal(err) + } + if !found { + t.Error("nested.txt not found in dereferenced directory") + } + }) +} + +func TestCircularSymlinks(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("skipping symlink test on Windows") + } + + const symlinkErrMsg = "too many levels of symbolic links" + + tmpdir := t.TempDir() + + // Create test structure: + // tmpdir/ + // linktolinkA -> symlink to linktolinkB + // linktolinkB -> symlink to linktolinkC + // linktolinkC -> symlink to linktolinkA + + linktolinkA := filepath.Join(tmpdir, "linktolinkA") + linktolinkB := filepath.Join(tmpdir, "linktolinkB") + linktolinkC := filepath.Join(tmpdir, "linktolinkC") + if err := os.Symlink(linktolinkB, linktolinkA); err != nil { + t.Fatal(err) + } + if err := os.Symlink(linktolinkC, linktolinkB); err != nil { + t.Fatal(err) + } + if err := os.Symlink(linktolinkA, linktolinkC); err != nil { + t.Fatal(err) + } + + t.Run("preserve symlinks (default)", func(t *testing.T) { + stat, err := os.Lstat(tmpdir) + if err != nil { + t.Fatal(err) + } + + sf, err := NewSerialFileWithOptions(tmpdir, stat, SerialFileOptions{}) + if err != nil { + t.Fatal(err) + } + defer sf.Close() + + symlinkCount := 0 + err = Walk(sf, func(path string, nd Node) 
error { + defer nd.Close() + if _, ok := nd.(*Symlink); ok { + symlinkCount++ + } + return nil + }) + if err != nil { + t.Fatal(err) + } + + if symlinkCount != 3 { + t.Errorf("expected 3 symlinks, got %d", symlinkCount) + } + }) + + t.Run("dereference symlinks", func(t *testing.T) { + stat, err := os.Lstat(tmpdir) + if err != nil { + t.Fatal(err) + } + + sf, err := NewSerialFileWithOptions(tmpdir, stat, SerialFileOptions{ + DereferenceSymlinks: true, + }) + if err != nil { + t.Fatal(err) + } + defer sf.Close() + + symlinkCount := 0 + fileContents := make(map[string]string) + dirCount := 0 + + err = Walk(sf, func(path string, nd Node) error { + defer nd.Close() + switch n := nd.(type) { + case *Symlink: + symlinkCount++ + case File: + data, err := io.ReadAll(n) + if err != nil { + return err + } + fileContents[path] = string(data) + case Directory: + if path != "" { // skip root + dirCount++ + } + } + return nil + }) + if err == nil { + t.Fatal("expected error") + } + perr, ok := err.(*os.PathError) + if !ok { + t.Fatal("expected PathError, got:", err) + } + if !strings.Contains(perr.Err.Error(), symlinkErrMsg) { + t.Fatalf("expected error to contain %q, got %q", symlinkErrMsg, perr.Err.Error()) + } + }) + + t.Run("dereference single symlink to symlink", func(t *testing.T) { + stat, err := os.Lstat(linktolinkA) + if err != nil { + t.Fatal(err) + } + + _, err = NewSerialFileWithOptions(linktolinkA, stat, SerialFileOptions{ + DereferenceSymlinks: true, + }) + if err == nil { + t.Fatal("expected error") + } + perr, ok := err.(*os.PathError) + if !ok { + t.Fatal("expected PathError, got:", err) + } + if !strings.Contains(perr.Err.Error(), symlinkErrMsg) { + t.Fatalf("expected error to contain %q, got %q", symlinkErrMsg, perr.Err.Error()) + } + }) +} diff --git a/ipld/unixfs/hamt/hamt.go b/ipld/unixfs/hamt/hamt.go index 97b9620cf..732aad147 100644 --- a/ipld/unixfs/hamt/hamt.go +++ b/ipld/unixfs/hamt/hamt.go @@ -29,6 +29,7 @@ import ( "os" "slices" "sync" + "time" 
"github.com/gammazero/deque" dag "github.com/ipfs/boxo/ipld/merkledag" @@ -86,6 +87,22 @@ type Shard struct { // leaf node key string val *ipld.Link + + // Optional metadata for the root shard node + mode os.FileMode + mtime time.Time +} + +// SetStat sets optional mode and mtime metadata on the shard. +// These are included in the UnixFS data field when Node() is called. +// Pass mode=0 or zero time to leave that field unchanged. +func (ds *Shard) SetStat(mode os.FileMode, mtime time.Time) { + if mode != 0 { + ds.mode = mode + } + if !mtime.IsZero() { + ds.mtime = mtime + } } // NewShard creates a new, empty HAMT shard with the given size. @@ -213,7 +230,7 @@ func (ds *Shard) Node() (ipld.Node, error) { sliceIndex++ } - data, err := format.HAMTShardData(ds.childer.bitfield.Bytes(), uint64(ds.tableSize), HashMurmur3) + data, err := format.HAMTShardDataWithStat(ds.childer.bitfield.Bytes(), uint64(ds.tableSize), HashMurmur3, ds.mode, ds.mtime) if err != nil { return nil, err } diff --git a/ipld/unixfs/importer/helpers/helpers.go b/ipld/unixfs/importer/helpers/helpers.go index db2a3a15d..828d9aaeb 100644 --- a/ipld/unixfs/importer/helpers/helpers.go +++ b/ipld/unixfs/importer/helpers/helpers.go @@ -27,6 +27,9 @@ var ( // var DefaultLinksPerBlock = (roughLinkBlockSize / roughLinkSize) // = ( 8192 / 47 ) // = (approximately) 174 +// +// For CID-deterministic imports, prefer using UnixFSProfile presets from +// ipld/unixfs/io/profile.go which set this and other related globals. var DefaultLinksPerBlock = roughLinkBlockSize / roughLinkSize // ErrSizeLimitExceeded signals that a block is larger than BlockSizeLimit. 
diff --git a/ipld/unixfs/importer/trickle/trickle_test.go b/ipld/unixfs/importer/trickle/trickle_test.go index 18594c0c2..c19f07c56 100644 --- a/ipld/unixfs/importer/trickle/trickle_test.go +++ b/ipld/unixfs/importer/trickle/trickle_test.go @@ -746,7 +746,7 @@ func TestMetadata(t *testing.T) { } func testMetadata(t *testing.T, rawLeaves UseRawLeaves) { - const nbytes = 3 * chunker.DefaultBlockSize + nbytes := 3 * chunker.DefaultBlockSize buf := new(bytes.Buffer) _, err := io.CopyN(buf, random.NewRand(), nbytes) if err != nil { diff --git a/ipld/unixfs/io/directory.go b/ipld/unixfs/io/directory.go index bb5bc1257..b32de2ae4 100644 --- a/ipld/unixfs/io/directory.go +++ b/ipld/unixfs/io/directory.go @@ -3,10 +3,12 @@ package io import ( "context" "errors" + "math/bits" "os" "time" "github.com/alecthomas/units" + "github.com/ipfs/boxo/files" mdag "github.com/ipfs/boxo/ipld/merkledag" format "github.com/ipfs/boxo/ipld/unixfs" "github.com/ipfs/boxo/ipld/unixfs/hamt" @@ -21,15 +23,154 @@ var log = logging.Logger("unixfs") // HAMTShardingSize is a global option that allows switching to a HAMTDirectory // when the BasicDirectory grows above the size (in bytes) signalled by this // flag. The default size of 0 disables the option. -// The size is not the *exact* block size of the encoded BasicDirectory but just -// the estimated size based byte length of links name and CID (BasicDirectory's -// ProtoNode doesn't use the Data field so this estimate is pretty accurate). +// +// Size estimation depends on HAMTSizeEstimation mode: +// - SizeEstimationLinks (default): estimates using link name + CID byte lengths, +// ignoring Tsize, protobuf overhead, and UnixFS Data field (mode/mtime). +// - SizeEstimationBlock: computes exact serialized dag-pb block size arithmetically, +// including the UnixFS Data field and all protobuf encoding overhead. +// - SizeEstimationDisabled: ignores size entirely, uses only MaxLinks. 
+// +// Threshold behavior: directory converts to HAMT when estimatedSize > HAMTShardingSize. +// A directory exactly at the threshold stays basic (threshold value is NOT included). +// +// Thread safety: this global is not safe for concurrent modification. +// Set it once during program initialization, before starting any imports. var HAMTShardingSize = int(256 * units.KiB) // DefaultShardWidth is the default value used for hamt sharding width. // Needs to be a power of two (shard entry size) and multiple of 8 (bitfield size). +// +// Thread safety: this global is not safe for concurrent modification. +// Set it once during program initialization, before starting any imports. var DefaultShardWidth = 256 +// SizeEstimationMode defines how directory size is estimated for HAMT sharding decisions. +// If unsure which mode to use, prefer SizeEstimationBlock for accurate estimation. +type SizeEstimationMode int + +const ( + // SizeEstimationLinks estimates size using link names + CID byte lengths. + // This is the legacy behavior: sum(len(link.Name) + len(link.Cid.Bytes())) + // This mode ignores Tsize, protobuf overhead, and optional metadata fields + // (mode, mtime), which may lead to underestimation and delayed HAMT conversion. + // Use only when compatibility with legacy DAGs and software is required. + SizeEstimationLinks SizeEstimationMode = iota + + // SizeEstimationBlock estimates size using full serialized dag-pb block size. + // This correctly accounts for all fields including Tsize, protobuf varints, + // and optional metadata (mode, mtime). Use this mode for accurate HAMT + // threshold decisions and cross-implementation CID determinism. + SizeEstimationBlock + + // SizeEstimationDisabled disables size-based HAMT threshold entirely. + // When set, the decision to convert between BasicDirectory and HAMTDirectory + // is based solely on the number of links, controlled by MaxLinks option + // (set via WithMaxLinks). HAMTShardingSize is ignored. 
Use this mode when + // you want explicit control over directory sharding based on entry count + // rather than serialized size. + SizeEstimationDisabled +) + +// HAMTSizeEstimation controls which method is used to estimate directory size +// for HAMT sharding decisions. Default is SizeEstimationLinks for backward compatibility. +// Modern software should set this to SizeEstimationBlock for accurate estimation. +// +// Thread safety: this global is not safe for concurrent modification. +// Set it once during program initialization, before starting any imports. +var HAMTSizeEstimation = SizeEstimationLinks + +// varintLen returns the encoded size of a protobuf varint. +func varintLen(v uint64) int { + // Protobuf varints use 7 bits per byte (MSB is continuation flag), so a value + // requiring N bits needs ceil(N/7) bytes. This is equivalent to: + // if v == 0 { return 1 } + // return (bits.Len64(v) + 6) / 7 + // but avoids branching: (9*bitLen + 64) / 64 maps bitLen=0 to 1 and computes + // ceil(bitLen/7) for bitLen>0 (since 9/64 approximates 1/7). + return int(9*uint32(bits.Len64(v))+64) / 64 +} + +// linkSerializedSize returns the exact number of bytes a link adds to a PBNode +// protobuf encoding. This is used for IPIP-499's "block-bytes" HAMT threshold +// estimation mode (SizeEstimationBlock), which provides accurate size tracking +// by accounting for Tsize and protobuf varint overhead that simpler estimation +// methods ignore. 
+func linkSerializedSize(name string, c cid.Cid, tsize uint64) int { + cidLen := len(c.Bytes()) + nameLen := len(name) + + // PBLink encoding (all field tags are 1 byte since field numbers < 16): + // - Hash (field 1, bytes): tag(1) + len_varint + cid_bytes + // - Name (field 2, string): tag(1) + len_varint + name_bytes + // - Tsize (field 3, varint): tag(1) + varint + linkLen := 1 + varintLen(uint64(cidLen)) + cidLen + + 1 + varintLen(uint64(nameLen)) + nameLen + + 1 + varintLen(tsize) + + // Wrapper in PBNode.Links (field 2): tag(1) + len_varint + message + return 1 + varintLen(uint64(linkLen)) + linkLen +} + +// dataFieldSerializedSize returns the exact number of bytes the UnixFS Data field +// adds to a PBNode protobuf encoding for a BasicDirectory. This computes the size +// arithmetically without serialization. +// +// UnixFS Data fields (https://specs.ipfs.tech/unixfs/#data): +// - Type (field 1, varint): USED - always Directory (1) for BasicDirectory +// - Data (field 2, bytes): NOT USED - only for File/Raw content +// - filesize (field 3, varint): NOT USED - only for File nodes +// - blocksizes (field 4, repeated varint): NOT USED - only for File nodes +// - hashType (field 5, varint): NOT USED - only for HAMTShard +// - fanout (field 6, varint): NOT USED - only for HAMTShard +// - mode (field 7, varint): USED - optional unix permissions +// - mtime (field 8, message): USED - optional modification time +// +// If new fields are added to the UnixFS spec for directories, this function +// must be updated. The test TestDataFieldSerializedSizeMatchesActual verifies +// this calculation against actual protobuf serialization. 
+func dataFieldSerializedSize(mode os.FileMode, mtime time.Time) int { + // Inner UnixFS Data message + innerSize := 0 + + // Type field (field 1, varint): Directory = 1 + // Protobuf tag = field_number * 8 + wire_type = 1 * 8 + 0 = 8 (1 byte) + // Value 1 encodes as 1 byte, so total = 2 bytes + innerSize += 2 + + // Mode field (field 7, optional varint) + if mode != 0 { + modeVal := uint64(files.ModePermsToUnixPerms(mode)) + // Protobuf tag = field_number * 8 + wire_type = 7 * 8 + 0 = 56 (1 byte) + innerSize += 1 + varintLen(modeVal) + } + + // Mtime field (field 8, optional embedded message) + if !mtime.IsZero() { + mtimeSize := 0 + // seconds (field 1, int64 varint) + secs := mtime.Unix() + if secs >= 0 { + mtimeSize += 1 + varintLen(uint64(secs)) + } else { + // Protobuf encodes negative int64 as 10-byte varint (sign-extended to + // fill all 64 bits, requiring the maximum varint encoding length). + mtimeSize += 1 + 10 + } + + // nanos (field 2, optional fixed32) + if mtime.Nanosecond() > 0 { + mtimeSize += 1 + 4 // tag(1) + fixed32(4) + } + + // Mtime wrapper: tag(1) + len_varint + message + innerSize += 1 + varintLen(uint64(mtimeSize)) + mtimeSize + } + + // PBNode.Data wrapper: tag(1) + len_varint + innerSize + return 1 + varintLen(uint64(innerSize)) + innerSize +} + // Directory defines a UnixFS directory. It is used for creating, reading and // editing directories. It allows to work with different directory schemes, // like the basic or the HAMT implementation. @@ -89,6 +230,23 @@ type Directory interface { // SetStat sets the stat information for the directory. Used when // converting between Basic and HAMT. SetStat(os.FileMode, time.Time) + + // GetSizeEstimationMode returns the method used to estimate serialized dag-pb block size + // for directory type conversion decisions. + // Returns the instance-specific mode if set, otherwise the global HAMTSizeEstimation. 
+ GetSizeEstimationMode() SizeEstimationMode + + // SetSizeEstimationMode sets the method used to estimate serialized dag-pb block size. + // Used when inheriting settings from a parent directory after loading from disk. + SetSizeEstimationMode(SizeEstimationMode) + + // GetHAMTShardingSize returns the per-directory threshold for HAMT sharding. + // If not set (0), the global HAMTShardingSize is used. + GetHAMTShardingSize() int + + // SetHAMTShardingSize sets the per-directory threshold for HAMT sharding. + // Used when inheriting settings from a parent directory after loading from disk. + SetHAMTShardingSize(size int) } // A DirectoryOption can be used to initialize directories. @@ -146,6 +304,20 @@ func WithStat(mode os.FileMode, mtime time.Time) DirectoryOption { } } +// WithSizeEstimationMode sets the method used to estimate serialized dag-pb block size +// when deciding whether to convert between BasicDirectory and HAMTDirectory. +// This must be set at directory creation time; mode cannot be changed afterwards. +func WithSizeEstimationMode(mode SizeEstimationMode) DirectoryOption { + return func(d Directory) { + switch dir := d.(type) { + case *BasicDirectory: + dir.sizeEstimation = &mode + case *HAMTDirectory: + dir.sizeEstimation = &mode + } + } +} + // TODO: Evaluate removing `dserv` from this layer and providing it in MFS. // (The functions should in that case add a `DAGService` argument.) @@ -171,13 +343,15 @@ type BasicDirectory struct { node *mdag.ProtoNode dserv ipld.DAGService - // Internal variable used to cache the estimated size of the basic directory: - // for each link, aggregate link name + link CID. DO NOT CHANGE THIS - // as it will affect the HAMT transition behavior in HAMTShardingSize. - // (We maintain this value up to date even if the HAMTShardingSize is off - // since potentially the option could be activated on the fly.) + // Internal variable used to cache the size of the basic directory. 
+ // The meaning depends on the size estimation mode (set at creation time): + // - SizeEstimationLinks: sum of link name + CID byte lengths (legacy estimate) + // - SizeEstimationBlock: exact serialized dag-pb block size (computed arithmetically) + // - SizeEstimationDisabled: 0 (size tracking disabled) + // DO NOT CHANGE THIS as it will affect the HAMT transition behavior in HAMTShardingSize. estimatedSize int - totalLinks int + + totalLinks int // opts // maxNumberOfLinks. If set, can trigger conversion to HAMT directory. @@ -186,6 +360,13 @@ type BasicDirectory struct { cidBuilder cid.Builder mode os.FileMode mtime time.Time + + // Size estimation mode. If nil, falls back to global HAMTSizeEstimation. + // Set at creation time via WithSizeEstimationMode option. + sizeEstimation *SizeEstimationMode + + // Per-directory HAMT sharding size threshold. If 0, falls back to global HAMTShardingSize. + hamtShardingSize int } // HAMTDirectory is the HAMT implementation of `Directory`. @@ -205,6 +386,12 @@ type HAMTDirectory struct { // for the HAMTShardingSize option. sizeChange int totalLinks int + + // Size estimation mode. If nil, falls back to global HAMTSizeEstimation. + sizeEstimation *SizeEstimationMode + + // Per-directory HAMT sharding size threshold. If 0, falls back to global HAMTShardingSize. + hamtShardingSize int } // NewBasicDirectory creates an empty basic directory with the given options. @@ -230,7 +417,8 @@ func NewBasicDirectory(dserv ipld.DAGService, opts ...DirectoryOption) (*BasicDi return nil, err } - // Scan node links (if any) to restore estimated size. + // Initialize all size tracking fields from the node. + // This must be done after node is created and options applied. basicDir.computeEstimatedSizeAndTotalLinks() return basicDir, nil @@ -243,7 +431,17 @@ func NewBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *Bas basicDir.node = node basicDir.dserv = dserv - // Scan node links (if any) to restore estimated size. 
+ // Extract mode/mtime from node's UnixFS data for size estimation. + // This allows dataFieldSerializedSize to compute the Data field size + // without re-parsing or re-serializing the node. + if data := node.Data(); len(data) > 0 { + if fsNode, err := format.FSNodeFromBytes(data); err == nil { + basicDir.mode = fsNode.Mode() + basicDir.mtime = fsNode.ModTime() + } + } + + // Initialize all size tracking fields from the node. basicDir.computeEstimatedSizeAndTotalLinks() return basicDir @@ -261,13 +459,14 @@ func NewHAMTDirectory(dserv ipld.DAGService, sizeChange int, opts ...DirectoryOp opt(dir) } - // FIXME: do shards not support mtime and mode? shard, err := hamt.NewShard(dir.dserv, dir.maxHAMTFanout) if err != nil { return nil, err } shard.SetCidBuilder(dir.cidBuilder) + // Propagate mode/mtime to the shard for inclusion in UnixFS data + shard.SetStat(dir.mode, dir.mtime) dir.shard = shard return dir, nil @@ -325,6 +524,9 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err if err != nil { return nil, err } + // Restore mode/mtime from the HAMT shard's UnixFS metadata. + // These are needed for HAMT->Basic conversion to preserve metadata. + hamtDir.SetStat(fsNode.Mode(), fsNode.ModTime()) return &DynamicDirectory{hamtDir}, nil } @@ -361,8 +563,16 @@ func (d *BasicDirectory) SetMaxHAMTFanout(n int) { d.maxHAMTFanout = n } -// SetStat has no effect and only exists to support dynamic directories. Use -// WithStat when creating a new directory instead. +// SetStat stores mode and/or mtime values for use during Basic<->HAMT conversions. +// Pass zero for mode or zero time for mtime to leave that field unchanged. +// Note: clearing previously set values is not supported; to clear mode/mtime, +// create a new directory without them. +// +// This method does NOT modify the underlying node's Data field or cachedBlockSize. +// The stored values are only applied when creating a new directory during conversion +// (via WithStat option). 
If you need to update an existing node's mode/mtime, +// modify the node directly and replace the directory, or use MFS which handles +// this for you. func (d *BasicDirectory) SetStat(mode os.FileMode, mtime time.Time) { if mode > 0 { d.mode = mode @@ -372,27 +582,105 @@ func (d *BasicDirectory) SetStat(mode os.FileMode, mtime time.Time) { } } +// GetSizeEstimationMode returns the method used to estimate serialized dag-pb block size +// for BasicDirectory to HAMTDirectory conversion decisions. +// Returns the instance-specific mode if set, otherwise the global HAMTSizeEstimation. +func (d *BasicDirectory) GetSizeEstimationMode() SizeEstimationMode { + if d.sizeEstimation != nil { + return *d.sizeEstimation + } + return HAMTSizeEstimation // fall back to global +} + +// SetSizeEstimationMode sets the method used to estimate serialized dag-pb block size. +// Used when inheriting settings from a parent directory after loading from disk. +// Note: This only recomputes estimatedSize, not totalLinks, since link count +// is independent of the estimation mode. +func (d *BasicDirectory) SetSizeEstimationMode(mode SizeEstimationMode) { + oldMode := d.GetSizeEstimationMode() + d.sizeEstimation = &mode + + // Only recompute estimatedSize if mode actually changed + if mode == oldMode { + return + } + + // Recompute estimatedSize with new mode, but preserve totalLinks + savedTotalLinks := d.totalLinks + d.computeEstimatedSizeAndTotalLinks() + // Restore totalLinks since it shouldn't change based on mode + d.totalLinks = savedTotalLinks +} + +// GetHAMTShardingSize returns the per-directory threshold for HAMT sharding. +// If not set (0), the global HAMTShardingSize is used. +func (d *BasicDirectory) GetHAMTShardingSize() int { + return d.hamtShardingSize +} + +// SetHAMTShardingSize sets the per-directory threshold for HAMT sharding. +// Used when inheriting settings from a parent directory after loading from disk. 
+func (d *BasicDirectory) SetHAMTShardingSize(size int) { + d.hamtShardingSize = size +} + +// computeEstimatedSizeAndTotalLinks initializes size tracking fields from the current node. +// The estimatedSize is computed based on the current size estimation mode: +// - SizeEstimationLinks: sum of link name + CID byte lengths +// - SizeEstimationBlock: full serialized dag-pb block size (computed arithmetically) +// - SizeEstimationDisabled: 0 func (d *BasicDirectory) computeEstimatedSizeAndTotalLinks() { d.estimatedSize = 0 - // err is just breaking the iteration and we always return nil - _ = d.ForEachLink(context.TODO(), func(l *ipld.Link) error { - d.addToEstimatedSize(l.Name, l.Cid) + d.totalLinks = 0 + + mode := d.GetSizeEstimationMode() + if mode == SizeEstimationBlock && d.node != nil { + // Compute data field size from stored metadata (no serialization needed). + // The mode and mtime fields are extracted in NewBasicDirectoryFromNode + // or set via WithStat option during creation. + d.estimatedSize = dataFieldSerializedSize(d.mode, d.mtime) + + // Add link sizes using linkSerializedSize function + for _, l := range d.node.Links() { + d.estimatedSize += linkSerializedSize(l.Name, l.Cid, l.Size) + d.totalLinks++ + } + return + } + + // For Links mode, accumulate link sizes; for Disabled mode, just count + for _, l := range d.node.Links() { + if mode == SizeEstimationLinks { + d.estimatedSize += linksize.LinkSizeFunction(l.Name, l.Cid) + } d.totalLinks++ - return nil - }) - // ForEachLink will never fail traversing the BasicDirectory - // and neither the inner callback `addToEstimatedSize`. + } } -func (d *BasicDirectory) addToEstimatedSize(name string, linkCid cid.Cid) { - d.estimatedSize += linksize.LinkSizeFunction(name, linkCid) -} +// updateEstimatedSize adjusts estimatedSize for link changes. +// Pass nil for oldLink when adding, nil for newLink when removing. +// The name parameter is used for size calculation since link.Name may not be set. 
+func (d *BasicDirectory) updateEstimatedSize(name string, oldLink, newLink *ipld.Link) { + switch d.GetSizeEstimationMode() { + case SizeEstimationBlock: + if oldLink != nil { + d.estimatedSize -= linkSerializedSize(name, oldLink.Cid, oldLink.Size) + } + if newLink != nil { + d.estimatedSize += linkSerializedSize(name, newLink.Cid, newLink.Size) + } + case SizeEstimationLinks: + if oldLink != nil { + d.estimatedSize -= linksize.LinkSizeFunction(name, oldLink.Cid) + } + if newLink != nil { + d.estimatedSize += linksize.LinkSizeFunction(name, newLink.Cid) + } + default: + // SizeEstimationDisabled: no-op + } -func (d *BasicDirectory) removeFromEstimatedSize(name string, linkCid cid.Cid) { - d.estimatedSize -= linksize.LinkSizeFunction(name, linkCid) if d.estimatedSize < 0 { - // Something has gone very wrong. Log an error and recompute the - // size from scratch. log.Error("BasicDirectory's estimatedSize went below 0") d.computeEstimatedSizeAndTotalLinks() } @@ -417,11 +705,39 @@ func (d *BasicDirectory) AddChild(ctx context.Context, name string, node ipld.No return d.addLinkChild(ctx, name, link) } +// getEffectiveShardingSize returns the per-directory HAMT sharding threshold if set, +// otherwise falls back to the global HAMTShardingSize. +func (d *BasicDirectory) getEffectiveShardingSize() int { + if d.hamtShardingSize > 0 { + return d.hamtShardingSize + } + return HAMTShardingSize +} + func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node) (bool, error) { - if HAMTShardingSize == 0 { // Option disabled. + shardingSize := d.getEffectiveShardingSize() + if shardingSize == 0 { // Option disabled. 
return false, nil } + switch d.GetSizeEstimationMode() { + case SizeEstimationDisabled: + return d.needsToSwitchByLinkCount(name, nodeToAdd) + case SizeEstimationBlock: + return d.needsToSwitchByBlockSize(name, nodeToAdd) + default: + return d.needsToSwitchByLinkSize(name, nodeToAdd) + } +} + +// needsToSwitchByLinkCount only considers MaxLinks, ignoring size-based threshold. +func (d *BasicDirectory) needsToSwitchByLinkCount(name string, nodeToAdd ipld.Node) (bool, error) { + entryToRemove, _ := d.node.GetNodeLink(name) + return d.checkMaxLinksExceeded(nodeToAdd, entryToRemove), nil +} + +// needsToSwitchByLinkSize uses the legacy estimation based on link names + CID bytes. +func (d *BasicDirectory) needsToSwitchByLinkSize(name string, nodeToAdd ipld.Node) (bool, error) { operationSizeChange := 0 // Find if there is an old entry under that name that will be overwritten. entryToRemove, err := d.node.GetNodeLink(name) @@ -435,40 +751,68 @@ func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid()) } - switchShardingSize := d.estimatedSize+operationSizeChange >= HAMTShardingSize - switchMaxLinks := false - // We should switch if maxLinks is set, we have reached it and a new link is being - // added. - if nodeToAdd != nil && entryToRemove == nil && d.maxLinks > 0 && - (d.totalLinks+1) > d.maxLinks { - switchMaxLinks = true + // Switch to HAMT when size exceeds threshold (> not >=). + // Directory exactly at threshold stays basic. + switchShardingSize := d.estimatedSize+operationSizeChange > d.getEffectiveShardingSize() + switchMaxLinks := d.checkMaxLinksExceeded(nodeToAdd, entryToRemove) + return switchShardingSize || switchMaxLinks, nil +} + +// needsToSwitchByBlockSize uses accurate estimation based on full serialized dag-pb block size. 
+// The estimatedSize is kept accurate via linkSerializedSize() and dataFieldSerializedSize() +// which compute exact protobuf encoding sizes arithmetically. +func (d *BasicDirectory) needsToSwitchByBlockSize(name string, nodeToAdd ipld.Node) (bool, error) { + link, err := ipld.MakeLink(nodeToAdd) + if err != nil { + return false, err + } + + // Calculate size delta for this operation + newLinkSize := linkSerializedSize(name, link.Cid, link.Size) + oldLinkSize := 0 + var entryToRemove *ipld.Link + if oldLink, err := d.node.GetNodeLink(name); err == nil { + entryToRemove = oldLink + oldLinkSize = linkSerializedSize(name, oldLink.Cid, oldLink.Size) } + + estimatedNewSize := d.estimatedSize - oldLinkSize + newLinkSize + + // Switch to HAMT when size exceeds threshold (> not >=). + // Directory exactly at threshold stays basic. + switchShardingSize := estimatedNewSize > d.getEffectiveShardingSize() + switchMaxLinks := d.checkMaxLinksExceeded(nodeToAdd, entryToRemove) return switchShardingSize || switchMaxLinks, nil } +// checkMaxLinksExceeded returns true if adding a new link would exceed maxLinks. +func (d *BasicDirectory) checkMaxLinksExceeded(nodeToAdd ipld.Node, entryToRemove *ipld.Link) bool { + return nodeToAdd != nil && entryToRemove == nil && d.maxLinks > 0 && (d.totalLinks+1) > d.maxLinks +} + // addLinkChild adds the link as an entry to this directory under the given // name. Plumbing function for the AddChild API. func (d *BasicDirectory) addLinkChild(ctx context.Context, name string, link *ipld.Link) error { // Remove old link and account for size change (if it existed; ignore - // `ErrNotExist` otherwise). + // `ErrNotExist` otherwise). RemoveChild updates both estimatedSize and + // totalLinks for the removed link. 
err := d.RemoveChild(ctx, name) if err != nil { if !errors.Is(err, os.ErrNotExist) { return err } - } else { // existed - d.totalLinks-- - } - - if d.maxLinks > 0 && d.totalLinks+1 > d.maxLinks { - return errors.New("BasicDirectory: cannot add child: maxLinks reached") + // Entry didn't exist, so this is a new link. Check maxLinks. + if d.maxLinks > 0 && d.totalLinks+1 > d.maxLinks { + return errors.New("BasicDirectory: cannot add child: maxLinks reached") + } } + // else: entry existed and was removed, no maxLinks check needed for replacement err = d.node.AddRawLink(name, link) if err != nil { return err } - d.addToEstimatedSize(name, link.Cid) + d.updateEstimatedSize(name, nil, link) d.totalLinks++ return nil } @@ -536,7 +880,7 @@ func (d *BasicDirectory) RemoveChild(ctx context.Context, name string) error { } // The name actually existed so we should update the estimated size. - d.removeFromEstimatedSize(link.Name, link.Cid) + d.updateEstimatedSize(name, link, nil) d.totalLinks-- return d.node.RemoveNodeLink(name) @@ -610,7 +954,10 @@ func (d *HAMTDirectory) SetMaxHAMTFanout(n int) { d.maxHAMTFanout = n } -// SetStat has no effect and only exists to support Dynamic directories. +// SetStat stores mode and/or mtime values for use during HAMT->Basic conversions +// and also propagates them to the underlying shard for inclusion in GetNode(). +// Pass zero for mode or zero time for mtime to leave that field unchanged. +// See BasicDirectory.SetStat for full documentation. 
func (d *HAMTDirectory) SetStat(mode os.FileMode, mtime time.Time) { if mode > 0 { d.mode = mode @@ -618,6 +965,47 @@ func (d *HAMTDirectory) SetStat(mode os.FileMode, mtime time.Time) { if !mtime.IsZero() { d.mtime = mtime } + // Also propagate to the shard so GetNode() includes mode/mtime + if d.shard != nil { + d.shard.SetStat(d.mode, d.mtime) + } +} + +// GetSizeEstimationMode returns the method used to estimate serialized dag-pb block size +// for HAMTDirectory to BasicDirectory conversion decisions. +// Returns the instance-specific mode if set, otherwise the global HAMTSizeEstimation. +func (d *HAMTDirectory) GetSizeEstimationMode() SizeEstimationMode { + if d.sizeEstimation != nil { + return *d.sizeEstimation + } + return HAMTSizeEstimation // fall back to global +} + +// SetSizeEstimationMode sets the method used to estimate serialized dag-pb block size. +// Used when inheriting settings from a parent directory after loading from disk. +func (d *HAMTDirectory) SetSizeEstimationMode(mode SizeEstimationMode) { + d.sizeEstimation = &mode +} + +// GetHAMTShardingSize returns the per-directory threshold for HAMT sharding. +// If not set (0), the global HAMTShardingSize is used. +func (d *HAMTDirectory) GetHAMTShardingSize() int { + return d.hamtShardingSize +} + +// SetHAMTShardingSize sets the per-directory threshold for HAMT sharding. +// Used when inheriting settings from a parent directory after loading from disk. +func (d *HAMTDirectory) SetHAMTShardingSize(size int) { + d.hamtShardingSize = size +} + +// getEffectiveShardingSize returns the per-directory HAMT sharding threshold if set, +// otherwise falls back to the global HAMTShardingSize. +func (d *HAMTDirectory) getEffectiveShardingSize() int { + if d.hamtShardingSize > 0 { + return d.hamtShardingSize + } + return HAMTShardingSize } // AddChild implements the `Directory` interface. @@ -732,25 +1120,50 @@ func (d *HAMTDirectory) removeFromSizeChange(name string, linkCid cid.Cid) { // nodeToAdd is nil). 
We compute both (potential) future subtraction and // addition to the size change. func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string, nodeToAdd ipld.Node) (switchToBasic bool, err error) { - if HAMTShardingSize == 0 { // Option disabled. + if d.getEffectiveShardingSize() == 0 { // Option disabled. return false, nil } - operationSizeChange := 0 - // Find if there is an old entry under that name that will be overwritten // (AddEntry) or flat out removed (RemoveEntry). entryToRemove, err := d.shard.Find(ctx, name) - if !errors.Is(err, os.ErrNotExist) { - if err != nil { - return false, err - } - operationSizeChange -= linksize.LinkSizeFunction(name, entryToRemove.Cid) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return false, err } - // For the AddEntry case compute the size addition of the new entry. + // Calculate new total link count after this operation + newTotalLinks := d.totalLinks if nodeToAdd != nil { - operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid()) + newTotalLinks++ + } + if entryToRemove != nil { + newTotalLinks-- + } + + // Check if we can switch based on MaxLinks constraint + canSwitchMaxLinks := true + if d.maxLinks > 0 && newTotalLinks > d.maxLinks { + // prevent switching as we would end up with too many links + canSwitchMaxLinks = false + } + + // When size estimation is disabled, only consider link count + if d.GetSizeEstimationMode() == SizeEstimationDisabled { + // With SizeEstimationDisabled, we only switch back to BasicDirectory + // if explicitly allowed by maxLinks (which must be set) + return canSwitchMaxLinks && d.maxLinks > 0 && newTotalLinks <= d.maxLinks, nil + } + + operationSizeChange := 0 + if entryToRemove != nil { + operationSizeChange -= d.linkSizeFor(entryToRemove) + } + if nodeToAdd != nil { + link, err := ipld.MakeLink(nodeToAdd) + if err != nil { + return false, err + } + operationSizeChange += d.linkSizeFor(link) } // We must switch if size and maxlinks are below 
threshold @@ -763,24 +1176,19 @@ func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string } } - canSwitchMaxLinks := true - if d.maxLinks > 0 { - total := d.totalLinks - if nodeToAdd != nil { - total++ - } - if entryToRemove != nil { - total-- - } - if total > d.maxLinks { - // prevent switching as we would end up with too many links - canSwitchMaxLinks = false - } - } - return canSwitchSize && canSwitchMaxLinks, nil } +// linkSizeFor returns the size contribution of a link based on the current estimation mode. +func (d *HAMTDirectory) linkSizeFor(link *ipld.Link) int { + switch d.GetSizeEstimationMode() { + case SizeEstimationBlock: + return linkSerializedSize(link.Name, link.Cid, link.Size) + default: + return linksize.LinkSizeFunction(link.Name, link.Cid) + } +} + // Evaluate directory size and a future sizeChange and check if it will be below // HAMTShardingSize threshold (to trigger a transition to a BasicDirectory). // Instead of enumerating the entire tree we eagerly call EnumLinksAsync @@ -788,13 +1196,21 @@ func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string // to keep counting) or an error occurs (like the context being canceled // if we take too much time fetching the necessary shards). func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int) (bool, error) { - if HAMTShardingSize == 0 { + shardingSize := d.getEffectiveShardingSize() + if shardingSize == 0 { panic("asked to compute HAMT size with HAMTShardingSize option off (0)") } - // We don't necessarily compute the full size of *all* shards as we might - // end early if we already know we're above the threshold or run out of time. + // Start with Data field overhead for hypothetical BasicDirectory. + // This is the size of the serialized UnixFS Data message that would exist + // in a single-block BasicDirectory (Type=Directory + optional mode/mtime). 
+ // For SizeEstimationLinks mode, we don't include this overhead to maintain + // backward compatibility with the legacy behavior. partialSize := 0 + if d.GetSizeEstimationMode() == SizeEstimationBlock { + partialSize = dataFieldSerializedSize(d.mode, d.mtime) + } + var err error below := true @@ -809,10 +1225,10 @@ func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int) break } - partialSize += linksize.LinkSizeFunction(linkResult.Link.Name, linkResult.Link.Cid) - if partialSize+sizeChange >= HAMTShardingSize { - // We have already fetched enough shards to assert we are above the - // threshold, so no need to keep fetching. + partialSize += d.linkSizeFor(linkResult.Link) + // Check if size exceeds threshold (> not >=, matching upgrade logic). + // Early exit: no need to enumerate more links once we know we're above. + if partialSize+sizeChange > shardingSize { below = false break } @@ -855,10 +1271,9 @@ func (d *DynamicDirectory) SetMaxHAMTFanout(n int) { d.Directory.SetMaxHAMTFanout(n) } -// SetStat sets stats information. This operation does not produce any side -// effects. It is taken into account when converting from HAMT to basic -// directory. Mode or mtime can be set independently by using zero for mtime -// or mode respectively. +// SetStat stores mode and/or mtime values for use during Basic<->HAMT conversions. +// Pass zero for mode or zero time for mtime to leave that field unchanged. +// See BasicDirectory.SetStat for full documentation. 
func (d *DynamicDirectory) SetStat(mode os.FileMode, mtime time.Time) { d.Directory.SetStat(mode, mtime) } @@ -913,10 +1328,18 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No hamtFanout = basicDir.maxHAMTFanout } - hamtDir, err = basicDir.switchToSharding(ctx, WithMaxHAMTFanout(hamtFanout), WithMaxLinks(basicDir.maxLinks), WithCidBuilder(basicDir.GetCidBuilder())) + hamtDir, err = basicDir.switchToSharding(ctx, + WithMaxHAMTFanout(hamtFanout), + WithMaxLinks(basicDir.maxLinks), + WithCidBuilder(basicDir.GetCidBuilder()), + WithStat(basicDir.mode, basicDir.mtime), + WithSizeEstimationMode(basicDir.GetSizeEstimationMode()), + ) if err != nil { return err } + // Propagate per-directory HAMT sharding size (not a DirectoryOption) + hamtDir.SetHAMTShardingSize(basicDir.GetHAMTShardingSize()) err = hamtDir.AddChild(ctx, name, nd) if err != nil { return err @@ -949,10 +1372,17 @@ func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error { maxLinks++ } - basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(maxLinks), WithCidBuilder(hamtDir.GetCidBuilder())) + basicDir, err := hamtDir.switchToBasic(ctx, + WithMaxLinks(maxLinks), + WithCidBuilder(hamtDir.GetCidBuilder()), + WithStat(hamtDir.mode, hamtDir.mtime), + WithSizeEstimationMode(hamtDir.GetSizeEstimationMode()), + ) if err != nil { return err } + // Propagate per-directory HAMT sharding size (not a DirectoryOption) + basicDir.SetHAMTShardingSize(hamtDir.GetHAMTShardingSize()) err = basicDir.RemoveChild(ctx, name) if err != nil { diff --git a/ipld/unixfs/io/directory_test.go b/ipld/unixfs/io/directory_test.go index d30f27e19..c369892da 100644 --- a/ipld/unixfs/io/directory_test.go +++ b/ipld/unixfs/io/directory_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "math" + "os" "slices" "strconv" "strings" @@ -525,6 +526,52 @@ func TestBasicDirectoryWithMaxLinks(t *testing.T) { require.Error(t, err) } +// TestBasicDirectoryMaxLinksAllowsReplacement verifies that 
replacing an existing +// entry does not count against maxLinks. When at maxLinks capacity, replacing an +// existing entry should succeed (it's not adding a new link). +func TestBasicDirectoryMaxLinksAllowsReplacement(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + dir, err := NewBasicDirectory(ds, WithMaxLinks(2)) + require.NoError(t, err) + + // add two entries to reach maxLinks + child1 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child1)) + require.NoError(t, dir.AddChild(ctx, "entry1", child1)) + + child2 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child2)) + require.NoError(t, dir.AddChild(ctx, "entry2", child2)) + + // adding a third entry should fail (maxLinks reached) + child3 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child3)) + err = dir.AddChild(ctx, "entry3", child3) + require.Error(t, err, "adding third entry should fail") + + // but replacing an existing entry should succeed + replacement := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, replacement)) + err = dir.AddChild(ctx, "entry1", replacement) + require.NoError(t, err, "replacing existing entry should succeed even at maxLinks") + + // verify the replacement happened + links, err := dir.Links(ctx) + require.NoError(t, err) + require.Len(t, links, 2) + + found := false + for _, l := range links { + if l.Name == "entry1" && l.Cid.Equals(replacement.Cid()) { + found = true + break + } + } + require.True(t, found, "entry1 should point to the replacement node") +} + // TestHAMTDirectoryWithMaxLinks tests that no HAMT shard as more than MaxLinks. func TestHAMTDirectoryWithMaxLinks(t *testing.T) { ds := mdtest.Mock() @@ -713,3 +760,631 @@ func (d *countGetsDS) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { } return d.Blockstore.AllKeysChan(ctx) } + +// TestSizeEstimationMode tests that all estimation modes work correctly. 
+func TestSizeEstimationMode(t *testing.T) { + // Save and restore global settings after all subtests + oldEstimation := HAMTSizeEstimation + oldShardingSize := HAMTShardingSize + oldLinkSize := linksize.LinkSizeFunction + t.Cleanup(func() { + HAMTSizeEstimation = oldEstimation + HAMTShardingSize = oldShardingSize + linksize.LinkSizeFunction = oldLinkSize + }) + + t.Run("links mode is default", func(t *testing.T) { + HAMTSizeEstimation = SizeEstimationLinks // ensure default + ds := mdtest.Mock() + dir, err := NewBasicDirectory(ds) + require.NoError(t, err) + assert.Equal(t, SizeEstimationLinks, dir.GetSizeEstimationMode()) + }) + + t.Run("block mode can be set globally", func(t *testing.T) { + HAMTSizeEstimation = SizeEstimationBlock + ds := mdtest.Mock() + dir, err := NewBasicDirectory(ds) + require.NoError(t, err) + assert.Equal(t, SizeEstimationBlock, dir.GetSizeEstimationMode()) + }) + + t.Run("disabled mode can be set globally", func(t *testing.T) { + HAMTSizeEstimation = SizeEstimationDisabled + ds := mdtest.Mock() + dir, err := NewBasicDirectory(ds) + require.NoError(t, err) + assert.Equal(t, SizeEstimationDisabled, dir.GetSizeEstimationMode()) + }) + + t.Run("WithSizeEstimationMode overrides global", func(t *testing.T) { + HAMTSizeEstimation = SizeEstimationLinks + ds := mdtest.Mock() + dir, err := NewBasicDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + assert.Equal(t, SizeEstimationBlock, dir.GetSizeEstimationMode()) + }) +} + +// TestSizeEstimationBlockMode tests that block estimation mode correctly +// calculates serialized dag-pb block size for HAMT threshold decisions. 
+func TestSizeEstimationBlockMode(t *testing.T) { + // Save and restore global settings + oldEstimation := HAMTSizeEstimation + oldShardingSize := HAMTShardingSize + oldLinkSize := linksize.LinkSizeFunction + t.Cleanup(func() { + HAMTSizeEstimation = oldEstimation + HAMTShardingSize = oldShardingSize + linksize.LinkSizeFunction = oldLinkSize + }) + + ds := mdtest.Mock() + ctx := context.Background() + + // Create a child node to add to directories + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + t.Run("block estimation uses full serialized size", func(t *testing.T) { + // Set a threshold that will be exceeded with accurate block estimation + // but not with legacy link estimation + HAMTShardingSize = 500 + HAMTSizeEstimation = SizeEstimationBlock + + dir, err := NewDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + + // Add entries until we're near the threshold + for i := range 10 { + err = dir.AddChild(ctx, fmt.Sprintf("entry-%03d", i), child) + require.NoError(t, err) + } + + // Get the node and check its actual serialized size + node, err := dir.GetNode() + require.NoError(t, err) + + pn, ok := node.(*mdag.ProtoNode) + if ok { + data, err := pn.EncodeProtobuf(false) + require.NoError(t, err) + t.Logf("actual block size with 10 entries: %d bytes", len(data)) + } + }) + + t.Run("links mode regression", func(t *testing.T) { + // Verify that links mode produces the same behavior as before + HAMTShardingSize = 1000 + HAMTSizeEstimation = SizeEstimationLinks + linksize.LinkSizeFunction = productionLinkSize + + dir, err := NewDirectory(ds) + require.NoError(t, err) + + // Add entries and verify we stay as BasicDirectory with links mode + for i := range 10 { + err = dir.AddChild(ctx, fmt.Sprintf("entry-%03d", i), child) + require.NoError(t, err) + } + + // Should still be BasicDirectory with this threshold + checkBasicDirectory(t, dir, "should be BasicDirectory with links estimation") + }) +} + +// 
TestSizeEstimationDisabled tests that disabled mode ignores size-based threshold. +func TestSizeEstimationDisabled(t *testing.T) { + // Save and restore global settings + oldEstimation := HAMTSizeEstimation + oldShardingSize := HAMTShardingSize + oldLinkSize := linksize.LinkSizeFunction + t.Cleanup(func() { + HAMTSizeEstimation = oldEstimation + HAMTShardingSize = oldShardingSize + linksize.LinkSizeFunction = oldLinkSize + }) + + ds := mdtest.Mock() + ctx := context.Background() + + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + t.Run("disabled mode ignores size threshold", func(t *testing.T) { + // Set a very low threshold that would normally trigger HAMT + HAMTShardingSize = 100 + HAMTSizeEstimation = SizeEstimationDisabled + + // Create directory with disabled mode and no MaxLinks set + dir, err := NewDirectory(ds, WithSizeEstimationMode(SizeEstimationDisabled)) + require.NoError(t, err) + + // Add many entries - should stay as BasicDirectory since + // SizeEstimationDisabled ignores size-based threshold + for i := range 20 { + err = dir.AddChild(ctx, fmt.Sprintf("entry-%03d", i), child) + require.NoError(t, err) + } + + // Should remain BasicDirectory (no MaxLinks constraint set) + checkBasicDirectory(t, dir, "should be BasicDirectory when size estimation is disabled and MaxLinks not set") + }) + + t.Run("disabled mode respects MaxLinks", func(t *testing.T) { + HAMTShardingSize = 100 + HAMTSizeEstimation = SizeEstimationDisabled + + // Create directory with disabled mode but MaxLinks set + dir, err := NewDirectory(ds, + WithSizeEstimationMode(SizeEstimationDisabled), + WithMaxLinks(5)) + require.NoError(t, err) + + // Add entries up to MaxLinks + for i := range 5 { + err = dir.AddChild(ctx, fmt.Sprintf("entry-%03d", i), child) + require.NoError(t, err) + } + checkBasicDirectory(t, dir, "should be BasicDirectory at MaxLinks") + + // Adding one more should trigger HAMT conversion + err = dir.AddChild(ctx, "entry-005", child) + 
require.NoError(t, err) + checkHAMTDirectory(t, dir, "should be HAMTDirectory after exceeding MaxLinks") + }) + + t.Run("block mode respects MaxLinks", func(t *testing.T) { + HAMTShardingSize = 256 * 1024 // 256KB - high enough to not be triggered by size + HAMTSizeEstimation = SizeEstimationBlock + + // Create directory with block mode and MaxLinks set + dir, err := NewDirectory(ds, + WithSizeEstimationMode(SizeEstimationBlock), + WithMaxLinks(5)) + require.NoError(t, err) + + // Add entries up to MaxLinks + for i := range 5 { + err = dir.AddChild(ctx, fmt.Sprintf("entry-%03d", i), child) + require.NoError(t, err) + } + checkBasicDirectory(t, dir, "should be BasicDirectory at MaxLinks") + + // Adding one more should trigger HAMT conversion + err = dir.AddChild(ctx, "entry-005", child) + require.NoError(t, err) + checkHAMTDirectory(t, dir, "should be HAMTDirectory after exceeding MaxLinks in block mode") + }) +} + +// TestBlockSizeCalculation verifies the helper functions for block size calculation. 
+func TestBlockSizeCalculation(t *testing.T) { + t.Run("varintLen", func(t *testing.T) { + // Test varint length calculation + assert.Equal(t, 1, varintLen(0)) + assert.Equal(t, 1, varintLen(127)) + assert.Equal(t, 2, varintLen(128)) + assert.Equal(t, 2, varintLen(16383)) + assert.Equal(t, 3, varintLen(16384)) + }) + + t.Run("linkSerializedSize includes all fields", func(t *testing.T) { + // Create a test CID + c, err := cid.Decode("QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG") + require.NoError(t, err) + + // Calculate size with linkSerializedSize + blockSize := linkSerializedSize("test-name", c, 1024) + + // Compare with legacy linksize calculation + legacySize := productionLinkSize("test-name", c) + + // Block size should be larger because it includes: + // - Tsize field + // - Protobuf overhead (field tags, length prefixes) + assert.Greater(t, blockSize, legacySize, + "block serialized size should be larger than legacy link size") + + t.Logf("legacy size: %d, block size: %d", legacySize, blockSize) + }) + + t.Run("calculateBlockSize matches actual encoding", func(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + basicDir, err := NewBasicDirectory(ds) + require.NoError(t, err) + + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + // Add some entries + for i := range 5 { + err = basicDir.AddChild(ctx, fmt.Sprintf("file-%d", i), child) + require.NoError(t, err) + } + + // Calculate size using our function + calculatedSize, err := calculateBlockSize(basicDir.node) + require.NoError(t, err) + + // Get actual encoded size + data, err := basicDir.node.EncodeProtobuf(false) + require.NoError(t, err) + actualSize := len(data) + + assert.Equal(t, actualSize, calculatedSize, + "calculateBlockSize should match actual encoded size") + }) +} + +// TestHAMTDirectorySizeEstimationMode tests that HAMTDirectory respects +// the size estimation mode for conversion back to BasicDirectory. 
+func TestHAMTDirectorySizeEstimationMode(t *testing.T) { + ds := mdtest.Mock() + + t.Run("HAMTDirectory GetSizeEstimationMode defaults to global", func(t *testing.T) { + oldEstimation := HAMTSizeEstimation + defer func() { HAMTSizeEstimation = oldEstimation }() + + HAMTSizeEstimation = SizeEstimationBlock + hamtDir, err := NewHAMTDirectory(ds, 0) + require.NoError(t, err) + assert.Equal(t, SizeEstimationBlock, hamtDir.GetSizeEstimationMode()) + }) + + t.Run("HAMTDirectory WithSizeEstimationMode overrides global", func(t *testing.T) { + oldEstimation := HAMTSizeEstimation + defer func() { HAMTSizeEstimation = oldEstimation }() + + HAMTSizeEstimation = SizeEstimationLinks + hamtDir, err := NewHAMTDirectory(ds, 0, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + assert.Equal(t, SizeEstimationBlock, hamtDir.GetSizeEstimationMode()) + }) +} + +// TestNegativeMtimeEncoding verifies dataFieldSerializedSize handles negative +// timestamps (dates before Unix epoch, e.g., 1960) correctly. Negative int64 +// values require 10-byte varint encoding in protobuf. 
+func TestNegativeMtimeEncoding(t *testing.T) { + // Date before Unix epoch: January 1, 1960 + negativeMtime := time.Date(1960, 1, 1, 0, 0, 0, 0, time.UTC) + require.True(t, negativeMtime.Unix() < 0, "test requires negative Unix timestamp") + + // Calculate size with negative mtime + sizeWithNegativeMtime := dataFieldSerializedSize(0, negativeMtime) + + // Calculate size with positive mtime of similar magnitude + positiveMtime := time.Date(2030, 1, 1, 0, 0, 0, 0, time.UTC) + sizeWithPositiveMtime := dataFieldSerializedSize(0, positiveMtime) + + // Negative mtime should use more bytes (10-byte varint vs ~5 bytes for positive) + assert.Greater(t, sizeWithNegativeMtime, sizeWithPositiveMtime, + "negative mtime should use more bytes due to 10-byte varint encoding") + + // Verify by creating actual directory and comparing + ds := mdtest.Mock() + ctx := context.Background() + + basicDir, err := NewBasicDirectory(ds, WithStat(0, negativeMtime)) + require.NoError(t, err) + + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + require.NoError(t, basicDir.AddChild(ctx, "test", child)) + + node, err := basicDir.GetNode() + require.NoError(t, err) + + // Verify the node actually has the negative mtime + fsn, err := ft.FSNodeFromBytes(node.(*mdag.ProtoNode).Data()) + require.NoError(t, err) + assert.Equal(t, negativeMtime.Unix(), fsn.ModTime().Unix()) +} + +// TestSizeEstimationDisabledWithMaxLinksZero verifies behavior when +// SizeEstimationDisabled is set and maxLinks is 0 (not set). This documents +// intended behavior: without maxLinks set, there's no criterion to trigger +// Basic<->HAMT conversion in either direction. 
+func TestSizeEstimationDisabledWithMaxLinksZero(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + // Save and restore globals + oldShardingSize := HAMTShardingSize + oldEstimation := HAMTSizeEstimation + defer func() { + HAMTShardingSize = oldShardingSize + HAMTSizeEstimation = oldEstimation + }() + + // Configure: SizeEstimationDisabled, maxLinks=0 (unset) + HAMTShardingSize = 100 // low threshold that would normally trigger HAMT + HAMTSizeEstimation = SizeEstimationDisabled + + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + t.Run("BasicDirectory stays Basic when adding entries", func(t *testing.T) { + // Create BasicDirectory with SizeEstimationDisabled and maxLinks=0 + basicDir, err := NewBasicDirectory(ds, + WithSizeEstimationMode(SizeEstimationDisabled), + WithMaxLinks(0)) + require.NoError(t, err) + + // Add many entries - would normally exceed size threshold + for i := range 50 { + err = basicDir.AddChild(ctx, fmt.Sprintf("file-%02d", i), child) + require.NoError(t, err) + } + + // With SizeEstimationDisabled and maxLinks=0, needsToSwitchToHAMTDir + // should return false because there's no criterion to trigger conversion + needsSwitch, err := basicDir.needsToSwitchToHAMTDir("file-50", child) + require.NoError(t, err) + assert.False(t, needsSwitch, + "BasicDirectory with SizeEstimationDisabled and maxLinks=0 should not switch to HAMT") + + // Verify via DynamicDirectory as well + dir, err := NewDirectory(ds, + WithSizeEstimationMode(SizeEstimationDisabled), + WithMaxLinks(0)) + require.NoError(t, err) + + for i := range 50 { + err = dir.AddChild(ctx, fmt.Sprintf("entry-%02d", i), child) + require.NoError(t, err) + } + checkBasicDirectory(t, dir, "DynamicDirectory should stay Basic with SizeEstimationDisabled and maxLinks=0") + }) + + t.Run("HAMTDirectory stays HAMT when removing entries", func(t *testing.T) { + // Create HAMT directory directly to test reversion behavior + hamtDir, err := NewHAMTDirectory(ds, 
0,
+		WithSizeEstimationMode(SizeEstimationDisabled),
+		WithMaxLinks(0)) // maxLinks not set
+	require.NoError(t, err)
+
+	// Add some entries
+	for i := range 10 {
+		err = hamtDir.AddChild(ctx, fmt.Sprintf("file-%d", i), child)
+		require.NoError(t, err)
+	}
+
+	// Remove all but one entry
+	for i := range 9 {
+		err = hamtDir.RemoveChild(ctx, fmt.Sprintf("file-%d", i))
+		require.NoError(t, err)
+	}
+
+	// With SizeEstimationDisabled and maxLinks=0, needsToSwitchToBasicDir should
+	// return false because there's no maxLinks criterion to satisfy
+	needsSwitch, err := hamtDir.needsToSwitchToBasicDir(ctx, "file-9", nil)
+	require.NoError(t, err)
+	assert.False(t, needsSwitch,
+		"HAMT with SizeEstimationDisabled and maxLinks=0 should not switch to Basic")
+	})
+}
+
+// TestUnicodeFilenamesInSizeEstimation verifies that size estimation correctly
+// handles multi-byte Unicode characters in filenames.
+func TestUnicodeFilenamesInSizeEstimation(t *testing.T) {
+	ds := mdtest.Mock()
+	ctx := context.Background()
+
+	testCases := []struct {
+		name     string
+		filename string
+		byteLen  int // expected byte length of filename
+	}{
+		{"ASCII", "hello.txt", 9},
+		{"Chinese", "文件.txt", 10}, // 2 Chinese chars (3 bytes each) + 4 ASCII
+		{"Emoji", "📁folder", 10}, // 1 emoji (4 bytes) + 6 ASCII
+		{"Mixed", "日本語ファイル", 21}, // 7 Japanese chars (3 bytes each)
+		{"Combining", "café", 5}, // NOTE(review): byteLen 5 implies precomposed U+00E9 (2 bytes); a true combining sequence ('e' + U+0301) would be 6 bytes
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Verify our expected byte length is correct
+			assert.Equal(t, tc.byteLen, len(tc.filename),
+				"test setup: filename byte length mismatch")
+
+			// Create directory with SizeEstimationBlock for accurate size tracking
+			basicDir, err := NewBasicDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock))
+			require.NoError(t, err)
+
+			child := ft.EmptyDirNode()
+			require.NoError(t, ds.Add(ctx, child))
+
+			// Add child with Unicode filename
+			err = basicDir.AddChild(ctx, tc.filename, child)
+
require.NoError(t, err)
+
+			// Get the node and verify we can retrieve the entry
+			_, err = basicDir.Find(ctx, tc.filename)
+			require.NoError(t, err, "should find entry with Unicode filename")
+
+			// Verify estimated size includes full byte length of filename
+			node, err := basicDir.GetNode()
+			require.NoError(t, err)
+
+			actualSize, err := calculateBlockSize(node.(*mdag.ProtoNode))
+			require.NoError(t, err)
+
+			// The estimated size should match actual encoded size
+			assert.Equal(t, actualSize, basicDir.estimatedSize,
+				"estimated size should match actual for Unicode filename")
+		})
+	}
+}
+
+// TestSequentialHAMTConversion tests that directories handle rapid sequential
+// operations during HAMT conversion correctly: it interleaves Links() reads
+// with AddChild writes so the Basic->HAMT conversion happens mid-sequence,
+// then verifies state stays consistent.
+// Note: Directory is not thread-safe for concurrent reads and writes, so
+// this test is deliberately single-goroutine (sequential operations only).
+func TestSequentialHAMTConversion(t *testing.T) {
+	ds := mdtest.Mock()
+	ctx := context.Background()
+
+	// Save and restore globals
+	oldShardingSize := HAMTShardingSize
+	defer func() { HAMTShardingSize = oldShardingSize }()
+
+	// Set low threshold to trigger HAMT quickly
+	HAMTShardingSize = 500
+
+	dir, err := NewDirectory(ds)
+	require.NoError(t, err)
+
+	child := ft.EmptyDirNode()
+	require.NoError(t, ds.Add(ctx, child))
+
+	// Add initial entries (but not enough to trigger HAMT)
+	for i := range 5 {
+		err = dir.AddChild(ctx, fmt.Sprintf("initial-%d", i), child)
+		require.NoError(t, err)
+	}
+
+	// Interleave reads and writes to test HAMT conversion during operations
+	for i := range 20 {
+		// Read
+		links, err := dir.Links(ctx)
+		require.NoError(t, err)
+		assert.GreaterOrEqual(t, len(links), 5+i, "should have at least %d entries", 5+i)
+
+		// Write - this may trigger HAMT conversion
+		err = dir.AddChild(ctx, fmt.Sprintf("sequential-%d", i), child)
+		require.NoError(t, err)
+	}
+
+ // Verify final state is consistent + links, err := dir.Links(ctx) + require.NoError(t, err) + assert.Equal(t, 25, len(links), "should have 5 initial + 20 sequential entries") +} + +// TestHAMTDirectoryModeAndMtimeAfterReload verifies that mode and mtime metadata +// survives HAMT conversion and reload from DAG. +func TestHAMTDirectoryModeAndMtimeAfterReload(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + // Save and restore globals + oldShardingSize := HAMTShardingSize + defer func() { HAMTShardingSize = oldShardingSize }() + + HAMTShardingSize = 200 // low threshold + + testMode := os.FileMode(0o755) + testMtime := time.Date(2024, 6, 15, 12, 0, 0, 0, time.UTC) + + // Create directory with mode and mtime + dir, err := NewDirectory(ds, WithStat(testMode, testMtime)) + require.NoError(t, err) + + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + // Add entries to trigger HAMT conversion + for i := range 15 { + err = dir.AddChild(ctx, fmt.Sprintf("file-%02d", i), child) + require.NoError(t, err) + } + + // Verify it became HAMT + checkHAMTDirectory(t, dir, "should be HAMTDirectory after many entries") + + // Get the node + node, err := dir.GetNode() + require.NoError(t, err) + + // Reload directory from node + reloadedDir, err := NewDirectoryFromNode(ds, node.(*mdag.ProtoNode)) + require.NoError(t, err) + + // Verify we can still access entries + links, err := reloadedDir.Links(ctx) + require.NoError(t, err) + assert.Equal(t, 15, len(links)) + + // Remove entries to trigger conversion back to Basic + for i := range 12 { + err = reloadedDir.RemoveChild(ctx, fmt.Sprintf("file-%02d", i)) + require.NoError(t, err) + } + + // Get node again - may have converted back to Basic + node2, err := reloadedDir.GetNode() + require.NoError(t, err) + + // Verify mode and mtime are preserved in the UnixFS data + fsn, err := ft.FSNodeFromBytes(node2.(*mdag.ProtoNode).Data()) + require.NoError(t, err) + + // Verify mode and mtime are 
preserved after Basic<->HAMT conversions and reload. + // FSNode.Mode() adds os.ModeDir for directories, so compare permission bits only. + if fsn.Type() == ft.TDirectory { + assert.Equal(t, testMode.Perm(), fsn.Mode().Perm(), "mode permissions should be preserved after reload and conversion") + assert.Equal(t, testMtime.Unix(), fsn.ModTime().Unix(), "mtime should be preserved after reload and conversion") + } +} + +// TestHAMTThresholdExactBoundary verifies the exact boundary behavior: +// directory at threshold stays Basic, threshold+1 triggers HAMT. +func TestHAMTThresholdExactBoundary(t *testing.T) { + ds := mdtest.Mock() + ctx := context.Background() + + // Save and restore globals + oldShardingSize := HAMTShardingSize + oldEstimation := HAMTSizeEstimation + defer func() { + HAMTShardingSize = oldShardingSize + HAMTSizeEstimation = oldEstimation + }() + + // Use block estimation for accurate size tracking + HAMTSizeEstimation = SizeEstimationBlock + + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + // Create a directory and measure its size with one entry + measureDir, err := NewBasicDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + require.NoError(t, measureDir.AddChild(ctx, "a", child)) + oneEntrySize := measureDir.estimatedSize + + // Set threshold to exactly the size of directory with N entries + // We want to test that N entries stays Basic, N+1 triggers HAMT + numEntries := 5 + targetSize := oneEntrySize + (numEntries-1)*linkSerializedSize("a", child.Cid(), 0) + HAMTShardingSize = targetSize + + t.Logf("threshold=%d, oneEntrySize=%d", targetSize, oneEntrySize) + + // Create fresh directory + dir, err := NewDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + + // Add entries up to threshold + for i := range numEntries { + name := fmt.Sprintf("%c", 'a'+i) // single char names for consistent size + err = dir.AddChild(ctx, name, child) + require.NoError(t, err) + 
} + + // Should still be Basic (at threshold, not over) + checkBasicDirectory(t, dir, "directory at exact threshold should stay Basic") + + // Add one more entry - should trigger HAMT + err = dir.AddChild(ctx, "z", child) + require.NoError(t, err) + + checkHAMTDirectory(t, dir, "directory over threshold should become HAMT") +} diff --git a/ipld/unixfs/io/doc.go b/ipld/unixfs/io/doc.go index cf844bd23..b6d524fce 100644 --- a/ipld/unixfs/io/doc.go +++ b/ipld/unixfs/io/doc.go @@ -1,3 +1,41 @@ -// Package io implements convenience objects for working with the ipfs -// unixfs data format. +// Package io implements convenience objects for working with the IPFS +// UnixFS data format. +// +// # Directory Types +// +// The package provides three directory implementations: +// +// - [BasicDirectory]: A flat directory storing all entries in a single node. +// - [HAMTDirectory]: A sharded directory using HAMT (Hash Array Mapped Trie) +// for large directories. +// - [DynamicDirectory]: Automatically switches between Basic and HAMT based +// on size thresholds. +// +// # HAMT Sharding +// +// Directories can automatically convert between basic (flat) and HAMT +// (sharded) formats based on configurable thresholds: +// +// - [HAMTShardingSize]: Byte threshold for conversion (global default: 256 KiB) +// - [HAMTSizeEstimation]: How size is estimated ([SizeEstimationLinks], +// [SizeEstimationBlock], or [SizeEstimationDisabled]) +// - [DefaultShardWidth]: Fanout for HAMT nodes (default: 256) +// +// Use [WithMaxLinks], [WithMaxHAMTFanout], and [WithSizeEstimationMode] to +// configure directories at creation time. +// +// # File Reading +// +// [DagReader] provides io.Reader and io.Seeker interfaces for reading +// UnixFS file content from the DAG. +// +// # CID Profiles (IPIP-499) +// +// [UnixFSProfile] defines import settings for deterministic CID generation. 
+// Predefined profiles ensure cross-implementation compatibility: +// +// - [UnixFS_v0_2015]: Legacy CIDv0 settings (256 KiB chunks, dag-pb leaves) +// - [UnixFS_v1_2025]: Modern CIDv1 settings (1 MiB chunks, raw leaves) +// +// See https://specs.ipfs.tech/ipips/ipip-0499/ for specification details. package io diff --git a/ipld/unixfs/io/profile.go b/ipld/unixfs/io/profile.go new file mode 100644 index 000000000..db13a4a5a --- /dev/null +++ b/ipld/unixfs/io/profile.go @@ -0,0 +1,120 @@ +package io + +import ( + "github.com/alecthomas/units" + chunk "github.com/ipfs/boxo/chunker" + "github.com/ipfs/boxo/ipld/unixfs/importer/helpers" + "github.com/ipfs/go-cid" + mh "github.com/multiformats/go-multihash" +) + +// UnixFSProfile defines a set of UnixFS import settings for CID determinism. +// Profiles ensure that different implementations produce the same CID for +// the same input when using the same profile. +// +// See IPIP-499 for details: https://github.com/ipfs/specs/pull/499 +type UnixFSProfile struct { + // CIDVersion specifies the CID version (0 or 1). + // CIDv0 only supports dag-pb codec, CIDv1 supports multiple codecs. + CIDVersion int + + // MhType is the multihash function code that determines the hash algorithm. + // Historical default is mh.SHA2_256 (0x12). + MhType uint64 + + // ChunkSize is the maximum size of file chunks (in bytes). + // Common values: 256 KiB (legacy), 1 MiB (modern). + ChunkSize int64 + + // FileDAGWidth is the maximum number of links per file DAG node. + // Common values: 174 (legacy), 1024 (modern). + FileDAGWidth int + + // RawLeaves controls whether file leaf nodes use raw codec (true) + // or dag-pb wrapped UnixFS nodes (false). + // Raw leaves require CIDv1; CIDv0 only supports dag-pb leaves. + RawLeaves bool + + // HAMTShardingSize is the threshold (in bytes) for switching to HAMT directories. + // 0 disables HAMT sharding entirely. 
+ HAMTShardingSize int + + // HAMTSizeEstimation controls how directory size is estimated for HAMT threshold. + // Use SizeEstimationBlock for accurate estimation, SizeEstimationLinks for legacy + // behavior, or SizeEstimationDisabled to rely solely on link count. + HAMTSizeEstimation SizeEstimationMode + + // HAMTShardWidth is the fanout for HAMT directory nodes. + // Must be a power of 2 and multiple of 8. + HAMTShardWidth int +} + +// Predefined profiles matching IPIP-499 specifications. +var ( + // UnixFS_v0_2015 matches the unixfs-v0-2015 profile from IPIP-499. + // Documents default UnixFS DAG construction parameters used by Kubo + // through version 0.39 when producing CIDv0. Use when compatibility + // with historical CIDv0 references is required. + // + // Historical note on FileDAGWidth=174: + // + // var roughLinkBlockSize = 1 << 13 // 8KB + // var roughLinkSize = 34 + 8 + 5 // sha256 multihash + size + no name + // // + protobuf framing + // var DefaultLinksPerBlock = (roughLinkBlockSize / roughLinkSize) + // = ( 8192 / 47 ) + // = (approximately) 174 + UnixFS_v0_2015 = UnixFSProfile{ + CIDVersion: 0, + MhType: mh.SHA2_256, + ChunkSize: int64(256 * units.KiB), + FileDAGWidth: 174, + RawLeaves: false, // dag-pb leaves for CIDv0 + HAMTShardingSize: int(256 * units.KiB), + HAMTSizeEstimation: SizeEstimationLinks, + HAMTShardWidth: 256, + } + + // UnixFS_v1_2025 matches the unixfs-v1-2025 profile from IPIP-499. + // Opinionated profile for deterministic CID generation with CIDv1. + // Use for cross-implementation CID determinism with modern settings. + UnixFS_v1_2025 = UnixFSProfile{ + CIDVersion: 1, + MhType: mh.SHA2_256, + ChunkSize: int64(1 * units.MiB), + FileDAGWidth: 1024, + RawLeaves: true, // raw leaves for CIDv1 + HAMTShardingSize: int(256 * units.KiB), + HAMTSizeEstimation: SizeEstimationBlock, + HAMTShardWidth: 256, + } +) + +// ApplyGlobals sets the global variables to match this profile's settings. 
+// This affects all subsequent file and directory import operations. +// Note: RawLeaves and CidBuilder are not globals; pass them to DAG builder options. +// +// Thread safety: this function modifies global variables and is not safe +// for concurrent use. Call it once during program initialization, before +// starting any imports. Do not call from multiple goroutines. +func (p UnixFSProfile) ApplyGlobals() { + // File settings + chunk.DefaultBlockSize = p.ChunkSize + helpers.DefaultLinksPerBlock = p.FileDAGWidth + + // Directory settings + HAMTShardingSize = p.HAMTShardingSize + HAMTSizeEstimation = p.HAMTSizeEstimation + DefaultShardWidth = p.HAMTShardWidth +} + +// CidBuilder returns a cid.Builder configured for this profile. +// Pass this to DagBuilderParams.CidBuilder when importing files. +func (p UnixFSProfile) CidBuilder() cid.Builder { + return cid.Prefix{ + Version: uint64(p.CIDVersion), + Codec: cid.DagProtobuf, + MhType: p.MhType, + MhLength: -1, // default length for the hash function + } +} diff --git a/ipld/unixfs/io/profile_test.go b/ipld/unixfs/io/profile_test.go new file mode 100644 index 000000000..688c1e100 --- /dev/null +++ b/ipld/unixfs/io/profile_test.go @@ -0,0 +1,1128 @@ +package io + +import ( + "context" + "fmt" + "os" + "testing" + "time" + + mdag "github.com/ipfs/boxo/ipld/merkledag" + mdtest "github.com/ipfs/boxo/ipld/merkledag/test" + ft "github.com/ipfs/boxo/ipld/unixfs" + "github.com/ipfs/boxo/ipld/unixfs/private/linksize" + cid "github.com/ipfs/go-cid" + ipld "github.com/ipfs/go-ipld-format" + mh "github.com/multiformats/go-multihash" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// calculateBlockSize returns the actual byte size of the serialized dag-pb block +// by performing real protobuf encoding. Used only in tests to verify that +// arithmetic size calculations (dataFieldSerializedSize + linkSerializedSize) +// match actual protobuf serialization. 
+func calculateBlockSize(node *mdag.ProtoNode) (int, error) { + data, err := node.EncodeProtobuf(false) + if err != nil { + return 0, fmt.Errorf("failed to encode directory node: %w", err) + } + return len(data), nil +} + +func TestUnixFSProfiles(t *testing.T) { + t.Run("UnixFS_v0_2015 has correct values", func(t *testing.T) { + assert.Equal(t, 0, UnixFS_v0_2015.CIDVersion, "CIDVersion should be 0") + assert.Equal(t, uint64(mh.SHA2_256), UnixFS_v0_2015.MhType, "MhType should be SHA2_256") + assert.Equal(t, int64(256*1024), UnixFS_v0_2015.ChunkSize, "ChunkSize should be 256 KiB") + assert.Equal(t, 174, UnixFS_v0_2015.FileDAGWidth, "FileDAGWidth should be 174") + assert.False(t, UnixFS_v0_2015.RawLeaves, "RawLeaves should be false for CIDv0") + assert.Equal(t, 256*1024, UnixFS_v0_2015.HAMTShardingSize, "HAMTShardingSize should be 256 KiB") + assert.Equal(t, SizeEstimationLinks, UnixFS_v0_2015.HAMTSizeEstimation, "should use links-based estimation") + assert.Equal(t, 256, UnixFS_v0_2015.HAMTShardWidth, "HAMTShardWidth should be 256") + }) + + t.Run("UnixFS_v1_2025 has correct values", func(t *testing.T) { + assert.Equal(t, 1, UnixFS_v1_2025.CIDVersion, "CIDVersion should be 1") + assert.Equal(t, uint64(mh.SHA2_256), UnixFS_v1_2025.MhType, "MhType should be SHA2_256") + assert.Equal(t, int64(1024*1024), UnixFS_v1_2025.ChunkSize, "ChunkSize should be 1 MiB") + assert.Equal(t, 1024, UnixFS_v1_2025.FileDAGWidth, "FileDAGWidth should be 1024") + assert.True(t, UnixFS_v1_2025.RawLeaves, "RawLeaves should be true for CIDv1") + assert.Equal(t, 256*1024, UnixFS_v1_2025.HAMTShardingSize, "HAMTShardingSize should be 256 KiB") + assert.Equal(t, SizeEstimationBlock, UnixFS_v1_2025.HAMTSizeEstimation, "should use block-based estimation") + assert.Equal(t, 256, UnixFS_v1_2025.HAMTShardWidth, "HAMTShardWidth should be 256") + }) + + t.Run("CidBuilder returns correct prefix", func(t *testing.T) { + t.Run("UnixFS_v0_2015", func(t *testing.T) { + builder := UnixFS_v0_2015.CidBuilder() + 
prefix := builder.(cid.Prefix) + assert.Equal(t, uint64(0), prefix.Version) + assert.Equal(t, uint64(cid.DagProtobuf), prefix.Codec) + assert.Equal(t, uint64(mh.SHA2_256), prefix.MhType) + }) + + t.Run("UnixFS_v1_2025", func(t *testing.T) { + builder := UnixFS_v1_2025.CidBuilder() + prefix := builder.(cid.Prefix) + assert.Equal(t, uint64(1), prefix.Version) + assert.Equal(t, uint64(cid.DagProtobuf), prefix.Codec) + assert.Equal(t, uint64(mh.SHA2_256), prefix.MhType) + }) + }) + + t.Run("ApplyGlobals sets global variables", func(t *testing.T) { + // Save original values + oldShardingSize := HAMTShardingSize + oldEstimation := HAMTSizeEstimation + oldShardWidth := DefaultShardWidth + t.Cleanup(func() { + HAMTShardingSize = oldShardingSize + HAMTSizeEstimation = oldEstimation + DefaultShardWidth = oldShardWidth + }) + + // Apply UnixFS_v1_2025 + UnixFS_v1_2025.ApplyGlobals() + + assert.Equal(t, UnixFS_v1_2025.HAMTShardingSize, HAMTShardingSize) + assert.Equal(t, UnixFS_v1_2025.HAMTSizeEstimation, HAMTSizeEstimation) + assert.Equal(t, UnixFS_v1_2025.HAMTShardWidth, DefaultShardWidth) + + // Apply UnixFS_v0_2015 + UnixFS_v0_2015.ApplyGlobals() + + assert.Equal(t, UnixFS_v0_2015.HAMTShardingSize, HAMTShardingSize) + assert.Equal(t, UnixFS_v0_2015.HAMTSizeEstimation, HAMTSizeEstimation) + assert.Equal(t, UnixFS_v0_2015.HAMTShardWidth, DefaultShardWidth) + }) +} + +// saveAndRestoreGlobals saves the current global settings and restores them +// after the test completes. Use this in tests that modify HAMTShardingSize, +// HAMTSizeEstimation, DefaultShardWidth, or linksize.LinkSizeFunction. 
+func saveAndRestoreGlobals(t *testing.T) { + oldShardingSize := HAMTShardingSize + oldEstimation := HAMTSizeEstimation + oldShardWidth := DefaultShardWidth + oldLinkSize := linksize.LinkSizeFunction + t.Cleanup(func() { + HAMTShardingSize = oldShardingSize + HAMTSizeEstimation = oldEstimation + DefaultShardWidth = oldShardWidth + linksize.LinkSizeFunction = oldLinkSize + }) +} + +func TestProfileHAMTThresholdBehavior(t *testing.T) { + // Use fixed link size for predictable testing + const fixedLinkSize = 100 + + t.Run("SizeEstimationLinks threshold behavior", func(t *testing.T) { + saveAndRestoreGlobals(t) + + // Configure for links-based estimation with predictable sizes + UnixFS_v0_2015.ApplyGlobals() + linksize.LinkSizeFunction = func(name string, c cid.Cid) int { + return fixedLinkSize + } + // Set threshold at exactly 300 bytes (3 links at 100 bytes each) + HAMTShardingSize = 300 + + ds := mdtest.Mock() + ctx := context.Background() + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + t.Run("below threshold stays BasicDirectory", func(t *testing.T) { + dir, err := NewDirectory(ds) + require.NoError(t, err) + + // Add 2 entries = 200 bytes, below 300 threshold + for i := range 2 { + err = dir.AddChild(ctx, fmt.Sprintf("e%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.False(t, isHAMT, "should remain BasicDirectory when below threshold (200 < 300)") + }) + + t.Run("at threshold stays BasicDirectory", func(t *testing.T) { + dir, err := NewDirectory(ds) + require.NoError(t, err) + + // Add 3 entries = 300 bytes, exactly at threshold + // With > comparison, at threshold stays basic + for i := range 3 { + err = dir.AddChild(ctx, fmt.Sprintf("e%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.False(t, isHAMT, "should stay BasicDirectory when exactly at threshold (300 == 300)") + }) + + t.Run("one byte 
over threshold switches to HAMTDirectory", func(t *testing.T) { + // Set threshold to 299 so that 3 entries (300 bytes) = threshold + 1. + // This also confirms HAMTShardingSize is read dynamically and not + // hardcoded elsewhere in the implementation. + HAMTShardingSize = 299 + + dir, err := NewDirectory(ds) + require.NoError(t, err) + + // Add 3 entries = 300 bytes = threshold + 1 + for i := range 3 { + err = dir.AddChild(ctx, fmt.Sprintf("e%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.True(t, isHAMT, "should switch to HAMTDirectory when one byte over threshold (300 > 299)") + }) + + t.Run("well above threshold switches to HAMTDirectory", func(t *testing.T) { + HAMTShardingSize = 300 // reset + + dir, err := NewDirectory(ds) + require.NoError(t, err) + + // Add 4 entries = 400 bytes, well above 300 threshold + for i := range 4 { + err = dir.AddChild(ctx, fmt.Sprintf("e%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.True(t, isHAMT, "should switch to HAMTDirectory when well above threshold (400 > 300)") + }) + }) + + t.Run("SizeEstimationBlock threshold behavior", func(t *testing.T) { + saveAndRestoreGlobals(t) + + UnixFS_v1_2025.ApplyGlobals() + + ds := mdtest.Mock() + ctx := context.Background() + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + // First, measure actual block sizes to set precise threshold + dir, err := NewDirectory(ds) + require.NoError(t, err) + + // Add entries and measure actual block size + for i := range 5 { + err = dir.AddChild(ctx, fmt.Sprintf("e%d", i), child) + require.NoError(t, err) + } + node, err := dir.GetNode() + require.NoError(t, err) + sizeWith5, err := calculateBlockSize(node.(*mdag.ProtoNode)) + require.NoError(t, err) + t.Logf("block size with 5 entries: %d bytes", sizeWith5) + + // Add one more and measure + err = dir.AddChild(ctx, "e5", child) + 
require.NoError(t, err) + node, err = dir.GetNode() + require.NoError(t, err) + sizeWith6, err := calculateBlockSize(node.(*mdag.ProtoNode)) + require.NoError(t, err) + t.Logf("block size with 6 entries: %d bytes", sizeWith6) + + // Set threshold between 5 and 6 entries + threshold := (sizeWith5 + sizeWith6) / 2 + t.Logf("setting threshold to %d bytes (between 5 and 6 entries)", threshold) + + t.Run("below threshold stays BasicDirectory", func(t *testing.T) { + HAMTShardingSize = threshold + + dir, err := NewDirectory(ds) + require.NoError(t, err) + + // Add 5 entries, should be below threshold + for i := range 5 { + err = dir.AddChild(ctx, fmt.Sprintf("e%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.False(t, isHAMT, "should remain BasicDirectory when below threshold (%d < %d)", sizeWith5, threshold) + }) + + t.Run("above threshold switches to HAMTDirectory", func(t *testing.T) { + HAMTShardingSize = threshold + + dir, err := NewDirectory(ds) + require.NoError(t, err) + + // Add 6 entries, should be above threshold + for i := range 6 { + err = dir.AddChild(ctx, fmt.Sprintf("e%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.True(t, isHAMT, "should switch to HAMTDirectory when above threshold (%d > %d)", sizeWith6, threshold) + }) + }) + + t.Run("SizeEstimationBlock is more accurate than SizeEstimationLinks", func(t *testing.T) { + saveAndRestoreGlobals(t) + + ds := mdtest.Mock() + ctx := context.Background() + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + // Create a directory and add some entries + dir, err := NewBasicDirectory(ds) + require.NoError(t, err) + + for i := range 5 { + err = dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + // Get actual serialized block size + node, err := dir.GetNode() + require.NoError(t, err) + actualBlockSize, err := 
calculateBlockSize(node.(*mdag.ProtoNode)) + require.NoError(t, err) + + // Calculate links-based estimation (what SizeEstimationLinks uses) + linksEstimate := 0 + for _, link := range node.Links() { + linksEstimate += len(link.Name) + link.Cid.ByteLen() + } + + t.Logf("actual block size: %d bytes", actualBlockSize) + t.Logf("links-based estimate: %d bytes", linksEstimate) + t.Logf("difference: %d bytes (%.1f%% underestimate)", actualBlockSize-linksEstimate, float64(actualBlockSize-linksEstimate)/float64(actualBlockSize)*100) + + // Links-based estimation should underestimate because it ignores: + // - Tsize field + // - Protobuf varints and tags + // - UnixFS Data field + assert.Greater(t, actualBlockSize, linksEstimate, + "links-based estimation should underestimate actual block size") + }) + + t.Run("SizeEstimationBlock exact threshold boundary", func(t *testing.T) { + saveAndRestoreGlobals(t) + + // Test that the HAMT switch happens exactly when size > threshold (not >=) + HAMTSizeEstimation = SizeEstimationBlock + + ds := mdtest.Mock() + ctx := t.Context() + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + // First, determine the exact block size after adding entries + testDir, err := NewDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + + // Add entries and track sizes at each step + var sizes []int + for i := range 10 { + err = testDir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + + node, err := testDir.GetNode() + require.NoError(t, err) + pn, ok := node.(*mdag.ProtoNode) + require.True(t, ok) + size, err := calculateBlockSize(pn) + require.NoError(t, err) + sizes = append(sizes, size) + } + + // Set threshold to exactly the size after 5 entries + // (entry0..entry4 = 5 entries) + exactThreshold := sizes[4] + t.Logf("threshold set to exactly %d bytes (size after 5 entries)", exactThreshold) + + t.Run("at exact threshold stays BasicDirectory", func(t *testing.T) { + 
HAMTShardingSize = exactThreshold + + dir, err := NewDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + + // Add 5 entries to reach exactly threshold size + for i := range 5 { + err = dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + node, err := dir.GetNode() + require.NoError(t, err) + pn := node.(*mdag.ProtoNode) + actualSize, _ := calculateBlockSize(pn) + t.Logf("actual size: %d, threshold: %d", actualSize, exactThreshold) + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.False(t, isHAMT, "should stay BasicDirectory when size equals threshold (%d == %d)", actualSize, exactThreshold) + }) + + t.Run("one byte over threshold switches to HAMTDirectory", func(t *testing.T) { + // Set threshold to size[4] - 1 so that size[4] is > threshold + HAMTShardingSize = sizes[4] - 1 + + dir, err := NewDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + + // Add 5 entries - last one should trigger HAMT + for i := range 5 { + err = dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.True(t, isHAMT, "should switch to HAMTDirectory when size exceeds threshold (%d > %d)", sizes[4], HAMTShardingSize) + }) + }) +} + +func TestSizeEstimationBlockWithModeMtime(t *testing.T) { + saveAndRestoreGlobals(t) + HAMTSizeEstimation = SizeEstimationBlock + + ds := mdtest.Mock() + ctx := t.Context() + + t.Run("mode increases block size", func(t *testing.T) { + // Directory without mode + dirNoMode, err := NewBasicDirectory(ds) + require.NoError(t, err) + + nodeNoMode, err := dirNoMode.GetNode() + require.NoError(t, err) + sizeNoMode, err := calculateBlockSize(nodeNoMode.(*mdag.ProtoNode)) + require.NoError(t, err) + + // Directory with mode + dirWithMode, err := NewBasicDirectory(ds, WithStat(os.FileMode(0o755), time.Time{})) + require.NoError(t, err) + + 
nodeWithMode, err := dirWithMode.GetNode() + require.NoError(t, err) + sizeWithMode, err := calculateBlockSize(nodeWithMode.(*mdag.ProtoNode)) + require.NoError(t, err) + + // Mode field in protobuf: tag (1 byte) + varint value (2 bytes for 0755=493) + // = 3 bytes overhead + expectedModeOverhead := 3 + assert.Equal(t, expectedModeOverhead, sizeWithMode-sizeNoMode, + "mode overhead should be exactly %d bytes", expectedModeOverhead) + }) + + t.Run("mtime increases block size", func(t *testing.T) { + // Directory without mtime + dirNoMtime, err := NewBasicDirectory(ds) + require.NoError(t, err) + + nodeNoMtime, err := dirNoMtime.GetNode() + require.NoError(t, err) + sizeNoMtime, err := calculateBlockSize(nodeNoMtime.(*mdag.ProtoNode)) + require.NoError(t, err) + + // Directory with mtime (seconds only) + mtime := time.Unix(1700000000, 0) + dirWithMtime, err := NewBasicDirectory(ds, WithStat(0, mtime)) + require.NoError(t, err) + + nodeWithMtime, err := dirWithMtime.GetNode() + require.NoError(t, err) + sizeWithMtime, err := calculateBlockSize(nodeWithMtime.(*mdag.ProtoNode)) + require.NoError(t, err) + + // Mtime field in protobuf (seconds only): + // - outer tag (1) + length prefix (1) + inner message + // - inner: seconds tag (1) + varint (5 bytes for 1700000000) + // = 8 bytes overhead + expectedMtimeOverhead := 8 + assert.Equal(t, expectedMtimeOverhead, sizeWithMtime-sizeNoMtime, + "mtime (seconds only) overhead should be exactly %d bytes", expectedMtimeOverhead) + }) + + t.Run("mtime with nanoseconds increases block size further", func(t *testing.T) { + // Directory with mtime (seconds only) + mtimeSeconds := time.Unix(1700000000, 0) + dirSeconds, err := NewBasicDirectory(ds, WithStat(0, mtimeSeconds)) + require.NoError(t, err) + + nodeSeconds, err := dirSeconds.GetNode() + require.NoError(t, err) + sizeSeconds, err := calculateBlockSize(nodeSeconds.(*mdag.ProtoNode)) + require.NoError(t, err) + + // Directory with mtime (seconds + nanoseconds) + mtimeNanos := 
time.Unix(1700000000, 123456789) + dirNanos, err := NewBasicDirectory(ds, WithStat(0, mtimeNanos)) + require.NoError(t, err) + + nodeNanos, err := dirNanos.GetNode() + require.NoError(t, err) + sizeNanos, err := calculateBlockSize(nodeNanos.(*mdag.ProtoNode)) + require.NoError(t, err) + + // Nanoseconds field: tag (1) + varint (4 bytes for 123456789) + // = 5 bytes additional overhead + expectedNanosOverhead := 5 + assert.Equal(t, expectedNanosOverhead, sizeNanos-sizeSeconds, + "nanoseconds overhead should be exactly %d bytes", expectedNanosOverhead) + }) + + t.Run("mode and mtime combined", func(t *testing.T) { + // Directory without metadata + dirPlain, err := NewBasicDirectory(ds) + require.NoError(t, err) + + nodePlain, err := dirPlain.GetNode() + require.NoError(t, err) + sizePlain, err := calculateBlockSize(nodePlain.(*mdag.ProtoNode)) + require.NoError(t, err) + + // Directory with both mode and mtime + mtime := time.Unix(1700000000, 123456789) + dirFull, err := NewBasicDirectory(ds, WithStat(os.FileMode(0o755), mtime)) + require.NoError(t, err) + + nodeFull, err := dirFull.GetNode() + require.NoError(t, err) + sizeFull, err := calculateBlockSize(nodeFull.(*mdag.ProtoNode)) + require.NoError(t, err) + + // Total: mode (3) + mtime with nanos (8 + 5) = 16 bytes + expectedTotalOverhead := 16 + assert.Equal(t, expectedTotalOverhead, sizeFull-sizePlain, + "total mode+mtime overhead should be exactly %d bytes", expectedTotalOverhead) + }) + + t.Run("estimatedSize includes mode/mtime overhead", func(t *testing.T) { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + // Create directory with metadata + mtime := time.Unix(1700000000, 123456789) + dir, err := NewBasicDirectory(ds, + WithStat(os.FileMode(0o755), mtime), + WithSizeEstimationMode(SizeEstimationBlock), + ) + require.NoError(t, err) + + // Add some entries + for i := range 5 { + err = dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + // Get actual 
block size + node, err := dir.GetNode() + require.NoError(t, err) + actualSize, err := calculateBlockSize(node.(*mdag.ProtoNode)) + require.NoError(t, err) + + // Check estimated size matches actual size + assert.Equal(t, actualSize, dir.estimatedSize, + "estimatedSize should match actual block size (including mode/mtime)") + }) + + t.Run("HAMT threshold accounts for mode/mtime overhead", func(t *testing.T) { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + // Create directories with and without metadata, add entries until we find + // a threshold where one switches to HAMT but the other doesn't + mtime := time.Unix(1700000000, 123456789) + + // First, find the size difference due to metadata + dirPlain, err := NewBasicDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + dirMeta, err := NewBasicDirectory(ds, + WithStat(os.FileMode(0o755), mtime), + WithSizeEstimationMode(SizeEstimationBlock), + ) + require.NoError(t, err) + + // Add entries to both + for i := range 3 { + err = dirPlain.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + err = dirMeta.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + nodePlain, _ := dirPlain.GetNode() + nodeMeta, _ := dirMeta.GetNode() + sizePlain, _ := calculateBlockSize(nodePlain.(*mdag.ProtoNode)) + sizeMeta, _ := calculateBlockSize(nodeMeta.(*mdag.ProtoNode)) + + t.Logf("size without metadata (3 entries): %d", sizePlain) + t.Logf("size with metadata (3 entries): %d", sizeMeta) + + // Set threshold between the two sizes + if sizeMeta > sizePlain { + HAMTShardingSize = (sizePlain + sizeMeta) / 2 + t.Logf("threshold set to: %d (between plain and meta)", HAMTShardingSize) + + // Create fresh directories with this threshold + dirPlain2, err := NewDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + dirMeta2, err := NewDirectory(ds, + WithStat(os.FileMode(0o755), mtime), + 
WithSizeEstimationMode(SizeEstimationBlock), + ) + require.NoError(t, err) + + // Add entries + for i := range 3 { + err = dirPlain2.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + err = dirMeta2.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + // Plain should stay basic, meta should switch to HAMT + _, plainIsHAMT := dirPlain2.(*DynamicDirectory).Directory.(*HAMTDirectory) + _, metaIsHAMT := dirMeta2.(*DynamicDirectory).Directory.(*HAMTDirectory) + + assert.False(t, plainIsHAMT, "plain directory should stay BasicDirectory") + assert.True(t, metaIsHAMT, "directory with metadata should switch to HAMTDirectory") + } + }) +} + +func TestEstimatedSizeAccuracy(t *testing.T) { + saveAndRestoreGlobals(t) + HAMTSizeEstimation = SizeEstimationBlock + + ds := mdtest.Mock() + ctx := t.Context() + + t.Run("estimated size matches actual after multiple operations", func(t *testing.T) { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + dir, err := NewBasicDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + + // Add entries + for i := range 10 { + err = dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + // Remove some entries + for i := range 5 { + err = dir.RemoveChild(ctx, fmt.Sprintf("entry%d", i)) + require.NoError(t, err) + } + + // Replace some entries + for i := 5; i < 8; i++ { + err = dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + // Verify estimated size matches actual + node, err := dir.GetNode() + require.NoError(t, err) + actualSize, err := calculateBlockSize(node.(*mdag.ProtoNode)) + require.NoError(t, err) + + assert.Equal(t, actualSize, dir.estimatedSize, + "estimatedSize should match actual after add/remove/replace operations") + }) + + t.Run("linkSerializedSize matches actual link contribution", func(t *testing.T) { + child := ft.EmptyDirNode() + require.NoError(t, 
ds.Add(ctx, child)) + + // Create empty directory and get base size + dir, err := NewBasicDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + emptyNode, _ := dir.GetNode() + emptySize, _ := calculateBlockSize(emptyNode.(*mdag.ProtoNode)) + + // Add one entry + err = dir.AddChild(ctx, "testentry", child) + require.NoError(t, err) + oneEntryNode, _ := dir.GetNode() + oneEntrySize, _ := calculateBlockSize(oneEntryNode.(*mdag.ProtoNode)) + + // Calculate expected link size + link, _ := oneEntryNode.Links()[0].Cid, oneEntryNode.Links()[0] + expectedLinkSize := linkSerializedSize("testentry", link, oneEntryNode.Links()[0].Size) + + actualLinkContribution := oneEntrySize - emptySize + t.Logf("empty dir size: %d", emptySize) + t.Logf("one entry size: %d", oneEntrySize) + t.Logf("actual link contribution: %d", actualLinkContribution) + t.Logf("linkSerializedSize result: %d", expectedLinkSize) + + assert.Equal(t, actualLinkContribution, expectedLinkSize, + "linkSerializedSize should exactly match actual link contribution") + }) +} + +func TestBlockSizeEstimationEdgeCases(t *testing.T) { + saveAndRestoreGlobals(t) + HAMTSizeEstimation = SizeEstimationBlock + + ds := mdtest.Mock() + ctx := t.Context() + + t.Run("entry replacement updates estimated size correctly", func(t *testing.T) { + child1 := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child1)) + + // Create a larger child node to have different CID/size + child2 := ft.EmptyDirNode() + child2.SetData(make([]byte, 100)) + require.NoError(t, ds.Add(ctx, child2)) + + dir, err := NewBasicDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + + // Add entry + err = dir.AddChild(ctx, "entry", child1) + require.NoError(t, err) + node1, _ := dir.GetNode() + size1, _ := calculateBlockSize(node1.(*mdag.ProtoNode)) + estimated1 := dir.estimatedSize + t.Logf("after add child1: actual=%d, estimated=%d", size1, estimated1) + assert.Equal(t, size1, estimated1) + + 
// Replace with different child + err = dir.AddChild(ctx, "entry", child2) + require.NoError(t, err) + node2, _ := dir.GetNode() + size2, _ := calculateBlockSize(node2.(*mdag.ProtoNode)) + estimated2 := dir.estimatedSize + t.Logf("after replace with child2: actual=%d, estimated=%d", size2, estimated2) + assert.Equal(t, size2, estimated2) + + // Size should have changed due to different Tsize + t.Logf("size changed by: %d bytes", size2-size1) + }) + + t.Run("long entry names handled correctly", func(t *testing.T) { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + dir, err := NewBasicDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + + // Add entries with varying name lengths + names := []string{ + "a", + "medium_length_name", + "this_is_a_very_long_entry_name_that_should_still_work_correctly_for_size_estimation", + } + + for _, name := range names { + err = dir.AddChild(ctx, name, child) + require.NoError(t, err) + + node, _ := dir.GetNode() + actualSize, _ := calculateBlockSize(node.(*mdag.ProtoNode)) + assert.Equal(t, actualSize, dir.estimatedSize, + "estimated size should match actual for name: %s", name) + } + }) + + t.Run("threshold behavior with fast path optimization", func(t *testing.T) { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + // Calculate size for entries to set appropriate threshold + testDir, err := NewBasicDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + for i := range 20 { + _ = testDir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + } + node, _ := testDir.GetNode() + sizeAt20, _ := calculateBlockSize(node.(*mdag.ProtoNode)) + + // Set threshold well below the current size (to test fast path for "clearly above") + // The margin is 256 bytes, so set threshold such that size is > threshold + 256 + HAMTShardingSize = sizeAt20 - 500 + t.Logf("threshold: %d, size at 20 entries: %d, margin: 256", HAMTShardingSize, sizeAt20) + 
+ // Create directory and add entries - should switch to HAMT via fast path + dir, err := NewDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + + for i := range 20 { + err = dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.True(t, isHAMT, "should switch to HAMT when clearly above threshold") + }) + + t.Run("threshold behavior near boundary uses exact calculation", func(t *testing.T) { + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + // Calculate exact sizes + testDir, err := NewBasicDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + for i := range 5 { + _ = testDir.AddChild(ctx, fmt.Sprintf("e%d", i), child) + } + node, _ := testDir.GetNode() + sizeAt5, _ := calculateBlockSize(node.(*mdag.ProtoNode)) + + // Set threshold to exact size - should NOT switch because we use + // "size > threshold" (not >=), so size == threshold stays basic + HAMTShardingSize = sizeAt5 + t.Logf("threshold: %d (exact size at 5 entries)", HAMTShardingSize) + + dir, err := NewDirectory(ds, WithSizeEstimationMode(SizeEstimationBlock)) + require.NoError(t, err) + + for i := range 5 { + err = dir.AddChild(ctx, fmt.Sprintf("e%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.False(t, isHAMT, "should stay BasicDirectory when size equals threshold") + }) +} + +// TestHAMTToBasicDowngrade tests that HAMTDirectory converts back to BasicDirectory +// at precise byte boundaries when entries are removed. 
+func TestHAMTToBasicDowngrade(t *testing.T) { + saveAndRestoreGlobals(t) + + ds := mdtest.Mock() + ctx := t.Context() + + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + // Test both size estimation modes + testCases := []struct { + name string + mode SizeEstimationMode + // sizeFunc calculates the "size" for a given number of entries based on mode + sizeFunc func(numEntries int) int + }{ + { + name: "SizeEstimationLinks", + mode: SizeEstimationLinks, + sizeFunc: func(numEntries int) int { + // Each entry "entryN" has name length 6 (entry0-entry9) + CID length + // Using the mock or calculating based on linksize function + size := 0 + for i := range numEntries { + name := fmt.Sprintf("entry%d", i) + size += len(name) + child.Cid().ByteLen() + } + return size + }, + }, + { + name: "SizeEstimationBlock", + mode: SizeEstimationBlock, + sizeFunc: nil, // Will measure actual block sizes + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + HAMTSizeEstimation = tc.mode + + // Measure sizes at different entry counts + var sizes []int + if tc.mode == SizeEstimationBlock { + // Measure actual block sizes + measureDir, err := NewBasicDirectory(ds, WithSizeEstimationMode(tc.mode)) + require.NoError(t, err) + for i := range 10 { + err = measureDir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + node, _ := measureDir.GetNode() + size, _ := calculateBlockSize(node.(*mdag.ProtoNode)) + sizes = append(sizes, size) + } + } else { + // Calculate link-based sizes + for i := 1; i <= 10; i++ { + sizes = append(sizes, tc.sizeFunc(i)) + } + } + t.Logf("sizes: 5 entries=%d, 6 entries=%d, 7 entries=%d", + sizes[4], sizes[5], sizes[6]) + + // Set threshold between 5 and 6 entries + threshold := (sizes[4] + sizes[5]) / 2 + t.Logf("threshold set to %d (between 5 and 6 entries)", threshold) + + t.Run("HAMT stays HAMT when above threshold after remove", func(t *testing.T) { + HAMTShardingSize = threshold + + dir, 
err := NewDirectory(ds, WithSizeEstimationMode(tc.mode)) + require.NoError(t, err) + + for i := range 8 { + err = dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + require.True(t, isHAMT, "should be HAMTDirectory after adding 8 entries") + + // Remove entries down to 6 (still above threshold) + err = dir.RemoveChild(ctx, "entry7") + require.NoError(t, err) + err = dir.RemoveChild(ctx, "entry6") + require.NoError(t, err) + + _, isHAMT = dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.True(t, isHAMT, + "should stay HAMTDirectory with 6 entries (size %d > threshold %d)", + sizes[5], threshold) + }) + + t.Run("HAMT downgrades to Basic when at threshold after remove", func(t *testing.T) { + // Set threshold to exactly the size at 5 entries + HAMTShardingSize = sizes[4] + t.Logf("threshold set to exactly %d (size at 5 entries)", HAMTShardingSize) + + dir, err := NewDirectory(ds, WithSizeEstimationMode(tc.mode)) + require.NoError(t, err) + + for i := range 8 { + err = dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + require.True(t, isHAMT, "should be HAMTDirectory after adding 8 entries") + + // Remove down to 5 entries (at threshold) + for i := 7; i >= 5; i-- { + err = dir.RemoveChild(ctx, fmt.Sprintf("entry%d", i)) + require.NoError(t, err) + } + + _, isHAMT = dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.False(t, isHAMT, + "should downgrade to BasicDirectory when size equals threshold (%d == %d)", + sizes[4], HAMTShardingSize) + }) + + t.Run("HAMT downgrades to Basic when below threshold after remove", func(t *testing.T) { + HAMTShardingSize = threshold + + dir, err := NewDirectory(ds, WithSizeEstimationMode(tc.mode)) + require.NoError(t, err) + + for i := range 8 { + err = dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + 
require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + require.True(t, isHAMT, "should be HAMTDirectory after adding 8 entries") + + // Remove down to 5 entries (below threshold) + for i := 7; i >= 5; i-- { + err = dir.RemoveChild(ctx, fmt.Sprintf("entry%d", i)) + require.NoError(t, err) + } + + _, isHAMT = dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.False(t, isHAMT, + "should downgrade to BasicDirectory when below threshold (%d < %d)", + sizes[4], threshold) + + // Verify all remaining entries are intact + links, err := dir.Links(ctx) + require.NoError(t, err) + assert.Len(t, links, 5, "should have 5 entries after downgrade") + }) + + t.Run("precise boundary: one entry makes the difference", func(t *testing.T) { + // Set threshold so that 6 entries is exactly at threshold + HAMTShardingSize = sizes[5] + t.Logf("threshold set to exactly %d (size at 6 entries)", HAMTShardingSize) + + dir, err := NewDirectory(ds, WithSizeEstimationMode(tc.mode)) + require.NoError(t, err) + + // Add 7 entries to get into HAMT territory + for i := range 7 { + err = dir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + _, isHAMT := dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + require.True(t, isHAMT, "should be HAMTDirectory with 7 entries") + + // Remove one entry -> 6 entries, size == threshold -> should downgrade + err = dir.RemoveChild(ctx, "entry6") + require.NoError(t, err) + + _, isHAMT = dir.(*DynamicDirectory).Directory.(*HAMTDirectory) + assert.False(t, isHAMT, + "should downgrade to BasicDirectory when at exact threshold after remove") + + // Verify it's actually a BasicDirectory with correct entries + _, ok := dir.(*DynamicDirectory).Directory.(*BasicDirectory) + require.True(t, ok, "should be BasicDirectory") + + links, err := dir.Links(ctx) + require.NoError(t, err) + assert.Len(t, links, 6, "should have 6 entries after downgrade") + }) + }) + } +} + +// 
TestDataFieldSerializedSizeMatchesActual verifies that the arithmetic calculation +// in dataFieldSerializedSize matches the actual protobuf serialization. This test +// ensures the optimization doesn't drift from reality and will catch any future +// field additions to the UnixFS spec for directories. +func TestDataFieldSerializedSizeMatchesActual(t *testing.T) { + testCases := []struct { + name string + mode os.FileMode + mtime time.Time + }{ + {"empty directory", 0, time.Time{}}, + {"with mode 0755", os.FileMode(0o755), time.Time{}}, + {"with mode 0644", os.FileMode(0o644), time.Time{}}, + {"with mtime seconds only", 0, time.Unix(1700000000, 0)}, + {"with mtime and nanos", 0, time.Unix(1700000000, 123456789)}, + {"with mode and mtime", os.FileMode(0o755), time.Unix(1700000000, 123456789)}, + {"with negative timestamp", 0, time.Unix(-1000000, 0)}, // before 1970 + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create actual node and serialize + var node *mdag.ProtoNode + if tc.mode != 0 || !tc.mtime.IsZero() { + node = ft.EmptyDirNodeWithStat(tc.mode, tc.mtime) + } else { + node = ft.EmptyDirNode() + } + + actualSize, err := calculateBlockSize(node) + require.NoError(t, err) + + // Compute using arithmetic function + computedSize := dataFieldSerializedSize(tc.mode, tc.mtime) + + assert.Equal(t, actualSize, computedSize, + "dataFieldSerializedSize must match actual serialized size; "+ + "if UnixFS spec adds new directory fields, update dataFieldSerializedSize()") + }) + } +} + +// TestHAMTAndBasicDirectorySizeConsistency verifies that HAMTDirectory.sizeBelowThreshold() +// and BasicDirectory.estimatedSize compute equivalent values for the same directory contents. +// This ensures both calculations use the same Data field overhead. 
+func TestHAMTAndBasicDirectorySizeConsistency(t *testing.T) { + saveAndRestoreGlobals(t) + + ds := mdtest.Mock() + ctx := t.Context() + + child := ft.EmptyDirNode() + require.NoError(t, ds.Add(ctx, child)) + + t.Run("SizeEstimationBlock mode/mtime consistency", func(t *testing.T) { + HAMTSizeEstimation = SizeEstimationBlock + + mtime := time.Unix(1700000000, 123456789) + mode := os.FileMode(0o755) + + // Create a BasicDirectory with mode/mtime and add some entries + basicDir, err := NewBasicDirectory(ds, + WithStat(mode, mtime), + WithSizeEstimationMode(SizeEstimationBlock), + ) + require.NoError(t, err) + + for i := range 5 { + err = basicDir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + // Get BasicDirectory's estimated size + basicEstimatedSize := basicDir.estimatedSize + + // Verify it matches actual serialized size + node, err := basicDir.GetNode() + require.NoError(t, err) + actualSize, err := calculateBlockSize(node.(*mdag.ProtoNode)) + require.NoError(t, err) + assert.Equal(t, actualSize, basicEstimatedSize, + "BasicDirectory.estimatedSize should match actual block size") + + // Now create a HAMT and convert back to verify consistency + // Set threshold low to force HAMT conversion + HAMTShardingSize = 50 + + hamtDir, err := NewDirectory(ds, + WithStat(mode, mtime), + WithSizeEstimationMode(SizeEstimationBlock), + ) + require.NoError(t, err) + + // Add same entries to trigger HAMT conversion + for i := range 5 { + err = hamtDir.AddChild(ctx, fmt.Sprintf("entry%d", i), child) + require.NoError(t, err) + } + + // Should be a HAMT now + hamt, ok := hamtDir.(*DynamicDirectory).Directory.(*HAMTDirectory) + require.True(t, ok, "should be HAMTDirectory") + + // Set mode/mtime on HAMT (simulating what would happen during conversion) + hamt.SetStat(mode, mtime) + + // Set threshold high to allow BasicDirectory and test sizeBelowThreshold + HAMTShardingSize = 1000 + + // sizeBelowThreshold should compute the same size as 
BasicDirectory + // (since it's calculating the hypothetical BasicDirectory size) + below, err := hamt.sizeBelowThreshold(ctx, 0) + require.NoError(t, err) + assert.True(t, below, "should be below threshold") + + // The BasicDirectory estimated size should equal what HAMT would calculate + // for conversion decision. We can verify this by creating a fresh BasicDirectory + // from the HAMT's contents and comparing sizes. + freshBasic, err := NewBasicDirectory(ds, + WithStat(mode, mtime), + WithSizeEstimationMode(SizeEstimationBlock), + ) + require.NoError(t, err) + + err = hamt.ForEachLink(ctx, func(link *ipld.Link) error { + return freshBasic.addLinkChild(ctx, link.Name, link) + }) + require.NoError(t, err) + + // The fresh BasicDirectory's estimated size should be consistent + t.Logf("BasicDirectory estimated size: %d", basicEstimatedSize) + t.Logf("Fresh BasicDirectory from HAMT estimated size: %d", freshBasic.estimatedSize) + + assert.Equal(t, basicEstimatedSize, freshBasic.estimatedSize, + "BasicDirectory size should be consistent regardless of creation path") + }) +} diff --git a/ipld/unixfs/mod/dagmodifier.go b/ipld/unixfs/mod/dagmodifier.go index 3db1fb23a..122710fe0 100644 --- a/ipld/unixfs/mod/dagmodifier.go +++ b/ipld/unixfs/mod/dagmodifier.go @@ -1,24 +1,3 @@ -// Package mod provides DAG modification utilities to, for example, -// insert additional nodes in a unixfs DAG or truncate them. -// -// Identity CID Handling: -// -// This package automatically handles identity CIDs (multihash code 0x00) which -// inline data directly in the CID. When modifying nodes with identity CIDs, -// the package ensures the [verifcid.MaxIdentityDigestSize] limit is respected by -// automatically switching to a cryptographic hash function when the encoded -// data would exceed this limit. The replacement hash function is chosen from -// (in order): the configured Prefix if non-identity, or [util.DefaultIpfsHash] -// as a fallback. 
This prevents creation of unacceptably big identity CIDs -// while preserving them for small data that fits within the limit. -// -// RawNode Growth: -// -// When appending data to a RawNode that would require multiple blocks, the -// node is automatically converted to a UnixFS file structure. This is necessary -// because RawNodes cannot have child nodes. The original raw data remains -// accessible via its original CID, while the new structure provides full -// UnixFS capabilities. package mod import ( @@ -161,7 +140,13 @@ func (dm *DagModifier) expandSparse(size int64) error { return err } err = dm.dagserv.Add(dm.ctx, nnode) - return err + if err != nil { + return err + } + // Update curNode so subsequent writes use the expanded node. + // Without this, writes after sparse expansion would go to the old node. + dm.curNode = nnode + return nil } // Write continues writing to the dag at the current offset @@ -258,6 +243,11 @@ func (dm *DagModifier) Sync() error { return err } + // Ensure the final node doesn't exceed identity hash limits + if pn, ok := dm.curNode.(*mdag.ProtoNode); ok { + dm.ensureSafeProtoNodeHash(pn) + } + err = dm.dagserv.Add(dm.ctx, dm.curNode) if err != nil { return err @@ -312,6 +302,53 @@ func (dm *DagModifier) ensureSafeProtoNodeHash(node *mdag.ProtoNode) { } } +// maybeCollapseToRawLeaf checks if the current node is a ProtoNode wrapper around +// a single RawNode child with no metadata, and if RawLeaves is enabled, collapses +// the structure to just the RawNode. This ensures CID compatibility with ipfs add +// for single-block files. Returns (node, true) if collapsed, (nil, false) otherwise. 
+func (dm *DagModifier) maybeCollapseToRawLeaf() (ipld.Node, bool) { + if !dm.RawLeaves { + return nil, false + } + + protoNode, ok := dm.curNode.(*mdag.ProtoNode) + if !ok { + return nil, false + } + + // Must have exactly one child link + links := protoNode.Links() + if len(links) != 1 { + return nil, false + } + + // Parse UnixFS metadata to check for ModTime or other metadata + fsn, err := ft.FSNodeFromBytes(protoNode.Data()) + if err != nil { + return nil, false // Not valid UnixFS, keep as is + } + + // If there's metadata (like ModTime or Mode), keep as ProtoNode + if !fsn.ModTime().IsZero() || fsn.Mode() != 0 { + return nil, false + } + + // Get the child node from DAGService (should be cached from appendData) + childNode, err := links[0].GetNode(dm.ctx, dm.dagserv) + if err != nil { + return nil, false // Can't fetch child, keep as is + } + + // Child must be a RawNode + rawChild, ok := childNode.(*mdag.RawNode) + if !ok { + return nil, false + } + + // Collapse to the RawNode child + return rawChild, true +} + // modifyDag writes the data in 'dm.wrBuf' over the data in 'node' starting at 'offset' // returns the new key of the passed in node. func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64) (cid.Cid, error) { @@ -331,7 +368,8 @@ func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64) (cid.Cid, error) { return cid.Cid{}, err } - // Update newly written node.. + // MFS semantics: update mtime on content modification (see doc.go). + // To preserve a specific mtime, set it explicitly after the operation. if !fsn.ModTime().IsZero() { fsn.SetModTime(time.Now()) } @@ -466,12 +504,50 @@ func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64) (cid.Cid, error) { // RawNode to ProtoNode. The original data remains accessible through the // direct CID of the original raw block, and the new dag-pb CID makes // post-append data accessible through the standard UnixFS APIs. 
+ +// identitySafeDAGService wraps a DAGService to handle identity CID overflow. +// When adding a node with identity hash that would exceed the size limit, +// it automatically switches to a cryptographic hash. +type identitySafeDAGService struct { + ipld.DAGService + fallbackPrefix cid.Prefix +} + +func (s *identitySafeDAGService) Add(ctx context.Context, nd ipld.Node) error { + // Check if this is a ProtoNode with identity hash that's too large + if pn, ok := nd.(*mdag.ProtoNode); ok { + if prefix, ok := pn.CidBuilder().(cid.Prefix); ok && prefix.MhType == mh.IDENTITY { + encoded, _ := pn.EncodeProtobuf(false) + if len(encoded) > verifcid.DefaultMaxIdentityDigestSize { + // Switch to fallback hash + pn.SetCidBuilder(s.fallbackPrefix) + } + } + } + return s.DAGService.Add(ctx, nd) +} + func (dm *DagModifier) appendData(nd ipld.Node, spl chunker.Splitter) (ipld.Node, error) { + // Create a wrapper DAGService that handles identity overflow automatically. + // This allows small appends to preserve identity while preventing overflow errors. 
+ dagserv := dm.dagserv + if dm.Prefix.MhType == mh.IDENTITY { + dagserv = &identitySafeDAGService{ + DAGService: dm.dagserv, + fallbackPrefix: cid.Prefix{ + Version: dm.Prefix.Version, + Codec: dm.Prefix.Codec, + MhType: util.DefaultIpfsHash, + MhLength: -1, + }, + } + } + switch nd := nd.(type) { case *mdag.ProtoNode: // ProtoNode can be directly passed to trickle.Append dbp := &help.DagBuilderParams{ - Dagserv: dm.dagserv, + Dagserv: dagserv, Maxlinks: dm.MaxLinks, CidBuilder: dm.Prefix, RawLeaves: dm.RawLeaves, @@ -509,7 +585,7 @@ func (dm *DagModifier) appendData(nd ipld.Node, spl chunker.Splitter) (ipld.Node // Now we can append new data using trickle dbp := &help.DagBuilderParams{ - Dagserv: dm.dagserv, + Dagserv: dagserv, Maxlinks: dm.MaxLinks, CidBuilder: dm.Prefix, RawLeaves: true, // Ensure future leaves are raw for consistency @@ -587,6 +663,14 @@ func (dm *DagModifier) GetNode() (ipld.Node, error) { if err != nil { return nil, err } + + // If RawLeaves is enabled and the result is a ProtoNode with a single RawNode child + // and no metadata, collapse it to just the RawNode for CID compatibility with ipfs add. + // The collapsed RawNode is returned directly (no Copy needed - it's a different node + // fetched from DAGService, not dm.curNode, and RawNodes are immutable). 
+ if collapsed, ok := dm.maybeCollapseToRawLeaf(); ok { + return collapsed, nil + } return dm.curNode.Copy(), nil } @@ -685,6 +769,7 @@ func (dm *DagModifier) dagTruncate(ctx context.Context, n ipld.Node, size uint64 } fsn.SetData(fsn.Data()[:size]) + // MFS semantics: update mtime on truncation (see doc.go) if !fsn.ModTime().IsZero() { fsn.SetModTime(time.Now()) } diff --git a/ipld/unixfs/mod/dagmodifier_test.go b/ipld/unixfs/mod/dagmodifier_test.go index eee243750..820a072ff 100644 --- a/ipld/unixfs/mod/dagmodifier_test.go +++ b/ipld/unixfs/mod/dagmodifier_test.go @@ -5,7 +5,9 @@ import ( "context" "fmt" "io" + "os" "testing" + "time" dag "github.com/ipfs/boxo/ipld/merkledag" "github.com/ipfs/boxo/ipld/unixfs" @@ -614,6 +616,78 @@ func testSeekPastEndWrite(t *testing.T, opts testu.NodeOpts) { } } +// TestSparseWriteOnExistingRawNode ensures curNode is updated after sparse +// expansion. This mimics the scenario where `ipfs files write --offset N` is +// called on an existing file created with --raw-leaves: +// 1. First command creates a RawNode file +// 2. Second command loads the RawNode and writes at offset past its end +// Without the fix, expandSparse wouldn't update curNode, causing modifyDag +// to operate on the old unexpanded node and panic with slice bounds error. 
+func TestSparseWriteOnExistingRawNode(t *testing.T) { + ctx := context.Background() + dserv := testu.GetDAGServ() + + // Step 1: Create initial file as RawNode (simulates ipfs files write --raw-leaves) + initialData := []byte("foobar\n") + rawNode, err := dag.NewRawNodeWPrefix(initialData, cid.Prefix{ + Version: 1, + Codec: cid.Raw, + MhType: mh.SHA2_256, + MhLength: -1, + }) + if err != nil { + t.Fatal(err) + } + err = dserv.Add(ctx, rawNode) + if err != nil { + t.Fatal(err) + } + + // Step 2: Load existing RawNode in NEW DagModifier (simulates second ipfs files write) + dagmod, err := NewDagModifier(ctx, rawNode, dserv, testu.SizeSplitterGen(512)) + if err != nil { + t.Fatal(err) + } + dagmod.RawLeaves = true + + // Step 3: Write at offset 50 (past end of 7-byte file) + _, err = dagmod.Seek(50, io.SeekStart) + if err != nil { + t.Fatalf("Seek to offset 50 failed: %v", err) + } + + newData := []byte("blah\n") + _, err = dagmod.Write(newData) + if err != nil { + t.Fatalf("Write at offset 50 failed: %v", err) + } + + // Step 4: Sync - this panicked before the fix + err = dagmod.Sync() + if err != nil { + t.Fatalf("Sync failed: %v", err) + } + + // Step 5: Verify result + _, err = dagmod.Seek(0, io.SeekStart) + if err != nil { + t.Fatal(err) + } + result, err := io.ReadAll(dagmod) + if err != nil { + t.Fatal(err) + } + + // Expected: "foobar\n" + 43 zero bytes + "blah\n" = 55 bytes + expected := make([]byte, 55) + copy(expected, initialData) + copy(expected[50:], newData) + + if err = testu.ArrComp(result, expected); err != nil { + t.Fatal(err) + } +} + func TestRelativeSeek(t *testing.T) { runAllSubtests(t, testRelativeSeek) } @@ -1502,3 +1576,216 @@ func TestRawNodeGrowthConversion(t *testing.T) { } }) } + +func TestRawLeavesCollapse(t *testing.T) { + t.Run("single-block file collapses to RawNode when RawLeaves enabled", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + dserv := testu.GetDAGServ() + + // Create an 
empty file node with CIDv1 (required for raw leaves) + emptyNode := testu.GetEmptyNode(t, dserv, testu.UseCidV1) + + // Create DagModifier with RawLeaves enabled + dmod, err := NewDagModifier(ctx, emptyNode, dserv, testu.SizeSplitterGen(512)) + if err != nil { + t.Fatal(err) + } + dmod.RawLeaves = true + + // Write small data (fits in single block) + data := []byte("hello world") + _, err = dmod.Write(data) + if err != nil { + t.Fatal(err) + } + + // Get the final node + resultNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // Should be a RawNode, not a ProtoNode + rawNode, ok := resultNode.(*dag.RawNode) + if !ok { + t.Fatalf("expected RawNode for single-block file with RawLeaves=true, got %T", resultNode) + } + + // Verify the data + if !bytes.Equal(rawNode.RawData(), data) { + t.Errorf("data mismatch: expected %q, got %q", data, rawNode.RawData()) + } + + // Verify CID uses raw codec + if rawNode.Cid().Prefix().Codec != cid.Raw { + t.Errorf("expected raw codec, got %d", rawNode.Cid().Prefix().Codec) + } + }) + + t.Run("multi-block file stays as ProtoNode", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + dserv := testu.GetDAGServ() + + // Create an empty file node with CIDv1 + emptyNode := testu.GetEmptyNode(t, dserv, testu.UseCidV1) + + // Create DagModifier with RawLeaves enabled and small chunk size + dmod, err := NewDagModifier(ctx, emptyNode, dserv, testu.SizeSplitterGen(32)) + if err != nil { + t.Fatal(err) + } + dmod.RawLeaves = true + + // Write enough data to require multiple blocks + data := bytes.Repeat([]byte("x"), 100) + _, err = dmod.Write(data) + if err != nil { + t.Fatal(err) + } + + // Get the final node + resultNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // Should be a ProtoNode (not collapsed) + protoNode, ok := resultNode.(*dag.ProtoNode) + if !ok { + t.Fatalf("expected ProtoNode for multi-block file, got %T", resultNode) + } + + // Should have 
multiple children + if len(protoNode.Links()) < 2 { + t.Errorf("expected multiple blocks, got %d links", len(protoNode.Links())) + } + }) + + // Table-driven test for metadata preservation. + // Files with Mode or ModTime metadata should NOT collapse to RawNode + // even when RawLeaves=true and the file fits in a single block. + // We use explicit non-zero values and verify they're preserved exactly. + testMtime := time.Date(2025, 6, 15, 12, 30, 0, 0, time.UTC) + metadataTests := []struct { + name string + mode os.FileMode + mtime time.Time + }{ + {"file with ModTime stays as ProtoNode", 0, testMtime}, + {"file with Mode stays as ProtoNode", 0o755, time.Time{}}, + {"file with both Mode and ModTime stays as ProtoNode", 0o644, testMtime}, + } + + for _, tc := range metadataTests { + t.Run(tc.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + dserv := testu.GetDAGServ() + + // Create a file node with specified metadata + fsNode := unixfs.NewFSNode(unixfs.TFile) + if tc.mode != 0 { + fsNode.SetMode(tc.mode) + } + if !tc.mtime.IsZero() { + fsNode.SetModTime(tc.mtime) + } + fsNodeData, err := fsNode.GetBytes() + if err != nil { + t.Fatal(err) + } + emptyNode := dag.NodeWithData(fsNodeData) + emptyNode.SetCidBuilder(cid.Prefix{ + Version: 1, + Codec: cid.DagProtobuf, + MhType: mh.SHA2_256, + MhLength: -1, + }) + err = dserv.Add(ctx, emptyNode) + if err != nil { + t.Fatal(err) + } + + // Create DagModifier with RawLeaves enabled + dmod, err := NewDagModifier(ctx, emptyNode, dserv, testu.SizeSplitterGen(512)) + if err != nil { + t.Fatal(err) + } + dmod.RawLeaves = true + + // Write small data (would fit in single block) + data := []byte("hello world") + _, err = dmod.Write(data) + if err != nil { + t.Fatal(err) + } + + // Get the final node + resultNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // Should stay as ProtoNode because of metadata + protoNode, ok := resultNode.(*dag.ProtoNode) + if !ok { 
+ t.Fatalf("expected ProtoNode, got %T", resultNode) + } + + // Verify metadata is preserved + fsn, err := unixfs.FSNodeFromBytes(protoNode.Data()) + if err != nil { + t.Fatal(err) + } + // Mode should be preserved exactly + if tc.mode != 0 && fsn.Mode() != tc.mode { + t.Errorf("Mode: expected %o, got %o", tc.mode, fsn.Mode()) + } + // ModTime gets updated to time.Now() on write, so just verify it's present + if !tc.mtime.IsZero() && fsn.ModTime().IsZero() { + t.Error("ModTime was lost (expected non-zero)") + } + }) + } + + t.Run("RawLeaves=false keeps ProtoNode", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + dserv := testu.GetDAGServ() + + // Create an empty file node with CIDv1 + emptyNode := testu.GetEmptyNode(t, dserv, testu.UseCidV1) + + // Create DagModifier with RawLeaves explicitly disabled + dmod, err := NewDagModifier(ctx, emptyNode, dserv, testu.SizeSplitterGen(512)) + if err != nil { + t.Fatal(err) + } + dmod.RawLeaves = false + + // Write small data + data := []byte("hello world") + _, err = dmod.Write(data) + if err != nil { + t.Fatal(err) + } + + // Get the final node + resultNode, err := dmod.GetNode() + if err != nil { + t.Fatal(err) + } + + // Should stay as ProtoNode when RawLeaves=false + _, ok := resultNode.(*dag.ProtoNode) + if !ok { + t.Fatalf("expected ProtoNode when RawLeaves=false, got %T", resultNode) + } + }) +} diff --git a/ipld/unixfs/mod/doc.go b/ipld/unixfs/mod/doc.go new file mode 100644 index 000000000..40bcc80ac --- /dev/null +++ b/ipld/unixfs/mod/doc.go @@ -0,0 +1,46 @@ +// Package mod provides DAG modification utilities for UnixFS files. +// +// This package is designed for MFS (Mutable File System) operations where +// files need to be modified in place: writing at offsets, appending data, +// truncating, and seeking within files. 
+// +// # Metadata Handling (Mode and ModTime) +// +// UnixFS supports optional Mode (unix permissions) and ModTime (modification +// time) metadata. Most use cases do not set these fields, and files without +// metadata work normally. +// +// When metadata IS present, DagModifier follows Unix filesystem semantics: +// +// - ModTime is automatically updated to time.Now() when content is modified +// or truncated, matching how Unix filesystems behave. +// - Mode is preserved during modifications. +// +// If you need a specific mtime after modification (e.g., for archival or +// reproducible builds), explicitly set it on the FSNode after the operation. +// +// # Identity CID Handling +// +// This package automatically handles identity CIDs (multihash code 0x00) which +// inline data directly in the CID. When modifying nodes with identity CIDs, +// the package ensures the [verifcid.DefaultMaxIdentityDigestSize] limit is +// respected by automatically switching to a cryptographic hash function when +// the encoded data would exceed this limit. The replacement hash function is +// chosen from (in order): the configured Prefix if non-identity, or +// [util.DefaultIpfsHash] as a fallback. +// +// # RawNode Growth +// +// When appending data to a RawNode that would require multiple blocks, the +// node is automatically converted to a UnixFS file structure. This is necessary +// because RawNodes cannot have child nodes. The original raw data remains +// accessible via its original CID, while the new structure provides full +// UnixFS capabilities. +// +// # Raw Leaf Collapsing +// +// When RawLeaves is enabled and a file fits in a single block with no metadata +// (no Mode or ModTime), GetNode returns a RawNode directly rather than a +// ProtoNode wrapper. This ensures CID compatibility with `ipfs add --raw-leaves`. +// Files with metadata always return ProtoNode to preserve the metadata. 
+package mod diff --git a/ipld/unixfs/unixfs.go b/ipld/unixfs/unixfs.go index 3db4f540f..2a5fa14de 100644 --- a/ipld/unixfs/unixfs.go +++ b/ipld/unixfs/unixfs.go @@ -166,6 +166,12 @@ func SymlinkData(path string) ([]byte, error) { // HAMTShardData return a `Data_HAMTShard` protobuf message func HAMTShardData(data []byte, fanout uint64, hashType uint64) ([]byte, error) { + return HAMTShardDataWithStat(data, fanout, hashType, 0, time.Time{}) +} + +// HAMTShardDataWithStat return a `Data_HAMTShard` protobuf message with optional mode/mtime. +// Pass mode=0 and zero mtime to omit those fields. +func HAMTShardDataWithStat(data []byte, fanout uint64, hashType uint64, mode os.FileMode, mtime time.Time) ([]byte, error) { pbdata := new(pb.Data) typ := pb.Data_HAMTShard pbdata.Type = &typ @@ -173,6 +179,18 @@ func HAMTShardData(data []byte, fanout uint64, hashType uint64) ([]byte, error) pbdata.Data = data pbdata.Fanout = proto.Uint64(fanout) + if mode != 0 { + pbdata.Mode = proto.Uint32(files.ModePermsToUnixPerms(mode)) + } + if !mtime.IsZero() { + pbdata.Mtime = &pb.IPFSTimestamp{ + Seconds: proto.Int64(mtime.Unix()), + } + if nanos := uint32(mtime.Nanosecond()); nanos > 0 { + pbdata.Mtime.Nanos = &nanos + } + } + out, err := proto.Marshal(pbdata) if err != nil { return nil, err diff --git a/mfs/dir.go b/mfs/dir.go index fcca94208..1d9bf4379 100644 --- a/mfs/dir.go +++ b/mfs/dir.go @@ -9,6 +9,7 @@ import ( "sync" "time" + chunker "github.com/ipfs/boxo/chunker" dag "github.com/ipfs/boxo/ipld/merkledag" ft "github.com/ipfs/boxo/ipld/unixfs" uio "github.com/ipfs/boxo/ipld/unixfs/io" @@ -42,7 +43,8 @@ type Directory struct { // reading and editing directories. unixfsDir uio.Directory - prov provider.MultihashProvider + prov provider.MultihashProvider + chunker chunker.SplitterGen // inherited from parent, nil means default } // NewDirectory constructs a new MFS directory. 
@@ -82,6 +84,7 @@ func NewDirectory(ctx context.Context, name string, node ipld.Node, parent paren unixfsDir: db, prov: prov, entriesCache: make(map[string]FSNode), + chunker: parent.getChunker(), // inherit from parent }, nil } @@ -89,15 +92,23 @@ func NewDirectory(ctx context.Context, name string, node ipld.Node, parent paren // The directory is added to the DAGService. To create a new MFS // root use NewEmptyRootFolder instead. func NewEmptyDirectory(ctx context.Context, name string, parent parent, dserv ipld.DAGService, prov provider.MultihashProvider, opts MkdirOpts) (*Directory, error) { - db, err := uio.NewDirectory(dserv, + dirOpts := []uio.DirectoryOption{ uio.WithMaxLinks(opts.MaxLinks), uio.WithMaxHAMTFanout(opts.MaxHAMTFanout), uio.WithStat(opts.Mode, opts.ModTime), uio.WithCidBuilder(opts.CidBuilder), - ) + } + if opts.SizeEstimationMode != nil { + dirOpts = append(dirOpts, uio.WithSizeEstimationMode(*opts.SizeEstimationMode)) + } + db, err := uio.NewDirectory(dserv, dirOpts...) if err != nil { return nil, err } + // Set HAMTShardingSize after creation (not a DirectoryOption) + if opts.HAMTShardingSize > 0 { + db.SetHAMTShardingSize(opts.HAMTShardingSize) + } nd, err := db.GetNode() if err != nil { @@ -111,6 +122,12 @@ func NewEmptyDirectory(ctx context.Context, name string, parent parent, dserv ip // note: we don't provide the empty unixfs dir as it is always local. + // Use chunker from opts if set, otherwise inherit from parent + c := opts.Chunker + if c == nil { + c = parent.getChunker() + } + return &Directory{ inode: inode{ name: name, @@ -121,6 +138,7 @@ func NewEmptyDirectory(ctx context.Context, name string, parent parent, dserv ip unixfsDir: db, prov: prov, entriesCache: make(map[string]FSNode), + chunker: c, }, nil } @@ -129,6 +147,11 @@ func (d *Directory) GetCidBuilder() cid.Builder { return d.unixfsDir.GetCidBuilder() } +// getChunker implements the parent interface. 
+func (d *Directory) getChunker() chunker.SplitterGen { + return d.chunker +} + // SetCidBuilder sets the CID builder func (d *Directory) SetCidBuilder(b cid.Builder) { d.unixfsDir.SetCidBuilder(b) @@ -214,8 +237,12 @@ func (d *Directory) cacheNode(name string, nd ipld.Node) (FSNode, error) { // these options are not persisted so they need to be // inherited from the parent. - ndir.unixfsDir.SetMaxLinks(d.unixfsDir.GetMaxLinks()) + parentMaxLinks := d.unixfsDir.GetMaxLinks() + parentMode := d.unixfsDir.GetSizeEstimationMode() + ndir.unixfsDir.SetMaxLinks(parentMaxLinks) ndir.unixfsDir.SetMaxHAMTFanout(d.unixfsDir.GetMaxHAMTFanout()) + ndir.unixfsDir.SetHAMTShardingSize(d.unixfsDir.GetHAMTShardingSize()) + ndir.unixfsDir.SetSizeEstimationMode(parentMode) d.entriesCache[name] = ndir return ndir, nil case ft.TFile, ft.TRaw, ft.TSymlink: @@ -343,8 +370,9 @@ func (d *Directory) ForEachEntry(ctx context.Context, f func(NodeListing) error) func (d *Directory) Mkdir(name string) (*Directory, error) { return d.MkdirWithOpts(name, MkdirOpts{ - MaxLinks: d.unixfsDir.GetMaxLinks(), - MaxHAMTFanout: d.unixfsDir.GetMaxHAMTFanout(), + MaxLinks: d.unixfsDir.GetMaxLinks(), + MaxHAMTFanout: d.unixfsDir.GetMaxHAMTFanout(), + HAMTShardingSize: d.unixfsDir.GetHAMTShardingSize(), }) } @@ -578,8 +606,10 @@ func (d *Directory) setNodeData(data []byte, links []*ipld.Link) error { } // We need to carry our desired settings. + // Note: SizeEstimationMode is set at creation time and cannot be changed. 
db.SetMaxLinks(d.unixfsDir.GetMaxLinks()) db.SetMaxHAMTFanout(d.unixfsDir.GetMaxHAMTFanout()) + db.SetHAMTShardingSize(d.unixfsDir.GetHAMTShardingSize()) d.unixfsDir = db return nil diff --git a/mfs/file.go b/mfs/file.go index 8f9e9c059..547d196b2 100644 --- a/mfs/file.go +++ b/mfs/file.go @@ -12,9 +12,7 @@ import ( ft "github.com/ipfs/boxo/ipld/unixfs" mod "github.com/ipfs/boxo/ipld/unixfs/mod" "github.com/ipfs/boxo/provider" - "github.com/ipfs/boxo/util" ipld "github.com/ipfs/go-ipld-format" - mh "github.com/multiformats/go-multihash" ) // File represents a file in the MFS, its logic its mainly targeted @@ -104,37 +102,18 @@ func (fi *File) Open(flags Flags) (_ FileDescriptor, _retErr error) { // Ok as well. } - dmod, err := mod.NewDagModifier(context.TODO(), node, fi.dagService, chunker.DefaultSplitter) - // TODO: Remove the use of the `chunker` package here, add a new `NewDagModifier` in - // `go-unixfs` with the `DefaultSplitter` already included. + // Use configured chunker from parent, fall back to default + chunkerGen := fi.parent.getChunker() + if chunkerGen == nil { + chunkerGen = chunker.DefaultSplitter + } + + dmod, err := mod.NewDagModifier(context.TODO(), node, fi.dagService, chunkerGen) if err != nil { return nil, err } dmod.RawLeaves = fi.RawLeaves - // If the node uses identity hash, configure DagModifier with a fallback - // to avoid issues when the data grows beyond the identity hash size limit - if dmod.Prefix.MhType == mh.IDENTITY { - // Try to inherit full prefix from parent directory - if fi.parent != nil { - if dir, ok := fi.parent.(*Directory); ok { - if parentNode, err := dir.GetNode(); err == nil { - parentPrefix := parentNode.Cid().Prefix() - if parentPrefix.MhType != mh.IDENTITY { - // Use parent's full prefix (all fields) - dmod.Prefix = parentPrefix - } - } - } - } - - // If still identity (no suitable parent), set up fallback - if dmod.Prefix.MhType == mh.IDENTITY { - dmod.Prefix.MhType = util.DefaultIpfsHash - dmod.Prefix.MhLength = 
-1 // Use default length for the hash function - } - } - return &fileDescriptor{ inode: fi, flags: flags, diff --git a/mfs/mfs_test.go b/mfs/mfs_test.go index 376d145c2..325895185 100644 --- a/mfs/mfs_test.go +++ b/mfs/mfs_test.go @@ -1826,3 +1826,713 @@ func FuzzMkdirAndWriteConcurrently(f *testing.F) { } }) } + +// TestRootOptionChunker verifies that WithChunker sets the chunker factory +// for files created in the MFS, producing the exact expected block count and sizes. +func TestRootOptionChunker(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + ds := getDagserv(t) + + // Use 512-byte chunks (non-default: default is 256KB = 262144 bytes). + // With default chunker, 2048 bytes would produce 1 chunk. + // With our custom 512-byte chunker, it must produce exactly 4 chunks. + const chunkSize = 512 + const dataSize = 2048 // exactly 4 chunks with 512-byte chunker + + // Verify our test value differs from default + if int64(chunkSize) == chunker.DefaultBlockSize { + t.Fatalf("test uses default chunk size %d; use a non-default value", chunkSize) + } + + // Create root with custom chunker (512 bytes) + root, err := NewEmptyRoot(ctx, ds, nil, nil, MkdirOpts{}, WithChunker(chunker.SizeSplitterGen(chunkSize))) + if err != nil { + t.Fatal(err) + } + + // Verify chunker was set on root + if root.GetChunker() == nil { + t.Fatal("expected non-nil chunker on root") + } + + // Create empty file node + nd := dag.NodeWithData(ft.FilePBData(nil, 0)) + nd.SetCidBuilder(cid.V1Builder{Codec: cid.DagProtobuf, MhType: multihash.SHA2_256}) + if err := ds.Add(ctx, nd); err != nil { + t.Fatal(err) + } + if err := PutNode(root, "/testfile", nd); err != nil { + t.Fatal(err) + } + + // Write exactly 2048 bytes (should produce exactly 4 x 512-byte chunks) + data := make([]byte, dataSize) + for i := range data { + data[i] = byte(i % 256) + } + + fi, err := Lookup(root, "/testfile") + if err != nil { + t.Fatal(err) + } + + fd, err := 
fi.(*File).Open(Flags{Write: true, Sync: true}) + if err != nil { + t.Fatal(err) + } + if _, err := fd.Write(data); err != nil { + t.Fatal(err) + } + if err := fd.Close(); err != nil { + t.Fatal(err) + } + + // Verify result: must have exactly 4 child links (proves custom chunker was used) + // With default 256KB chunker, this would be 1 link (or 0 links with inline data) + node, err := fi.GetNode() + if err != nil { + t.Fatal(err) + } + links := node.Links() + if len(links) != 4 { + t.Fatalf("expected exactly 4 links for %d bytes with %d-byte chunks, got %d (default chunker would produce 1)", dataSize, chunkSize, len(links)) + } + + // Verify each chunk is exactly chunkSize bytes + for i, link := range links { + if link.Size != chunkSize { + t.Fatalf("link %d: expected size %d, got %d", i, chunkSize, link.Size) + } + } +} + +// TestRootOptionMaxLinks verifies that WithMaxLinks triggers HAMT sharding +// when directory entries exceed the configured maximum. +func TestRootOptionMaxLinks(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + ds := getDagserv(t) + + // Use MaxLinks=3 (non-default: default is ~174 from helpers.DefaultLinksPerBlock). + // With default MaxLinks, 4 files would NOT trigger HAMT sharding. + // With our custom MaxLinks=3, adding 4 files MUST trigger HAMT. + const maxLinks = 3 + const fileCount = 4 // exceeds maxLinks=3, but not default ~174 + + // Use SizeEstimationDisabled (non-default: default is SizeEstimationLinks=0). + // This mode ignores size-based thresholds and only considers link count, + // ensuring we're testing MaxLinks propagation, not size thresholds. 
+ sizeEstimationDisabled := uio.SizeEstimationDisabled + if sizeEstimationDisabled == uio.SizeEstimationLinks { + t.Fatal("test uses default SizeEstimationMode; use a non-default value") + } + + root, err := NewEmptyRoot(ctx, ds, nil, nil, MkdirOpts{SizeEstimationMode: &sizeEstimationDisabled}, + WithMaxLinks(maxLinks), + WithSizeEstimationMode(sizeEstimationDisabled)) + if err != nil { + t.Fatal(err) + } + + // Create subdirectory with explicit opts to ensure MaxLinks and SizeEstimationMode propagate + if err := Mkdir(root, "/subdir", MkdirOpts{ + MaxLinks: maxLinks, + SizeEstimationMode: &sizeEstimationDisabled, + }); err != nil { + t.Fatal(err) + } + subdir, err := Lookup(root, "/subdir") + if err != nil { + t.Fatal(err) + } + dir := subdir.(*Directory) + + // Verify MaxLinks was propagated to the subdirectory + actualMaxLinks := dir.unixfsDir.GetMaxLinks() + if actualMaxLinks != maxLinks { + t.Fatalf("MaxLinks not propagated: expected %d, got %d", maxLinks, actualMaxLinks) + } + + // Verify SizeEstimationMode was propagated + actualMode := dir.unixfsDir.GetSizeEstimationMode() + if actualMode != sizeEstimationDisabled { + t.Fatalf("SizeEstimationMode not propagated: expected %d, got %d", sizeEstimationDisabled, actualMode) + } + + // Verify starts as BasicDirectory + node, err := dir.GetNode() + if err != nil { + t.Fatal(err) + } + fsn, err := ft.FSNodeFromBytes(node.(*dag.ProtoNode).Data()) + if err != nil { + t.Fatal(err) + } + if fsn.Type() != ft.TDirectory { + t.Fatalf("expected TDirectory initially, got %s", fsn.Type()) + } + + // Add exactly 4 files (exceeds MaxLinks=3, but would not exceed default ~174) + for i := range fileCount { + child := dag.NodeWithData(ft.FilePBData([]byte("test"), 4)) + if err := ds.Add(ctx, child); err != nil { + t.Fatal(err) + } + if err := dir.AddChild(fmt.Sprintf("file%d", i), child); err != nil { + t.Fatal(err) + } + } + + // Flush and verify it became HAMT (proves custom MaxLinks was used) + // With default MaxLinks (~174), 
4 files would NOT trigger HAMT + if err := dir.Flush(); err != nil { + t.Fatal(err) + } + node, err = dir.GetNode() + if err != nil { + t.Fatal(err) + } + fsn, err = ft.FSNodeFromBytes(node.(*dag.ProtoNode).Data()) + if err != nil { + t.Fatal(err) + } + if fsn.Type() != ft.THAMTShard { + t.Fatalf("expected THAMTShard after %d entries with MaxLinks=%d, got %s (default MaxLinks would stay BasicDirectory)", fileCount, maxLinks, fsn.Type()) + } +} + +// TestRootOptionSizeEstimationMode verifies that WithSizeEstimationMode +// propagates correctly to child directories, including after reload from DAG. +func TestRootOptionSizeEstimationMode(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + ds := getDagserv(t) + + // Use SizeEstimationDisabled (non-default: default is SizeEstimationLinks=0). + // This mode ignores size-based thresholds and only considers link count. + // With default SizeEstimationLinks, the size threshold might prevent HAMT + // conversion even when MaxLinks is exceeded (if size is below threshold). + mode := uio.SizeEstimationDisabled + if mode == uio.SizeEstimationLinks { + t.Fatal("test uses default SizeEstimationMode; use a non-default value") + } + + // Use MaxLinks=3 (non-default: default is ~174). 
+ const maxLinks = 3 + + // Create root with non-default settings + root, err := NewEmptyRoot(ctx, ds, nil, nil, MkdirOpts{SizeEstimationMode: &mode}, + WithSizeEstimationMode(mode), + WithMaxLinks(maxLinks)) + if err != nil { + t.Fatal(err) + } + + // Create subdirectory (will inherit settings via cacheNode when loaded) + if err := Mkdir(root, "/subdir", MkdirOpts{}); err != nil { + t.Fatal(err) + } + + // Add 2 files (below MaxLinks threshold) + for i := range 2 { + child := dag.NodeWithData(ft.FilePBData([]byte("test"), 4)) + if err := ds.Add(ctx, child); err != nil { + t.Fatal(err) + } + if err := PutNode(root, fmt.Sprintf("/subdir/file%d", i), child); err != nil { + t.Fatal(err) + } + } + + // Flush to persist to DAG + if err := root.GetDirectory().Flush(); err != nil { + t.Fatal(err) + } + + // Get root node and reload (simulates daemon restart) + rootNode, err := root.GetDirectory().GetNode() + if err != nil { + t.Fatal(err) + } + + // Create new root from persisted node with same settings. + // This tests that settings propagate to directories loaded from DAG via cacheNode(). 
+ root2, err := NewRoot(ctx, ds, rootNode.(*dag.ProtoNode), nil, nil, + WithSizeEstimationMode(mode), + WithMaxLinks(maxLinks)) + if err != nil { + t.Fatal(err) + } + + // Access reloaded subdirectory (triggers cacheNode which should propagate settings) + subdir2, err := Lookup(root2, "/subdir") + if err != nil { + t.Fatal(err) + } + dir2 := subdir2.(*Directory) + + // Verify settings were propagated to reloaded directory + actualMode := dir2.unixfsDir.GetSizeEstimationMode() + if actualMode != mode { + t.Fatalf("SizeEstimationMode not propagated after reload: expected %d, got %d", mode, actualMode) + } + actualMaxLinks := dir2.unixfsDir.GetMaxLinks() + if actualMaxLinks != maxLinks { + t.Fatalf("MaxLinks not propagated after reload: expected %d, got %d", maxLinks, actualMaxLinks) + } + + // Verify still BasicDirectory with 2 entries + node, err := dir2.GetNode() + if err != nil { + t.Fatal(err) + } + fsn, err := ft.FSNodeFromBytes(node.(*dag.ProtoNode).Data()) + if err != nil { + t.Fatal(err) + } + if fsn.Type() != ft.TDirectory { + t.Fatalf("expected TDirectory with 2 entries, got %s", fsn.Type()) + } + + // Add 2 more files (now 4 total, exceeds MaxLinks=3) + for i := 2; i < 4; i++ { + child := dag.NodeWithData(ft.FilePBData([]byte("test"), 4)) + if err := ds.Add(ctx, child); err != nil { + t.Fatal(err) + } + if err := dir2.AddChild(fmt.Sprintf("file%d", i), child); err != nil { + t.Fatal(err) + } + } + + // Flush and verify it became HAMT (proves settings propagated after reload) + // With default settings, 4 tiny files would NOT trigger HAMT + if err := dir2.Flush(); err != nil { + t.Fatal(err) + } + node, err = dir2.GetNode() + if err != nil { + t.Fatal(err) + } + fsn, err = ft.FSNodeFromBytes(node.(*dag.ProtoNode).Data()) + if err != nil { + t.Fatal(err) + } + if fsn.Type() != ft.THAMTShard { + t.Fatalf("expected THAMTShard after reload with custom settings, got %s", fsn.Type()) + } +} + +// TestChunkerInheritance verifies that the chunker setting propagates 
+// through nested subdirectories to files created deep in the hierarchy. +func TestChunkerInheritance(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + ds := getDagserv(t) + + // Use 256-byte chunks (non-default: default is 256KB = 262144 bytes). + // With default chunker, 1024 bytes would produce 1 chunk. + // With our custom 256-byte chunker, it must produce exactly 4 chunks. + // This tests that the chunker propagates through /a -> /b -> /c to the file. + const chunkSize = 256 + const dataSize = 1024 // exactly 4 chunks with 256-byte chunker + + // Verify our test value differs from default + if int64(chunkSize) == chunker.DefaultBlockSize { + t.Fatalf("test uses default chunk size %d; use a non-default value", chunkSize) + } + + // Create root with custom chunker + root, err := NewEmptyRoot(ctx, ds, nil, nil, MkdirOpts{}, WithChunker(chunker.SizeSplitterGen(chunkSize))) + if err != nil { + t.Fatal(err) + } + + // Verify chunker was set on root + if root.GetChunker() == nil { + t.Fatal("expected non-nil chunker on root") + } + + // Create nested subdirectory (3 levels deep: /a/b/c) + // Each directory should inherit the chunker from its parent + if err := Mkdir(root, "/a/b/c", MkdirOpts{Mkparents: true}); err != nil { + t.Fatal(err) + } + + // Create file in nested directory + nd := dag.NodeWithData(ft.FilePBData(nil, 0)) + nd.SetCidBuilder(cid.V1Builder{Codec: cid.DagProtobuf, MhType: multihash.SHA2_256}) + if err := ds.Add(ctx, nd); err != nil { + t.Fatal(err) + } + if err := PutNode(root, "/a/b/c/file", nd); err != nil { + t.Fatal(err) + } + + // Write data + data := make([]byte, dataSize) + for i := range data { + data[i] = byte(i) + } + + fi, err := Lookup(root, "/a/b/c/file") + if err != nil { + t.Fatal(err) + } + fd, err := fi.(*File).Open(Flags{Write: true, Sync: true}) + if err != nil { + t.Fatal(err) + } + if _, err := fd.Write(data); err != nil { + t.Fatal(err) + } + if err := fd.Close(); err != nil { + 
t.Fatal(err) + } + + // Verify: must have exactly 4 links (proves chunker inherited through /a/b/c) + // With default 256KB chunker, 1024 bytes would produce 1 chunk (or inline data) + node, err := fi.GetNode() + if err != nil { + t.Fatal(err) + } + links := node.Links() + if len(links) != 4 { + t.Fatalf("expected exactly 4 links (chunker inherited through 3 levels), got %d (default chunker would produce 1)", len(links)) + } + + // Verify each chunk is exactly chunkSize bytes + for i, link := range links { + if link.Size != chunkSize { + t.Fatalf("link %d: expected size %d, got %d", i, chunkSize, link.Size) + } + } +} + +// TestRootOptionMaxHAMTFanout verifies that WithMaxHAMTFanout propagates +// to directories and affects HAMT shard width. +func TestRootOptionMaxHAMTFanout(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + ds := getDagserv(t) + + // Use custom fanout of 64 (non-default: default is 256). + const customFanout = 64 + + // Use SizeEstimationDisabled so we rely only on MaxLinks for HAMT conversion. 
+ sizeEstimationDisabled := uio.SizeEstimationDisabled + const maxLinks = 3 + + root, err := NewEmptyRoot(ctx, ds, nil, nil, + MkdirOpts{SizeEstimationMode: &sizeEstimationDisabled}, + WithMaxLinks(maxLinks), + WithMaxHAMTFanout(customFanout), + WithSizeEstimationMode(sizeEstimationDisabled)) + if err != nil { + t.Fatal(err) + } + + // Create subdirectory + if err := Mkdir(root, "/subdir", MkdirOpts{ + MaxLinks: maxLinks, + MaxHAMTFanout: customFanout, + SizeEstimationMode: &sizeEstimationDisabled, + }); err != nil { + t.Fatal(err) + } + + subdir, err := Lookup(root, "/subdir") + if err != nil { + t.Fatal(err) + } + dir := subdir.(*Directory) + + // Verify MaxHAMTFanout was propagated + actualFanout := dir.unixfsDir.GetMaxHAMTFanout() + if actualFanout != customFanout { + t.Fatalf("MaxHAMTFanout not propagated: expected %d, got %d", customFanout, actualFanout) + } + + // Add files to trigger HAMT conversion (exceeds MaxLinks=3) + for i := range 4 { + child := dag.NodeWithData(ft.FilePBData([]byte("test"), 4)) + if err := ds.Add(ctx, child); err != nil { + t.Fatal(err) + } + if err := dir.AddChild(fmt.Sprintf("file%d", i), child); err != nil { + t.Fatal(err) + } + } + + // Flush and verify it became HAMT + if err := dir.Flush(); err != nil { + t.Fatal(err) + } + node, err := dir.GetNode() + if err != nil { + t.Fatal(err) + } + fsn, err := ft.FSNodeFromBytes(node.(*dag.ProtoNode).Data()) + if err != nil { + t.Fatal(err) + } + if fsn.Type() != ft.THAMTShard { + t.Fatalf("expected THAMTShard, got %s", fsn.Type()) + } + + // Verify the HAMT uses the custom fanout by checking the shard's fanout field. + // The fanout is stored in the UnixFS Data field. + if fsn.Fanout() != uint64(customFanout) { + t.Fatalf("expected HAMT fanout %d, got %d", customFanout, fsn.Fanout()) + } +} + +// TestRootOptionHAMTShardingSize verifies that WithHAMTShardingSize propagates +// to directories and affects HAMT conversion threshold. 
+func TestRootOptionHAMTShardingSize(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + ds := getDagserv(t) + + // Use very small threshold of 100 bytes (non-default: default is 256KiB). + // This should trigger HAMT conversion with just a few small files. + const customShardingSize = 100 + + // Use SizeEstimationBlock for accurate size tracking. + sizeEstimationBlock := uio.SizeEstimationBlock + + root, err := NewEmptyRoot(ctx, ds, nil, nil, + MkdirOpts{SizeEstimationMode: &sizeEstimationBlock}, + WithHAMTShardingSize(customShardingSize), + WithSizeEstimationMode(sizeEstimationBlock)) + if err != nil { + t.Fatal(err) + } + + // Create subdirectory + if err := Mkdir(root, "/subdir", MkdirOpts{ + SizeEstimationMode: &sizeEstimationBlock, + }); err != nil { + t.Fatal(err) + } + + subdir, err := Lookup(root, "/subdir") + if err != nil { + t.Fatal(err) + } + dir := subdir.(*Directory) + + // Verify HAMTShardingSize was propagated + actualShardingSize := dir.unixfsDir.GetHAMTShardingSize() + if actualShardingSize != customShardingSize { + t.Fatalf("HAMTShardingSize not propagated: expected %d, got %d", customShardingSize, actualShardingSize) + } + + // Verify starts as BasicDirectory + node, err := dir.GetNode() + if err != nil { + t.Fatal(err) + } + fsn, err := ft.FSNodeFromBytes(node.(*dag.ProtoNode).Data()) + if err != nil { + t.Fatal(err) + } + if fsn.Type() != ft.TDirectory { + t.Fatalf("expected TDirectory initially, got %s", fsn.Type()) + } + + // Add files to exceed the small 100-byte threshold. + // Each link adds roughly 40-50 bytes, so 3 files should exceed 100 bytes. 
+ for i := range 3 { + child := dag.NodeWithData(ft.FilePBData([]byte("test content"), 12)) + if err := ds.Add(ctx, child); err != nil { + t.Fatal(err) + } + if err := dir.AddChild(fmt.Sprintf("file%d", i), child); err != nil { + t.Fatal(err) + } + } + + // Flush and verify it became HAMT + if err := dir.Flush(); err != nil { + t.Fatal(err) + } + node, err = dir.GetNode() + if err != nil { + t.Fatal(err) + } + fsn, err = ft.FSNodeFromBytes(node.(*dag.ProtoNode).Data()) + if err != nil { + t.Fatal(err) + } + if fsn.Type() != ft.THAMTShard { + t.Fatalf("expected THAMTShard after exceeding custom sharding threshold (%d bytes), got %s", customShardingSize, fsn.Type()) + } +} + +// TestHAMTShardingSizeInheritance verifies that HAMTShardingSize propagates +// through nested subdirectories and after reload from DAG. +func TestHAMTShardingSizeInheritance(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + ds := getDagserv(t) + + // Use very small threshold + const customShardingSize = 150 + sizeEstimationBlock := uio.SizeEstimationBlock + + // Create root with custom settings + root, err := NewEmptyRoot(ctx, ds, nil, nil, + MkdirOpts{SizeEstimationMode: &sizeEstimationBlock}, + WithHAMTShardingSize(customShardingSize), + WithSizeEstimationMode(sizeEstimationBlock)) + if err != nil { + t.Fatal(err) + } + + // Create nested subdirectory + if err := Mkdir(root, "/a/b", MkdirOpts{Mkparents: true}); err != nil { + t.Fatal(err) + } + + // Add a file to /a/b + child := dag.NodeWithData(ft.FilePBData([]byte("test"), 4)) + if err := ds.Add(ctx, child); err != nil { + t.Fatal(err) + } + if err := PutNode(root, "/a/b/file", child); err != nil { + t.Fatal(err) + } + + // Flush to persist to DAG + if err := root.GetDirectory().Flush(); err != nil { + t.Fatal(err) + } + + // Get root node and reload (simulates daemon restart) + rootNode, err := root.GetDirectory().GetNode() + if err != nil { + t.Fatal(err) + } + + // Create new root from 
persisted node with same settings + root2, err := NewRoot(ctx, ds, rootNode.(*dag.ProtoNode), nil, nil, + WithHAMTShardingSize(customShardingSize), + WithSizeEstimationMode(sizeEstimationBlock)) + if err != nil { + t.Fatal(err) + } + + // Access reloaded nested directory (triggers cacheNode) + subdir2, err := Lookup(root2, "/a/b") + if err != nil { + t.Fatal(err) + } + dir2 := subdir2.(*Directory) + + // Verify settings were propagated after reload + actualShardingSize := dir2.unixfsDir.GetHAMTShardingSize() + if actualShardingSize != customShardingSize { + t.Fatalf("HAMTShardingSize not propagated after reload: expected %d, got %d", customShardingSize, actualShardingSize) + } + + // Add more files to exceed threshold + for i := 1; i < 5; i++ { + child := dag.NodeWithData(ft.FilePBData([]byte("test content"), 12)) + if err := ds.Add(ctx, child); err != nil { + t.Fatal(err) + } + if err := dir2.AddChild(fmt.Sprintf("file%d", i), child); err != nil { + t.Fatal(err) + } + } + + // Verify it became HAMT (proves custom threshold was applied after reload) + if err := dir2.Flush(); err != nil { + t.Fatal(err) + } + node, err := dir2.GetNode() + if err != nil { + t.Fatal(err) + } + fsn, err := ft.FSNodeFromBytes(node.(*dag.ProtoNode).Data()) + if err != nil { + t.Fatal(err) + } + if fsn.Type() != ft.THAMTShard { + t.Fatalf("expected THAMTShard after reload with custom threshold, got %s", fsn.Type()) + } +} + +// TestMkdirParentsChunker verifies that Mkdir with Mkparents propagates +// the Chunker from opts to intermediate directories, not just the final one. +func TestMkdirParentsChunker(t *testing.T) { + ctx := t.Context() + ds := getDagserv(t) + + // Create root with 256-byte chunker (the "default" for this test). + // This is what intermediate directories would incorrectly inherit + // if the bug exists. 
+ rootChunkSize := int64(256) + root, err := NewEmptyRoot(ctx, ds, nil, nil, MkdirOpts{}, + WithChunker(chunker.SizeSplitterGen(rootChunkSize))) + if err != nil { + t.Fatal(err) + } + + // Create /a/b/c with a DIFFERENT 128-byte chunker. + // The bug: intermediate directory /a inherits root's 256-byte chunker + // instead of the 128-byte chunker specified in opts. + optsChunkSize := int64(128) + err = Mkdir(root, "/a/b/c", MkdirOpts{ + Mkparents: true, + Chunker: chunker.SizeSplitterGen(optsChunkSize), + }) + if err != nil { + t.Fatal(err) + } + + // Create file in /a (the first intermediate directory) + nd := dag.NodeWithData(ft.FilePBData(nil, 0)) + nd.SetCidBuilder(cid.V1Builder{Codec: cid.DagProtobuf, MhType: multihash.SHA2_256}) + if err := ds.Add(ctx, nd); err != nil { + t.Fatal(err) + } + if err := PutNode(root, "/a/testfile", nd); err != nil { + t.Fatal(err) + } + + // Write 512 bytes + // With root's 256-byte chunker (bug): 2 chunks + // With opts' 128-byte chunker (correct): 4 chunks + data := make([]byte, 512) + for i := range data { + data[i] = byte(i % 256) + } + + fi, err := Lookup(root, "/a/testfile") + if err != nil { + t.Fatal(err) + } + fd, err := fi.(*File).Open(Flags{Write: true, Sync: true}) + if err != nil { + t.Fatal(err) + } + if _, err := fd.Write(data); err != nil { + t.Fatal(err) + } + if err := fd.Close(); err != nil { + t.Fatal(err) + } + + // Verify: should have 4 chunks (opts' chunker), not 2 (root's chunker) + node, err := fi.GetNode() + if err != nil { + t.Fatal(err) + } + links := node.Links() + if len(links) != 4 { + t.Fatalf("intermediate dir should use opts.Chunker (128-byte, 4 chunks), not root's (256-byte, 2 chunks); got %d links", len(links)) + } +} diff --git a/mfs/ops.go b/mfs/ops.go index b6345fc3b..8a4401f8b 100644 --- a/mfs/ops.go +++ b/mfs/ops.go @@ -9,6 +9,8 @@ import ( "strings" "time" + chunker "github.com/ipfs/boxo/chunker" + uio "github.com/ipfs/boxo/ipld/unixfs/io" cid "github.com/ipfs/go-cid" ipld 
"github.com/ipfs/go-ipld-format" ) @@ -120,13 +122,16 @@ func PutNode(r *Root, path string, nd ipld.Node) error { // MkdirOpts is used by Mkdir type MkdirOpts struct { - Mkparents bool - Flush bool - CidBuilder cid.Builder - Mode os.FileMode - ModTime time.Time - MaxLinks int - MaxHAMTFanout int + Mkparents bool + Flush bool + CidBuilder cid.Builder + Mode os.FileMode + ModTime time.Time + MaxLinks int + MaxHAMTFanout int + HAMTShardingSize int + SizeEstimationMode *uio.SizeEstimationMode + Chunker chunker.SplitterGen // chunker factory for files created in this directory } // Mkdir creates a directory at 'path' under the directory 'd', creating @@ -155,10 +160,26 @@ func Mkdir(r *Root, pth string, opts MkdirOpts) error { cur := r.GetDirectory() + // Inherit unset values from the root directory's underlying unixfs settings. + // This ensures that root-level config (WithMaxLinks, WithHAMTShardingSize, etc.) + // propagates to subdirectories created via Mkdir. + if opts.MaxLinks == 0 { + opts.MaxLinks = cur.unixfsDir.GetMaxLinks() + } + if opts.MaxHAMTFanout == 0 { + opts.MaxHAMTFanout = cur.unixfsDir.GetMaxHAMTFanout() + } + if opts.HAMTShardingSize == 0 { + opts.HAMTShardingSize = cur.unixfsDir.GetHAMTShardingSize() + } + // opts to make the parents leave MkParents and Flush as false. 
parentsOpts := MkdirOpts{ - MaxLinks: opts.MaxLinks, - MaxHAMTFanout: opts.MaxHAMTFanout, + MaxLinks: opts.MaxLinks, + MaxHAMTFanout: opts.MaxHAMTFanout, + HAMTShardingSize: opts.HAMTShardingSize, + SizeEstimationMode: opts.SizeEstimationMode, + Chunker: opts.Chunker, } for i, d := range parts[:len(parts)-1] { diff --git a/mfs/root.go b/mfs/root.go index 5f9e5f53b..039dba1c3 100644 --- a/mfs/root.go +++ b/mfs/root.go @@ -10,8 +10,10 @@ import ( "os" "time" + chunker "github.com/ipfs/boxo/chunker" dag "github.com/ipfs/boxo/ipld/merkledag" ft "github.com/ipfs/boxo/ipld/unixfs" + uio "github.com/ipfs/boxo/ipld/unixfs/io" "github.com/ipfs/boxo/provider" ipld "github.com/ipfs/go-ipld-format" logging "github.com/ipfs/go-log/v2" @@ -55,6 +57,9 @@ type parent interface { // `updateChildEntry` to update the entry pointing to the new directory, // this mechanism is in turn repeated until reaching the `Root`. updateChildEntry(c child) error + + // getChunker returns the chunker factory for files, or nil for default. + getChunker() chunker.SplitterGen } type NodeType int @@ -98,12 +103,64 @@ type Root struct { // Root directory of the MFS layout. dir *Directory - repub *Republisher - prov provider.MultihashProvider + repub *Republisher + prov provider.MultihashProvider + chunker chunker.SplitterGen // chunker factory for files, nil means default + + // Directory settings to propagate to child directories when loaded from disk. + maxLinks int + maxHAMTFanout int + hamtShardingSize int + sizeEstimationMode *uio.SizeEstimationMode +} + +// RootOption is a functional option for configuring a Root. +type RootOption func(*Root) + +// WithChunker sets the chunker factory for files created in this MFS. +// If not set (or nil), chunker.DefaultSplitter is used. +func WithChunker(c chunker.SplitterGen) RootOption { + return func(r *Root) { + r.chunker = c + } +} + +// WithMaxLinks sets the max links for directories created in this MFS. 
+// This is used to determine when to switch between BasicDirectory and HAMTDirectory. +func WithMaxLinks(n int) RootOption { + return func(r *Root) { + r.maxLinks = n + } +} + +// WithSizeEstimationMode sets the size estimation mode for directories in this MFS. +// This controls how directory size is estimated for HAMT sharding decisions. +func WithSizeEstimationMode(mode uio.SizeEstimationMode) RootOption { + return func(r *Root) { + r.sizeEstimationMode = &mode + } +} + +// WithMaxHAMTFanout sets the max fanout (width) for HAMT directories in this MFS. +// This controls the maximum number of children per HAMT bucket. +// Must be a power of 2 and multiple of 8. +func WithMaxHAMTFanout(n int) RootOption { + return func(r *Root) { + r.maxHAMTFanout = n + } +} + +// WithHAMTShardingSize sets the per-directory size threshold for switching to HAMT. +// When a directory's estimated size exceeds this threshold, it converts to HAMT. +// If not set (0), the global uio.HAMTShardingSize is used. +func WithHAMTShardingSize(size int) RootOption { + return func(r *Root) { + r.hamtShardingSize = size + } } // NewRoot creates a new Root and starts up a republisher routine for it. 
-func NewRoot(ctx context.Context, ds ipld.DAGService, node *dag.ProtoNode, pf PubFunc, prov provider.MultihashProvider) (*Root, error) { +func NewRoot(ctx context.Context, ds ipld.DAGService, node *dag.ProtoNode, pf PubFunc, prov provider.MultihashProvider, opts ...RootOption) (*Root, error) { var repub *Republisher if pf != nil { repub = NewRepublisher(pf, repubQuick, repubLong, node.Cid()) @@ -113,6 +170,10 @@ func NewRoot(ctx context.Context, ds ipld.DAGService, node *dag.ProtoNode, pf Pu repub: repub, } + for _, opt := range opts { + opt(root) + } + fsn, err := ft.FSNodeFromBytes(node.Data()) if err != nil { log.Error("IPNS pointer was not unixfs node") @@ -127,6 +188,20 @@ func NewRoot(ctx context.Context, ds ipld.DAGService, node *dag.ProtoNode, pf Pu return nil, err } + // Apply root-level directory settings + if root.maxLinks > 0 { + newDir.unixfsDir.SetMaxLinks(root.maxLinks) + } + if root.maxHAMTFanout > 0 { + newDir.unixfsDir.SetMaxHAMTFanout(root.maxHAMTFanout) + } + if root.hamtShardingSize > 0 { + newDir.unixfsDir.SetHAMTShardingSize(root.hamtShardingSize) + } + if root.sizeEstimationMode != nil { + newDir.unixfsDir.SetSizeEstimationMode(*root.sizeEstimationMode) + } + root.dir = newDir case ft.TFile, ft.TMetadata, ft.TRaw: return nil, fmt.Errorf("root can't be a file (unixfs type: %s)", fsn.Type()) @@ -140,10 +215,25 @@ func NewRoot(ctx context.Context, ds ipld.DAGService, node *dag.ProtoNode, pf Pu // NewEmptyRoot creates an empty Root directory with the given directory // options. A republisher is created if PubFunc is not nil. 
-func NewEmptyRoot(ctx context.Context, ds ipld.DAGService, pf PubFunc, prov provider.MultihashProvider, opts MkdirOpts) (*Root, error) { +func NewEmptyRoot(ctx context.Context, ds ipld.DAGService, pf PubFunc, prov provider.MultihashProvider, mkdirOpts MkdirOpts, rootOpts ...RootOption) (*Root, error) { root := new(Root) - dir, err := NewEmptyDirectory(ctx, "", root, ds, prov, opts) + for _, opt := range rootOpts { + opt(root) + } + + // Pass settings from root opts to mkdir opts if not already set + if mkdirOpts.Chunker == nil { + mkdirOpts.Chunker = root.chunker + } + if mkdirOpts.MaxHAMTFanout == 0 && root.maxHAMTFanout > 0 { + mkdirOpts.MaxHAMTFanout = root.maxHAMTFanout + } + if mkdirOpts.HAMTShardingSize == 0 && root.hamtShardingSize > 0 { + mkdirOpts.HAMTShardingSize = root.hamtShardingSize + } + + dir, err := NewEmptyDirectory(ctx, "", root, ds, prov, mkdirOpts) if err != nil { return nil, err } @@ -170,6 +260,16 @@ func (kr *Root) GetDirectory() *Directory { return kr.dir } +// GetChunker returns the chunker factory, or nil if using default. +func (kr *Root) GetChunker() chunker.SplitterGen { + return kr.chunker +} + +// getChunker implements the parent interface. +func (kr *Root) getChunker() chunker.SplitterGen { + return kr.chunker +} + // Flush signals that an update has occurred since the last publish, // and updates the Root republisher. // TODO: We are definitely abusing the "flush" terminology here.