Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -314,12 +314,12 @@ bash prerequisite.sh

```bash
cd megatron/shm_tensor_new_rdma
pip install -e .
pip install -e . --no-build-isolation
```

```bash
cd megatron/shm_tensor_new_rdma_pre_alloc
pip install -e .
pip install -e . --no-build-isolation
```

### Run
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -313,18 +313,23 @@ def write_preloaded_data(
mem_before = _process_memory()

local_results = []
extra_kwargs = {}
try:
import inspect
if "serialization_format" in inspect.signature(_write_item).parameters:
from torch.distributed.checkpoint.filesystem import SerializationFormat
extra_kwargs["serialization_format"] = SerializationFormat.TORCH_SAVE
file_name, storage_key, (bytes_data, tensor_data) = write_bucket
with open(file_name, "wb") as stream:
for write_item, data in bytes_data:
local_results.append(
_write_item(*transform_list, stream, data, write_item, storage_key)
_write_item(*transform_list, stream, data, write_item, storage_key, **extra_kwargs)
)

for write_item, tensor in tensor_data:
assert tensor.is_cpu
local_results.append(
_write_item(*transform_list, stream, tensor, write_item, storage_key)
_write_item(*transform_list, stream, tensor, write_item, storage_key, **extra_kwargs)
)

if use_fsync:
Expand Down
46 changes: 33 additions & 13 deletions megatron/shm_tensor_new_rdma/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
from setuptools import setup
from torch.utils.cpp_extension import CppExtension, BuildExtension, include_paths
import torch

# Compile the extensions with the same C++ ABI as the installed torch build;
# an ABI mismatch produces unresolved-symbol errors when the module is imported.
try:
    # Public API (preferred over reaching into the private torch._C namespace).
    abi_flag = int(torch.compiled_with_cxx11_abi())
except AttributeError:
    # Very old torch without the public helper: fall back to the private flag,
    # defaulting to the CXX11 ABI when it cannot be detected at all.
    _private_flag = getattr(torch._C, "_GLIBCXX_USE_CXX11_ABI", None)
    abi_flag = 1 if _private_flag is None else int(_private_flag)

abi_macro = f"-D_GLIBCXX_USE_CXX11_ABI={abi_flag}"

# Shared compiler flags for every extension in this package.
# NOTE(review): the CUDA include/lib paths are hard-coded to /usr/local/cuda —
# confirm this matches the deployment environment (CUDA_HOME may differ).
common_extra_compile_args = [
    "-fPIC",
    "-std=c++17",
    abi_macro,
    "-I/usr/local/cuda/include",
]

# Shared linker flags: resolve libcudart next to the module at runtime ($ORIGIN).
common_extra_link_args = [
    "-Wl,-rpath,$ORIGIN",
    "-L/usr/local/cuda/lib64",
    "-lcudart",
]


setup(
name="shm_tensor_new_rdma",
Expand All @@ -9,19 +30,18 @@
sources=["shm_tensor_new_rdma.cpp"],
include_dirs=include_paths(),
libraries=["rdmacm", "ibverbs", "torch", "torch_python", "c10"],
extra_compile_args=[
"-fPIC",
"-std=c++17",
"-D_GLIBCXX_USE_CXX11_ABI=0",
"-I/usr/local/cuda/include",
],
extra_link_args=[
"-Wl,-rpath,$ORIGIN",
"-L/usr/local/cuda/lib64",
"-lcudart",
],
)
extra_compile_args=common_extra_compile_args,
extra_link_args=common_extra_link_args,
),
CppExtension(
name="shm_tensor_new_rdma_pre_alloc",
sources=["shm_tensor_new_rdma_pre_alloc.cpp"],
include_dirs=include_paths(),
libraries=["rdmacm", "ibverbs", "torch", "torch_python", "c10"],
extra_compile_args=common_extra_compile_args,
extra_link_args=common_extra_link_args,
),
],
cmdclass={"build_ext": BuildExtension},
packages=[],
)
)
46 changes: 33 additions & 13 deletions megatron/shm_tensor_new_rdma_pre_alloc/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,47 @@

from setuptools import setup
from torch.utils.cpp_extension import CppExtension, BuildExtension, include_paths
import torch

# Pick up the C++ ABI flag of the installed torch build so the compiled
# extension links cleanly against libtorch; assume the CXX11 ABI when the
# flag cannot be detected.
_detected_abi = getattr(torch._C, "_GLIBCXX_USE_CXX11_ABI", None)
abi_flag = 1 if _detected_abi is None else _detected_abi

abi_macro = "-D_GLIBCXX_USE_CXX11_ABI={}".format(int(abi_flag))

# Compiler flags shared by every extension built from this setup script.
common_extra_compile_args = [
    "-fPIC",
    "-std=c++17",
    abi_macro,
    "-I/usr/local/cuda/include",
]

# Linker flags shared by every extension: embed an $ORIGIN rpath so
# libcudart is found next to the installed module at import time.
common_extra_link_args = [
    "-Wl,-rpath,$ORIGIN",
    "-L/usr/local/cuda/lib64",
    "-lcudart",
]


setup(
name="shm_tensor_new_rdma_pre_alloc",
name="shm_tensor_new_rdma",
ext_modules=[
CppExtension(
name="shm_tensor_new_rdma",
sources=["shm_tensor_new_rdma.cpp"],
include_dirs=include_paths(),
libraries=["rdmacm", "ibverbs", "torch", "torch_python", "c10"],
extra_compile_args=common_extra_compile_args,
extra_link_args=common_extra_link_args,
),
CppExtension(
name="shm_tensor_new_rdma_pre_alloc",
sources=["shm_tensor_new_rdma_pre_alloc.cpp"],
include_dirs=include_paths(),
libraries=["rdmacm", "ibverbs", "torch", "torch_python", "c10"],
extra_compile_args=[
"-fPIC",
"-std=c++20",
"-D_GLIBCXX_USE_CXX11_ABI=0",
"-I/usr/local/cuda/include",
],
extra_link_args=[
"-Wl,-rpath,$ORIGIN",
"-L/usr/local/cuda/lib64",
"-lcudart",
],
)
extra_compile_args=common_extra_compile_args,
extra_link_args=common_extra_link_args,
),
],
cmdclass={"build_ext": BuildExtension},
packages=[],
Expand Down
Loading