diff --git a/README.md b/README.md
index 773ebc81..1692c717 100644
--- a/README.md
+++ b/README.md
@@ -314,12 +314,12 @@ bash prerequisite.sh
 
 ```bash
 cd megatron/shm_tensor_new_rdma
-pip install -e .
+pip install -e . --no-build-isolation
 ```
 
 ```bash
 cd megatron/shm_tensor_new_rdma_pre_alloc
-pip install -e .
+pip install -e . --no-build-isolation
 ```
 
 ### Run
diff --git a/megatron/core/dist_checkpointing/strategies/filesystem_async.py b/megatron/core/dist_checkpointing/strategies/filesystem_async.py
index 967856d9..97871b45 100644
--- a/megatron/core/dist_checkpointing/strategies/filesystem_async.py
+++ b/megatron/core/dist_checkpointing/strategies/filesystem_async.py
@@ -313,18 +313,23 @@ def write_preloaded_data(
         mem_before = _process_memory()
 
         local_results = []
+        extra_kwargs = {}
         try:
+            import inspect
+            if "serialization_format" in inspect.signature(_write_item).parameters:
+                from torch.distributed.checkpoint.filesystem import SerializationFormat
+                extra_kwargs["serialization_format"] = SerializationFormat.TORCH_SAVE
             file_name, storage_key, (bytes_data, tensor_data) = write_bucket
             with open(file_name, "wb") as stream:
                 for write_item, data in bytes_data:
                     local_results.append(
-                        _write_item(*transform_list, stream, data, write_item, storage_key)
+                        _write_item(*transform_list, stream, data, write_item, storage_key, **extra_kwargs)
                     )
 
                 for write_item, tensor in tensor_data:
                     assert tensor.is_cpu
                     local_results.append(
-                        _write_item(*transform_list, stream, tensor, write_item, storage_key)
+                        _write_item(*transform_list, stream, tensor, write_item, storage_key, **extra_kwargs)
                     )
 
                 if use_fsync:
diff --git a/megatron/shm_tensor_new_rdma/setup.py b/megatron/shm_tensor_new_rdma/setup.py
index 1f33546a..2316bc53 100644
--- a/megatron/shm_tensor_new_rdma/setup.py
+++ b/megatron/shm_tensor_new_rdma/setup.py
@@ -1,5 +1,26 @@
 from setuptools import setup
 from torch.utils.cpp_extension import CppExtension, BuildExtension, include_paths
+import torch
+
+abi_flag = getattr(torch._C, "_GLIBCXX_USE_CXX11_ABI", None)
+if abi_flag is None:
+    abi_flag = 1
+
+abi_macro = f"-D_GLIBCXX_USE_CXX11_ABI={int(abi_flag)}"
+
+common_extra_compile_args = [
+    "-fPIC",
+    "-std=c++17",
+    abi_macro,
+    "-I/usr/local/cuda/include",
+]
+
+common_extra_link_args = [
+    "-Wl,-rpath,$ORIGIN",
+    "-L/usr/local/cuda/lib64",
+    "-lcudart",
+]
+
 
 setup(
     name="shm_tensor_new_rdma",
@@ -9,19 +30,18 @@
             sources=["shm_tensor_new_rdma.cpp"],
             include_dirs=include_paths(),
             libraries=["rdmacm", "ibverbs", "torch", "torch_python", "c10"],
-            extra_compile_args=[
-                "-fPIC",
-                "-std=c++17",
-                "-D_GLIBCXX_USE_CXX11_ABI=0",
-                "-I/usr/local/cuda/include",
-            ],
-            extra_link_args=[
-                "-Wl,-rpath,$ORIGIN",
-                "-L/usr/local/cuda/lib64",
-                "-lcudart",
-            ],
-        )
+            extra_compile_args=common_extra_compile_args,
+            extra_link_args=common_extra_link_args,
+        ),
+        CppExtension(
+            name="shm_tensor_new_rdma_pre_alloc",
+            sources=["shm_tensor_new_rdma_pre_alloc.cpp"],
+            include_dirs=include_paths(),
+            libraries=["rdmacm", "ibverbs", "torch", "torch_python", "c10"],
+            extra_compile_args=common_extra_compile_args,
+            extra_link_args=common_extra_link_args,
+        ),
     ],
     cmdclass={"build_ext": BuildExtension},
     packages=[],
-)
\ No newline at end of file
+)
diff --git a/megatron/shm_tensor_new_rdma_pre_alloc/setup.py b/megatron/shm_tensor_new_rdma_pre_alloc/setup.py
index d18b8c0b..d764992e 100644
--- a/megatron/shm_tensor_new_rdma_pre_alloc/setup.py
+++ b/megatron/shm_tensor_new_rdma_pre_alloc/setup.py
@@ -14,27 +14,47 @@
 from setuptools import setup
 from torch.utils.cpp_extension import CppExtension, BuildExtension, include_paths
+import torch
+
+abi_flag = getattr(torch._C, "_GLIBCXX_USE_CXX11_ABI", None)
+if abi_flag is None:
+    abi_flag = 1
+
+abi_macro = f"-D_GLIBCXX_USE_CXX11_ABI={int(abi_flag)}"
+
+common_extra_compile_args = [
+    "-fPIC",
+    "-std=c++17",
+    abi_macro,
+    "-I/usr/local/cuda/include",
+]
+
+common_extra_link_args = [
+    "-Wl,-rpath,$ORIGIN",
+    "-L/usr/local/cuda/lib64",
+    "-lcudart",
+]
+
 
 setup(
-    name="shm_tensor_new_rdma_pre_alloc",
+    name="shm_tensor_new_rdma",
     ext_modules=[
+        CppExtension(
+            name="shm_tensor_new_rdma",
+            sources=["shm_tensor_new_rdma.cpp"],
+            include_dirs=include_paths(),
+            libraries=["rdmacm", "ibverbs", "torch", "torch_python", "c10"],
+            extra_compile_args=common_extra_compile_args,
+            extra_link_args=common_extra_link_args,
+        ),
         CppExtension(
             name="shm_tensor_new_rdma_pre_alloc",
             sources=["shm_tensor_new_rdma_pre_alloc.cpp"],
             include_dirs=include_paths(),
             libraries=["rdmacm", "ibverbs", "torch", "torch_python", "c10"],
-            extra_compile_args=[
-                "-fPIC",
-                "-std=c++20",
-                "-D_GLIBCXX_USE_CXX11_ABI=0",
-                "-I/usr/local/cuda/include",
-            ],
-            extra_link_args=[
-                "-Wl,-rpath,$ORIGIN",
-                "-L/usr/local/cuda/lib64",
-                "-lcudart",
-            ],
-        )
+            extra_compile_args=common_extra_compile_args,
+            extra_link_args=common_extra_link_args,
+        ),
     ],
     cmdclass={"build_ext": BuildExtension},
     packages=[],
 )
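
Note on the README change: both `setup.py` files now `import torch` at module scope, so the editable installs must run with `--no-build-isolation`. Under PEP 517 build isolation, pip would otherwise execute `setup.py` in a throwaway environment where `torch` is not installed, and the build would fail before the ABI probe ever runs. With isolation disabled, the build sees the interpreter's own torch, so the detected ABI flag matches the runtime; a quick pre-install check is `python -c "import torch; print(torch.__version__, torch.compiled_with_cxx11_abi())"`.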
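
The `filesystem_async.py` hunk probes the private `_write_item` helper's signature so one code path works across PyTorch versions: releases whose `_write_item` accepts `serialization_format` get `SerializationFormat.TORCH_SAVE`, preserving the legacy `torch.save`-style payload, while older releases receive no extra keyword. The probe runs once per write bucket, so the `inspect.signature` cost is negligible. The pattern generalizes; below is a minimal sketch, where `call_with_supported_kwargs` is a hypothetical helper of my own, not part of the diff or of PyTorch:

```python
import inspect
from typing import Any, Callable


def call_with_supported_kwargs(fn: Callable[..., Any], *args: Any, **optional: Any) -> Any:
    """Call fn, forwarding only the optional kwargs its signature accepts.

    Same version-compatibility trick as the hunk above. Caveat: a fn that
    declares **kwargs does not list the optional keys among its named
    parameters, so this filter would silently drop arguments such a fn
    would actually accept.
    """
    params = inspect.signature(fn).parameters
    supported = {k: v for k, v in optional.items() if k in params}
    return fn(*args, **supported)
```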
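
Both `setup.py` hunks replace the hard-coded `-D_GLIBCXX_USE_CXX11_ABI=0` with a value read from the installed torch. The macro must match the ABI that torch's own C++ libraries were built with; a mismatch typically surfaces as undefined `std::__cxx11::basic_string` symbols when the extension is loaded. A standalone sketch of the same detection, preferring the public `torch.compiled_with_cxx11_abi()` and, like the diff, assuming the new ABI (=1) when nothing can be read:

```python
import torch


def glibcxx_abi_macro() -> str:
    """Return a -D_GLIBCXX_USE_CXX11_ABI flag matching the installed torch."""
    try:
        abi = int(torch.compiled_with_cxx11_abi())  # public API
    except AttributeError:
        # Private fallback read by the setup.py hunks; default to the
        # new ABI (=1) if that attribute is missing as well.
        abi = int(getattr(torch._C, "_GLIBCXX_USE_CXX11_ABI", True))
    return f"-D_GLIBCXX_USE_CXX11_ABI={abi}"


if __name__ == "__main__":
    print(glibcxx_abi_macro())  # e.g. -D_GLIBCXX_USE_CXX11_ABI=1
```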