From 6dad827e4343778cf1cde46315a32378bfd24242 Mon Sep 17 00:00:00 2001 From: zhaohaidao Date: Fri, 27 Feb 2026 04:36:17 +0000 Subject: [PATCH 1/9] docs: add cpp system and build usage guides with examples --- BUILD.bazel | 44 +++ MODULE.bazel | 64 ++++ bazel/cpp/BUILD.bazel | 9 + bazel/cpp/deps.bzl | 290 ++++++++++++++++++ bindings/cpp/.gitignore | 7 + bindings/cpp/BUILD.bazel | 32 +- bindings/cpp/CMakeLists.txt | 130 +++++++- bindings/cpp/MODULE.bazel | 37 +++ bindings/cpp/bazel/cpp/BUILD.bazel | 10 + bindings/cpp/bazel/cpp/deps.bzl | 290 ++++++++++++++++++ .../examples/bazel-consumer/build/BUILD.bazel | 25 ++ .../bazel-consumer/build/MODULE.bazel | 38 +++ .../cpp/examples/bazel-consumer/build/main.cc | 28 ++ .../bazel-consumer/system/BUILD.bazel | 25 ++ .../bazel-consumer/system/MODULE.bazel | 41 +++ .../examples/bazel-consumer/system/main.cc | 27 ++ bindings/cpp/scripts/ensure_protoc.sh | 184 +++++++++++ docs/cpp-bazel-usage.md | 276 +++++++++++++++++ docs/cpp-cmake-usage.md | 129 ++++++++ 19 files changed, 1677 insertions(+), 9 deletions(-) create mode 100644 BUILD.bazel create mode 100644 MODULE.bazel create mode 100644 bazel/cpp/BUILD.bazel create mode 100644 bazel/cpp/deps.bzl create mode 100644 bindings/cpp/bazel/cpp/BUILD.bazel create mode 100644 bindings/cpp/bazel/cpp/deps.bzl create mode 100644 bindings/cpp/examples/bazel-consumer/build/BUILD.bazel create mode 100644 bindings/cpp/examples/bazel-consumer/build/MODULE.bazel create mode 100644 bindings/cpp/examples/bazel-consumer/build/main.cc create mode 100644 bindings/cpp/examples/bazel-consumer/system/BUILD.bazel create mode 100644 bindings/cpp/examples/bazel-consumer/system/MODULE.bazel create mode 100644 bindings/cpp/examples/bazel-consumer/system/main.cc create mode 100755 bindings/cpp/scripts/ensure_protoc.sh create mode 100644 docs/cpp-bazel-usage.md create mode 100644 docs/cpp-cmake-usage.md diff --git a/BUILD.bazel b/BUILD.bazel new file mode 100644 index 00000000..1e04f817 --- /dev/null +++ 
b/BUILD.bazel @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +package(default_visibility = ["//visibility:public"]) + +alias( + name = "fluss_cpp", + actual = "//bindings/cpp:fluss_cpp", +) + +# Keep compatibility with existing CI entrypoint that builds //:consume_table. +alias( + name = "consume_table", + actual = "//bindings/cpp:fluss_cpp_example", +) + +alias( + name = "fluss_cpp_example", + actual = "//bindings/cpp:fluss_cpp_example", +) + +alias( + name = "fluss_cpp_admin_example", + actual = "//bindings/cpp:fluss_cpp_admin_example", +) + +alias( + name = "fluss_cpp_kv_example", + actual = "//bindings/cpp:fluss_cpp_kv_example", +) diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 00000000..fae7d3be --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module( + name = "red-fluss-rust", + version = "0.0.0", +) + +# NOTE: Keep this dependency block in sync with bindings/cpp/MODULE.bazel. +# SYNC_START bindings/cpp/MODULE.bazel +bazel_dep(name = "rules_cc", version = "0.0.17") +bazel_dep(name = "platforms", version = "0.0.10") +bazel_dep(name = "rules_foreign_cc", version = "0.15.1") +bazel_dep(name = "rules_python", version = "1.2.0") +# SYNC_END bindings/cpp/MODULE.bazel + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain(python_version = "3.12") +use_repo(python, "python_3_12") + +foreign_cc_tools = use_extension("@rules_foreign_cc//foreign_cc:extensions.bzl", "tools") +use_repo( + foreign_cc_tools, + "cmake_3.31.8_toolchains", + "cmake_src", + "ninja_1.13.0_toolchains", + "ninja_build_src", + "rules_foreign_cc_framework_toolchains", +) + +register_toolchains( + "@rules_foreign_cc_framework_toolchains//:all", + "@cmake_3.31.8_toolchains//:all", + "@ninja_1.13.0_toolchains//:all", + "@python_3_12//:all", + "@rules_foreign_cc//toolchains:all", +) + +cpp_sdk = use_extension("//bazel/cpp:deps.bzl", "cpp_sdk") +# Phase 1 keeps build mode behavior while moving Arrow build details out of +# MODULE.bazel. Registry/system modes will reuse the same extension entrypoint. 
+cpp_sdk.config( + mode = "build", + arrow_cpp_version = "19.0.1", + protobuf_version = "3.25.5", + ep_cmake_ranlib = "/usr/bin/ranlib", + ep_cmake_ar = "/usr/bin/ar", + ep_cmake_nm = "/usr/bin/nm", +) +use_repo(cpp_sdk, "apache_arrow_cpp") diff --git a/bazel/cpp/BUILD.bazel b/bazel/cpp/BUILD.bazel new file mode 100644 index 00000000..b1424e45 --- /dev/null +++ b/bazel/cpp/BUILD.bazel @@ -0,0 +1,9 @@ +package(default_visibility = ["//visibility:public"]) + +# Stable indirection target for the Arrow C++ dependency. The implementation +# repo name can change across modes (registry/build/system) without touching +# bindings/cpp/BUILD.bazel. +alias( + name = "arrow_cpp_dep", + actual = "@apache_arrow_cpp//:arrow_cpp", +) diff --git a/bazel/cpp/deps.bzl b/bazel/cpp/deps.bzl new file mode 100644 index 00000000..4abd5919 --- /dev/null +++ b/bazel/cpp/deps.bzl @@ -0,0 +1,290 @@ +"""Bzlmod extension for fluss C++ SDK dependency provisioning.""" + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +_ARROW_BUILD_FILE_TEMPLATE = """ +load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") + +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "all_srcs", + srcs = glob( + ["**"], + exclude = [ + "**/BUILD", + "**/BUILD.bazel", + ], + ), +) + +cmake( + name = "arrow_cpp", + lib_source = ":all_srcs", + working_directory = "cpp", + generate_args = ["-GUnix Makefiles"], + cache_entries = { + "CMAKE_BUILD_TYPE": "Release", + "CMAKE_INSTALL_LIBDIR": "lib", + "CMAKE_POSITION_INDEPENDENT_CODE": "ON", + "ARROW_BUILD_SHARED": "ON", + "ARROW_BUILD_STATIC": "OFF", + "ARROW_BUILD_TESTS": "OFF", + "ARROW_BUILD_EXAMPLES": "OFF", + "ARROW_BUILD_BENCHMARKS": "OFF", + "ARROW_BUILD_INTEGRATION": "OFF", + "ARROW_BUILD_UTILITIES": "OFF", + "ARROW_COMPUTE": "OFF", + "ARROW_CSV": "OFF", + "ARROW_DATASET": "OFF", + "ARROW_FILESYSTEM": "OFF", + "ARROW_JSON": "OFF", + "ARROW_PARQUET": "OFF", + "ARROW_IPC": "ON", + "ARROW_DEPENDENCY_SOURCE": "BUNDLED", + # 
Temporary workarounds for older images / Bazel sandbox toolchain detection. + "EP_CMAKE_RANLIB": "__EP_CMAKE_RANLIB__", + "EP_CMAKE_AR": "__EP_CMAKE_AR__", + "EP_CMAKE_NM": "__EP_CMAKE_NM__", + }, + out_include_dir = "include", + out_lib_dir = "lib", + out_shared_libs = select({ + "@platforms//os:macos": ["libarrow.dylib"], + "//conditions:default": [ + "libarrow.so", + "libarrow.so.1900", + "libarrow.so.1900.1.0", + ], + }), +) +""" + +_ARROW_PATCH_CMDS = [ + "sed -i 's|#define ARROW_CXX_COMPILER_FLAGS \"@CMAKE_CXX_FLAGS@\"|#define ARROW_CXX_COMPILER_FLAGS \"\"|' cpp/src/arrow/util/config.h.cmake", +] + +_SYSTEM_ARROW_BUILD_FILE_TEMPLATE = """ +load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +cc_import( + name = "arrow_shared_import", + shared_library = "__SYSTEM_ARROW_SHARED_LIBRARY__", +) + +filegroup( + name = "arrow_runtime_libs", + srcs = [ +__SYSTEM_ARROW_RUNTIME_SRCS__ + ], +) + +cc_library( + name = "arrow_cpp", + hdrs = [ +__SYSTEM_ARROW_HDRS__ + ], + includes = ["__SYSTEM_ARROW_INCLUDE_DIR__"], + data = [":arrow_runtime_libs"], + deps = [":arrow_shared_import"], +) +""" + +_ARROW_BUILD_VERSIONS = { + "19.0.1": { + "urls": ["https://github.com/apache/arrow/archive/refs/tags/apache-arrow-19.0.1.tar.gz"], + "strip_prefix": "arrow-apache-arrow-19.0.1", + }, +} + +_config_tag = tag_class(attrs = { + "mode": attr.string(default = "build"), + "arrow_cpp_version": attr.string(default = "19.0.1"), + "protobuf_version": attr.string(default = "3.25.5"), + "ep_cmake_ranlib": attr.string(default = "ranlib"), + "ep_cmake_ar": attr.string(default = "ar"), + "ep_cmake_nm": attr.string(default = "nm"), + "system_arrow_prefix": attr.string(default = "/usr"), + "system_arrow_include_dir": attr.string(default = "include"), + "system_arrow_shared_library": attr.string(default = "lib/x86_64-linux-gnu/libarrow.so"), + "system_arrow_runtime_glob": attr.string(default = "lib/x86_64-linux-gnu/libarrow.so*"), +}) 
+ +def _render_arrow_build_file(tag): + return _ARROW_BUILD_FILE_TEMPLATE.replace( + "__EP_CMAKE_RANLIB__", + tag.ep_cmake_ranlib, + ).replace( + "__EP_CMAKE_AR__", + tag.ep_cmake_ar, + ).replace( + "__EP_CMAKE_NM__", + tag.ep_cmake_nm, + ) + +def _render_system_arrow_build_file(tag, shared_library_override = None): + shared_library = shared_library_override if shared_library_override else (tag.system_arrow_shared_library if hasattr(tag, "system_arrow_shared_library") else tag.shared_library) + include_dir = tag.system_arrow_include_dir if hasattr(tag, "system_arrow_include_dir") else tag.include_dir + return _SYSTEM_ARROW_BUILD_FILE_TEMPLATE.replace( + "__SYSTEM_ARROW_SHARED_LIBRARY__", + "sysroot/" + shared_library, + ).replace( + "__SYSTEM_ARROW_INCLUDE_DIR__", + "sysroot/" + include_dir, + ) + +def _starlark_string_list(items): + if not items: + return "" + return "\n".join([' "%s",' % i for i in items]) + +def _list_files(repo_ctx, base_dir, suffixes): + result = repo_ctx.execute([ + "/usr/bin/find", + base_dir, + "-type", + "f", + ]) + if result.return_code != 0: + fail("failed to enumerate files under %s: %s" % (base_dir, result.stderr)) + files = [] + for line in result.stdout.splitlines(): + for suffix in suffixes: + if line.endswith(suffix): + files.append(line) + break + return sorted(files) + +def _system_arrow_repo_impl(repo_ctx): + prefix = repo_ctx.attr.prefix.rstrip("/") + include_dir = repo_ctx.attr.include_dir + shared_library = repo_ctx.attr.shared_library + runtime_glob = repo_ctx.attr.runtime_glob + + repo_ctx.execute(["/bin/mkdir", "-p", "sysroot"]) + copy_res = repo_ctx.execute(["/bin/cp", "-a", prefix + "/.", "sysroot"]) + if copy_res.return_code != 0: + fail("failed to copy system arrow prefix %s: %s" % (prefix, copy_res.stderr)) + + header_root = prefix + "/" + include_dir + headers = _list_files(repo_ctx, header_root, [".h", ".hpp"]) + header_srcs = [] + for h in headers: + if not h.startswith(prefix + "/"): + fail("header path %s is 
outside prefix %s" % (h, prefix)) + header_srcs.append("sysroot/" + h[len(prefix) + 1:]) + + runtime_dir = runtime_glob.rsplit("/", 1)[0] + runtime_prefix = runtime_glob.rsplit("/", 1)[1].replace("*", "") + runtime_files = _list_files(repo_ctx, prefix + "/" + runtime_dir, [""]) + runtime_srcs = [] + for f in runtime_files: + rel = f[len(prefix) + 1:] if f.startswith(prefix + "/") else None + if rel == None: + continue + if rel.startswith(runtime_dir + "/") and rel.rsplit("/", 1)[1].startswith(runtime_prefix): + runtime_srcs.append("sysroot/" + rel) + runtime_srcs = sorted(runtime_srcs) + + # Prefer a versioned soname file as the imported shared library so Bazel + # runfiles contain the exact filename required by the runtime loader. + shared_import_rel = "sysroot/" + shared_library + shared_basename = shared_library.rsplit("/", 1)[1] + soname_candidates = [] + for rel in runtime_srcs: + base = rel.rsplit("/", 1)[1] + if base == shared_basename: + continue + if base.startswith(shared_basename + "."): + soname_candidates.append(rel) + if soname_candidates: + # Prefer shortest suffix first (e.g. libarrow.so.1900 before + # libarrow.so.1900.1.0) to match ELF SONAME naming when available. 
+ soname_candidates = sorted(soname_candidates, key = lambda s: (len(s), s)) + shared_import_rel = soname_candidates[0] + + build_file = _render_system_arrow_build_file(repo_ctx.attr, shared_library_override = shared_import_rel[len("sysroot/"):]).replace( + "__SYSTEM_ARROW_HDRS__", + _starlark_string_list(header_srcs), + ).replace( + "__SYSTEM_ARROW_RUNTIME_SRCS__", + _starlark_string_list(runtime_srcs), + ) + repo_ctx.file("BUILD.bazel", build_file) + +_system_arrow_repository = repository_rule( + implementation = _system_arrow_repo_impl, + attrs = { + "prefix": attr.string(mandatory = True), + "include_dir": attr.string(mandatory = True), + "shared_library": attr.string(mandatory = True), + "runtime_glob": attr.string(mandatory = True), + }, + local = True, +) + +def _select_config(ctx): + selected = None + selected_owner = None + root_selected = None + for mod in ctx.modules: + for tag in mod.tags.config: + is_root = hasattr(mod, "is_root") and mod.is_root + if is_root: + if root_selected != None: + fail("cpp_sdk.config may only be declared once in the root module") + root_selected = tag + continue + if selected == None: + selected = tag + selected_owner = mod.name + elif selected_owner != mod.name: + # Prefer root override. Dependency defaults are tolerated as long + # as they come from a single module. 
+ fail("multiple dependency defaults for cpp_sdk.config without root override") + if root_selected != None: + return root_selected + return selected + +def _cpp_sdk_impl(ctx): + tag = _select_config(ctx) + if tag == None: + return + + if tag.mode == "registry": + return + + if tag.mode == "system": + _system_arrow_repository( + name = "apache_arrow_cpp", + prefix = tag.system_arrow_prefix, + include_dir = tag.system_arrow_include_dir, + shared_library = tag.system_arrow_shared_library, + runtime_glob = tag.system_arrow_runtime_glob, + ) + return + + if tag.mode != "build": + fail("unsupported cpp_sdk mode: %s" % tag.mode) + + arrow_version = _ARROW_BUILD_VERSIONS.get(tag.arrow_cpp_version) + if arrow_version == None: + fail("unsupported arrow_cpp_version for build mode: %s" % tag.arrow_cpp_version) + + http_archive( + name = "apache_arrow_cpp", + urls = arrow_version["urls"], + strip_prefix = arrow_version["strip_prefix"], + # TODO: Pin sha256/integrity once release packaging is finalized. + patch_cmds = _ARROW_PATCH_CMDS, + build_file_content = _render_arrow_build_file(tag), + ) + +cpp_sdk = module_extension( + implementation = _cpp_sdk_impl, + tag_classes = { + "config": _config_tag, + }, +) diff --git a/bindings/cpp/.gitignore b/bindings/cpp/.gitignore index da15a58b..4d5db858 100644 --- a/bindings/cpp/.gitignore +++ b/bindings/cpp/.gitignore @@ -15,3 +15,10 @@ bazel-testlogs bazel-cpp bazel-* MODULE.bazel.lock + +# Keep versioned Bazel consumer examples (name starts with bazel-). 
+!examples/bazel-consumer/ +!examples/bazel-consumer/** +examples/bazel-consumer/**/MODULE.bazel.lock +examples/bazel-consumer/**/bazel-* +examples/bazel-consumer/**/tmp.log diff --git a/bindings/cpp/BUILD.bazel b/bindings/cpp/BUILD.bazel index 0ae2ce32..8f845ab3 100644 --- a/bindings/cpp/BUILD.bazel +++ b/bindings/cpp/BUILD.bazel @@ -17,7 +17,7 @@ licenses(["notice"]) -load("@rules_cc//cc:defs.bzl", "cc_library", "cc_binary") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_import", "cc_library") config_setting( name = "debug_mode", @@ -49,6 +49,20 @@ genrule( ], cmd = """ set -e + if [ -n "$${PROTOC:-}" ]; then + if [ ! -x "$$PROTOC" ]; then + echo "Error: PROTOC is set but not executable: $$PROTOC" >&2 + exit 1 + fi + export PROTOC + else + PROTOC_BIN=$$(command -v protoc || true) + if [ -z "$$PROTOC_BIN" ]; then + echo "Error: protoc not found in PATH and PROTOC is not set" >&2 + exit 1 + fi + export PROTOC="$$PROTOC_BIN" + fi EXECROOT=$$(pwd) OUTPUT_LIB=$(location rust_lib_debug.a) OUTPUT_CC=$(location rust_bridge_cc_debug.cc) @@ -116,6 +130,20 @@ genrule( ], cmd = """ set -e + if [ -n "$${PROTOC:-}" ]; then + if [ ! 
-x "$$PROTOC" ]; then + echo "Error: PROTOC is set but not executable: $$PROTOC" >&2 + exit 1 + fi + export PROTOC + else + PROTOC_BIN=$$(command -v protoc || true) + if [ -z "$$PROTOC_BIN" ]; then + echo "Error: protoc not found in PATH and PROTOC is not set" >&2 + exit 1 + fi + export PROTOC="$$PROTOC_BIN" + fi EXECROOT=$$(pwd) OUTPUT_LIB=$(location rust_lib_release.a) OUTPUT_CC=$(location rust_bridge_cc_release.cc) @@ -303,6 +331,7 @@ cc_library( }), deps = [ ":rust_lib", + "//bazel/cpp:arrow_cpp_dep", ], visibility = ["//visibility:public"], ) @@ -405,4 +434,3 @@ cc_binary( }), visibility = ["//visibility:public"], ) - diff --git a/bindings/cpp/CMakeLists.txt b/bindings/cpp/CMakeLists.txt index a8f527ed..66b09808 100644 --- a/bindings/cpp/CMakeLists.txt +++ b/bindings/cpp/CMakeLists.txt @@ -27,9 +27,17 @@ include(FetchContent) set(FLUSS_GOOGLETEST_VERSION 1.15.2 CACHE STRING "version of GoogleTest") set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -find_package(Threads REQUIRED) +set(FLUSS_CPP_DEP_MODE "system" CACHE STRING "Dependency provisioning mode for fluss-cpp (system|build)") +set_property(CACHE FLUSS_CPP_DEP_MODE PROPERTY STRINGS system build) +set(FLUSS_CPP_ARROW_VERSION "19.0.1" CACHE STRING "Arrow C++ version baseline for fluss-cpp") +set(FLUSS_CPP_PROTOBUF_VERSION "3.25.5" CACHE STRING "Protobuf/protoc version baseline for fluss-cpp") +set(FLUSS_CPP_ARROW_SYSTEM_ROOT "" CACHE PATH "Optional Arrow installation prefix for system mode") +set(FLUSS_CPP_ARROW_SOURCE_URL + "https://github.com/apache/arrow/archive/refs/tags/apache-arrow-19.0.1.tar.gz" + CACHE STRING + "Arrow source archive URL used in build mode") -find_package(Arrow REQUIRED) +find_package(Threads REQUIRED) if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) @@ -47,10 +55,113 @@ if (FLUSS_DEV) set(FLUSS_ENABLE_TESTING ON) endif() +if (NOT FLUSS_CPP_DEP_MODE STREQUAL "system" AND NOT FLUSS_CPP_DEP_MODE STREQUAL "build") + message(FATAL_ERROR "Unsupported 
FLUSS_CPP_DEP_MODE='${FLUSS_CPP_DEP_MODE}'. Expected 'system' or 'build'.") +endif() + +find_program(FLUSS_PROTOC_EXECUTABLE NAMES protoc) +if (NOT FLUSS_PROTOC_EXECUTABLE) + message(FATAL_ERROR "protoc not found. Install protoc or set it in PATH. (Fluss baseline: ${FLUSS_CPP_PROTOBUF_VERSION})") +endif() + +if (DEFINED ENV{CARGO} AND NOT "$ENV{CARGO}" STREQUAL "" AND EXISTS "$ENV{CARGO}") + set(FLUSS_CARGO_EXECUTABLE "$ENV{CARGO}") +else() + if (DEFINED ENV{CARGO} AND NOT "$ENV{CARGO}" STREQUAL "") + get_filename_component(_FLUSS_CARGO_HINT_DIR "$ENV{CARGO}" DIRECTORY) + endif() + find_program(FLUSS_CARGO_EXECUTABLE NAMES cargo HINTS "${_FLUSS_CARGO_HINT_DIR}") +endif() +if (NOT FLUSS_CARGO_EXECUTABLE) + message(FATAL_ERROR "cargo not found. Install Rust toolchain or set CARGO/PATH.") +endif() + +execute_process( + COMMAND ${FLUSS_PROTOC_EXECUTABLE} --version + OUTPUT_VARIABLE FLUSS_PROTOC_VERSION_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) +string(REGEX MATCH "([0-9]+\\.[0-9]+\\.[0-9]+)" FLUSS_PROTOC_VERSION "${FLUSS_PROTOC_VERSION_OUTPUT}") +set(FLUSS_PROTOC_VERSION_NORM "${FLUSS_PROTOC_VERSION}") +set(FLUSS_CPP_PROTOBUF_VERSION_NORM "${FLUSS_CPP_PROTOBUF_VERSION}") +string(REGEX REPLACE "^3\\." "" FLUSS_PROTOC_VERSION_NORM "${FLUSS_PROTOC_VERSION_NORM}") +string(REGEX REPLACE "^3\\." "" FLUSS_CPP_PROTOBUF_VERSION_NORM "${FLUSS_CPP_PROTOBUF_VERSION_NORM}") +if (FLUSS_PROTOC_VERSION AND + NOT FLUSS_PROTOC_VERSION VERSION_EQUAL FLUSS_CPP_PROTOBUF_VERSION AND + NOT FLUSS_PROTOC_VERSION_NORM VERSION_EQUAL FLUSS_CPP_PROTOBUF_VERSION_NORM) + message(WARNING + "protoc version (${FLUSS_PROTOC_VERSION}) does not match Fluss baseline " + "(${FLUSS_CPP_PROTOBUF_VERSION}). 
Build may still work, but this is outside the tested baseline.") +endif() + +message(STATUS "Fluss C++ dependency mode: ${FLUSS_CPP_DEP_MODE}") +message(STATUS "Fluss C++ protoc executable: ${FLUSS_PROTOC_EXECUTABLE} (${FLUSS_PROTOC_VERSION_OUTPUT})") +message(STATUS "Fluss C++ cargo executable: ${FLUSS_CARGO_EXECUTABLE}") + +if (FLUSS_CPP_DEP_MODE STREQUAL "system") + if (FLUSS_CPP_ARROW_SYSTEM_ROOT) + list(APPEND CMAKE_PREFIX_PATH "${FLUSS_CPP_ARROW_SYSTEM_ROOT}") + set(Arrow_ROOT "${FLUSS_CPP_ARROW_SYSTEM_ROOT}") + endif() + + find_package(Arrow REQUIRED) + + if (DEFINED Arrow_VERSION AND Arrow_VERSION AND NOT Arrow_VERSION VERSION_EQUAL FLUSS_CPP_ARROW_VERSION) + message(WARNING + "Arrow version (${Arrow_VERSION}) does not match Fluss baseline " + "(${FLUSS_CPP_ARROW_VERSION}). Build may still work, but this is outside the tested baseline.") + endif() +else() + # Build mode: provision Arrow C++ from source in-tree. + set(ARROW_BUILD_SHARED ON CACHE BOOL "" FORCE) + set(ARROW_BUILD_STATIC OFF CACHE BOOL "" FORCE) + set(ARROW_BUILD_TESTS OFF CACHE BOOL "" FORCE) + set(ARROW_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) + set(ARROW_BUILD_BENCHMARKS OFF CACHE BOOL "" FORCE) + set(ARROW_BUILD_INTEGRATION OFF CACHE BOOL "" FORCE) + set(ARROW_BUILD_UTILITIES OFF CACHE BOOL "" FORCE) + set(ARROW_COMPUTE OFF CACHE BOOL "" FORCE) + set(ARROW_CSV OFF CACHE BOOL "" FORCE) + set(ARROW_DATASET OFF CACHE BOOL "" FORCE) + set(ARROW_FILESYSTEM OFF CACHE BOOL "" FORCE) + set(ARROW_JSON OFF CACHE BOOL "" FORCE) + set(ARROW_PARQUET OFF CACHE BOOL "" FORCE) + set(ARROW_IPC ON CACHE BOOL "" FORCE) + # Reduce third-party sub-build complexity in build mode. 
+ set(ARROW_JEMALLOC OFF CACHE BOOL "" FORCE) + set(ARROW_MIMALLOC OFF CACHE BOOL "" FORCE) + set(ARROW_DEPENDENCY_SOURCE BUNDLED CACHE STRING "" FORCE) + set(ARROW_SIMD_LEVEL NONE CACHE STRING "" FORCE) + set(ARROW_RUNTIME_SIMD_LEVEL NONE CACHE STRING "" FORCE) + + FetchContent_Declare( + apache_arrow_src + URL ${FLUSS_CPP_ARROW_SOURCE_URL} + SOURCE_SUBDIR cpp + ) + FetchContent_MakeAvailable(apache_arrow_src) + set(FLUSS_CPP_ARROW_EXTRA_INCLUDE_DIRS + "${apache_arrow_src_SOURCE_DIR}/cpp/src" + "${apache_arrow_src_BINARY_DIR}/src") + + if (TARGET arrow_shared AND NOT TARGET Arrow::arrow_shared) + add_library(Arrow::arrow_shared ALIAS arrow_shared) + endif() + if (NOT TARGET Arrow::arrow_shared) + message(FATAL_ERROR "Arrow build mode did not produce target Arrow::arrow_shared (or arrow_shared).") + endif() +endif() + # Get cargo target dir -execute_process(COMMAND cargo locate-project --workspace --message-format plain +execute_process(COMMAND ${FLUSS_CARGO_EXECUTABLE} locate-project --workspace --message-format plain OUTPUT_VARIABLE CARGO_TARGET_DIR WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) +if (NOT CARGO_TARGET_DIR) + message(FATAL_ERROR + "Failed to resolve Cargo workspace target dir via '${FLUSS_CARGO_EXECUTABLE} locate-project'. 
" + "Check Rust toolchain installation and PATH/CARGO.") +endif() string(REGEX REPLACE "/Cargo.toml\n$" "/target" CARGO_TARGET_DIR "${CARGO_TARGET_DIR}") set(CARGO_MANIFEST ${PROJECT_SOURCE_DIR}/Cargo.toml) @@ -77,7 +188,7 @@ if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") endif() add_custom_target(cargo_build - COMMAND cargo build --manifest-path ${CARGO_MANIFEST} ${CARGO_BUILD_FLAGS} + COMMAND ${CMAKE_COMMAND} -E env PROTOC=${FLUSS_PROTOC_EXECUTABLE} ${FLUSS_CARGO_EXECUTABLE} build --manifest-path ${CARGO_MANIFEST} ${CARGO_BUILD_FLAGS} BYPRODUCTS ${RUST_BRIDGE_CPP} ${RUST_LIB} ${RUST_HEADER_FILE} DEPENDS ${RUST_SOURCE_FILE} USES_TERMINAL @@ -88,6 +199,9 @@ add_library(fluss_cpp STATIC ${CPP_SOURCE_FILE} ${RUST_BRIDGE_CPP}) target_sources(fluss_cpp PUBLIC ${CPP_HEADER_FILE}) target_sources(fluss_cpp PRIVATE ${RUST_HEADER_FILE}) target_include_directories(fluss_cpp PUBLIC ${CPP_INCLUDE_DIR}) +if (FLUSS_CPP_ARROW_EXTRA_INCLUDE_DIRS) + target_include_directories(fluss_cpp PUBLIC ${FLUSS_CPP_ARROW_EXTRA_INCLUDE_DIRS}) +endif() target_link_libraries(fluss_cpp PUBLIC ${RUST_LIB}) target_link_libraries(fluss_cpp PRIVATE ${CMAKE_DL_LIBS} Threads::Threads) target_link_libraries(fluss_cpp PUBLIC Arrow::arrow_shared) @@ -114,9 +228,11 @@ target_link_libraries(fluss_cpp_kv_example PRIVATE Arrow::arrow_shared) target_compile_definitions(fluss_cpp_kv_example PRIVATE ARROW_FOUND) target_include_directories(fluss_cpp_kv_example PUBLIC ${CPP_INCLUDE_DIR}) -set_target_properties(fluss_cpp - PROPERTIES ADDITIONAL_CLEAN_FILES ${CARGO_TARGET_DIR} -) +if (CARGO_TARGET_DIR) + set_target_properties(fluss_cpp + PROPERTIES ADDITIONAL_CLEAN_FILES "${CARGO_TARGET_DIR}" + ) +endif() add_dependencies(fluss_cpp cargo_build) if (FLUSS_ENABLE_ADDRESS_SANITIZER) diff --git a/bindings/cpp/MODULE.bazel b/bindings/cpp/MODULE.bazel index f75d3e6f..9771774c 100644 --- a/bindings/cpp/MODULE.bazel +++ b/bindings/cpp/MODULE.bazel @@ -21,3 +21,40 @@ module( bazel_dep(name = "rules_cc", version = "0.0.17") 
bazel_dep(name = "platforms", version = "0.0.10") +bazel_dep(name = "rules_foreign_cc", version = "0.15.1") +bazel_dep(name = "rules_python", version = "1.2.0") + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain(python_version = "3.12") +use_repo(python, "python_3_12") + +foreign_cc_tools = use_extension("@rules_foreign_cc//foreign_cc:extensions.bzl", "tools") +use_repo( + foreign_cc_tools, + "cmake_3.31.8_toolchains", + "cmake_src", + "ninja_1.13.0_toolchains", + "ninja_build_src", + "rules_foreign_cc_framework_toolchains", +) + +register_toolchains( + "@rules_foreign_cc_framework_toolchains//:all", + "@cmake_3.31.8_toolchains//:all", + "@ninja_1.13.0_toolchains//:all", + "@python_3_12//:all", + "@rules_foreign_cc//toolchains:all", +) + +cpp_sdk = use_extension("//bazel/cpp:deps.bzl", "cpp_sdk") +# Phase 1 keeps build mode behavior while moving Arrow build details out of +# MODULE.bazel. Registry/system modes will reuse the same extension entrypoint. +cpp_sdk.config( + mode = "build", + arrow_cpp_version = "19.0.1", + protobuf_version = "3.25.5", + ep_cmake_ranlib = "/usr/bin/ranlib", + ep_cmake_ar = "/usr/bin/ar", + ep_cmake_nm = "/usr/bin/nm", +) +use_repo(cpp_sdk, "apache_arrow_cpp") diff --git a/bindings/cpp/bazel/cpp/BUILD.bazel b/bindings/cpp/bazel/cpp/BUILD.bazel new file mode 100644 index 00000000..ee7150c5 --- /dev/null +++ b/bindings/cpp/bazel/cpp/BUILD.bazel @@ -0,0 +1,10 @@ +package(default_visibility = ["//visibility:public"]) + +# Stable indirection target for the Arrow C++ dependency. The implementation +# repo name can change across modes (registry/build/system) without touching +# bindings/cpp/BUILD.bazel. 
+alias( + name = "arrow_cpp_dep", + actual = "@apache_arrow_cpp//:arrow_cpp", +) + diff --git a/bindings/cpp/bazel/cpp/deps.bzl b/bindings/cpp/bazel/cpp/deps.bzl new file mode 100644 index 00000000..4abd5919 --- /dev/null +++ b/bindings/cpp/bazel/cpp/deps.bzl @@ -0,0 +1,290 @@ +"""Bzlmod extension for fluss C++ SDK dependency provisioning.""" + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +_ARROW_BUILD_FILE_TEMPLATE = """ +load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") + +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "all_srcs", + srcs = glob( + ["**"], + exclude = [ + "**/BUILD", + "**/BUILD.bazel", + ], + ), +) + +cmake( + name = "arrow_cpp", + lib_source = ":all_srcs", + working_directory = "cpp", + generate_args = ["-GUnix Makefiles"], + cache_entries = { + "CMAKE_BUILD_TYPE": "Release", + "CMAKE_INSTALL_LIBDIR": "lib", + "CMAKE_POSITION_INDEPENDENT_CODE": "ON", + "ARROW_BUILD_SHARED": "ON", + "ARROW_BUILD_STATIC": "OFF", + "ARROW_BUILD_TESTS": "OFF", + "ARROW_BUILD_EXAMPLES": "OFF", + "ARROW_BUILD_BENCHMARKS": "OFF", + "ARROW_BUILD_INTEGRATION": "OFF", + "ARROW_BUILD_UTILITIES": "OFF", + "ARROW_COMPUTE": "OFF", + "ARROW_CSV": "OFF", + "ARROW_DATASET": "OFF", + "ARROW_FILESYSTEM": "OFF", + "ARROW_JSON": "OFF", + "ARROW_PARQUET": "OFF", + "ARROW_IPC": "ON", + "ARROW_DEPENDENCY_SOURCE": "BUNDLED", + # Temporary workarounds for older images / Bazel sandbox toolchain detection. 
+ "EP_CMAKE_RANLIB": "__EP_CMAKE_RANLIB__", + "EP_CMAKE_AR": "__EP_CMAKE_AR__", + "EP_CMAKE_NM": "__EP_CMAKE_NM__", + }, + out_include_dir = "include", + out_lib_dir = "lib", + out_shared_libs = select({ + "@platforms//os:macos": ["libarrow.dylib"], + "//conditions:default": [ + "libarrow.so", + "libarrow.so.1900", + "libarrow.so.1900.1.0", + ], + }), +) +""" + +_ARROW_PATCH_CMDS = [ + "sed -i 's|#define ARROW_CXX_COMPILER_FLAGS \"@CMAKE_CXX_FLAGS@\"|#define ARROW_CXX_COMPILER_FLAGS \"\"|' cpp/src/arrow/util/config.h.cmake", +] + +_SYSTEM_ARROW_BUILD_FILE_TEMPLATE = """ +load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +cc_import( + name = "arrow_shared_import", + shared_library = "__SYSTEM_ARROW_SHARED_LIBRARY__", +) + +filegroup( + name = "arrow_runtime_libs", + srcs = [ +__SYSTEM_ARROW_RUNTIME_SRCS__ + ], +) + +cc_library( + name = "arrow_cpp", + hdrs = [ +__SYSTEM_ARROW_HDRS__ + ], + includes = ["__SYSTEM_ARROW_INCLUDE_DIR__"], + data = [":arrow_runtime_libs"], + deps = [":arrow_shared_import"], +) +""" + +_ARROW_BUILD_VERSIONS = { + "19.0.1": { + "urls": ["https://github.com/apache/arrow/archive/refs/tags/apache-arrow-19.0.1.tar.gz"], + "strip_prefix": "arrow-apache-arrow-19.0.1", + }, +} + +_config_tag = tag_class(attrs = { + "mode": attr.string(default = "build"), + "arrow_cpp_version": attr.string(default = "19.0.1"), + "protobuf_version": attr.string(default = "3.25.5"), + "ep_cmake_ranlib": attr.string(default = "ranlib"), + "ep_cmake_ar": attr.string(default = "ar"), + "ep_cmake_nm": attr.string(default = "nm"), + "system_arrow_prefix": attr.string(default = "/usr"), + "system_arrow_include_dir": attr.string(default = "include"), + "system_arrow_shared_library": attr.string(default = "lib/x86_64-linux-gnu/libarrow.so"), + "system_arrow_runtime_glob": attr.string(default = "lib/x86_64-linux-gnu/libarrow.so*"), +}) + +def _render_arrow_build_file(tag): + return 
_ARROW_BUILD_FILE_TEMPLATE.replace( + "__EP_CMAKE_RANLIB__", + tag.ep_cmake_ranlib, + ).replace( + "__EP_CMAKE_AR__", + tag.ep_cmake_ar, + ).replace( + "__EP_CMAKE_NM__", + tag.ep_cmake_nm, + ) + +def _render_system_arrow_build_file(tag, shared_library_override = None): + shared_library = shared_library_override if shared_library_override else (tag.system_arrow_shared_library if hasattr(tag, "system_arrow_shared_library") else tag.shared_library) + include_dir = tag.system_arrow_include_dir if hasattr(tag, "system_arrow_include_dir") else tag.include_dir + return _SYSTEM_ARROW_BUILD_FILE_TEMPLATE.replace( + "__SYSTEM_ARROW_SHARED_LIBRARY__", + "sysroot/" + shared_library, + ).replace( + "__SYSTEM_ARROW_INCLUDE_DIR__", + "sysroot/" + include_dir, + ) + +def _starlark_string_list(items): + if not items: + return "" + return "\n".join([' "%s",' % i for i in items]) + +def _list_files(repo_ctx, base_dir, suffixes): + result = repo_ctx.execute([ + "/usr/bin/find", + base_dir, + "-type", + "f", + ]) + if result.return_code != 0: + fail("failed to enumerate files under %s: %s" % (base_dir, result.stderr)) + files = [] + for line in result.stdout.splitlines(): + for suffix in suffixes: + if line.endswith(suffix): + files.append(line) + break + return sorted(files) + +def _system_arrow_repo_impl(repo_ctx): + prefix = repo_ctx.attr.prefix.rstrip("/") + include_dir = repo_ctx.attr.include_dir + shared_library = repo_ctx.attr.shared_library + runtime_glob = repo_ctx.attr.runtime_glob + + repo_ctx.execute(["/bin/mkdir", "-p", "sysroot"]) + copy_res = repo_ctx.execute(["/bin/cp", "-a", prefix + "/.", "sysroot"]) + if copy_res.return_code != 0: + fail("failed to copy system arrow prefix %s: %s" % (prefix, copy_res.stderr)) + + header_root = prefix + "/" + include_dir + headers = _list_files(repo_ctx, header_root, [".h", ".hpp"]) + header_srcs = [] + for h in headers: + if not h.startswith(prefix + "/"): + fail("header path %s is outside prefix %s" % (h, prefix)) + 
header_srcs.append("sysroot/" + h[len(prefix) + 1:]) + + runtime_dir = runtime_glob.rsplit("/", 1)[0] + runtime_prefix = runtime_glob.rsplit("/", 1)[1].replace("*", "") + runtime_files = _list_files(repo_ctx, prefix + "/" + runtime_dir, [""]) + runtime_srcs = [] + for f in runtime_files: + rel = f[len(prefix) + 1:] if f.startswith(prefix + "/") else None + if rel == None: + continue + if rel.startswith(runtime_dir + "/") and rel.rsplit("/", 1)[1].startswith(runtime_prefix): + runtime_srcs.append("sysroot/" + rel) + runtime_srcs = sorted(runtime_srcs) + + # Prefer a versioned soname file as the imported shared library so Bazel + # runfiles contain the exact filename required by the runtime loader. + shared_import_rel = "sysroot/" + shared_library + shared_basename = shared_library.rsplit("/", 1)[1] + soname_candidates = [] + for rel in runtime_srcs: + base = rel.rsplit("/", 1)[1] + if base == shared_basename: + continue + if base.startswith(shared_basename + "."): + soname_candidates.append(rel) + if soname_candidates: + # Prefer shortest suffix first (e.g. libarrow.so.1900 before + # libarrow.so.1900.1.0) to match ELF SONAME naming when available. 
+ soname_candidates = sorted(soname_candidates, key = lambda s: (len(s), s)) + shared_import_rel = soname_candidates[0] + + build_file = _render_system_arrow_build_file(repo_ctx.attr, shared_library_override = shared_import_rel[len("sysroot/"):]).replace( + "__SYSTEM_ARROW_HDRS__", + _starlark_string_list(header_srcs), + ).replace( + "__SYSTEM_ARROW_RUNTIME_SRCS__", + _starlark_string_list(runtime_srcs), + ) + repo_ctx.file("BUILD.bazel", build_file) + +_system_arrow_repository = repository_rule( + implementation = _system_arrow_repo_impl, + attrs = { + "prefix": attr.string(mandatory = True), + "include_dir": attr.string(mandatory = True), + "shared_library": attr.string(mandatory = True), + "runtime_glob": attr.string(mandatory = True), + }, + local = True, +) + +def _select_config(ctx): + selected = None + selected_owner = None + root_selected = None + for mod in ctx.modules: + for tag in mod.tags.config: + is_root = hasattr(mod, "is_root") and mod.is_root + if is_root: + if root_selected != None: + fail("cpp_sdk.config may only be declared once in the root module") + root_selected = tag + continue + if selected == None: + selected = tag + selected_owner = mod.name + elif selected_owner != mod.name: + # Prefer root override. Dependency defaults are tolerated as long + # as they come from a single module. 
+ fail("multiple dependency defaults for cpp_sdk.config without root override") + if root_selected != None: + return root_selected + return selected + +def _cpp_sdk_impl(ctx): + tag = _select_config(ctx) + if tag == None: + return + + if tag.mode == "registry": + return + + if tag.mode == "system": + _system_arrow_repository( + name = "apache_arrow_cpp", + prefix = tag.system_arrow_prefix, + include_dir = tag.system_arrow_include_dir, + shared_library = tag.system_arrow_shared_library, + runtime_glob = tag.system_arrow_runtime_glob, + ) + return + + if tag.mode != "build": + fail("unsupported cpp_sdk mode: %s" % tag.mode) + + arrow_version = _ARROW_BUILD_VERSIONS.get(tag.arrow_cpp_version) + if arrow_version == None: + fail("unsupported arrow_cpp_version for build mode: %s" % tag.arrow_cpp_version) + + http_archive( + name = "apache_arrow_cpp", + urls = arrow_version["urls"], + strip_prefix = arrow_version["strip_prefix"], + # TODO: Pin sha256/integrity once release packaging is finalized. + patch_cmds = _ARROW_PATCH_CMDS, + build_file_content = _render_arrow_build_file(tag), + ) + +cpp_sdk = module_extension( + implementation = _cpp_sdk_impl, + tag_classes = { + "config": _config_tag, + }, +) diff --git a/bindings/cpp/examples/bazel-consumer/build/BUILD.bazel b/bindings/cpp/examples/bazel-consumer/build/BUILD.bazel new file mode 100644 index 00000000..ba62b18f --- /dev/null +++ b/bindings/cpp/examples/bazel-consumer/build/BUILD.bazel @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +load("@rules_cc//cc:defs.bzl", "cc_binary") + +cc_binary( + name = "consumer_build", + srcs = ["main.cc"], + copts = ["-std=c++17"], + deps = ["@red-fluss-rust//:fluss_cpp"], +) diff --git a/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel new file mode 100644 index 00000000..4f22b219 --- /dev/null +++ b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module(name = "fluss_cpp_consumer_build") + +bazel_dep(name = "rules_cc", version = "0.2.14") +bazel_dep(name = "red-fluss-rust", version = "0.1.0") + +# Local override for repository-local validation only. 
+local_path_override( + module_name = "red-fluss-rust", + path = "../../../../../", +) + +fluss_cpp = use_extension("@red-fluss-rust//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp.config( + mode = "build", + protobuf_version = "3.25.5", + arrow_cpp_version = "19.0.1", + ep_cmake_ranlib = "/usr/bin/ranlib", + ep_cmake_ar = "/usr/bin/ar", + ep_cmake_nm = "/usr/bin/nm", +) +use_repo(fluss_cpp, "apache_arrow_cpp") diff --git a/bindings/cpp/examples/bazel-consumer/build/main.cc b/bindings/cpp/examples/bazel-consumer/build/main.cc new file mode 100644 index 00000000..87e5b682 --- /dev/null +++ b/bindings/cpp/examples/bazel-consumer/build/main.cc @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "fluss.hpp" + +#include <iostream> + +int main() { + fluss::TablePath table_path("demo_db", "demo_table"); + std::cout << "Bazel build-mode dependency example ready: " + << table_path.ToString() << std::endl; + return 0; +} + diff --git a/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel b/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel new file mode 100644 index 00000000..2cf5e820 --- /dev/null +++ b/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +load("@rules_cc//cc:defs.bzl", "cc_binary") + +cc_binary( + name = "consumer_system", + srcs = ["main.cc"], + copts = ["-std=c++17"], + deps = ["@red-fluss-rust//:fluss_cpp"], +) diff --git a/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel new file mode 100644 index 00000000..f8f23397 --- /dev/null +++ b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module(name = "fluss_cpp_consumer_system") + +bazel_dep(name = "rules_cc", version = "0.2.14") +bazel_dep(name = "red-fluss-rust", version = "0.1.0") + +local_path_override( + module_name = "red-fluss-rust", + path = "../../../../../", +) + +# Intended interface for preinstalled protoc + Arrow C++ environments. +fluss_cpp = use_extension("@red-fluss-rust//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp.config( + mode = "system", + protobuf_version = "3.25.5", + arrow_cpp_version = "19.0.1", + # Adjust these paths for your environment. + # Ubuntu 22.04 (apt / custom package) commonly uses lib/x86_64-linux-gnu. + system_arrow_prefix = "/usr", + system_arrow_include_dir = "include", + system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so", + system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so*", +) +use_repo(fluss_cpp, "apache_arrow_cpp") diff --git a/bindings/cpp/examples/bazel-consumer/system/main.cc b/bindings/cpp/examples/bazel-consumer/system/main.cc new file mode 100644 index 00000000..b1f0b70b --- /dev/null +++ b/bindings/cpp/examples/bazel-consumer/system/main.cc @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "fluss.hpp" + +#include <iostream> + +int main() { + fluss::TablePath table_path("demo_db", "demo_table"); + std::cout << "Bazel system-mode dependency example ready: " + << table_path.ToString() << std::endl; + return 0; +} diff --git a/bindings/cpp/scripts/ensure_protoc.sh b/bindings/cpp/scripts/ensure_protoc.sh new file mode 100755 index 00000000..da7adb87 --- /dev/null +++ b/bindings/cpp/scripts/ensure_protoc.sh @@ -0,0 +1,184 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +set -euo pipefail + +PROTOBUF_BASELINE_VERSION="${PROTOBUF_BASELINE_VERSION:-3.25.5}" +PROTOC_INSTALL_ROOT="${PROTOC_INSTALL_ROOT:-/tmp/fluss-cpp-tools}" +PROTOC_OS="${PROTOC_OS:-linux}" +PROTOC_ARCH="${PROTOC_ARCH:-x86_64}" +PROTOC_FORCE_INSTALL="${PROTOC_FORCE_INSTALL:-0}" +PROTOC_PRINT_PATH_ONLY="${PROTOC_PRINT_PATH_ONLY:-0}" + +usage() { + cat <<'EOF' +Usage: bindings/cpp/scripts/ensure_protoc.sh [--print-path] + +Ensures a protoc binary matching the configured protobuf baseline is available. +Installs into a local cache directory (default: /tmp/fluss-cpp-tools) and prints +the protoc path on stdout. + +Env vars: + PROTOBUF_BASELINE_VERSION Baseline protobuf version (default: 3.25.5) + PROTOC_INSTALL_ROOT Local cache root (default: /tmp/fluss-cpp-tools) + PROTOC_OS protoc package OS (default: linux) + PROTOC_ARCH protoc package arch (default: x86_64) + PROTOC_FORCE_INSTALL 1 to force re-download + BAZEL_PROXY_URL Optional proxy (sets curl/wget proxy envs if present) +EOF +} + +for arg in "$@"; do + case "$arg" in + --print-path) + PROTOC_PRINT_PATH_ONLY=1 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $arg" >&2 + usage >&2 + exit 1 + ;; + esac +done + +setup_proxy_env() { + if [[ -n "${BAZEL_PROXY_URL:-}" ]]; then + export http_proxy="${http_proxy:-$BAZEL_PROXY_URL}" + export https_proxy="${https_proxy:-$BAZEL_PROXY_URL}" + export HTTP_PROXY="${HTTP_PROXY:-$http_proxy}" + export HTTPS_PROXY="${HTTPS_PROXY:-$https_proxy}" + fi +} + +normalize_version_for_protoc_release() { + local v="$1" + # Protobuf release packaging switched from v3.x.y to vX.Y for newer versions. + # For our current agreed baseline (3.25.5), the protoc archive/tag is 25.5. 
+ if [[ "$v" =~ ^3\.([0-9]+\.[0-9]+)$ ]]; then + local stripped="${BASH_REMATCH[1]}" + local major="${stripped%%.*}" + if [[ "$major" -ge 21 ]]; then + echo "$stripped" + return 0 + fi + fi + echo "$v" +} + +version_matches_baseline() { + local actual="$1" + local baseline="$2" + local actual_norm baseline_norm + actual_norm="$(normalize_version_for_protoc_release "$actual")" + baseline_norm="$(normalize_version_for_protoc_release "$baseline")" + [[ "$actual" == "$baseline" || "$actual_norm" == "$baseline_norm" ]] +} + +download_file() { + local url="$1" + local out="$2" + + if command -v curl >/dev/null 2>&1; then + if [[ -n "${https_proxy:-}" || -n "${http_proxy:-}" ]]; then + curl -fLk "$url" -o "$out" + else + curl -fL "$url" -o "$out" + fi + return 0 + fi + + if command -v wget >/dev/null 2>&1; then + local wget_args=() + if [[ -n "${https_proxy:-}" || -n "${http_proxy:-}" ]]; then + wget_args+=(--no-check-certificate -e use_proxy=yes) + if [[ -n "${https_proxy:-}" ]]; then + wget_args+=(-e "https_proxy=${https_proxy}") + fi + if [[ -n "${http_proxy:-}" ]]; then + wget_args+=(-e "http_proxy=${http_proxy}") + fi + fi + wget "${wget_args[@]}" -O "$out" "$url" + return 0 + fi + + echo "ERROR: neither curl nor wget is available for downloading protoc." >&2 + return 1 +} + +ensure_zip_tools() { + command -v unzip >/dev/null 2>&1 || { + echo "ERROR: unzip not found." 
>&2 + exit 1 + } +} + +setup_proxy_env +ensure_zip_tools + +if command -v protoc >/dev/null 2>&1; then + existing_out="$(protoc --version 2>/dev/null || true)" + if [[ "$existing_out" =~ ([0-9]+\.[0-9]+\.[0-9]+) ]]; then + existing_ver="${BASH_REMATCH[1]}" + if version_matches_baseline "$existing_ver" "$PROTOBUF_BASELINE_VERSION"; then + command -v protoc + exit 0 + fi + fi +fi + +PROTOC_RELEASE_VERSION="$(normalize_version_for_protoc_release "$PROTOBUF_BASELINE_VERSION")" +PROTOC_ARCHIVE="protoc-${PROTOC_RELEASE_VERSION}-${PROTOC_OS}-${PROTOC_ARCH}.zip" +PROTOC_URL="https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_RELEASE_VERSION}/${PROTOC_ARCHIVE}" +PROTOC_PREFIX="${PROTOC_INSTALL_ROOT}/protoc-${PROTOC_RELEASE_VERSION}-${PROTOC_OS}-${PROTOC_ARCH}" +PROTOC_BIN="${PROTOC_PREFIX}/bin/protoc" + +if [[ "${PROTOC_FORCE_INSTALL}" != "1" && -x "${PROTOC_BIN}" ]]; then + if [[ "${PROTOC_PRINT_PATH_ONLY}" == "1" ]]; then + echo "${PROTOC_BIN}" + else + echo "${PROTOC_BIN}" + fi + exit 0 +fi + +mkdir -p "${PROTOC_INSTALL_ROOT}" +tmpdir="$(mktemp -d "${PROTOC_INSTALL_ROOT}/.protoc-download.XXXXXX")" +trap 'rm -rf "${tmpdir}"' EXIT + +archive_path="${tmpdir}/${PROTOC_ARCHIVE}" +download_file "${PROTOC_URL}" "${archive_path}" + +extract_dir="${tmpdir}/extract" +mkdir -p "${extract_dir}" +unzip -q "${archive_path}" -d "${extract_dir}" + +rm -rf "${PROTOC_PREFIX}" +mkdir -p "${PROTOC_PREFIX}" +cp -a "${extract_dir}/." 
"${PROTOC_PREFIX}/" +chmod +x "${PROTOC_BIN}" + +if [[ "${PROTOC_PRINT_PATH_ONLY}" == "1" ]]; then + echo "${PROTOC_BIN}" +else + echo "${PROTOC_BIN}" +fi diff --git a/docs/cpp-bazel-usage.md b/docs/cpp-bazel-usage.md new file mode 100644 index 00000000..1d58a8da --- /dev/null +++ b/docs/cpp-bazel-usage.md @@ -0,0 +1,276 @@ +# Fluss C++ Bazel Usage Guide (System / Build Modes) + +This guide is for: + +- C++ application teams consuming Fluss C++ bindings via Bazel +- Maintainers evolving the Bazel integration + +For the CMake flow with the same `system` / `build` dependency modes, see +`docs/cpp-cmake-usage.md`. + +Current simplification scope: + +- Keep only two dependency modes in the mainline guidance: + - `system` + - `build` +- Defer strict internal-registry-only module flow from the mainline path + +## Scope + +- Dependency model: **root module mode** +- Consumer dependency target: `@red-fluss-rust//:fluss_cpp` +- Build systems covered by this document: **Bazel** +- Dependency modes covered by this document: **system/build** + +Version baseline references currently used by examples: + +- `protobuf/protoc`: `3.25.5` +- `arrow-cpp`: `19.0.1` + +## Common Consumer `BUILD.bazel` + +Both modes use the same dependency target: + +```starlark +load("@rules_cc//cc:defs.bzl", "cc_binary") + +cc_binary( + name = "fluss_reader", + srcs = ["reader.cc"], + deps = ["@red-fluss-rust//:fluss_cpp"], +) +``` + +## Mode 1: `system` (Recommended in preinstalled environments) + +Use this mode when your environment already provides: + +- `protoc` +- Arrow C++ (headers + shared libraries) + +### Consumer `MODULE.bazel` (pattern) + +```starlark +module(name = "my_cpp_app") + +bazel_dep(name = "rules_cc", version = "0.2.14") +bazel_dep(name = "red-fluss-rust", version = "0.1.0") + +fluss_cpp = use_extension("@red-fluss-rust//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp.config( + mode = "system", + protobuf_version = "3.25.5", + arrow_cpp_version = "19.0.1", + # Adjust Arrow paths for 
your environment + system_arrow_prefix = "/usr", + system_arrow_include_dir = "include", + system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so", + system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so*", +) +use_repo(fluss_cpp, "apache_arrow_cpp") +``` + +### Build and run (consumer workspace pattern) + +```bash +PROTOC_BIN="$(command -v protoc)" +CARGO_BIN="$(command -v cargo)" +bazel run \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + //:fluss_reader +``` + +### Runnable example + +- `bindings/cpp/examples/bazel-consumer/system` + +```bash +cd bindings/cpp/examples/bazel-consumer/system +PROTOC_BIN="$(command -v protoc)" +CARGO_BIN="$(command -v cargo)" +bazel run \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + //:consumer_system +``` + +## Mode 2: `build` (No internal registry / no preinstalled Arrow) + +Use this mode when Arrow C++ is not preinstalled and you want Bazel to +provision it from source. + +### Consumer `MODULE.bazel` (pattern) + +```starlark +module(name = "my_cpp_app") + +bazel_dep(name = "rules_cc", version = "0.2.14") +bazel_dep(name = "red-fluss-rust", version = "0.1.0") + +fluss_cpp = use_extension("@red-fluss-rust//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp.config( + mode = "build", + protobuf_version = "3.25.5", + arrow_cpp_version = "19.0.1", +) +use_repo(fluss_cpp, "apache_arrow_cpp") +``` + +Notes: + +- `build` mode in the core Bazel integration still uses `PROTOC` (env / PATH). +- To auto-download a pinned `protoc` for `build` mode, use + `bindings/cpp/scripts/ensure_protoc.sh` and pass the result via `--action_env=PROTOC=...`. +- Some environments may require `ep_cmake_ar/ranlib/nm` overrides. 
+ +### Build and run (consumer workspace pattern, with auto-downloaded `protoc`) + +```bash +PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)" +``` + +```bash +bazel run --action_env=PROTOC="$PROTOC_BIN" //:fluss_reader +``` + +If `cargo` is not on Bazel action `PATH`, also pass: + +```bash +CARGO_BIN="$(command -v cargo)" +bazel run \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + //:fluss_reader +``` + +### Runnable example + +- `bindings/cpp/examples/bazel-consumer/build` + +```bash +cd bindings/cpp/examples/bazel-consumer/build +PROTOC_BIN="$(bash ../../../scripts/ensure_protoc.sh --print-path)" +CARGO_BIN="$(command -v cargo)" +bazel run \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + //:consumer_build +``` + +## Local Development Override (Optional) + +For repository-local validation only: + +```starlark +local_path_override( + module_name = "red-fluss-rust", + path = "/path/to/fluss-rust", +) +``` + +Do not keep local overrides in long-lived branches. + +## Repository-local Validation (Direct Commands) + +These commands validate the repository examples directly. 
+If your environment requires a proxy for Bazel external downloads, export it +before running: + +```bash +export BAZEL_PROXY_URL="${BAZEL_PROXY_URL:-http://host:port}" +export http_proxy="$BAZEL_PROXY_URL" +export https_proxy="$BAZEL_PROXY_URL" +export HTTP_PROXY="$http_proxy" +export HTTPS_PROXY="$https_proxy" +unset all_proxy ALL_PROXY +``` + +### Validate `build` example + +```bash +cd bindings/cpp/examples/bazel-consumer/build +PROTOC_BIN="$(bash ../../../scripts/ensure_protoc.sh --print-path)" +CARGO_BIN="$(command -v cargo)" +bazel --ignore_all_rc_files run \ + --registry=https://bcr.bazel.build \ + --lockfile_mode=off \ + --repo_env=http_proxy="${http_proxy:-}" \ + --repo_env=https_proxy="${https_proxy:-}" \ + --repo_env=HTTP_PROXY="${HTTP_PROXY:-}" \ + --repo_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \ + --action_env=http_proxy="${http_proxy:-}" \ + --action_env=https_proxy="${https_proxy:-}" \ + --action_env=HTTP_PROXY="${HTTP_PROXY:-}" \ + --action_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \ + --action_env=all_proxy= \ + --action_env=ALL_PROXY= \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + --strategy=CcCmakeMakeRule=local \ + --strategy=BootstrapGNUMake=local \ + --strategy=BootstrapPkgConfig=local \ + //:consumer_build +``` + +### Validate `system` example (using a local Arrow prefix) + +The `system` example defaults to `/usr`. If your Arrow prefix is elsewhere +(for example a locally built prefix), copy the example to a temp directory and +patch `MODULE.bazel` before running: + +```bash +tmp_dir="$(mktemp -d /tmp/fluss-bazel-system-doc.XXXXXX)" +cp -a bindings/cpp/examples/bazel-consumer/system/.
"$tmp_dir/" +sed -i \ + -e 's|path = "../../../../../"|path = "/path/to/fluss-rust"|' \ + -e 's|system_arrow_prefix = "/usr"|system_arrow_prefix = "/tmp/fluss-system-arrow-19.0.1"|' \ + -e 's|system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so"|system_arrow_shared_library = "lib/libarrow.so"|' \ + -e 's|system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so\*"|system_arrow_runtime_glob = "lib/libarrow.so*"|' \ + "$tmp_dir/MODULE.bazel" +cd "$tmp_dir" +PROTOC_BIN="$(command -v protoc)" +CARGO_BIN="$(command -v cargo)" +bazel --ignore_all_rc_files run \ + --registry=https://bcr.bazel.build \ + --lockfile_mode=off \ + --repo_env=http_proxy="${http_proxy:-}" \ + --repo_env=https_proxy="${https_proxy:-}" \ + --repo_env=HTTP_PROXY="${HTTP_PROXY:-}" \ + --repo_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \ + --action_env=http_proxy="${http_proxy:-}" \ + --action_env=https_proxy="${https_proxy:-}" \ + --action_env=HTTP_PROXY="${HTTP_PROXY:-}" \ + --action_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \ + --action_env=all_proxy= \ + --action_env=ALL_PROXY= \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + //:consumer_system +``` + +## Upgrade Procedure + +1. Update `bazel_dep(name = "red-fluss-rust", version = "...")` +2. Update mode version settings if needed (`protobuf_version`, `arrow_cpp_version`) +3. Run `bazel mod tidy` +4. Commit `MODULE.bazel` and `MODULE.bazel.lock` +5. Run build + tests +6.
Verify dependency graph: + +```bash +bazel mod graph | rg "red-fluss-rust@" +``` + +## Examples and Non-Mainline References + +Mainline examples: + +- `bindings/cpp/examples/bazel-consumer/build` +- `bindings/cpp/examples/bazel-consumer/system` diff --git a/docs/cpp-cmake-usage.md b/docs/cpp-cmake-usage.md new file mode 100644 index 00000000..3002d1c4 --- /dev/null +++ b/docs/cpp-cmake-usage.md @@ -0,0 +1,129 @@ +# Fluss C++ CMake Usage Guide (System / Build Modes) + +## Audience + +- C++ application teams building `bindings/cpp` with CMake +- Maintainers evolving Fluss C++ dependency provisioning + +## Scope + +- Build system covered by this document: **CMake** +- Dependency modes covered by this document: **system/build** + +Current tested baselines: + +- `protoc`: `3.25.5` +- `arrow-cpp`: `19.0.1` + +Notes: + +- CMake currently warns (does not fail) when local `protoc`/Arrow versions differ from the baselines. +- `protoc` is required because Rust `prost-build` runs during the C++ build. + +## Common Prerequisites + +- Rust toolchain (`cargo` in `PATH`, or set `CARGO=/path/to/cargo`) +- `protoc` in `PATH` (required for `system` mode; `build` mode can auto-download via `bindings/cpp/scripts/ensure_protoc.sh`) +- C++17 compiler +- CMake 3.22+ + +Examples below use `bindings/cpp` as the source directory. + +## Mode 1: `system` + +Use this mode when the environment already provides Arrow C++. + +### Configure + +```bash +cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-system \ + -DFLUSS_CPP_DEP_MODE=system \ + -DFLUSS_CPP_ARROW_SYSTEM_ROOT=/path/to/arrow/prefix +``` + +Typical prefixes: + +- Ubuntu package install: `/usr` +- Custom install prefix: `/usr/local` or `/opt/arrow` + +### Build + +```bash +cmake --build /tmp/fluss-cpp-cmake-system --target fluss_cpp -j +``` + +## Mode 2: `build` + +Use this mode when Arrow C++ is not preinstalled and CMake should fetch/build it. 
+ +### Configure (with auto-downloaded `protoc`) + +```bash +PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)" +export PATH="$(dirname "$PROTOC_BIN"):$PATH" +``` + +Then configure: + +```bash +cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-build \ + -DFLUSS_CPP_DEP_MODE=build +``` + +Optional overrides: + +- `-DFLUSS_CPP_ARROW_VERSION=19.0.1` +- `-DFLUSS_CPP_ARROW_SOURCE_URL=...` (internal mirror or pinned archive) +- `-DFLUSS_CPP_PROTOBUF_VERSION=3.25.5` (baseline warning only) + +If your environment needs a proxy for CMake/FetchContent downloads, export standard proxy vars before configure/build: + +```bash +export http_proxy=http://host:port +export https_proxy=http://host:port +export HTTP_PROXY="$http_proxy" +export HTTPS_PROXY="$https_proxy" +``` + +### Build + +```bash +cmake --build /tmp/fluss-cpp-cmake-build --target fluss_cpp -j +``` + +This mode is slower on first build because it compiles Arrow C++ from source. + +## Repository-local Validation (Direct Commands) + +### Validate `system` mode + +```bash +PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)" +export PATH="$(dirname "$PROTOC_BIN"):$PATH" +cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-system \ + -DFLUSS_CPP_DEP_MODE=system \ + -DFLUSS_CPP_ARROW_SYSTEM_ROOT=/tmp/fluss-system-arrow-19.0.1 +cmake --build /tmp/fluss-cpp-cmake-system --target fluss_cpp -j +``` + +### Validate `build` mode + +```bash +PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)" +export PATH="$(dirname "$PROTOC_BIN"):$PATH" +cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-build \ + -DFLUSS_CPP_DEP_MODE=build +cmake --build /tmp/fluss-cpp-cmake-build --target fluss_cpp -j +``` + +## Troubleshooting + +- `cargo not found` + - Install Rust toolchain or set `CARGO=/path/to/cargo`. +- `protoc not found` + - Install `protoc` and ensure it is in `PATH`. + - For `build` mode, use `bindings/cpp/scripts/ensure_protoc.sh` and prepend the returned path to `PATH`. 
+- `arrow/c/bridge.h` not found (build mode) + - Reconfigure after updating to the latest `bindings/cpp/CMakeLists.txt`; build mode now adds Arrow source/build include dirs explicitly. +- Long first build in `build` mode + - Expected. Arrow C++ source build dominates wall time. From 6f12dfff533e7536ca058b0e2c384383dcce5ce8 Mon Sep 17 00:00:00 2001 From: zhaohaidao Date: Fri, 27 Feb 2026 04:36:17 +0000 Subject: [PATCH 2/9] docs: use fluss-cpp module name and direct bazel cpp target --- BUILD.bazel | 44 ------------------- MODULE.bazel | 5 ++- .../examples/bazel-consumer/build/BUILD.bazel | 2 +- .../bazel-consumer/build/MODULE.bazel | 6 +-- .../bazel-consumer/system/BUILD.bazel | 2 +- .../bazel-consumer/system/MODULE.bazel | 8 ++-- docs/cpp-bazel-usage.md | 22 +++++----- 7 files changed, 25 insertions(+), 64 deletions(-) delete mode 100644 BUILD.bazel diff --git a/BUILD.bazel b/BUILD.bazel deleted file mode 100644 index 1e04f817..00000000 --- a/BUILD.bazel +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -package(default_visibility = ["//visibility:public"]) - -alias( - name = "fluss_cpp", - actual = "//bindings/cpp:fluss_cpp", -) - -# Keep compatibility with existing CI entrypoint that builds //:consume_table. -alias( - name = "consume_table", - actual = "//bindings/cpp:fluss_cpp_example", -) - -alias( - name = "fluss_cpp_example", - actual = "//bindings/cpp:fluss_cpp_example", -) - -alias( - name = "fluss_cpp_admin_example", - actual = "//bindings/cpp:fluss_cpp_admin_example", -) - -alias( - name = "fluss_cpp_kv_example", - actual = "//bindings/cpp:fluss_cpp_kv_example", -) diff --git a/MODULE.bazel b/MODULE.bazel index fae7d3be..e9a75a54 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -15,8 +15,11 @@ # specific language governing permissions and limitations # under the License. +# Required for root module mode (`bazel_dep(name = "fluss-cpp", ...)`). +# A root `BUILD.bazel` file is optional if consumers depend on +# `@fluss-cpp//bindings/cpp:fluss_cpp` directly. module( - name = "red-fluss-rust", + name = "fluss-cpp", version = "0.0.0", ) diff --git a/bindings/cpp/examples/bazel-consumer/build/BUILD.bazel b/bindings/cpp/examples/bazel-consumer/build/BUILD.bazel index ba62b18f..afd35edd 100644 --- a/bindings/cpp/examples/bazel-consumer/build/BUILD.bazel +++ b/bindings/cpp/examples/bazel-consumer/build/BUILD.bazel @@ -21,5 +21,5 @@ cc_binary( name = "consumer_build", srcs = ["main.cc"], copts = ["-std=c++17"], - deps = ["@red-fluss-rust//:fluss_cpp"], + deps = ["@fluss-cpp//bindings/cpp:fluss_cpp"], ) diff --git a/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel index 4f22b219..33c39c52 100644 --- a/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel +++ b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel @@ -18,15 +18,15 @@ module(name = "fluss_cpp_consumer_build") bazel_dep(name = "rules_cc", version = "0.2.14") -bazel_dep(name = "red-fluss-rust", version = "0.1.0") +bazel_dep(name = 
"fluss-cpp", version = "0.1.0") # Local override for repository-local validation only. local_path_override( - module_name = "red-fluss-rust", + module_name = "fluss-cpp", path = "../../../../../", ) -fluss_cpp = use_extension("@red-fluss-rust//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp = use_extension("@fluss-cpp//bazel/cpp:deps.bzl", "cpp_sdk") fluss_cpp.config( mode = "build", protobuf_version = "3.25.5", diff --git a/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel b/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel index 2cf5e820..2f24e6de 100644 --- a/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel +++ b/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel @@ -21,5 +21,5 @@ cc_binary( name = "consumer_system", srcs = ["main.cc"], copts = ["-std=c++17"], - deps = ["@red-fluss-rust//:fluss_cpp"], + deps = ["@fluss-cpp//bindings/cpp:fluss_cpp"], ) diff --git a/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel index f8f23397..c8e2b42b 100644 --- a/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel +++ b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel @@ -18,15 +18,15 @@ module(name = "fluss_cpp_consumer_system") bazel_dep(name = "rules_cc", version = "0.2.14") -bazel_dep(name = "red-fluss-rust", version = "0.1.0") +bazel_dep(name = "fluss-cpp", version = "0.1.0") local_path_override( - module_name = "red-fluss-rust", - path = "../../../../../", + module_name = "fluss-cpp", + path = "/path/to/fluss-rust", ) # Intended interface for preinstalled protoc + Arrow C++ environments. 
-fluss_cpp = use_extension("@red-fluss-rust//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp = use_extension("@fluss-cpp//bazel/cpp:deps.bzl", "cpp_sdk") fluss_cpp.config( mode = "system", protobuf_version = "3.25.5", diff --git a/docs/cpp-bazel-usage.md b/docs/cpp-bazel-usage.md index 1d58a8da..28752f33 100644 --- a/docs/cpp-bazel-usage.md +++ b/docs/cpp-bazel-usage.md @@ -18,7 +18,9 @@ Current simplification scope: ## Scope - Dependency model: **root module mode** -- Consumer dependency target: `@red-fluss-rust//:fluss_cpp` +- Consumer dependency target: `@fluss-cpp//bindings/cpp:fluss_cpp` +- Root `MODULE.bazel` is required for root module mode. +- Root `BUILD.bazel` is optional when consumers use the direct target label above. - Build systems covered by this document: **Bazel** - Dependency modes covered by this document: **system/build** @@ -37,7 +39,7 @@ load("@rules_cc//cc:defs.bzl", "cc_binary") cc_binary( name = "fluss_reader", srcs = ["reader.cc"], - deps = ["@red-fluss-rust//:fluss_cpp"], + deps = ["@fluss-cpp//bindings/cpp:fluss_cpp"], ) ``` @@ -54,9 +56,9 @@ Use this mode when your environment already provides: module(name = "my_cpp_app") bazel_dep(name = "rules_cc", version = "0.2.14") -bazel_dep(name = "red-fluss-rust", version = "0.1.0") +bazel_dep(name = "fluss-cpp", version = "0.1.0") -fluss_cpp = use_extension("@red-fluss-rust//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp = use_extension("@fluss-cpp//bazel/cpp:deps.bzl", "cpp_sdk") fluss_cpp.config( mode = "system", protobuf_version = "3.25.5", @@ -108,9 +110,9 @@ provision it from source. 
module(name = "my_cpp_app") bazel_dep(name = "rules_cc", version = "0.2.14") -bazel_dep(name = "red-fluss-rust", version = "0.1.0") +bazel_dep(name = "fluss-cpp", version = "0.1.0") -fluss_cpp = use_extension("@red-fluss-rust//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp = use_extension("@fluss-cpp//bazel/cpp:deps.bzl", "cpp_sdk") fluss_cpp.config( mode = "build", protobuf_version = "3.25.5", @@ -168,7 +170,7 @@ For repository-local validation only: ```starlark local_path_override( - module_name = "red-fluss-rust", + module_name = "fluss-cpp", path = "/path/to/fluss-rust", ) ``` @@ -228,7 +230,7 @@ patch `MODULE.bazel` before running: tmp_dir="$(mktemp -d /tmp/fluss-bazel-system-doc.XXXXXX)" cp -a bindings/cpp/examples/bazel-consumer/system/. "$tmp_dir/" sed -i \ - -e 's|path = "../../../../../"|path = "/home/admin/mh/fluss-r2/fluss-rust"|' \ + -e 's|path = "/path/to/fluss-rust"|path = "/home/admin/mh/fluss-r2/fluss-rust"|' \ -e 's|system_arrow_prefix = "/usr"|system_arrow_prefix = "/tmp/fluss-system-arrow-19.0.1"|' \ -e 's|system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so"|system_arrow_shared_library = "lib/libarrow.so"|' \ -e 's|system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so\\*"|system_arrow_runtime_glob = "lib/libarrow.so*"|' \ @@ -257,7 +259,7 @@ bazel --ignore_all_rc_files run \ ## Upgrade Procedure -1. Update `bazel_dep(name = "red-fluss-rust", version = "...")` +1. Update `bazel_dep(name = "fluss-cpp", version = "...")` 2. Update mode version settings if needed (`protobuf_version`, `arrow_cpp_version`) 3. Run `bazel mod tidy` 4. Commit `MODULE.bazel` and `MODULE.bazel.lock` @@ -265,7 +267,7 @@ bazel --ignore_all_rc_files run \ 6. 
Verify dependency graph: ```bash -bazel mod graph | rg "red-fluss-rust@" +bazel mod graph | rg "fluss-cpp@" ``` ## Examples and Non-Mainline References From 5833464eac4b0242a1689ba44970d088289bb8e3 Mon Sep 17 00:00:00 2001 From: zhaohaidao Date: Fri, 27 Feb 2026 04:36:17 +0000 Subject: [PATCH 3/9] docs: remove hardcoded local path in bazel system example --- MODULE.bazel | 2 -- docs/cpp-bazel-usage.md | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/MODULE.bazel b/MODULE.bazel index e9a75a54..5c1eee51 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -16,8 +16,6 @@ # under the License. # Required for root module mode (`bazel_dep(name = "fluss-cpp", ...)`). -# A root `BUILD.bazel` file is optional if consumers depend on -# `@fluss-cpp//bindings/cpp:fluss_cpp` directly. module( name = "fluss-cpp", version = "0.0.0", diff --git a/docs/cpp-bazel-usage.md b/docs/cpp-bazel-usage.md index 28752f33..9898cb7f 100644 --- a/docs/cpp-bazel-usage.md +++ b/docs/cpp-bazel-usage.md @@ -228,9 +228,10 @@ patch `MODULE.bazel` before running: ```bash tmp_dir="$(mktemp -d /tmp/fluss-bazel-system-doc.XXXXXX)" +FLUSS_RUST_ROOT="$(pwd)" cp -a bindings/cpp/examples/bazel-consumer/system/. 
"$tmp_dir/" sed -i \ - -e 's|path = "/path/to/fluss-rust"|path = "/home/admin/mh/fluss-r2/fluss-rust"|' \ + -e "s|path = \"/path/to/fluss-rust\"|path = \"$FLUSS_RUST_ROOT\"|" \ -e 's|system_arrow_prefix = "/usr"|system_arrow_prefix = "/tmp/fluss-system-arrow-19.0.1"|' \ -e 's|system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so"|system_arrow_shared_library = "lib/libarrow.so"|' \ -e 's|system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so\\*"|system_arrow_runtime_glob = "lib/libarrow.so*"|' \ From e4e8a5c79af29b0c2b97599dd1b02d5790992c0d Mon Sep 17 00:00:00 2001 From: zhaohaidao Date: Fri, 27 Feb 2026 04:36:17 +0000 Subject: [PATCH 4/9] address comments --- MODULE.bazel | 2 +- bazel/cpp/BUILD.bazel | 9 - bazel/cpp/deps.bzl | 290 ------------------ bindings/cpp/BUILD.bazel | 2 +- bindings/cpp/CMakeLists.txt | 13 +- bindings/cpp/bazel/cpp/deps.bzl | 5 +- .../bazel-consumer/build/MODULE.bazel | 2 +- .../bazel-consumer/system/MODULE.bazel | 2 +- bindings/cpp/scripts/ensure_protoc.sh | 75 ++++- docs/cpp-bazel-usage.md | 8 +- 10 files changed, 89 insertions(+), 319 deletions(-) delete mode 100644 bazel/cpp/BUILD.bazel delete mode 100644 bazel/cpp/deps.bzl diff --git a/MODULE.bazel b/MODULE.bazel index 5c1eee51..a35618fa 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -51,7 +51,7 @@ register_toolchains( "@rules_foreign_cc//toolchains:all", ) -cpp_sdk = use_extension("//bazel/cpp:deps.bzl", "cpp_sdk") +cpp_sdk = use_extension("//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") # Phase 1 keeps build mode behavior while moving Arrow build details out of # MODULE.bazel. Registry/system modes will reuse the same extension entrypoint. cpp_sdk.config( diff --git a/bazel/cpp/BUILD.bazel b/bazel/cpp/BUILD.bazel deleted file mode 100644 index b1424e45..00000000 --- a/bazel/cpp/BUILD.bazel +++ /dev/null @@ -1,9 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -# Stable indirection target for the Arrow C++ dependency. 
The implementation -# repo name can change across modes (registry/build/system) without touching -# bindings/cpp/BUILD.bazel. -alias( - name = "arrow_cpp_dep", - actual = "@apache_arrow_cpp//:arrow_cpp", -) diff --git a/bazel/cpp/deps.bzl b/bazel/cpp/deps.bzl deleted file mode 100644 index 4abd5919..00000000 --- a/bazel/cpp/deps.bzl +++ /dev/null @@ -1,290 +0,0 @@ -"""Bzlmod extension for fluss C++ SDK dependency provisioning.""" - -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - -_ARROW_BUILD_FILE_TEMPLATE = """ -load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") - -package(default_visibility = ["//visibility:public"]) - -filegroup( - name = "all_srcs", - srcs = glob( - ["**"], - exclude = [ - "**/BUILD", - "**/BUILD.bazel", - ], - ), -) - -cmake( - name = "arrow_cpp", - lib_source = ":all_srcs", - working_directory = "cpp", - generate_args = ["-GUnix Makefiles"], - cache_entries = { - "CMAKE_BUILD_TYPE": "Release", - "CMAKE_INSTALL_LIBDIR": "lib", - "CMAKE_POSITION_INDEPENDENT_CODE": "ON", - "ARROW_BUILD_SHARED": "ON", - "ARROW_BUILD_STATIC": "OFF", - "ARROW_BUILD_TESTS": "OFF", - "ARROW_BUILD_EXAMPLES": "OFF", - "ARROW_BUILD_BENCHMARKS": "OFF", - "ARROW_BUILD_INTEGRATION": "OFF", - "ARROW_BUILD_UTILITIES": "OFF", - "ARROW_COMPUTE": "OFF", - "ARROW_CSV": "OFF", - "ARROW_DATASET": "OFF", - "ARROW_FILESYSTEM": "OFF", - "ARROW_JSON": "OFF", - "ARROW_PARQUET": "OFF", - "ARROW_IPC": "ON", - "ARROW_DEPENDENCY_SOURCE": "BUNDLED", - # Temporary workarounds for older images / Bazel sandbox toolchain detection. 
- "EP_CMAKE_RANLIB": "__EP_CMAKE_RANLIB__", - "EP_CMAKE_AR": "__EP_CMAKE_AR__", - "EP_CMAKE_NM": "__EP_CMAKE_NM__", - }, - out_include_dir = "include", - out_lib_dir = "lib", - out_shared_libs = select({ - "@platforms//os:macos": ["libarrow.dylib"], - "//conditions:default": [ - "libarrow.so", - "libarrow.so.1900", - "libarrow.so.1900.1.0", - ], - }), -) -""" - -_ARROW_PATCH_CMDS = [ - "sed -i 's|#define ARROW_CXX_COMPILER_FLAGS \"@CMAKE_CXX_FLAGS@\"|#define ARROW_CXX_COMPILER_FLAGS \"\"|' cpp/src/arrow/util/config.h.cmake", -] - -_SYSTEM_ARROW_BUILD_FILE_TEMPLATE = """ -load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") - -package(default_visibility = ["//visibility:public"]) - -cc_import( - name = "arrow_shared_import", - shared_library = "__SYSTEM_ARROW_SHARED_LIBRARY__", -) - -filegroup( - name = "arrow_runtime_libs", - srcs = [ -__SYSTEM_ARROW_RUNTIME_SRCS__ - ], -) - -cc_library( - name = "arrow_cpp", - hdrs = [ -__SYSTEM_ARROW_HDRS__ - ], - includes = ["__SYSTEM_ARROW_INCLUDE_DIR__"], - data = [":arrow_runtime_libs"], - deps = [":arrow_shared_import"], -) -""" - -_ARROW_BUILD_VERSIONS = { - "19.0.1": { - "urls": ["https://github.com/apache/arrow/archive/refs/tags/apache-arrow-19.0.1.tar.gz"], - "strip_prefix": "arrow-apache-arrow-19.0.1", - }, -} - -_config_tag = tag_class(attrs = { - "mode": attr.string(default = "build"), - "arrow_cpp_version": attr.string(default = "19.0.1"), - "protobuf_version": attr.string(default = "3.25.5"), - "ep_cmake_ranlib": attr.string(default = "ranlib"), - "ep_cmake_ar": attr.string(default = "ar"), - "ep_cmake_nm": attr.string(default = "nm"), - "system_arrow_prefix": attr.string(default = "/usr"), - "system_arrow_include_dir": attr.string(default = "include"), - "system_arrow_shared_library": attr.string(default = "lib/x86_64-linux-gnu/libarrow.so"), - "system_arrow_runtime_glob": attr.string(default = "lib/x86_64-linux-gnu/libarrow.so*"), -}) - -def _render_arrow_build_file(tag): - return 
_ARROW_BUILD_FILE_TEMPLATE.replace( - "__EP_CMAKE_RANLIB__", - tag.ep_cmake_ranlib, - ).replace( - "__EP_CMAKE_AR__", - tag.ep_cmake_ar, - ).replace( - "__EP_CMAKE_NM__", - tag.ep_cmake_nm, - ) - -def _render_system_arrow_build_file(tag, shared_library_override = None): - shared_library = shared_library_override if shared_library_override else (tag.system_arrow_shared_library if hasattr(tag, "system_arrow_shared_library") else tag.shared_library) - include_dir = tag.system_arrow_include_dir if hasattr(tag, "system_arrow_include_dir") else tag.include_dir - return _SYSTEM_ARROW_BUILD_FILE_TEMPLATE.replace( - "__SYSTEM_ARROW_SHARED_LIBRARY__", - "sysroot/" + shared_library, - ).replace( - "__SYSTEM_ARROW_INCLUDE_DIR__", - "sysroot/" + include_dir, - ) - -def _starlark_string_list(items): - if not items: - return "" - return "\n".join([' "%s",' % i for i in items]) - -def _list_files(repo_ctx, base_dir, suffixes): - result = repo_ctx.execute([ - "/usr/bin/find", - base_dir, - "-type", - "f", - ]) - if result.return_code != 0: - fail("failed to enumerate files under %s: %s" % (base_dir, result.stderr)) - files = [] - for line in result.stdout.splitlines(): - for suffix in suffixes: - if line.endswith(suffix): - files.append(line) - break - return sorted(files) - -def _system_arrow_repo_impl(repo_ctx): - prefix = repo_ctx.attr.prefix.rstrip("/") - include_dir = repo_ctx.attr.include_dir - shared_library = repo_ctx.attr.shared_library - runtime_glob = repo_ctx.attr.runtime_glob - - repo_ctx.execute(["/bin/mkdir", "-p", "sysroot"]) - copy_res = repo_ctx.execute(["/bin/cp", "-a", prefix + "/.", "sysroot"]) - if copy_res.return_code != 0: - fail("failed to copy system arrow prefix %s: %s" % (prefix, copy_res.stderr)) - - header_root = prefix + "/" + include_dir - headers = _list_files(repo_ctx, header_root, [".h", ".hpp"]) - header_srcs = [] - for h in headers: - if not h.startswith(prefix + "/"): - fail("header path %s is outside prefix %s" % (h, prefix)) - 
header_srcs.append("sysroot/" + h[len(prefix) + 1:]) - - runtime_dir = runtime_glob.rsplit("/", 1)[0] - runtime_prefix = runtime_glob.rsplit("/", 1)[1].replace("*", "") - runtime_files = _list_files(repo_ctx, prefix + "/" + runtime_dir, [""]) - runtime_srcs = [] - for f in runtime_files: - rel = f[len(prefix) + 1:] if f.startswith(prefix + "/") else None - if rel == None: - continue - if rel.startswith(runtime_dir + "/") and rel.rsplit("/", 1)[1].startswith(runtime_prefix): - runtime_srcs.append("sysroot/" + rel) - runtime_srcs = sorted(runtime_srcs) - - # Prefer a versioned soname file as the imported shared library so Bazel - # runfiles contain the exact filename required by the runtime loader. - shared_import_rel = "sysroot/" + shared_library - shared_basename = shared_library.rsplit("/", 1)[1] - soname_candidates = [] - for rel in runtime_srcs: - base = rel.rsplit("/", 1)[1] - if base == shared_basename: - continue - if base.startswith(shared_basename + "."): - soname_candidates.append(rel) - if soname_candidates: - # Prefer shortest suffix first (e.g. libarrow.so.1900 before - # libarrow.so.1900.1.0) to match ELF SONAME naming when available. 
- soname_candidates = sorted(soname_candidates, key = lambda s: (len(s), s)) - shared_import_rel = soname_candidates[0] - - build_file = _render_system_arrow_build_file(repo_ctx.attr, shared_library_override = shared_import_rel[len("sysroot/"):]).replace( - "__SYSTEM_ARROW_HDRS__", - _starlark_string_list(header_srcs), - ).replace( - "__SYSTEM_ARROW_RUNTIME_SRCS__", - _starlark_string_list(runtime_srcs), - ) - repo_ctx.file("BUILD.bazel", build_file) - -_system_arrow_repository = repository_rule( - implementation = _system_arrow_repo_impl, - attrs = { - "prefix": attr.string(mandatory = True), - "include_dir": attr.string(mandatory = True), - "shared_library": attr.string(mandatory = True), - "runtime_glob": attr.string(mandatory = True), - }, - local = True, -) - -def _select_config(ctx): - selected = None - selected_owner = None - root_selected = None - for mod in ctx.modules: - for tag in mod.tags.config: - is_root = hasattr(mod, "is_root") and mod.is_root - if is_root: - if root_selected != None: - fail("cpp_sdk.config may only be declared once in the root module") - root_selected = tag - continue - if selected == None: - selected = tag - selected_owner = mod.name - elif selected_owner != mod.name: - # Prefer root override. Dependency defaults are tolerated as long - # as they come from a single module. 
- fail("multiple dependency defaults for cpp_sdk.config without root override") - if root_selected != None: - return root_selected - return selected - -def _cpp_sdk_impl(ctx): - tag = _select_config(ctx) - if tag == None: - return - - if tag.mode == "registry": - return - - if tag.mode == "system": - _system_arrow_repository( - name = "apache_arrow_cpp", - prefix = tag.system_arrow_prefix, - include_dir = tag.system_arrow_include_dir, - shared_library = tag.system_arrow_shared_library, - runtime_glob = tag.system_arrow_runtime_glob, - ) - return - - if tag.mode != "build": - fail("unsupported cpp_sdk mode: %s" % tag.mode) - - arrow_version = _ARROW_BUILD_VERSIONS.get(tag.arrow_cpp_version) - if arrow_version == None: - fail("unsupported arrow_cpp_version for build mode: %s" % tag.arrow_cpp_version) - - http_archive( - name = "apache_arrow_cpp", - urls = arrow_version["urls"], - strip_prefix = arrow_version["strip_prefix"], - # TODO: Pin sha256/integrity once release packaging is finalized. 
- patch_cmds = _ARROW_PATCH_CMDS, - build_file_content = _render_arrow_build_file(tag), - ) - -cpp_sdk = module_extension( - implementation = _cpp_sdk_impl, - tag_classes = { - "config": _config_tag, - }, -) diff --git a/bindings/cpp/BUILD.bazel b/bindings/cpp/BUILD.bazel index 8f845ab3..795643b7 100644 --- a/bindings/cpp/BUILD.bazel +++ b/bindings/cpp/BUILD.bazel @@ -331,7 +331,7 @@ cc_library( }), deps = [ ":rust_lib", - "//bazel/cpp:arrow_cpp_dep", + "//bindings/cpp/bazel/cpp:arrow_cpp_dep", ], visibility = ["//visibility:public"], ) diff --git a/bindings/cpp/CMakeLists.txt b/bindings/cpp/CMakeLists.txt index 66b09808..cafa4481 100644 --- a/bindings/cpp/CMakeLists.txt +++ b/bindings/cpp/CMakeLists.txt @@ -36,6 +36,10 @@ set(FLUSS_CPP_ARROW_SOURCE_URL "https://github.com/apache/arrow/archive/refs/tags/apache-arrow-19.0.1.tar.gz" CACHE STRING "Arrow source archive URL used in build mode") +set(FLUSS_CPP_ARROW_SOURCE_SHA256 + "4c898504958841cc86b6f8710ecb2919f96b5e10fa8989ac10ac4fca8362d86a" + CACHE STRING + "SHA256 for the Arrow source archive used in build mode") find_package(Threads REQUIRED) @@ -138,6 +142,7 @@ else() FetchContent_Declare( apache_arrow_src URL ${FLUSS_CPP_ARROW_SOURCE_URL} + URL_HASH SHA256=${FLUSS_CPP_ARROW_SOURCE_SHA256} SOURCE_SUBDIR cpp ) FetchContent_MakeAvailable(apache_arrow_src) @@ -155,14 +160,16 @@ endif() # Get cargo target dir execute_process(COMMAND ${FLUSS_CARGO_EXECUTABLE} locate-project --workspace --message-format plain - OUTPUT_VARIABLE CARGO_TARGET_DIR + OUTPUT_VARIABLE CARGO_MANIFEST_PATH + OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) -if (NOT CARGO_TARGET_DIR) +if (NOT CARGO_MANIFEST_PATH) message(FATAL_ERROR "Failed to resolve Cargo workspace target dir via '${FLUSS_CARGO_EXECUTABLE} locate-project'. 
" "Check Rust toolchain installation and PATH/CARGO.") endif() -string(REGEX REPLACE "/Cargo.toml\n$" "/target" CARGO_TARGET_DIR "${CARGO_TARGET_DIR}") +get_filename_component(CARGO_WORKSPACE_DIR "${CARGO_MANIFEST_PATH}" DIRECTORY) +set(CARGO_TARGET_DIR "${CARGO_WORKSPACE_DIR}/target") set(CARGO_MANIFEST ${PROJECT_SOURCE_DIR}/Cargo.toml) set(RUST_SOURCE_FILE ${PROJECT_SOURCE_DIR}/src/lib.rs) diff --git a/bindings/cpp/bazel/cpp/deps.bzl b/bindings/cpp/bazel/cpp/deps.bzl index 4abd5919..62059530 100644 --- a/bindings/cpp/bazel/cpp/deps.bzl +++ b/bindings/cpp/bazel/cpp/deps.bzl @@ -61,7 +61,7 @@ cmake( """ _ARROW_PATCH_CMDS = [ - "sed -i 's|#define ARROW_CXX_COMPILER_FLAGS \"@CMAKE_CXX_FLAGS@\"|#define ARROW_CXX_COMPILER_FLAGS \"\"|' cpp/src/arrow/util/config.h.cmake", + "sed -i.bak 's|#define ARROW_CXX_COMPILER_FLAGS \"@CMAKE_CXX_FLAGS@\"|#define ARROW_CXX_COMPILER_FLAGS \"\"|' cpp/src/arrow/util/config.h.cmake && rm -f cpp/src/arrow/util/config.h.cmake.bak", ] _SYSTEM_ARROW_BUILD_FILE_TEMPLATE = """ @@ -96,6 +96,7 @@ _ARROW_BUILD_VERSIONS = { "19.0.1": { "urls": ["https://github.com/apache/arrow/archive/refs/tags/apache-arrow-19.0.1.tar.gz"], "strip_prefix": "arrow-apache-arrow-19.0.1", + "integrity": "sha256-TImFBJWIQcyGtvhxDsspGflrXhD6iYmsEKxPyoNi2Go=", }, } @@ -277,7 +278,7 @@ def _cpp_sdk_impl(ctx): name = "apache_arrow_cpp", urls = arrow_version["urls"], strip_prefix = arrow_version["strip_prefix"], - # TODO: Pin sha256/integrity once release packaging is finalized. 
+ integrity = arrow_version["integrity"], patch_cmds = _ARROW_PATCH_CMDS, build_file_content = _render_arrow_build_file(tag), ) diff --git a/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel index 33c39c52..e3ac1a53 100644 --- a/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel +++ b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel @@ -26,7 +26,7 @@ local_path_override( path = "../../../../../", ) -fluss_cpp = use_extension("@fluss-cpp//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") fluss_cpp.config( mode = "build", protobuf_version = "3.25.5", diff --git a/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel index c8e2b42b..2f411863 100644 --- a/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel +++ b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel @@ -26,7 +26,7 @@ local_path_override( ) # Intended interface for preinstalled protoc + Arrow C++ environments. 
-fluss_cpp = use_extension("@fluss-cpp//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") fluss_cpp.config( mode = "system", protobuf_version = "3.25.5", diff --git a/bindings/cpp/scripts/ensure_protoc.sh b/bindings/cpp/scripts/ensure_protoc.sh index da7adb87..ae5bc20a 100755 --- a/bindings/cpp/scripts/ensure_protoc.sh +++ b/bindings/cpp/scripts/ensure_protoc.sh @@ -19,26 +19,40 @@ set -euo pipefail PROTOBUF_BASELINE_VERSION="${PROTOBUF_BASELINE_VERSION:-3.25.5}" -PROTOC_INSTALL_ROOT="${PROTOC_INSTALL_ROOT:-/tmp/fluss-cpp-tools}" +if [[ -n "${XDG_CACHE_HOME:-}" ]]; then + _PROTOC_DEFAULT_CACHE_BASE="${XDG_CACHE_HOME}" +elif [[ -n "${HOME:-}" ]]; then + _PROTOC_DEFAULT_CACHE_BASE="${HOME}/.cache" +else + _PROTOC_DEFAULT_CACHE_BASE="/tmp" +fi +PROTOC_INSTALL_ROOT="${PROTOC_INSTALL_ROOT:-${_PROTOC_DEFAULT_CACHE_BASE}/fluss-cpp-tools}" PROTOC_OS="${PROTOC_OS:-linux}" PROTOC_ARCH="${PROTOC_ARCH:-x86_64}" PROTOC_FORCE_INSTALL="${PROTOC_FORCE_INSTALL:-0}" PROTOC_PRINT_PATH_ONLY="${PROTOC_PRINT_PATH_ONLY:-0}" +PROTOC_ALLOW_INSECURE_DOWNLOAD="${PROTOC_ALLOW_INSECURE_DOWNLOAD:-0}" +PROTOC_SKIP_CHECKSUM_VERIFY="${PROTOC_SKIP_CHECKSUM_VERIFY:-0}" usage() { cat <<'EOF' Usage: bindings/cpp/scripts/ensure_protoc.sh [--print-path] Ensures a protoc binary matching the configured protobuf baseline is available. -Installs into a local cache directory (default: /tmp/fluss-cpp-tools) and prints +Installs into a local cache directory (default: \$XDG_CACHE_HOME/fluss-cpp-tools or +\$HOME/.cache/fluss-cpp-tools) and prints the protoc path on stdout. 
Env vars: PROTOBUF_BASELINE_VERSION Baseline protobuf version (default: 3.25.5) - PROTOC_INSTALL_ROOT Local cache root (default: /tmp/fluss-cpp-tools) + PROTOC_INSTALL_ROOT Local cache root (default: XDG/HOME cache dir) PROTOC_OS protoc package OS (default: linux) PROTOC_ARCH protoc package arch (default: x86_64) PROTOC_FORCE_INSTALL 1 to force re-download + PROTOC_ALLOW_INSECURE_DOWNLOAD + 1 to disable TLS verification (not recommended) + PROTOC_SKIP_CHECKSUM_VERIFY + 1 to skip pinned archive checksum verification BAZEL_PROXY_URL Optional proxy (sets curl/wget proxy envs if present) EOF } @@ -93,23 +107,57 @@ version_matches_baseline() { [[ "$actual" == "$baseline" || "$actual_norm" == "$baseline_norm" ]] } +lookup_protoc_archive_sha256() { + local release_version="$1" + local os="$2" + local arch="$3" + case "${release_version}:${os}:${arch}" in + 25.5:linux:x86_64) + echo "e1ed237a17b2e851cf9662cb5ad02b46e70ff8e060e05984725bc4b4228c6b28" + ;; + *) + return 1 + ;; + esac +} + +verify_download_sha256() { + local file="$1" + local expected="$2" + local actual="" + if command -v sha256sum >/dev/null 2>&1; then + actual="$(sha256sum "$file" | awk '{print $1}')" + elif command -v shasum >/dev/null 2>&1; then + actual="$(shasum -a 256 "$file" | awk '{print $1}')" + else + echo "ERROR: neither sha256sum nor shasum is available for checksum verification." >&2 + return 1 + fi + if [[ "$actual" != "$expected" ]]; then + echo "ERROR: protoc archive checksum mismatch." 
>&2 + echo " expected: $expected" >&2 + echo " actual: $actual" >&2 + return 1 + fi +} + download_file() { local url="$1" local out="$2" if command -v curl >/dev/null 2>&1; then - if [[ -n "${https_proxy:-}" || -n "${http_proxy:-}" ]]; then - curl -fLk "$url" -o "$out" - else - curl -fL "$url" -o "$out" + local curl_args=(-fL) + if [[ "${PROTOC_ALLOW_INSECURE_DOWNLOAD}" == "1" ]]; then + curl_args+=(-k) fi + curl "${curl_args[@]}" "$url" -o "$out" return 0 fi if command -v wget >/dev/null 2>&1; then local wget_args=() if [[ -n "${https_proxy:-}" || -n "${http_proxy:-}" ]]; then - wget_args+=(--no-check-certificate -e use_proxy=yes) + wget_args+=(-e use_proxy=yes) if [[ -n "${https_proxy:-}" ]]; then wget_args+=(-e "https_proxy=${https_proxy}") fi @@ -117,6 +165,9 @@ download_file() { wget_args+=(-e "http_proxy=${http_proxy}") fi fi + if [[ "${PROTOC_ALLOW_INSECURE_DOWNLOAD}" == "1" ]]; then + wget_args+=(--no-check-certificate) + fi wget "${wget_args[@]}" -O "$out" "$url" return 0 fi @@ -167,6 +218,14 @@ trap 'rm -rf "${tmpdir}"' EXIT archive_path="${tmpdir}/${PROTOC_ARCHIVE}" download_file "${PROTOC_URL}" "${archive_path}" +if [[ "${PROTOC_SKIP_CHECKSUM_VERIFY}" != "1" ]]; then + if expected_sha256="$(lookup_protoc_archive_sha256 "${PROTOC_RELEASE_VERSION}" "${PROTOC_OS}" "${PROTOC_ARCH}")"; then + verify_download_sha256 "${archive_path}" "${expected_sha256}" + else + echo "ERROR: no pinned checksum for protoc archive ${PROTOC_ARCHIVE}. Set PROTOC_SKIP_CHECKSUM_VERIFY=1 to bypass." 
>&2 + exit 1 + fi +fi extract_dir="${tmpdir}/extract" mkdir -p "${extract_dir}" diff --git a/docs/cpp-bazel-usage.md b/docs/cpp-bazel-usage.md index 9898cb7f..e9e34f21 100644 --- a/docs/cpp-bazel-usage.md +++ b/docs/cpp-bazel-usage.md @@ -58,7 +58,7 @@ module(name = "my_cpp_app") bazel_dep(name = "rules_cc", version = "0.2.14") bazel_dep(name = "fluss-cpp", version = "0.1.0") -fluss_cpp = use_extension("@fluss-cpp//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") fluss_cpp.config( mode = "system", protobuf_version = "3.25.5", @@ -112,7 +112,7 @@ module(name = "my_cpp_app") bazel_dep(name = "rules_cc", version = "0.2.14") bazel_dep(name = "fluss-cpp", version = "0.1.0") -fluss_cpp = use_extension("@fluss-cpp//bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") fluss_cpp.config( mode = "build", protobuf_version = "3.25.5", @@ -184,7 +184,7 @@ If your environment requires a proxy for Bazel external downloads, export it before running: ```bash -export BAZEL_PROXY_URL="${BAZEL_PROXY_URL:-http://10.7.4.2:3128}" +export BAZEL_PROXY_URL="http://proxy.example.com:3128" export http_proxy="$BAZEL_PROXY_URL" export https_proxy="$BAZEL_PROXY_URL" export HTTP_PROXY="$http_proxy" @@ -258,6 +258,8 @@ bazel --ignore_all_rc_files run \ //:consumer_system ``` +On macOS (BSD `sed`), replace `sed -i` with `sed -i ''` in the patch step above. + ## Upgrade Procedure 1. 
Update `bazel_dep(name = "fluss-cpp", version = "...")` From 345b64e7a1e57afd40136f53751c75c06eeb4df2 Mon Sep 17 00:00:00 2001 From: zhaohaidao Date: Fri, 27 Feb 2026 04:36:17 +0000 Subject: [PATCH 5/9] address comments --- MODULE.bazel | 12 ++-- bindings/cpp/MODULE.bazel | 60 ------------------- .../bazel-consumer/build/MODULE.bazel | 2 +- .../bazel-consumer/system/MODULE.bazel | 2 +- docs/cpp-bazel-usage.md | 11 ++-- 5 files changed, 15 insertions(+), 72 deletions(-) delete mode 100644 bindings/cpp/MODULE.bazel diff --git a/MODULE.bazel b/MODULE.bazel index a35618fa..f0e60250 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -15,19 +15,21 @@ # specific language governing permissions and limitations # under the License. -# Required for root module mode (`bazel_dep(name = "fluss-cpp", ...)`). +# Required at repository root for root module mode (`bazel_dep(name = "fluss-cpp", ...)`). +# Consumer examples use `local_path_override(..., path = "/path/to/fluss-rust")`, so +# Bazel resolves the module from the repository root. This also matches the Rust +# workspace layout used by `bindings/cpp` during cargo-based Bazel/CMake builds. +# `0.0.0` is a local-development placeholder in this repository branch. +# Consumers should depend on a published release version. module( name = "fluss-cpp", version = "0.0.0", ) -# NOTE: Keep this dependency block in sync with bindings/cpp/MODULE.bazel. 
-# SYNC_START bindings/cpp/MODULE.bazel bazel_dep(name = "rules_cc", version = "0.0.17") bazel_dep(name = "platforms", version = "0.0.10") bazel_dep(name = "rules_foreign_cc", version = "0.15.1") bazel_dep(name = "rules_python", version = "1.2.0") -# SYNC_END bindings/cpp/MODULE.bazel python = use_extension("@rules_python//python/extensions:python.bzl", "python") python.toolchain(python_version = "3.12") @@ -52,8 +54,6 @@ register_toolchains( ) cpp_sdk = use_extension("//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") -# Phase 1 keeps build mode behavior while moving Arrow build details out of -# MODULE.bazel. Registry/system modes will reuse the same extension entrypoint. cpp_sdk.config( mode = "build", arrow_cpp_version = "19.0.1", diff --git a/bindings/cpp/MODULE.bazel b/bindings/cpp/MODULE.bazel deleted file mode 100644 index 9771774c..00000000 --- a/bindings/cpp/MODULE.bazel +++ /dev/null @@ -1,60 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -module( - name = "fluss_cpp", -) - -bazel_dep(name = "rules_cc", version = "0.0.17") -bazel_dep(name = "platforms", version = "0.0.10") -bazel_dep(name = "rules_foreign_cc", version = "0.15.1") -bazel_dep(name = "rules_python", version = "1.2.0") - -python = use_extension("@rules_python//python/extensions:python.bzl", "python") -python.toolchain(python_version = "3.12") -use_repo(python, "python_3_12") - -foreign_cc_tools = use_extension("@rules_foreign_cc//foreign_cc:extensions.bzl", "tools") -use_repo( - foreign_cc_tools, - "cmake_3.31.8_toolchains", - "cmake_src", - "ninja_1.13.0_toolchains", - "ninja_build_src", - "rules_foreign_cc_framework_toolchains", -) - -register_toolchains( - "@rules_foreign_cc_framework_toolchains//:all", - "@cmake_3.31.8_toolchains//:all", - "@ninja_1.13.0_toolchains//:all", - "@python_3_12//:all", - "@rules_foreign_cc//toolchains:all", -) - -cpp_sdk = use_extension("//bazel/cpp:deps.bzl", "cpp_sdk") -# Phase 1 keeps build mode behavior while moving Arrow build details out of -# MODULE.bazel. Registry/system modes will reuse the same extension entrypoint. -cpp_sdk.config( - mode = "build", - arrow_cpp_version = "19.0.1", - protobuf_version = "3.25.5", - ep_cmake_ranlib = "/usr/bin/ranlib", - ep_cmake_ar = "/usr/bin/ar", - ep_cmake_nm = "/usr/bin/nm", -) -use_repo(cpp_sdk, "apache_arrow_cpp") diff --git a/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel index e3ac1a53..4fad1ffa 100644 --- a/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel +++ b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel @@ -18,7 +18,7 @@ module(name = "fluss_cpp_consumer_build") bazel_dep(name = "rules_cc", version = "0.2.14") -bazel_dep(name = "fluss-cpp", version = "0.1.0") +bazel_dep(name = "fluss-cpp", version = "0.0.0") # Local override for repository-local validation only. 
local_path_override( diff --git a/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel index 2f411863..9d38a593 100644 --- a/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel +++ b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel @@ -18,7 +18,7 @@ module(name = "fluss_cpp_consumer_system") bazel_dep(name = "rules_cc", version = "0.2.14") -bazel_dep(name = "fluss-cpp", version = "0.1.0") +bazel_dep(name = "fluss-cpp", version = "0.0.0") local_path_override( module_name = "fluss-cpp", diff --git a/docs/cpp-bazel-usage.md b/docs/cpp-bazel-usage.md index e9e34f21..aeb0d35f 100644 --- a/docs/cpp-bazel-usage.md +++ b/docs/cpp-bazel-usage.md @@ -20,7 +20,6 @@ Current simplification scope: - Dependency model: **root module mode** - Consumer dependency target: `@fluss-cpp//bindings/cpp:fluss_cpp` - Root `MODULE.bazel` is required for root module mode. -- Root `BUILD.bazel` is optional when consumers use the direct target label above. - Build systems covered by this document: **Bazel** - Dependency modes covered by this document: **system/build** @@ -56,7 +55,7 @@ Use this mode when your environment already provides: module(name = "my_cpp_app") bazel_dep(name = "rules_cc", version = "0.2.14") -bazel_dep(name = "fluss-cpp", version = "0.1.0") +bazel_dep(name = "fluss-cpp", version = "") fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") fluss_cpp.config( @@ -110,7 +109,7 @@ provision it from source. module(name = "my_cpp_app") bazel_dep(name = "rules_cc", version = "0.2.14") -bazel_dep(name = "fluss-cpp", version = "0.1.0") +bazel_dep(name = "fluss-cpp", version = "") fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") fluss_cpp.config( @@ -177,11 +176,15 @@ local_path_override( Do not keep local overrides in long-lived branches. 
+Repository-local examples in this repo use `version = "0.0.0"` together with +`local_path_override(...)` because the root `MODULE.bazel` in this branch is not +a published release module. + ## Repository-local Validation (Direct Commands) These commands validate the repository examples directly. If your environment requires a proxy for Bazel external downloads, export it -before running: +before running (replace the placeholder URL with your actual proxy): ```bash export BAZEL_PROXY_URL="http://proxy.example.com:3128" From 439ffe1b9467e90f97578084acaa138e1e2bfcbe Mon Sep 17 00:00:00 2001 From: zhaohaidao Date: Fri, 27 Feb 2026 04:36:17 +0000 Subject: [PATCH 6/9] address comments --- bindings/cpp/bazel/cpp/BUILD.bazel | 18 +++++++++++++++++- bindings/cpp/bazel/cpp/deps.bzl | 17 +++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/bindings/cpp/bazel/cpp/BUILD.bazel b/bindings/cpp/bazel/cpp/BUILD.bazel index ee7150c5..e4b730dc 100644 --- a/bindings/cpp/bazel/cpp/BUILD.bazel +++ b/bindings/cpp/bazel/cpp/BUILD.bazel @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + package(default_visibility = ["//visibility:public"]) # Stable indirection target for the Arrow C++ dependency. 
The implementation @@ -7,4 +24,3 @@ alias( name = "arrow_cpp_dep", actual = "@apache_arrow_cpp//:arrow_cpp", ) - diff --git a/bindings/cpp/bazel/cpp/deps.bzl b/bindings/cpp/bazel/cpp/deps.bzl index 62059530..447b05a1 100644 --- a/bindings/cpp/bazel/cpp/deps.bzl +++ b/bindings/cpp/bazel/cpp/deps.bzl @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + """Bzlmod extension for fluss C++ SDK dependency provisioning.""" load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") From 72bf7af452c7fa5f97c90452e4338e94074477e1 Mon Sep 17 00:00:00 2001 From: zhaohaidao Date: Fri, 27 Feb 2026 04:36:17 +0000 Subject: [PATCH 7/9] address comments --- bindings/cpp/.gitignore | 3 ++ bindings/cpp/BUILD.bazel | 66 +++++++++++++-------------- bindings/cpp/bazel/cpp/deps.bzl | 9 +++- bindings/cpp/scripts/ensure_protoc.sh | 6 +-- 4 files changed, 45 insertions(+), 39 deletions(-) diff --git a/bindings/cpp/.gitignore b/bindings/cpp/.gitignore index 4d5db858..1f1632b9 100644 --- a/bindings/cpp/.gitignore +++ b/bindings/cpp/.gitignore @@ -19,6 +19,9 @@ MODULE.bazel.lock # Keep versioned Bazel consumer examples (name starts with bazel-). 
!examples/bazel-consumer/ !examples/bazel-consumer/** +# `build/` is ignored globally above; keep this fixture path visible. +!examples/bazel-consumer/build/ +!examples/bazel-consumer/build/** examples/bazel-consumer/**/MODULE.bazel.lock examples/bazel-consumer/**/bazel-* examples/bazel-consumer/**/tmp.log diff --git a/bindings/cpp/BUILD.bazel b/bindings/cpp/BUILD.bazel index 795643b7..d247baf1 100644 --- a/bindings/cpp/BUILD.bazel +++ b/bindings/cpp/BUILD.bazel @@ -34,21 +34,21 @@ config_setting( values = {"compilation_mode": "opt"}, ) -genrule( - name = "cargo_build_debug", - srcs = glob([ - "src/**/*.rs", - "Cargo.toml", - ]), - outs = [ - "rust_lib_debug.a", - "rust_bridge_cc_debug.cc", - "rust_bridge_h_debug.h", - "src/lib.rs_debug.h", - "cxxbridge/rust/cxx_debug.h", - ], - cmd = """ +_PROTOC_SETUP_SNIPPET = """ set -e + if [ -n "$${CARGO:-}" ]; then + if [ ! -x "$$CARGO" ]; then + echo "Error: CARGO is set but not executable: $$CARGO" >&2 + exit 1 + fi + CARGO_BIN="$$CARGO" + else + CARGO_BIN=$$(command -v cargo || true) + if [ -z "$$CARGO_BIN" ]; then + echo "Error: cargo not found in PATH and CARGO is not set" >&2 + exit 1 + fi + fi if [ -n "$${PROTOC:-}" ]; then if [ ! 
-x "$$PROTOC" ]; then echo "Error: PROTOC is set but not executable: $$PROTOC" >&2 @@ -63,6 +63,22 @@ genrule( fi export PROTOC="$$PROTOC_BIN" fi +""" + +genrule( + name = "cargo_build_debug", + srcs = glob([ + "src/**/*.rs", + "Cargo.toml", + ]), + outs = [ + "rust_lib_debug.a", + "rust_bridge_cc_debug.cc", + "rust_bridge_h_debug.h", + "src/lib.rs_debug.h", + "cxxbridge/rust/cxx_debug.h", + ], + cmd = _PROTOC_SETUP_SNIPPET + """ EXECROOT=$$(pwd) OUTPUT_LIB=$(location rust_lib_debug.a) OUTPUT_CC=$(location rust_bridge_cc_debug.cc) @@ -80,7 +96,7 @@ genrule( exit 1 fi cd $$WORKSPACE_ROOT - cargo build --manifest-path $$CARGO_DIR/Cargo.toml + "$$CARGO_BIN" build --manifest-path $$CARGO_DIR/Cargo.toml CARGO_TARGET_DIR=$$WORKSPACE_ROOT/target # cxxbridge uses the Cargo package name (with hyphen): fluss-cpp RUST_BRIDGE_DIR=$$CARGO_TARGET_DIR/cxxbridge/fluss-cpp/src @@ -128,22 +144,7 @@ genrule( "src/lib.rs_release.h", "cxxbridge/rust/cxx_release.h", ], - cmd = """ - set -e - if [ -n "$${PROTOC:-}" ]; then - if [ ! 
-x "$$PROTOC" ]; then - echo "Error: PROTOC is set but not executable: $$PROTOC" >&2 - exit 1 - fi - export PROTOC - else - PROTOC_BIN=$$(command -v protoc || true) - if [ -z "$$PROTOC_BIN" ]; then - echo "Error: protoc not found in PATH and PROTOC is not set" >&2 - exit 1 - fi - export PROTOC="$$PROTOC_BIN" - fi + cmd = _PROTOC_SETUP_SNIPPET + """ EXECROOT=$$(pwd) OUTPUT_LIB=$(location rust_lib_release.a) OUTPUT_CC=$(location rust_bridge_cc_release.cc) @@ -161,7 +162,7 @@ genrule( exit 1 fi cd $$WORKSPACE_ROOT - cargo build --release --manifest-path $$CARGO_DIR/Cargo.toml + "$$CARGO_BIN" build --release --manifest-path $$CARGO_DIR/Cargo.toml CARGO_TARGET_DIR=$$WORKSPACE_ROOT/target # cxxbridge uses the Cargo package name (with hyphen): fluss-cpp RUST_BRIDGE_DIR=$$CARGO_TARGET_DIR/cxxbridge/fluss-cpp/src @@ -280,7 +281,6 @@ cc_library( "src/admin.cpp", "src/connection.cpp", "src/table.cpp", - ":rust_bridge_cc_unified", ], hdrs = [ "include/fluss.hpp", diff --git a/bindings/cpp/bazel/cpp/deps.bzl b/bindings/cpp/bazel/cpp/deps.bzl index 447b05a1..f2bad0a6 100644 --- a/bindings/cpp/bazel/cpp/deps.bzl +++ b/bindings/cpp/bazel/cpp/deps.bzl @@ -58,6 +58,10 @@ cmake( "ARROW_JSON": "OFF", "ARROW_PARQUET": "OFF", "ARROW_IPC": "ON", + "ARROW_JEMALLOC": "OFF", + "ARROW_MIMALLOC": "OFF", + "ARROW_SIMD_LEVEL": "NONE", + "ARROW_RUNTIME_SIMD_LEVEL": "NONE", "ARROW_DEPENDENCY_SOURCE": "BUNDLED", # Temporary workarounds for older images / Bazel sandbox toolchain detection. 
"EP_CMAKE_RANLIB": "__EP_CMAKE_RANLIB__", @@ -186,7 +190,10 @@ def _system_arrow_repo_impl(repo_ctx): if copy_res.return_code != 0: fail("failed to copy system arrow prefix %s: %s" % (prefix, copy_res.stderr)) - header_root = prefix + "/" + include_dir + include_dir_for_scan = include_dir + if include_dir_for_scan.endswith("/"): + include_dir_for_scan = include_dir_for_scan[:-1] + header_root = prefix + "/" + include_dir_for_scan + "/arrow" headers = _list_files(repo_ctx, header_root, [".h", ".hpp"]) header_srcs = [] for h in headers: diff --git a/bindings/cpp/scripts/ensure_protoc.sh b/bindings/cpp/scripts/ensure_protoc.sh index ae5bc20a..77e925d7 100755 --- a/bindings/cpp/scripts/ensure_protoc.sh +++ b/bindings/cpp/scripts/ensure_protoc.sh @@ -236,8 +236,4 @@ mkdir -p "${PROTOC_PREFIX}" cp -a "${extract_dir}/." "${PROTOC_PREFIX}/" chmod +x "${PROTOC_BIN}" -if [[ "${PROTOC_PRINT_PATH_ONLY}" == "1" ]]; then - echo "${PROTOC_BIN}" -else - echo "${PROTOC_BIN}" -fi +echo "${PROTOC_BIN}" From aab47d0b760fb2f85518f856a07b27bf3025d19a Mon Sep 17 00:00:00 2001 From: zhaohaidao Date: Fri, 27 Feb 2026 08:06:47 +0000 Subject: [PATCH 8/9] address comments --- .../bazel-consumer/build/MODULE.bazel | 3 +- .../bazel-consumer/system/MODULE.bazel | 7 ++- bindings/cpp/scripts/ensure_protoc.sh | 46 +++++++++++++++++-- docs/cpp-bazel-usage.md | 15 ++++-- 4 files changed, 60 insertions(+), 11 deletions(-) diff --git a/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel index 4fad1ffa..f31165c1 100644 --- a/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel +++ b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel @@ -18,11 +18,12 @@ module(name = "fluss_cpp_consumer_build") bazel_dep(name = "rules_cc", version = "0.2.14") -bazel_dep(name = "fluss-cpp", version = "0.0.0") +bazel_dep(name = "fluss-cpp", version = "0.1.0") # Local override for repository-local validation only. 
local_path_override( module_name = "fluss-cpp", + # Repository root path (the directory containing `bindings/cpp`). path = "../../../../../", ) diff --git a/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel index 9d38a593..2a4d6a65 100644 --- a/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel +++ b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel @@ -18,11 +18,14 @@ module(name = "fluss_cpp_consumer_system") bazel_dep(name = "rules_cc", version = "0.2.14") -bazel_dep(name = "fluss-cpp", version = "0.0.0") +bazel_dep(name = "fluss-cpp", version = "0.1.0") +# Repository-local example path (repository root containing `bindings/cpp`). +# If you copy this example out of tree, replace this with an absolute path +# (for example: /path/to/fluss-rust). local_path_override( module_name = "fluss-cpp", - path = "/path/to/fluss-rust", + path = "../../../../../", ) # Intended interface for preinstalled protoc + Arrow C++ environments. diff --git a/bindings/cpp/scripts/ensure_protoc.sh b/bindings/cpp/scripts/ensure_protoc.sh index 77e925d7..3210bcc7 100755 --- a/bindings/cpp/scripts/ensure_protoc.sh +++ b/bindings/cpp/scripts/ensure_protoc.sh @@ -26,9 +26,38 @@ elif [[ -n "${HOME:-}" ]]; then else _PROTOC_DEFAULT_CACHE_BASE="/tmp" fi + +_PROTOC_UNAME_S="$(uname -s | tr '[:upper:]' '[:lower:]')" +case "${_PROTOC_UNAME_S}" in + linux*) + _PROTOC_DEFAULT_OS="linux" + ;; + darwin*) + _PROTOC_DEFAULT_OS="osx" + ;; + *) + echo "ERROR: unsupported host OS '${_PROTOC_UNAME_S}'. Please set PROTOC_OS explicitly." >&2 + exit 1 + ;; +esac + +_PROTOC_UNAME_M="$(uname -m)" +case "${_PROTOC_UNAME_M}" in + x86_64|amd64) + _PROTOC_DEFAULT_ARCH="x86_64" + ;; + aarch64|arm64) + _PROTOC_DEFAULT_ARCH="aarch_64" + ;; + *) + echo "ERROR: unsupported host arch '${_PROTOC_UNAME_M}'. Please set PROTOC_ARCH explicitly." 
>&2 + exit 1 + ;; +esac + PROTOC_INSTALL_ROOT="${PROTOC_INSTALL_ROOT:-${_PROTOC_DEFAULT_CACHE_BASE}/fluss-cpp-tools}" -PROTOC_OS="${PROTOC_OS:-linux}" -PROTOC_ARCH="${PROTOC_ARCH:-x86_64}" +PROTOC_OS="${PROTOC_OS:-${_PROTOC_DEFAULT_OS}}" +PROTOC_ARCH="${PROTOC_ARCH:-${_PROTOC_DEFAULT_ARCH}}" PROTOC_FORCE_INSTALL="${PROTOC_FORCE_INSTALL:-0}" PROTOC_PRINT_PATH_ONLY="${PROTOC_PRINT_PATH_ONLY:-0}" PROTOC_ALLOW_INSECURE_DOWNLOAD="${PROTOC_ALLOW_INSECURE_DOWNLOAD:-0}" @@ -46,8 +75,8 @@ the protoc path on stdout. Env vars: PROTOBUF_BASELINE_VERSION Baseline protobuf version (default: 3.25.5) PROTOC_INSTALL_ROOT Local cache root (default: XDG/HOME cache dir) - PROTOC_OS protoc package OS (default: linux) - PROTOC_ARCH protoc package arch (default: x86_64) + PROTOC_OS protoc package OS (default: auto-detect host: linux/osx) + PROTOC_ARCH protoc package arch (default: auto-detect host: x86_64/aarch_64) PROTOC_FORCE_INSTALL 1 to force re-download PROTOC_ALLOW_INSECURE_DOWNLOAD 1 to disable TLS verification (not recommended) @@ -112,9 +141,18 @@ lookup_protoc_archive_sha256() { local os="$2" local arch="$3" case "${release_version}:${os}:${arch}" in + 25.5:linux:aarch_64) + echo "dc715bb5aab2ebf9653d7d3efbe55e01a035e45c26f391ff6d9b7923e22914b7" + ;; 25.5:linux:x86_64) echo "e1ed237a17b2e851cf9662cb5ad02b46e70ff8e060e05984725bc4b4228c6b28" ;; + 25.5:osx:aarch_64) + echo "781a6fc4c265034872cadc65e63dd3c0fc49245b70917821b60e2d457a6876ab" + ;; + 25.5:osx:x86_64) + echo "c5447e4f0d5caffb18d9ff21eae7bc7faf2bb2000083d6f49e5b6000b30fceae" + ;; *) return 1 ;; diff --git a/docs/cpp-bazel-usage.md b/docs/cpp-bazel-usage.md index aeb0d35f..61d861ed 100644 --- a/docs/cpp-bazel-usage.md +++ b/docs/cpp-bazel-usage.md @@ -73,6 +73,9 @@ use_repo(fluss_cpp, "apache_arrow_cpp") ### Build and run (consumer workspace pattern) +Run from your consumer workspace root (the directory containing +`MODULE.bazel` and your top-level `BUILD.bazel`). 
+ ```bash PROTOC_BIN="$(command -v protoc)" CARGO_BIN="$(command -v cargo)" @@ -125,10 +128,14 @@ Notes: - `build` mode in the core Bazel integration still uses `PROTOC` (env / PATH). - To auto-download a pinned `protoc` for `build` mode, use `bindings/cpp/scripts/ensure_protoc.sh` and pass the result via `--action_env=PROTOC=...`. +- `ensure_protoc.sh` auto-detects host OS/arch (`linux`/`osx`, `x86_64`/`aarch_64`). - Some environments may require `ep_cmake_ar/ranlib/nm` overrides. ### Build and run (consumer workspace pattern, with auto-downloaded `protoc`) +Run from the `fluss-rust` repository root, or adjust the script path if you +copied it elsewhere. + ```bash PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)" ``` @@ -176,9 +183,9 @@ local_path_override( Do not keep local overrides in long-lived branches. -Repository-local examples in this repo use `version = "0.0.0"` together with -`local_path_override(...)` because the root `MODULE.bazel` in this branch is not -a published release module. +Repository-local examples in this repo use `version = "0.1.0"` together with +`local_path_override(...)` for local validation before publishing to the Bazel +registry. ## Repository-local Validation (Direct Commands) @@ -234,7 +241,7 @@ tmp_dir="$(mktemp -d /tmp/fluss-bazel-system-doc.XXXXXX)" FLUSS_RUST_ROOT="$(pwd)" cp -a bindings/cpp/examples/bazel-consumer/system/. 
"$tmp_dir/" sed -i \ - -e "s|path = \"/path/to/fluss-rust\"|path = \"$FLUSS_RUST_ROOT\"|" \ + -e "s|path = \"../../../../../\"|path = \"$FLUSS_RUST_ROOT\"|" \ -e 's|system_arrow_prefix = "/usr"|system_arrow_prefix = "/tmp/fluss-system-arrow-19.0.1"|' \ -e 's|system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so"|system_arrow_shared_library = "lib/libarrow.so"|' \ -e 's|system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so\\*"|system_arrow_runtime_glob = "lib/libarrow.so*"|' \ From ad458b96149f859f31e6e73f966083f1fa8d5e11 Mon Sep 17 00:00:00 2001 From: zhaohaidao Date: Fri, 27 Feb 2026 09:51:01 +0000 Subject: [PATCH 9/9] address comments --- bindings/cpp/bazel/cpp/deps.bzl | 45 ++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/bindings/cpp/bazel/cpp/deps.bzl b/bindings/cpp/bazel/cpp/deps.bzl index f2bad0a6..592ece91 100644 --- a/bindings/cpp/bazel/cpp/deps.bzl +++ b/bindings/cpp/bazel/cpp/deps.bzl @@ -166,8 +166,13 @@ def _list_files(repo_ctx, base_dir, suffixes): result = repo_ctx.execute([ "/usr/bin/find", base_dir, + "(", "-type", "f", + "-o", + "-type", + "l", + ")", ]) if result.return_code != 0: fail("failed to enumerate files under %s: %s" % (base_dir, result.stderr)) @@ -179,38 +184,57 @@ def _list_files(repo_ctx, base_dir, suffixes): break return sorted(files) +def _copy_file_to_sysroot(repo_ctx, prefix, rel_path): + if rel_path.startswith("/"): + fail("expected relative path under prefix, got absolute path: %s" % rel_path) + src = prefix + "/" + rel_path + dst = "sysroot/" + rel_path + dst_parent = dst.rsplit("/", 1)[0] if "/" in dst else "sysroot" + mkdir_res = repo_ctx.execute(["/bin/mkdir", "-p", dst_parent]) + if mkdir_res.return_code != 0: + fail("failed to create directory %s: %s" % (dst_parent, mkdir_res.stderr)) + # Resolve symlinks into real files to keep the generated sysroot self-contained. 
+ cp_res = repo_ctx.execute(["/bin/cp", "-L", src, dst]) + if cp_res.return_code != 0: + fail("failed to copy %s to %s: %s" % (src, dst, cp_res.stderr)) + def _system_arrow_repo_impl(repo_ctx): prefix = repo_ctx.attr.prefix.rstrip("/") include_dir = repo_ctx.attr.include_dir shared_library = repo_ctx.attr.shared_library runtime_glob = repo_ctx.attr.runtime_glob - repo_ctx.execute(["/bin/mkdir", "-p", "sysroot"]) - copy_res = repo_ctx.execute(["/bin/cp", "-a", prefix + "/.", "sysroot"]) - if copy_res.return_code != 0: - fail("failed to copy system arrow prefix %s: %s" % (prefix, copy_res.stderr)) + mkdir_res = repo_ctx.execute(["/bin/mkdir", "-p", "sysroot"]) + if mkdir_res.return_code != 0: + fail("failed to create sysroot directory: %s" % mkdir_res.stderr) include_dir_for_scan = include_dir if include_dir_for_scan.endswith("/"): include_dir_for_scan = include_dir_for_scan[:-1] header_root = prefix + "/" + include_dir_for_scan + "/arrow" headers = _list_files(repo_ctx, header_root, [".h", ".hpp"]) + header_srcs_rel = [] header_srcs = [] for h in headers: if not h.startswith(prefix + "/"): fail("header path %s is outside prefix %s" % (h, prefix)) - header_srcs.append("sysroot/" + h[len(prefix) + 1:]) + rel = h[len(prefix) + 1:] + header_srcs_rel.append(rel) + header_srcs.append("sysroot/" + rel) runtime_dir = runtime_glob.rsplit("/", 1)[0] runtime_prefix = runtime_glob.rsplit("/", 1)[1].replace("*", "") runtime_files = _list_files(repo_ctx, prefix + "/" + runtime_dir, [""]) + runtime_srcs_rel = [] runtime_srcs = [] for f in runtime_files: rel = f[len(prefix) + 1:] if f.startswith(prefix + "/") else None if rel == None: continue if rel.startswith(runtime_dir + "/") and rel.rsplit("/", 1)[1].startswith(runtime_prefix): + runtime_srcs_rel.append(rel) runtime_srcs.append("sysroot/" + rel) + runtime_srcs_rel = sorted(runtime_srcs_rel) runtime_srcs = sorted(runtime_srcs) # Prefer a versioned soname file as the imported shared library so Bazel @@ -218,18 +242,25 @@ def 
_system_arrow_repo_impl(repo_ctx): shared_import_rel = "sysroot/" + shared_library shared_basename = shared_library.rsplit("/", 1)[1] soname_candidates = [] - for rel in runtime_srcs: + for rel in runtime_srcs_rel: base = rel.rsplit("/", 1)[1] if base == shared_basename: continue if base.startswith(shared_basename + "."): - soname_candidates.append(rel) + soname_candidates.append("sysroot/" + rel) if soname_candidates: # Prefer shortest suffix first (e.g. libarrow.so.1900 before # libarrow.so.1900.1.0) to match ELF SONAME naming when available. soname_candidates = sorted(soname_candidates, key = lambda s: (len(s), s)) shared_import_rel = soname_candidates[0] + # Copy only required Arrow artifacts instead of mirroring the full system prefix. + copy_rel_paths = {} + for rel in header_srcs_rel + runtime_srcs_rel + [shared_library]: + copy_rel_paths[rel] = True + for rel in sorted(copy_rel_paths.keys()): + _copy_file_to_sysroot(repo_ctx, prefix, rel) + build_file = _render_system_arrow_build_file(repo_ctx.attr, shared_library_override = shared_import_rel[len("sysroot/"):]).replace( "__SYSTEM_ARROW_HDRS__", _starlark_string_list(header_srcs),