diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 00000000..f0e60250 --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Required at repository root for root module mode (`bazel_dep(name = "fluss-cpp", ...)`). +# Consumer examples use `local_path_override(..., path = "/path/to/fluss-rust")`, so +# Bazel resolves the module from the repository root. This also matches the Rust +# workspace layout used by `bindings/cpp` during cargo-based Bazel/CMake builds. +# `0.0.0` is a local-development placeholder in this repository branch. +# Consumers should depend on a published release version. 
+module( + name = "fluss-cpp", + version = "0.0.0", +) + +bazel_dep(name = "rules_cc", version = "0.0.17") +bazel_dep(name = "platforms", version = "0.0.10") +bazel_dep(name = "rules_foreign_cc", version = "0.15.1") +bazel_dep(name = "rules_python", version = "1.2.0") + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain(python_version = "3.12") +use_repo(python, "python_3_12") + +foreign_cc_tools = use_extension("@rules_foreign_cc//foreign_cc:extensions.bzl", "tools") +use_repo( + foreign_cc_tools, + "cmake_3.31.8_toolchains", + "cmake_src", + "ninja_1.13.0_toolchains", + "ninja_build_src", + "rules_foreign_cc_framework_toolchains", +) + +register_toolchains( + "@rules_foreign_cc_framework_toolchains//:all", + "@cmake_3.31.8_toolchains//:all", + "@ninja_1.13.0_toolchains//:all", + "@python_3_12//:all", + "@rules_foreign_cc//toolchains:all", +) + +cpp_sdk = use_extension("//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") +cpp_sdk.config( + mode = "build", + arrow_cpp_version = "19.0.1", + protobuf_version = "3.25.5", + ep_cmake_ranlib = "/usr/bin/ranlib", + ep_cmake_ar = "/usr/bin/ar", + ep_cmake_nm = "/usr/bin/nm", +) +use_repo(cpp_sdk, "apache_arrow_cpp") diff --git a/bindings/cpp/.gitignore b/bindings/cpp/.gitignore index da15a58b..1f1632b9 100644 --- a/bindings/cpp/.gitignore +++ b/bindings/cpp/.gitignore @@ -15,3 +15,13 @@ bazel-testlogs bazel-cpp bazel-* MODULE.bazel.lock + +# Keep versioned Bazel consumer examples (name starts with bazel-). +!examples/bazel-consumer/ +!examples/bazel-consumer/** +# `build/` is ignored globally above; keep this fixture path visible. 
+!examples/bazel-consumer/build/ +!examples/bazel-consumer/build/** +examples/bazel-consumer/**/MODULE.bazel.lock +examples/bazel-consumer/**/bazel-* +examples/bazel-consumer/**/tmp.log diff --git a/bindings/cpp/BUILD.bazel b/bindings/cpp/BUILD.bazel index 0ae2ce32..d247baf1 100644 --- a/bindings/cpp/BUILD.bazel +++ b/bindings/cpp/BUILD.bazel @@ -17,7 +17,7 @@ licenses(["notice"]) -load("@rules_cc//cc:defs.bzl", "cc_library", "cc_binary") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_import", "cc_library") config_setting( name = "debug_mode", @@ -34,6 +34,37 @@ config_setting( values = {"compilation_mode": "opt"}, ) +_PROTOC_SETUP_SNIPPET = """ + set -e + if [ -n "$${CARGO:-}" ]; then + if [ ! -x "$$CARGO" ]; then + echo "Error: CARGO is set but not executable: $$CARGO" >&2 + exit 1 + fi + CARGO_BIN="$$CARGO" + else + CARGO_BIN=$$(command -v cargo || true) + if [ -z "$$CARGO_BIN" ]; then + echo "Error: cargo not found in PATH and CARGO is not set" >&2 + exit 1 + fi + fi + if [ -n "$${PROTOC:-}" ]; then + if [ ! 
-x "$$PROTOC" ]; then + echo "Error: PROTOC is set but not executable: $$PROTOC" >&2 + exit 1 + fi + export PROTOC + else + PROTOC_BIN=$$(command -v protoc || true) + if [ -z "$$PROTOC_BIN" ]; then + echo "Error: protoc not found in PATH and PROTOC is not set" >&2 + exit 1 + fi + export PROTOC="$$PROTOC_BIN" + fi +""" + genrule( name = "cargo_build_debug", srcs = glob([ @@ -47,8 +78,7 @@ genrule( "src/lib.rs_debug.h", "cxxbridge/rust/cxx_debug.h", ], - cmd = """ - set -e + cmd = _PROTOC_SETUP_SNIPPET + """ EXECROOT=$$(pwd) OUTPUT_LIB=$(location rust_lib_debug.a) OUTPUT_CC=$(location rust_bridge_cc_debug.cc) @@ -66,7 +96,7 @@ genrule( exit 1 fi cd $$WORKSPACE_ROOT - cargo build --manifest-path $$CARGO_DIR/Cargo.toml + "$$CARGO_BIN" build --manifest-path $$CARGO_DIR/Cargo.toml CARGO_TARGET_DIR=$$WORKSPACE_ROOT/target # cxxbridge uses the Cargo package name (with hyphen): fluss-cpp RUST_BRIDGE_DIR=$$CARGO_TARGET_DIR/cxxbridge/fluss-cpp/src @@ -114,8 +144,7 @@ genrule( "src/lib.rs_release.h", "cxxbridge/rust/cxx_release.h", ], - cmd = """ - set -e + cmd = _PROTOC_SETUP_SNIPPET + """ EXECROOT=$$(pwd) OUTPUT_LIB=$(location rust_lib_release.a) OUTPUT_CC=$(location rust_bridge_cc_release.cc) @@ -133,7 +162,7 @@ genrule( exit 1 fi cd $$WORKSPACE_ROOT - cargo build --release --manifest-path $$CARGO_DIR/Cargo.toml + "$$CARGO_BIN" build --release --manifest-path $$CARGO_DIR/Cargo.toml CARGO_TARGET_DIR=$$WORKSPACE_ROOT/target # cxxbridge uses the Cargo package name (with hyphen): fluss-cpp RUST_BRIDGE_DIR=$$CARGO_TARGET_DIR/cxxbridge/fluss-cpp/src @@ -252,7 +281,6 @@ cc_library( "src/admin.cpp", "src/connection.cpp", "src/table.cpp", - ":rust_bridge_cc_unified", ], hdrs = [ "include/fluss.hpp", @@ -303,6 +331,7 @@ cc_library( }), deps = [ ":rust_lib", + "//bindings/cpp/bazel/cpp:arrow_cpp_dep", ], visibility = ["//visibility:public"], ) @@ -405,4 +434,3 @@ cc_binary( }), visibility = ["//visibility:public"], ) - diff --git a/bindings/cpp/CMakeLists.txt 
b/bindings/cpp/CMakeLists.txt index a8f527ed..cafa4481 100644 --- a/bindings/cpp/CMakeLists.txt +++ b/bindings/cpp/CMakeLists.txt @@ -27,9 +27,21 @@ include(FetchContent) set(FLUSS_GOOGLETEST_VERSION 1.15.2 CACHE STRING "version of GoogleTest") set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -find_package(Threads REQUIRED) +set(FLUSS_CPP_DEP_MODE "system" CACHE STRING "Dependency provisioning mode for fluss-cpp (system|build)") +set_property(CACHE FLUSS_CPP_DEP_MODE PROPERTY STRINGS system build) +set(FLUSS_CPP_ARROW_VERSION "19.0.1" CACHE STRING "Arrow C++ version baseline for fluss-cpp") +set(FLUSS_CPP_PROTOBUF_VERSION "3.25.5" CACHE STRING "Protobuf/protoc version baseline for fluss-cpp") +set(FLUSS_CPP_ARROW_SYSTEM_ROOT "" CACHE PATH "Optional Arrow installation prefix for system mode") +set(FLUSS_CPP_ARROW_SOURCE_URL + "https://github.com/apache/arrow/archive/refs/tags/apache-arrow-19.0.1.tar.gz" + CACHE STRING + "Arrow source archive URL used in build mode") +set(FLUSS_CPP_ARROW_SOURCE_SHA256 + "4c898504958841cc86b6f8710ecb2919f96b5e10fa8989ac10ac4fca8362d86a" + CACHE STRING + "SHA256 for the Arrow source archive used in build mode") -find_package(Arrow REQUIRED) +find_package(Threads REQUIRED) if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) @@ -47,11 +59,117 @@ if (FLUSS_DEV) set(FLUSS_ENABLE_TESTING ON) endif() +if (NOT FLUSS_CPP_DEP_MODE STREQUAL "system" AND NOT FLUSS_CPP_DEP_MODE STREQUAL "build") + message(FATAL_ERROR "Unsupported FLUSS_CPP_DEP_MODE='${FLUSS_CPP_DEP_MODE}'. Expected 'system' or 'build'.") +endif() + +find_program(FLUSS_PROTOC_EXECUTABLE NAMES protoc) +if (NOT FLUSS_PROTOC_EXECUTABLE) + message(FATAL_ERROR "protoc not found. Install protoc or set it in PATH. 
(Fluss baseline: ${FLUSS_CPP_PROTOBUF_VERSION})") +endif() + +if (DEFINED ENV{CARGO} AND NOT "$ENV{CARGO}" STREQUAL "" AND EXISTS "$ENV{CARGO}") + set(FLUSS_CARGO_EXECUTABLE "$ENV{CARGO}") +else() + if (DEFINED ENV{CARGO} AND NOT "$ENV{CARGO}" STREQUAL "") + get_filename_component(_FLUSS_CARGO_HINT_DIR "$ENV{CARGO}" DIRECTORY) + endif() + find_program(FLUSS_CARGO_EXECUTABLE NAMES cargo HINTS "${_FLUSS_CARGO_HINT_DIR}") +endif() +if (NOT FLUSS_CARGO_EXECUTABLE) + message(FATAL_ERROR "cargo not found. Install Rust toolchain or set CARGO/PATH.") +endif() + +execute_process( + COMMAND ${FLUSS_PROTOC_EXECUTABLE} --version + OUTPUT_VARIABLE FLUSS_PROTOC_VERSION_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) +string(REGEX MATCH "([0-9]+\\.[0-9]+(\\.[0-9]+)?)" FLUSS_PROTOC_VERSION "${FLUSS_PROTOC_VERSION_OUTPUT}") +set(FLUSS_PROTOC_VERSION_NORM "${FLUSS_PROTOC_VERSION}") +set(FLUSS_CPP_PROTOBUF_VERSION_NORM "${FLUSS_CPP_PROTOBUF_VERSION}") +string(REGEX REPLACE "^3\\." "" FLUSS_PROTOC_VERSION_NORM "${FLUSS_PROTOC_VERSION_NORM}") +string(REGEX REPLACE "^3\\." "" FLUSS_CPP_PROTOBUF_VERSION_NORM "${FLUSS_CPP_PROTOBUF_VERSION_NORM}") +if (FLUSS_PROTOC_VERSION AND + NOT FLUSS_PROTOC_VERSION VERSION_EQUAL FLUSS_CPP_PROTOBUF_VERSION AND + NOT FLUSS_PROTOC_VERSION_NORM VERSION_EQUAL FLUSS_CPP_PROTOBUF_VERSION_NORM) + message(WARNING + "protoc version (${FLUSS_PROTOC_VERSION}) does not match Fluss baseline " + "(${FLUSS_CPP_PROTOBUF_VERSION}). 
Build may still work, but this is outside the tested baseline.") +endif() + +message(STATUS "Fluss C++ dependency mode: ${FLUSS_CPP_DEP_MODE}") +message(STATUS "Fluss C++ protoc executable: ${FLUSS_PROTOC_EXECUTABLE} (${FLUSS_PROTOC_VERSION_OUTPUT})") +message(STATUS "Fluss C++ cargo executable: ${FLUSS_CARGO_EXECUTABLE}") + +if (FLUSS_CPP_DEP_MODE STREQUAL "system") + if (FLUSS_CPP_ARROW_SYSTEM_ROOT) + list(APPEND CMAKE_PREFIX_PATH "${FLUSS_CPP_ARROW_SYSTEM_ROOT}") + set(Arrow_ROOT "${FLUSS_CPP_ARROW_SYSTEM_ROOT}") + endif() + + find_package(Arrow REQUIRED) + + if (DEFINED Arrow_VERSION AND Arrow_VERSION AND NOT Arrow_VERSION VERSION_EQUAL FLUSS_CPP_ARROW_VERSION) + message(WARNING + "Arrow version (${Arrow_VERSION}) does not match Fluss baseline " + "(${FLUSS_CPP_ARROW_VERSION}). Build may still work, but this is outside the tested baseline.") + endif() +else() + # Build mode: provision Arrow C++ from source in-tree. + set(ARROW_BUILD_SHARED ON CACHE BOOL "" FORCE) + set(ARROW_BUILD_STATIC OFF CACHE BOOL "" FORCE) + set(ARROW_BUILD_TESTS OFF CACHE BOOL "" FORCE) + set(ARROW_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) + set(ARROW_BUILD_BENCHMARKS OFF CACHE BOOL "" FORCE) + set(ARROW_BUILD_INTEGRATION OFF CACHE BOOL "" FORCE) + set(ARROW_BUILD_UTILITIES OFF CACHE BOOL "" FORCE) + set(ARROW_COMPUTE OFF CACHE BOOL "" FORCE) + set(ARROW_CSV OFF CACHE BOOL "" FORCE) + set(ARROW_DATASET OFF CACHE BOOL "" FORCE) + set(ARROW_FILESYSTEM OFF CACHE BOOL "" FORCE) + set(ARROW_JSON OFF CACHE BOOL "" FORCE) + set(ARROW_PARQUET OFF CACHE BOOL "" FORCE) + set(ARROW_IPC ON CACHE BOOL "" FORCE) + # Reduce third-party sub-build complexity in build mode. 
+ set(ARROW_JEMALLOC OFF CACHE BOOL "" FORCE) + set(ARROW_MIMALLOC OFF CACHE BOOL "" FORCE) + set(ARROW_DEPENDENCY_SOURCE BUNDLED CACHE STRING "" FORCE) + set(ARROW_SIMD_LEVEL NONE CACHE STRING "" FORCE) + set(ARROW_RUNTIME_SIMD_LEVEL NONE CACHE STRING "" FORCE) + + FetchContent_Declare( + apache_arrow_src + URL ${FLUSS_CPP_ARROW_SOURCE_URL} + URL_HASH SHA256=${FLUSS_CPP_ARROW_SOURCE_SHA256} + SOURCE_SUBDIR cpp + ) + FetchContent_MakeAvailable(apache_arrow_src) + set(FLUSS_CPP_ARROW_EXTRA_INCLUDE_DIRS + "${apache_arrow_src_SOURCE_DIR}/cpp/src" + "${apache_arrow_src_BINARY_DIR}/src") + + if (TARGET arrow_shared AND NOT TARGET Arrow::arrow_shared) + add_library(Arrow::arrow_shared ALIAS arrow_shared) + endif() + if (NOT TARGET Arrow::arrow_shared) + message(FATAL_ERROR "Arrow build mode did not produce target Arrow::arrow_shared (or arrow_shared).") + endif() +endif() + # Get cargo target dir -execute_process(COMMAND cargo locate-project --workspace --message-format plain - OUTPUT_VARIABLE CARGO_TARGET_DIR +execute_process(COMMAND ${FLUSS_CARGO_EXECUTABLE} locate-project --workspace --message-format plain + OUTPUT_VARIABLE CARGO_MANIFEST_PATH + OUTPUT_STRIP_TRAILING_WHITESPACE WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) -string(REGEX REPLACE "/Cargo.toml\n$" "/target" CARGO_TARGET_DIR "${CARGO_TARGET_DIR}") +if (NOT CARGO_MANIFEST_PATH) + message(FATAL_ERROR + "Failed to resolve Cargo workspace target dir via '${FLUSS_CARGO_EXECUTABLE} locate-project'. 
" + "Check Rust toolchain installation and PATH/CARGO.") +endif() +get_filename_component(CARGO_WORKSPACE_DIR "${CARGO_MANIFEST_PATH}" DIRECTORY) +set(CARGO_TARGET_DIR "${CARGO_WORKSPACE_DIR}/target") set(CARGO_MANIFEST ${PROJECT_SOURCE_DIR}/Cargo.toml) set(RUST_SOURCE_FILE ${PROJECT_SOURCE_DIR}/src/lib.rs) @@ -77,7 +195,7 @@ if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") endif() add_custom_target(cargo_build - COMMAND cargo build --manifest-path ${CARGO_MANIFEST} ${CARGO_BUILD_FLAGS} + COMMAND ${CMAKE_COMMAND} -E env PROTOC=${FLUSS_PROTOC_EXECUTABLE} ${FLUSS_CARGO_EXECUTABLE} build --manifest-path ${CARGO_MANIFEST} ${CARGO_BUILD_FLAGS} BYPRODUCTS ${RUST_BRIDGE_CPP} ${RUST_LIB} ${RUST_HEADER_FILE} DEPENDS ${RUST_SOURCE_FILE} USES_TERMINAL @@ -88,6 +206,9 @@ add_library(fluss_cpp STATIC ${CPP_SOURCE_FILE} ${RUST_BRIDGE_CPP}) target_sources(fluss_cpp PUBLIC ${CPP_HEADER_FILE}) target_sources(fluss_cpp PRIVATE ${RUST_HEADER_FILE}) target_include_directories(fluss_cpp PUBLIC ${CPP_INCLUDE_DIR}) +if (FLUSS_CPP_ARROW_EXTRA_INCLUDE_DIRS) + target_include_directories(fluss_cpp PUBLIC ${FLUSS_CPP_ARROW_EXTRA_INCLUDE_DIRS}) +endif() target_link_libraries(fluss_cpp PUBLIC ${RUST_LIB}) target_link_libraries(fluss_cpp PRIVATE ${CMAKE_DL_LIBS} Threads::Threads) target_link_libraries(fluss_cpp PUBLIC Arrow::arrow_shared) @@ -114,9 +235,11 @@ target_link_libraries(fluss_cpp_kv_example PRIVATE Arrow::arrow_shared) target_compile_definitions(fluss_cpp_kv_example PRIVATE ARROW_FOUND) target_include_directories(fluss_cpp_kv_example PUBLIC ${CPP_INCLUDE_DIR}) -set_target_properties(fluss_cpp - PROPERTIES ADDITIONAL_CLEAN_FILES ${CARGO_TARGET_DIR} -) +if (CARGO_TARGET_DIR) + set_target_properties(fluss_cpp + PROPERTIES ADDITIONAL_CLEAN_FILES "${CARGO_TARGET_DIR}" + ) +endif() add_dependencies(fluss_cpp cargo_build) if (FLUSS_ENABLE_ADDRESS_SANITIZER) diff --git a/bindings/cpp/bazel/cpp/BUILD.bazel b/bindings/cpp/bazel/cpp/BUILD.bazel new file mode 100644 index 00000000..e4b730dc --- 
/dev/null +++ b/bindings/cpp/bazel/cpp/BUILD.bazel @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +package(default_visibility = ["//visibility:public"]) + +# Stable indirection target for the Arrow C++ dependency. The implementation +# repo name can change across modes (registry/build/system) without touching +# bindings/cpp/BUILD.bazel. +alias( + name = "arrow_cpp_dep", + actual = "@apache_arrow_cpp//:arrow_cpp", +) diff --git a/bindings/cpp/bazel/cpp/deps.bzl b/bindings/cpp/bazel/cpp/deps.bzl new file mode 100644 index 00000000..592ece91 --- /dev/null +++ b/bindings/cpp/bazel/cpp/deps.bzl @@ -0,0 +1,346 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Bzlmod extension for fluss C++ SDK dependency provisioning.""" + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +_ARROW_BUILD_FILE_TEMPLATE = """ +load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") + +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "all_srcs", + srcs = glob( + ["**"], + exclude = [ + "**/BUILD", + "**/BUILD.bazel", + ], + ), +) + +cmake( + name = "arrow_cpp", + lib_source = ":all_srcs", + working_directory = "cpp", + generate_args = ["-GUnix Makefiles"], + cache_entries = { + "CMAKE_BUILD_TYPE": "Release", + "CMAKE_INSTALL_LIBDIR": "lib", + "CMAKE_POSITION_INDEPENDENT_CODE": "ON", + "ARROW_BUILD_SHARED": "ON", + "ARROW_BUILD_STATIC": "OFF", + "ARROW_BUILD_TESTS": "OFF", + "ARROW_BUILD_EXAMPLES": "OFF", + "ARROW_BUILD_BENCHMARKS": "OFF", + "ARROW_BUILD_INTEGRATION": "OFF", + "ARROW_BUILD_UTILITIES": "OFF", + "ARROW_COMPUTE": "OFF", + "ARROW_CSV": "OFF", + "ARROW_DATASET": "OFF", + "ARROW_FILESYSTEM": "OFF", + "ARROW_JSON": "OFF", + "ARROW_PARQUET": "OFF", + "ARROW_IPC": "ON", + "ARROW_JEMALLOC": "OFF", + "ARROW_MIMALLOC": "OFF", + "ARROW_SIMD_LEVEL": "NONE", + "ARROW_RUNTIME_SIMD_LEVEL": "NONE", + "ARROW_DEPENDENCY_SOURCE": "BUNDLED", + # Temporary workarounds for older images / Bazel sandbox toolchain detection. 
+ "EP_CMAKE_RANLIB": "__EP_CMAKE_RANLIB__", + "EP_CMAKE_AR": "__EP_CMAKE_AR__", + "EP_CMAKE_NM": "__EP_CMAKE_NM__", + }, + out_include_dir = "include", + out_lib_dir = "lib", + out_shared_libs = select({ + "@platforms//os:macos": ["libarrow.dylib"], + "//conditions:default": [ + "libarrow.so", + "libarrow.so.1900", + "libarrow.so.1900.1.0", + ], + }), +) +""" + +_ARROW_PATCH_CMDS = [ + "sed -i.bak 's|#define ARROW_CXX_COMPILER_FLAGS \"@CMAKE_CXX_FLAGS@\"|#define ARROW_CXX_COMPILER_FLAGS \"\"|' cpp/src/arrow/util/config.h.cmake && rm -f cpp/src/arrow/util/config.h.cmake.bak", +] + +_SYSTEM_ARROW_BUILD_FILE_TEMPLATE = """ +load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +cc_import( + name = "arrow_shared_import", + shared_library = "__SYSTEM_ARROW_SHARED_LIBRARY__", +) + +filegroup( + name = "arrow_runtime_libs", + srcs = [ +__SYSTEM_ARROW_RUNTIME_SRCS__ + ], +) + +cc_library( + name = "arrow_cpp", + hdrs = [ +__SYSTEM_ARROW_HDRS__ + ], + includes = ["__SYSTEM_ARROW_INCLUDE_DIR__"], + data = [":arrow_runtime_libs"], + deps = [":arrow_shared_import"], +) +""" + +_ARROW_BUILD_VERSIONS = { + "19.0.1": { + "urls": ["https://github.com/apache/arrow/archive/refs/tags/apache-arrow-19.0.1.tar.gz"], + "strip_prefix": "arrow-apache-arrow-19.0.1", + "integrity": "sha256-TImFBJWIQcyGtvhxDsspGflrXhD6iYmsEKxPyoNi2Go=", + }, +} + +_config_tag = tag_class(attrs = { + "mode": attr.string(default = "build"), + "arrow_cpp_version": attr.string(default = "19.0.1"), + "protobuf_version": attr.string(default = "3.25.5"), + "ep_cmake_ranlib": attr.string(default = "ranlib"), + "ep_cmake_ar": attr.string(default = "ar"), + "ep_cmake_nm": attr.string(default = "nm"), + "system_arrow_prefix": attr.string(default = "/usr"), + "system_arrow_include_dir": attr.string(default = "include"), + "system_arrow_shared_library": attr.string(default = "lib/x86_64-linux-gnu/libarrow.so"), + "system_arrow_runtime_glob": 
attr.string(default = "lib/x86_64-linux-gnu/libarrow.so*"), +}) + +def _render_arrow_build_file(tag): + return _ARROW_BUILD_FILE_TEMPLATE.replace( + "__EP_CMAKE_RANLIB__", + tag.ep_cmake_ranlib, + ).replace( + "__EP_CMAKE_AR__", + tag.ep_cmake_ar, + ).replace( + "__EP_CMAKE_NM__", + tag.ep_cmake_nm, + ) + +def _render_system_arrow_build_file(tag, shared_library_override = None): + shared_library = shared_library_override if shared_library_override else (tag.system_arrow_shared_library if hasattr(tag, "system_arrow_shared_library") else tag.shared_library) + include_dir = tag.system_arrow_include_dir if hasattr(tag, "system_arrow_include_dir") else tag.include_dir + return _SYSTEM_ARROW_BUILD_FILE_TEMPLATE.replace( + "__SYSTEM_ARROW_SHARED_LIBRARY__", + "sysroot/" + shared_library, + ).replace( + "__SYSTEM_ARROW_INCLUDE_DIR__", + "sysroot/" + include_dir, + ) + +def _starlark_string_list(items): + if not items: + return "" + return "\n".join([' "%s",' % i for i in items]) + +def _list_files(repo_ctx, base_dir, suffixes): + result = repo_ctx.execute([ + "/usr/bin/find", + base_dir, + "(", + "-type", + "f", + "-o", + "-type", + "l", + ")", + ]) + if result.return_code != 0: + fail("failed to enumerate files under %s: %s" % (base_dir, result.stderr)) + files = [] + for line in result.stdout.splitlines(): + for suffix in suffixes: + if line.endswith(suffix): + files.append(line) + break + return sorted(files) + +def _copy_file_to_sysroot(repo_ctx, prefix, rel_path): + if rel_path.startswith("/"): + fail("expected relative path under prefix, got absolute path: %s" % rel_path) + src = prefix + "/" + rel_path + dst = "sysroot/" + rel_path + dst_parent = dst.rsplit("/", 1)[0] if "/" in dst else "sysroot" + mkdir_res = repo_ctx.execute(["/bin/mkdir", "-p", dst_parent]) + if mkdir_res.return_code != 0: + fail("failed to create directory %s: %s" % (dst_parent, mkdir_res.stderr)) + # Resolve symlinks into real files to keep the generated sysroot self-contained. 
+ cp_res = repo_ctx.execute(["/bin/cp", "-L", src, dst]) + if cp_res.return_code != 0: + fail("failed to copy %s to %s: %s" % (src, dst, cp_res.stderr)) + +def _system_arrow_repo_impl(repo_ctx): + prefix = repo_ctx.attr.prefix.rstrip("/") + include_dir = repo_ctx.attr.include_dir + shared_library = repo_ctx.attr.shared_library + runtime_glob = repo_ctx.attr.runtime_glob + + mkdir_res = repo_ctx.execute(["/bin/mkdir", "-p", "sysroot"]) + if mkdir_res.return_code != 0: + fail("failed to create sysroot directory: %s" % mkdir_res.stderr) + + include_dir_for_scan = include_dir + if include_dir_for_scan.endswith("/"): + include_dir_for_scan = include_dir_for_scan[:-1] + header_root = prefix + "/" + include_dir_for_scan + "/arrow" + headers = _list_files(repo_ctx, header_root, [".h", ".hpp"]) + header_srcs_rel = [] + header_srcs = [] + for h in headers: + if not h.startswith(prefix + "/"): + fail("header path %s is outside prefix %s" % (h, prefix)) + rel = h[len(prefix) + 1:] + header_srcs_rel.append(rel) + header_srcs.append("sysroot/" + rel) + + runtime_dir = runtime_glob.rsplit("/", 1)[0] + runtime_prefix = runtime_glob.rsplit("/", 1)[1].replace("*", "") + runtime_files = _list_files(repo_ctx, prefix + "/" + runtime_dir, [""]) + runtime_srcs_rel = [] + runtime_srcs = [] + for f in runtime_files: + rel = f[len(prefix) + 1:] if f.startswith(prefix + "/") else None + if rel == None: + continue + if rel.startswith(runtime_dir + "/") and rel.rsplit("/", 1)[1].startswith(runtime_prefix): + runtime_srcs_rel.append(rel) + runtime_srcs.append("sysroot/" + rel) + runtime_srcs_rel = sorted(runtime_srcs_rel) + runtime_srcs = sorted(runtime_srcs) + + # Prefer a versioned soname file as the imported shared library so Bazel + # runfiles contain the exact filename required by the runtime loader. 
+ shared_import_rel = "sysroot/" + shared_library + shared_basename = shared_library.rsplit("/", 1)[1] + soname_candidates = [] + for rel in runtime_srcs_rel: + base = rel.rsplit("/", 1)[1] + if base == shared_basename: + continue + if base.startswith(shared_basename + "."): + soname_candidates.append("sysroot/" + rel) + if soname_candidates: + # Prefer shortest suffix first (e.g. libarrow.so.1900 before + # libarrow.so.1900.1.0) to match ELF SONAME naming when available. + soname_candidates = sorted(soname_candidates, key = lambda s: (len(s), s)) + shared_import_rel = soname_candidates[0] + + # Copy only required Arrow artifacts instead of mirroring the full system prefix. + copy_rel_paths = {} + for rel in header_srcs_rel + runtime_srcs_rel + [shared_library]: + copy_rel_paths[rel] = True + for rel in sorted(copy_rel_paths.keys()): + _copy_file_to_sysroot(repo_ctx, prefix, rel) + + build_file = _render_system_arrow_build_file(repo_ctx.attr, shared_library_override = shared_import_rel[len("sysroot/"):]).replace( + "__SYSTEM_ARROW_HDRS__", + _starlark_string_list(header_srcs), + ).replace( + "__SYSTEM_ARROW_RUNTIME_SRCS__", + _starlark_string_list(runtime_srcs), + ) + repo_ctx.file("BUILD.bazel", build_file) + +_system_arrow_repository = repository_rule( + implementation = _system_arrow_repo_impl, + attrs = { + "prefix": attr.string(mandatory = True), + "include_dir": attr.string(mandatory = True), + "shared_library": attr.string(mandatory = True), + "runtime_glob": attr.string(mandatory = True), + }, + local = True, +) + +def _select_config(ctx): + selected = None + selected_owner = None + root_selected = None + for mod in ctx.modules: + for tag in mod.tags.config: + is_root = hasattr(mod, "is_root") and mod.is_root + if is_root: + if root_selected != None: + fail("cpp_sdk.config may only be declared once in the root module") + root_selected = tag + continue + if selected == None: + selected = tag + selected_owner = mod.name + elif selected_owner != mod.name: + # 
Prefer root override. Dependency defaults are tolerated as long + # as they come from a single module. + fail("multiple dependency defaults for cpp_sdk.config without root override") + if root_selected != None: + return root_selected + return selected + +def _cpp_sdk_impl(ctx): + tag = _select_config(ctx) + if tag == None: + return + + if tag.mode == "registry": + return + + if tag.mode == "system": + _system_arrow_repository( + name = "apache_arrow_cpp", + prefix = tag.system_arrow_prefix, + include_dir = tag.system_arrow_include_dir, + shared_library = tag.system_arrow_shared_library, + runtime_glob = tag.system_arrow_runtime_glob, + ) + return + + if tag.mode != "build": + fail("unsupported cpp_sdk mode: %s" % tag.mode) + + arrow_version = _ARROW_BUILD_VERSIONS.get(tag.arrow_cpp_version) + if arrow_version == None: + fail("unsupported arrow_cpp_version for build mode: %s" % tag.arrow_cpp_version) + + http_archive( + name = "apache_arrow_cpp", + urls = arrow_version["urls"], + strip_prefix = arrow_version["strip_prefix"], + integrity = arrow_version["integrity"], + patch_cmds = _ARROW_PATCH_CMDS, + build_file_content = _render_arrow_build_file(tag), + ) + +cpp_sdk = module_extension( + implementation = _cpp_sdk_impl, + tag_classes = { + "config": _config_tag, + }, +) diff --git a/bindings/cpp/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/build/BUILD.bazel similarity index 80% rename from bindings/cpp/MODULE.bazel rename to bindings/cpp/examples/bazel-consumer/build/BUILD.bazel index f75d3e6f..afd35edd 100644 --- a/bindings/cpp/MODULE.bazel +++ b/bindings/cpp/examples/bazel-consumer/build/BUILD.bazel @@ -15,9 +15,11 @@ # specific language governing permissions and limitations # under the License. 
-module( - name = "fluss_cpp", -) +load("@rules_cc//cc:defs.bzl", "cc_binary") -bazel_dep(name = "rules_cc", version = "0.0.17") -bazel_dep(name = "platforms", version = "0.0.10") +cc_binary( + name = "consumer_build", + srcs = ["main.cc"], + copts = ["-std=c++17"], + deps = ["@fluss-cpp//bindings/cpp:fluss_cpp"], +) diff --git a/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel new file mode 100644 index 00000000..f31165c1 --- /dev/null +++ b/bindings/cpp/examples/bazel-consumer/build/MODULE.bazel @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module(name = "fluss_cpp_consumer_build") + +bazel_dep(name = "rules_cc", version = "0.2.14") +bazel_dep(name = "fluss-cpp", version = "0.1.0") + +# Local override for repository-local validation only. +local_path_override( + module_name = "fluss-cpp", + # Repository root path (the directory containing `bindings/cpp`). 
+ path = "../../../../../", +) + +fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp.config( + mode = "build", + protobuf_version = "3.25.5", + arrow_cpp_version = "19.0.1", + ep_cmake_ranlib = "/usr/bin/ranlib", + ep_cmake_ar = "/usr/bin/ar", + ep_cmake_nm = "/usr/bin/nm", +) +use_repo(fluss_cpp, "apache_arrow_cpp") diff --git a/bindings/cpp/examples/bazel-consumer/build/main.cc b/bindings/cpp/examples/bazel-consumer/build/main.cc new file mode 100644 index 00000000..87e5b682 --- /dev/null +++ b/bindings/cpp/examples/bazel-consumer/build/main.cc @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "fluss.hpp" + +#include <iostream> + +int main() { + fluss::TablePath table_path("demo_db", "demo_table"); + std::cout << "Bazel build-mode dependency example ready: " + << table_path.ToString() << std::endl; + return 0; +} + diff --git a/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel b/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel new file mode 100644 index 00000000..2f24e6de --- /dev/null +++ b/bindings/cpp/examples/bazel-consumer/system/BUILD.bazel @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +load("@rules_cc//cc:defs.bzl", "cc_binary") + +cc_binary( + name = "consumer_system", + srcs = ["main.cc"], + copts = ["-std=c++17"], + deps = ["@fluss-cpp//bindings/cpp:fluss_cpp"], +) diff --git a/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel new file mode 100644 index 00000000..2a4d6a65 --- /dev/null +++ b/bindings/cpp/examples/bazel-consumer/system/MODULE.bazel @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module(name = "fluss_cpp_consumer_system") + +bazel_dep(name = "rules_cc", version = "0.2.14") +bazel_dep(name = "fluss-cpp", version = "0.1.0") + +# Repository-local example path (repository root containing `bindings/cpp`). +# If you copy this example out of tree, replace this with an absolute path +# (for example: /path/to/fluss-rust). +local_path_override( + module_name = "fluss-cpp", + path = "../../../../../", +) + +# Intended interface for preinstalled protoc + Arrow C++ environments. +fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp.config( + mode = "system", + protobuf_version = "3.25.5", + arrow_cpp_version = "19.0.1", + # Adjust these paths for your environment. + # Ubuntu 22.04 (apt / custom package) commonly uses lib/x86_64-linux-gnu. + system_arrow_prefix = "/usr", + system_arrow_include_dir = "include", + system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so", + system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so*", +) +use_repo(fluss_cpp, "apache_arrow_cpp") diff --git a/bindings/cpp/examples/bazel-consumer/system/main.cc b/bindings/cpp/examples/bazel-consumer/system/main.cc new file mode 100644 index 00000000..b1f0b70b --- /dev/null +++ b/bindings/cpp/examples/bazel-consumer/system/main.cc @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "fluss.hpp" + +#include <iostream> + +int main() { + fluss::TablePath table_path("demo_db", "demo_table"); + std::cout << "Bazel system-mode dependency example ready: " + << table_path.ToString() << std::endl; + return 0; +} diff --git a/bindings/cpp/scripts/ensure_protoc.sh b/bindings/cpp/scripts/ensure_protoc.sh new file mode 100755 index 00000000..3210bcc7 --- /dev/null +++ b/bindings/cpp/scripts/ensure_protoc.sh @@ -0,0 +1,277 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -euo pipefail + +PROTOBUF_BASELINE_VERSION="${PROTOBUF_BASELINE_VERSION:-3.25.5}" +if [[ -n "${XDG_CACHE_HOME:-}" ]]; then + _PROTOC_DEFAULT_CACHE_BASE="${XDG_CACHE_HOME}" +elif [[ -n "${HOME:-}" ]]; then + _PROTOC_DEFAULT_CACHE_BASE="${HOME}/.cache" +else + _PROTOC_DEFAULT_CACHE_BASE="/tmp" +fi + +_PROTOC_UNAME_S="$(uname -s | tr '[:upper:]' '[:lower:]')" +case "${_PROTOC_UNAME_S}" in + linux*) + _PROTOC_DEFAULT_OS="linux" + ;; + darwin*) + _PROTOC_DEFAULT_OS="osx" + ;; + *) + echo "ERROR: unsupported host OS '${_PROTOC_UNAME_S}'. Please set PROTOC_OS explicitly." >&2 + exit 1 + ;; +esac + +_PROTOC_UNAME_M="$(uname -m)" +case "${_PROTOC_UNAME_M}" in + x86_64|amd64) + _PROTOC_DEFAULT_ARCH="x86_64" + ;; + aarch64|arm64) + _PROTOC_DEFAULT_ARCH="aarch_64" + ;; + *) + echo "ERROR: unsupported host arch '${_PROTOC_UNAME_M}'. Please set PROTOC_ARCH explicitly." >&2 + exit 1 + ;; +esac + +PROTOC_INSTALL_ROOT="${PROTOC_INSTALL_ROOT:-${_PROTOC_DEFAULT_CACHE_BASE}/fluss-cpp-tools}" +PROTOC_OS="${PROTOC_OS:-${_PROTOC_DEFAULT_OS}}" +PROTOC_ARCH="${PROTOC_ARCH:-${_PROTOC_DEFAULT_ARCH}}" +PROTOC_FORCE_INSTALL="${PROTOC_FORCE_INSTALL:-0}" +PROTOC_PRINT_PATH_ONLY="${PROTOC_PRINT_PATH_ONLY:-0}" +PROTOC_ALLOW_INSECURE_DOWNLOAD="${PROTOC_ALLOW_INSECURE_DOWNLOAD:-0}" +PROTOC_SKIP_CHECKSUM_VERIFY="${PROTOC_SKIP_CHECKSUM_VERIFY:-0}" + +usage() { + cat <<'EOF' +Usage: bindings/cpp/scripts/ensure_protoc.sh [--print-path] + +Ensures a protoc binary matching the configured protobuf baseline is available. +Installs into a local cache directory (default: \$XDG_CACHE_HOME/fluss-cpp-tools or +\$HOME/.cache/fluss-cpp-tools) and prints +the protoc path on stdout. 
+ +Env vars: + PROTOBUF_BASELINE_VERSION Baseline protobuf version (default: 3.25.5) + PROTOC_INSTALL_ROOT Local cache root (default: XDG/HOME cache dir) + PROTOC_OS protoc package OS (default: auto-detect host: linux/osx) + PROTOC_ARCH protoc package arch (default: auto-detect host: x86_64/aarch_64) + PROTOC_FORCE_INSTALL 1 to force re-download + PROTOC_ALLOW_INSECURE_DOWNLOAD + 1 to disable TLS verification (not recommended) + PROTOC_SKIP_CHECKSUM_VERIFY + 1 to skip pinned archive checksum verification + BAZEL_PROXY_URL Optional proxy (sets curl/wget proxy envs if present) +EOF +} + +# Parse CLI flags: --print-path sets PROTOC_PRINT_PATH_ONLY, -h/--help prints usage and exits 0, anything else is an error. +for arg in "$@"; do + case "$arg" in + --print-path) + PROTOC_PRINT_PATH_ONLY=1 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $arg" >&2 + usage >&2 + exit 1 + ;; + esac +done + +# When BAZEL_PROXY_URL is set, export lower- and upper-case HTTP(S) proxy variables, keeping any values that are already set. +setup_proxy_env() { + if [[ -n "${BAZEL_PROXY_URL:-}" ]]; then + export http_proxy="${http_proxy:-$BAZEL_PROXY_URL}" + export https_proxy="${https_proxy:-$BAZEL_PROXY_URL}" + export HTTP_PROXY="${HTTP_PROXY:-$http_proxy}" + export HTTPS_PROXY="${HTTPS_PROXY:-$https_proxy}" + fi +} + +# Print the protoc release version for protobuf version $1. +normalize_version_for_protoc_release() { + local v="$1" + # Protobuf release packaging switched from v3.x.y to vX.Y for newer versions. + # For our current agreed baseline (3.25.5), the protoc archive/tag is 25.5. 
+ if [[ "$v" =~ ^3\.([0-9]+\.[0-9]+)$ ]]; then + local stripped="${BASH_REMATCH[1]}" + local major="${stripped%%.*}" + if [[ "$major" -ge 21 ]]; then + echo "$stripped" + return 0 + fi + fi + echo "$v" +} + +version_matches_baseline() { + local actual="$1" + local baseline="$2" + local actual_norm baseline_norm + actual_norm="$(normalize_version_for_protoc_release "$actual")" + baseline_norm="$(normalize_version_for_protoc_release "$baseline")" + [[ "$actual" == "$baseline" || "$actual_norm" == "$baseline_norm" ]] +} + +lookup_protoc_archive_sha256() { + local release_version="$1" + local os="$2" + local arch="$3" + case "${release_version}:${os}:${arch}" in + 25.5:linux:aarch_64) + echo "dc715bb5aab2ebf9653d7d3efbe55e01a035e45c26f391ff6d9b7923e22914b7" + ;; + 25.5:linux:x86_64) + echo "e1ed237a17b2e851cf9662cb5ad02b46e70ff8e060e05984725bc4b4228c6b28" + ;; + 25.5:osx:aarch_64) + echo "781a6fc4c265034872cadc65e63dd3c0fc49245b70917821b60e2d457a6876ab" + ;; + 25.5:osx:x86_64) + echo "c5447e4f0d5caffb18d9ff21eae7bc7faf2bb2000083d6f49e5b6000b30fceae" + ;; + *) + return 1 + ;; + esac +} + +verify_download_sha256() { + local file="$1" + local expected="$2" + local actual="" + if command -v sha256sum >/dev/null 2>&1; then + actual="$(sha256sum "$file" | awk '{print $1}')" + elif command -v shasum >/dev/null 2>&1; then + actual="$(shasum -a 256 "$file" | awk '{print $1}')" + else + echo "ERROR: neither sha256sum nor shasum is available for checksum verification." >&2 + return 1 + fi + if [[ "$actual" != "$expected" ]]; then + echo "ERROR: protoc archive checksum mismatch." 
>&2 + echo " expected: $expected" >&2 + echo " actual: $actual" >&2 + return 1 + fi +} + +download_file() { + local url="$1" + local out="$2" + + if command -v curl >/dev/null 2>&1; then + local curl_args=(-fL) + if [[ "${PROTOC_ALLOW_INSECURE_DOWNLOAD}" == "1" ]]; then + curl_args+=(-k) + fi + curl "${curl_args[@]}" "$url" -o "$out" + return 0 + fi + + if command -v wget >/dev/null 2>&1; then + local wget_args=() + if [[ -n "${https_proxy:-}" || -n "${http_proxy:-}" ]]; then + wget_args+=(-e use_proxy=yes) + if [[ -n "${https_proxy:-}" ]]; then + wget_args+=(-e "https_proxy=${https_proxy}") + fi + if [[ -n "${http_proxy:-}" ]]; then + wget_args+=(-e "http_proxy=${http_proxy}") + fi + fi + if [[ "${PROTOC_ALLOW_INSECURE_DOWNLOAD}" == "1" ]]; then + wget_args+=(--no-check-certificate) + fi + wget "${wget_args[@]}" -O "$out" "$url" + return 0 + fi + + echo "ERROR: neither curl nor wget is available for downloading protoc." >&2 + return 1 +} + +ensure_zip_tools() { + command -v unzip >/dev/null 2>&1 || { + echo "ERROR: unzip not found." 
>&2 + exit 1 + } +} + +setup_proxy_env +ensure_zip_tools + +if command -v protoc >/dev/null 2>&1; then + existing_out="$(protoc --version 2>/dev/null || true)" + if [[ "$existing_out" =~ ([0-9]+\.[0-9]+\.[0-9]+) ]]; then + existing_ver="${BASH_REMATCH[1]}" + if version_matches_baseline "$existing_ver" "$PROTOBUF_BASELINE_VERSION"; then + command -v protoc + exit 0 + fi + fi +fi + +PROTOC_RELEASE_VERSION="$(normalize_version_for_protoc_release "$PROTOBUF_BASELINE_VERSION")" +PROTOC_ARCHIVE="protoc-${PROTOC_RELEASE_VERSION}-${PROTOC_OS}-${PROTOC_ARCH}.zip" +PROTOC_URL="https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_RELEASE_VERSION}/${PROTOC_ARCHIVE}" +PROTOC_PREFIX="${PROTOC_INSTALL_ROOT}/protoc-${PROTOC_RELEASE_VERSION}-${PROTOC_OS}-${PROTOC_ARCH}" +PROTOC_BIN="${PROTOC_PREFIX}/bin/protoc" + +if [[ "${PROTOC_FORCE_INSTALL}" != "1" && -x "${PROTOC_BIN}" ]]; then + if [[ "${PROTOC_PRINT_PATH_ONLY}" == "1" ]]; then + echo "${PROTOC_BIN}" + else + echo "${PROTOC_BIN}" + fi + exit 0 +fi + +mkdir -p "${PROTOC_INSTALL_ROOT}" +tmpdir="$(mktemp -d "${PROTOC_INSTALL_ROOT}/.protoc-download.XXXXXX")" +trap 'rm -rf "${tmpdir}"' EXIT + +archive_path="${tmpdir}/${PROTOC_ARCHIVE}" +download_file "${PROTOC_URL}" "${archive_path}" +if [[ "${PROTOC_SKIP_CHECKSUM_VERIFY}" != "1" ]]; then + if expected_sha256="$(lookup_protoc_archive_sha256 "${PROTOC_RELEASE_VERSION}" "${PROTOC_OS}" "${PROTOC_ARCH}")"; then + verify_download_sha256 "${archive_path}" "${expected_sha256}" + else + echo "ERROR: no pinned checksum for protoc archive ${PROTOC_ARCHIVE}. Set PROTOC_SKIP_CHECKSUM_VERIFY=1 to bypass." >&2 + exit 1 + fi +fi + +extract_dir="${tmpdir}/extract" +mkdir -p "${extract_dir}" +unzip -q "${archive_path}" -d "${extract_dir}" + +rm -rf "${PROTOC_PREFIX}" +mkdir -p "${PROTOC_PREFIX}" +cp -a "${extract_dir}/." 
"${PROTOC_PREFIX}/" +chmod +x "${PROTOC_BIN}" + +echo "${PROTOC_BIN}" diff --git a/docs/cpp-bazel-usage.md b/docs/cpp-bazel-usage.md new file mode 100644 index 00000000..61d861ed --- /dev/null +++ b/docs/cpp-bazel-usage.md @@ -0,0 +1,291 @@ +# Fluss C++ Bazel Usage Guide (System / Build Modes) + +This guide is for: + +- C++ application teams consuming Fluss C++ bindings via Bazel +- Maintainers evolving the Bazel integration + +For the CMake flow with the same `system` / `build` dependency modes, see +`docs/cpp-cmake-usage.md`. + +Current simplification scope: + +- Keep only two dependency modes in the mainline guidance: + - `system` + - `build` +- Defer strict internal-registry-only module flow from the mainline path + +## Scope + +- Dependency model: **root module mode** +- Consumer dependency target: `@fluss-cpp//bindings/cpp:fluss_cpp` +- Root `MODULE.bazel` is required for root module mode. +- Build systems covered by this document: **Bazel** +- Dependency modes covered by this document: **system/build** + +Version baseline references currently used by examples: + +- `protobuf/protoc`: `3.25.5` +- `arrow-cpp`: `19.0.1` + +## Common Consumer `BUILD.bazel` + +Both modes use the same dependency target: + +```starlark +load("@rules_cc//cc:defs.bzl", "cc_binary") + +cc_binary( + name = "fluss_reader", + srcs = ["reader.cc"], + deps = ["@fluss-cpp//bindings/cpp:fluss_cpp"], +) +``` + +## Mode 1: `system` (Recommended in preinstalled environments) + +Use this mode when your environment already provides: + +- `protoc` +- Arrow C++ (headers + shared libraries) + +### Consumer `MODULE.bazel` (pattern) + +```starlark +module(name = "my_cpp_app") + +bazel_dep(name = "rules_cc", version = "0.2.14") +bazel_dep(name = "fluss-cpp", version = "") + +fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp.config( + mode = "system", + protobuf_version = "3.25.5", + arrow_cpp_version = "19.0.1", + # Adjust Arrow paths for your environment + 
system_arrow_prefix = "/usr", + system_arrow_include_dir = "include", + system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so", + system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so*", +) +use_repo(fluss_cpp, "apache_arrow_cpp") +``` + +### Build and run (consumer workspace pattern) + +Run from your consumer workspace root (the directory containing +`MODULE.bazel` and your top-level `BUILD.bazel`). + +```bash +PROTOC_BIN="$(command -v protoc)" +CARGO_BIN="$(command -v cargo)" +bazel run \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + //:fluss_reader +``` + +### Runnable example + +- `bindings/cpp/examples/bazel-consumer/system` + +```bash +cd bindings/cpp/examples/bazel-consumer/system +PROTOC_BIN="$(command -v protoc)" +CARGO_BIN="$(command -v cargo)" +bazel run \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + //:consumer_system +``` + +## Mode 2: `build` (No internal registry / no preinstalled Arrow) + +Use this mode when Arrow C++ is not preinstalled and you want Bazel to +provision it from source. + +### Consumer `MODULE.bazel` (pattern) + +```starlark +module(name = "my_cpp_app") + +bazel_dep(name = "rules_cc", version = "0.2.14") +bazel_dep(name = "fluss-cpp", version = "<released-version>") + +fluss_cpp = use_extension("@fluss-cpp//bindings/cpp/bazel/cpp:deps.bzl", "cpp_sdk") +fluss_cpp.config( + mode = "build", + protobuf_version = "3.25.5", + arrow_cpp_version = "19.0.1", +) +use_repo(fluss_cpp, "apache_arrow_cpp") +``` + +Notes: + +- `build` mode in the core Bazel integration still uses `PROTOC` (env / PATH). +- To auto-download a pinned `protoc` for `build` mode, use + `bindings/cpp/scripts/ensure_protoc.sh` and pass the result via `--action_env=PROTOC=...`. +- `ensure_protoc.sh` auto-detects host OS/arch (`linux`/`osx`, `x86_64`/`aarch_64`). 
+- Some environments may require `ep_cmake_ar/ranlib/nm` overrides. + +### Build and run (consumer workspace pattern, with auto-downloaded `protoc`) + +Run from the `fluss-rust` repository root, or adjust the script path if you +copied it elsewhere. + +```bash +PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)" +``` + +```bash +bazel run --action_env=PROTOC="$PROTOC_BIN" //:fluss_reader +``` + +If `cargo` is not on Bazel action `PATH`, also pass: + +```bash +CARGO_BIN="$(command -v cargo)" +bazel run \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + //:fluss_reader +``` + +### Runnable example + +- `bindings/cpp/examples/bazel-consumer/build` + +```bash +cd bindings/cpp/examples/bazel-consumer/build +PROTOC_BIN="$(bash ../../../scripts/ensure_protoc.sh --print-path)" +CARGO_BIN="$(command -v cargo)" +bazel run \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + //:consumer_build +``` + +## Local Development Override (Optional) + +For repository-local validation only: + +```starlark +local_path_override( + module_name = "fluss-cpp", + path = "/path/to/fluss-rust", +) +``` + +Do not keep local overrides in long-lived branches. + +Repository-local examples in this repo use `version = "0.1.0"` together with +`local_path_override(...)` for local validation before publishing to the Bazel +registry. + +## Repository-local Validation (Direct Commands) + +These commands validate the repository examples directly. 
+If your environment requires a proxy for Bazel external downloads, export it +before running (replace the placeholder URL with your actual proxy): + +```bash +export BAZEL_PROXY_URL="http://proxy.example.com:3128" +export http_proxy="$BAZEL_PROXY_URL" +export https_proxy="$BAZEL_PROXY_URL" +export HTTP_PROXY="$http_proxy" +export HTTPS_PROXY="$https_proxy" +unset all_proxy ALL_PROXY +``` + +### Validate `build` example + +```bash +cd bindings/cpp/examples/bazel-consumer/build +PROTOC_BIN="$(bash ../../../scripts/ensure_protoc.sh --print-path)" +CARGO_BIN="$(command -v cargo)" +bazel --ignore_all_rc_files run \ + --registry=https://bcr.bazel.build \ + --lockfile_mode=off \ + --repo_env=http_proxy="${http_proxy:-}" \ + --repo_env=https_proxy="${https_proxy:-}" \ + --repo_env=HTTP_PROXY="${HTTP_PROXY:-}" \ + --repo_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \ + --action_env=http_proxy="${http_proxy:-}" \ + --action_env=https_proxy="${https_proxy:-}" \ + --action_env=HTTP_PROXY="${HTTP_PROXY:-}" \ + --action_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \ + --action_env=all_proxy= \ + --action_env=ALL_PROXY= \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + --strategy=CcCmakeMakeRule=local \ + --strategy=BootstrapGNUMake=local \ + --strategy=BootstrapPkgConfig=local \ + //:consumer_build +``` + +### Validate `system` example (using a local Arrow prefix) + +The `system` example defaults to `/usr`. If your Arrow prefix is elsewhere +(for example a locally built prefix), copy the example to a temp directory and +patch `MODULE.bazel` before running: + +```bash +tmp_dir="$(mktemp -d /tmp/fluss-bazel-system-doc.XXXXXX)" +FLUSS_RUST_ROOT="$(pwd)" +cp -a bindings/cpp/examples/bazel-consumer/system/. 
"$tmp_dir/" +sed -i \ + -e "s|path = \"../../../../../\"|path = \"$FLUSS_RUST_ROOT\"|" \ + -e 's|system_arrow_prefix = "/usr"|system_arrow_prefix = "/tmp/fluss-system-arrow-19.0.1"|' \ + -e 's|system_arrow_shared_library = "lib/x86_64-linux-gnu/libarrow.so"|system_arrow_shared_library = "lib/libarrow.so"|' \ + -e 's|system_arrow_runtime_glob = "lib/x86_64-linux-gnu/libarrow.so\*"|system_arrow_runtime_glob = "lib/libarrow.so*"|' \ + "$tmp_dir/MODULE.bazel" +cd "$tmp_dir" +PROTOC_BIN="$(command -v protoc)" +CARGO_BIN="$(command -v cargo)" +bazel --ignore_all_rc_files run \ + --registry=https://bcr.bazel.build \ + --lockfile_mode=off \ + --repo_env=http_proxy="${http_proxy:-}" \ + --repo_env=https_proxy="${https_proxy:-}" \ + --repo_env=HTTP_PROXY="${HTTP_PROXY:-}" \ + --repo_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \ + --action_env=http_proxy="${http_proxy:-}" \ + --action_env=https_proxy="${https_proxy:-}" \ + --action_env=HTTP_PROXY="${HTTP_PROXY:-}" \ + --action_env=HTTPS_PROXY="${HTTPS_PROXY:-}" \ + --action_env=all_proxy= \ + --action_env=ALL_PROXY= \ + --action_env=PROTOC="$PROTOC_BIN" \ + --action_env=CARGO="$CARGO_BIN" \ + --action_env=PATH="$(dirname "$CARGO_BIN"):$PATH" \ + //:consumer_system +``` + +On macOS (BSD `sed`), replace `sed -i` with `sed -i ''` in the patch step above. + +## Upgrade Procedure + +1. Update `bazel_dep(name = "fluss-cpp", version = "...")` +2. Update mode version settings if needed (`protobuf_version`, `arrow_cpp_version`) +3. Run `bazel mod tidy` +4. Commit `MODULE.bazel` and `MODULE.bazel.lock` +5. Run build + tests +6. 
Verify dependency graph: + +```bash +bazel mod graph | rg "fluss-cpp@" +``` + +## Examples and Non-Mainline References + +Mainline examples: + +- `bindings/cpp/examples/bazel-consumer/build` +- `bindings/cpp/examples/bazel-consumer/system` diff --git a/docs/cpp-cmake-usage.md b/docs/cpp-cmake-usage.md new file mode 100644 index 00000000..3002d1c4 --- /dev/null +++ b/docs/cpp-cmake-usage.md @@ -0,0 +1,129 @@ +# Fluss C++ CMake Usage Guide (System / Build Modes) + +## Audience + +- C++ application teams building `bindings/cpp` with CMake +- Maintainers evolving Fluss C++ dependency provisioning + +## Scope + +- Build system covered by this document: **CMake** +- Dependency modes covered by this document: **system/build** + +Current tested baselines: + +- `protoc`: `3.25.5` +- `arrow-cpp`: `19.0.1` + +Notes: + +- CMake currently warns (does not fail) when local `protoc`/Arrow versions differ from the baselines. +- `protoc` is required because Rust `prost-build` runs during the C++ build. + +## Common Prerequisites + +- Rust toolchain (`cargo` in `PATH`, or set `CARGO=/path/to/cargo`) +- `protoc` in `PATH` (required for `system` mode; `build` mode can auto-download via `bindings/cpp/scripts/ensure_protoc.sh`) +- C++17 compiler +- CMake 3.22+ + +Examples below use `bindings/cpp` as the source directory. + +## Mode 1: `system` + +Use this mode when the environment already provides Arrow C++. + +### Configure + +```bash +cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-system \ + -DFLUSS_CPP_DEP_MODE=system \ + -DFLUSS_CPP_ARROW_SYSTEM_ROOT=/path/to/arrow/prefix +``` + +Typical prefixes: + +- Ubuntu package install: `/usr` +- Custom install prefix: `/usr/local` or `/opt/arrow` + +### Build + +```bash +cmake --build /tmp/fluss-cpp-cmake-system --target fluss_cpp -j +``` + +## Mode 2: `build` + +Use this mode when Arrow C++ is not preinstalled and CMake should fetch/build it. 
+ +### Configure (with auto-downloaded `protoc`) + +```bash +PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)" +export PATH="$(dirname "$PROTOC_BIN"):$PATH" +``` + +Then configure: + +```bash +cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-build \ + -DFLUSS_CPP_DEP_MODE=build +``` + +Optional overrides: + +- `-DFLUSS_CPP_ARROW_VERSION=19.0.1` +- `-DFLUSS_CPP_ARROW_SOURCE_URL=...` (internal mirror or pinned archive) +- `-DFLUSS_CPP_PROTOBUF_VERSION=3.25.5` (baseline warning only) + +If your environment needs a proxy for CMake/FetchContent downloads, export standard proxy vars before configure/build: + +```bash +export http_proxy=http://host:port +export https_proxy=http://host:port +export HTTP_PROXY="$http_proxy" +export HTTPS_PROXY="$https_proxy" +``` + +### Build + +```bash +cmake --build /tmp/fluss-cpp-cmake-build --target fluss_cpp -j +``` + +This mode is slower on first build because it compiles Arrow C++ from source. + +## Repository-local Validation (Direct Commands) + +### Validate `system` mode + +```bash +PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)" +export PATH="$(dirname "$PROTOC_BIN"):$PATH" +cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-system \ + -DFLUSS_CPP_DEP_MODE=system \ + -DFLUSS_CPP_ARROW_SYSTEM_ROOT=/tmp/fluss-system-arrow-19.0.1 +cmake --build /tmp/fluss-cpp-cmake-system --target fluss_cpp -j +``` + +### Validate `build` mode + +```bash +PROTOC_BIN="$(bash bindings/cpp/scripts/ensure_protoc.sh --print-path)" +export PATH="$(dirname "$PROTOC_BIN"):$PATH" +cmake -S bindings/cpp -B /tmp/fluss-cpp-cmake-build \ + -DFLUSS_CPP_DEP_MODE=build +cmake --build /tmp/fluss-cpp-cmake-build --target fluss_cpp -j +``` + +## Troubleshooting + +- `cargo not found` + - Install Rust toolchain or set `CARGO=/path/to/cargo`. +- `protoc not found` + - Install `protoc` and ensure it is in `PATH`. + - For `build` mode, use `bindings/cpp/scripts/ensure_protoc.sh` and prepend the returned path to `PATH`. 
+- `arrow/c/bridge.h` not found (build mode) + - Reconfigure after updating to the latest `bindings/cpp/CMakeLists.txt`; build mode now adds Arrow source/build include dirs explicitly. +- Long first build in `build` mode + - Expected. Arrow C++ source build dominates wall time.