load("@rules_java//java:defs.bzl", "java_binary", "java_library")

# Number of generated classes, and therefore of genrules and libraries below.
# Change this value to scale the example up or down.
NUM_FILES = 100

# One genrule per index x in 1..NUM_FILES.
# Each writes HelloX.java: a class in package com.engflow.binaryinput whose
# static greetNum() method prints "Hello X".
[genrule(
    name = "Hello" + str(x),
    outs = ["Hello" + str(x) + ".java"],
    cmd_bash = "echo 'package com.engflow.binaryinput;" + "\n" +
               "public class Hello" + str(x) +
               " { public static void greetNum() { System.out.println(\"Hello " + str(x) + "\"); } }' > $@",
) for x in range(1, NUM_FILES + 1)]

# One java_library per generated file: genbinaryX compiles only HelloX.java.
# An inner comprehension over the same variable name would shadow x and put
# every generated source into every library, so the single source is listed
# explicitly here.
[java_library(
    name = "genbinary" + str(x),
    srcs = [":Hello" + str(x) + ".java"],
    visibility = ["//visibility:public"],
) for x in range(1, NUM_FILES + 1)]

# Main class. Depends on every generated library and receives NUM_FILES as its
# first program argument so it knows how many Hello classes to load.
java_binary(
    name = "main",
    srcs = ["Main.java"],
    main_class = "com.engflow.binaryinput.Main",
    deps = [
        ":genbinary" + str(x)
        for x in range(1, NUM_FILES + 1)
    ],
    args = [str(NUM_FILES)],
)
"""Benchmark how long Bazel takes to build ``:main`` for a given number of inputs.

The script rewrites ``NUM_FILES`` in the local ``BUILD`` file, builds the
``genbinary`` libraries and the ``main`` target (locally or with the
``engflow`` config), and reports timings read from the Bazel JSON profile.
It must be run from the directory that contains the ``BUILD`` file.
"""

import json
import os
import subprocess
import uuid

# Bazel profile timestamps and durations are expressed in microseconds.
_MICROS_PER_SECOND = 1000000.0


def modify_build_file(num_files):
    """Set the ``NUM_FILES`` assignment in ``./BUILD`` to ``num_files``.

    Every line that starts with ``NUM_FILES`` is replaced by
    ``NUM_FILES = <num_files>``; all other lines are written back unchanged.
    """
    with open('BUILD', 'r', encoding='utf-8') as file:
        lines = file.readlines()

    with open('BUILD', 'w', encoding='utf-8') as file:
        file.writelines(
            f'NUM_FILES = {num_files}\n' if line.startswith('NUM_FILES') else line
            for line in lines
        )


def run_bazel_command(command):
    """Run ``command`` (an argv list) and raise ``CalledProcessError`` on failure."""
    subprocess.run(command, check=True)


def analyze_bazel_profile(profile_path):
    """Load the JSON profile at ``profile_path`` and return the parsed object."""
    with open(profile_path, 'r', encoding='utf-8') as file:
        return json.load(file)


def extract_times(profile_data):
    """Return ``(critical_time, total_run_time)`` in seconds.

    ``critical_time`` is the sum of ``dur`` over all events whose ``cat`` is
    ``'critical path component'`` (``0`` when there are none).
    ``total_run_time`` is the ``ts`` of the ``'Complete build'`` phase marker
    minus the ``ts`` of the ``'Launch Blaze'`` phase marker, or ``None`` when
    either marker is absent from ``profile_data['traceEvents']``.
    """
    critical_time = 0
    start_time = None
    end_time = None

    for event in profile_data['traceEvents']:
        category = event.get('cat')
        if category == 'critical path component':
            critical_time += event['dur'] / _MICROS_PER_SECOND
        elif category == 'build phase marker':
            name = event.get('name')
            if name == 'Launch Blaze':
                start_time = event['ts'] / _MICROS_PER_SECOND
            elif name == 'Complete build':
                end_time = event['ts'] / _MICROS_PER_SECOND

    # Compare against None explicitly: a timestamp of 0 is a valid value and
    # must not be mistaken for "marker not found".
    if start_time is None or end_time is None:
        return critical_time, None
    return critical_time, end_time - start_time


def main():
    """Prompt for the benchmark parameters, run the builds and print the results.

    Exits with an error message, before touching the BUILD file, when the
    execution type is not ``local``/``remote`` or fewer than one iteration
    is requested.
    """
    num_files = int(input("Enter the number of files: "))
    execution_type = input("Enter the execution type (local/remote): ").strip().lower()
    iterations = int(input("Enter the number of iterations: "))

    # Validate up front: an unknown execution type would otherwise skip every
    # build and then parse whatever profile.json happens to be on disk.
    if execution_type not in ('local', 'remote'):
        raise SystemExit(
            f"Unknown execution type {execution_type!r}; expected 'local' or 'remote'."
        )
    if iterations < 1:
        raise SystemExit('The number of iterations must be at least 1.')

    modify_build_file(num_files)

    results = []

    # Path to the Bazel profile data
    # Using an absolute path to avoid issues with the Bazel workspace
    profile_path = os.path.abspath('profile.json')

    # The library targets are the same for every iteration.
    targets = [f':genbinary{j}' for j in range(1, num_files + 1)]

    for _ in range(iterations):
        if execution_type == 'local':
            # Clear the Bazel cache
            run_bazel_command(['bazel', 'clean', '--expunge'])
            # Generate the input files
            run_bazel_command(['bazel', 'build'] + targets)
            # Build the main target and generate the Bazel profile data
            run_bazel_command(['bazel', 'build', f'--profile={profile_path}', ':main'])
        else:
            # Generate a unique key for the cache silo
            key = str(uuid.uuid4())
            silo_flag = f'--remote_default_exec_properties=cache-silo-key={key}'
            # Generate the input files
            run_bazel_command(['bazel', 'build', '--config=engflow', silo_flag] + targets)
            # Build the main target and generate the Bazel profile data
            run_bazel_command(
                ['bazel', 'build', '--config=engflow', f'--profile={profile_path}', silo_flag, ':main']
            )

        profile_output = analyze_bazel_profile(profile_path)
        results.append(extract_times(profile_output))

    print(results)

    critical_times = [critical for critical, _ in results]
    # A run whose profile lacks a phase marker yields None; leave it out so
    # max() does not fail comparing None with a float.
    total_run_times = [total for _, total in results if total is not None]

    # Calculate the highest critical time and total run time
    critical_time_max = max(critical_times)
    total_run_time_max = max(total_run_times) if total_run_times else None

    print(f'Highest Critical Time: {critical_time_max}')
    print(f'Highest Total Run Time: {total_run_time_max}')


if __name__ == '__main__':
    main()
-0,0 +1,21 @@ +package com.engflow.binaryinput; + +import java.lang.reflect.InvocationTargetException; + +public class Main { + public static void main(String[] args) { + try { + // args[0] is the number of files to read + int numFiles = Integer.parseInt(args[0]); + + // Load and run the greetNum method from each class + for(int i = 1; i <= numFiles; i++){ + Class clazz = Class.forName("com.engflow.binaryinput.Hello" + i); + clazz.getMethod("greetNum").invoke(null); + } + + } catch (ClassNotFoundException | InvocationTargetException | IllegalAccessException | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } +} diff --git a/java/com/engflow/binaryinput/README.md b/java/com/engflow/binaryinput/README.md new file mode 100644 index 00000000..55cba586 --- /dev/null +++ b/java/com/engflow/binaryinput/README.md @@ -0,0 +1,52 @@ +# Multiple Binary Input Example + +## Overview + +The goal of this example project is to test the performance of Engflow's remote execution and caching service based on the number of input binary files in the dependency graph. The project contains a `genrule` that generates a specified number of Java binaries for the `genbinary` Java library, which are then listed as dependencies in the main binary. The `Main.java` file loops through each generated class and calls its `greetNum` method. + +## Project Structure + +- `java/com/engflow/binaryinput/Main.java`: Main class that dynamically loads and invokes methods from generated classes. +- `java/com/engflow/binaryinput/BUILD`: Bazel build file for the `main` java binary and the `genbinary` library. + +## Usage + +To generate the test files, build the `genbinary` library using the `genrule`: +```sh +bazel build //java/com/engflow/binaryinput:genbinary +``` + +Then, the program can be run with the following command: +```sh +bazel run //java/com/engflow/binaryinput:main +``` + +## How It Works + +1. 
**Generation of Java Binaries:** + - The `genrule` in the `BUILD` file generates a specified number of Java classes (`Hello1.java`, `Hello2.java`, ..., `HelloN.java`). + - Each generated class contains a `greetNum` method that prints a unique message. + +2. **Main Class Execution:** + - The `Main.java` file in `binaryinput` dynamically loads each generated class using reflection. + - It then invokes the `greetNum` method of each class, printing the corresponding message. + +## Configuration + +The number of generated files is controlled by the `NUM_FILES` variable in the `BUILD` file of the `binaryinput` package. Modify this variable to change the number of generated classes and observe the performance impact on EngFlow's remote execution and caching service. + +## Example + +To generate and run the program with 10 input binary files: + +1. Set `NUM_FILES` to 10 in `java/com/engflow/binaryinput/BUILD`. +2. Build the `genbinary1` … `genbinary10` libraries (together with the other targets in the package): + ```sh + bazel build //java/com/engflow/binaryinput:all + ``` +3. Run the `main` binary: + ```sh + bazel run //java/com/engflow/binaryinput:main + ``` + +This will generate 10 Java classes, build the `genbinary1` … `genbinary10` libraries, and run the `main` binary, which will print messages from each generated class. 
\ No newline at end of file From ee9e280447ec8cec4188addb06f6749ee5d636ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20Felipe=20Barco?= Date: Mon, 16 Dec 2024 12:04:54 -0500 Subject: [PATCH 2/2] no py --- .../engflow/binaryinput/BenchmarkScript.py | 96 ------------------- 1 file changed, 96 deletions(-) delete mode 100644 java/com/engflow/binaryinput/BenchmarkScript.py diff --git a/java/com/engflow/binaryinput/BenchmarkScript.py b/java/com/engflow/binaryinput/BenchmarkScript.py deleted file mode 100644 index 9c61c1f4..00000000 --- a/java/com/engflow/binaryinput/BenchmarkScript.py +++ /dev/null @@ -1,96 +0,0 @@ -import subprocess -import uuid -import os -import json - -# Modify the BUILD file to define the number of files -def modify_build_file(num_files): - with open('BUILD', 'r') as file: - lines = file.readlines() - - with open('BUILD', 'w') as file: - for line in lines: - if line.startswith('NUM_FILES'): - file.write(f'NUM_FILES = {num_files}\n') - else: - file.write(line) - -def run_bazel_command(command): - subprocess.run(command, check=True) - -# Read the Bazel profile data -def analyze_bazel_profile(profile_path): - with open(profile_path, 'r') as file: - profile_data = json.load(file) - return profile_data - -# Extract the critical time and total run time from the Bazel profile data -def extract_times(profile_data): - critical_time = 0 - start_time = None - end_time = None - - # Iterate through the events in the profile data to extract the critical time and total run time - for event in profile_data['traceEvents']: - if event.get('cat') == 'critical path component': - critical_time += event['dur'] / 1000000.0 # Convert microseconds to seconds - if event.get('cat') == 'build phase marker' and event['name'] == 'Launch Blaze': - start_time = event['ts'] / 1000000.0 # Convert microseconds to seconds - if event.get('cat') == 'build phase marker' and event['name'] == 'Complete build': - end_time = event['ts'] / 1000000.0 # Convert microseconds to 
seconds - - # Calculate the total run time - total_run_time = end_time - start_time if start_time and end_time else None - - return critical_time, total_run_time - - -def main(): - num_files = int(input("Enter the number of files: ")) - execution_type = input("Enter the execution type (local/remote): ") - iterations = int(input("Enter the number of iterations: ")) - - modify_build_file(num_files) - - results = [] - - # Path to the Bazel profile data - # Using an absolute path to avoid issues with the Bazel workspace - profile_path = os.path.abspath('profile.json') - - for i in range(iterations): - if execution_type == 'local': - # Clear the Bazel cache - run_bazel_command(['bazel', 'clean', '--expunge']) - # Generate the input files - targets = [f':genbinary{j}' for j in range(1, num_files + 1)] - run_bazel_command(['bazel', 'build'] + targets) - # Build the main target and generate the Bazel profile data - run_bazel_command(['bazel', 'build', f'--profile={profile_path}', ':main']) - elif execution_type == 'remote': - # Generate a unique key for the cache silo - key = str(uuid.uuid4()) - # Generate the input files - targets = [f':genbinary{j}' for j in range(1, num_files + 1)] - run_bazel_command(['bazel', 'build', '--config=engflow', f'--remote_default_exec_properties=cache-silo-key={key}'] + targets) - # Build the main target and generate the Bazel profile data - run_bazel_command(['bazel', 'build', '--config=engflow', f'--profile={profile_path}', f'--remote_default_exec_properties=cache-silo-key={key}', ':main']) - - profile_output = analyze_bazel_profile(profile_path) - critical_time, total_run_time = extract_times(profile_output) - results.append((critical_time, total_run_time)) - - critical_times = [result[0] for result in results] - total_run_times = [result[1] for result in results] - - print(results) - - # Calculate the highest critical time and total run time - critical_time_max = max(critical_times) - total_run_time_max = max(total_run_times) - - 
print(f'Highest Critical Time: {critical_time_max}') - print(f'Highest Total Run Time: {total_run_time_max}') - -if __name__ == '__main__': - main() \ No newline at end of file