From 779f95f708aeff2a0cb93434e5c8c2749881f166 Mon Sep 17 00:00:00 2001 From: NicolasHuertas Date: Mon, 9 Dec 2024 15:36:41 -0500 Subject: [PATCH 1/5] added binary input example --- java/com/engflow/binaryinput/BUILD | 32 +++++++ .../engflow/binaryinput/BenchmarkScript.py | 96 +++++++++++++++++++ java/com/engflow/binaryinput/Main.java | 21 ++++ java/com/engflow/binaryinput/README.md | 52 ++++++++++ 4 files changed, 201 insertions(+) create mode 100644 java/com/engflow/binaryinput/BUILD create mode 100644 java/com/engflow/binaryinput/BenchmarkScript.py create mode 100644 java/com/engflow/binaryinput/Main.java create mode 100644 java/com/engflow/binaryinput/README.md diff --git a/java/com/engflow/binaryinput/BUILD b/java/com/engflow/binaryinput/BUILD new file mode 100644 index 00000000..8252be0e --- /dev/null +++ b/java/com/engflow/binaryinput/BUILD @@ -0,0 +1,32 @@ +load("@rules_java//java:defs.bzl", "java_binary", "java_library") + +NUM_FILES = 100 + +# Generates a number of java files based on the value of NUM_FILES +# Each file is named HelloX.java where X is the number of the file +# Each file contains a class with a greetNum method that prints "Hello" + the number of the file +[genrule( + name = "Hello" + str(x), + outs = ["Hello" + str(x) + ".java"], + cmd_bash = "echo 'package com.engflow.binaryinput;" + "\n" + + "public class Hello" + str(x) + + " { public static void greetNum() { System.out.println(\"Hello " + str(x) + "\"); } }' > $@", +) for x in range(1,NUM_FILES+1)] + +# Generates a java library that contains all the generated java files +[java_library( + name = "genbinary" + str(x), + srcs = [":Hello" + str(x) + ".java" for x in range(1,NUM_FILES+1)], + visibility = ["//visibility:public"], +) for x in range(1,NUM_FILES+1)] + +# Main class +java_binary( + name = "main", + srcs = ["Main.java"], + main_class = "com.engflow.binaryinput.Main", + deps = [ + ":genbinary" + str(x) for x in range(1,NUM_FILES+1) + ], + args = [str(NUM_FILES)], +) \ No newline at end of file diff --git a/java/com/engflow/binaryinput/BenchmarkScript.py b/java/com/engflow/binaryinput/BenchmarkScript.py new file mode 100644 index 00000000..9c61c1f4 --- /dev/null +++ b/java/com/engflow/binaryinput/BenchmarkScript.py @@ -0,0 +1,96 @@ +import subprocess +import uuid +import os +import json + +# Modify the BUILD file to define the number of files +def modify_build_file(num_files): + with open('BUILD', 'r') as file: + lines = file.readlines() + + with open('BUILD', 'w') as file: + for line in lines: + if line.startswith('NUM_FILES'): + file.write(f'NUM_FILES = {num_files}\n') + else: + file.write(line) + +def run_bazel_command(command): + subprocess.run(command, check=True) + +# Read the Bazel profile data +def analyze_bazel_profile(profile_path): + with open(profile_path, 'r') as file: + profile_data = json.load(file) + return profile_data + +# Extract the critical time and total run time from the Bazel profile data +def extract_times(profile_data): + critical_time = 0 + start_time = None + end_time = None + + # Iterate through the events in the profile data to extract the critical time and total run time + for event in profile_data['traceEvents']: + if event.get('cat') == 'critical path component': + critical_time += event['dur'] / 1000000.0 # Convert microseconds to seconds + if event.get('cat') == 'build phase marker' and event['name'] == 'Launch Blaze': + start_time = event['ts'] / 1000000.0 # Convert microseconds to seconds + if event.get('cat') == 'build phase marker' and event['name'] == 'Complete build': + end_time = event['ts'] / 1000000.0 # Convert microseconds to seconds + + # Calculate the total run time + total_run_time = end_time - start_time if start_time and end_time else None + + return critical_time, total_run_time + + +def main(): + num_files = int(input("Enter the number of files: ")) + execution_type = input("Enter the execution type (local/remote): ") + iterations = int(input("Enter the number of iterations: ")) + + modify_build_file(num_files) + + results = [] + + # Path to the Bazel profile data + # Using an absolute path to avoid issues with the Bazel workspace + profile_path = os.path.abspath('profile.json') + + for i in range(iterations): + if execution_type == 'local': + # Clear the Bazel cache + run_bazel_command(['bazel', 'clean', '--expunge']) + # Generate the input files + targets = [f':genbinary{j}' for j in range(1, num_files + 1)] + run_bazel_command(['bazel', 'build'] + targets) + # Build the main target and generate the Bazel profile data + run_bazel_command(['bazel', 'build', f'--profile={profile_path}', ':main']) + elif execution_type == 'remote': + # Generate a unique key for the cache silo + key = str(uuid.uuid4()) + # Generate the input files + targets = [f':genbinary{j}' for j in range(1, num_files + 1)] + run_bazel_command(['bazel', 'build', '--config=engflow', f'--remote_default_exec_properties=cache-silo-key={key}'] + targets) + # Build the main target and generate the Bazel profile data + run_bazel_command(['bazel', 'build', '--config=engflow', f'--profile={profile_path}', f'--remote_default_exec_properties=cache-silo-key={key}', ':main']) + + profile_output = analyze_bazel_profile(profile_path) + critical_time, total_run_time = extract_times(profile_output) + results.append((critical_time, total_run_time)) + + critical_times = [result[0] for result in results] + total_run_times = [result[1] for result in results] + + print(results) + + # Calculate the highest critical time and total run time + critical_time_max = max(critical_times) + total_run_time_max = max(total_run_times) + + print(f'Highest Critical Time: {critical_time_max}') + print(f'Highest Total Run Time: {total_run_time_max}') + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/java/com/engflow/binaryinput/Main.java b/java/com/engflow/binaryinput/Main.java new file mode 100644 index 00000000..d430ce2f --- /dev/null +++ b/java/com/engflow/binaryinput/Main.java @@ -0,0 +1,21 @@ +package com.engflow.binaryinput; + +import java.lang.reflect.InvocationTargetException; + +public class Main { + public static void main(String[] args) { + try { + // args[0] is the number of files to read + int numFiles = Integer.parseInt(args[0]); + + // Load and run the greetNum method from each class + for(int i = 1; i <= numFiles; i++){ + Class clazz = Class.forName("com.engflow.binaryinput.Hello" + i); + clazz.getMethod("greetNum").invoke(null); + } + + } catch (ClassNotFoundException | InvocationTargetException | IllegalAccessException | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } +} diff --git a/java/com/engflow/binaryinput/README.md b/java/com/engflow/binaryinput/README.md new file mode 100644 index 00000000..55cba586 --- /dev/null +++ b/java/com/engflow/binaryinput/README.md @@ -0,0 +1,52 @@ +# Multiple Binary Input Example + +## Overview + +The goal of this example project is to test the performance of Engflow's remote execution and caching service based on the number of input binary files in the dependency graph. The project contains a `genrule` that generates a specified number of Java binaries for the `genbinary` Java library, which are then listed as dependencies in the main binary. The `Main.java` file loops through each generated class and calls its `greetNum` method. + +## Project Structure + +- `java/com/engflow/binaryinput/Main.java`: Main class that dynamically loads and invokes methods from generated classes. +- `java/com/engflow/binaryinput/BUILD`: Bazel build file for the `main` java binary and the `genbinary` library. + +## Usage + +To generate the test files, build the `genbinary` library using the `genrule`: +```sh +bazel build //java/com/engflow/binaryinput:genbinary +``` + +Then, the program can be run with the following command: +```sh +bazel run //java/com/engflow/binaryinput:main +``` + +## How It Works + +1. **Generation of Java Binaries:** + - The `genrule` in the `BUILD` file generates a specified number of Java classes (`Hello1.java`, `Hello2.java`, ..., `HelloN.java`). + - Each generated class contains a `greetNum` method that prints a unique message. + +2. **Main Class Execution:** + - The `Main.java` file in `binaryinput` dynamically loads each generated class using reflection. + - It then invokes the `greetNum` method of each class, printing the corresponding message. + +## Configuration + +The number of generated files is controlled by the `NUM_FILES` variable in the `BUILD` file of the `binaryinput` package. Modify this variable to change the number of generated classes and observe the performance impact on Engflow's remote execution and caching service. + +## Example + +To generate and run the program with 10 input binary files: + +1. Set `NUM_FILES` to 10 in `java/com/engflow/binaryinput/BUILD`. +2. Build the `genbinary` library: + ```sh + bazel build //java/com/engflow/binaryinput:genbinary + ``` +3. Run the `main` binary: + ```sh + bazel run //java/com/engflow/binaryinput:main + ``` + +This will generate 10 Java classes, build the `genbinary` library, and run the `main` binary, which will print messages from each generated class. \ No newline at end of file From f5d30ad1000f140cb74173dfa7d0174b5d8c767a Mon Sep 17 00:00:00 2001 From: Huertas Cadavid Nicolas Fernando <98675931+NicolasHuertas@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:32:39 -0500 Subject: [PATCH 2/5] Suggested change to README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Andrés Felipe Barco Santa --- java/com/engflow/binaryinput/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/com/engflow/binaryinput/README.md b/java/com/engflow/binaryinput/README.md index 55cba586..5d5bce07 100644 --- a/java/com/engflow/binaryinput/README.md +++ b/java/com/engflow/binaryinput/README.md @@ -2,7 +2,7 @@ ## Overview -The goal of this example project is to test the performance of Engflow's remote execution and caching service based on the number of input binary files in the dependency graph. The project contains a `genrule` that generates a specified number of Java binaries for the `genbinary` Java library, which are then listed as dependencies in the main binary. The `Main.java` file loops through each generated class and calls its `greetNum` method. +The example contains a `genrule` that generates a specified number of Java binaries for the `genbinary` Java library, which are then listed as dependencies in the main binary. In this way we can study the relation between number of inputs (dependendencies) and build time. ## Project Structure From 41e80788edfc5f7141bbe2a37c2f44848790b8f9 Mon Sep 17 00:00:00 2001 From: Huertas Cadavid Nicolas Fernando <98675931+NicolasHuertas@users.noreply.github.com> Date: Thu, 19 Dec 2024 23:06:31 -0500 Subject: [PATCH 3/5] Added README recommendations and usage for BenchmarkScript.py --- java/com/engflow/binaryinput/README.md | 50 +++++++++++++++++--------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/java/com/engflow/binaryinput/README.md b/java/com/engflow/binaryinput/README.md index 5d5bce07..22c793d7 100644 --- a/java/com/engflow/binaryinput/README.md +++ b/java/com/engflow/binaryinput/README.md @@ -1,33 +1,34 @@ # Multiple Binary Input Example -## Overview - -The example contains a `genrule` that generates a specified number of Java binaries for the `genbinary` Java library, which are then listed as dependencies in the main binary. In this way we can study the relation between number of inputs (dependendencies) and build time. - -## Project Structure - -- `java/com/engflow/binaryinput/Main.java`: Main class that dynamically loads and invokes methods from generated classes. -- `java/com/engflow/binaryinput/BUILD`: Bazel build file for the `main` java binary and the `genbinary` library. - ## Usage +Set the `NUM_FILES` variable in the BUILD file to the desired input size. To generate the test files, build the `genbinary` library using the `genrule`: ```sh -bazel build //java/com/engflow/binaryinput:genbinary +bazel build //java/com/engflow/binaryinput:genbinary{1..} ``` -Then, the program can be run with the following command: +Then, the program can be built with the following command: ```sh -bazel run //java/com/engflow/binaryinput:main +bazel build //java/com/engflow/binaryinput:main ``` +Alternatively, use run the `BenchmarkScript.py` script. +Make sure that the `.bazelrc` file in the main directory is properly set up for remote execution using MyEngflow for building remmotely. +Write the desired values in the console input prompt. + ## How It Works 1. **Generation of Java Binaries:** - The `genrule` in the `BUILD` file generates a specified number of Java classes (`Hello1.java`, `Hello2.java`, ..., `HelloN.java`). - Each generated class contains a `greetNum` method that prints a unique message. + - A java library is created for each file (`Hello1.jar`, `Hello2.jar`, ..., `HelloN.jar`). + +2. **Building the main target:** + - The previously created libraries are added to the main class as dependencies through a for loop. + - The consistent naming scheme of the libraries simplifies their inclusion in the build process. -2. **Main Class Execution:** +3. **Main Class Execution:** - The `Main.java` file in `binaryinput` dynamically loads each generated class using reflection. - It then invokes the `greetNum` method of each class, printing the corresponding message. @@ -42,11 +43,26 @@ To generate and run the program with 10 input binary files: 1. Set `NUM_FILES` to 10 in `java/com/engflow/binaryinput/BUILD`. 2. Build the `genbinary` library: ```sh - bazel build //java/com/engflow/binaryinput:genbinary + bazel build //java/com/engflow/binaryinput:genbinary{1..10} ``` -3. Run the `main` binary: +3. Build the `main` binary: ```sh - bazel run //java/com/engflow/binaryinput:main + bazel build //java/com/engflow/binaryinput:main + ``` + +This will generate 10 Java classes, build the `genbinary` library, and build the `main` binary. Using `bazel run` will also print messages from each generated class. + +To use `BenchmarkScript.py` to execute 5 builds locally: + +1. Run the script. For a linux bash shell: + ```sh + python3 BenchmarkScript.py + ``` +2. In the console: + ```sh + Enter the number of files: 10 + Enter the execution type (local/remote): local + Enter the number of iterations: 5 ``` -This will generate 10 Java classes, build the `genbinary` library, and run the `main` binary, which will print messages from each generated class. \ No newline at end of file +This will clear the bazel cache, generate the input files and their libraries, then build the main target 5 times. Then the longest critical path time and total run time from the 5 runs will be printed in the console. From 57ac595cd47b05ca65e4e34afedbbbae3e00a108 Mon Sep 17 00:00:00 2001 From: Huertas Cadavid Nicolas Fernando <98675931+NicolasHuertas@users.noreply.github.com> Date: Fri, 20 Dec 2024 17:25:06 -0500 Subject: [PATCH 4/5] Deleted BenchmarkScript.py --- .../engflow/binaryinput/BenchmarkScript.py | 96 ------------------- 1 file changed, 96 deletions(-) delete mode 100644 java/com/engflow/binaryinput/BenchmarkScript.py diff --git a/java/com/engflow/binaryinput/BenchmarkScript.py b/java/com/engflow/binaryinput/BenchmarkScript.py deleted file mode 100644 index 9c61c1f4..00000000 --- a/java/com/engflow/binaryinput/BenchmarkScript.py +++ /dev/null @@ -1,96 +0,0 @@ -import subprocess -import uuid -import os -import json - -# Modify the BUILD file to define the number of files -def modify_build_file(num_files): - with open('BUILD', 'r') as file: - lines = file.readlines() - - with open('BUILD', 'w') as file: - for line in lines: - if line.startswith('NUM_FILES'): - file.write(f'NUM_FILES = {num_files}\n') - else: - file.write(line) - -def run_bazel_command(command): - subprocess.run(command, check=True) - -# Read the Bazel profile data -def analyze_bazel_profile(profile_path): - with open(profile_path, 'r') as file: - profile_data = json.load(file) - return profile_data - -# Extract the critical time and total run time from the Bazel profile data -def extract_times(profile_data): - critical_time = 0 - start_time = None - end_time = None - - # Iterate through the events in the profile data to extract the critical time and total run time - for event in profile_data['traceEvents']: - if event.get('cat') == 'critical path component': - critical_time += event['dur'] / 1000000.0 # Convert microseconds to seconds - if event.get('cat') == 'build phase marker' and event['name'] == 'Launch Blaze': - start_time = event['ts'] / 1000000.0 # Convert microseconds to seconds - if event.get('cat') == 'build phase marker' and event['name'] == 'Complete build': - end_time = event['ts'] / 1000000.0 # Convert microseconds to seconds - - # Calculate the total run time - total_run_time = end_time - start_time if start_time and end_time else None - - return critical_time, total_run_time - - -def main(): - num_files = int(input("Enter the number of files: ")) - execution_type = input("Enter the execution type (local/remote): ") - iterations = int(input("Enter the number of iterations: ")) - - modify_build_file(num_files) - - results = [] - - # Path to the Bazel profile data - # Using an absolute path to avoid issues with the Bazel workspace - profile_path = os.path.abspath('profile.json') - - for i in range(iterations): - if execution_type == 'local': - # Clear the Bazel cache - run_bazel_command(['bazel', 'clean', '--expunge']) - # Generate the input files - targets = [f':genbinary{j}' for j in range(1, num_files + 1)] - run_bazel_command(['bazel', 'build'] + targets) - # Build the main target and generate the Bazel profile data - run_bazel_command(['bazel', 'build', f'--profile={profile_path}', ':main']) - elif execution_type == 'remote': - # Generate a unique key for the cache silo - key = str(uuid.uuid4()) - # Generate the input files - targets = [f':genbinary{j}' for j in range(1, num_files + 1)] - run_bazel_command(['bazel', 'build', '--config=engflow', f'--remote_default_exec_properties=cache-silo-key={key}'] + targets) - # Build the main target and generate the Bazel profile data - run_bazel_command(['bazel', 'build', '--config=engflow', f'--profile={profile_path}', f'--remote_default_exec_properties=cache-silo-key={key}', ':main']) - - profile_output = analyze_bazel_profile(profile_path) - critical_time, total_run_time = extract_times(profile_output) - results.append((critical_time, total_run_time)) - - critical_times = [result[0] for result in results] - total_run_times = [result[1] for result in results] - - print(results) - - # Calculate the highest critical time and total run time - critical_time_max = max(critical_times) - total_run_time_max = max(total_run_times) - - print(f'Highest Critical Time: {critical_time_max}') - print(f'Highest Total Run Time: {total_run_time_max}') - -if __name__ == '__main__': - main() \ No newline at end of file From 7e0f693f3771b1bb019f7b467996520f0833c54d Mon Sep 17 00:00:00 2001 From: Huertas Cadavid Nicolas Fernando <98675931+NicolasHuertas@users.noreply.github.com> Date: Fri, 20 Dec 2024 17:25:56 -0500 Subject: [PATCH 5/5] Removed mentions of BenchmarkScript from README --- java/com/engflow/binaryinput/README.md | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/java/com/engflow/binaryinput/README.md b/java/com/engflow/binaryinput/README.md index 22c793d7..e5e93a7e 100644 --- a/java/com/engflow/binaryinput/README.md +++ b/java/com/engflow/binaryinput/README.md @@ -13,10 +13,6 @@ Then, the program can be built with the following command: bazel build //java/com/engflow/binaryinput:main ``` -Alternatively, use run the `BenchmarkScript.py` script. -Make sure that the `.bazelrc` file in the main directory is properly set up for remote execution using MyEngflow for building remmotely. -Write the desired values in the console input prompt. - ## How It Works 1. **Generation of Java Binaries:** @@ -51,18 +47,3 @@ To generate and run the program with 10 input binary files: ``` This will generate 10 Java classes, build the `genbinary` library, and build the `main` binary. Using `bazel run` will also print messages from each generated class. - -To use `BenchmarkScript.py` to execute 5 builds locally: - -1. Run the script. For a linux bash shell: - ```sh - python3 BenchmarkScript.py - ``` -2. In the console: - ```sh - Enter the number of files: 10 - Enter the execution type (local/remote): local - Enter the number of iterations: 5 - ``` - -This will clear the bazel cache, generate the input files and their libraries, then build the main target 5 times. Then the longest critical path time and total run time from the 5 runs will be printed in the console.