diff --git a/.gitignore b/.gitignore index 47be9c4..fb9bede 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ /build*/ /trace/ /scratch/ +/reports/ /.vscode/ /workspace.code-workspace @@ -23,3 +24,5 @@ !/docs/*.svg /include/otter/otter-version.h + +/*.sh diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..f51d0dc --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "modules/pyotter"] + path = modules/pyotter + url = git@github.com:Otter-Taskification/pyotter.git + branch = dev diff --git a/CMakeLists.txt b/CMakeLists.txt index 777cbda..753d55c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,8 +17,11 @@ option(WITH_EXAMPLES "Generate and build examples for demonstrating Otter" OFF) option(WITH_TESTS "Generate and build tests" OFF) option(BUILD_SHARED_LIBS "Build using shared libraries" ON) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) -set(OTF2_INCLUDE_DIR "/opt/otf2/include" CACHE STRING "OTF2 include directory") -set(OTF2_LIB_DIR "/opt/otf2/lib" CACHE STRING "OTF2 library directory") +set(OTF2_INSTALL_DIR "/opt/otf2" CACHE STRING "OTF2 install directory") + +if (CMAKE_C_FLAGS) + message(STATUS "Flags added: \"${CMAKE_C_FLAGS}\"") +endif() # Select default config type if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) @@ -79,7 +82,33 @@ if(NOT "${C_COMPILER_VERSION_MINOR}" STREQUAL "") endif() endif() -message(STATUS "C compiler selected: ${CMAKE_C_COMPILER_ID} ${C_COMPILER_VERSION}") +# Extract CXX compiler version as separate components +execute_process ( + COMMAND bash -c "${CMAKE_CXX_COMPILER} -dumpversion | tr -d '\n' | sed 's/\\./\\;/g'" + OUTPUT_VARIABLE CXX_COMPILER_VERSION_LIST +) +list(GET CXX_COMPILER_VERSION_LIST 0 CXX_COMPILER_VERSION_MAJOR) +list(GET CXX_COMPILER_VERSION_LIST 1 CXX_COMPILER_VERSION_MINOR) +list(GET CXX_COMPILER_VERSION_LIST 2 CXX_COMPILER_VERSION_BUGFIX) + +# Build version string component-wise +set(CXX_COMPILER_VERSION ${CXX_COMPILER_VERSION_MAJOR}) +if(NOT 
"${CXX_COMPILER_VERSION_MINOR}" STREQUAL "") + set(CXX_COMPILER_VERSION "${CXX_COMPILER_VERSION}.${CXX_COMPILER_VERSION_MINOR}") + if(NOT "${CXX_COMPILER_VERSION_BUGFIX}" STREQUAL "") + set(CXX_COMPILER_VERSION "${CXX_COMPILER_VERSION}.${CXX_COMPILER_VERSION_BUGFIX}") + endif() +endif() + +message(STATUS "C compiler selected: ${CMAKE_C_COMPILER} (${CMAKE_C_COMPILER_ID}) ${C_COMPILER_VERSION}") +message(STATUS "C++ compiler selected: ${CMAKE_CXX_COMPILER} (${CMAKE_CXX_COMPILER_ID}) ${CXX_COMPILER_VERSION}") + +# # # OMPT # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # +# Check for omp-tools.h +CHECK_INCLUDE_FILE(omp-tools.h HAVE_OMP_TOOLS_H) +if(NOT HAVE_OMP_TOOLS_H) + message(WARNING "Can't find omp-tools.h - did you specify an OMPT-supporting compiler?") +endif() # Detect requirement to use 5.1 features instead of deprecated 5.1 features set(USE_OMPT_51 0) @@ -97,123 +126,190 @@ if(CMAKE_C_COMPILER_ID STREQUAL "Intel") endif() elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang") if("${C_COMPILER_VERSION}" VERSION_GREATER "11.1") - message(STATUS "Detected Clang >11") + message(STATUS "Detected Clang >11") set(USE_OMPT_51 1) endif() +elseif(CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM") + if("${C_COMPILER_VERSION}" VERSION_GREATER_EQUAL "13.0.0") + message(STATUS "Detected IntelLLVM >= 13.0.0") + set(USE_OMPT_51 1) + endif() else() - message(WARN "Selected C compiler may not be supported") + message(WARN " Selected C compiler may not be supported:") + message(WARN " CMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID}") + message(WARN " C_COMPILER_VERSION=${C_COMPILER_VERSION}") endif() -# Define library components -add_library(otter-core OBJECT - ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/otter-core.c - ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/otter-entry.c - ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/otter-structs.c -) -target_include_directories(otter-core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) -target_compile_definitions(otter-core PRIVATE DEBUG_LEVEL=${OTTER_DEBUG_CORE}) 
-set_property(TARGET otter-core PROPERTY POSITION_INDEPENDENT_CODE ON) +# Check for otf2.h +if(DEFINED OTF2_INSTALL_DIR) +set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES};${OTF2_INSTALL_DIR}/include) +message(STATUS "Searching for otf2.h in: ${CMAKE_REQUIRED_INCLUDES}") +else() +message(STATUS "Searching for otf2.h in default include locations") +endif() +CHECK_INCLUDE_FILE(otf2/otf2.h HAVE_OTF2_H) +if(NOT HAVE_OTF2_H) + message(FATAL_ERROR "Can't find otf2.h - did you specify OTF2_INSTALL_DIR?") +endif() + +# Check OTF2 version found +find_path(OTF2_INCLUDE_DIR otf2.h + PATHS ${OTF2_INSTALL_DIR}/include/otf2) +message(STATUS OTF2_INCLUDE_DIR=${OTF2_INCLUDE_DIR}) +file(READ "${OTF2_INCLUDE_DIR}/OTF2_GeneralDefinitions.h" otf2_defs_h) -add_library(otter-trace OBJECT - ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace-core.c - ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace-structs.c +string(REGEX MATCH "#define OTF2_VERSION_MAJOR[ \t]+[0-9]+" OTF2_VERSION_MAJOR_DEF "${otf2_defs_h}") +string(REGEX REPLACE "#define OTF2" "" OTF2_VERSION_MAJOR_R "${OTF2_VERSION_MAJOR_DEF}") +string(REGEX MATCH "[0-9]+" OTF2_VERSION_MAJOR "${OTF2_VERSION_MAJOR_R}") + +string(REGEX MATCH "#define OTF2_VERSION_MINOR[ \t]+[0-9]+" OTF2_VERSION_MINOR_DEF "${otf2_defs_h}") +string(REGEX REPLACE "#define OTF2" "" OTF2_VERSION_MINOR_R "${OTF2_VERSION_MINOR_DEF}") +string(REGEX MATCH "[0-9]+" OTF2_VERSION_MINOR "${OTF2_VERSION_MINOR_R}") + +string(REGEX MATCH "#define OTF2_VERSION_BUGFIX[ \t]+[0-9]+" OTF2_VERSION_BUGFIX_DEF "${otf2_defs_h}") +string(REGEX REPLACE "#define OTF2" "" OTF2_VERSION_BUGFIX_R "${OTF2_VERSION_BUGFIX_DEF}") +string(REGEX MATCH "[0-9]+" OTF2_VERSION_BUGFIX "${OTF2_VERSION_BUGFIX_R}") + +message(STATUS "Using OTF2 v${OTF2_VERSION_MAJOR}.${OTF2_VERSION_MINOR}.${OTF2_VERSION_BUGFIX}") + +if(NOT "${OTF2_VERSION_MAJOR}.${OTF2_VERSION_MINOR}" STREQUAL "2.3") + message(FATAL_ERROR "Otter requires OTF2 v2.3") +endif() + +# OMPT event source +add_library(otter-events-ompt OBJECT + 
${CMAKE_CURRENT_SOURCE_DIR}/src/otter/events/ompt/otter-core.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/events/ompt/otter-entry.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/types/otter-structs.c ) -target_include_directories(otter-trace PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) -target_compile_definitions(otter-trace PRIVATE DEBUG_LEVEL=${OTTER_DEBUG_TRACE}) -set_property(TARGET otter-trace PROPERTY POSITION_INDEPENDENT_CODE ON) +target_include_directories(otter-events-ompt PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_compile_options(otter-events-ompt BEFORE PRIVATE ${CMAKE_C_FLAGS}) +target_compile_definitions(otter-events-ompt PRIVATE DEBUG_LEVEL=${OTTER_DEBUG_CORE}) +set_property(TARGET otter-events-ompt PROPERTY POSITION_INDEPENDENT_CODE ON) + +# Serial event source +add_library(otter-events-serial OBJECT + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/events/serial/otter-serial.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/events/serial/otter-serial.F90 + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/types/otter-structs.c +) +target_include_directories(otter-events-serial PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_compile_options(otter-events-serial BEFORE PRIVATE ${CMAKE_C_FLAGS}) +target_compile_definitions(otter-events-serial PRIVATE DEBUG_LEVEL=${OTTER_DEBUG_CORE}) +set_property(TARGET otter-events-serial PROPERTY POSITION_INDEPENDENT_CODE ON) + +# Trace component for OMP mode +add_library(otter-trace-ompt OBJECT + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-core.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-location.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-region-parallel.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-region-workshare.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-region-master.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-region-sync.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-region-task.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-archive.c + 
${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-unique-refs.c +) +target_include_directories(otter-trace-ompt PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_compile_definitions(otter-trace-ompt PRIVATE DEBUG_LEVEL=${OTTER_DEBUG_TRACE}) +set_property(TARGET otter-trace-ompt PROPERTY POSITION_INDEPENDENT_CODE ON) + +# Trace component for serial mode +add_library(otter-trace-serial OBJECT + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-core.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-location.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-region-parallel.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-region-workshare.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-region-master.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-region-sync.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-region-task.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-archive.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/trace/trace-unique-refs.c +) +target_include_directories(otter-trace-serial PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_compile_definitions(otter-trace-serial PRIVATE DEBUG_LEVEL=${OTTER_DEBUG_TRACE}) +set_property(TARGET otter-trace-serial PROPERTY POSITION_INDEPENDENT_CODE ON) +# Add define to signify serial tracing mode +target_compile_definitions(otter-trace-serial PRIVATE OTTER_SERIAL_MODE) +# Datatypes component add_library(otter-dtype OBJECT - ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/dt-queue.c - ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/dt-stack.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/types/dt-queue.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/types/dt-stack.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/otter/types/char_ref_registry.cpp ) target_include_directories(otter-dtype PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) target_compile_definitions(otter-dtype PRIVATE DEBUG_LEVEL=${OTTER_DEBUG_DTYPE}) set_property(TARGET otter-dtype PROPERTY POSITION_INDEPENDENT_CODE ON) -# Link component objects into shared library 
-add_library(otter - $ - $ +# Otter library compiled for OMP mode +if(HAVE_OMP_TOOLS_H) +add_library(otter-ompt + $ + $ $ ) +install(TARGETS otter-ompt DESTINATION lib) +target_link_libraries(otter-ompt pthread) +target_link_libraries(otter-ompt otf2) +if(DEFINED OTF2_INSTALL_DIR) + target_link_libraries(otter-ompt -L${OTF2_INSTALL_DIR}/lib) +endif() +else() +message(WARNING "otter-ompt target was not generated as omp-tools.h is not available") +endif() -# Specify Otter install location -install(TARGETS otter DESTINATION lib) - -# Configure version header -configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/include/otter/otter-version.h.in - ${CMAKE_CURRENT_SOURCE_DIR}/include/otter/otter-version.h +# Otter library compiled for serial mode +add_library(otter-serial + $ + $ + $ ) +set_target_properties(otter-serial PROPERTIES PUBLIC_HEADER "include/otter/otter-serial.h") -# Link to dependencies -target_link_libraries(otter pthread) - -# # # OMPT # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # -# Check for omp-tools.h -CHECK_INCLUDE_FILE(omp-tools.h HAVE_OMP_TOOLS_H) -if(NOT HAVE_OMP_TOOLS_H) - message(FATAL_ERROR "Can't find omp-tools.h - did you specify an OMPT-supporting compiler?") +# # # OTF2 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # +# Add OTF2 install directory if it was specified +if(DEFINED OTF2_INSTALL_DIR) + target_include_directories(otter-events-ompt PRIVATE ${OTF2_INSTALL_DIR}/include) + target_include_directories(otter-events-serial PRIVATE ${OTF2_INSTALL_DIR}/include) + target_include_directories(otter-trace-ompt PRIVATE ${OTF2_INSTALL_DIR}/include) + target_include_directories(otter-trace-serial PRIVATE ${OTF2_INSTALL_DIR}/include) + target_include_directories(otter-dtype PRIVATE ${OTF2_INSTALL_DIR}/include) endif() # Flag that we should use OMPT 5.1 features if(USE_OMPT_51) message(STATUS "Using OMPT 5.1") - target_compile_definitions(otter-core PRIVATE USE_OMPT_MASKED) - target_compile_definitions(otter-trace 
PRIVATE USE_OMPT_MASKED) + target_compile_definitions(otter-events-ompt PRIVATE USE_OMPT_MASKED) + target_compile_definitions(otter-trace-ompt PRIVATE USE_OMPT_MASKED) else() message(STATUS "Not using OMPT 5.1") endif() -# # # OTF2 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # -# Add OTF2 include directory if it was specified -set(HAVE_OTF2_INCLUDE_DIR FALSE) -if(DEFINED OTF2_INCLUDE_DIR) - target_include_directories(otter-core PRIVATE ${OTF2_INCLUDE_DIR}) - target_include_directories(otter-trace PRIVATE ${OTF2_INCLUDE_DIR}) - set(HAVE_OTF2_INCLUDE_DIR TRUE) - message(STATUS "OTF2 include dir was specified") -endif() - -target_link_libraries(otter otf2) -# Add library directory for OTF2 if it was specified -if(DEFINED OTF2_LIB_DIR) - target_link_libraries(otter -L${OTF2_LIB_DIR}) -endif() - -# Check for otf2.h -set(CMAKE_REQUIRED_INCLUDES $<$:${OTF2_INCLUDE_DIR}>) -CHECK_INCLUDE_FILE(otf2/otf2.h HAVE_OTF2_H) -if(NOT HAVE_OTF2_H) - message(FATAL_ERROR "Can't find otf2.h - did you specify OTF2_INCLUDE_DIR?") -endif() - -# Check OTF2 version found -file(READ "${OTF2_INCLUDE_DIR}/otf2/OTF2_GeneralDefinitions.h" otf2_defs_h) - -string(REGEX MATCH "#define OTF2_VERSION_MAJOR[ \t]+[0-9]+" OTF2_VERSION_MAJOR_DEF "${otf2_defs_h}") -string(REGEX REPLACE "#define OTF2" "" OTF2_VERSION_MAJOR_R "${OTF2_VERSION_MAJOR_DEF}") -string(REGEX MATCH "[0-9]+" OTF2_VERSION_MAJOR "${OTF2_VERSION_MAJOR_R}") - -string(REGEX MATCH "#define OTF2_VERSION_MINOR[ \t]+[0-9]+" OTF2_VERSION_MINOR_DEF "${otf2_defs_h}") -string(REGEX REPLACE "#define OTF2" "" OTF2_VERSION_MINOR_R "${OTF2_VERSION_MINOR_DEF}") -string(REGEX MATCH "[0-9]+" OTF2_VERSION_MINOR "${OTF2_VERSION_MINOR_R}") - -string(REGEX MATCH "#define OTF2_VERSION_BUGFIX[ \t]+[0-9]+" OTF2_VERSION_BUGFIX_DEF "${otf2_defs_h}") -string(REGEX REPLACE "#define OTF2" "" OTF2_VERSION_BUGFIX_R "${OTF2_VERSION_BUGFIX_DEF}") -string(REGEX MATCH "[0-9]+" OTF2_VERSION_BUGFIX "${OTF2_VERSION_BUGFIX_R}") +# Specify Otter install 
location +install(TARGETS otter-serial LIBRARY DESTINATION lib PUBLIC_HEADER DESTINATION include/otter) -message(STATUS "Using OTF2 v${OTF2_VERSION_MAJOR}.${OTF2_VERSION_MINOR}.${OTF2_VERSION_BUGFIX}") +# Configure version header +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/include/otter/otter-version.h.in + ${CMAKE_CURRENT_SOURCE_DIR}/include/otter/otter-version.h +) -if(NOT "${OTF2_VERSION_MAJOR}.${OTF2_VERSION_MINOR}" STREQUAL "2.3") - message(FATAL_ERROR "Otter requires OTF2 v2.3") +# Link to dependencies +target_link_libraries(otter-serial pthread) +target_link_libraries(otter-serial otf2) +# Add library directory for OTF2 if it was specified +if(DEFINED OTF2_INSTALL_DIR) + target_link_libraries(otter-serial -L${OTF2_INSTALL_DIR}/lib) endif() # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # Build examples for demonstrating Otter # # # # # # # # # # # # # # # +message(STATUS "Detecting examples") if (WITH_EXAMPLES) + message(STATUS "Adding examples") add_subdirectory(src/otter/examples) endif() diff --git a/Makefile b/Makefile deleted file mode 100644 index b6ae95d..0000000 --- a/Makefile +++ /dev/null @@ -1,80 +0,0 @@ -$(info CC=$(shell which $(CC))) -$(info CXX=$(shell which $(CXX))) - -# Global options -#C = clang <- pass in as environment variable instead -CC_VERSION = $(shell $(CC) --version | head -n 1) -INCLUDE = -Iinclude -I/opt/otf2/include -I/ddn/data/$(USER)/local/include -NOWARN = -Wno-unused-function -Wno-unused-variable -CFLAGS = -Wall -Werror $(NOWARN) $(INCLUDE) -DCC_VERSION="$(CC_VERSION)" -LDFLAGS = -L/opt/otf2/lib -L/ddn/data/$(USER)/local/lib -DEBUG = -g -NDEBUG = -DNDEBUG - -# MAIN OUTPUT -OTTER = lib/libotter.so - -# header files -COMMON_H = $(wildcard include/*.h) -OTTERHEAD = $(wildcard include/otter-core/*.h) $(COMMON_H) -TRACEHEAD = $(wildcard include/otter-trace/*.h) $(COMMON_H) -DTYPEHEAD = $(wildcard include/otter-datatypes/*.h) $(COMMON_H) - -# source files -OTTERSRC = $(wildcard 
src/otter-core/*.c) -TRACESRC = $(wildcard src/otter-trace/*.c) -DTYPESRC = $(wildcard src/otter-datatypes/*.c) -OMPSRC = $(wildcard src/otter-demo/*c) -OMPSRC_CPP = $(wildcard src/otter-demo/*.cpp) - -# executables -OMPEXE = $(patsubst src/otter-demo/omp-%.c, omp-%, $(OMPSRC)) -OMPEXE_CPP = $(patsubst src/otter-demo/omp-%.cpp, omp-%, $(OMPSRC_CPP)) - -BINS = $(OTTER) $(OMPEXE) $(OMPEXE_CPP) - -.PHONY: clean cleanfiles run - -otter: $(OTTER) -all: $(BINS) -exe: $(OMPEXE) $(OMPEXE_CPP) - -# link Otter as a dynamic first-party tool to be loaded by the runtime -OTTEROBJ = $(patsubst src/otter-core/otter-%.c, obj/otter-%.o, $(OTTERSRC)) -TRACEOBJ = $(patsubst src/otter-trace/trace-%.c, obj/trace-%.o, $(TRACESRC)) -DTYPEOBJ = $(patsubst src/otter-datatypes/dt-%.c, obj/dt-%.o, $(DTYPESRC)) -$(OTTER): $(OTTEROBJ) $(TRACEOBJ) $(DTYPEOBJ) - @printf "==> linking %s\n" $@ - $(CC) $(LDFLAGS) -lpthread -lotf2 -shared $^ -o $@ - -# otter obj files -obj/otter-%.o: src/otter-core/otter-%.c - @printf "==> compiling %s\n" $@ - $(CC) $(CFLAGS) $(DEBUG) $(NDEBUG) -DDEBUG_LEVEL=$(DEBUG_OTTER) $^ -fPIC -c -o $@ - -# trace obj files -obj/trace-%.o: src/otter-trace/trace-%.c - @printf "==> compiling %s\n" $@ - $(CC) $(CFLAGS) $(DEBUG) $(NDEBUG) -DDEBUG_LEVEL=$(DEBUG_TRACE) $^ -fPIC -c -o $@ - -# dtype obj files -obj/dt-%.o: src/otter-datatypes/dt-%.c - @printf "==> compiling %s\n" $@ - $(CC) $(CFLAGS) $(DEBUG) $(NDEBUG) -DDEBUG_LEVEL=$(DEBUG_DATATYPES) $^ -fPIC -c -o $@ - -# standalone OMP apps -$(OMPEXE): $(OMPSRC) - @printf "==> compiling %s\n" $@ - $(CC) $(CFLAGS) $(DEBUG) -fopenmp src/otter-demo/$@.c -o $@ - @echo $@ links to `ldd $@ | grep "[lib|libi|libg]omp"` - -$(OMPEXE_CPP): $(OMPSRC_CPP) - @printf "==> compiling %s\n" $@ - $(CXX) $(CFLAGS) $(DEBUG) -fopenmp src/otter-demo/$@.cpp -o $@ - @echo $@ links to `ldd $@ | grep "[lib|libi|libg]omp"` - -clean: - -rm -f lib/* obj/* $(BINS) - -cleanlib: - -rm -f lib/* obj/* diff --git a/README.md b/README.md index a57c603..1f9fe6f 100644 
--- a/README.md +++ b/README.md @@ -1,20 +1,12 @@ -# Otter - An OMPT Tool for Tracing OpenMP Tasks +# Otter -Otter is a tool for visualising the structure of task-based OpenMP programs allowing developers and researchers to see the true structure of their OpenMP 5.0 programs from the perspective of the OpenMP runtime, without any modification of the target application. +Developed under the [ExCALIBUR task parallelism cross-cutting research theme](https://excalibur.ac.uk/projects/exposing-parallelism-task-parallelism/), Otter is a tool designed to facilitate data-driven parallelisation of serial code. Otter allows HPC developers to: -Otter uses the OpenMP Tools interface in [OpenMP 5.0](https://www.openmp.org/spec-html/5.0/openmpch4.html) to observe task creation and synchronisation events, extracting from this data the structure of a target application independent of the particular scheduling of tasks at runtime. +- Annotate, trace & visualise loop/task-based serial code as a directed graph; +- Recommend strategies for transforming serial code into effective task-based parallel code; +- Non-invasively trace & visualise loop/task-based OpenMP 5.x programs. -## Features - -- Trace the task creation and synchronisation constructs of an OpenMP 5.0 program without any modification of the source - no need to add any instrumentation to the target application. -- Supports synchronisation due to taskwait and taskgroup constructs. -- Supports nested tasks and nested parallelism. -- No additional thread synchronisation - won't accidentally serialise the target application. - - -## Example - -Take this example code which uses nested tasks synchronised by a taskwait barrier to calculate the nth Fibonacci number: +The [project wiki](https://github.com/Otter-Taskification/otter/wiki) introduces the Otter toolset and explains how to use the features above. 
Otter can trace and visualise the structure of serial and OpenMP code, for example transforming the following code: ```c int fibonacci(int n) { @@ -27,125 +19,55 @@ int fibonacci(int n) { #pragma omp taskwait return i+j; } + +int main(int argc, char *argv[]) { + int n = atoi(argv[1]); + #pragma omp parallel shared(n) + { + #pragma omp single + printf("f(%d) = %d\n", n, fibonacci(n)); + } +} ``` -We can speculate about the structure of this code in terms of tasks and their synchronisation, but how can we check that our ideas match reality? This is a challenge even for the simple code above, and soon becomes impossible for complex task-based code. We might try using performance analysis tools to trace or profile an application, providing a thread-centric view of a specific arrangement of tasks. While this gives us insight into the application's runtime performance, we would still struggle to get a clear picture of the application's overall structure. Using Otter we can observe the true structure of a task-based OpenMP application, all without modifying the application's source. Here is the result of applying Otter to a program using the Fibonacci function above to calculate `fibonacci(5)`: +Into a directed graph visualising the code's task creation and synchronisation structure:

The task-based structure of the Fibonacci function.

-The nodes of this graph represent the different OpenMP constructs that Otter can show: +The nodes of this graph represent the different tasking constructs that Otter can show:

The node styles representing the OpenMP constructs represented by Otter.

-## Getting Started - -### Prerequisites - -The following dependencies should be installed before building Otter: - -- A recent version of a compiler supporting OMPT, such as Clang or Intel's one-API compilers. -- [OTF2 v2.3](https://zenodo.org/record/4682684) -- [`python-igraph` v0.9.1](https://pypi.org/project/python-igraph/0.9.1/) - -### Building Otter - -To build the runtime tool and install post-processing python components: - -```bash -git clone https://github.com/adamtuft/otter.git && cd otter -git checkout dev -mkdir build && cd build -cmake -DCMAKE_C_COMPILER=clang \ - -DCMAKE_INSTALL_PREFIX= \ - -DOTF2_INCLUDE_DIR= \ - -DOTF2_LIB_DIR= \ - ../ -cmake --build . && cmake --install . -pip install ../src/python/ -``` - -The `OTF2_INCLUDE_DIR` and `OTF2_LIB_DIR` arguments should point to the include and library directories for OTF2. The default values assumed by Otter are `/opt/otf2/include` and `/opt/otf2/lib`. - -To build several example OpenMP programs with which you can test Otter, set `WITH_EXAMPLES=ON`. You will need to also specify C++ and Fortran compilers which use the LLVM or Intel OpenMP library. **Note:** it is not advised to compile the examples with `gcc`/`gfortran` and then link them to `libomp` or `libiomp5` as this does not give access to the full range of runtime events needed by Otter. Doing this is likely to cause a corrupted trace to be created. For more details, see [this post](https://github.com/adamtuft/otter/issues/14#issuecomment-914156774). - -You can build tests by including `WITH_TESTS=ON`. These can be found in the `test` folder and executed with `ctest`. +## Otter Toolset -### Using Otter +The Otter toolset includes: -Tracing a target application `omp-demo` is as simple as: +- [**Otter-Serial**](https://github.com/Otter-Taskification/otter/wiki/Otter-Serial): an API and runtime library for annotating & tracing the structure of a serial target application. 
+ +- [**Otter-OMPT**](https://github.com/Otter-Taskification/otter/wiki/Otter-OMPT): an OMPT tool for non-invasive tracing of the loop/task-based structure of OpenMP 5.x programs. + +- [**PyOtter**](https://github.com/Otter-Taskification/otter/wiki/PyOtter): The visualisation & reporting tool for use with Otter trace data. -```bash -OMP_TOOL_LIBRARIES=/lib/libotter.so ./omp-demo -``` - -If everything is set up correctly, you should see output like this (depending on the specific OpenMP runtime you are using): - -``` -Intel(R) OMP version: 5.0.20210428, OMP v. 201611 -Otter was compiled with icc (ICC) 2021.3.0 20210609 -Starting OTTer... -Trace output path: trace/otter_trace.[pid] - -Registering callbacks: -ompt_callback_thread_begin | ompt_set_always (5) -ompt_callback_thread_end | ompt_set_always (5) -ompt_callback_parallel_begin | ompt_set_always (5) -ompt_callback_parallel_end | ompt_set_always (5) -ompt_callback_task_create | ompt_set_always (5) -ompt_callback_task_schedule | ompt_set_always (5) -ompt_callback_implicit_task | ompt_set_always (5) -ompt_callback_work | ompt_set_always (5) -ompt_callback_masked | ompt_set_always (5) -ompt_callback_sync_region | ompt_set_always (5) - -PROCESS RESOURCE USAGE: - maximum resident set size: 11916 kb -page reclaims (soft page faults): 5644 - page faults (hard page faults): 1 - block input operations: 0 - block output operations: 72 - - threads: 3 - parallel regions: 2 - tasks: 25 -OTTER_TRACE_FOLDER=/home/adam/otter/trace/otter_trace.[pid] -``` - -By default, Otter writes a trace to `trace/otter_trace.[pid]` - the location and name of the trace can be set with the `OTTER_TRACE_PATH` and `OTTER_TRACE_NAME` environment variables. 
- -The contents of the trace can be converted into a graph with: - -```bash -python3 -m otter trace/otter_trace.[pid]/otter_trace.[pid].otf2 -o graph.dot -``` - -The graph, saved to `graph.dot`, can then be visualised using the `dot` command line tool included with [Graphviz](https://graphviz.org/) or a graph visualisation tool such as [yEd-Desktop or yEd-Live](https://www.yworks.com/\#products). - -## Future Work - -The future direction of development may include, in no particular order: - -- [ ] Record and visualise actual work done per task. -- [ ] Automatic detection of the critical path. -- [ ] Support for MPI+OpenMP applications. -- [ ] Support for GPU-offloaded tasks. -- [ ] Stronger graph visualisation capabilities. - -## Contributing +## Getting Started -Contributions are welcome! If you would like to contribute, please fork the repository and use the `contributions` branch. There is no specific style guide, although I would be grateful if you could code in a style consistent with that of the main project. +- [Installation guide](https://github.com/Otter-Taskification/otter/wiki#installation-guide) +- [Using Otter-Serial](https://github.com/Otter-Taskification/otter/wiki/Otter-Serial/#using-otter-serial) +- [Using Otter-OMPT](https://github.com/Otter-Taskification/otter/wiki/Otter-OMPT#getting-started) +- [Using PyOtter](https://github.com/Otter-Taskification/otter/wiki/PyOtter) ## Issues, Questions and Feature Requests -Please post any of the above here: https://github.com/adamtuft/otter/issues +For **Otter-Serial** or **Otter-OMPT**, please post [here](https://github.com/Otter-Taskification/otter/issues). + +For **PyOtter**, please post [here](https://github.com/Otter-Taskification/pyotter/issues). ## Licensing -Otter is released under the BSD 3-clause license. See [LICENSE](LICENSE) for details. +Otter is released under the BSD 3-clause license. See [LICENCE](LICENCE) for details. Copyright (c) 2021, Adam Tuft All rights reserved. 
diff --git a/include/otter/char_ref_registry.hpp b/include/otter/char_ref_registry.hpp new file mode 100644 index 0000000..c1e2951 --- /dev/null +++ b/include/otter/char_ref_registry.hpp @@ -0,0 +1,50 @@ +#ifdef __cplusplus +#include +#include +#include +#include +#else +#include +#endif + +#ifdef __cplusplus + + // Opaque class which maps const char* to uint32_t + class char_ref_registry; + + // A callback used to get a new uint32_t label when inserting a new key + using char_ref_registry_label_cbk = uint32_t (*)(void); + + // A callback to be applied to each key-value pair in the registry when it is + // deleted + using char_ref_registry_delete_cbk = void (*)(const char*, uint32_t); + +#else + + typedef struct char_ref_registry char_ref_registry; + + // A callback used to get a new value when inserting a new key + typedef uint32_t (*char_ref_registry_label_cbk)(void); + + // A callback to be applied to each key-value pair in the registry when it is + // deleted + typedef void (*char_ref_registry_delete_cbk)(const char*, uint32_t); + +#endif // #ifdef __cplusplus + +#ifdef __cplusplus +extern "C" { +#endif + +// Make a new char_ref_registry, injecting behaviour with callbacks +char_ref_registry *char_ref_registry_make(char_ref_registry_label_cbk, char_ref_registry_delete_cbk); + +// Delete a char_ref_registry +void char_ref_registry_delete(char_ref_registry *); + +// Insert a key, returning the new value, or the existing value if already in the registry +uint32_t char_ref_registry_insert(char_ref_registry *, const char *); + +#ifdef __cplusplus +} +#endif diff --git a/include/otter/otter-common.h b/include/otter/otter-common.h index 175acb4..6c8f0b1 100644 --- a/include/otter/otter-common.h +++ b/include/otter/otter-common.h @@ -14,6 +14,15 @@ #define OMPT_TASK_TYPE_BITS 0x0F typedef uint64_t unique_id_t; +typedef uint32_t otter_string_ref_t; + +#define OTTER_STRING_REF_UNDEFINED (0) + +typedef struct otter_src_location_t { + const char *file; + const char *func; 
+ int line; +} otter_src_location_t; typedef struct otter_opt_t { char *hostname; diff --git a/include/otter/otter-serial.F90 b/include/otter/otter-serial.F90 new file mode 100644 index 0000000..61ec731 --- /dev/null +++ b/include/otter/otter-serial.F90 @@ -0,0 +1,8 @@ +#define fortran_otterTraceInitialise() fortran_otterTraceInitialise_i(__FILE__, "FORTRAN_FUNCTION", __LINE__) +#define fortran_otterParallelBegin() fortran_otterParallelBegin_i(__FILE__, "FORTRAN_FUNCTION", __LINE__) +#define fortran_otterTaskBegin() fortran_otterTaskBegin_i(__FILE__, "FORTRAN_FUNCTION", __LINE__) +#define fortran_otterTaskSingleBegin() fortran_otterTaskSingleBegin_i(__FILE__, "FORTRAN_FUNCTION", __LINE__) +#define fortran_otterLoopBegin() fortran_otterLoopBegin_i(__FILE__, "FORTRAN_FUNCTION", __LINE__) +#define fortran_otterLoopIterationBegin() fortran_otterLoopIterationBegin_i(__FILE__, "FORTRAN_FUNCTION", __LINE__) +#define fortran_otterSynchroniseChildTasks() fortran_otterSynchroniseChildTasks_i(__FILE__, "FORTRAN_FUNCTION", __LINE__) +#define fortran_otterSynchroniseDescendantTasksBegin() fortran_otterSynchroniseDescendantTasksBegin_i(__FILE__, "FORTRAN_FUNCTION", __LINE__) diff --git a/include/otter/otter-serial.h b/include/otter/otter-serial.h new file mode 100644 index 0000000..4bcd1ce --- /dev/null +++ b/include/otter/otter-serial.h @@ -0,0 +1,53 @@ +#if !defined(OTTER_SERIAL_H) +#define OTTER_SERIAL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define otterTraceInitialise() \ + otterTraceInitialise_i(__FILE__, __func__, __LINE__) +#define otterParallelBegin() \ + otterParallelBegin_i(__FILE__, __func__, __LINE__) +#define otterTaskBegin() \ + otterTaskBegin_i(__FILE__, __func__, __LINE__) +#define otterTaskSingleBegin() \ + otterTaskSingleBegin_i(__FILE__, __func__, __LINE__) +#define otterLoopBegin() \ + otterLoopBegin_i(__FILE__, __func__, __LINE__) +#define otterLoopIterationBegin() \ + otterLoopIterationBegin_i(__FILE__, __func__, __LINE__) +#define 
otterSynchroniseChildTasks() \ + otterSynchroniseChildTasks_i(__FILE__, __func__, __LINE__) +#define otterSynchroniseDescendantTasksBegin() \ + otterSynchroniseDescendantTasksBegin_i(__FILE__, __func__, __LINE__) + +/* +The public API for the Otter serial programme tracing library +*/ + +// API entrypoints +void otterTraceInitialise_i(const char*, const char*, const int); +void otterTraceFinalise(void); +void otterParallelBegin_i(const char*, const char*, const int); +void otterParallelEnd(void); +void otterTaskBegin_i(const char*, const char*, const int); +void otterTaskEnd(void); +void otterTaskSingleBegin_i(const char*, const char*, const int); +void otterTaskSingleEnd(void); +void otterLoopBegin_i(const char*, const char*, const int); +void otterLoopEnd(void); +void otterLoopIterationBegin_i(const char*, const char*, const int); +void otterLoopIterationEnd(void); +void otterSynchroniseChildTasks_i(const char*, const char*, const int); +void otterSynchroniseDescendantTasksBegin_i(const char*, const char*, const int); +void otterSynchroniseDescendantTasksEnd(void); + +void otterTraceStart(void); +void otterTraceStop(void); + +#ifdef __cplusplus +} +#endif + +#endif // OTTER_SERIAL_H diff --git a/include/otter/otter-structs.h b/include/otter/otter-structs.h index 7b3b650..4bd4f29 100644 --- a/include/otter/otter-structs.h +++ b/include/otter/otter-structs.h @@ -3,15 +3,30 @@ #include #include -#include "otter/otter-ompt-header.h" -#include "otter/otter.h" +// #include "otter/otter-ompt-header.h" +// #include "otter/otter.h" #include "otter/trace.h" -/* forward declarations */ -typedef struct parallel_data_t parallel_data_t; -typedef struct thread_data_t thread_data_t; -typedef struct task_data_t task_data_t; -typedef struct scope_t scope_t; +typedef struct thread_data_t { + unique_id_t id; + trace_location_def_t *location; + otter_thread_t type; + bool is_master_thread; // of parallel region +} thread_data_t; + +typedef struct task_data_t { + unique_id_t id; + 
otter_task_flag_t type; + otter_task_flag_t flags; + trace_region_def_t *region; +} task_data_t; + +typedef struct parallel_data_t { + unique_id_t id; + unique_id_t master_thread; + task_data_t *encountering_task_data; + trace_region_def_t *region; +} parallel_data_t; /* Parallel */ parallel_data_t *new_parallel_data( @@ -21,31 +36,26 @@ parallel_data_t *new_parallel_data( unsigned int requested_parallelism, int flags); void parallel_destroy(parallel_data_t *thread_data); -struct parallel_data_t { - unique_id_t id; - unique_id_t master_thread; - task_data_t *encountering_task_data; - trace_region_def_t *region; -}; /* Thread */ -thread_data_t *new_thread_data(ompt_thread_t type); +thread_data_t *new_thread_data(otter_thread_t type); void thread_destroy(thread_data_t *thread_data); -struct thread_data_t { - unique_id_t id; - trace_location_def_t *location; - ompt_thread_t type; - bool is_master_thread; // of parallel region -}; /* Task */ -task_data_t *new_task_data(trace_location_def_t *loc,trace_region_def_t *parent_task_region, unique_id_t task_id, ompt_task_flag_t flags, int has_dependences); +task_data_t *new_task_data( + trace_location_def_t *loc, + trace_region_def_t *parent_task_region, + unique_id_t task_id, + otter_task_flag_t flags, + int has_dependences, + otter_src_location_t *src_location +); void task_destroy(task_data_t *task_data); -struct task_data_t { - unique_id_t id; - ompt_task_flag_t type; - ompt_task_flag_t flags; - trace_region_def_t *region; -}; + +/* Get new unique ID */ +unique_id_t get_unique_parallel_id(void); +unique_id_t get_unique_thread_id(void); +unique_id_t get_unique_task_id(void); +unique_id_t get_dummy_time(void); #endif // OTTER_STRUCTS_H diff --git a/include/otter/otter-version.h.in b/include/otter/otter-version.h.in index a1d0f4e..6d5f2b3 100644 --- a/include/otter/otter-version.h.in +++ b/include/otter/otter-version.h.in @@ -4,6 +4,7 @@ #define OTTER_VERSION_MAJOR "@PROJECT_VERSION_MAJOR@" #define OTTER_VERSION_MINOR 
"@PROJECT_VERSION_MINOR@" #define OTTER_VERSION_PATCH "@PROJECT_VERSION_PATCH@" +#define OTTER_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@" #define CC_VERSION "@CC_VERSION@" diff --git a/include/otter/otter.h b/include/otter/otter.h index 91612cd..5809b0c 100644 --- a/include/otter/otter.h +++ b/include/otter/otter.h @@ -40,19 +40,19 @@ #define implements_callback_master #include "otter/ompt-callback-prototypes.h" -/* Used as an array index to keep track of unique ids for different entities */ -typedef enum unique_id_type_t { - id_timestamp , - id_parallel , - id_thread , - id_task , - NUM_ID_TYPES -} unique_id_type_t; -#define get_unique_parallel_id() get_unique_id(id_parallel) -#define get_unique_thread_id() get_unique_id(id_thread) -#define get_unique_task_id() get_unique_id(id_task) -#define get_dummy_time() get_unique_id(id_timestamp) - -unique_id_t get_unique_id(unique_id_type_t id_type); +// /* Used as an array index to keep track of unique ids for different entities */ +// typedef enum unique_id_type_t { +// id_timestamp , +// id_parallel , +// id_thread , +// id_task , +// NUM_ID_TYPES +// } unique_id_type_t; +// #define get_unique_parallel_id() get_unique_id(id_parallel) +// #define get_unique_thread_id() get_unique_id(id_thread) +// #define get_unique_task_id() get_unique_id(id_task) +// #define get_dummy_time() get_unique_id(id_timestamp) + +// unique_id_t get_unique_id(unique_id_type_t id_type); #endif // OTTER_H diff --git a/include/otter/queue.h b/include/otter/queue.h index cda72c2..8f9ec2d 100644 --- a/include/otter/queue.h +++ b/include/otter/queue.h @@ -28,7 +28,7 @@ bool queue_append(otter_queue_t *q, otter_queue_t *r); /* scan through the items in a queue without modifying the queue write the current queue item to dest */ -void queue_scan(otter_queue_t *q, data_item_t *dest, void **next); +// void queue_scan(otter_queue_t *q, data_item_t *dest, void **next); #if DEBUG_LEVEL >= 4 void 
queue_print(otter_queue_t *q); diff --git a/include/otter/stack.h b/include/otter/stack.h index 3fff198..f87bc23 100644 --- a/include/otter/stack.h +++ b/include/otter/stack.h @@ -23,6 +23,9 @@ size_t stack_size(otter_stack_t *s); bool stack_is_empty(otter_stack_t *s); void stack_destroy(otter_stack_t *s, bool items, data_destructor_t destructor); +/* transfer the items from src to dest, maintaining the order of items in src */ +bool stack_transfer(otter_stack_t *dest, otter_stack_t *src); + #if DEBUG_LEVEL >= 4 void stack_print(otter_stack_t *s); #endif diff --git a/include/otter/trace-archive.h b/include/otter/trace-archive.h new file mode 100644 index 0000000..986d66e --- /dev/null +++ b/include/otter/trace-archive.h @@ -0,0 +1,28 @@ +/** + * @file trace-archive.h + * @author Adam Tuft + * @brief Responsible for initialising and finalising single instances of the + * trace archive and its global definitions writer. Returns pointers to these + * resources as well as mutexes protecting access to them both. 
+ */ + +#if !defined(OTTER_TRACE_ARCHIVE_H) +#define OTTER_TRACE_ARCHIVE_H + +#include +#include +#include "otter/otter-common.h" +#include "otter/char_ref_registry.hpp" + +pthread_mutex_t *global_def_writer_lock(void); +pthread_mutex_t *global_archive_lock(void); + +OTF2_GlobalDefWriter *get_global_def_writer(void); +OTF2_Archive *get_global_archive(void); +char_ref_registry *get_global_str_registry(void); + +/* interface function prototypes */ +bool trace_initialise_archive(otter_opt_t *opt); +bool trace_finalise_archive(void); + +#endif // OTTER_TRACE_ARCHIVE_H diff --git a/include/otter/trace-attribute-defs.h b/include/otter/trace-attribute-defs.h index a493b4f..8afb32a 100644 --- a/include/otter/trace-attribute-defs.h +++ b/include/otter/trace-attribute-defs.h @@ -37,6 +37,8 @@ INCLUDE_LABEL(flag, false ) /* Unique ID attributes */ INCLUDE_ATTRIBUTE(OTF2_TYPE_UINT64, unique_id, "unique ID of a task, parallel region or thread") +INCLUDE_ATTRIBUTE(OTF2_TYPE_UINT64, prior_task_id, "unique ID of a task suspended at a task-scheduling point") +INCLUDE_ATTRIBUTE(OTF2_TYPE_UINT64, next_task_id, "unique ID of a task resumed at a task-scheduling point") INCLUDE_ATTRIBUTE(OTF2_TYPE_UINT64, encountering_task_id, "unique ID of the task that encountered this region") /* Attributes relating to parallel regions */ @@ -66,7 +68,7 @@ INCLUDE_LABEL(event_type, workshare_end ) INCLUDE_LABEL(event_type, sync_begin ) INCLUDE_LABEL(event_type, sync_end ) INCLUDE_LABEL(event_type, task_create ) -INCLUDE_LABEL(event_type, task_schedule ) +INCLUDE_LABEL(event_type, task_switch ) INCLUDE_LABEL(event_type, task_enter ) INCLUDE_LABEL(event_type, task_leave ) INCLUDE_LABEL(event_type, master_begin ) @@ -102,6 +104,7 @@ INCLUDE_LABEL(thread_type, initial) INCLUDE_LABEL(thread_type, worker ) /* region type - parallel, workshare, sync, task */ +INCLUDE_ATTRIBUTE(OTF2_TYPE_STRING, next_task_region_type, "region type of a task resumed at a task-scheduling point") 
INCLUDE_ATTRIBUTE(OTF2_TYPE_STRING, region_type, "region type") /* generic region types */ INCLUDE_LABEL(region_type, parallel) @@ -139,5 +142,10 @@ INCLUDE_LABEL(prior_task_status, early_fulfil ) INCLUDE_LABEL(prior_task_status, late_fulfil ) INCLUDE_LABEL(prior_task_status, switch ) +/* task source location */ +INCLUDE_ATTRIBUTE(OTF2_TYPE_UINT32, source_line_number, "the line number of the construct which caused this region to be created") +INCLUDE_ATTRIBUTE(OTF2_TYPE_STRING, source_file_name, "the source file containing the construct which caused this region to be created") +INCLUDE_ATTRIBUTE(OTF2_TYPE_STRING, source_func_name, "the name of the function containing the construct which caused this region to be created") + #undef INCLUDE_LABEL #undef INCLUDE_ATTRIBUTE diff --git a/include/otter/trace-check-error-code.h b/include/otter/trace-check-error-code.h new file mode 100644 index 0000000..e5008ce --- /dev/null +++ b/include/otter/trace-check-error-code.h @@ -0,0 +1,15 @@ +#if !defined(OTTER_TRACE_OTF2_ERROR_CODE_H) +#define OTTER_TRACE_OTF2_ERROR_CODE_H + +#define CHECK_OTF2_ERROR_CODE(r) \ + {if (r != OTF2_SUCCESS) \ + { \ + LOG_ERROR("%s: %s (%s:%d)", \ + OTF2_Error_GetName(r), \ + OTF2_Error_GetDescription(r), \ + __FILE__, \ + __LINE__ \ + ); \ + }} + +#endif // OTTER_TRACE_OTF2_ERROR_CODE_H diff --git a/include/otter/trace-location.h b/include/otter/trace-location.h new file mode 100644 index 0000000..12121a9 --- /dev/null +++ b/include/otter/trace-location.h @@ -0,0 +1,46 @@ +/** + * @file trace-location.h + * @author Adam Tuft + * @brief Defines trace_location_def_t which represents an OTF2 location, used + * to record the location's definition in the trace. Responsible for new/delete, + * adding a thread's attributes to its OTF2 attribute list when recording an + * event, and writing a location's definition to the trace. 
+ */ + +#if !defined(OTTER_TRACE_LOCATION_H) +#define OTTER_TRACE_LOCATION_H + +#include +#include "otter/otter-common.h" +#include "otter/queue.h" +#include "otter/stack.h" +#include "otter/trace-types.h" + +/* Store values needed to register location definition (threads) with OTF2 */ +typedef struct { + unique_id_t id; + otter_thread_t thread_type; + uint64_t events; + otter_stack_t *rgn_stack; + otter_queue_t *rgn_defs; + otter_stack_t *rgn_defs_stack; + OTF2_LocationRef ref; + OTF2_LocationType type; + OTF2_LocationGroupRef location_group; + OTF2_AttributeList *attributes; + OTF2_EvtWriter *evt_writer; + OTF2_DefWriter *def_writer; +} trace_location_def_t; + +/* Create new location */ +trace_location_def_t *trace_new_location_definition( + uint64_t id, + otter_thread_t thread_type, + OTF2_LocationType loc_type, + OTF2_LocationGroupRef loc_grp +); +void trace_destroy_location(trace_location_def_t *loc); +void trace_add_thread_attributes(trace_location_def_t *self); +void trace_write_location_definition(trace_location_def_t *loc); + +#endif // OTTER_TRACE_LOCATION_H diff --git a/include/otter/trace-lookup-macros.h b/include/otter/trace-lookup-macros.h index b811e82..f249631 100644 --- a/include/otter/trace-lookup-macros.h +++ b/include/otter/trace-lookup-macros.h @@ -2,124 +2,124 @@ #define OTTER_TRACE_LOOKUP_MACROS_H #define WORK_TYPE_TO_STR_REF(type) \ - (type == ompt_work_loop \ + (type == otter_work_loop \ ? attr_label_ref[attr_region_type_loop] : \ - type == ompt_work_sections \ + type == otter_work_sections \ ? attr_label_ref[attr_region_type_sections] : \ - type == ompt_work_single_executor \ + type == otter_work_single_executor \ ? attr_label_ref[attr_region_type_single_executor] : \ - type == ompt_work_single_other \ + type == otter_work_single_other \ ? attr_label_ref[attr_region_type_single_other] : \ - type == ompt_work_workshare \ + type == otter_work_workshare \ ? 
attr_label_ref[attr_region_type_workshare] : \ - type == ompt_work_distribute \ + type == otter_work_distribute \ ? attr_label_ref[attr_region_type_distribute] : \ - type == ompt_work_taskloop \ + type == otter_work_taskloop \ ? attr_label_ref[attr_region_type_taskloop] : 0) #define WORK_TYPE_TO_OTF2_REGION_ROLE(type) \ - (type == ompt_work_loop ? OTF2_REGION_ROLE_LOOP : \ - type == ompt_work_sections ? OTF2_REGION_ROLE_SECTIONS : \ - type == ompt_work_single_executor ? OTF2_REGION_ROLE_SINGLE : \ - type == ompt_work_single_other ? OTF2_REGION_ROLE_SINGLE : \ - type == ompt_work_workshare ? OTF2_REGION_ROLE_WORKSHARE : \ - type == ompt_work_distribute ? OTF2_REGION_ROLE_UNKNOWN : \ - type == ompt_work_taskloop ? OTF2_REGION_ROLE_LOOP : \ + (type == otter_work_loop ? OTF2_REGION_ROLE_LOOP : \ + type == otter_work_sections ? OTF2_REGION_ROLE_SECTIONS : \ + type == otter_work_single_executor ? OTF2_REGION_ROLE_SINGLE : \ + type == otter_work_single_other ? OTF2_REGION_ROLE_SINGLE : \ + type == otter_work_workshare ? OTF2_REGION_ROLE_WORKSHARE : \ + type == otter_work_distribute ? OTF2_REGION_ROLE_UNKNOWN : \ + type == otter_work_taskloop ? OTF2_REGION_ROLE_LOOP : \ OTF2_REGION_ROLE_UNKNOWN) /* Intel compiler defines additional types of sync regions compared to LLVM */ #if defined(__INTEL_COMPILER) #define SYNC_TYPE_TO_STR_REF(type) \ - (type == ompt_sync_region_barrier \ + (type == otter_sync_region_barrier \ ? attr_label_ref[attr_region_type_barrier] : \ - type == ompt_sync_region_barrier_implicit \ + type == otter_sync_region_barrier_implicit \ ? attr_label_ref[attr_region_type_barrier_implicit] : \ - type == ompt_sync_region_barrier_explicit \ + type == otter_sync_region_barrier_explicit \ ? attr_label_ref[attr_region_type_barrier_explicit] : \ - type == ompt_sync_region_barrier_implementation \ + type == otter_sync_region_barrier_implementation \ ? 
attr_label_ref[attr_region_type_barrier_implementation] : \ - type == ompt_sync_region_taskwait \ + type == otter_sync_region_taskwait \ ? attr_label_ref[attr_region_type_taskwait] : \ - type == ompt_sync_region_taskgroup \ + type == otter_sync_region_taskgroup \ ? attr_label_ref[attr_region_type_taskgroup] : \ - type == ompt_sync_region_barrier_implicit_workshare \ + type == otter_sync_region_barrier_implicit_workshare \ ? attr_label_ref[attr_region_type_barrier_implicit] : \ - type == ompt_sync_region_barrier_implicit_parallel \ + type == otter_sync_region_barrier_implicit_parallel \ ? attr_label_ref[attr_region_type_barrier_implicit] : \ - type == ompt_sync_region_barrier_teams \ + type == otter_sync_region_barrier_teams \ ? attr_label_ref[attr_region_type_barrier_implicit] : 0 ) #else #define SYNC_TYPE_TO_STR_REF(type) \ - (type == ompt_sync_region_barrier \ + (type == otter_sync_region_barrier \ ? attr_label_ref[attr_region_type_barrier] : \ - type == ompt_sync_region_barrier_implicit \ + type == otter_sync_region_barrier_implicit \ ? attr_label_ref[attr_region_type_barrier_implicit] : \ - type == ompt_sync_region_barrier_explicit \ + type == otter_sync_region_barrier_explicit \ ? attr_label_ref[attr_region_type_barrier_explicit] : \ - type == ompt_sync_region_barrier_implementation \ + type == otter_sync_region_barrier_implementation \ ? attr_label_ref[attr_region_type_barrier_implementation] : \ - type == ompt_sync_region_taskwait \ + type == otter_sync_region_taskwait \ ? attr_label_ref[attr_region_type_taskwait] : \ - type == ompt_sync_region_taskgroup \ + type == otter_sync_region_taskgroup \ ? attr_label_ref[attr_region_type_taskgroup] : 0) #endif #if defined(__INTEL_COMPILER) #define SYNC_TYPE_TO_OTF2_REGION_ROLE(type) \ - (type == ompt_sync_region_barrier \ + (type == otter_sync_region_barrier \ ? OTF2_REGION_ROLE_BARRIER : \ - type == ompt_sync_region_barrier_implicit \ + type == otter_sync_region_barrier_implicit \ ? 
OTF2_REGION_ROLE_IMPLICIT_BARRIER : \ - type == ompt_sync_region_barrier_explicit \ + type == otter_sync_region_barrier_explicit \ ? OTF2_REGION_ROLE_BARRIER : \ - type == ompt_sync_region_barrier_implementation \ + type == otter_sync_region_barrier_implementation \ ? OTF2_REGION_ROLE_BARRIER : \ - type == ompt_sync_region_taskwait \ + type == otter_sync_region_taskwait \ ? OTF2_REGION_ROLE_TASK_WAIT : \ - type == ompt_sync_region_taskgroup \ + type == otter_sync_region_taskgroup \ ? OTF2_REGION_ROLE_TASK_WAIT : \ - type == ompt_sync_region_barrier_implicit_workshare \ + type == otter_sync_region_barrier_implicit_workshare \ ? OTF2_REGION_ROLE_IMPLICIT_BARRIER : \ - type == ompt_sync_region_barrier_implicit_parallel \ + type == otter_sync_region_barrier_implicit_parallel \ ? OTF2_REGION_ROLE_IMPLICIT_BARRIER : \ - type == ompt_sync_region_barrier_teams \ + type == otter_sync_region_barrier_teams \ ? OTF2_REGION_ROLE_IMPLICIT_BARRIER : OTF2_REGION_ROLE_UNKNOWN) #else #define SYNC_TYPE_TO_OTF2_REGION_ROLE(type) \ - (type == ompt_sync_region_barrier \ + (type == otter_sync_region_barrier \ ? OTF2_REGION_ROLE_BARRIER : \ - type == ompt_sync_region_barrier_implicit \ + type == otter_sync_region_barrier_implicit \ ? OTF2_REGION_ROLE_IMPLICIT_BARRIER : \ - type == ompt_sync_region_barrier_explicit \ + type == otter_sync_region_barrier_explicit \ ? OTF2_REGION_ROLE_BARRIER : \ - type == ompt_sync_region_barrier_implementation \ + type == otter_sync_region_barrier_implementation \ ? OTF2_REGION_ROLE_BARRIER : \ - type == ompt_sync_region_taskwait \ + type == otter_sync_region_taskwait \ ? OTF2_REGION_ROLE_TASK_WAIT : \ - type == ompt_sync_region_taskgroup \ + type == otter_sync_region_taskgroup \ ? OTF2_REGION_ROLE_TASK_WAIT : OTF2_REGION_ROLE_UNKNOWN) #endif #define TASK_TYPE_TO_STR_REF(type) \ - (type == ompt_task_initial ? attr_label_ref[attr_task_type_initial_task]: \ - type == ompt_task_implicit ? 
attr_label_ref[attr_task_type_implicit_task]:\ - type == ompt_task_explicit ? attr_label_ref[attr_task_type_explicit_task]:\ - type == ompt_task_target ? attr_label_ref[attr_task_type_target_task]: 0 )\ + (type == otter_task_initial ? attr_label_ref[attr_task_type_initial_task]: \ + type == otter_task_implicit ? attr_label_ref[attr_task_type_implicit_task]:\ + type == otter_task_explicit ? attr_label_ref[attr_task_type_explicit_task]:\ + type == otter_task_target ? attr_label_ref[attr_task_type_target_task]: 0) \ #define TASK_STATUS_TO_STR_REF(status) \ - (status == ompt_task_complete ? \ + (status == otter_task_complete ? \ attr_label_ref[attr_prior_task_status_complete] : \ - status == ompt_task_yield ? \ + status == otter_task_yield ? \ attr_label_ref[attr_prior_task_status_yield] : \ - status == ompt_task_cancel ? \ + status == otter_task_cancel ? \ attr_label_ref[attr_prior_task_status_cancel] : \ - status == ompt_task_detach ? \ + status == otter_task_detach ? \ attr_label_ref[attr_prior_task_status_detach] : \ - status == ompt_task_early_fulfill ? \ + status == otter_task_early_fulfill ? \ attr_label_ref[attr_prior_task_status_early_fulfil] : \ - status == ompt_task_late_fulfill ? \ + status == otter_task_late_fulfill ? \ attr_label_ref[attr_prior_task_status_late_fulfil] : \ - status == ompt_task_switch ? \ + status == otter_task_switch ? 
\ attr_label_ref[attr_prior_task_status_switch] : 0 ) #endif // OTTER_TRACE_LOOKUP_MACROS_H diff --git a/include/otter/trace-region-master.h b/include/otter/trace-region-master.h new file mode 100644 index 0000000..e0d86ba --- /dev/null +++ b/include/otter/trace-region-master.h @@ -0,0 +1,26 @@ +#if !defined(OTTER_TRACE_RGN_MASTER_H) +#define OTTER_TRACE_RGN_MASTER_H + +#include +#include +#include + +#include "otter/otter-ompt-header.h" +#include "otter/otter-common.h" +#include "otter/queue.h" +#include "otter/stack.h" +#include "otter/trace.h" + +/* Create region */ +trace_region_def_t * +trace_new_master_region( + trace_location_def_t *loc, + unique_id_t encountering_task_id +); + +/* Destroy region */ +void trace_destroy_master_region(trace_region_def_t *rgn); + +void trace_add_master_attributes(trace_region_def_t *rgn); + +#endif // OTTER_TRACE_RGN_MASTER_H diff --git a/include/otter/trace-region-parallel.h b/include/otter/trace-region-parallel.h new file mode 100644 index 0000000..b09a8ac --- /dev/null +++ b/include/otter/trace-region-parallel.h @@ -0,0 +1,29 @@ +#if !defined(OTTER_TRACE_RGN_PARALLEL_H) +#define OTTER_TRACE_RGN_PARALLEL_H + +#include +#include +#include + +#include "otter/otter-ompt-header.h" +#include "otter/otter-common.h" +#include "otter/queue.h" +#include "otter/stack.h" +#include "otter/trace.h" + +/* Create new region */ +trace_region_def_t * +trace_new_parallel_region( + unique_id_t id, + unique_id_t master, + unique_id_t encountering_task_id, + int flags, + unsigned int requested_parallelism +); + +/* Destroy parallel region */ +void trace_destroy_parallel_region(trace_region_def_t *rgn); + +void trace_add_parallel_attributes(trace_region_def_t *rgn); + +#endif // OTTER_TRACE_RGN_PARALLEL_H diff --git a/include/otter/trace-region-sync.h b/include/otter/trace-region-sync.h new file mode 100644 index 0000000..ba34a49 --- /dev/null +++ b/include/otter/trace-region-sync.h @@ -0,0 +1,27 @@ +#if !defined(OTTER_TRACE_RGN_SYNC_H) +#define 
OTTER_TRACE_RGN_SYNC_H + +#include +#include +#include + +#include "otter/otter-ompt-header.h" +#include "otter/otter-common.h" +#include "otter/queue.h" +#include "otter/stack.h" +#include "otter/trace.h" + +/* Create region */ +trace_region_def_t * +trace_new_sync_region( + trace_location_def_t *loc, + otter_sync_region_t stype, + unique_id_t encountering_task_id +); + +/* Destroy region */ +void trace_destroy_sync_region(trace_region_def_t *rgn); + +void trace_add_sync_attributes(trace_region_def_t *rgn); + +#endif // OTTER_TRACE_RGN_SYNC_H diff --git a/include/otter/trace-region-task.h b/include/otter/trace-region-task.h new file mode 100644 index 0000000..63058c7 --- /dev/null +++ b/include/otter/trace-region-task.h @@ -0,0 +1,30 @@ +#if !defined(OTTER_TRACE_RGN_TASK_H) +#define OTTER_TRACE_RGN_TASK_H + +#include +#include +#include + +#include "otter/otter-ompt-header.h" +#include "otter/otter-common.h" +#include "otter/queue.h" +#include "otter/stack.h" +#include "otter/trace.h" + +/* Create region */ +trace_region_def_t * +trace_new_task_region( + trace_location_def_t *loc, + trace_region_def_t *parent_task_region, + unique_id_t task_id, + otter_task_flag_t flags, + int has_dependences, + otter_src_location_t *src_location +); + +/* Destroy region */ +void trace_destroy_task_region(trace_region_def_t *rgn); + +void trace_add_task_attributes(trace_region_def_t *rgn); + +#endif // OTTER_TRACE_RGN_TASK_H diff --git a/include/otter/trace-region-workshare.h b/include/otter/trace-region-workshare.h new file mode 100644 index 0000000..ad7e694 --- /dev/null +++ b/include/otter/trace-region-workshare.h @@ -0,0 +1,28 @@ +#if !defined(OTTER_TRACE_RGN_WORKSHARE_H) +#define OTTER_TRACE_RGN_WORKSHARE_H + +#include +#include +#include + +#include "otter/otter-ompt-header.h" +#include "otter/otter-common.h" +#include "otter/queue.h" +#include "otter/stack.h" +#include "otter/trace.h" + +/* Create region */ +trace_region_def_t * +trace_new_workshare_region( + 
trace_location_def_t *loc, + otter_work_t wstype, + uint64_t count, + unique_id_t encountering_task_id +); + +/* Destroy region */ +void trace_destroy_workshare_region(trace_region_def_t *rgn); + +void trace_add_workshare_attributes(trace_region_def_t *rgn); + +#endif // OTTER_TRACE_RGN_WORKSHARE_H diff --git a/include/otter/trace-static-constants.h b/include/otter/trace-static-constants.h new file mode 100644 index 0000000..b16ef13 --- /dev/null +++ b/include/otter/trace-static-constants.h @@ -0,0 +1,10 @@ +#if !defined(OTTER_TRACE_STATIC_CONST_H) +#define OTTER_TRACE_STATIC_CONST_H + +#include + +static const unsigned int DEFAULT_NAME_BUF_SZ = 256; +static const OTF2_LocationGroupRef DEFAULT_LOCATION_GRP = 0; +static const OTF2_SystemTreeNodeRef DEFAULT_SYSTEM_TREE = 0; + +#endif // OTTER_TRACE_STATIC_CONST_H diff --git a/include/otter/trace-structs.h b/include/otter/trace-structs.h index d3f811f..db395a6 100644 --- a/include/otter/trace-structs.h +++ b/include/otter/trace-structs.h @@ -3,6 +3,7 @@ #include #include +#include #include #include "otter/otter-ompt-header.h" @@ -10,140 +11,14 @@ #include "otter/queue.h" #include "otter/stack.h" #include "otter/trace.h" - -/* Forward definitions */ -typedef struct trace_parallel_region_attr_t trace_parallel_region_attr_t; -typedef struct trace_region_attr_empty_t trace_region_attr_empty_t; -typedef struct trace_wshare_region_attr_t trace_wshare_region_attr_t; -typedef struct trace_master_region_attr_t trace_master_region_attr_t; -typedef struct trace_sync_region_attr_t trace_sync_region_attr_t; -typedef struct trace_task_region_attr_t trace_task_region_attr_t; - -/* Attributes of a parallel region */ -struct trace_parallel_region_attr_t { - unique_id_t id; - unique_id_t master_thread; - bool is_league; - unsigned int requested_parallelism; - unsigned int ref_count; - unsigned int enter_count; - pthread_mutex_t lock_rgn; - otter_queue_t *rgn_defs; -}; - -/* Attributes of a workshare region */ -struct 
trace_wshare_region_attr_t { - ompt_work_t type; - uint64_t count; -}; - -/* Attributes of a master region */ -struct trace_master_region_attr_t { - uint64_t thread; -}; - -/* Attributes of a sync region */ -struct trace_sync_region_attr_t { - ompt_sync_region_t type; - unique_id_t encountering_task_id; -}; - -/* Attributes of a task region */ -struct trace_task_region_attr_t { - unique_id_t id; - ompt_task_flag_t type; - ompt_task_flag_t flags; - int has_dependences; - unique_id_t parent_id; - ompt_task_flag_t parent_type; - ompt_task_status_t task_status; -}; - -/* Store values needed to register region definition (tasks, parallel regions, - workshare constructs etc.) with OTF2 */ -struct trace_region_def_t { - OTF2_RegionRef ref; - OTF2_RegionRole role; - OTF2_AttributeList *attributes; - trace_region_type_t type; - unique_id_t encountering_task_id; - union { - trace_parallel_region_attr_t parallel; - trace_wshare_region_attr_t wshare; - trace_master_region_attr_t master; - trace_sync_region_attr_t sync; - trace_task_region_attr_t task; - } attr; -}; - -/* Store values needed to register location definition (threads) with OTF2 */ -struct trace_location_def_t { - unique_id_t id; - ompt_thread_t thread_type; - uint64_t events; - otter_stack_t *rgn_stack; - otter_queue_t *rgn_defs; - otter_stack_t *rgn_defs_stack; - OTF2_LocationRef ref; - OTF2_LocationType type; - OTF2_LocationGroupRef location_group; - OTF2_AttributeList *attributes; - OTF2_EvtWriter *evt_writer; - OTF2_DefWriter *def_writer; -}; - -/* Create new location */ -trace_location_def_t * -trace_new_location_definition( - uint64_t id, - ompt_thread_t thread_type, - OTF2_LocationType loc_type, - OTF2_LocationGroupRef loc_grp); - -/* Create new region */ -trace_region_def_t * -trace_new_parallel_region( - unique_id_t id, - unique_id_t master, - unique_id_t encountering_task_id, - int flags, - unsigned int requested_parallelism); - -trace_region_def_t * -trace_new_workshare_region( - trace_location_def_t 
*loc, - ompt_work_t wstype, - uint64_t count, - unique_id_t encountering_task_id); - -trace_region_def_t * -trace_new_master_region( - trace_location_def_t *loc, - unique_id_t encountering_task_id); - -trace_region_def_t * -trace_new_sync_region( - trace_location_def_t *loc, - ompt_sync_region_t stype, - unique_id_t encountering_task_id); - -trace_region_def_t * -trace_new_task_region( - trace_location_def_t *loc, - trace_region_def_t *parent_task_region, - unique_id_t task_id, - ompt_task_flag_t flags, - int has_dependences); - -/* Destroy location/region */ -void trace_destroy_location(trace_location_def_t *loc); -void trace_destroy_parallel_region(trace_region_def_t *rgn); -void trace_destroy_workshare_region(trace_region_def_t *rgn); -void trace_destroy_master_region(trace_region_def_t *rgn); -void trace_destroy_sync_region(trace_region_def_t *rgn); -void trace_destroy_task_region(trace_region_def_t *rgn); +#include "otter/trace-location.h" +#include "otter/trace-region-parallel.h" +#include "otter/trace-region-workshare.h" +#include "otter/trace-region-master.h" +#include "otter/trace-region-sync.h" +#include "otter/trace-region-task.h" /* pretty-print region definitions */ -void trace_region_pprint(FILE *fp, trace_region_def_t *r, const char func[], const int line); +// void trace_region_pprint(FILE *fp, trace_region_def_t *r, const char func[], const int line); #endif // OTTER_TRACE_STRUCTS_H diff --git a/include/otter/trace-types.h b/include/otter/trace-types.h new file mode 100644 index 0000000..5001688 --- /dev/null +++ b/include/otter/trace-types.h @@ -0,0 +1,157 @@ +#if !defined(OTTER_TRACE_TYPES_H) +#define OTTER_TRACE_TYPES_H + +#include +#include +#include "otter/otter-common.h" +#include "otter/queue.h" +#include "otter/stack.h" +#include "otter/char_ref_registry.hpp" + +/* Different kinds of unique IDs */ +typedef enum trace_ref_type_t { + trace_region, + trace_string, + trace_location, + trace_other, + NUM_REF_TYPES // <- MUST BE LAST ENUM ITEM +} 
trace_ref_type_t; + +/* Types of event endpoint */ +typedef enum { + trace_event_type_enter, + trace_event_type_leave, + trace_event_type_task_create +} trace_event_type_t; + +/* Different kinds of regions supported */ +typedef enum { + trace_region_parallel, + trace_region_workshare, + trace_region_synchronise, + trace_region_task, +#if defined(USE_OMPT_MASKED) + trace_region_masked +#else + trace_region_master +#endif +} trace_region_type_t; + +typedef enum otter_thread_t { + otter_thread_initial = 1, + otter_thread_worker = 2, + otter_thread_other = 3, + otter_thread_unknown = 4 +} otter_thread_t; + +typedef enum otter_work_t { + otter_work_loop = 1, + otter_work_sections = 2, + otter_work_single_executor = 3, + otter_work_single_other = 4, + otter_work_workshare = 5, + otter_work_distribute = 6, + otter_work_taskloop = 7 +} otter_work_t; + +typedef enum otter_sync_region_t { + otter_sync_region_barrier = 1, + otter_sync_region_barrier_implicit = 2, + otter_sync_region_barrier_explicit = 3, + otter_sync_region_barrier_implementation = 4, + otter_sync_region_taskwait = 5, + otter_sync_region_taskgroup = 6, + otter_sync_region_reduction = 7, + otter_sync_region_barrier_implicit_workshare = 8, + otter_sync_region_barrier_implicit_parallel = 9, + otter_sync_region_barrier_teams = 10 +} otter_sync_region_t; + +typedef enum otter_task_flag_t { + otter_task_initial = 0x00000001, + otter_task_implicit = 0x00000002, + otter_task_explicit = 0x00000004, + otter_task_target = 0x00000008, + otter_task_taskwait = 0x00000010, + otter_task_undeferred = 0x08000000, + otter_task_untied = 0x10000000, + otter_task_final = 0x20000000, + otter_task_mergeable = 0x40000000, + otter_task_merged = 0x80000000 +} otter_task_flag_t; + +typedef enum otter_task_status_t { + otter_task_complete = 1, + otter_task_yield = 2, + otter_task_cancel = 3, + otter_task_detach = 4, + otter_task_early_fulfill = 5, + otter_task_late_fulfill = 6, + otter_task_switch = 7, + otter_taskwait_complete = 8 +} 
otter_task_status_t; + +/* Attributes of a parallel region */ +typedef struct { + unique_id_t id; + unique_id_t master_thread; + bool is_league; + unsigned int requested_parallelism; + unsigned int ref_count; + unsigned int enter_count; + pthread_mutex_t lock_rgn; + otter_queue_t *rgn_defs; +} trace_parallel_region_attr_t; + +/* Attributes of a workshare region */ +typedef struct { + otter_work_t type; + uint64_t count; +} trace_wshare_region_attr_t; + +/* Attributes of a master region */ +typedef struct { + uint64_t thread; +} trace_master_region_attr_t; + +/* Attributes of a sync region */ +typedef struct { + otter_sync_region_t type; + unique_id_t encountering_task_id; +} trace_sync_region_attr_t; + +/* Attributes of a task region */ +typedef struct { + unique_id_t id; + otter_task_flag_t type; + otter_task_flag_t flags; + int has_dependences; + unique_id_t parent_id; + otter_task_flag_t parent_type; + otter_task_status_t task_status; + otter_string_ref_t source_file_name_ref; + otter_string_ref_t source_func_name_ref; + int source_line_number; +} trace_task_region_attr_t; + +typedef union { + trace_parallel_region_attr_t parallel; + trace_wshare_region_attr_t wshare; + trace_master_region_attr_t master; + trace_sync_region_attr_t sync; + trace_task_region_attr_t task; +} trace_region_attr_t; + +/* Store values needed to register region definition (tasks, parallel regions, + workshare constructs etc.) 
with OTF2 */ +typedef struct { + OTF2_RegionRef ref; + OTF2_RegionRole role; + OTF2_AttributeList *attributes; + trace_region_type_t type; + unique_id_t encountering_task_id; + otter_stack_t *rgn_stack; + trace_region_attr_t attr; +} trace_region_def_t; + +#endif // OTTER_TRACE_TYPES_H diff --git a/include/otter/trace-unique-refs.h b/include/otter/trace-unique-refs.h new file mode 100644 index 0000000..4b9c32a --- /dev/null +++ b/include/otter/trace-unique-refs.h @@ -0,0 +1,13 @@ +#if !defined(OTTER_TRACE_UNIQUE_REFS_H) +#define OTTER_TRACE_UNIQUE_REFS_H + +#include + +// Implement unique references for various OTF2 constructs +// Internal to otter-trace only + +OTF2_RegionRef get_unique_rgn_ref(void); +OTF2_StringRef get_unique_str_ref(void); +OTF2_LocationRef get_unique_loc_ref(void); + +#endif // OTTER_TRACE_UNIQUE_REFS_H diff --git a/include/otter/trace.h b/include/otter/trace.h index 6228806..f06f19a 100644 --- a/include/otter/trace.h +++ b/include/otter/trace.h @@ -1,87 +1,27 @@ #if !defined(OTTER_TRACE_H) #define OTTER_TRACE_H -#include -#include -#include -#include - -#include "otter/otter-ompt-header.h" -#include "otter/otter-common.h" - -#define DEFAULT_LOCATION_GRP 0 // OTF2_UNDEFINED_LOCATION_GROUP -#define DEFAULT_SYSTEM_TREE 0 -#define DEFAULT_NAME_BUF_SZ 256 - -#define CHECK_OTF2_ERROR_CODE(r) \ - {if (r != OTF2_SUCCESS) \ - { \ - LOG_ERROR("%s: %s (%s:%d)", \ - OTF2_Error_GetName(r), \ - OTF2_Error_GetDescription(r), \ - __FILE__, \ - __LINE__ \ - ); \ - }} - -#define get_unique_rgn_ref() (get_unique_uint32_ref(trace_region)) -#define get_unique_str_ref() (get_unique_uint32_ref(trace_string)) -#define get_unique_loc_ref() (get_unique_uint64_ref(trace_location)) -#define get_other_ref() (get_unique_uint64_ref(trace_other)) - -/* Different kinds of unique IDs */ -typedef enum trace_ref_type_t { - trace_region, - trace_string, - trace_location, - trace_other, - NUM_REF_TYPES // <- MUST BE LAST ENUM ITEM -} trace_ref_type_t; - -/* event endpoint */ 
-typedef enum { - trace_event_type_enter, - trace_event_type_leave, - trace_event_type_task_create -} trace_event_type_t; - -/* Different kinds of regions supported */ -typedef enum { - trace_region_parallel, - trace_region_workshare, - trace_region_synchronise, - trace_region_task, -#if defined(USE_OMPT_MASKED) - trace_region_masked -#else - trace_region_master -#endif -} trace_region_type_t; - -/* Defined in trace-structs.h */ -typedef struct trace_region_def_t trace_region_def_t; -typedef struct trace_location_def_t trace_location_def_t; - -/* unique OTF2 refs accessed via macro wrappers */ -uint64_t get_unique_uint64_ref(trace_ref_type_t ref_type); -uint32_t get_unique_uint32_ref(trace_ref_type_t ref_type); - -/* interface function prototypes */ -bool trace_initialise_archive(otter_opt_t *opt); -bool trace_finalise_archive(void); - -/* write events */ +#include "otter/trace-types.h" +#include "otter/trace-static-constants.h" +#include "otter/trace-location.h" +#include "otter/trace-archive.h" +#include "otter/trace-region-parallel.h" +#include "otter/trace-region-task.h" +#include "otter/trace-region-workshare.h" +#include "otter/trace-region-master.h" +#include "otter/trace-region-sync.h" + +/* Functions defined in trace-core.c */ void trace_event_thread_begin(trace_location_def_t *self); void trace_event_thread_end(trace_location_def_t *self); void trace_event_enter(trace_location_def_t *self, trace_region_def_t *region); void trace_event_leave(trace_location_def_t *self); void trace_event_task_create(trace_location_def_t *self, trace_region_def_t *created_task); -void trace_event_task_schedule(trace_location_def_t *self, trace_region_def_t *prior_task, ompt_task_status_t prior_status); -// void trace_event_task_switch(trace_location_def_t *self); +void trace_event_task_schedule(trace_location_def_t *self, trace_region_def_t *prior_task, otter_task_status_t prior_status); +void trace_event_task_switch(trace_location_def_t *self, trace_region_def_t 
*prior_task, otter_task_status_t prior_status, trace_region_def_t *next_task); // void trace_event_task_complete(trace_location_def_t *self); - -/* write definitions to the global def writer */ -void trace_write_location_definition(trace_location_def_t *loc); void trace_write_region_definition(trace_region_def_t *rgn); +void trace_region_pprint(FILE *fp, trace_region_def_t *r, const char func[], const int line); + #endif // OTTER_TRACE_H diff --git a/lib/.gitignore b/lib/.gitignore deleted file mode 100644 index e11f08e..0000000 --- a/lib/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -* -*/ -!.gitignore - diff --git a/modules/pyotter b/modules/pyotter new file mode 160000 index 0000000..549a714 --- /dev/null +++ b/modules/pyotter @@ -0,0 +1 @@ +Subproject commit 549a71424f7bfb2e82295c941cd69150116459fb diff --git a/obj/.gitignore b/obj/.gitignore deleted file mode 100644 index e11f08e..0000000 --- a/obj/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -* -*/ -!.gitignore - diff --git a/src/otter/otter-core.c b/src/otter/events/ompt/otter-core.c similarity index 94% rename from src/otter/otter-core.c rename to src/otter/events/ompt/otter-core.c index 94dc775..86a9ccd 100644 --- a/src/otter/otter-core.c +++ b/src/otter/events/ompt/otter-core.c @@ -1,5 +1,8 @@ #include #include +#if !defined(__USE_POSIX) +#define __USE_POSIX // for HOST_NAME_MAX +#endif #include #include // gethostname #include // getrusage @@ -18,7 +21,7 @@ #include "otter/otter-entry.h" #include "otter/otter-environment-variables.h" #include "otter/trace.h" -#include "otter/trace-structs.h" +// #include "otter/trace-structs.h" /* Static function prototypes */ static void print_resource_usage(void); @@ -305,7 +308,7 @@ on_ompt_callback_task_create( /* make space for the newly-created task */ task_data_t *task_data = new_task_data(thread_data->location, parent_task_data ? 
parent_task_data->region : NULL, - get_unique_task_id(), flags, has_dependences); + get_unique_task_id(), flags, has_dependences, NULL); /* record the task-create event */ trace_event_task_create(thread_data->location, task_data->region); @@ -364,6 +367,8 @@ on_ompt_callback_task_schedule( } #endif +#if defined(TASK_SCHEDULE_LEAVE_ENTER) + // Deprecated if (prior_task_data->type == ompt_task_explicit || prior_task_data->type == ompt_task_target) { @@ -380,6 +385,15 @@ on_ompt_callback_task_schedule( prior_task_data->region, 0); /* no status */ trace_event_enter(thread_data->location, next_task_data->region); } +#else + // Default is to record task-switch event + trace_event_task_switch( + thread_data->location, + prior_task_data->region, + prior_task_status, + next_task_data->region + ); +#endif return; } @@ -426,7 +440,9 @@ on_ompt_callback_implicit_task( parallel_data->encountering_task_data->region : NULL, get_unique_task_id(), flags, - 0); + 0, + NULL + ); task->ptr = implicit_task_data; /* Enter implicit task region */ @@ -505,7 +521,15 @@ on_ompt_callback_work( if (endpoint == ompt_scope_begin) { trace_region_def_t *wshare_rgn = trace_new_workshare_region( - thread_data->location, wstype, count, task_data->id); + thread_data->location, + /* Convert the OMPT enum type to a generic Otter enum type */ + wstype == ompt_work_loop ? otter_work_loop : + wstype == ompt_work_single_executor ? otter_work_single_executor : + wstype == ompt_work_single_other ? otter_work_single_other : + wstype == ompt_work_taskloop ? 
otter_work_taskloop : 0, + count, + task_data->id + ); trace_event_enter(thread_data->location, wshare_rgn); } else { @@ -617,9 +641,9 @@ on_ompt_callback_sync_region( return; } -unique_id_t -get_unique_id(unique_id_type_t id_type) -{ - static unique_id_t id[NUM_ID_TYPES] = {0,0,0,0}; - return __sync_fetch_and_add(&id[id_type], 1L); -} +// unique_id_t +// get_unique_id(unique_id_type_t id_type) +// { +// static unique_id_t id[NUM_ID_TYPES] = {0,0,0,0}; +// return __sync_fetch_and_add(&id[id_type], 1L); +// } diff --git a/src/otter/otter-entry.c b/src/otter/events/ompt/otter-entry.c similarity index 100% rename from src/otter/otter-entry.c rename to src/otter/events/ompt/otter-entry.c diff --git a/src/otter/events/serial/otter-serial.F90 b/src/otter/events/serial/otter-serial.F90 new file mode 100644 index 0000000..99108b3 --- /dev/null +++ b/src/otter/events/serial/otter-serial.F90 @@ -0,0 +1,220 @@ +module otter_serial + + contains + + subroutine fortran_otterTraceInitialise_i(filename, functionname, linenum) + use, intrinsic :: iso_c_binding + character(len = *) :: filename + character(len = *) :: functionname + integer :: linenum + interface + subroutine otterTraceInitialise_i(filename, functionname, linenum) bind(C, NAME="otterTraceInitialise_i") + use, intrinsic :: iso_c_binding + character(len=1, kind=c_char), dimension(*), intent(in) :: filename, functionname + integer(c_int), value :: linenum + end subroutine otterTraceInitialise_i + end interface + call otterTraceInitialise_i(trim(filename)//char(0), trim(functionname)//char(0), Int(linenum, kind=c_int)) + end subroutine fortran_otterTraceInitialise_i + + subroutine fortran_otterTraceFinalise() + interface + subroutine otterTraceFinalise() bind(C, NAME="otterTraceFinalise") + end subroutine + end interface + call otterTraceFinalise() + end subroutine fortran_otterTraceFinalise + + subroutine fortran_otterParallelBegin_i(filename, functionname, linenum) + use, intrinsic :: iso_c_binding + character(len = *) 
:: filename + character(len = *) :: functionname + integer :: linenum + interface + subroutine otterParallelBegin_i(filename, functionname, linenum) bind(C, NAME="otterParallelBegin_i") + use, intrinsic :: iso_c_binding + character(len=1, kind=c_char), dimension(*), intent(in) :: filename, functionname + integer(c_int), value :: linenum + end subroutine otterParallelBegin_i + end interface + call otterParallelBegin_i(trim(filename)//char(0), trim(functionname)//char(0), Int(linenum, kind=c_int)) + end subroutine fortran_otterParallelBegin_i + + subroutine fortran_otterParallelEnd() + use, intrinsic :: iso_c_binding + interface + subroutine otterParallelEnd() bind(C, NAME="otterParallelEnd") + end subroutine + end interface + + call otterParallelEnd() + + end subroutine + + subroutine fortran_otterTaskBegin_i(filename, functionname, linenum) + use, intrinsic :: iso_c_binding + character(len = *) :: filename + character(len = *) :: functionname + integer :: linenum + interface + subroutine otterTaskBegin_i(filename, functionname, linenum) bind(C, NAME="otterTaskBegin_i") + use, intrinsic :: iso_c_binding + character(len=1, kind=c_char), dimension(*), intent(in) :: filename, functionname + integer(c_int), value :: linenum + end subroutine otterTaskBegin_i + end interface + + call otterTaskBegin_i(trim(filename)//char(0), trim(functionname)//char(0), Int(linenum, kind=c_int)) + end subroutine fortran_otterTaskBegin_i + + subroutine fortran_otterTaskEnd() + use, intrinsic :: iso_c_binding + interface + subroutine otterTaskEnd() bind(C, NAME="otterTaskEnd") + end subroutine otterTaskEnd + end interface + + call otterTaskEnd() + end subroutine fortran_otterTaskEnd + + + subroutine fortran_otterTaskSingleBegin_i(filename, functionname, linenum) + use, intrinsic :: iso_c_binding + character(len = *) :: filename + character(len = *) :: functionname + integer :: linenum + interface + subroutine otterTaskSingleBegin_i(filename, functionname, linenum) bind(C, 
NAME="otterTaskSingleBegin_i") + use, intrinsic :: iso_c_binding + character(len=1, kind=c_char), dimension(*), intent(in) :: filename, functionname + integer(c_int), value :: linenum + end subroutine otterTaskSingleBegin_i + end interface + + call otterTaskSingleBegin_i(trim(filename)//char(0), trim(functionname)//char(0), Int(linenum, kind=c_int)) + end subroutine fortran_otterTaskSingleBegin_i + + subroutine fortran_otterTaskSingleEnd() + use, intrinsic :: iso_c_binding + interface + subroutine otterTaskSingleEnd() bind(C, NAME="otterTaskSingleEnd") + end subroutine + end interface + + call otterTaskSingleEnd() + end subroutine fortran_otterTaskSingleEnd + + subroutine fortran_otterLoopBegin_i(filename, functionname, linenum) + use, intrinsic :: iso_c_binding + character(len = *) :: filename + character(len = *) :: functionname + integer :: linenum + interface + subroutine otterLoopBegin_i(filename, functionname, linenum) bind(C, NAME="otterLoopBegin_i") + use, intrinsic :: iso_c_binding + character(len=1, kind=c_char), dimension(*), intent(in) :: filename, functionname + integer(c_int), value :: linenum + end subroutine otterLoopBegin_i + end interface + call otterLoopBegin_i(trim(filename)//char(0), trim(functionname)//char(0), Int(linenum, kind=c_int)) + end subroutine fortran_otterLoopBegin_i + + subroutine fortran_otterLoopEnd() + use, intrinsic :: iso_c_binding + + interface + subroutine otterLoopEnd() bind(C, NAME="otterLoopEnd") + use, intrinsic :: iso_c_binding + end subroutine otterLoopEnd + end interface + + call otterLoopEnd() + end subroutine + + subroutine fortran_otterLoopIterationBegin_i(filename, functionname, linenum) + use, intrinsic :: iso_c_binding + character(len = *) :: filename + character(len = *) :: functionname + integer :: linenum + interface + subroutine otterLoopIterationBegin_i(filename, functionname, linenum) bind(C, NAME="otterLoopIterationBegin_i") + use, intrinsic :: iso_c_binding + character(len=1, kind=c_char), dimension(*), 
intent(in) :: filename, functionname + integer(c_int), value :: linenum + end subroutine otterLoopIterationBegin_i + end interface + call otterLoopIterationBegin_i(trim(filename)//char(0), trim(functionname)//char(0), Int(linenum, kind=c_int)) + end subroutine fortran_otterLoopIterationBegin_i + + subroutine fortran_otterLoopIterationEnd() + use, intrinsic :: iso_c_binding + + interface + subroutine otterLoopIterationEnd() bind(C, NAME="otterLoopIterationEnd") + use, intrinsic :: iso_c_binding + end subroutine otterLoopIterationEnd + end interface + + call otterLoopIterationEnd() + end subroutine + + subroutine fortran_otterSynchroniseChildTasks_i(filename, functionname, linenum) + use, intrinsic :: iso_c_binding + character(len = *) :: filename + character(len = *) :: functionname + integer :: linenum + interface + subroutine otterSynchroniseChildTasks_i(filename, functionname, linenum) bind(C, NAME="otterSynchroniseChildTasks_i") + use, intrinsic :: iso_c_binding + character(len=1, kind=c_char), dimension(*), intent(in) :: filename, functionname + integer(c_int), value :: linenum + end subroutine otterSynchroniseChildTasks_i + end interface + call otterSynchroniseChildTasks_i(trim(filename)//char(0), trim(functionname)//char(0), Int(linenum, kind=c_int)) + + end subroutine fortran_otterSynchroniseChildTasks_i + + subroutine fortran_otterSynchroniseDescendantTasksBegin_i(filename, functionname, linenum) + use, intrinsic :: iso_c_binding + character(len = *) :: filename + character(len = *) :: functionname + integer :: linenum + interface + subroutine otterSynchroniseDescendantTasksBegin_i(filename, functionname, linenum)& + bind(C, NAME="otterSynchroniseDescendantTasksBegin_i") + use, intrinsic :: iso_c_binding + character(len=1, kind=c_char), dimension(*), intent(in) :: filename, functionname + integer(c_int), value :: linenum + end subroutine otterSynchroniseDescendantTasksBegin_i + end interface + call 
otterSynchroniseDescendantTasksBegin_i(trim(filename)//char(0), trim(functionname)//char(0), Int(linenum, kind=c_int)) + end subroutine fortran_otterSynchroniseDescendantTasksBegin_i + + subroutine fortran_otterSynchroniseDescendantTasksEnd() + use, intrinsic :: iso_c_binding + interface + subroutine otterSynchroniseDescendantTasksEnd() bind(C, NAME="otterSynchroniseDescendantTasksEnd") + end subroutine + end interface + call otterSynchroniseDescendantTasksEnd() + end subroutine fortran_otterSynchroniseDescendantTasksEnd + + subroutine fortran_otterTraceStart() + use, intrinsic :: iso_c_binding + interface + subroutine otterTraceStart() bind(C, NAME="otterTraceStart") + end subroutine otterTraceStart + end interface + call otterTraceStart() + end subroutine fortran_otterTraceStart + + subroutine fortran_otterTraceStop() + use, intrinsic :: iso_c_binding + interface + subroutine otterTraceStop() bind(C, NAME="otterTraceStop") + end subroutine otterTraceStop + end interface + call otterTraceStop() + end subroutine fortran_otterTraceStop + +end module otter_serial diff --git a/src/otter/events/serial/otter-serial.c b/src/otter/events/serial/otter-serial.c new file mode 100644 index 0000000..aa19895 --- /dev/null +++ b/src/otter/events/serial/otter-serial.c @@ -0,0 +1,489 @@ +#define __USE_POSIX // HOST_NAME_MAX +#include +#include +#include +#include + +#include "otter/otter-version.h" +#include "otter/general.h" +#include "otter/debug.h" +#include "otter/otter-environment-variables.h" +#include "otter/trace.h" +#include "otter/otter-serial.h" +#include "otter/otter-structs.h" +#include "otter/char_ref_registry.hpp" + +#define LOG_EVENT_CALL(file, func, line, ifunc) LOG_DEBUG("%s:%d in %s", file, line, func) + +static thread_data_t *thread_data = NULL; +static otter_stack_t *region_stack = NULL; +static otter_stack_t *task_stack = NULL; +static otter_stack_t *parallel_stack = NULL; +static bool tracingActive = false; + +/* detect environment variables */ +static 
otter_opt_t opt = { + .hostname = NULL, + .tracename = NULL, + .tracepath = NULL, + .archive_name = NULL, + .append_hostname = false +}; + +static task_data_t *get_encountering_task(void) +{ + task_data_t *t = NULL; + stack_peek(task_stack, (data_item_t*) &t); + assert(t != NULL); + return t; +} + +static trace_region_def_t *get_encountering_region(void) +{ + trace_region_def_t *r = NULL; + stack_peek(region_stack, (data_item_t*) &r); + assert(r != NULL); + return r; +} + +void otterTraceInitialise_i(const char* file, const char* func, const int line) +{ + // Initialise archive + + static char host[HOST_NAME_MAX+1] = {0}; + gethostname(host, HOST_NAME_MAX); + + opt.hostname = host; + opt.tracename = getenv(ENV_VAR_TRACE_OUTPUT); + opt.tracepath = getenv(ENV_VAR_TRACE_PATH); + opt.append_hostname = getenv(ENV_VAR_APPEND_HOST) == NULL ? false : true; + + /* Apply defaults if variables not provided */ + if(opt.tracename == NULL) opt.tracename = DEFAULT_OTF2_TRACE_OUTPUT; + if(opt.tracepath == NULL) opt.tracepath = DEFAULT_OTF2_TRACE_PATH; + + LOG_INFO("Otter environment variables:"); + LOG_INFO("%-30s %s", "host", opt.hostname); + LOG_INFO("%-30s %s", ENV_VAR_TRACE_PATH, opt.tracepath); + LOG_INFO("%-30s %s", ENV_VAR_TRACE_OUTPUT, opt.tracename); + LOG_INFO("%-30s %s", ENV_VAR_APPEND_HOST, opt.append_hostname?"Yes":"No"); + + trace_initialise_archive(&opt); + + region_stack = stack_create(); + task_stack = stack_create(); + parallel_stack = stack_create(); + + tracingActive = true; + + thread_data = new_thread_data(otter_thread_initial); + trace_event_thread_begin(thread_data->location); + + otter_src_location_t src_location = { + .file = file, + .func = func, + .line = line + }; + LOG_EVENT_CALL(src_location.file, src_location.func, src_location.line, __func__); + + // initial task + task_data_t *initial_task = new_task_data( + thread_data->location, + NULL, + get_unique_task_id(), + otter_task_initial, + 0, + &src_location + ); + + stack_push(region_stack, 
(data_item_t) {.ptr = initial_task->region}); + stack_push(task_stack, (data_item_t) {.ptr = initial_task}); + + trace_event_enter(thread_data->location, initial_task->region); + + return; +} + +void otterTraceFinalise(void) +{ + // Finalise arhchive + + + // initial task + trace_event_leave(thread_data->location); + + /* For initial-task-end event, must manually record region defintion + as it never gets handed off to an enclosing parallel region to be + written at parallel-end */ + trace_region_def_t *initial_task_region = NULL; + stack_pop(region_stack, (data_item_t*) &initial_task_region); + assert((initial_task_region->type == trace_region_task) + && (initial_task_region->attr.task.type == otter_task_initial)); + trace_write_region_definition(initial_task_region); + trace_destroy_task_region(initial_task_region); + + task_data_t *initial_task = NULL; + stack_pop(task_stack, (data_item_t*) &initial_task); + assert(initial_task->flags == otter_task_initial); + task_destroy(initial_task); + initial_task = NULL; + + trace_event_thread_end(thread_data->location); + thread_destroy(thread_data); + trace_finalise_archive(); + + stack_destroy(region_stack, false, NULL); + stack_destroy(task_stack, false, NULL); + stack_destroy(parallel_stack, false, NULL); + + char trace_folder[PATH_MAX] = {0}; + + realpath(opt.tracepath, &trace_folder[0]); + + fprintf(stderr, "%s%s/%s\n", + "OTTER_TRACE_FOLDER=", trace_folder, opt.archive_name); + + return; +} + +void otterParallelBegin_i(const char* file, const char* func, const int line) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + task_data_t *encountering_task = get_encountering_task(); + + parallel_data_t *parallel_data = new_parallel_data( + thread_data->id, + encountering_task->id, + encountering_task, + 0, + 0 + ); + + stack_push(region_stack, (data_item_t) {.ptr = parallel_data->region}); + stack_push(parallel_stack, (data_item_t) {.ptr = parallel_data}); + + 
trace_event_enter(thread_data->location, parallel_data->region); + + otter_src_location_t src_location = { + .file = file, + .func = func, + .line = line + }; + LOG_EVENT_CALL(src_location.file, src_location.func, src_location.line, __func__); + + task_data_t *implicit_task = new_task_data( + thread_data->location, + encountering_task->region, + get_unique_task_id(), + otter_task_implicit, + 0, + &src_location + ); + + stack_push(region_stack, (data_item_t) {.ptr = implicit_task->region}); + stack_push(task_stack, (data_item_t) {.ptr = implicit_task}); + + trace_event_enter(thread_data->location, implicit_task->region); + + return; +} + +void otterParallelEnd(void) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + + + task_data_t *implicit_task = NULL; + parallel_data_t *parallel_data = NULL; + trace_region_def_t *implicit_task_region = NULL; + trace_region_def_t *parallel_region = NULL; + + stack_pop(region_stack, (data_item_t*) &implicit_task_region); + stack_pop(task_stack, (data_item_t*) &implicit_task); + assert(implicit_task->region == implicit_task_region); + trace_event_leave(thread_data->location); // implicit task + task_destroy(implicit_task); + + stack_pop(region_stack, (data_item_t*) ¶llel_region); + stack_pop(parallel_stack, (data_item_t*) ¶llel_data); + assert(parallel_data->region == parallel_region); + trace_event_leave(thread_data->location); // parallel + parallel_destroy(parallel_data); + return; +} + +void otterTaskBegin_i(const char* file, const char* func, const int line) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + otter_src_location_t src_location = { + .file = file, + .func = func, + .line = line + }; + LOG_EVENT_CALL(src_location.file, src_location.func, src_location.line, __func__); + + task_data_t *encountering_task = get_encountering_task(); + + task_data_t *task = new_task_data( + thread_data->location, + encountering_task->region, + 
get_unique_task_id(), + otter_task_explicit, + 0, + &src_location + ); + + stack_push(region_stack, (data_item_t) {.ptr = task->region}); + stack_push(task_stack, (data_item_t) {.ptr = task}); + + trace_event_task_create(thread_data->location, task->region); + + trace_event_task_switch( + thread_data->location, + encountering_task->region, + otter_task_switch, + task->region + ); + + return; +} + +void otterTaskEnd(void) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + + + task_data_t *task = NULL; + trace_region_def_t *task_region = NULL; + + stack_pop(region_stack, (data_item_t*) &task_region); + stack_pop(task_stack, (data_item_t*) &task); + + assert((task_region->type == trace_region_task) + && (task_region->attr.task.type == otter_task_explicit)); + assert(task_region == task->region); + + task_data_t *encountering_task = get_encountering_task(); + + trace_event_task_switch( + thread_data->location, + task->region, + otter_task_complete, + encountering_task->region + ); + + task_destroy(task); + + return; +} + +void otterTaskSingleBegin_i(const char* file, const char* func, const int line) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + LOG_EVENT_CALL(file, func, line, __func__); + + task_data_t *encountering_task = get_encountering_task(); + + trace_region_def_t *single = trace_new_workshare_region( + thread_data->location, + otter_work_single_executor, + 1, + encountering_task->id + ); + + stack_push(region_stack, (data_item_t) {.ptr = single}); + + trace_event_enter(thread_data->location, single); + + return; +} + +void otterTaskSingleEnd(void) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + + trace_region_def_t *single = NULL; + stack_pop(region_stack, (data_item_t*) &single); + assert((single->type == trace_region_workshare) + && (single->attr.wshare.type == otter_work_single_executor)); + 
trace_event_leave(thread_data->location); + return; +} + +void otterLoopBegin_i(const char* file, const char* func, const int line) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + LOG_EVENT_CALL(file, func, line, __func__); + + task_data_t *encountering_task = get_encountering_task(); + + trace_region_def_t *loop = trace_new_workshare_region( + thread_data->location, + otter_work_loop, + 1, + encountering_task->id + ); + + stack_push(region_stack, (data_item_t) {.ptr = loop}); + + trace_event_enter(thread_data->location, loop); + + return; +} + +void otterLoopEnd(void) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + + trace_region_def_t *loop = NULL; + stack_pop(region_stack, (data_item_t*) &loop); + assert((loop->type == trace_region_workshare) + && (loop->attr.wshare.type == otter_work_loop)); + trace_event_leave(thread_data->location); + return; +} + +void otterLoopIterationBegin_i(const char* file, const char* func, const int line) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + LOG_EVENT_CALL(file, func, line, __func__); + return; +} + +void otterLoopIterationEnd(void) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + + return; +} + +void otterSynchroniseChildTasks_i(const char* file, const char* func, const int line) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + LOG_EVENT_CALL(file, func, line, __func__); + task_data_t *encountering_task = get_encountering_task(); + trace_region_def_t *taskwait = trace_new_sync_region( + thread_data->location, + otter_sync_region_taskwait, + encountering_task->id + ); + trace_event_enter(thread_data->location, taskwait); + trace_event_leave(thread_data->location); + return; +} + +void otterSynchroniseDescendantTasksBegin_i(const char* file, const char* func, const int line) +{ + if 
(!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + LOG_EVENT_CALL(file, func, line, __func__); + task_data_t *encountering_task = get_encountering_task(); + trace_region_def_t *taskgroup = trace_new_sync_region( + thread_data->location, + otter_sync_region_taskgroup, + encountering_task->id + ); + stack_push(region_stack, (data_item_t) {.ptr = taskgroup}); + trace_event_enter(thread_data->location, taskgroup); + return; +} + +void otterSynchroniseDescendantTasksEnd(void) +{ + if (!tracingActive) + { + fprintf(stderr, "%s [INACTIVE]\n", __func__); + return; + } + + + trace_region_def_t *taskgroup = NULL; + stack_pop(region_stack, (data_item_t*) &taskgroup); + assert((taskgroup->type == trace_region_synchronise) + && (taskgroup->attr.sync.type == otter_sync_region_taskgroup)); + trace_event_leave(thread_data->location); + return; +} + +void otterTraceStart(void) +{ + if (!tracingActive) + { + fprintf(stderr, "%s: tracing interface started\n", __func__); + tracingActive = true; + } else { + fprintf(stderr, "%s: tracing interface already started\n", __func__); + } + return; +} + +void otterTraceStop(void) +{ + if (tracingActive) + { + fprintf(stderr, "%s: tracing interface stopped\n", __func__); + tracingActive = false; + } else { + fprintf(stderr, "%s: tracing interface already stopped\n", __func__); + } + return; +} diff --git a/src/otter/examples/CMakeLists.txt b/src/otter/examples/CMakeLists.txt index 252be42..1a23867 100644 --- a/src/otter/examples/CMakeLists.txt +++ b/src/otter/examples/CMakeLists.txt @@ -46,6 +46,17 @@ add_executable(omp-taskloop-yield "omp-taskloop-yield.c") add_executable(omp-spawn-n-tasks "omp-spawn-n-tasks.c") add_executable(omp-spawn-n-tasks-in-explicit-task-with-yield "omp-spawn-n-tasks-in-explicit-task-with-yield.c") +add_executable(otter-serial-fib + "otter-serial-fib.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../../include/otter/otter-serial.h") +target_include_directories(otter-serial-fib PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR}/../../../include) +target_link_libraries(otter-serial-fib otter-serial) + +add_executable(otter-serial-loop + "otter-serial-loop.c") +target_include_directories(otter-serial-loop PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../../include) +target_link_libraries(otter-serial-loop otter-serial) + # Add CXX executables add_executable(omp-peano-constructs "omp-peano-constructs.cpp") diff --git a/src/otter/examples/omp-spawn-n-tasks-in-explicit-task-with-yield.c b/src/otter/examples/omp-spawn-n-tasks-in-explicit-task-with-yield.c index f6c2171..43c47a5 100644 --- a/src/otter/examples/omp-spawn-n-tasks-in-explicit-task-with-yield.c +++ b/src/otter/examples/omp-spawn-n-tasks-in-explicit-task-with-yield.c @@ -18,14 +18,19 @@ int main(int argc, char *argv[]) yield = (atoi(argv[2]) ? true : false); #pragma omp parallel - #pragma omp single nowait - #pragma omp task - #pragma omp taskloop nogroup grainsize(1) - for (int i=0; i #include -#define THREADS 6 #define LEN 4 int main(void) { int j=0; - #pragma omp parallel num_threads(THREADS) + #pragma omp parallel { #pragma omp single { diff --git a/src/otter/examples/omp-taskloop-single.c b/src/otter/examples/omp-taskloop-single.c index 8ee1580..975fbab 100644 --- a/src/otter/examples/omp-taskloop-single.c +++ b/src/otter/examples/omp-taskloop-single.c @@ -12,13 +12,12 @@ int main(void) { #pragma omp single nowait { - #pragma omp taskloop + #pragma omp taskloop nogroup for (j=0; j +#include + +int main(int argc, char *argv[]) { + + otterTraceInitialise(); + otterParallelBegin(); + otterTaskBegin(); + otterTaskEnd(); + otterParallelEnd(); + otterTraceFinalise(); + + return 0; + +} diff --git a/src/otter/examples/otter-serial-fib.F90 b/src/otter/examples/otter-serial-fib.F90 new file mode 100644 index 0000000..63871fb --- /dev/null +++ b/src/otter/examples/otter-serial-fib.F90 @@ -0,0 +1,63 @@ +#include "otter-serial.F90" + +Integer Recursive Function fib(n) result(a) + use otter_serial + Implicit None + 
Integer, Intent(in) :: n + Integer :: i, j + + if (n < 2) then + a = n + else + ! Tag: wrap a task + Call fortran_otterTaskBegin() + i = fib(n-1) + Call fortran_otterTaskEnd() + + ! Tag: wrap a task + Call fortran_otterTaskBegin() + j = fib(n-2) + Call fortran_otterTaskEnd() + + ! Indicate a synchronisation constraint on a subset of work items + Call fortran_otterSynchroniseChildTasks() + + a = i + j + end if +End Function fib + + +Program fibonacci + use otter_serial + Implicit None + Integer :: n, argc + Integer :: fibn + Integer, External :: fib + Character(len=32) :: arg + + argc = command_argument_count() + if (argc /= 1) then + print *, "Provide one integer argument to the program (n)" + STOP + end if + + call get_command_argument(1, value=arg) + read(arg(1:len(arg)),*) n + + Call fortran_otterTraceInitialise() + + ! Tag: start of a region we want to parallelise + Call fortran_otterParallelBegin() + + ! Tag: wrap a task + Call fortran_otterTaskSingleBegin() + fibn = fib(n) + Call fortran_otterTaskSingleEnd() + + Call fortran_otterParallelEnd() + + print *, n, fibn + + Call fortran_otterTraceFinalise() + +End Program fibonacci diff --git a/src/otter/examples/otter-serial-fib.c b/src/otter/examples/otter-serial-fib.c new file mode 100644 index 0000000..99137b6 --- /dev/null +++ b/src/otter/examples/otter-serial-fib.c @@ -0,0 +1,55 @@ +#include +#include +#include + +int fib(int n); + +int main(int argc, char *argv[]) { + + if (argc != 2) { + fprintf(stderr, "usage: %s n\n", argv[0]); + return 1; + } + + int n = atoi(argv[1]); + int fibn = 0; + + otterTraceInitialise(); + + // Tag: start of a region we want to parallelise + otterParallelBegin(); + { + // Tag: wrap a task + otterTaskSingleBegin(); + fibn = fib(n); + otterTaskSingleEnd(); + } + // Tag: end of a region we want to parallelise + otterParallelEnd(); + + printf("f(%d) = %d\n", n, fibn); + + otterTraceFinalise(); + + return 0; +} + +int fib(int n) { + if (n<2) return n; + int i, j; + + // Tag: wrap a 
task + otterTaskBegin(); + i = fib(n-1); + otterTaskEnd(); + + // Tag: wrap a task + otterTaskBegin(); + j = fib(n-2); + otterTaskEnd(); + + // Indicate a synchronisation constraint on a subset of work items + otterSynchroniseChildTasks(); + + return i+j; +} diff --git a/src/otter/examples/otter-serial-loop.c b/src/otter/examples/otter-serial-loop.c new file mode 100644 index 0000000..f6964dc --- /dev/null +++ b/src/otter/examples/otter-serial-loop.c @@ -0,0 +1,50 @@ +#include +#include +#include +#define LEN 5 + +int main(void) +{ + int j=0; + otterTraceInitialise(); + otterParallelBegin(); + { + otterTaskSingleBegin(); + { + otterLoopBegin(); + for (j=0; j -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "otter/general.h" -#include "otter/debug.h" -#include "otter/otter-ompt-header.h" -#include "otter/otter-common.h" -#include "otter/trace.h" -#include "otter/trace-lookup-macros.h" -#include "otter/trace-structs.h" - -#include "otter/queue.h" -#include "otter/stack.h" - -/* Defined in trace.c */ -extern OTF2_Archive *Archive; -extern OTF2_GlobalDefWriter *Defs; -extern pthread_mutex_t lock_global_def_writer; -extern pthread_mutex_t lock_global_archive; - -/* * * * * * * * * * * * * * * * */ -/* * * * * Constructors * * * * */ -/* * * * * * * * * * * * * * * * */ - -trace_location_def_t * -trace_new_location_definition( - unique_id_t id, - ompt_thread_t thread_type, - OTF2_LocationType loc_type, - OTF2_LocationGroupRef loc_grp) -{ - trace_location_def_t *new = malloc(sizeof(*new)); - - *new = (trace_location_def_t) { - .id = id, - .thread_type = thread_type, - .events = 0, - .ref = get_unique_loc_ref(), - .type = loc_type, - .location_group = loc_grp, - .rgn_stack = stack_create(), - .rgn_defs = queue_create(), - .rgn_defs_stack = stack_create(), - .attributes = OTF2_AttributeList_New() - }; - - new->evt_writer = OTF2_Archive_GetEvtWriter(Archive, new->ref); - new->def_writer = OTF2_Archive_GetDefWriter(Archive, 
new->ref); - - /* Thread location definition is written at thread-end (once all events - counted) */ - - LOG_DEBUG("[t=%lu] location created", id); - LOG_DEBUG("[t=%lu] %-18s %p", id, "rgn_stack:", new->rgn_stack); - LOG_DEBUG("[t=%lu] %-18s %p", id, "rgn_defs:", new->rgn_defs); - LOG_DEBUG("[t=%lu] %-18s %p", id, "rgn_defs_stack:", new->rgn_defs_stack); - - return new; -} - -trace_region_def_t * -trace_new_parallel_region( - unique_id_t id, - unique_id_t master, - unique_id_t encountering_task_id, - int flags, - unsigned int requested_parallelism) -{ - trace_region_def_t *new = malloc(sizeof(*new)); - *new = (trace_region_def_t) { - .ref = get_unique_rgn_ref(), - .role = OTF2_REGION_ROLE_PARALLEL, - .attributes = OTF2_AttributeList_New(), - .type = trace_region_parallel, - .encountering_task_id = encountering_task_id, - .attr.parallel = { - .id = id, - .master_thread = master, - .is_league = flags & ompt_parallel_league ? true : false, - .requested_parallelism = requested_parallelism, - .ref_count = 0, - .enter_count = 0, - .lock_rgn = PTHREAD_MUTEX_INITIALIZER, - .rgn_defs = queue_create() - } - }; - return new; -} - -trace_region_def_t * -trace_new_workshare_region( - trace_location_def_t *loc, - ompt_work_t wstype, - uint64_t count, - unique_id_t encountering_task_id) -{ - trace_region_def_t *new = malloc(sizeof(*new)); - *new = (trace_region_def_t) { - .ref = get_unique_rgn_ref(), - .role = WORK_TYPE_TO_OTF2_REGION_ROLE(wstype), - .attributes = OTF2_AttributeList_New(), - .type = trace_region_workshare, - .encountering_task_id = encountering_task_id, - .attr.wshare = { - .type = wstype, - .count = count - } - }; - - LOG_DEBUG("[t=%lu] created workshare region %u at %p", - loc->id, new->ref, new); - - /* Add region definition to location's region definition queue */ - queue_push(loc->rgn_defs, (data_item_t) {.ptr = new}); - - return new; -} - -trace_region_def_t * -trace_new_master_region( - trace_location_def_t *loc, - unique_id_t encountering_task_id) -{ - 
trace_region_def_t *new = malloc(sizeof(*new)); - *new = (trace_region_def_t) { - .ref = get_unique_rgn_ref(), - .role = OTF2_REGION_ROLE_MASTER, - .attributes = OTF2_AttributeList_New(), -#if defined(USE_OMPT_MASKED) - .type = trace_region_masked, -#else - .type = trace_region_master, -#endif - .encountering_task_id = encountering_task_id, - .attr.master = { - .thread = loc->id - } - }; - - LOG_DEBUG("[t=%lu] created master region %u at %p", - loc->id, new->ref, new); - - /* Add region definition to location's region definition queue */ - queue_push(loc->rgn_defs, (data_item_t) {.ptr = new}); - - return new; -} - -trace_region_def_t * -trace_new_sync_region( - trace_location_def_t *loc, - ompt_sync_region_t stype, - unique_id_t encountering_task_id) -{ - trace_region_def_t *new = malloc(sizeof(*new)); - *new = (trace_region_def_t) { - .ref = get_unique_rgn_ref(), - .role = SYNC_TYPE_TO_OTF2_REGION_ROLE(stype), - .attributes = OTF2_AttributeList_New(), - .type = trace_region_synchronise, - .encountering_task_id = encountering_task_id, - .attr.sync = { - .type = stype, - } - }; - - LOG_DEBUG("[t=%lu] created sync region %u at %p", - loc->id, new->ref, new); - - /* Add region definition to location's region definition queue */ - queue_push(loc->rgn_defs, (data_item_t) {.ptr = new}); - - return new; -} - -trace_region_def_t * -trace_new_task_region( - trace_location_def_t *loc, - trace_region_def_t *parent_task_region, - unique_id_t id, - ompt_task_flag_t flags, - int has_dependences) -{ - /* Create a region representing a task. Add to the location's region - definition queue. 
*/ - - LOG_INFO_IF((parent_task_region == NULL), - "[t=%lu] parent task region is null", loc->id); - - trace_region_def_t *new = malloc(sizeof(*new)); - *new = (trace_region_def_t) { - .ref = get_unique_rgn_ref(), - .role = OTF2_REGION_ROLE_TASK, - .attributes = OTF2_AttributeList_New(), - .type = trace_region_task, - .attr.task = { - .id = id, - .type = flags & 0xF, - .flags = flags, - .has_dependences = has_dependences, - .parent_id = parent_task_region != NULL ? - parent_task_region->attr.task.id : OTF2_UNDEFINED_UINT64, - .parent_type = parent_task_region != NULL ? - parent_task_region->attr.task.type : OTF2_UNDEFINED_UINT32, - .task_status = 0 /* no status */ - } - }; - new->encountering_task_id = new->attr.task.parent_id; - - LOG_DEBUG("[t=%lu] created region %u for task %lu at %p", - loc->id, new->ref, new->attr.task.id, new); - - /* Add region definition to location's region definition queue */ - queue_push(loc->rgn_defs, (data_item_t) {.ptr = new}); - - return new; -} - -/* * * * * * * * * * * * * * * */ -/* * * * * Destructors * * * * */ -/* * * * * * * * * * * * * * * */ - -void -trace_destroy_location(trace_location_def_t *loc) -{ - if (loc == NULL) return; - trace_write_location_definition(loc); - LOG_DEBUG("[t=%lu] destroying rgn_stack %p", loc->id, loc->rgn_stack); - stack_destroy(loc->rgn_stack, false, NULL); - if (loc->rgn_defs) - { - LOG_DEBUG("[t=%lu] destroying rgn_defs %p", loc->id, loc->rgn_defs); - queue_destroy(loc->rgn_defs, false, NULL); - } - LOG_DEBUG("[t=%lu] destroying rgn_defs_stack %p", loc->id, loc->rgn_defs_stack); - stack_destroy(loc->rgn_defs_stack, false, NULL); - // OTF2_AttributeList_Delete(loc->attributes); - LOG_DEBUG("[t=%lu] destroying location", loc->id); - free(loc); - return; -} - -void -trace_destroy_parallel_region(trace_region_def_t *rgn) -{ - if (rgn->type != trace_region_parallel) - { - LOG_ERROR("invalid region type %d", rgn->type); - abort(); - } - - size_t n_defs = queue_length(rgn->attr.parallel.rgn_defs); - 
LOG_DEBUG("[parallel=%lu] writing nested region definitions (%lu)", - rgn->attr.parallel.id, n_defs); - - /* Lock the global def writer first */ - pthread_mutex_lock(&lock_global_def_writer); - - /* Write parallel region's definition */ - trace_write_region_definition(rgn); - - /* write region's nested region definitions */ - trace_region_def_t *r = NULL; - int count=0; - while (queue_pop(rgn->attr.parallel.rgn_defs, (data_item_t*) &r)) - { - LOG_DEBUG("[parallel=%lu] writing region definition %d/%lu (region %3u)", - rgn->attr.parallel.id, count+1, n_defs, r->ref); - count++; - trace_write_region_definition(r); - - /* destroy each region once its definition is written */ - switch (r->type) - { - case trace_region_workshare: - trace_destroy_workshare_region(r); - break; - -#if defined(USE_OMPT_MASKED) - case trace_region_masked: -#else - case trace_region_master: -#endif - trace_destroy_master_region(r); - break; - - case trace_region_synchronise: - trace_destroy_sync_region(r); - break; - - case trace_region_task: - trace_destroy_task_region(r); - break; - - default: - LOG_ERROR("unknown region type %d", r->type); - abort(); - } - } - - /* Release once done */ - pthread_mutex_unlock(&lock_global_def_writer); - - /* destroy parallel region once all locations are done with it - and all definitions written */ - // OTF2_AttributeList_Delete(rgn->attributes); - queue_destroy(rgn->attr.parallel.rgn_defs, false, NULL); - LOG_DEBUG("region %p (parallel id %lu)", rgn, rgn->attr.parallel.id); - free(rgn); - return; -} - -void -trace_destroy_workshare_region(trace_region_def_t *rgn) -{ - LOG_DEBUG("region %p destroying attribute list %p", rgn, rgn->attributes); - OTF2_AttributeList_Delete(rgn->attributes); - LOG_DEBUG("region %p", rgn); - free(rgn); -} - -void -trace_destroy_master_region(trace_region_def_t *rgn) -{ - LOG_DEBUG("region %p destroying attribute list %p", rgn, rgn->attributes); - OTF2_AttributeList_Delete(rgn->attributes); - LOG_DEBUG("region %p", rgn); - 
free(rgn); -} - -void -trace_destroy_sync_region(trace_region_def_t *rgn) -{ - LOG_DEBUG("region %p destroying attribute list %p", rgn, rgn->attributes); - OTF2_AttributeList_Delete(rgn->attributes); - LOG_DEBUG("region %p", rgn); - free(rgn); -} - -void -trace_destroy_task_region(trace_region_def_t *rgn) -{ - LOG_WARN_IF( - (!(rgn->attr.task.task_status == ompt_task_complete - || rgn->attr.task.task_status == ompt_task_cancel)), - "destroying task region before task-complete/task-cancel"); - LOG_DEBUG("region %p destroying attribute list %p", rgn, rgn->attributes); - OTF2_AttributeList_Delete(rgn->attributes); - LOG_DEBUG("region %p", rgn); - free(rgn); -} - -/* pretty-print region definitions */ -void -trace_region_pprint( - FILE *fp, - trace_region_def_t *r, - const char func[], - const int line) -{ - if (fp == NULL) - fp = stderr; - - switch (r->type) - { - case trace_region_parallel: - fprintf(fp, "%s:%d: Parallel(id=%lu, master=%lu, ref_count=%u, enter_count=%u) in %s:%d\n", - __func__, __LINE__, - r->attr.parallel.id, - r->attr.parallel.master_thread, - r->attr.parallel.ref_count, - r->attr.parallel.enter_count, - func, line - ); - break; - case trace_region_workshare: - fprintf(fp, "%s:%d: Work(type=%s, count=%lu) in %s:%d\n", - __func__, __LINE__, - OMPT_WORK_TYPE_TO_STR(r->attr.wshare.type), - r->attr.wshare.count, - func, line - ); - break; - case trace_region_synchronise: - fprintf(fp, "%s:%d: Sync(type=%s) in %s:%d\n", - __func__, __LINE__, - OMPT_SYNC_TYPE_TO_STR(r->attr.sync.type), - func, line - ); - break; - case trace_region_task: - fprintf(fp, "%s:%d: Task(id=%lu, type=%s) in %s:%d\n", - __func__, __LINE__, - r->attr.task.id, - OMPT_TASK_TYPE_TO_STR(OMPT_TASK_TYPE_BITS & r->attr.task.type), - func, line - ); - break; -#if defined(USE_OMPT_MASKED) - case trace_region_masked: - fprintf(fp, "%s:%d: Masked(thread=%lu) in %s:%d\n", -#else - case trace_region_master: - fprintf(fp, "%s:%d: Master(thread=%lu) in %s:%d\n", -#endif - __func__, __LINE__, - 
r->attr.master.thread, - func, line - ); - break; - } - return; -} \ No newline at end of file diff --git a/src/otter/trace/trace-archive.c b/src/otter/trace/trace-archive.c new file mode 100644 index 0000000..05098ba --- /dev/null +++ b/src/otter/trace/trace-archive.c @@ -0,0 +1,312 @@ +/** + * @file trace-archive.c + * @author Adam Tuft + * @brief Responsible for initialising and finalising single instances of the + * trace archive and its global definitions writer. Returns pointers to these + * resources as well as mutexes protecting access to them both. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include + +#include +#include + +#include "otter/otter-version.h" +#include "otter/debug.h" +#include "otter/trace.h" +#include "otter/trace-archive.h" +#include "otter/trace-attributes.h" +#include "otter/trace-unique-refs.h" +#include "otter/trace-check-error-code.h" + +#define DEFAULT_LOCATION_GRP 0 // OTF2_UNDEFINED_LOCATION_GROUP +#define DEFAULT_SYSTEM_TREE 0 +#define DEFAULT_NAME_BUF_SZ 256 + +static uint64_t get_timestamp(void); + +/* Lookup tables mapping enum value to string ref */ +OTF2_StringRef attr_name_ref[n_attr_defined][2] = {0}; +OTF2_StringRef attr_label_ref[n_attr_label_defined] = {0}; + +/* References to global archive & def writer */ +static OTF2_Archive *Archive = NULL; +static OTF2_GlobalDefWriter *Defs = NULL; +static char_ref_registry *Registry = NULL; + +/* Mutexes for thread-safe access to Archive and Defs */ +static pthread_mutex_t lock_global_def_writer = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t lock_global_archive = PTHREAD_MUTEX_INITIALIZER; + +pthread_mutex_t * +global_def_writer_lock(void) +{ + return &lock_global_def_writer; +} + +pthread_mutex_t * +global_archive_lock(void) +{ + return &lock_global_archive; +} + +OTF2_GlobalDefWriter *get_global_def_writer(void) +{ + return Defs; +} + +OTF2_Archive *get_global_archive(void) +{ + return Archive; +} + +char_ref_registry 
*get_global_str_registry(void) +{ + return Registry; +} + +/* Pre- and post-flush callbacks required by OTF2 */ +static OTF2_FlushType +pre_flush( + void *userData, + OTF2_FileType fileType, + OTF2_LocationRef location, + void *callerData, + bool final) +{ + return OTF2_FLUSH; +} + +static OTF2_TimeStamp +post_flush( + void *userData, + OTF2_FileType fileType, + OTF2_LocationRef location) +{ + return get_timestamp(); +} + +// Callback to pass to Registry which will be used to write source location +// string refs to the DefWriter when Registry is deleted. +static void trace_write_string_ref(const char *s, OTF2_StringRef ref) +{ + LOG_DEBUG("[%s] writing ref %u for string \"%s\"", __func__, ref, s); + OTF2_ErrorCode r = OTF2_SUCCESS; + r = OTF2_GlobalDefWriter_WriteString(Defs, ref, s); + CHECK_OTF2_ERROR_CODE(r); +} + +bool +trace_initialise_archive(otter_opt_t *opt) +{ + /* Determine filename & path from options */ + char archive_path[DEFAULT_NAME_BUF_SZ+1] = {0}; + static char archive_name[DEFAULT_NAME_BUF_SZ+1] = {0}; + char *p = &archive_name[0]; + + /* Copy filename */ + strncpy(p, opt->tracename, DEFAULT_NAME_BUF_SZ - strlen(archive_name)); + p = &archive_name[0] + strlen(archive_name); + + /* Copy hostname */ + if (opt->append_hostname) + { + strncpy(p, ".", DEFAULT_NAME_BUF_SZ - strlen(archive_name)); + p = &archive_name[0] + strlen(archive_name); + strncpy(p, opt->hostname, DEFAULT_NAME_BUF_SZ - strlen(archive_name)); + p = &archive_name[0] + strlen(archive_name); + } + + /* Copy PID */ + strncpy(p, ".", DEFAULT_NAME_BUF_SZ - strlen(archive_name)); + p = &archive_name[0] + strlen(archive_name); + snprintf(p, DEFAULT_NAME_BUF_SZ - strlen(archive_name), "%u", getpid()); + p = &archive_name[0] + strlen(archive_name); + + /* Copy path + filename */ + snprintf(archive_path, DEFAULT_NAME_BUF_SZ, "%s/%s", + opt->tracepath, archive_name); + + fprintf(stderr, "%-30s %s/%s\n", + "Trace output path:", opt->tracepath, archive_name); + + /* Store archive name in 
options struct */ + opt->archive_name = &archive_name[0]; + + /* open OTF2 archive */ + Archive = OTF2_Archive_Open( + archive_path, /* archive path */ + archive_name, /* archive name */ + OTF2_FILEMODE_WRITE, + OTF2_CHUNK_SIZE_EVENTS_DEFAULT, + OTF2_CHUNK_SIZE_DEFINITIONS_DEFAULT, + OTF2_SUBSTRATE_POSIX, + OTF2_COMPRESSION_NONE); + + /* set flush callbacks */ + static OTF2_FlushCallbacks on_flush = { + .otf2_pre_flush = pre_flush, + .otf2_post_flush = post_flush + }; + OTF2_Archive_SetFlushCallbacks(Archive, &on_flush, NULL); + + /* set serial (not MPI) collective callbacks */ + OTF2_Archive_SetSerialCollectiveCallbacks(Archive); + + /* set pthread archive locking callbacks */ + OTF2_Pthread_Archive_SetLockingCallbacks(Archive, NULL); + + /* open archive event files */ + OTF2_Archive_OpenEvtFiles(Archive); + + /* open (thread-) local definition files */ + OTF2_Archive_OpenDefFiles(Archive); + + /* get global definitions writer */ + Defs = OTF2_Archive_GetGlobalDefWriter(Archive); + + /* get clock resolution & current time for CLOCK_MONOTONIC */ + struct timespec res, tp; + if (clock_getres(CLOCK_MONOTONIC, &res) != 0) + { + LOG_ERROR("%s", strerror(errno)); + errno = 0; + } else { + LOG_DEBUG("Clock resolution: %lu s", res.tv_sec); + LOG_DEBUG("Clock resolution: %lu ns", res.tv_nsec); + LOG_DEBUG("Clock ticks per second: %lu", 1000000000 / res.tv_nsec); + } + + clock_gettime(CLOCK_MONOTONIC, &tp); + uint64_t epoch = tp.tv_sec * (uint64_t)1000000000 + tp.tv_nsec; + LOG_DEBUG("Epoch: %lu %lu %lu", tp.tv_sec, tp.tv_nsec, epoch); + + /* write global clock properties */ + OTF2_GlobalDefWriter_WriteClockProperties(Defs, + 1000000000 / res.tv_nsec, /* ticks per second */ + epoch, + UINT64_MAX /* length */ + ); + + /* write an empty string as the first entry so that string ref 0 is "" */ + OTF2_GlobalDefWriter_WriteString(Defs, get_unique_str_ref(), ""); + + + /* write Otter version string as 2nd entry so it is always at index 1 */ + OTF2_GlobalDefWriter_WriteString(Defs, 
get_unique_str_ref(), OTTER_VERSION_STRING); + + /* write global system tree */ + OTF2_SystemTreeNodeRef g_sys_tree_id = DEFAULT_SYSTEM_TREE; + OTF2_StringRef g_sys_tree_name = get_unique_str_ref(); + OTF2_StringRef g_sys_tree_class = get_unique_str_ref(); + OTF2_GlobalDefWriter_WriteString(Defs, g_sys_tree_name, "Sytem Tree"); + OTF2_GlobalDefWriter_WriteString(Defs, g_sys_tree_class, "node"); + OTF2_GlobalDefWriter_WriteSystemTreeNode(Defs, + g_sys_tree_id, + g_sys_tree_name, + g_sys_tree_class, + OTF2_UNDEFINED_SYSTEM_TREE_NODE); + + /* write global location group */ + OTF2_StringRef g_loc_grp_name = get_unique_str_ref(); + OTF2_LocationGroupRef g_loc_grp_id = DEFAULT_LOCATION_GRP; + OTF2_GlobalDefWriter_WriteString(Defs, g_loc_grp_name, +#if defined(OTTER_SERIAL_MODE) + "Serial Process" +#else + "OMP Process" +#endif + ); + OTF2_GlobalDefWriter_WriteLocationGroup(Defs, g_loc_grp_id, g_loc_grp_name, + OTF2_LOCATION_GROUP_TYPE_PROCESS, g_sys_tree_id); + + /* define any necessary attributes (their names, descriptions & labels) + these are defined in trace-attribute-defs.h and included via macros to + reduce code repetition. 
*/ + + /* Populate lookup tables with unique string refs */ + int k=0; + for (k=0; ktracename, DEFAULT_NAME_BUF_SZ - strlen(archive_name)); - p = &archive_name[0] + strlen(archive_name); - - /* Copy hostname */ - if (opt->append_hostname) - { - strncpy(p, ".", DEFAULT_NAME_BUF_SZ - strlen(archive_name)); - p = &archive_name[0] + strlen(archive_name); - strncpy(p, opt->hostname, DEFAULT_NAME_BUF_SZ - strlen(archive_name)); - p = &archive_name[0] + strlen(archive_name); - } - - /* Copy PID */ - strncpy(p, ".", DEFAULT_NAME_BUF_SZ - strlen(archive_name)); - p = &archive_name[0] + strlen(archive_name); - snprintf(p, DEFAULT_NAME_BUF_SZ - strlen(archive_name), "%u", getpid()); - p = &archive_name[0] + strlen(archive_name); - - /* Copy path + filename */ - snprintf(archive_path, DEFAULT_NAME_BUF_SZ, "%s/%s", - opt->tracepath, archive_name); - - fprintf(stderr, "%-30s %s/%s\n", - "Trace output path:", opt->tracepath, archive_name); - - /* Store archive name in options struct */ - opt->archive_name = &archive_name[0]; - - /* open OTF2 archive */ - Archive = OTF2_Archive_Open( - archive_path, /* archive path */ - archive_name, /* archive name */ - OTF2_FILEMODE_WRITE, - OTF2_CHUNK_SIZE_EVENTS_DEFAULT, - OTF2_CHUNK_SIZE_DEFINITIONS_DEFAULT, - OTF2_SUBSTRATE_POSIX, - OTF2_COMPRESSION_NONE); - - /* set flush callbacks */ - static OTF2_FlushCallbacks on_flush = { - .otf2_pre_flush = pre_flush, - .otf2_post_flush = post_flush - }; - OTF2_Archive_SetFlushCallbacks(Archive, &on_flush, NULL); - - /* set serial (not MPI) collective callbacks */ - OTF2_Archive_SetSerialCollectiveCallbacks(Archive); - - /* set pthread archive locking callbacks */ - OTF2_Pthread_Archive_SetLockingCallbacks(Archive, NULL); - - /* open archive event files */ - OTF2_Archive_OpenEvtFiles(Archive); - - /* open (thread-) local definition files */ - OTF2_Archive_OpenDefFiles(Archive); - - /* get global definitions writer */ - Defs = OTF2_Archive_GetGlobalDefWriter(Archive); - - /* get clock resolution & 
current time for CLOCK_MONOTONIC */ - struct timespec res, tp; - if (clock_getres(CLOCK_MONOTONIC, &res) != 0) - { - LOG_ERROR("%s", strerror(errno)); - errno = 0; - } else { - LOG_DEBUG("Clock resolution: %lu s", res.tv_sec); - LOG_DEBUG("Clock resolution: %lu ns", res.tv_nsec); - LOG_DEBUG("Clock ticks per second: %lu", 1000000000 / res.tv_nsec); - } - - clock_gettime(CLOCK_MONOTONIC, &tp); - uint64_t epoch = tp.tv_sec * (uint64_t)1000000000 + tp.tv_nsec; - LOG_DEBUG("Epoch: %lu %lu %lu", tp.tv_sec, tp.tv_nsec, epoch); - - /* write global clock properties */ - OTF2_GlobalDefWriter_WriteClockProperties(Defs, - 1000000000 / res.tv_nsec, /* ticks per second */ - epoch, - UINT64_MAX /* length */ - ); - - /* write an empty string as the first entry so that string ref 0 is "" */ - OTF2_GlobalDefWriter_WriteString(Defs, get_unique_str_ref(), ""); - - /* write global system tree */ - OTF2_SystemTreeNodeRef g_sys_tree_id = DEFAULT_SYSTEM_TREE; - OTF2_StringRef g_sys_tree_name = get_unique_str_ref(); - OTF2_StringRef g_sys_tree_class = get_unique_str_ref(); - OTF2_GlobalDefWriter_WriteString(Defs, g_sys_tree_name, "Sytem Tree"); - OTF2_GlobalDefWriter_WriteString(Defs, g_sys_tree_class, "node"); - OTF2_GlobalDefWriter_WriteSystemTreeNode(Defs, - g_sys_tree_id, - g_sys_tree_name, - g_sys_tree_class, - OTF2_UNDEFINED_SYSTEM_TREE_NODE); - - /* write global location group */ - OTF2_StringRef g_loc_grp_name = get_unique_str_ref(); - OTF2_LocationGroupRef g_loc_grp_id = DEFAULT_LOCATION_GRP; - OTF2_GlobalDefWriter_WriteString(Defs, g_loc_grp_name, "OMP Process"); - OTF2_GlobalDefWriter_WriteLocationGroup(Defs, g_loc_grp_id, g_loc_grp_name, - OTF2_LOCATION_GROUP_TYPE_PROCESS, g_sys_tree_id); - - /* define any necessary attributes (their names, descriptions & labels) - these are defined in trace-attribute-defs.h and included via macros to - reduce code repetition. 
*/ - - /* Populate lookup tables with unique string refs */ - int k=0; - for (k=0; kid); +// char location_name[DEFAULT_NAME_BUF_SZ + 1] = {0}; +// OTF2_StringRef location_name_ref = get_unique_str_ref(); +// snprintf(location_name, DEFAULT_NAME_BUF_SZ, "Thread %lu", loc->id); - LOG_DEBUG("[t=%lu] locking global def writer", loc->id); - pthread_mutex_lock(&lock_global_def_writer); +// LOG_DEBUG("[t=%lu] locking global def writer", loc->id); +// pthread_mutex_lock(&lock_global_def_writer); - OTF2_GlobalDefWriter_WriteString(Defs, - location_name_ref, - location_name); +// OTF2_GlobalDefWriter_WriteString(Defs, +// location_name_ref, +// location_name); - LOG_DEBUG("[t=%lu] writing location definition", loc->id); - OTF2_GlobalDefWriter_WriteLocation(Defs, - loc->ref, - location_name_ref, - loc->type, - loc->events, - loc->location_group); +// LOG_DEBUG("[t=%lu] writing location definition", loc->id); +// OTF2_GlobalDefWriter_WriteLocation(Defs, +// loc->ref, +// location_name_ref, +// loc->type, +// loc->events, +// loc->location_group); - LOG_DEBUG("[t=%lu] unlocking global def writer", loc->id); - pthread_mutex_unlock(&lock_global_def_writer); +// LOG_DEBUG("[t=%lu] unlocking global def writer", loc->id); +// pthread_mutex_unlock(&lock_global_def_writer); - return; -} +// return; +// } void trace_write_region_definition(trace_region_def_t *rgn) @@ -303,6 +95,8 @@ trace_write_region_definition(trace_region_def_t *rgn) LOG_DEBUG("writing region definition %3u (type=%3d, role=%3u) %p", rgn->ref, rgn->type, rgn->role, rgn); + OTF2_GlobalDefWriter *Defs = get_global_def_writer(); + switch (rgn->type) { case trace_region_parallel: @@ -319,7 +113,11 @@ trace_write_region_definition(trace_region_def_t *rgn) region_name_ref, 0, 0, /* canonical name, description */ rgn->role, +#if defined(OTTER_SERIAL_MODE) + OTF2_PARADIGM_USER, +#else OTF2_PARADIGM_OPENMP, +#endif OTF2_REGION_FLAG_NONE, 0, 0, 0); /* source file, begin line no., end line no. 
*/ break; @@ -331,7 +129,11 @@ trace_write_region_definition(trace_region_def_t *rgn) WORK_TYPE_TO_STR_REF(rgn->attr.wshare.type), 0, 0, rgn->role, +#if defined(OTTER_SERIAL_MODE) + OTF2_PARADIGM_USER, +#else OTF2_PARADIGM_OPENMP, +#endif OTF2_REGION_FLAG_NONE, 0, 0, 0); /* source file, begin line no., end line no. */ break; @@ -347,7 +149,11 @@ trace_write_region_definition(trace_region_def_t *rgn) attr_label_ref[attr_region_type_master], 0, 0, rgn->role, +#if defined(OTTER_SERIAL_MODE) + OTF2_PARADIGM_USER, +#else OTF2_PARADIGM_OPENMP, +#endif OTF2_REGION_FLAG_NONE, 0, 0, 0); /* source file, begin line no., end line no. */ break; @@ -359,7 +165,11 @@ trace_write_region_definition(trace_region_def_t *rgn) SYNC_TYPE_TO_STR_REF(rgn->attr.sync.type), 0, 0, rgn->role, +#if defined(OTTER_SERIAL_MODE) + OTF2_PARADIGM_USER, +#else OTF2_PARADIGM_OPENMP, +#endif OTF2_REGION_FLAG_NONE, 0, 0, 0); /* source file, begin line no., end line no. */ break; @@ -398,33 +208,37 @@ trace_write_region_definition(trace_region_def_t *rgn) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ static void -trace_add_common_event_attributes(trace_region_def_t *rgn) +trace_add_common_event_attributes( + OTF2_AttributeList *attributes, + unique_id_t encountering_task_id, + trace_region_type_t region_type, + trace_region_attr_t region_attr) { OTF2_ErrorCode r = OTF2_SUCCESS; /* CPU of encountering thread */ - r = OTF2_AttributeList_AddInt32(rgn->attributes, attr_cpu, sched_getcpu()); + r = OTF2_AttributeList_AddInt32(attributes, attr_cpu, sched_getcpu()); CHECK_OTF2_ERROR_CODE(r); /* Add encountering task ID */ r = OTF2_AttributeList_AddUint64( - rgn->attributes, + attributes, attr_encountering_task_id, - rgn->encountering_task_id + encountering_task_id ); CHECK_OTF2_ERROR_CODE(r); /* Add the region type */ - r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_region_type, - rgn->type == trace_region_parallel ? 
+ r = OTF2_AttributeList_AddStringRef(attributes, attr_region_type, + region_type == trace_region_parallel ? attr_label_ref[attr_region_type_parallel] : - rgn->type == trace_region_workshare ? - WORK_TYPE_TO_STR_REF(rgn->attr.wshare.type) : - rgn->type == trace_region_synchronise ? - SYNC_TYPE_TO_STR_REF(rgn->attr.sync.type) : - rgn->type == trace_region_task ? - TASK_TYPE_TO_STR_REF(rgn->attr.task.type) : - rgn->type == + region_type == trace_region_workshare ? + WORK_TYPE_TO_STR_REF(region_attr.wshare.type) : + region_type == trace_region_synchronise ? + SYNC_TYPE_TO_STR_REF(region_attr.sync.type) : + region_type == trace_region_task ? + TASK_TYPE_TO_STR_REF(region_attr.task.type) : + region_type == #if defined(USE_OMPT_MASKED) trace_region_masked #else @@ -438,116 +252,6 @@ trace_add_common_event_attributes(trace_region_def_t *rgn) return; } -static void -trace_add_thread_attributes(trace_location_def_t *self) -{ - OTF2_ErrorCode r = OTF2_SUCCESS; - r = OTF2_AttributeList_AddInt32(self->attributes, attr_cpu, sched_getcpu()); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddUint64(self->attributes, attr_unique_id, self->id); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddStringRef(self->attributes, attr_thread_type, - self->thread_type == ompt_thread_initial ? - attr_label_ref[attr_thread_type_initial] : - self->thread_type == ompt_thread_worker ? - attr_label_ref[attr_thread_type_worker] : 0); - CHECK_OTF2_ERROR_CODE(r); - return; -} - -static void -trace_add_parallel_attributes(trace_region_def_t *rgn) -{ - OTF2_ErrorCode r = OTF2_SUCCESS; - r = OTF2_AttributeList_AddUint64(rgn->attributes, attr_unique_id, - rgn->attr.parallel.id); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddUint32(rgn->attributes, attr_requested_parallelism, - rgn->attr.parallel.requested_parallelism); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_is_league, - rgn->attr.parallel.is_league ? 
- attr_label_ref[attr_flag_true] : attr_label_ref[attr_flag_false]); - CHECK_OTF2_ERROR_CODE(r); - return; -} - -static void -trace_add_workshare_attributes(trace_region_def_t *rgn) -{ - OTF2_ErrorCode r = OTF2_SUCCESS; - r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_workshare_type, - WORK_TYPE_TO_STR_REF(rgn->attr.wshare.type)); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddUint64(rgn->attributes, attr_workshare_count, - rgn->attr.wshare.count); - CHECK_OTF2_ERROR_CODE(r); - return; -} - -static void -trace_add_master_attributes(trace_region_def_t *rgn) -{ - OTF2_ErrorCode r = OTF2_SUCCESS; - r = OTF2_AttributeList_AddUint64(rgn->attributes, attr_unique_id, - rgn->attr.master.thread); - CHECK_OTF2_ERROR_CODE(r); - return; -} - -static void -trace_add_sync_attributes(trace_region_def_t *rgn) -{ - OTF2_ErrorCode r = OTF2_SUCCESS; - r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_sync_type, - SYNC_TYPE_TO_STR_REF(rgn->attr.sync.type)); - CHECK_OTF2_ERROR_CODE(r); - return; -} - -static void -trace_add_task_attributes(trace_region_def_t *rgn) -{ - OTF2_ErrorCode r = OTF2_SUCCESS; - r = OTF2_AttributeList_AddUint64(rgn->attributes, attr_unique_id, - rgn->attr.task.id); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_task_type, - TASK_TYPE_TO_STR_REF(rgn->attr.task.type)); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddUint32(rgn->attributes, attr_task_flags, - rgn->attr.task.flags); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddUint64(rgn->attributes, attr_parent_task_id, - rgn->attr.task.parent_id); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_parent_task_type, - TASK_TYPE_TO_STR_REF(rgn->attr.task.parent_type)); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddUint8(rgn->attributes, attr_task_has_dependences, - rgn->attr.task.has_dependences); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddUint8(rgn->attributes, 
attr_task_is_undeferred, - rgn->attr.task.flags & ompt_task_undeferred); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddUint8(rgn->attributes, attr_task_is_untied, - rgn->attr.task.flags & ompt_task_untied); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddUint8(rgn->attributes, attr_task_is_final, - rgn->attr.task.flags & ompt_task_final); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddUint8(rgn->attributes, attr_task_is_mergeable, - rgn->attr.task.flags & ompt_task_mergeable); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddUint8(rgn->attributes, attr_task_is_merged, - rgn->attr.task.flags & ompt_task_merged); - CHECK_OTF2_ERROR_CODE(r); - r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_prior_task_status, - TASK_STATUS_TO_STR_REF(rgn->attr.task.task_status)); - CHECK_OTF2_ERROR_CODE(r); - return; -} - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* WRITE EVENTS */ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -637,7 +341,12 @@ trace_event_enter( } /* Add attributes common to all enter/leave events */ - trace_add_common_event_attributes(region); + trace_add_common_event_attributes( + region->attributes, + region->encountering_task_id, + region->type, + region->attr + ); /* Add the event type attribute */ OTF2_AttributeList_AddStringRef(region->attributes, attr_event_type, @@ -736,7 +445,12 @@ trace_event_leave(trace_location_def_t *self) } /* Add attributes common to all enter/leave events */ - trace_add_common_event_attributes(region); + trace_add_common_event_attributes( + region->attributes, + region->encountering_task_id, + region->type, + region->attr + ); /* Add the event type attribute */ OTF2_AttributeList_AddStringRef(region->attributes, attr_event_type, @@ -828,7 +542,12 @@ trace_event_task_create( trace_location_def_t *self, trace_region_def_t *created_task) { - trace_add_common_event_attributes(created_task); + 
trace_add_common_event_attributes( + created_task->attributes, + created_task->encountering_task_id, + created_task->type, + created_task->attr + ); /* task-create */ OTF2_AttributeList_AddStringRef(created_task->attributes, attr_event_type, @@ -855,9 +574,9 @@ trace_event_task_create( void trace_event_task_schedule( - trace_location_def_t *self, - trace_region_def_t *prior_task, - ompt_task_status_t prior_status) + trace_location_def_t *self, + trace_region_def_t *prior_task, + otter_task_status_t prior_status) { /* Update prior task's status before recording task enter/leave events */ LOG_ERROR_IF((prior_task->type != trace_region_task), @@ -866,6 +585,90 @@ trace_event_task_schedule( return; } +void +trace_event_task_switch( + trace_location_def_t *self, + trace_region_def_t *prior_task, + otter_task_status_t prior_status, + trace_region_def_t *next_task) +{ + // Update prior task's status + // Transfer thread's active region stack to prior_task->rgn_stack + // Transfer next_task->rgn_stack to thread + // Record event with details of tasks swapped & prior_status + + prior_task->attr.task.task_status = prior_status; + LOG_ERROR_IF((stack_is_empty(prior_task->rgn_stack) == false), + "prior task %lu region stack not empty", + prior_task->attr.task.id); + stack_transfer(prior_task->rgn_stack, self->rgn_stack); + stack_transfer(self->rgn_stack, next_task->rgn_stack); + + trace_add_common_event_attributes( + prior_task->attributes, + prior_task->attr.task.id, + prior_task->type, + prior_task->attr + ); + + // Record the reason the task-switch event occurred + OTF2_AttributeList_AddStringRef( + prior_task->attributes, + attr_prior_task_status, + TASK_STATUS_TO_STR_REF(prior_status) + ); + + // The task that was suspended + OTF2_AttributeList_AddUint64( + prior_task->attributes, + attr_prior_task_id, + prior_task->attr.task.id + ); + + // The task that was resumed + OTF2_AttributeList_AddUint64( + prior_task->attributes, + attr_unique_id, + next_task->attr.task.id + ); + 
+ // The task that was resumed + OTF2_AttributeList_AddUint64( + prior_task->attributes, + attr_next_task_id, + next_task->attr.task.id + ); + + // The region_type of the task that was resumed + OTF2_AttributeList_AddStringRef( + prior_task->attributes, + attr_next_task_region_type, + TASK_TYPE_TO_STR_REF(next_task->attr.task.type) + ); + + // Task-switch is always considered a discrete event + OTF2_AttributeList_AddStringRef( + prior_task->attributes, + attr_endpoint, + attr_label_ref[attr_endpoint_discrete] + ); + + OTF2_AttributeList_AddStringRef( + prior_task->attributes, + attr_event_type, + attr_label_ref[attr_event_type_task_switch] + ); + + OTF2_EvtWriter_ThreadTaskSwitch( + self->evt_writer, + prior_task->attributes, + get_timestamp(), + OTF2_UNDEFINED_COMM, + OTF2_UNDEFINED_UINT32, 0); /* creating thread, generation number */ + + return; +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* TIMESTAMP & UNIQUE REFERENCES */ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -878,16 +681,64 @@ get_timestamp(void) return time.tv_sec * (uint64_t)1000000000 + time.tv_nsec; } -uint64_t -get_unique_uint64_ref(trace_ref_type_t ref_type) +/* pretty-print region definitions */ +void +trace_region_pprint( + FILE *fp, + trace_region_def_t *r, + const char func[], + const int line) { - static uint64_t id[NUM_REF_TYPES] = {0}; - return __sync_fetch_and_add(&id[ref_type], 1L); -} + if (fp == NULL) + fp = stderr; -uint32_t -get_unique_uint32_ref(trace_ref_type_t ref_type) -{ - static uint32_t id[NUM_REF_TYPES] = {0}; - return __sync_fetch_and_add(&id[ref_type], 1); + switch (r->type) + { + case trace_region_parallel: + fprintf(fp, "%s:%d: Parallel(id=%lu, master=%lu, ref_count=%u, enter_count=%u) in %s:%d\n", + __func__, __LINE__, + r->attr.parallel.id, + r->attr.parallel.master_thread, + r->attr.parallel.ref_count, + r->attr.parallel.enter_count, + func, line + ); + break; + case trace_region_workshare: + 
fprintf(fp, "%s:%d: Work(type=%s, count=%lu) in %s:%d\n", + __func__, __LINE__, + OMPT_WORK_TYPE_TO_STR(r->attr.wshare.type), + r->attr.wshare.count, + func, line + ); + break; + case trace_region_synchronise: + fprintf(fp, "%s:%d: Sync(type=%s) in %s:%d\n", + __func__, __LINE__, + OMPT_SYNC_TYPE_TO_STR(r->attr.sync.type), + func, line + ); + break; + case trace_region_task: + fprintf(fp, "%s:%d: Task(id=%lu, type=%s) in %s:%d\n", + __func__, __LINE__, + r->attr.task.id, + OMPT_TASK_TYPE_TO_STR(OMPT_TASK_TYPE_BITS & r->attr.task.type), + func, line + ); + break; +#if defined(USE_OMPT_MASKED) + case trace_region_masked: + fprintf(fp, "%s:%d: Masked(thread=%lu) in %s:%d\n", +#else + case trace_region_master: + fprintf(fp, "%s:%d: Master(thread=%lu) in %s:%d\n", +#endif + __func__, __LINE__, + r->attr.master.thread, + func, line + ); + break; + } + return; } diff --git a/src/otter/trace/trace-location.c b/src/otter/trace/trace-location.c new file mode 100644 index 0000000..a766a27 --- /dev/null +++ b/src/otter/trace/trace-location.c @@ -0,0 +1,139 @@ +/** + * @file trace-location.c + * @author Adam Tuft + * @brief Defines trace_location_def_t which represents an OTF2 location, used + * to record the location's definition in the trace. Responsible for new/delete, + * adding a thread's attributes to its OTF2 attribute list when recording an + * event, and writing a location's definition to the trace. 
+ */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include "otter/trace-lookup-macros.h" +#include "otter/trace-attributes.h" +#include "otter/trace-location.h" +#include "otter/trace-archive.h" +#include "otter/trace-unique-refs.h" +#include "otter/trace-check-error-code.h" +#include "otter/trace-static-constants.h" +#include "otter/queue.h" +#include "otter/stack.h" + +/* Defined in trace-archive.c */ +extern OTF2_StringRef attr_name_ref[n_attr_defined][2]; +extern OTF2_StringRef attr_label_ref[n_attr_label_defined]; + +trace_location_def_t * +trace_new_location_definition( + unique_id_t id, + otter_thread_t thread_type, + OTF2_LocationType loc_type, + OTF2_LocationGroupRef loc_grp) +{ + trace_location_def_t *new = malloc(sizeof(*new)); + + *new = (trace_location_def_t) { + .id = id, + .thread_type = thread_type, + .events = 0, + .ref = get_unique_loc_ref(), + .type = loc_type, + .location_group = loc_grp, + .rgn_stack = stack_create(), + .rgn_defs = queue_create(), + .rgn_defs_stack = stack_create(), + .attributes = OTF2_AttributeList_New() + }; + + OTF2_Archive *Archive = get_global_archive(); + + new->evt_writer = OTF2_Archive_GetEvtWriter(Archive, new->ref); + new->def_writer = OTF2_Archive_GetDefWriter(Archive, new->ref); + + /* Thread location definition is written at thread-end (once all events + counted) */ + + LOG_DEBUG("[t=%lu] location created", id); + LOG_DEBUG("[t=%lu] %-18s %p", id, "rgn_stack:", new->rgn_stack); + LOG_DEBUG("[t=%lu] %-18s %p", id, "rgn_defs:", new->rgn_defs); + LOG_DEBUG("[t=%lu] %-18s %p", id, "rgn_defs_stack:", new->rgn_defs_stack); + + return new; +} + +void +trace_destroy_location(trace_location_def_t *loc) +{ + if (loc == NULL) return; + trace_write_location_definition(loc); + LOG_DEBUG("[t=%lu] destroying rgn_stack %p", loc->id, loc->rgn_stack); + stack_destroy(loc->rgn_stack, false, NULL); + if (loc->rgn_defs) + { + LOG_DEBUG("[t=%lu] destroying rgn_defs %p", loc->id, loc->rgn_defs); + 
queue_destroy(loc->rgn_defs, false, NULL); + } + LOG_DEBUG("[t=%lu] destroying rgn_defs_stack %p", loc->id, loc->rgn_defs_stack); + stack_destroy(loc->rgn_defs_stack, false, NULL); + OTF2_AttributeList_Delete(loc->attributes); + LOG_DEBUG("[t=%lu] destroying location", loc->id); + free(loc); + return; +} + +void +trace_add_thread_attributes(trace_location_def_t *self) +{ + OTF2_ErrorCode r = OTF2_SUCCESS; + r = OTF2_AttributeList_AddInt32(self->attributes, attr_cpu, sched_getcpu()); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddUint64(self->attributes, attr_unique_id, self->id); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddStringRef(self->attributes, attr_thread_type, + self->thread_type == otter_thread_initial ? + attr_label_ref[attr_thread_type_initial] : + self->thread_type == otter_thread_worker ? + attr_label_ref[attr_thread_type_worker] : 0); + CHECK_OTF2_ERROR_CODE(r); + return; +} + +void +trace_write_location_definition(trace_location_def_t *loc) +{ + if (loc == NULL) + { + LOG_ERROR("null pointer"); + return; + } + + char location_name[DEFAULT_NAME_BUF_SZ + 1] = {0}; + OTF2_StringRef location_name_ref = get_unique_str_ref(); + snprintf(location_name, DEFAULT_NAME_BUF_SZ, "Thread %lu", loc->id); + + LOG_DEBUG("[t=%lu] locking global def writer", loc->id); + pthread_mutex_t *def_writer_lock = global_def_writer_lock(); + pthread_mutex_lock(def_writer_lock); + + OTF2_GlobalDefWriter *Defs = get_global_def_writer(); + OTF2_GlobalDefWriter_WriteString(Defs, + location_name_ref, + location_name); + + LOG_DEBUG("[t=%lu] writing location definition", loc->id); + OTF2_GlobalDefWriter_WriteLocation(Defs, + loc->ref, + location_name_ref, + loc->type, + loc->events, + loc->location_group); + + LOG_DEBUG("[t=%lu] unlocking global def writer", loc->id); + pthread_mutex_unlock(def_writer_lock); + + return; +} diff --git a/src/otter/trace/trace-region-master.c b/src/otter/trace/trace-region-master.c new file mode 100644 index 0000000..4195d39 --- 
/dev/null +++ b/src/otter/trace/trace-region-master.c @@ -0,0 +1,64 @@ +#include +#include +#include "otter/trace-lookup-macros.h" +#include "otter/trace-attributes.h" +#include "otter/trace-structs.h" +#include "otter/trace-region-master.h" +#include "otter/trace-unique-refs.h" +#include "otter/trace-check-error-code.h" +#include "otter/queue.h" +#include "otter/stack.h" + +/* Defined in trace-archive.c */ +extern OTF2_StringRef attr_name_ref[n_attr_defined][2]; +extern OTF2_StringRef attr_label_ref[n_attr_label_defined]; + +trace_region_def_t * +trace_new_master_region( + trace_location_def_t *loc, + unique_id_t encountering_task_id) +{ + trace_region_def_t *new = malloc(sizeof(*new)); + *new = (trace_region_def_t) { + .ref = get_unique_rgn_ref(), + .role = OTF2_REGION_ROLE_MASTER, + .attributes = OTF2_AttributeList_New(), +#if defined(USE_OMPT_MASKED) + .type = trace_region_masked, +#else + .type = trace_region_master, +#endif + .encountering_task_id = encountering_task_id, + .rgn_stack = NULL, + .attr.master = { + .thread = loc->id + } + }; + + LOG_DEBUG("[t=%lu] created master region %u at %p", + loc->id, new->ref, new); + + /* Add region definition to location's region definition queue */ + queue_push(loc->rgn_defs, (data_item_t) {.ptr = new}); + + return new; +} + +void +trace_destroy_master_region(trace_region_def_t *rgn) +{ + LOG_DEBUG("region %p destroying attribute list %p", rgn, rgn->attributes); + OTF2_AttributeList_Delete(rgn->attributes); + LOG_DEBUG("region %p", rgn); + free(rgn); +} + +void +trace_add_master_attributes(trace_region_def_t *rgn) +{ + OTF2_ErrorCode r = OTF2_SUCCESS; + r = OTF2_AttributeList_AddUint64(rgn->attributes, attr_unique_id, + rgn->attr.master.thread); + CHECK_OTF2_ERROR_CODE(r); + return; +} \ No newline at end of file diff --git a/src/otter/trace/trace-region-parallel.c b/src/otter/trace/trace-region-parallel.c new file mode 100644 index 0000000..12063e3 --- /dev/null +++ b/src/otter/trace/trace-region-parallel.c @@ -0,0 
+1,140 @@ +#include +#include +#include "otter/trace-lookup-macros.h" +#include "otter/trace-attributes.h" +#include "otter/trace-archive.h" +#include "otter/trace-structs.h" +#include "otter/trace-region-parallel.h" +#include "otter/trace-unique-refs.h" +#include "otter/trace-check-error-code.h" +#include "otter/queue.h" +#include "otter/stack.h" + +/* Defined in trace-archive.c */ +extern OTF2_StringRef attr_name_ref[n_attr_defined][2]; +extern OTF2_StringRef attr_label_ref[n_attr_label_defined]; + +/* Defined in trace.c */ +// extern pthread_mutex_t lock_global_def_writer; + +/* Constructor */ +trace_region_def_t * +trace_new_parallel_region( + unique_id_t id, + unique_id_t master, + unique_id_t encountering_task_id, + int flags, + unsigned int requested_parallelism) +{ + trace_region_def_t *new = malloc(sizeof(*new)); + *new = (trace_region_def_t) { + .ref = get_unique_rgn_ref(), + .role = OTF2_REGION_ROLE_PARALLEL, + .attributes = OTF2_AttributeList_New(), + .type = trace_region_parallel, + .encountering_task_id = encountering_task_id, + .rgn_stack = NULL, + .attr.parallel = { + .id = id, + .master_thread = master, + .is_league = flags & ompt_parallel_league ? 
true : false, + .requested_parallelism = requested_parallelism, + .ref_count = 0, + .enter_count = 0, + .lock_rgn = PTHREAD_MUTEX_INITIALIZER, + .rgn_defs = queue_create() + } + }; + return new; +} + +/* Destructor */ +void +trace_destroy_parallel_region(trace_region_def_t *rgn) +{ + if (rgn->type != trace_region_parallel) + { + LOG_ERROR("invalid region type %d", rgn->type); + abort(); + } + + size_t n_defs = queue_length(rgn->attr.parallel.rgn_defs); + LOG_DEBUG("[parallel=%lu] writing nested region definitions (%lu)", + rgn->attr.parallel.id, n_defs); + + pthread_mutex_t *lock_global_def_writer = global_def_writer_lock(); + + /* Lock the global def writer first */ + pthread_mutex_lock(lock_global_def_writer); + + /* Write parallel region's definition */ + trace_write_region_definition(rgn); + + /* write region's nested region definitions */ + trace_region_def_t *r = NULL; + int count=0; + while (queue_pop(rgn->attr.parallel.rgn_defs, (data_item_t*) &r)) + { + LOG_DEBUG("[parallel=%lu] writing region definition %d/%lu (region %3u)", + rgn->attr.parallel.id, count+1, n_defs, r->ref); + count++; + trace_write_region_definition(r); + + /* destroy each region once its definition is written */ + switch (r->type) + { + case trace_region_workshare: + trace_destroy_workshare_region(r); + break; + +#if defined(USE_OMPT_MASKED) + case trace_region_masked: +#else + case trace_region_master: +#endif + trace_destroy_master_region(r); + break; + + case trace_region_synchronise: + trace_destroy_sync_region(r); + break; + + case trace_region_task: + trace_destroy_task_region(r); + break; + + default: + LOG_ERROR("unknown region type %d", r->type); + abort(); + } + } + + /* Release once done */ + pthread_mutex_unlock(lock_global_def_writer); + + /* destroy parallel region once all locations are done with it + and all definitions written */ + OTF2_AttributeList_Delete(rgn->attributes); + queue_destroy(rgn->attr.parallel.rgn_defs, false, NULL); + LOG_DEBUG("region %p (parallel id 
%lu)", rgn, rgn->attr.parallel.id); + free(rgn); + return; +} + +/* Add parallel region attributes */ +void +trace_add_parallel_attributes(trace_region_def_t *rgn) +{ + OTF2_ErrorCode r = OTF2_SUCCESS; + r = OTF2_AttributeList_AddUint64(rgn->attributes, attr_unique_id, + rgn->attr.parallel.id); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddUint32(rgn->attributes, attr_requested_parallelism, + rgn->attr.parallel.requested_parallelism); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_is_league, + rgn->attr.parallel.is_league ? + attr_label_ref[attr_flag_true] : attr_label_ref[attr_flag_false]); + CHECK_OTF2_ERROR_CODE(r); + return; +} diff --git a/src/otter/trace/trace-region-sync.c b/src/otter/trace/trace-region-sync.c new file mode 100644 index 0000000..ef511f6 --- /dev/null +++ b/src/otter/trace/trace-region-sync.c @@ -0,0 +1,61 @@ +#include +#include +#include "otter/trace-lookup-macros.h" +#include "otter/trace-attributes.h" +#include "otter/trace-structs.h" +#include "otter/trace-region-sync.h" +#include "otter/trace-unique-refs.h" +#include "otter/trace-check-error-code.h" +#include "otter/queue.h" +#include "otter/stack.h" + +/* Defined in trace-archive.c */ +extern OTF2_StringRef attr_name_ref[n_attr_defined][2]; +extern OTF2_StringRef attr_label_ref[n_attr_label_defined]; + +trace_region_def_t * +trace_new_sync_region( + trace_location_def_t *loc, + otter_sync_region_t stype, + unique_id_t encountering_task_id) +{ + trace_region_def_t *new = malloc(sizeof(*new)); + *new = (trace_region_def_t) { + .ref = get_unique_rgn_ref(), + .role = SYNC_TYPE_TO_OTF2_REGION_ROLE(stype), + .attributes = OTF2_AttributeList_New(), + .type = trace_region_synchronise, + .encountering_task_id = encountering_task_id, + .rgn_stack = NULL, + .attr.sync = { + .type = stype, + } + }; + + LOG_DEBUG("[t=%lu] created sync region %u at %p", + loc->id, new->ref, new); + + /* Add region definition to location's region definition queue */ + 
queue_push(loc->rgn_defs, (data_item_t) {.ptr = new}); + + return new; +} + +void +trace_destroy_sync_region(trace_region_def_t *rgn) +{ + LOG_DEBUG("region %p destroying attribute list %p", rgn, rgn->attributes); + OTF2_AttributeList_Delete(rgn->attributes); + LOG_DEBUG("region %p", rgn); + free(rgn); +} + +void +trace_add_sync_attributes(trace_region_def_t *rgn) +{ + OTF2_ErrorCode r = OTF2_SUCCESS; + r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_sync_type, + SYNC_TYPE_TO_STR_REF(rgn->attr.sync.type)); + CHECK_OTF2_ERROR_CODE(r); + return; +} diff --git a/src/otter/trace/trace-region-task.c b/src/otter/trace/trace-region-task.c new file mode 100644 index 0000000..ad81db7 --- /dev/null +++ b/src/otter/trace/trace-region-task.c @@ -0,0 +1,151 @@ +#include +#include +#include "otter/trace-lookup-macros.h" +#include "otter/trace-attributes.h" +#include "otter/trace-structs.h" +#include "otter/trace-region-task.h" +#include "otter/trace-unique-refs.h" +#include "otter/trace-check-error-code.h" +#include "otter/queue.h" +#include "otter/stack.h" +#include "otter/char_ref_registry.hpp" + +/* Defined in trace-archive.c */ +extern OTF2_StringRef attr_name_ref[n_attr_defined][2]; +extern OTF2_StringRef attr_label_ref[n_attr_label_defined]; + +trace_region_def_t * +trace_new_task_region( + trace_location_def_t *loc, + trace_region_def_t *parent_task_region, + unique_id_t id, + otter_task_flag_t flags, + int has_dependences, + otter_src_location_t *src_location) +{ + /* Create a region representing a task. Add to the location's region + definition queue. 
*/ + + /* A task maintains a stack of the active regions encountered during its + execution up to a task-switch event, which is restored to the executing + thread when the task is resumed */ + + LOG_INFO_IF((parent_task_region == NULL), + "[t=%lu] parent task region is null", loc->id); + + LOG_DEBUG_IF((src_location), "got src_location(file=%s, func=%s, line=%d)\n", src_location->file, src_location->func, src_location->line); + + // if (src_location) { + // fprintf(stderr, "[%s] got src_location(file=%s, func=%s, line=%d)\n", + // __func__, + // src_location->file, + // src_location->func, + // src_location->line + // ); + // } + + trace_region_def_t *new = malloc(sizeof(*new)); + *new = (trace_region_def_t) { + .ref = get_unique_rgn_ref(), + .role = OTF2_REGION_ROLE_TASK, + .attributes = OTF2_AttributeList_New(), + .type = trace_region_task, + .rgn_stack = stack_create(), + .attr.task = { + .id = id, + .type = flags & 0xF, + .flags = flags, + .has_dependences = has_dependences, + .parent_id = parent_task_region != NULL ? + parent_task_region->attr.task.id : OTF2_UNDEFINED_UINT64, + .parent_type = parent_task_region != NULL ? + parent_task_region->attr.task.type : OTF2_UNDEFINED_UINT32, + .task_status = 0 /* no status */, + + .source_file_name_ref = src_location ? char_ref_registry_insert(get_global_str_registry(), src_location->file) : 0, + .source_func_name_ref = src_location ? char_ref_registry_insert(get_global_str_registry(), src_location->func) : 0, + .source_line_number = src_location ? 
src_location->line : 0, + } + }; + new->encountering_task_id = new->attr.task.parent_id; + + LOG_DEBUG("[t=%lu] created region %u for task %lu at %p", + loc->id, new->ref, new->attr.task.id, new); + + /* Add region definition to location's region definition queue */ + queue_push(loc->rgn_defs, (data_item_t) {.ptr = new}); + + return new; +} + +void +trace_destroy_task_region(trace_region_def_t *rgn) +{ + LOG_WARN_IF( + (!(rgn->attr.task.task_status == otter_task_complete + || rgn->attr.task.task_status == otter_task_cancel)), + "destroying task region before task-complete/task-cancel"); + LOG_DEBUG("region %p destroying attribute list %p", rgn, rgn->attributes); + OTF2_AttributeList_Delete(rgn->attributes); + LOG_DEBUG("region %p destroying active regions stack %p", rgn, rgn->rgn_stack); + stack_destroy(rgn->rgn_stack, false, NULL); + LOG_DEBUG("region %p", rgn); + free(rgn); +} + +void +trace_add_task_attributes(trace_region_def_t *rgn) +{ + OTF2_ErrorCode r = OTF2_SUCCESS; + r = OTF2_AttributeList_AddUint64(rgn->attributes, attr_unique_id, + rgn->attr.task.id); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_task_type, + TASK_TYPE_TO_STR_REF(rgn->attr.task.type)); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddUint32(rgn->attributes, attr_task_flags, + rgn->attr.task.flags); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddUint64(rgn->attributes, attr_parent_task_id, + rgn->attr.task.parent_id); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_parent_task_type, + TASK_TYPE_TO_STR_REF(rgn->attr.task.parent_type)); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddUint8(rgn->attributes, attr_task_has_dependences, + rgn->attr.task.has_dependences); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddUint8(rgn->attributes, attr_task_is_undeferred, + rgn->attr.task.flags & ompt_task_undeferred); + CHECK_OTF2_ERROR_CODE(r); + r = 
OTF2_AttributeList_AddUint8(rgn->attributes, attr_task_is_untied, + rgn->attr.task.flags & ompt_task_untied); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddUint8(rgn->attributes, attr_task_is_final, + rgn->attr.task.flags & ompt_task_final); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddUint8(rgn->attributes, attr_task_is_mergeable, + rgn->attr.task.flags & ompt_task_mergeable); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddUint8(rgn->attributes, attr_task_is_merged, + rgn->attr.task.flags & ompt_task_merged); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_prior_task_status, + TASK_STATUS_TO_STR_REF(rgn->attr.task.task_status)); + CHECK_OTF2_ERROR_CODE(r); + + // Add source location if defined for this task + if (rgn->attr.task.source_file_name_ref != 0) + { + r = OTF2_AttributeList_AddUint32(rgn->attributes, attr_source_line_number, + rgn->attr.task.source_line_number); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_source_file_name, + rgn->attr.task.source_file_name_ref); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_source_func_name, + rgn->attr.task.source_func_name_ref); + CHECK_OTF2_ERROR_CODE(r); + } + return; +} diff --git a/src/otter/trace/trace-region-workshare.c b/src/otter/trace/trace-region-workshare.c new file mode 100644 index 0000000..f791a6e --- /dev/null +++ b/src/otter/trace/trace-region-workshare.c @@ -0,0 +1,66 @@ +#include +#include +#include "otter/trace-lookup-macros.h" +#include "otter/trace-attributes.h" +#include "otter/trace-structs.h" +#include "otter/trace-region-workshare.h" +#include "otter/trace-unique-refs.h" +#include "otter/trace-check-error-code.h" +#include "otter/queue.h" +#include "otter/stack.h" + +/* Defined in trace-archive.c */ +extern OTF2_StringRef attr_name_ref[n_attr_defined][2]; +extern OTF2_StringRef attr_label_ref[n_attr_label_defined]; + +trace_region_def_t * 
+trace_new_workshare_region( + trace_location_def_t *loc, + otter_work_t wstype, + uint64_t count, + unique_id_t encountering_task_id) +{ + trace_region_def_t *new = malloc(sizeof(*new)); + *new = (trace_region_def_t) { + .ref = get_unique_rgn_ref(), + .role = WORK_TYPE_TO_OTF2_REGION_ROLE(wstype), + .attributes = OTF2_AttributeList_New(), + .type = trace_region_workshare, + .encountering_task_id = encountering_task_id, + .rgn_stack = NULL, + .attr.wshare = { + .type = wstype, + .count = count + } + }; + + LOG_DEBUG("[t=%lu] created workshare region %u at %p", + loc->id, new->ref, new); + + /* Add region definition to location's region definition queue */ + queue_push(loc->rgn_defs, (data_item_t) {.ptr = new}); + + return new; +} + +void +trace_destroy_workshare_region(trace_region_def_t *rgn) +{ + LOG_DEBUG("region %p destroying attribute list %p", rgn, rgn->attributes); + OTF2_AttributeList_Delete(rgn->attributes); + LOG_DEBUG("region %p", rgn); + free(rgn); +} + +void +trace_add_workshare_attributes(trace_region_def_t *rgn) +{ + OTF2_ErrorCode r = OTF2_SUCCESS; + r = OTF2_AttributeList_AddStringRef(rgn->attributes, attr_workshare_type, + WORK_TYPE_TO_STR_REF(rgn->attr.wshare.type)); + CHECK_OTF2_ERROR_CODE(r); + r = OTF2_AttributeList_AddUint64(rgn->attributes, attr_workshare_count, + rgn->attr.wshare.count); + CHECK_OTF2_ERROR_CODE(r); + return; +} \ No newline at end of file diff --git a/src/otter/trace/trace-structs.c b/src/otter/trace/trace-structs.c new file mode 100644 index 0000000..f1cd610 --- /dev/null +++ b/src/otter/trace/trace-structs.c @@ -0,0 +1,67 @@ +#include "otter/trace-lookup-macros.h" +#include "otter/trace-structs.h" + +#include "otter/queue.h" +#include "otter/stack.h" + +/* pretty-print region definitions */ +// void +// trace_region_pprint( +// FILE *fp, +// trace_region_def_t *r, +// const char func[], +// const int line) +// { +// if (fp == NULL) +// fp = stderr; + +// switch (r->type) +// { +// case trace_region_parallel: +// 
fprintf(fp, "%s:%d: Parallel(id=%lu, master=%lu, ref_count=%u, enter_count=%u) in %s:%d\n", +// __func__, __LINE__, +// r->attr.parallel.id, +// r->attr.parallel.master_thread, +// r->attr.parallel.ref_count, +// r->attr.parallel.enter_count, +// func, line +// ); +// break; +// case trace_region_workshare: +// fprintf(fp, "%s:%d: Work(type=%s, count=%lu) in %s:%d\n", +// __func__, __LINE__, +// OMPT_WORK_TYPE_TO_STR(r->attr.wshare.type), +// r->attr.wshare.count, +// func, line +// ); +// break; +// case trace_region_synchronise: +// fprintf(fp, "%s:%d: Sync(type=%s) in %s:%d\n", +// __func__, __LINE__, +// OMPT_SYNC_TYPE_TO_STR(r->attr.sync.type), +// func, line +// ); +// break; +// case trace_region_task: +// fprintf(fp, "%s:%d: Task(id=%lu, type=%s) in %s:%d\n", +// __func__, __LINE__, +// r->attr.task.id, +// OMPT_TASK_TYPE_TO_STR(OMPT_TASK_TYPE_BITS & r->attr.task.type), +// func, line +// ); +// break; +// #if defined(USE_OMPT_MASKED) +// case trace_region_masked: +// fprintf(fp, "%s:%d: Masked(thread=%lu) in %s:%d\n", +// #else +// case trace_region_master: +// fprintf(fp, "%s:%d: Master(thread=%lu) in %s:%d\n", +// #endif +// __func__, __LINE__, +// r->attr.master.thread, +// func, line +// ); +// break; +// } +// return; +// } diff --git a/src/otter/trace/trace-unique-refs.c b/src/otter/trace/trace-unique-refs.c new file mode 100644 index 0000000..ae27a85 --- /dev/null +++ b/src/otter/trace/trace-unique-refs.c @@ -0,0 +1,31 @@ +#include "otter/trace-unique-refs.h" +#include "otter/trace-types.h" + +static uint64_t +get_unique_uint64_ref(trace_ref_type_t ref_type) +{ + static uint64_t id[NUM_REF_TYPES] = {0}; + return __sync_fetch_and_add(&id[ref_type], 1L); +} + +static uint32_t +get_unique_uint32_ref(trace_ref_type_t ref_type) +{ + static uint32_t id[NUM_REF_TYPES] = {0}; + return __sync_fetch_and_add(&id[ref_type], 1); +} + +OTF2_RegionRef get_unique_rgn_ref(void) +{ + return (OTF2_RegionRef) get_unique_uint32_ref(trace_region); +} + +OTF2_StringRef 
get_unique_str_ref(void) +{ + return (OTF2_StringRef) get_unique_uint32_ref(trace_string); +} + +OTF2_LocationRef get_unique_loc_ref(void) +{ + return (OTF2_LocationRef) get_unique_uint64_ref(trace_location); +} diff --git a/src/otter/types/char_ref_registry.cpp b/src/otter/types/char_ref_registry.cpp new file mode 100644 index 0000000..f1eb421 --- /dev/null +++ b/src/otter/types/char_ref_registry.cpp @@ -0,0 +1,54 @@ +#include +#include "otter/char_ref_registry.hpp" + +class char_ref_registry { +public: + char_ref_registry(char_ref_registry_label_cbk labeller, char_ref_registry_delete_cbk deleter) : + delete_key_value_pair(deleter), get_label(labeller) + { + assert(labeller != nullptr); + }; + ~char_ref_registry(); + char_ref_registry(const char_ref_registry&) = delete; + char_ref_registry(char_ref_registry&&) = delete; + char_ref_registry& operator=(const char_ref_registry&) = delete; + char_ref_registry& operator=(char_ref_registry&&) = delete; + uint32_t insert(const char *); +private: + char_ref_registry_label_cbk get_label; + char_ref_registry_delete_cbk delete_key_value_pair; + std::map registry_m; +}; + +char_ref_registry* char_ref_registry_make( + char_ref_registry_label_cbk labeller, + char_ref_registry_delete_cbk deleter) +{ + return new char_ref_registry(labeller, deleter); +} + +void char_ref_registry_delete(char_ref_registry *r) { + delete r; +} + +uint32_t char_ref_registry_insert(char_ref_registry *registry, const char *key) +{ + return registry->insert(key); +} + +uint32_t char_ref_registry::insert(const char *key) { + auto label = registry_m[key]; + if (label == 0) { // key not previously added + label = this->get_label(); + registry_m[key] = label; + } + return label; +} + +char_ref_registry::~char_ref_registry() { + if (this->delete_key_value_pair) { + for (auto&[key, value] : registry_m) { + this->delete_key_value_pair(key, value); + } + } +} diff --git a/src/otter/dt-queue.c b/src/otter/types/dt-queue.c similarity index 84% rename from 
src/otter/dt-queue.c rename to src/otter/types/dt-queue.c index 60d4a65..9c95c4d 100644 --- a/src/otter/dt-queue.c +++ b/src/otter/types/dt-queue.c @@ -158,37 +158,6 @@ queue_append( return true; } -/* scan through the items in a queue without modifying the queue - write the current queue item to dest - save the address of the next item in the queue to [next] - if [next] == NULL, start with queue->head - (NOTE: up to the caller to track how many items to scan, otherwise will loop) -*/ -void -queue_scan( - otter_queue_t *q, - data_item_t *dest, - void **next) -{ - if ((next == NULL) || (dest == NULL)) - { - LOG_ERROR("null pointer"); - return; - } - - node_t *next_node = (node_t*) *next; - - if (next_node == NULL) - { - *dest = q->head->data; - *next = (void*) q->head->next; - } else { - *dest = next_node->data; - *next = (void*) next_node->next; - } - return; -} - #if DEBUG_LEVEL >= 4 void queue_print(otter_queue_t *q) diff --git a/src/otter/dt-stack.c b/src/otter/types/dt-stack.c similarity index 86% rename from src/otter/dt-stack.c rename to src/otter/types/dt-stack.c index 54d7d4c..3bab578 100644 --- a/src/otter/dt-stack.c +++ b/src/otter/types/dt-stack.c @@ -14,6 +14,7 @@ struct node_t { struct otter_stack_t { node_t *head; + node_t *base; size_t size; }; @@ -28,6 +29,7 @@ stack_create(void) } LOG_DEBUG("%p", s); s->head = NULL; + s->base = NULL; s->size = 0; return s; } @@ -53,6 +55,7 @@ stack_push(otter_stack_t *s, data_item_t item) node->next = s->head; s->head = node; s->size += 1; + if (s->size == 1) s->base = node; LOG_DEBUG("%p[0]=%p", s, item.ptr); @@ -82,6 +85,7 @@ stack_pop(otter_stack_t *s, data_item_t *dest) s->size -= 1; free(node); } + if (s->size == 0) s->base = NULL; LOG_DEBUG_IF((dest != NULL), "%p[0] -> %p", s, dest->ptr); LOG_WARN_IF(dest == NULL, "popped item without returning value " "(no destination pointer)"); @@ -128,6 +132,26 @@ stack_destroy(otter_stack_t *s, bool items, data_destructor_t destructor) return; } +bool 
+stack_transfer(otter_stack_t *dest, otter_stack_t *src) +{ + if (dest == NULL) + { + LOG_ERROR("Cannot transfer to null stack."); + return false; + } + if (src == NULL || src->size == 0) return true; + + src->base->next = dest->head; + dest->head = src->head; + if (dest->size == 0) dest->base = src->base; + dest->size += src->size; + src->head = src->base = NULL; + src->size = 0; + + return true; +} + #if DEBUG_LEVEL >= 4 void stack_print(otter_stack_t *s) diff --git a/src/otter/otter-structs.c b/src/otter/types/otter-structs.c similarity index 69% rename from src/otter/otter-structs.c rename to src/otter/types/otter-structs.c index f0bb682..5452687 100644 --- a/src/otter/otter-structs.c +++ b/src/otter/types/otter-structs.c @@ -1,11 +1,22 @@ #include #include -#include "otter/otter-ompt-header.h" +// #include "otter/otter-ompt-header.h" #include "otter/otter.h" #include "otter/otter-structs.h" #include "otter/trace.h" #include "otter/trace-structs.h" +/* Used as an array index to keep track of unique ids for different entities */ +typedef enum { + id_timestamp, + id_parallel, + id_thread, + id_task, + NUM_ID_TYPES +} unique_id_type_t; + +static unique_id_t get_unique_id(unique_id_type_t id_type); + parallel_data_t * new_parallel_data( unique_id_t thread_id, @@ -38,7 +49,7 @@ void parallel_destroy(parallel_data_t *parallel_data) } thread_data_t * -new_thread_data(ompt_thread_t type) +new_thread_data(otter_thread_t type) { thread_data_t *thread_data = malloc(sizeof(*thread_data)); *thread_data = (thread_data_t) { @@ -70,8 +81,9 @@ new_task_data( trace_location_def_t *loc, trace_region_def_t *parent_task_region, unique_id_t task_id, - ompt_task_flag_t flags, - int has_dependences) + otter_task_flag_t flags, + int has_dependences, + otter_src_location_t *src_location) { task_data_t *new = malloc(sizeof(*new)); *new = (task_data_t) { @@ -85,7 +97,8 @@ new_task_data( parent_task_region, new->id, flags, - has_dependences + has_dependences, + src_location ); return new; 
} @@ -95,3 +108,30 @@ void task_destroy(task_data_t *task_data) free(task_data); return; } + +unique_id_t get_unique_parallel_id(void) +{ + return get_unique_id(id_parallel); +} + +unique_id_t get_unique_thread_id(void) +{ + return get_unique_id(id_thread); +} + +unique_id_t get_unique_task_id(void) +{ + return get_unique_id(id_task); +} + +unique_id_t get_dummy_time(void) +{ + return get_unique_id(id_timestamp); +} + +unique_id_t +get_unique_id(unique_id_type_t id_type) +{ + static unique_id_t id[NUM_ID_TYPES] = {0,0,0,0}; + return __sync_fetch_and_add(&id[id_type], 1L); +} diff --git a/src/python/.gitignore b/src/python/.gitignore deleted file mode 100644 index 45fb1bb..0000000 --- a/src/python/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -/*/ -__pycache__/ -!/otter/ -/*.dot \ No newline at end of file diff --git a/src/python/otter/__init__.py b/src/python/otter/__init__.py deleted file mode 100644 index 1cb856f..0000000 --- a/src/python/otter/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Key functionality: - -- load OTF2 trace and extract locations, events, regions, attributes so they can be easily looked up -- parse loaded trace to determine what graph nodes to create and where edges are required -- export execution graph to file -""" \ No newline at end of file diff --git a/src/python/otter/__main__.py b/src/python/otter/__main__.py deleted file mode 100644 index d093987..0000000 --- a/src/python/otter/__main__.py +++ /dev/null @@ -1,297 +0,0 @@ -import argparse -import warnings -import igraph as ig -import otf2 -from itertools import chain, count, groupby -from collections import Counter -from otf2.events import Enter, Leave -from otter.trace import AttributeLookup, RegionLookup, yield_chunks, process_chunk -from otter.styling import colormap_region_type, colormap_edge_type, shapemap_region_type -from otter.helpers import set_tuples, reject_task_create, attr_handler, label_clusters, descendants_if, attr_getter, pass_master_event - - -def main(): - parser = 
argparse.ArgumentParser( - prog="python3 -m otter", - description='Convert an Otter OTF2 trace archive to its execution graph representation', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('anchorfile', help='OTF2 anchor file') - parser.add_argument('-o', '--output', dest='output', help='output file') - parser.add_argument('-v', '--verbose', action='store_true', dest='verbose', - help='print chunks as they are generated') - parser.add_argument('-i', '--interact', action='store_true', dest='interact', - help='drop to an interactive shell upon completion') - parser.add_argument('-ns', '--no-style', action='store_true', default=False, dest='nostyle', - help='do not apply any styling to the graph nodes') - args = parser.parse_args() - - if args.output is None and not args.interact: - parser.error("must select at least one of -[o|i]") - - if args.interact: - print("Otter launched interactively") - - anchorfile = args.anchorfile - - # Convert event stream into graph chunks - print(f"loading OTF2 anchor file: {anchorfile}") - print("generating chunks from event stream...") - with otf2.reader.open(anchorfile) as tr: - attr = AttributeLookup(tr.definitions.attributes) - regions = RegionLookup(tr.definitions.regions) - results = (process_chunk(chunk, verbose=args.verbose) for chunk in yield_chunks(tr)) - items = zip(*results) - chunk_types = next(items) - chain_sort_next = lambda x: sorted(chain(*next(x)), key=lambda t: t[0]) - task_links, task_crt_ts, task_leave_ts = (chain_sort_next(items) for _ in range(3)) - g_list = next(items) - - # Make function for looking up event attributes - event_attr = attr_getter(attr) - - task_types, *_, task_ids = zip(*[r.name.split() for r in regions.values() if r.region_role == otf2.RegionRole.TASK]) - task_types, task_ids = (zip(*sorted(zip(task_types, map(int, task_ids)), key=lambda t: t[1]))) - - # Gather last leave times per explicit task - task_end_ts = {k: max(u[1] for u in v) for k, v in 
groupby(task_leave_ts, key=lambda t: t[0])} - - # Task tree showing parent-child links - task_tree = ig.Graph(edges=task_links, directed=True) - task_tree.vs['unique_id'] = task_ids - task_tree.vs['crt_ts'] = [t[1] for t in task_crt_ts] - task_tree.vs['end_ts'] = [task_end_ts[node.index] if node.index in task_end_ts else None for node in task_tree.vs] - task_tree.vs['parent_index'] = list(chain((None,), list(zip(*sorted(task_links, key=lambda t: t[1])))[0])) - task_tree.vs['task_type'] = task_types - if not args.nostyle: - task_tree.vs['style'] = 'filled' - task_tree.vs['color'] = ['red' if v['task_type'] == 'implicit' else 'gray' for v in task_tree.vs] - tt_layout = task_tree.layout_reingold_tilford() - - # Count chunks by type - print("graph chunks created:") - for k, v in Counter(chunk_types).items(): - print(f" {k:18s} {v:8d}") - - # Collect all chunks - print("combining chunks") - g = ig.disjoint_union(g_list) - num_nodes = g.vcount() - - print("{:20s} {:6d}".format("nodes created", num_nodes)) - - if 'task_cluster_id' not in g.vs.attribute_names(): - g.vs['task_cluster_id'] = None - g.vs['sync_cluster_id'] = None - - # Collapse by parallel sequence ID - print("contracting by parallel sequence ID") - g.vs['cluster'] = label_clusters(g.vs, lambda v: v['parallel_sequence_id'] is not None, 'parallel_sequence_id') - nodes_before = num_nodes - g.contract_vertices(g.vs['cluster'], combine_attrs=attr_handler(attr=attr)) - num_nodes = g.vcount() - print("{:20s} {:6d} -> {:6d} ({:6d})".format("nodes updated", nodes_before, num_nodes, num_nodes-nodes_before)) - - # Collapse by single-begin/end event - def is_single_executor(v): - return type(v['event']) in [Enter, Leave] and event_attr(v['event'], 'region_type') == 'single_executor' - print("contracting by single-begin/end event") - g.vs['cluster'] = label_clusters(g.vs, is_single_executor, 'event') - nodes_before = num_nodes - g.contract_vertices(g.vs['cluster'], combine_attrs=attr_handler(attr=attr)) - num_nodes = 
g.vcount() - print("{:20s} {:6d} -> {:6d} ({:6d})".format("nodes updated", nodes_before, num_nodes, num_nodes-nodes_before)) - - # Collapse by master-begin/end event - def is_master(v): - return type(v['event']) in [Enter, Leave] and event_attr(v['event'], 'region_type') == 'master' - print("contracting by master-begin/end event") - g.vs['cluster'] = label_clusters(g.vs, is_master, 'event') - nodes_before = num_nodes - g.contract_vertices(g.vs['cluster'], combine_attrs=attr_handler(events=pass_master_event, attr=attr)) - num_nodes = g.vcount() - print("{:20s} {:6d} -> {:6d} ({:6d})".format("nodes updated", nodes_before, num_nodes, num_nodes-nodes_before)) - - # Itermediate clean-up: for each master region, remove edges that connect - # the same nodes as the master region - master_enter = filter(lambda v: event_attr(v['event'], 'region_type') == 'master' and event_attr(v['event'], 'endpoint') == 'enter', g.vs) - master_enter_nodes = {v['event']: v for v in master_enter} - master_leave = filter(lambda v: event_attr(v['event'], 'region_type') == 'master' and event_attr(v['event'], 'endpoint') == 'leave', g.vs) - master_node_pairs = ((master_enter_nodes[leave_node['master_enter_event']], leave_node) for leave_node in master_leave) - def yield_neighbours(): - for enter_node, leave_node in master_node_pairs: - (p,), (s,) = enter_node.predecessors(), leave_node.successors() - yield p, s - neighbour_set = {(p,s) for p, s in yield_neighbours()} - redundant_edges = list(filter(lambda e: (e.source_vertex, e.target_vertex) in neighbour_set, g.es)) - print(f"deleting redundant edges due to master regions: {len(redundant_edges)}") - g.delete_edges(redundant_edges) - - # Collapse by (task-ID, endpoint) to get 1 subgraph per task - for v in g.vs: - if type(v['event']) in [Enter, Leave] and event_attr(v['event'], 'region_type') == 'explicit_task': - v['task_cluster_id'] = (event_attr(v['event'], 'unique_id'), event_attr(v['event'], 'endpoint')) - print("contracting by task ID & 
endpoint") - g.vs['cluster'] = label_clusters(g.vs, lambda v: v['task_cluster_id'] is not None, 'task_cluster_id') - nodes_before = num_nodes - g.contract_vertices(g.vs['cluster'], - combine_attrs=attr_handler(events=reject_task_create, tuples=set_tuples, attr=attr)) - num_nodes = g.vcount() - print("{:20s} {:6d} -> {:6d} ({:6d})".format("nodes updated", nodes_before, num_nodes, num_nodes-nodes_before)) - - # Collapse by task ID where there are no links between to combine task nodes with nothing nested within - def is_empty_task_region(v): - if v['task_cluster_id'] is None: - return False - if type(v['event']) in [Enter, Leave]: - return (type(v['event']) is Leave and v.indegree() == 0) or \ - (type(v['event']) is Enter and v.outdegree() == 0) - if type(v['event']) is list and set(map(type, v['event'])) in [{Enter}, {Leave}]: - return (set(map(type, v['event'])) == {Leave} and v.indegree() == 0) or \ - (set(map(type, v['event'])) == {Enter} and v.outdegree() == 0) - print("contracting by task ID where there are no nested nodes") - g.vs['cluster'] = label_clusters(g.vs, is_empty_task_region, lambda v: v['task_cluster_id'][0]) - nodes_before = num_nodes - g.contract_vertices(g.vs['cluster'], combine_attrs=attr_handler(events=reject_task_create, tuples=set_tuples, attr=attr)) - num_nodes = g.vcount() - print("{:20s} {:6d} -> {:6d} ({:6d})".format("nodes updated", nodes_before, num_nodes, num_nodes-nodes_before)) - - # Collapse redundant sync-enter/leave node pairs by labelling unique pairs of nodes identified by their shared edge - dummy_counter = count() - for e in g.es: - node_types = set() - for v in (e.source_vertex, e.target_vertex): - if type(v['event']) is not list: - node_types.add(event_attr(v['event'], 'region_type')) - else: - for event in v['event']: - node_types.add(event_attr(event, 'region_type')) - if node_types in [{'barrier_implicit'}, {'barrier_explicit'}, {'taskwait'}, {'loop'}] and \ - e.source_vertex.attributes().get('sync_cluster_id', None) is 
None and \ - e.target_vertex.attributes().get('sync_cluster_id', None) is None: - value = next(dummy_counter) - e.source_vertex['sync_cluster_id'] = e.target_vertex['sync_cluster_id'] = value - print("contracting redundant sync-enter/leave node pairs") - g.vs['cluster'] = label_clusters(g.vs, lambda v: v['sync_cluster_id'] is not None, 'sync_cluster_id') - nodes_before = num_nodes - g.contract_vertices(g.vs['cluster'], combine_attrs=attr_handler(tuples=set_tuples, attr=attr)) - num_nodes = g.vcount() - print("{:20s} {:6d} -> {:6d} ({:6d})".format("nodes updated", nodes_before, num_nodes, num_nodes-nodes_before)) - - # Unpack the region_type attribute - for v in g.vs: - if type(v['event']) is list: - v['region_type'], = set([event_attr(e, 'region_type') for e in v['event']]) - v['endpoint'] = set([event_attr(e, 'endpoint') for e in v['event']]) - else: - v['region_type'] = event_attr(v['event'], 'region_type') - v['endpoint'] = event_attr(v['event'], 'endpoint') - if type(v['endpoint']) is set and len(v['endpoint']) == 1: - v['endpoint'], = v['endpoint'] - - # Apply taskwait synchronisation - print("applying taskwait synchronisation") - for twnode in g.vs.select(lambda v: v['region_type'] == 'taskwait'): - parents = set(task_tree.vs[event_attr(e, 'encountering_task_id')] for e in twnode['event']) - tw_encounter_ts = {event_attr(e, 'encountering_task_id'): e.time for e in twnode['event'] if type(e) is Enter} - children = [c.index for c in chain(*[p.neighbors(mode='out') for p in parents]) - if c['crt_ts'] < tw_encounter_ts[c['parent_index']] < c['end_ts']] - nodes = [v for v in g.vs if v['region_type'] == 'explicit_task' - and event_attr(v['event'], 'unique_id') in children - and v['endpoint'] != 'enter'] - ecount = g.ecount() - g.add_edges([(v.index, twnode.index) for v in nodes]) - g.es[ecount:]['type'] = 'taskwait' - - def event_time_per_task(event): - """Return the map: encountering task id -> event time for all encountering tasks in the event""" - if type(event) 
is list: - return {event_attr(e, 'encountering_task_id'): e.time for e in event} - return {event_attr(event, 'encountering_task_id'): event.time} - - # Apply taskgroup synchronisation - print("applying taskgroup synchronisation") - for tgnode in g.vs.select(lambda v: v['region_type'] == 'taskgroup' and v['endpoint'] == 'leave'): - tg_enter_ts = event_time_per_task(tgnode['taskgroup_enter_event']) - tg_leave_ts = event_time_per_task(tgnode['event']) - parents = [task_tree.vs[k] for k in tg_enter_ts] - children = [c for c in chain(*[p.neighbors(mode='out') for p in parents]) - if tg_enter_ts[c['parent_index']] < c['crt_ts'] < tg_leave_ts[c['parent_index']]] - descendants = list(chain(*[descendants_if(c, cond=lambda x: x['task_type'] != 'implicit') for c in children])) - nodes = [v for v in g.vs if v['region_type'] == 'explicit_task' - and event_attr(v['event'], 'unique_id') in descendants - and v['endpoint'] != 'enter'] - ecount = g.ecount() - g.add_edges([(v.index, tgnode.index) for v in nodes]) - g.es[ecount:]['type'] = 'taskgroup' - - # Apply styling if desired - if not args.nostyle: - print("applying node and edge styline") - g.vs['color'] = [colormap_region_type[v['region_type']] for v in g.vs] - g.vs['style'] = 'filled' - g.vs['shape'] = [shapemap_region_type[v['region_type']] for v in g.vs] - g.es['color'] = [colormap_edge_type[e.attributes().get('type', None)] for e in g.es] - g.vs['label'] = ["{}".format(event_attr(v['event'], 'unique_id')) - if any(s in v['region_type'] for s in ['explicit', 'initial', 'parallel']) else " " for v in g.vs] - - g.simplify(combine_edges='first') - - # Clean up redundant attributes - for item in ['task_cluster_id', 'parallel_sequence_id', 'cluster', 'sync_cluster_id']: - if item in g.vs.attribute_names(): - print(f"deleting vertex attribute '{item}'") - del g.vs[item] - - if args.output: - print(f"writing graph to '{args.output}'") - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - try: - 
g.write(args.output) - except OSError as oserr: - print(f"igraph error: {oserr}") - print(f"failed to write to file '{args.output}'") - - if args.interact: - import atexit - import code - import os - import readline - readline.parse_and_bind("tab: complete") - - hfile = os.path.join(os.path.expanduser("~"), ".otter_history") - - try: - readline.read_history_file(hfile) - numlines = readline.get_current_history_length() - except FileNotFoundError: - open(hfile, 'wb').close() - numlines = 0 - - def append_history(n, f): - newlines = readline.get_current_history_length() - readline.set_history_length(1000) - readline.append_history_file(newlines - n, f) - - atexit.register(append_history, numlines, hfile) - - k = "" - for k, v in locals().items(): - if g is v: - break - - banner = \ -f""" -Graph '{k}' has {g.vcount()} nodes and {g.ecount()} edges - -Entering interactive mode, use: - ig.plot({k}, [target="..."], ...) to view or plot to file - {k}.write_*() to save a representation of the graph e.g. 
{k}.write_dot("graph.dot") -""" - Console = code.InteractiveConsole(locals=locals()) - Console.interact(banner=banner, exitmsg=f"history saved to {hfile}") - - -if __name__ == "__main__": - main() diff --git a/src/python/otter/helpers.py b/src/python/otter/helpers.py deleted file mode 100644 index 67a25a8..0000000 --- a/src/python/otter/helpers.py +++ /dev/null @@ -1,126 +0,0 @@ -from itertools import chain, count -from collections import defaultdict -from otf2.events import Enter, Leave, ThreadTaskCreate - -""" -Contains assorted helper functions for __main__.py and trace.py -""" - - -def attr_getter(attr_lookup): - """Make a function to lookup an attribute of some event by its name""" - def event_attr_getter(evt, name): - if type(evt) is list: - result, = set([e.attributes[attr_lookup[name]] for e in evt]) - return result - elif type(evt) in [Enter, Leave, ThreadTaskCreate]: - return evt.attributes[attr_lookup[name]] - else: - raise TypeError(f"unexpected type: {type(evt)}") - return event_attr_getter - - -# Helpers related to manipulating the nodes of an igraph.Graph: - - -def chain_lists(lists): - """Possible argument to attr_handler()""" - return list(chain(*lists)) - - -def set_tuples(tuples): - """Possible argument to attr_handler()""" - s = set(tuples) - if len(s) == 1: - return s.pop() - else: - return s - - -def pass_args(args): - """Possible argument to attr_handler()""" - return args - - -def pass_single_executor(events, **kw): - """Possible argument to attr_handler()""" - region_types = {e.attributes[kw['attr']['region_type']] for e in events} - if region_types == {'single_other', 'single_executor'}: - single_executor, = filter(lambda e: e.attributes[kw['attr']['region_type'] ]=='single_executor', events) - return single_executor - else: - return events - -def pass_master_event(events, **kw): - """Possible argument to attr_handler()""" - region_types = {e.attributes[kw['attr']['region_type']] for e in events} - if region_types == {'master'} and 
len(set(events)) == 1: - return events[0] - else: - return events - - -def reject_task_create(events, **kw): - """Possible argument to attr_handler()""" - events = [e for e in events if type(e) is not ThreadTaskCreate] - if len(events) == 1: - return events[0] - else: - return events - - -def attr_handler(events=pass_single_executor, ints=min, lists=chain_lists, tuples=set, **kw): - """Make a function for combining lists of node attributes according to their type""" - def attr_combiner(args): - if len(args) == 1: - return args[0] - else: - if all([isinstance(obj, int) for obj in args]): - return ints(args) - elif all([isinstance(obj, list) for obj in args]): - return lists(args) - elif all([isinstance(obj, tuple) for obj in args]): - return tuples(args) - elif all([type(obj) in [Enter, Leave, ThreadTaskCreate] for obj in args]): - return events(args, **kw) - else: - return args[0] - return attr_combiner - - -def label_clusters(vs, condition, key): - """Return cluster labels (given by key function) where condition is true, or a unique vertex label otherwise""" - if isinstance(key, str): - s = key - key = lambda v: v[s] - vertex_counter = count() - cluster_counter = count(start=sum(not condition(v) for v in vs)) - label = defaultdict(lambda: next(cluster_counter)) - return [label[key(v)] if condition(v) else next(vertex_counter) for v in vs] - - -def graph_slices(graph, s, t): - """Yield vertex ids in slices of the graph generated from the mincuts from s to t""" - mincuts = graph.all_st_mincuts(s, t) - vids = [set(c.partition[0]) for c in mincuts] - yield list(vids[0]) - yield from [list(set(p)-set(q)) for p, q in zip(vids[1:], vids[0:-1])] - yield list(set(vids[-0]) - set(vids[-1])) - - -# Functions useful in one specific place in the code: - -def descendants_if(node, cond=lambda x: True): - """Yield all descendants D of node, skipping E & its descendants if cond(E) is False.""" - for child in node.successors(): - if cond(child): - yield from descendants_if(child, 
cond=cond) - yield node.index - - -def events_bridge_region(previous, current, types, getter): - """Used in trace.process_chunk to check for certain enter-leave event sequences""" - return (getter(previous, 'region_type') in types - and getter(previous, 'endpoint') == 'enter' - and getter(current, 'region_type') in types - and getter(current, 'endpoint') == 'leave') diff --git a/src/python/otter/styling.py b/src/python/otter/styling.py deleted file mode 100644 index 230b807..0000000 --- a/src/python/otter/styling.py +++ /dev/null @@ -1,52 +0,0 @@ -from collections import defaultdict - -""" -Styling dictionaries for __main__.py -""" - -# Map region type to node color -colormap_region_type = defaultdict(lambda: 'white', **{ - 'initial_task': 'green', - 'implicit_task': 'fuchsia', - 'explicit_task': 'cyan', - 'parallel': 'yellow', - 'single_executor': 'blue', - 'single_other': 'orange', - 'taskwait': 'red', - 'taskgroup': 'purple', - 'barrier_implicit': 'darkgreen', - 'master': 'magenta', - - # Workshare regions - 'loop': 'brown', - 'taskloop': 'orange', - - # For colouring by endpoint - 'enter': 'green', - 'leave': 'red' -}) - -colormap_edge_type = defaultdict(lambda: 'black', **{ - 'taskwait': 'red', - 'taskgroup': 'red', -}) - -shapemap_region_type = defaultdict(lambda: 'circle', **{ - 'initial_task': 'square', - 'implicit_task': 'square', - 'explicit_task': 'square', - 'parallel': 'parallelogram', - - # Sync regions - 'taskwait': 'octagon', - 'taskgroup': 'octagon', - 'barrier_implicit': 'octagon', - - # Workshare regions - 'loop': 'diamond', - 'taskloop': 'diamond', - 'single_executor': 'diamond', - - # Master - 'master': 'circle' -}) diff --git a/src/python/otter/trace.py b/src/python/otter/trace.py deleted file mode 100644 index 4535dfd..0000000 --- a/src/python/otter/trace.py +++ /dev/null @@ -1,295 +0,0 @@ -import re -import typing as T -import igraph as ig -import otf2 -from itertools import chain -from collections import defaultdict, deque -from otf2.events 
import Enter, Leave, ThreadTaskCreate -from otter.helpers import attr_getter, events_bridge_region - -class DefinitionLookup: - - def __init__(self, registry: otf2.registry._RefRegistry): - self._lookup = dict() - for d in registry: - if d.name in self._lookup: - raise KeyError("{} already present".format(d.name)) - self._lookup[d.name] = d - - def __getitem__(self, name): - if name in self._lookup: - return self._lookup[name] - else: - raise AttributeError(name) - - def __iter__(self): - return ((k,v) for k, v in self._lookup.items()) - - def keys(self): - return self._lookup.keys() - - def values(self): - return self._lookup.values() - - def items(self): - return self.__iter__() - - -class AttributeLookup(DefinitionLookup): - - def __init__(self, attributes: otf2.registry._RefRegistry): - if not isinstance(attributes[0], otf2.definitions.Attribute): - raise TypeError(type(attributes[0])) - super().__init__(attributes) - - def __repr__(self): - s = "{:24s} {:12s} {}\n".format("Name", "Type", "Description") - format = lambda k, v: "{:24s} {:12s} {}".format(k, str(v.type).split(".")[1], v.description) - return s+"\n".join([format(k, v) for k,v in self._lookup.items()]) - - -class LocationLookup(DefinitionLookup): - - def __init__(self, locations: otf2.registry._RefRegistry): - if not isinstance(locations[0], otf2.definitions.Location): - raise TypeError(type(locations[0])) - super().__init__(locations) - - def __repr__(self): - s = "{:12s} {:12s} {:12s} {}\n".format("Group", "Name", "Type", "Events") - format = lambda v: "{:12s} {:12s} {:12s} {}".format(v.group.name, v.name, str(v.type).split(".")[1], v.number_of_events) - return s+"\n".join([format(v) for k,v in self._lookup.items()]) - - -class RegionLookup(DefinitionLookup): - - def __init__(self, regions: otf2.registry._RefRegistry): - self._lookup = dict() - for r in regions: - ref = int(re.search(r'\d+', repr(r))[0]) - if ref in self._lookup: - raise KeyError("{} already present".format(ref)) - 
self._lookup[ref] = r - - def __repr__(self): - minref, maxref = min(self._lookup.keys()), max(self._lookup.keys()) - s = "{:3s} {:18s} {}\n".format("Ref", "Name", "Role") - format_item = lambda l, k: "{:3d} {:18s} {}".format(k, l[k].name, str(l[k].region_role).split(".")[1]) - return s + "\n".join([format_item(self._lookup, i) for i in range(minref, maxref+1)]) - - -class LocationEventMap: - """ - Behaves like a collection of dicts, each of which maps a location (thread) onto some sequence of events recorded - by that location - """ - - def __init__(self, events: T.Iterable, attr: AttributeLookup): - self._map = defaultdict(deque) - self.attr = attr - for l, e in events: - self._map[l].append(e) - - def __repr__(self): - s = "" - for l, q in self._map.items(): - s += l.name + "\n" - s += " {:18s} {:10s} {:20s} {:20s} {:18s} {}\n".format("Time", "Endpoint", "Region Type", "Event Type", "Region ID/Name", "CPU") - for e in q: - s += " {:<18d} {:10s} {:20s} {:20s} {:18s} {}\n".format( - e.time, - e.attributes[self.attr['endpoint']], - e.attributes.get(self.attr['region_type'], ""), - e.attributes[self.attr['event_type']], - str(e.attributes[self.attr['unique_id']]) if self.attr['unique_id'] in e.attributes else e.region.name, - e.attributes[self.attr['cpu']]) - return s - - def __getitem__(self, location): - if location in self._map: - return self._map[location] - else: - raise KeyError(location) - - def locations(self): - return sorted(self._map.keys(), key=lambda q: int(q.name.split()[1])) # "Thread x" - - def items(self): - return ((l, self[l]) for l in self.locations()) - - def append(self, l, e): - self._map[l].append(e) - - @property - def kind(self): - _, (event, *ignore) = next(self.items()) - return event.attributes[self.attr['region_type']] - - -def yield_chunks(tr): - attr = AttributeLookup(tr.definitions.attributes) - lmap_dict = defaultdict(lambda : LocationEventMap(list(), attr)) - stack_dict = defaultdict(deque) - nChunks = 0 - print("yielding chunks:", 
end=" ", flush=True) - for location, event in tr.events: - if type(event) in [otf2.events.ThreadBegin, otf2.events.ThreadEnd]: - continue - if event_defines_new_chunk(event, attr): - # Event marks transition from one chunk to another - if isinstance(event, Enter): - if event.attributes.get(attr['region_type'], "") != 'explicit_task': - lmap_dict[location].append(location, event) - stack_dict[location].append(lmap_dict[location]) - # New location map for new chunk - lmap_dict[location] = LocationEventMap([(location, event)], attr) - elif isinstance(event, Leave): - lmap_dict[location].append(location, event) - nChunks += 1 - if nChunks % 100 == 0: - if nChunks % 1000 == 0: - print("yielding chunks:", end=" ", flush=True) - print(f"{nChunks:4d}", end="", flush=True) - elif nChunks % 20 == 0: - print(".", end="", flush=True) - if (nChunks+1) % 1000 == 0: - print("", flush=True) - yield lmap_dict[location] - # Continue with enclosing chunk - lmap_dict[location] = stack_dict[location].pop() - if event.attributes.get(attr['region_type'], "") != 'explicit_task': - lmap_dict[location].append(location, event) - else: - lmap_dict[location].append(location, event) - else: - # Append event to current chunk for this location - lmap_dict[location].append(location, event) - print("") - - -def event_defines_new_chunk(e: otf2.events._EventMeta, a: AttributeLookup) -> bool: - return (e.attributes.get(a['region_type'], None) in ['parallel', - 'explicit_task', 'initial_task', 'single_executor', 'master']) - - -def process_chunk(chunk, verbose=False): - """Return a tuple of chunk kind, task-create links, task-create times, task-leave times and the chunk's graph""" - - # Make function for looking up event attributes - get_attr = attr_getter(chunk.attr) - - # Unpack events from chunk - (_, (first_event, *events, last_event)), = chunk.items() - - if verbose and len(events) > 0: - print(chunk) - - # Make the graph representing this chunk - g = ig.Graph(directed=True) - prior_node = 
g.add_vertex(event=first_event) - - # Used to save taskgroup-enter event to match to taskgroup-leave event - taskgroup_enter_event = None - - # Match master-enter event to corresponding master-leave - master_enter_event = first_event if get_attr(first_event, 'region_type') == 'master' else None - - if chunk.kind == 'parallel': - parallel_id = get_attr(first_event, 'unique_id') - prior_node["parallel_sequence_id"] = (parallel_id, get_attr(first_event, 'endpoint')) - - task_create_nodes = deque() - task_links = deque() - task_crt_ts = deque() - task_leave_ts = deque() - - if type(first_event) is Enter and get_attr(first_event, 'region_type') in ['initial_task']: - task_crt_ts.append((get_attr(first_event, 'unique_id'), first_event.time)) - - k = 1 - for event in chain(events, (last_event,)): - - if get_attr(event, 'region_type') in ['implicit_task']: - if type(event) is Enter: - task_links.append((get_attr(event, 'encountering_task_id'), get_attr(event, 'unique_id'))) - task_crt_ts.append((get_attr(event, 'unique_id'), event.time)) - elif type(event) is Leave: - task_leave_ts.append((get_attr(event, 'unique_id'), event.time)) - continue - - # The node representing this event - node = g.add_vertex(event=event) - - # Add task-leave time - if type(event) is Leave and get_attr(event, 'region_type') == 'explicit_task': - task_leave_ts.append((get_attr(event, 'unique_id'), event.time)) - - # Add task links and task crt ts - if (type(event) is Enter and get_attr(event, 'region_type') == 'implicit_task') \ - or (type(event) is ThreadTaskCreate): - task_links.append((get_attr(event, 'encountering_task_id'), get_attr(event, 'unique_id'))) - task_crt_ts.append((get_attr(event, 'unique_id'), event.time)) - - # Match taskgroup-enter/-leave events - if get_attr(event, 'region_type') in ['taskgroup']: - if type(event) is Enter: - taskgroup_enter_event = event - elif type(event) is Leave: - if taskgroup_enter_event is None: - raise ValueError("taskgroup-enter event was None") - 
node['taskgroup_enter_event'] = taskgroup_enter_event - taskgroup_enter_event = None - - # Match master-enter/-leave events - if get_attr(event, 'region_type') in ['master']: - if type(event) is Enter: - master_enter_event = event - elif type(event) is Leave: - if master_enter_event is None: - raise ValueError("master-enter event was None") - node['master_enter_event'] = master_enter_event - master_enter_event = None - - # Label nodes in a parallel chunk by their position for easier merging - if (chunk.kind == 'parallel' - and type(event) is not ThreadTaskCreate - and get_attr(event, 'region_type') != 'master'): - node["parallel_sequence_id"] = (parallel_id, k) - k += 1 - - if get_attr(event, 'region_type') == 'parallel': - # Label nested parallel regions for easier merging... - if event is not last_event: - node["parallel_sequence_id"] = (get_attr(event, 'unique_id'), get_attr(event, 'endpoint')) - # ... but distinguish from a parallel chunk's terminating parallel-end event - else: - node["parallel_sequence_id"] = (parallel_id, get_attr(event, 'endpoint')) - - # Add edge except for (single begin -> single end) and (parallel N begin -> parallel N end) - if events_bridge_region(prior_node['event'], node['event'], ['single_executor', 'single_other', 'master'], get_attr) \ - or (events_bridge_region(prior_node['event'], node['event'], ['parallel'], get_attr) - and get_attr(node['event'], 'unique_id') == get_attr(prior_node['event'], 'unique_id')): - pass - else: - g.add_edge(prior_node, node) - - # For task_create add dummy nodes for easier merging - if type(event) is ThreadTaskCreate: - node['task_cluster_id'] = (get_attr(event, 'unique_id'), 'enter') - dummy_node = g.add_vertex(event=event, task_cluster_id=(get_attr(event, 'unique_id'), 'leave')) - task_create_nodes.append(dummy_node) - continue - elif len(task_create_nodes) > 0: - task_create_nodes = deque() - - prior_node = node - - if chunk.kind == 'explicit_task' and len(events) == 0: - g.delete_edges([0]) - - # 
Require at least 1 edge between start and end nodes if there are no internal nodes, except for empty explicit
-    # task chunks
-    if chunk.kind != "explicit_task" and len(events) == 0 and g.ecount() == 0:
-        g.add_edge(g.vs[0], g.vs[1])
-
-    return chunk.kind, task_links, task_crt_ts, task_leave_ts, g
diff --git a/src/python/setup.py b/src/python/setup.py
deleted file mode 100644
index 24f77bd..0000000
--- a/src/python/setup.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from setuptools import find_packages, setup
-
-setup(name="otter",
-      version="0.1",
-      description="Otter post-processing tool",
-      author="Adam Tuft",
-      author_email='adam.s.tuft@gmail.com',
-      platforms=["linux"],
-      license="https://github.com/adamtuft/otter/blob/main/LICENSE",
-      url="https://github.com/adamtuft/otter",
-      packages=find_packages(),
-      install_requires=[
-          'python-igraph==0.9.1'
-      ],
-      dependency_links=['https://perftools.pages.jsc.fz-juelich.de/cicd/otf2/']
-      )
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 33c53ef..0ead121 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -55,6 +55,24 @@ target_link_libraries(
     $
 )
 
+# Unit tests for char_ref_registry
+add_executable(
+    char_ref_registry_test
+    char_ref_registry_test.cc
+)
+target_include_directories(
+    char_ref_registry_test
+    PRIVATE
+    "${CMAKE_CURRENT_SOURCE_DIR}/../include"
+)
+target_link_libraries(
+    char_ref_registry_test
+    PRIVATE
+    gtest_main
+    $
+)
+
 include(GoogleTest)
 gtest_discover_tests(queue_test)
 gtest_discover_tests(stack_test)
+gtest_discover_tests(char_ref_registry_test)
diff --git a/test/char_ref_registry_test.cc b/test/char_ref_registry_test.cc
new file mode 100644
index 0000000..7787120
--- /dev/null
+++ b/test/char_ref_registry_test.cc
@@ -0,0 +1,106 @@
+#include
+#include
+
+// Unit tests for char_ref_registry. The registry is built with mock callbacks
+// that count how many labels were handed out and how many entries were deleted.
+
+static uint32_t mock_labeller();
+static void mock_deleter(const char*, uint32_t);
+
+static uint32_t label;     // next label mock_labeller() will return
+static uint32_t inserted;  // number of mock_labeller() calls
+static uint32_t deleted;   // number of mock_deleter() calls
+
+namespace {
+class CharRefRegistryTextFxt: public testing::Test {
+protected:
+    char_ref_registry *r;  // created in SetUp() with both mock callbacks
+    char_ref_registry *s;  // spare slot for registries created by a test
+
+    void SetUp() override {
+        r = char_ref_registry_make(mock_labeller, mock_deleter);
+        s = nullptr;
+        label = 1;
+        inserted = 0;
+        deleted = 0;
+    }
+
+    // Release whatever the test left behind so nothing dangles or leaks.
+    void TearDown() override {
+        if (r) {
+            char_ref_registry_delete(r);
+            r = nullptr;
+        }
+        if (s) {
+            char_ref_registry_delete(s);
+            s = nullptr;
+        }
+        label = 1;
+        inserted = 0;
+        deleted = 0;
+    }
+
+};
+}
+
+// GoogleTest runs suites whose name ends in "DeathTest" before all others.
+using CharRefRegistryDeathTest = CharRefRegistryTextFxt;
+
+static uint32_t mock_labeller() { inserted++; return label++; }
+
+// Parameters are intentionally unnamed: the mock only counts invocations.
+static void mock_deleter(const char*, uint32_t) { deleted++; }
+
+/*** TESTS ***/
+
+TEST_F(CharRefRegistryTextFxt, IsNonNull){
+    ASSERT_NE(r, nullptr);
+}
+
+TEST_F(CharRefRegistryTextFxt, AcceptsNullDeleter){
+    s = char_ref_registry_make(mock_labeller, nullptr);
+    ASSERT_NE(s, nullptr);
+    char_ref_registry_delete(s);
+    s = nullptr; // already released; keep TearDown() from deleting it again
+}
+
+TEST_F(CharRefRegistryTextFxt, KeyIsLabelled){
+    const char *key = "foo";
+    uint32_t id = char_ref_registry_insert(r, key);
+    ASSERT_EQ(id, 1u);
+}
+
+TEST_F(CharRefRegistryTextFxt, SameKeySameLabel){
+    const char *key = "foo";
+    uint32_t id1 = char_ref_registry_insert(r, key);
+    uint32_t id2 = char_ref_registry_insert(r, key);
+    ASSERT_EQ(id1, id2);
+}
+
+TEST_F(CharRefRegistryTextFxt, DiffKeyDiffLabel){
+    const char *key1 = "foo";
+    const char *key2 = "bar";
+    uint32_t id1 = char_ref_registry_insert(r, key1);
+    uint32_t id2 = char_ref_registry_insert(r, key2);
+    ASSERT_NE(id1, id2);
+}
+
+TEST_F(CharRefRegistryTextFxt, InsertedEqDeleted){
+    const char *keys[] = {"foo", "bar", "baz"};
+    for (auto& key : keys)
+    {
+        char_ref_registry_insert(r, key);
+    }
+    ASSERT_EQ(inserted, 3u);
+    char_ref_registry_delete(r);
+    ASSERT_EQ(deleted, 3u);
+    r = nullptr;
+}
+
+/*** DEATH TESTS ***/
+
+TEST_F(CharRefRegistryDeathTest, DeathOnNullLabeller){
+    ASSERT_DEATH({
+        s = char_ref_registry_make(nullptr, mock_deleter);
+    }, ".*");
+}
diff --git a/test/stack_test.cc b/test/stack_test.cc
index 66c13a8..145e473 100644
--- a/test/stack_test.cc
+++ b/test/stack_test.cc
@@ -1,4 +1,5 @@
 #include
+#include
 #include "otter/stack.h"
 
 void mock_data_destructor(void *ptr);
@@ -185,3 +186,71 @@ TEST_F(StackTestFxt, ItemsReturnedLIFO) {
     ASSERT_TRUE(stack_pop(s1, &item4));
     ASSERT_EQ(item4.value, 1);
 }
+
+// Transfer Items
+// These tests expect stack_transfer(dest, src) to move every item from src onto dest.
+
+TEST_F(StackTestFxt, TransferToNullStackIsFalse) {
+    ASSERT_FALSE(stack_transfer(nullptr, s1));
+}
+
+TEST_F(StackTestFxt, TransferToNonNullStackFromNullStackIsTrue) {
+    ASSERT_TRUE(stack_transfer(s1, nullptr));
+}
+
+TEST_F(StackTestFxt, TransferToNonNullStackFromNonNullEmptyStackIsTrue) {
+    ASSERT_TRUE(stack_is_empty(s2));
+    ASSERT_TRUE(stack_transfer(s1, s2));
+}
+
+TEST_F(StackTestFxt, TransferToNonNullStackFromNonNullNonEmptyStackIsTrue) {
+    data_item_t item1 {.value = 1};
+    data_item_t item2 {.value = 2};
+    data_item_t item3 {.value = 3};
+    data_item_t item4 {.value = 0};
+    ASSERT_TRUE(stack_push(s2, item1));
+    ASSERT_TRUE(stack_push(s2, item2));
+    ASSERT_TRUE(stack_push(s2, item3));
+    ASSERT_TRUE(stack_push(s1, item4));
+    ASSERT_TRUE(stack_transfer(s1, s2));
+}
+
+TEST_F(StackTestFxt, TransferSrcIsEmptyAfter) {
+    data_item_t item1 {.value = 1};
+    data_item_t item2 {.value = 2};
+    data_item_t item3 {.value = 3};
+    data_item_t item4 {.value = 0};
+    ASSERT_TRUE(stack_push(s2, item1));
+    ASSERT_TRUE(stack_push(s2, item2));
+    ASSERT_TRUE(stack_push(s2, item3));
+    ASSERT_TRUE(stack_push(s1, item4));
+    ASSERT_FALSE(stack_is_empty(s2));
+    ASSERT_TRUE(stack_transfer(s1, s2));
+    ASSERT_TRUE(stack_is_empty(s2));
+}
+
+TEST_F(StackTestFxt, TransferDestNonEmptyAfter) {
+    data_item_t item1 {.value = 1};
+    data_item_t item2 {.value = 2};
+    data_item_t item3 {.value = 3};
+    ASSERT_TRUE(stack_push(s2, item1));
+    ASSERT_TRUE(stack_push(s2, item2));
+    ASSERT_TRUE(stack_push(s2, item3));
+    ASSERT_TRUE(stack_is_empty(s1));
+    ASSERT_TRUE(stack_transfer(s1, s2));
+    ASSERT_FALSE(stack_is_empty(s1));
+}
+
+TEST_F(StackTestFxt, TransferTotalStackSizeConserved) {
+    data_item_t item1 {.value = 1};
+    data_item_t item2 {.value = 2};
+    data_item_t item3 {.value = 3};
+    data_item_t item4 {.value = 0};
+    ASSERT_TRUE(stack_push(s2, item1));
+    ASSERT_TRUE(stack_push(s2, item2));
+    ASSERT_TRUE(stack_push(s2, item3));
+    ASSERT_TRUE(stack_push(s1, item4));
+    std::size_t size1 = stack_size(s1), size2 = stack_size(s2);
+    ASSERT_TRUE(stack_transfer(s1, s2));
+    ASSERT_EQ(stack_size(s1), size1 + size2);
+}