OpenHardware-Initiative · kagandikmen · Nov 23, 2025 · Nov 20, 2025 · Nov 20, 2025 · Nov 20, 2025
diff --git a/.github/build_upmem_toolchain.sh b/.github/build_upmem_toolchain.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+cd /opt/
+git clone https://github.com/kagandikmen/upmem-sdk.git
+tar -xvf upmem-sdk/2024.2.0/upmem-2024.2.0-Linux-x86_64.tar.gz
+mv upmem-2024.2.0-Linux-x86_64/ /usr/local/bin/
+rm -rf upmem-sdk/
diff --git a/.github/workflows/valgrind.yaml → .github/workflows/memory_leak_tests.yaml b/.github/workflows/valgrind.yaml → .github/workflows/memory_leak_tests.yaml
@@ -1,12 +1,12 @@
-name: Valgrind
+name: Memory Leak Tests
 
 on:
   push:
   pull_request:
 
 jobs:
   memcheck:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
 
     steps:
       - name: Checkout repository
@@ -19,25 +19,40 @@ jobs:
           sudo apt update
           sudo apt install -y build-essential valgrind
           pip3 install numpy
+          sudo bash .github/build_upmem_toolchain.sh
 
       - name: Extract training samples & labels
         run: python3 read_dataset.py
 
-      - name: Compile MLP
-        run: gcc -g -DEPSILON=0.5 -DNUM_TRAIN_SAMPLES=2 -Iinclude src/*.c -o mlp -lm
+      - name: Compile MLP without sanitizer or UPMEM
+        run: |
+          source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh simulator
+          make SAN=0 UPMEM=0
 
       - name: Run Valgrind
         run: | 
+          source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh simulator
           valgrind --leak-check=full \
                    --show-leak-kinds=all \
                    --track-origins=yes \
                    --error-exitcode=1 \
                    --log-file=valgrind.txt \
-                   ./mlp > /dev/null
+                   ./build/mlp > /dev/null
 
       - name: Save Valgrind log
         if: always()
         uses: actions/upload-artifact@v4
         with:
           name: valgrind_log
-          path: valgrind.txt
+          path: valgrind.txt
+
+      - name: Compile MLP with sanitizer and UPMEM
+        run: |
+          source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh simulator
+          make clean
+          make SAN=1 UPMEM=1
+
+      - name: Run with sanitizer
+        run: |
+          source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh simulator
+          ./build/mlp > /dev/null
diff --git a/.github/workflows/unit_tests.yaml b/.github/workflows/unit_tests.yaml
@@ -6,7 +6,7 @@ on:
 
 jobs:
   build-and-test:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
 
     steps:
       - name: Checkout repository
@@ -15,19 +15,27 @@ jobs:
           submodules: 'recursive'
 
       - name: Install dependencies
-        run: sudo apt update && sudo apt install -y build-essential
+        run: | 
+          sudo apt update && sudo apt install -y build-essential python3.10 python3.10-dev
+          sudo bash .github/build_upmem_toolchain.sh
 
       - name: Create build directory
         run: mkdir build
 
       - name: Run CMake
         working-directory: build
-        run: cmake ..
+        run: |
+          source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh
+          cmake ..
 
       - name: Build
         working-directory: build
-        run: make
+        run: |
+          source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh
+          make
 
       - name: Run the tests
         working-directory: build
-        run: make test
+        run: |
+          source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh
+          make test
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,5 @@
 matmul.c
 matrices.h
-dpu/
 *.o
 *.out
 training_images.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -6,18 +6,62 @@ set(CMAKE_C_STANDARD_REQUIRED ON)
 
 include_directories(include)
 
-file(GLOB SRC_FILES src/*.c)
-list(REMOVE_ITEM SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/mlp.c")
+# Every host source except src/host/mlp.c is compiled into each test executable.
+file(GLOB SRC_FILES src/host/*.c)
+list(REMOVE_ITEM SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/host/mlp.c")
 file(GLOB TEST_FILES tests/*.c)
 
+# Ask the UPMEM SDK for the host compile and link flags; the exit codes are
+# checked below so that a missing SDK fails at configure time.
+execute_process(
+    COMMAND dpu-pkg-config --cflags dpu
+    RESULT_VARIABLE DPU_C_FLAGS_RESULT
+    OUTPUT_VARIABLE DPU_C_FLAGS
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+execute_process(
+    COMMAND dpu-pkg-config --libs dpu
+    RESULT_VARIABLE DPU_LIBS_RESULT
+    OUTPUT_VARIABLE DPU_LIBS
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+if(NOT DPU_C_FLAGS_RESULT EQUAL 0 OR NOT DPU_LIBS_RESULT EQUAL 0)
+    message(FATAL_ERROR "dpu-pkg-config failed; source the UPMEM SDK's upmem_env.sh before running CMake")
+endif()
+
+# The tool prints space-separated flags; split them into CMake lists so that each
+# flag reaches the compiler/linker as its own argument.
+separate_arguments(DPU_C_FLAGS UNIX_COMMAND "${DPU_C_FLAGS}")
+separate_arguments(DPU_LIBS UNIX_COMMAND "${DPU_LIBS}")
+
 enable_testing()
 
+# Compiles the DPU program into the build directory; part of ALL and re-run on every build.
+add_custom_target(build_dpu_program ALL
+    COMMAND dpu-upmem-dpurte-clang
+            -I${CMAKE_SOURCE_DIR}/include
+            -o ${CMAKE_BINARY_DIR}/dpu_program
+            ${CMAKE_SOURCE_DIR}/src/dpu/dpu_program.c
+)
+
+add_compile_definitions(
+    # NUM_DPU=1 is intentionally not defined here: definitions in this list do not reach the
+    # dpu-upmem-dpurte-clang command above, so dpu_program.c and the host code would disagree.
+    # This file should not override dimensions that the project headers set through macros.
+    DPU_BINARY_PATH=\"./dpu_program\"
+)
+
 foreach(TEST_SRC ${TEST_FILES})
     get_filename_component(TEST_NAME ${TEST_SRC} NAME_WE)
 
     add_executable(${TEST_NAME} ${TEST_SRC} ${SRC_FILES})
     target_include_directories(${TEST_NAME} PRIVATE include)
-    target_link_libraries(${TEST_NAME} m)
+    target_compile_options(${TEST_NAME} PRIVATE ${DPU_C_FLAGS})
+    target_link_libraries(${TEST_NAME} PRIVATE m ${DPU_LIBS})
+    # Building a test target also builds the DPU binary referenced by DPU_BINARY_PATH.
+    add_dependencies(${TEST_NAME} build_dpu_program)
 
     add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME})
 endforeach()
diff --git a/Makefile b/Makefile
@@ -1,17 +1,47 @@
-CLANG = dpu-upmem-dpurte-clang
-SOURCE = matmul
-CFLAGS += -O0 -DNR_TASKLETS=6
-FILESTODELETE = matmul.c dpu/
-
-all:
-	python3 generate.py && \
-	for test in $$(seq 0 15); do \
-		$(CLANG) $(CFLAGS) -o dpu/dpu$$test/${SOURCE}.o dpu/dpu$$test/${SOURCE}.c; \
-	done
-	gcc --std=c99 host.c -o host.o `dpu-pkg-config --cflags --libs dpu`
+# Builds the host application (build/mlp) and the DPU binary (build/dpu_program).
+#
+# Tunables, overridable on the command line (e.g. `make SAN=1 UPMEM=0`):
+#   BATCH_SIZE, MAX_EPOCH, NUM_TRAIN_SAMPLES  passed to the host sources as -D macros
+#   UPMEM=1  adds -DUPMEM to the host build (default 1)
+#   SAN=1    builds the host with the address, undefined and leak sanitizers (default 0)
+#   EVAL=1   adds -DEVAL to the host build (default 0)
+DPU_UPMEM_CLANG = dpu-upmem-dpurte-clang
+DPU_UPMEM_CFLAGS += -DNR_TASKLETS=16
 
-clean:
-	rm -rf *.o ${FILESTODELETE}
+BATCH_SIZE ?= 20
+MAX_EPOCH ?= 10
+NUM_TRAIN_SAMPLES ?= 200
+
+CFLAGS += -std=c99 -Iinclude -D_GNU_SOURCE -DVERBOSE -DDEBUG
+CFLAGS += -DBATCH_SIZE=$(BATCH_SIZE) -DMAX_EPOCH=$(MAX_EPOCH) -DNUM_TRAIN_SAMPLES=$(NUM_TRAIN_SAMPLES)
+
+BUILD_DIR = build/
+
+# Conditional bodies are indented with spaces, not tabs: make reserves a leading tab for recipe lines.
+UPMEM ?= 1
+ifeq ($(strip $(UPMEM)),1)
+  CFLAGS += -DUPMEM
+endif
 
-clean_all:
-	rm -rf *.o .vscode/ .cache/ .__pycache__/ training_images.txt training_labels.txt
+SAN ?= 0
+ifeq ($(strip $(SAN)),1)
+  CFLAGS += -fsanitize=address,undefined,leak -fno-omit-frame-pointer -g
+endif
+
+EVAL ?= 0
+ifeq ($(strip $(EVAL)),1)
+  CFLAGS += -DEVAL
+endif
+
+.PHONY: all clean
+
+# `clean` is a prerequisite, so every build starts from an empty $(BUILD_DIR) and
+# command-line changes to the tunables always take effect.
+# The steps are chained with && so a failing step fails the build instead of being ignored.
+all: clean
+	mkdir -p $(BUILD_DIR) && \
+	$(DPU_UPMEM_CLANG) $(DPU_UPMEM_CFLAGS) -Iinclude -o $(BUILD_DIR)dpu_program src/dpu/dpu_program.c && \
+	gcc src/host/*.c $(CFLAGS) -o $(BUILD_DIR)mlp -lm `dpu-pkg-config --cflags --libs dpu`
+
+clean:
+	rm -rf $(BUILD_DIR)
diff --git a/README.md b/README.md
@@ -1,99 +1,108 @@
 # UPMEM-MLP
 
-UPMEM-MLP is an attempt at implementing a multilayer perceptron application in pure C and accelerating this application on the UPMEM platform.
+UPMEM-MLP implements a multilayer perceptron training application in C and accelerates this application on the UPMEM platform.
 
-[![Unit Tests](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/unit_tests.yaml/badge.svg)](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/unit_tests.yaml) [![Valgrind](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/valgrind.yaml/badge.svg)](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/valgrind.yaml)
+[![Unit Tests](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/unit_tests.yaml/badge.svg)](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/unit_tests.yaml) [![Memory Leak Tests](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/memory_leak_tests.yaml/badge.svg)](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/memory_leak_tests.yaml)
 
-## Requirements
+## Prerequisites
 
-- GCC or Clang
 - CMake 3.10 or higher
+- GCC
+- Python
 - UPMEM SDK
 
-### Installing UPMEM SDK
+<details>
+<summary><b>Installing UPMEM SDK</b></summary><br>
 
-To set up the UPMEM SDK on your system:
+1. Download UPMEM SDK tarball for your system from [this link](https://github.com/kagandikmen/upmem-sdk)
 
-1. Download UPMEM SDK tarball for your system from [this link](https://sdk.upmem.com/)
+> **NOTICE:** UPMEM SDK is no longer downloadable on UPMEM's official SDK [Downloads](https://sdk.upmem.com) page.
 
 2. Extract its content and (preferably) move it to a better place like `/usr/local/bin/`
 
-3. Add the shell script `upmem_env.sh`, which sets necessary environment variables, to be sourced into your `.bashrc` as in:
+3. Source the shell script `upmem_env.sh`, which sets the necessary environment variables, from your `.bashrc`:
 
 ```bash
-source /usr/local/bin/upmem-sdk/upmem_env.sh > /dev/null
+source /usr/local/bin/upmem-sdk/upmem_env.sh simulator > /dev/null
 ```
 
 4. Restart your shell session for the changes to become effective
 
-5. Test your setup using:
+5. Test your setup:
 
 ```bash
 which dpu-lldb
 ```
+---
+</details>
 
-which should, if correctly installed, return the path to the LLDB Debugger binary of UPMEM SDK
+## Getting Started
 
-## Running the Unit Tests
-
-To run the CMake test flow:
+1. Clone this repository and navigate inside it:
 
 ```bash
-mkdir build
-cd build
-cmake ..
-make
-make test
+git clone https://github.com/OpenHardware-Initiative/UPMEM-MLP.git
+cd UPMEM-MLP
 ```
 
-## Compiling the Multilayer Perceptron Natively
-
-To natively run the C multilayer perceptron on your system:
-
-1. Create a Python virtual environment (optional, but recommended) and install requirements:
+2. **(Optional, but recommended)** Create a Python virtual environment:
 
 ```bash
 python3 -m venv venv
 source venv/bin/activate
+```
+
+3. Install Python requirements:
+
+```bash
 pip install -r requirements.txt
 ```
 
-2. Extract training samples & labels:
+4. Extract training samples & labels:
 
 ```bash
 python3 read_dataset.py
 ```
 
-3. Compile the application:
+5. Compile the MLP:
 
 ```bash
-gcc -Iinclude src/*.c -o mlp -lm
+make
+```
+
+6. Run the MLP:
+
+```bash
+./build/mlp
 ```
 
-With this command, you can use:
+When compiling with `make` (step 5), you can pass:
 
-- `-DVERBOSE` for the verbose mode, which prints loss deltas for all epochs
-- `-DDEBUG` for the debug mode, which prints a couple samples & labels at the beginning and all weights at the end
-- `-DBATCH_SIZE=...` to configure the batch size used during training
-- `-DMAX_EPOCH=...` to configure the maximum number of epochs the training can run for
-- `-DEPSILON=...` to configure epsilon from the command line
-- `-DLEARNING_RATE=...` to configure learning rate from the command line
-- `-DDECAY_RATE=...` to configure the decay rate of the learning rate
-- `-DMOMENTUM=...` to configure momentum from the command line
-- `-DNUM_TRAIN_SAMPLES=...` to configure from the command line how many samples the model should be trained with
-- `-DTRAINING_SAMPLES_FILE=...` to configure the path to the text file samples should be sourced from
-- `-DTRAINING_LABELS_FILE=...` to configure the path to the text file labels should be sourced from
+- `BATCH_SIZE=...` to configure the batch size used during training, which otherwise defaults to 20
+- `MAX_EPOCH=...` to configure the maximum number of epochs the training can run for, which otherwise defaults to 10
+- `NUM_TRAIN_SAMPLES=...` to configure how many samples the model is trained with, which otherwise defaults to 200
+- `UPMEM=0` to turn off matrix multiplication on UPMEM
+- `SAN=1` to build the MLP with GCC's address, undefined-behavior, and leak sanitizers enabled
+- `EVAL=1` to run the MLP in evaluation mode, which adds to the printout how many cycles are spent in training
 
-## Status
+## Running the Unit Tests
+
+UPMEM-MLP comes with unit tests, which can be found in `tests/`. Run these unit tests using:
 
-UPMEM-MLP is a work in progress as of 2025-11-14.
+```bash
+mkdir build
+cd build
+cmake ..
+make
+make test
+```
 
-### To-Do
+## Status
 
-- [ ] Adapt `multiply_matrix` for in-memory matrix multiplication on UPMEM
+UPMEM-MLP is complete and actively maintained as of 2025-11-23.
 
 ## License
 
 UPMEM-MLP is licensed under the Apache License v2.0. See [LICENSE](LICENSE) for more details.
 
----
+---
diff --git a/benchmarks.md b/benchmarks.md
@@ -0,0 +1,10 @@
+# Benchmark Results
+
+## NN Layout: NUM_FEATURES -> 4096 -> 4096 -> 2048 -> NUM_LABELS
+
+| BATCH_SIZE | NUM_TRAIN_SAMPLES | MAX_EPOCH | Cycles (Intel 64 Host) | Cycles (Intel 64 Host + UPMEM) |
+|------------|-------------------|-----------|------------------------|--------------------------------|
+| 1200       | 3600              | 1         | 13.05T                 | 12.73T                         |
+| 3600       | 10800             | 1         | 42.38T                 | 39.49T                         |
+
+---