Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
c58048a
Introduce matrix multiplication wrapper function
kagandikmen Nov 20, 2025
566cb42
Create `legacy/` & move old UPMEM files inside
kagandikmen Nov 20, 2025
ae403fa
Implement tiled matrix multiplication
kagandikmen Nov 20, 2025
0556b10
Implement UPMEM-distributed matrix multiplication
kagandikmen Nov 21, 2025
15305f5
Improve file organization
kagandikmen Nov 21, 2025
60f1432
Add Makefile
kagandikmen Nov 21, 2025
dd5d1cd
Move `dpu_load` to improve runtime
kagandikmen Nov 21, 2025
bf0b070
Broadcast matrix `B` to DPUs instead of copying it
kagandikmen Nov 21, 2025
05d42ca
Move DPU allocation and loading before tiling
kagandikmen Nov 21, 2025
f979a7f
Reorganize matrix multiplication functions
kagandikmen Nov 21, 2025
6b005fc
Add feature toggle for UPMEM to `multiply_matrix`
kagandikmen Nov 21, 2025
c72628b
Adapt unit tests for updates in `multiply_matrix`
kagandikmen Nov 21, 2025
ed06cc3
Reorganize header files & macros
kagandikmen Nov 21, 2025
087f98d
Add some assertions to `multiply_matrix_upmem`
kagandikmen Nov 21, 2025
8d02ab4
Shorten runtime
kagandikmen Nov 21, 2025
8a7b83a
Adapt CI for UPMEM toolchain
kagandikmen Nov 21, 2025
5b1646a
Update README
kagandikmen Nov 21, 2025
404ab1d
Debug CI (#1)
kagandikmen Nov 22, 2025
d0d4be8
Add `<init, free>_dpus` & improve organization
kagandikmen Nov 22, 2025
755cd7b
Parallelize DPU workload through tasklets
kagandikmen Nov 22, 2025
9c632ae
Simplify macro scheme
kagandikmen Nov 22, 2025
c1d1b3e
Move from double to single-precision float
kagandikmen Nov 22, 2025
207ada9
Introduce macro `TEST_FLOAT_EQ` & adapt unit tests
kagandikmen Nov 22, 2025
a7277ba
Update `TILE_SIZE` and `NUM_DPU`
kagandikmen Nov 23, 2025
6ddb943
Fix typo in CMake configuration file
kagandikmen Nov 23, 2025
e2cb793
Implement various improvements in top Makefile
kagandikmen Nov 23, 2025
4035e75
Add performance evaluation mode `EVAL`
kagandikmen Nov 23, 2025
a098509
Add printout at program start for debugging
kagandikmen Nov 23, 2025
efd75f3
Update README
kagandikmen Nov 23, 2025
6b5c264
Add some logging to `src/dpu/dpu_program.c`
kagandikmen Nov 23, 2025
5f3fb0a
Register benchmarking results in `benchmarks.md`
kagandikmen Nov 23, 2025
c9d9af1
Update CI for recent changes in upmem-sdk repo
kagandikmen Nov 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/build_upmem_toolchain.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
#
# Installs the UPMEM SDK 2024.2.0 for CI runners.
#
# Clones the SDK mirror repository into /opt, unpacks the x86_64 Linux
# tarball and moves the extracted tree to /usr/local/bin/, then removes the
# clone. Needs write access to /opt and /usr/local/bin (the workflows invoke
# it through `sudo bash`).

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so a broken download never results in a
# half-installed toolchain followed by the cleanup step.
set -euo pipefail

cd /opt/

# Only the current snapshot is needed; a shallow clone avoids pulling the
# full history of a repository that stores large tarballs.
git clone --depth 1 https://github.com/kagandikmen/upmem-sdk.git
tar -xvf upmem-sdk/2024.2.0/upmem-2024.2.0-Linux-x86_64.tar.gz
mv upmem-2024.2.0-Linux-x86_64/ /usr/local/bin/
rm -rf upmem-sdk/
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
name: Valgrind
name: Memory Leak Tests

on:
push:
pull_request:

jobs:
memcheck:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04

steps:
- name: Checkout repository
Expand All @@ -19,25 +19,40 @@ jobs:
sudo apt update
sudo apt install -y build-essential valgrind
pip3 install numpy
sudo bash .github/build_upmem_toolchain.sh

- name: Extract training samples & labels
run: python3 read_dataset.py

- name: Compile MLP
run: gcc -g -DEPSILON=0.5 -DNUM_TRAIN_SAMPLES=2 -Iinclude src/*.c -o mlp -lm
- name: Compile MLP without sanitizer or UPMEM
run: |
source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh simulator
make SAN=0 UPMEM=0

- name: Run Valgrind
run: |
source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh simulator
valgrind --leak-check=full \
--show-leak-kinds=all \
--track-origins=yes \
--error-exitcode=1 \
--log-file=valgrind.txt \
./mlp > /dev/null
./build/mlp > /dev/null

- name: Save Valgrind log
if: always()
uses: actions/upload-artifact@v4
with:
name: valgrind_log
path: valgrind.txt
path: valgrind.txt

- name: Compile MLP with sanitizer and UPMEM
run: |
source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh simulator
make clean
make SAN=1 UPMEM=1

- name: Run with sanitizer
run: |
source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh simulator
./build/mlp > /dev/null
18 changes: 13 additions & 5 deletions .github/workflows/unit_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:

jobs:
build-and-test:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04

steps:
- name: Checkout repository
Expand All @@ -15,19 +15,27 @@ jobs:
submodules: 'recursive'

- name: Install dependencies
run: sudo apt update && sudo apt install -y build-essential
run: |
sudo apt update && sudo apt install -y build-essential python3.10 python3.10-dev
sudo bash .github/build_upmem_toolchain.sh

- name: Create build directory
run: mkdir build

- name: Run CMake
working-directory: build
run: cmake ..
run: |
source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh
cmake ..

- name: Build
working-directory: build
run: make
run: |
source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh
make

- name: Run the tests
working-directory: build
run: make test
run: |
source /usr/local/bin/upmem-2024.2.0-Linux-x86_64/upmem_env.sh
make test
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
matmul.c
matrices.h
dpu/
*.o
*.out
training_images.txt
Expand Down
32 changes: 29 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,44 @@ set(CMAKE_C_STANDARD_REQUIRED ON)

include_directories(include)

file(GLOB SRC_FILES src/*.c)
list(REMOVE_ITEM SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/mlp.c")
file(GLOB SRC_FILES src/host/*.c)
list(REMOVE_ITEM SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/host/mlp.c")
file(GLOB TEST_FILES tests/*.c)

# Ask the UPMEM SDK (via dpu-pkg-config) for the compile and link flags needed
# by host-side code. The results are stored in DPU_C_FLAGS and DPU_LIBS.
execute_process(
  COMMAND dpu-pkg-config --cflags dpu
  OUTPUT_VARIABLE DPU_C_FLAGS
  RESULT_VARIABLE DPU_C_FLAGS_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

execute_process(
  COMMAND dpu-pkg-config --libs dpu
  OUTPUT_VARIABLE DPU_LIBS
  RESULT_VARIABLE DPU_LIBS_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

# Fail at configure time when the SDK environment is not set up, instead of
# continuing with empty flags and failing later with missing headers/symbols.
if(NOT "${DPU_C_FLAGS_RESULT}" STREQUAL "0" OR NOT "${DPU_LIBS_RESULT}" STREQUAL "0")
  message(FATAL_ERROR
    "dpu-pkg-config failed; make sure the UPMEM SDK environment "
    "(upmem_env.sh) is sourced before running CMake.")
endif()

# dpu-pkg-config prints a space-separated command-line fragment. Convert it
# into a CMake list so that every flag is passed as its own argument when the
# variables are expanded unquoted.
separate_arguments(DPU_C_FLAGS UNIX_COMMAND "${DPU_C_FLAGS}")
separate_arguments(DPU_LIBS UNIX_COMMAND "${DPU_LIBS}")

enable_testing()

# Compile the DPU-side program with the UPMEM clang driver and place the
# binary in the top-level build directory.
#
# This is an always-run target (part of ALL), so the DPU binary is rebuilt on
# every build and changes to headers under include/ are always picked up.
add_custom_target(build_dpu_program ALL
  COMMAND dpu-upmem-dpurte-clang
          -I${CMAKE_SOURCE_DIR}/include
          -o ${CMAKE_BINARY_DIR}/dpu_program
          ${CMAKE_SOURCE_DIR}/src/dpu/dpu_program.c
  # Declare the produced file so generators such as Ninja can track and
  # clean it.
  BYPRODUCTS ${CMAKE_BINARY_DIR}/dpu_program
  COMMENT "Compiling DPU program"
  # Make argument escaping identical on every platform/generator.
  VERBATIM
)

# Preprocessor definitions applied to the host-side targets declared in this
# file.
add_compile_definitions(
# NOTE: Do not override dimension macros such as NUM_DPU here (a former
# NUM_DPU=1 override was removed). Definitions added in this command are not
# passed to the dpu-upmem-dpurte-clang invocation above, so an override would
# make dpu_program.c and the host code disagree. Dimensions must come only
# from the shared header files.
DPU_BINARY_PATH=\"./dpu_program\"
)

foreach(TEST_SRC ${TEST_FILES})
get_filename_component(TEST_NAME ${TEST_SRC} NAME_WE)

add_executable(${TEST_NAME} ${TEST_SRC} ${SRC_FILES})
target_include_directories(${TEST_NAME} PRIVATE include)
target_link_libraries(${TEST_NAME} m)
target_compile_options(${TEST_NAME} PRIVATE ${DPU_C_FLAGS})
target_link_libraries(${TEST_NAME} PRIVATE m ${DPU_LIBS})

add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME})
endforeach()
47 changes: 32 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,17 +1,34 @@
CLANG = dpu-upmem-dpurte-clang
SOURCE = matmul
CFLAGS += -O0 -DNR_TASKLETS=6
FILESTODELETE = matmul.c dpu/

all:
python3 generate.py && \
for test in $$(seq 0 15); do \
$(CLANG) $(CFLAGS) -o dpu/dpu$$test/${SOURCE}.o dpu/dpu$$test/${SOURCE}.c; \
done
gcc --std=c99 host.c -o host.o `dpu-pkg-config --cflags --libs dpu`
DPU_UPMEM_CLANG = dpu-upmem-dpurte-clang
DPU_UPMEM_CFLAGS += -DNR_TASKLETS=16

clean:
rm -rf *.o ${FILESTODELETE}
BATCH_SIZE ?= 20
MAX_EPOCH ?= 10
NUM_TRAIN_SAMPLES ?= 200

CFLAGS += -std=c99 -Iinclude -D_GNU_SOURCE -DVERBOSE -DDEBUG
CFLAGS += -DBATCH_SIZE=$(BATCH_SIZE) -DMAX_EPOCH=$(MAX_EPOCH) -DNUM_TRAIN_SAMPLES=$(NUM_TRAIN_SAMPLES)

BUILD_DIR = build/

UPMEM ?= 1
ifeq ($(UPMEM), 1)
CFLAGS += -DUPMEM
endif

clean_all:
rm -rf *.o .vscode/ .cache/ .__pycache__/ training_images.txt training_labels.txt
SAN ?= 0
ifeq ($(SAN), 1)
CFLAGS += -fsanitize=address,undefined,leak -fno-omit-frame-pointer -g
endif

EVAL ?= 0
ifeq ($(EVAL), 1)
CFLAGS += -DEVAL
endif

# `all` and `clean` name actions, not files; mark them phony so a stray file
# with the same name can never make them appear up to date.
.PHONY: all clean

# Rebuild everything from scratch: wipe the build directory, compile the DPU
# program with the UPMEM clang driver, then compile the host MLP binary.
# The steps are chained with `&&` so that a failure in any of them stops the
# recipe and is reported by make, rather than being masked by a later command
# that happens to succeed.
all: clean
	mkdir -p $(BUILD_DIR) && \
	$(DPU_UPMEM_CLANG) $(DPU_UPMEM_CFLAGS) -Iinclude -o build/dpu_program src/dpu/dpu_program.c && \
	gcc src/host/*.c $(CFLAGS) -o build/mlp -lm `dpu-pkg-config --cflags --libs dpu`

clean:
rm -rf $(BUILD_DIR)
95 changes: 52 additions & 43 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,99 +1,108 @@
# UPMEM-MLP

UPMEM-MLP is an attempt at implementing a multilayer perceptron application in pure C and accelerating this application on the UPMEM platform.
UPMEM-MLP implements a multilayer perceptron training application in C and accelerates this application on the UPMEM platform.

[![Unit Tests](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/unit_tests.yaml/badge.svg)](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/unit_tests.yaml) [![Valgrind](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/valgrind.yaml/badge.svg)](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/valgrind.yaml)
[![Unit Tests](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/unit_tests.yaml/badge.svg)](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/unit_tests.yaml) [![Memory Leak Tests](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/memory_leak_tests.yaml/badge.svg)](https://github.com/OpenHardware-Initiative/UPMEM-MLP/actions/workflows/memory_leak_tests.yaml)

## Requirements
## Prerequisites

- GCC or Clang
- CMake 3.10 or higher
- GCC
- Python
- UPMEM SDK

### Installing UPMEM SDK
<details>
<summary><b>Installing UPMEM SDK</b></summary><br>

To set up the UPMEM SDK on your system:
1. Download UPMEM SDK tarball for your system from [this link](https://github.com/kagandikmen/upmem-sdk)

1. Download UPMEM SDK tarball for your system from [this link](https://sdk.upmem.com/)
> **NOTICE:** UPMEM SDK is no longer downloadable on UPMEM's official SDK [Downloads](https://sdk.upmem.com) page.

2. Extract its content and (preferably) move it to a better place like `/usr/local/bin/`

3. Add the shell script `upmem_env.sh`, which sets necessary environment variables, to be sourced into your `.bashrc` as in:
3. Add the shell script `upmem_env.sh`, which sets necessary environment variables, to be sourced into your `.bashrc`:

```bash
source /usr/local/bin/upmem-sdk/upmem_env.sh > /dev/null
source /usr/local/bin/upmem-sdk/upmem_env.sh simulator > /dev/null
```

4. Restart your shell session for the changes to become effective

5. Test your setup using:
5. Test your setup:

```bash
which dpu-lldb
```
---
</details>

which should, if correctly installed, return the path to the LLDB Debugger binary of UPMEM SDK
## Getting Started

## Running the Unit Tests

To run the CMake test flow:
1. Clone this repository and navigate inside it:

```bash
mkdir build
cd build
cmake ..
make
make test
git clone https://github.com/OpenHardware-Initiative/UPMEM-MLP.git
cd UPMEM-MLP
```

## Compiling the Multilayer Perceptron Natively

To natively run the C multilayer perceptron on your system:

1. Create a Python virtual environment (optional, but recommended) and install requirements:
2. **(Optional, but recommended)** Create a Python virtual environment:

```bash
python3 -m venv venv
source venv/bin/activate
```

3. Install Python requirements:

```bash
pip install -r requirements.txt
```

2. Extract training samples & labels:
4. Extract training samples & labels:

```bash
python3 read_dataset.py
```

3. Compile the application:
5. Compile the MLP:

```bash
gcc -Iinclude src/*.c -o mlp -lm
make
```

6. Run the MLP:

```bash
./build/mlp
```

With this command, you can use:

- `-DVERBOSE` for the verbose mode, which prints loss deltas for all epochs
- `-DDEBUG` for the debug mode, which prints a couple samples & labels at the beginning and all weights at the end
- `-DBATCH_SIZE=...` to configure the batch size used during training
- `-DMAX_EPOCH=...` to configure the maximum number of epochs the training can run for
- `-DEPSILON=...` to configure epsilon from the command line
- `-DLEARNING_RATE=...` to configure learning rate from the command line
- `-DDECAY_RATE=...` to configure the decay rate of the learning rate
- `-DMOMENTUM=...` to configure momentum from the command line
- `-DNUM_TRAIN_SAMPLES=...` to configure from the command line how many samples the model should be trained with
- `-DTRAINING_SAMPLES_FILE=...` to configure the path to the text file samples should be sourced from
- `-DTRAINING_LABELS_FILE=...` to configure the path to the text file labels should be sourced from
- `BATCH_SIZE=...` to configure the batch size used during training, which otherwise defaults to 20
- `MAX_EPOCH=...` to configure the maximum number of epochs the training can run for, which otherwise defaults to 10
- `NUM_TRAIN_SAMPLES=...` to configure from the command line how many samples the model should be trained with, which otherwise defaults to 200
- `UPMEM=0` to turn off matrix multiplication on UPMEM
- `SAN=1` to build the MLP with the address, undefined-behavior, and leak sanitizers enabled
- `EVAL=1` to run the MLP in evaluation mode, which adds to the printout how many cycles are spent in training

## Status
## Running the Unit Tests

UPMEM-MLP comes with unit tests, which can be found in `tests/`. Run these unit tests using:

UPMEM-MLP is a work in progress as of 2025-11-14.
```bash
mkdir build
cd build
cmake ..
make
make test
```

### To-Do
## Status

- [ ] Adapt `multiply_matrix` for in-memory matrix multiplication on UPMEM
UPMEM-MLP is complete and actively maintained as of 2025-11-23.

## License

UPMEM-MLP is licensed under the Apache License v2.0. See [LICENSE](LICENSE) for more details.

---
---
10 changes: 10 additions & 0 deletions benchmarks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Benchmark Results

## NN Layout: NUM_FEATURES -> 4096 -> 4096 -> 2048 -> NUM_LABELS

| BATCH_SIZE | NUM_TRAIN_SAMPLES | MAX_EPOCH | Cycles (Intel 64 Host) | Cycles (Intel 64 Host + UPMEM) |
|------------|-------------------|-----------|------------------------|--------------------------------|
| 1200 | 3600 | 1 | 13.05T | 12.73T |
| 3600 | 10800 | 1 | 42.38T | 39.49T |

---
Loading
Loading