Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/__init__.mojo
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from .level1.iamax_device import *
from .level1.dot_device import *
from .level1.axpy_device import *
from .level1.scal_device import *
from .level1.copy_device import *
from .level1.swap_device import *
from .level1.dot_device import *
from .level1.iamax_device import *
7 changes: 5 additions & 2 deletions src/level1/__init__.mojo
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from .iamax_device import *
from .axpy_device import *
from .scal_device import *
from .copy_device import *
from .swap_device import *
from .dot_device import *
from .axpy_device import *
from .iamax_device import *
27 changes: 18 additions & 9 deletions src/level1/axpy_device.mojo
Original file line number Diff line number Diff line change
@@ -1,21 +1,30 @@
from gpu import thread_idx
from gpu import grid_dim, block_dim, global_idx

fn axpy_device[dtype: DType](
    n: Int,
    a: Scalar[dtype],
    x: UnsafePointer[Scalar[dtype], ImmutAnyOrigin],
    incx: Int,
    y: UnsafePointer[Scalar[dtype], MutAnyOrigin],
    incy: Int
):
    """Device kernel: `y[i*incy] += a * x[i*incx]` for every `i` in `[0, n)`.

    Each thread starts at its global x-index and advances by the total number
    of launched threads (`grid_dim.x * block_dim.x`), so the same loop handles
    both launches with at least `n` threads (at most one iteration per thread)
    and launches with fewer threads than elements.

    Parameters:
        dtype: Element type of both vectors and of the scalar.

    Args:
        n: Number of logical elements to update; nothing happens if `n <= 0`.
        a: Scalar multiplier applied to `x`.
        x: Read-only input vector.
        incx: Element stride used to index `x`.
        y: Vector updated in place.
        incy: Element stride used to index `y`.
    """
    if n <= 0:
        return
    # A zero multiplier is treated as "nothing to add", so y is left as is.
    if a == 0:
        return
    if incx == 0 or incy == 0:
        return
    # NOTE(review): negative strides produce negative offsets from the base
    # pointers here; callers presumably pass positive strides -- confirm.

    var first = Int(global_idx.x)
    var n_threads = Int(grid_dim.x * block_dim.x)

    # Threads whose global index is >= n get an empty range and do nothing.
    for i in range(first, n, n_threads):
        y[i * incy] += a * x[i * incx]


26 changes: 26 additions & 0 deletions src/level1/copy_device.mojo
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from gpu import grid_dim, block_dim, global_idx

fn copy_device[dtype: DType](
    n: Int,
    x: UnsafePointer[Scalar[dtype], ImmutAnyOrigin],
    incx: Int,
    y: UnsafePointer[Scalar[dtype], MutAnyOrigin],
    incy: Int
):
    """Device kernel: `y[i*incy] = x[i*incx]` for every `i` in `[0, n)`.

    Each thread starts at its global x-index and advances by the total number
    of launched threads (`grid_dim.x * block_dim.x`). When the launch has at
    least `n` threads every thread runs at most one iteration, so no separate
    "one element per thread" branch is needed.

    Parameters:
        dtype: Element type of both vectors.

    Args:
        n: Number of logical elements to copy; nothing happens if `n <= 0`.
        x: Read-only source vector.
        incx: Element stride used to index `x`.
        y: Destination vector, written in place.
        incy: Element stride used to index `y`.
    """
    if n <= 0:
        return
    if incx == 0 or incy == 0:
        return
    # NOTE(review): negative strides produce negative offsets from the base
    # pointers here; callers presumably pass positive strides -- confirm.

    var first = Int(global_idx.x)
    var n_threads = Int(grid_dim.x * block_dim.x)

    # Threads whose global index is >= n get an empty range and do nothing.
    for i in range(first, n, n_threads):
        y[i * incy] = x[i * incx]
28 changes: 28 additions & 0 deletions src/level1/scal_device.mojo
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from gpu import grid_dim, block_dim, global_idx

fn scal_device[dtype: DType](
    n: Int,
    a: Scalar[dtype],
    x: UnsafePointer[Scalar[dtype], MutAnyOrigin],
    incx: Int,
):
    """Device kernel: `x[i*incx] *= a` for every `i` in `[0, n)`.

    Each thread starts at its global x-index and advances by the total number
    of launched threads (`grid_dim.x * block_dim.x`). When the launch has at
    least `n` threads every thread runs at most one iteration, so no separate
    "one element per thread" branch is needed.

    Parameters:
        dtype: Element type of the vector and of the scalar.

    Args:
        n: Number of logical elements to scale; nothing happens if `n <= 0`.
        a: Scalar multiplier. `a == 0` is NOT a no-op: the selected elements
            are multiplied by zero like any other value.
        x: Vector scaled in place.
        incx: Element stride used to index `x`.
    """
    if n <= 0:
        return
    if incx == 0:
        return
    # There is deliberately no early return for a == 0: skipping the multiply
    # would leave x unchanged instead of scaling it by zero.
    # NOTE(review): a negative stride produces negative offsets from the base
    # pointer here; callers presumably pass a positive stride -- confirm.

    var first = Int(global_idx.x)
    var n_threads = Int(grid_dim.x * block_dim.x)

    # Threads whose global index is >= n get an empty range and do nothing.
    for i in range(first, n, n_threads):
        x[i * incx] *= a


30 changes: 30 additions & 0 deletions src/level1/swap_device.mojo
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from gpu import grid_dim, block_dim, global_idx

fn swap_device[dtype: DType](
    n: Int,
    x: UnsafePointer[Scalar[dtype], MutAnyOrigin],
    incx: Int,
    y: UnsafePointer[Scalar[dtype], MutAnyOrigin],
    incy: Int
):
    """Device kernel: exchange `x[i*incx]` and `y[i*incy]` for `i` in `[0, n)`.

    Each thread starts at its global x-index and advances by the total number
    of launched threads (`grid_dim.x * block_dim.x`). When the launch has at
    least `n` threads every thread runs at most one iteration, so no separate
    "one element per thread" branch is needed.

    Parameters:
        dtype: Element type of both vectors.

    Args:
        n: Number of logical elements to swap; nothing happens if `n <= 0`.
        x: First vector, modified in place.
        incx: Element stride used to index `x`.
        y: Second vector, modified in place.
        incy: Element stride used to index `y`.
    """
    if n <= 0:
        return
    if incx == 0 or incy == 0:
        return
    # NOTE(review): negative strides produce negative offsets from the base
    # pointers here; callers presumably pass positive strides -- confirm.

    var first = Int(global_idx.x)
    var n_threads = Int(grid_dim.x * block_dim.x)

    # Threads whose global index is >= n get an empty range and do nothing.
    for i in range(first, n, n_threads):
        var ix = i * incx
        var iy = i * incy
        var tmp = x[ix]
        x[ix] = y[iy]
        y[iy] = tmp
Loading