Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions cuTWED/cuTWED.h.i
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,37 @@
float twed_devf(float A_dev[], int nA, float TA_dev[],
float B_dev[], int nB, float TB_dev[],
float nu, float lambda, int degree, int dim);
void twed_malloc_dvc_time_steps(const int nA, double **TA_dev,
const int nAA);
void twed_malloc_dvc_time_series(const int nA, double **A_dev,
const int dim, const int nAA);
void twed_malloc_dvc(const int nA, double **A_dev, double **TA_dev,
const int dim, const int nAA);
void twed_malloc_dev(const int nA, double **A_dev, double **TA_dev,
const int nB, double **B_dev, double **TB_dev,
const int dim, const int nAA, const int nBB);
void twed_malloc_dvc_time_stepsf(const int nA, float **TA_dev,
const int nAA);
void twed_malloc_dvc_time_seriesf(const int nA, float **A_dev,
const int dim, const int nAA);
void twed_malloc_dvcf(const int nA, float **A_dev, float **TA_dev,
const int dim, const int nAA);
void twed_malloc_devf(const int nA, float **A_dev, float **TA_dev,
const int nB, float **B_dev, float **TB_dev,
const int dim, const int nAA, const int nBB);
void twed_free_dvc(double *A_dev, double *TA_dev);
void twed_free_dev(double *A_dev, double *TA_dev,
double *B_dev, double *TB_dev);
void twed_free_dvcf(float *A_dev, float *TA_dev);
void twed_free_devf(float *A_dev, float *TA_dev,
float *B_dev, float *TB_dev);
void twed_copy_to_dvc(const int nA, double A[], double A_dev[], double TA[], double TA_dev[],
const int dim, const int nAA);
void twed_copy_to_dev(const int nA, double A[], double A_dev[], double TA[], double TA_dev[],
const int nB, double B[], double B_dev[], double TB[], double TB_dev[],
const int dim, const int nAA, const int nBB);
void twed_copy_to_dvcf(const int nA, float A[], float A_dev[], float TA[], float TA_dev[],
const int dim, const int nAA);
void twed_copy_to_devf(const int nA, float A[], float A_dev[], float TA[], float TA_dev[],
const int nB, float B[], float B_dev[], float TB[], float TB_dev[],
const int dim, const int nAA, const int nBB);
Expand Down
20 changes: 20 additions & 0 deletions src/cuTWED.cu
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,13 @@ static __inline__ __host__ __device__ diagIdx_t map_rc_to_diag(int row, int col)
*/

#define REAL_t double
#define _TWED_MALLOC_DVC_TIME_STEPS twed_malloc_dvc_time_steps
#define _TWED_MALLOC_DVC_TIME_SERIES twed_malloc_dvc_time_series
#define _TWED_MALLOC_DVC twed_malloc_dvc
#define _TWED_MALLOC_DEV twed_malloc_dev
#define _TWED_FREE_DVC twed_free_dvc
#define _TWED_FREE_DEV twed_free_dev
#define _TWED_COPY_TO_DVC twed_copy_to_dvc
#define _TWED_COPY_TO_DEV twed_copy_to_dev
#define _TWED_DEV twed_dev
#define _TWED twed
Expand All @@ -128,8 +133,13 @@ static __inline__ __host__ __device__ diagIdx_t map_rc_to_diag(int row, int col)
#define _GEAM cublasDgeam
#include "cuTWED_core.h"
#undef REAL_t
#undef _TWED_MALLOC_DVC_TIME_STEPS
#undef _TWED_MALLOC_DVC_TIME_SERIES
#undef _TWED_MALLOC_DVC
#undef _TWED_MALLOC_DEV
#undef _TWED_FREE_DVC
#undef _TWED_FREE_DEV
#undef _TWED_COPY_TO_DVC
#undef _TWED_COPY_TO_DEV
#undef _TWED_DEV
#undef _TWED
Expand All @@ -138,8 +148,13 @@ static __inline__ __host__ __device__ diagIdx_t map_rc_to_diag(int row, int col)
#undef _GEAM

#define REAL_t float
#define _TWED_MALLOC_DVC_TIME_STEPS twed_malloc_dvc_time_stepsf
#define _TWED_MALLOC_DVC_TIME_SERIES twed_malloc_dvc_time_seriesf
#define _TWED_MALLOC_DVC twed_malloc_dvcf
#define _TWED_MALLOC_DEV twed_malloc_devf
#define _TWED_FREE_DVC twed_free_dvcf
#define _TWED_FREE_DEV twed_free_devf
#define _TWED_COPY_TO_DVC twed_copy_to_dvcf
#define _TWED_COPY_TO_DEV twed_copy_to_devf
#define _TWED_DEV twed_devf
#define _TWED twedf
Expand All @@ -148,8 +163,13 @@ static __inline__ __host__ __device__ diagIdx_t map_rc_to_diag(int row, int col)
#define _GEAM cublasSgeam
#include "cuTWED_core.h"
#undef REAL_t
#undef _TWED_MALLOC_DVC_TIME_STEPS
#undef _TWED_MALLOC_DVC_TIME_SERIES
#undef _TWED_MALLOC_DVC
#undef _TWED_MALLOC_DEV
#undef _TWED_FREE_DVC
#undef _TWED_FREE_DEV
#undef _TWED_COPY_TO_DVC
#undef _TWED_COPY_TO_DEV
#undef _TWED_DEV
#undef _TWED
Expand Down
18 changes: 18 additions & 0 deletions src/cuTWED.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,22 @@ extern "C" {
/*
Mallocs memory on device, approximately (6*nA + 6*nB) * sizeof(REAL_t)
*/
void twed_malloc_dvc_time_steps(const int nA, double **TA_dev,
const int nAA);
void twed_malloc_dvc_time_series(const int nA, double **A_dev,
const int dim, const int nAA);
void twed_malloc_dvc(const int nA, double **A_dev, double **TA_dev,
const int dim, const int nAA);
void twed_malloc_dev(const int nA, double **A_dev, double **TA_dev,
const int nB, double **B_dev, double **TB_dev,
const int dim, const int nAA, const int nBB);

void twed_malloc_dvc_time_stepsf(const int nA, float **TA_dev,
const int nAA);
void twed_malloc_dvc_time_seriesf(const int nA, float **A_dev,
const int dim, const int nAA);
void twed_malloc_dvcf(const int nA, float **A_dev, float **TA_dev,
const int dim, const int nAA);
void twed_malloc_devf(const int nA, float **A_dev, float **TA_dev,
const int nB, float **B_dev, float **TB_dev,
const int dim, const int nAA, const int nBB);
Expand All @@ -69,20 +81,26 @@ extern "C" {
/*
Frees memory malloc'd in twed_malloc_dev
*/
void twed_free_dvc(double *A_dev, double *TA_dev);
void twed_free_dev(double *A_dev, double *TA_dev,
double *B_dev, double *TB_dev);

void twed_free_dvcf(float *A_dev, float *TA_dev);
void twed_free_devf(float *A_dev, float *TA_dev,
float *B_dev, float *TB_dev);

/*
Copies data from host to device. You would only use this function if you
are writing logic to reuse gpu memory.
*/
void twed_copy_to_dvc(const int nA, double A[], double A_dev[], double TA[], double TA_dev[],
const int dim, const int nAA);
void twed_copy_to_dev(const int nA, double A[], double A_dev[], double TA[], double TA_dev[],
const int nB, double B[], double B_dev[], double TB[], double TB_dev[],
const int dim, const int nAA, const int nBB);

void twed_copy_to_dvcf(const int nA, float A[], float A_dev[], float TA[], float TA_dev[],
const int dim, const int nAA);
void twed_copy_to_devf(const int nA, float A[], float A_dev[], float TA[], float TA_dev[],
const int nB, float B[], float B_dev[], float TB[], float TB_dev[],
const int dim, const int nAA, const int nBB);
Expand Down
53 changes: 35 additions & 18 deletions src/cuTWED_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,40 +367,57 @@ static void grid_evalZ(const REAL_t* __restrict__ A, const REAL_t* __restrict__
extern "C" {
#endif

void _TWED_MALLOC_DVC_TIME_SERIES(const int nA, REAL_t **A_dev,
const int dim, const int nAA) {
const size_t sza = nAA*(nA+1) * sizeof(REAL_t);
HANDLE_ERROR(cudaMalloc(A_dev, sza*dim));
}

void _TWED_MALLOC_DVC_TIME_STEPS(const int nA, REAL_t **TA_dev,
const int nAA){
const size_t sza = nAA*(nA+1) * sizeof(REAL_t);
HANDLE_ERROR(cudaMalloc(TA_dev, sza));
}

void _TWED_MALLOC_DVC(const int nA, REAL_t **A_dev, REAL_t **TA_dev,
const int dim, const int nAA){
_TWED_MALLOC_DVC_TIME_SERIES(nA, A_dev, dim, nAA);
_TWED_MALLOC_DVC_TIME_STEPS(nA, TA_dev, nAA);
}

void _TWED_MALLOC_DEV(const int nA, REAL_t **A_dev, REAL_t **TA_dev,
const int nB, REAL_t **B_dev, REAL_t **TB_dev,
const int dim, const int nAA, const int nBB){
/* malloc on gpu and copy */
const size_t sza = nAA*(nA+1) * sizeof(**A_dev);
HANDLE_ERROR(cudaMalloc(A_dev, sza*dim));
HANDLE_ERROR(cudaMalloc(TA_dev, sza));

const size_t szb = nBB*(nB+1) * sizeof(**B_dev);
HANDLE_ERROR(cudaMalloc(B_dev, szb*dim));
HANDLE_ERROR(cudaMalloc(TB_dev, szb));
_TWED_MALLOC_DVC(nA, A_dev, TA_dev, dim, nAA);
_TWED_MALLOC_DVC(nB, B_dev, TB_dev, dim, nBB);
}


void _TWED_FREE_DVC(REAL_t *A_dev, REAL_t *TA_dev) {
HANDLE_ERROR(cudaFree( A_dev));
HANDLE_ERROR(cudaFree(TA_dev));
}

void _TWED_FREE_DEV(REAL_t *A_dev, REAL_t *TA_dev,
REAL_t *B_dev, REAL_t *TB_dev){
/* In a minute I'll be free */
HANDLE_ERROR(cudaFree(A_dev));
HANDLE_ERROR(cudaFree(TA_dev));
HANDLE_ERROR(cudaFree(B_dev));
HANDLE_ERROR(cudaFree(TB_dev));
_TWED_FREE_DVC(A_dev, TA_dev);
_TWED_FREE_DVC(B_dev, TB_dev);
}


void _TWED_COPY_TO_DEV(const int nA, REAL_t A[], REAL_t A_dev[], REAL_t TA[], REAL_t TA_dev[],
const int nB, REAL_t B[], REAL_t B_dev[], REAL_t TB[], REAL_t TB_dev[],
const int dim, const int nAA, const int nBB){
void _TWED_COPY_TO_DVC(const int nA, REAL_t A[], REAL_t A_dev[], REAL_t TA[], REAL_t TA_dev[],
const int dim, const int nAA){
const size_t sza = nAA*nA*sizeof(*A);
HANDLE_ERROR(cudaMemcpy(A_dev, A, sza*dim, cudaMemcpyHostToDevice));
HANDLE_ERROR(cudaMemcpy(TA_dev, TA, sza, cudaMemcpyHostToDevice));
}

const size_t szb = nBB*nB*sizeof(*B);
HANDLE_ERROR(cudaMemcpy(B_dev, B , szb*dim, cudaMemcpyHostToDevice));
HANDLE_ERROR(cudaMemcpy(TB_dev, TB, szb, cudaMemcpyHostToDevice));
void _TWED_COPY_TO_DEV(const int nA, REAL_t A[], REAL_t A_dev[], REAL_t TA[], REAL_t TA_dev[],
const int nB, REAL_t B[], REAL_t B_dev[], REAL_t TB[], REAL_t TB_dev[],
const int dim, const int nAA, const int nBB){
_TWED_COPY_TO_DVC(nA, A, A_dev, TA, TA_dev, dim, nAA);
_TWED_COPY_TO_DVC(nB, B, B_dev, TB, TB_dev, dim, nBB);
}


Expand Down