diff --git a/cuTWED/cuTWED.h.i b/cuTWED/cuTWED.h.i
index 9e0968c..4f0aee4 100644
--- a/cuTWED/cuTWED.h.i
+++ b/cuTWED/cuTWED.h.i
@@ -13,19 +13,37 @@
   float twed_devf(float A_dev[], int nA, float TA_dev[],
                   float B_dev[], int nB, float TB_dev[],
                   float nu, float lambda, int degree, int dim);
+  void twed_malloc_dvc_time_steps(const int nA, double **TA_dev,
+                       const int nAA);
+  void twed_malloc_dvc_time_series(const int nA, double **A_dev,
+                       const int dim, const int nAA);
+  void twed_malloc_dvc(const int nA, double **A_dev, double **TA_dev,
+                       const int dim, const int nAA);
   void twed_malloc_dev(const int nA, double **A_dev, double **TA_dev,
                        const int nB, double **B_dev, double **TB_dev,
                        const int dim, const int nAA, const int nBB);
+  void twed_malloc_dvc_time_stepsf(const int nA, float **TA_dev,
+                        const int nAA);
+  void twed_malloc_dvc_time_seriesf(const int nA, float **A_dev,
+                        const int dim, const int nAA);
+  void twed_malloc_dvcf(const int nA, float **A_dev, float **TA_dev,
+                        const int dim, const int nAA);
   void twed_malloc_devf(const int nA, float **A_dev, float **TA_dev,
                         const int nB, float **B_dev, float **TB_dev,
                         const int dim, const int nAA, const int nBB);
+  void twed_free_dvc(double *A_dev, double *TA_dev);
   void twed_free_dev(double *A_dev, double *TA_dev,
                      double *B_dev, double *TB_dev);
+  void twed_free_dvcf(float *A_dev, float *TA_dev);
   void twed_free_devf(float *A_dev, float *TA_dev,
                       float *B_dev, float *TB_dev);
+  void twed_copy_to_dvc(const int nA, double A[], double A_dev[], double TA[], double TA_dev[],
+                        const int dim, const int nAA);
   void twed_copy_to_dev(const int nA, double A[], double A_dev[], double TA[], double TA_dev[],
                         const int nB, double B[], double B_dev[], double TB[], double TB_dev[],
                         const int dim, const int nAA, const int nBB);
+  void twed_copy_to_dvcf(const int nA, float A[], float A_dev[], float TA[], float TA_dev[],
+                         const int dim, const int nAA);
   void twed_copy_to_devf(const int nA, float A[], float A_dev[], float TA[], float TA_dev[],
                          const int nB, float B[], float B_dev[], float TB[], float TB_dev[],
                          const int dim, const int nAA, const int nBB);
diff --git a/src/cuTWED.cu b/src/cuTWED.cu
index dd9a05b..38a5834 100644
--- a/src/cuTWED.cu
+++ b/src/cuTWED.cu
@@ -118,8 +118,13 @@ static __inline__ __host__ __device__ diagIdx_t map_rc_to_diag(int row, int col)
 */
 
 #define REAL_t double
+#define _TWED_MALLOC_DVC_TIME_STEPS  twed_malloc_dvc_time_steps
+#define _TWED_MALLOC_DVC_TIME_SERIES twed_malloc_dvc_time_series
+#define _TWED_MALLOC_DVC twed_malloc_dvc
 #define _TWED_MALLOC_DEV twed_malloc_dev
+#define _TWED_FREE_DVC twed_free_dvc
 #define _TWED_FREE_DEV twed_free_dev
+#define _TWED_COPY_TO_DVC twed_copy_to_dvc
 #define _TWED_COPY_TO_DEV twed_copy_to_dev
 #define _TWED_DEV twed_dev
 #define _TWED twed
@@ -128,8 +133,13 @@ static __inline__ __host__ __device__ diagIdx_t map_rc_to_diag(int row, int col)
 #define _GEAM cublasDgeam
 #include "cuTWED_core.h"
 #undef REAL_t
+#undef _TWED_MALLOC_DVC_TIME_STEPS
+#undef _TWED_MALLOC_DVC_TIME_SERIES
+#undef _TWED_MALLOC_DVC
 #undef _TWED_MALLOC_DEV
+#undef _TWED_FREE_DVC
 #undef _TWED_FREE_DEV
+#undef _TWED_COPY_TO_DVC
 #undef _TWED_COPY_TO_DEV
 #undef _TWED_DEV
 #undef _TWED
@@ -138,8 +148,13 @@ static __inline__ __host__ __device__ diagIdx_t map_rc_to_diag(int row, int col)
 #undef _GEAM
 
 #define REAL_t float
+#define _TWED_MALLOC_DVC_TIME_STEPS  twed_malloc_dvc_time_stepsf
+#define _TWED_MALLOC_DVC_TIME_SERIES twed_malloc_dvc_time_seriesf
+#define _TWED_MALLOC_DVC twed_malloc_dvcf
 #define _TWED_MALLOC_DEV twed_malloc_devf
+#define _TWED_FREE_DVC twed_free_dvcf
 #define _TWED_FREE_DEV twed_free_devf
+#define _TWED_COPY_TO_DVC twed_copy_to_dvcf
 #define _TWED_COPY_TO_DEV twed_copy_to_devf
 #define _TWED_DEV twed_devf
 #define _TWED twedf
@@ -148,8 +163,13 @@ static __inline__ __host__ __device__ diagIdx_t map_rc_to_diag(int row, int col)
 #define _GEAM cublasSgeam
 #include "cuTWED_core.h"
 #undef REAL_t
+#undef _TWED_MALLOC_DVC_TIME_STEPS
+#undef _TWED_MALLOC_DVC_TIME_SERIES
+#undef _TWED_MALLOC_DVC
 #undef _TWED_MALLOC_DEV
+#undef _TWED_FREE_DVC
 #undef _TWED_FREE_DEV
+#undef _TWED_COPY_TO_DVC
 #undef _TWED_COPY_TO_DEV
 #undef _TWED_DEV
 #undef _TWED
diff --git a/src/cuTWED.h b/src/cuTWED.h
index f5d0f34..69a8105 100644
--- a/src/cuTWED.h
+++ b/src/cuTWED.h
@@ -56,10 +56,22 @@ extern "C" {
   /*
     Mallocs memory on device, approximately (6*nA + 6*nB) * sizeof(REAL_t)
   */
+  void twed_malloc_dvc_time_steps(const int nA, double **TA_dev,
+                       const int nAA);
+  void twed_malloc_dvc_time_series(const int nA, double **A_dev,
+                       const int dim, const int nAA);
+  void twed_malloc_dvc(const int nA, double **A_dev, double  **TA_dev,
+                       const int dim, const int nAA);
   void twed_malloc_dev(const int nA, double **A_dev, double  **TA_dev,
                        const int nB, double **B_dev, double  **TB_dev,
                        const int dim, const int nAA, const int nBB);
 
+  void twed_malloc_dvc_time_stepsf(const int nA, float **TA_dev,
+                        const int nAA);
+  void twed_malloc_dvc_time_seriesf(const int nA, float **A_dev,
+                        const int dim, const int nAA);
+  void twed_malloc_dvcf(const int nA, float **A_dev, float  **TA_dev,
+                        const int dim, const int nAA);
   void twed_malloc_devf(const int nA, float **A_dev, float  **TA_dev,
                         const int nB, float **B_dev, float  **TB_dev,
                         const int dim, const int nAA, const int nBB);
@@ -69,9 +81,11 @@ extern "C" {
   /*
     Frees memory malloc'd in twed_malloc_dev
   */
+  void twed_free_dvc(double *A_dev, double  *TA_dev);
   void twed_free_dev(double *A_dev, double  *TA_dev,
                      double *B_dev, double  *TB_dev);
 
+  void twed_free_dvcf(float *A_dev, float  *TA_dev);
   void twed_free_devf(float *A_dev, float  *TA_dev,
                       float *B_dev, float  *TB_dev);
 
@@ -79,10 +93,14 @@ extern "C" {
     Copies data from host to device. You would only use this function if you
     are writing logic to reuse gpu memory.
   */
+  void twed_copy_to_dvc(const int nA, double A[], double A_dev[], double TA[], double TA_dev[],
+                        const int dim, const int nAA);
   void twed_copy_to_dev(const int nA, double A[], double A_dev[], double TA[], double TA_dev[],
                         const int nB, double B[], double B_dev[], double TB[], double TB_dev[],
                         const int dim, const int nAA, const int nBB);
 
+  void twed_copy_to_dvcf(const int nA, float A[], float A_dev[], float TA[], float TA_dev[],
+                         const int dim, const int nAA);
   void twed_copy_to_devf(const int nA, float A[], float A_dev[], float TA[], float TA_dev[],
                          const int nB, float B[], float B_dev[], float TB[], float TB_dev[],
                          const int dim, const int nAA, const int nBB);
diff --git a/src/cuTWED_core.h b/src/cuTWED_core.h
index 851b9a1..9eb14ef 100644
--- a/src/cuTWED_core.h
+++ b/src/cuTWED_core.h
@@ -367,40 +367,57 @@ static void grid_evalZ(const REAL_t* __restrict__ A, const REAL_t* __restrict__
 extern "C" {
 #endif
 
+  void _TWED_MALLOC_DVC_TIME_SERIES(const int nA, REAL_t **A_dev,
+                        const int dim, const int nAA) {
+    const size_t sza = nAA*(nA+1) * sizeof(REAL_t);
+    HANDLE_ERROR(cudaMalloc(A_dev, sza*dim));
+  }
+
+  void _TWED_MALLOC_DVC_TIME_STEPS(const int nA, REAL_t  **TA_dev,
+                        const int nAA){
+    const size_t sza = nAA*(nA+1) * sizeof(REAL_t);
+    HANDLE_ERROR(cudaMalloc(TA_dev, sza));
+  }
+
+  void _TWED_MALLOC_DVC(const int nA, REAL_t **A_dev, REAL_t  **TA_dev,
+                        const int dim, const int nAA){
+    _TWED_MALLOC_DVC_TIME_SERIES(nA, A_dev, dim, nAA);
+    _TWED_MALLOC_DVC_TIME_STEPS(nA, TA_dev, nAA);
+  }
+
   void _TWED_MALLOC_DEV(const int nA, REAL_t **A_dev, REAL_t  **TA_dev,
                         const int nB, REAL_t **B_dev, REAL_t  **TB_dev,
                         const int dim, const int nAA, const int nBB){
     /* malloc on gpu and copy */
-    const size_t sza = nAA*(nA+1) * sizeof(**A_dev);
-    HANDLE_ERROR(cudaMalloc(A_dev, sza*dim));
-    HANDLE_ERROR(cudaMalloc(TA_dev, sza));
-
-    const size_t szb = nBB*(nB+1) * sizeof(**B_dev);
-    HANDLE_ERROR(cudaMalloc(B_dev, szb*dim));
-    HANDLE_ERROR(cudaMalloc(TB_dev, szb));
+    _TWED_MALLOC_DVC(nA, A_dev, TA_dev, dim, nAA);
+    _TWED_MALLOC_DVC(nB, B_dev, TB_dev, dim, nBB);
   }
 
 
+  void _TWED_FREE_DVC(REAL_t *A_dev, REAL_t  *TA_dev) {
+    HANDLE_ERROR(cudaFree( A_dev));
+    HANDLE_ERROR(cudaFree(TA_dev));
+  }
+
   void _TWED_FREE_DEV(REAL_t *A_dev, REAL_t  *TA_dev,
                       REAL_t *B_dev, REAL_t  *TB_dev){
     /* In a minute I'll be free */
-    HANDLE_ERROR(cudaFree(A_dev));
-    HANDLE_ERROR(cudaFree(TA_dev));
-    HANDLE_ERROR(cudaFree(B_dev));
-    HANDLE_ERROR(cudaFree(TB_dev));
+    _TWED_FREE_DVC(A_dev, TA_dev);
+    _TWED_FREE_DVC(B_dev, TB_dev);
   }
 
-
-  void _TWED_COPY_TO_DEV(const int nA, REAL_t A[], REAL_t A_dev[], REAL_t TA[], REAL_t TA_dev[],
-                         const int nB, REAL_t B[], REAL_t B_dev[], REAL_t TB[], REAL_t TB_dev[],
-                         const int dim, const int nAA, const int nBB){
+  void _TWED_COPY_TO_DVC(const int nA, REAL_t A[], REAL_t A_dev[], REAL_t TA[], REAL_t TA_dev[],
+                         const int dim, const int nAA){
     const size_t sza = nAA*nA*sizeof(*A);
     HANDLE_ERROR(cudaMemcpy(A_dev, A, sza*dim, cudaMemcpyHostToDevice));
     HANDLE_ERROR(cudaMemcpy(TA_dev, TA, sza, cudaMemcpyHostToDevice));
+  }
 
-    const size_t szb = nBB*nB*sizeof(*B);
-    HANDLE_ERROR(cudaMemcpy(B_dev, B , szb*dim, cudaMemcpyHostToDevice));
-    HANDLE_ERROR(cudaMemcpy(TB_dev, TB, szb, cudaMemcpyHostToDevice));
+  void _TWED_COPY_TO_DEV(const int nA, REAL_t A[], REAL_t A_dev[], REAL_t TA[], REAL_t TA_dev[],
+                         const int nB, REAL_t B[], REAL_t B_dev[], REAL_t TB[], REAL_t TB_dev[],
+                         const int dim, const int nAA, const int nBB){
+    _TWED_COPY_TO_DVC(nA, A, A_dev, TA, TA_dev, dim, nAA);
+    _TWED_COPY_TO_DVC(nB, B, B_dev, TB, TB_dev, dim, nBB);
   }