From 3ace667c26bd97882456ebc90e258d2fa9f3020f Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Thu, 6 Dec 2018 03:59:56 +0700 Subject: [PATCH 01/83] SpatialMeshCu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit код инициализации памяти и задания начальных условий и гран условия --- SpatialMeshCu.cu | 442 ++++++++++++++++++++++++++++++++++++++++++++++ SpatialMeshCu.cuh | 86 +++++++++ 2 files changed, 528 insertions(+) create mode 100644 SpatialMeshCu.cu create mode 100644 SpatialMeshCu.cuh diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu new file mode 100644 index 0000000..276129d --- /dev/null +++ b/SpatialMeshCu.cu @@ -0,0 +1,442 @@ +#include "SpatialMeshCu.cuh" +#include "device_launch_parameters.h" + +SpatialMeshCu::SpatialMeshCu(Config &conf) +{ + check_correctness_of_related_config_fields(conf); + init_x_grid(conf); + init_y_grid(conf); + init_z_grid(conf); + init_constants(conf); + allocate_ongrid_values(); + fill_node_coordinates(); + set_boundary_conditions(conf); +} + +void SpatialMeshCu::check_correctness_of_related_config_fields(Config &conf) +{ + grid_x_size_gt_zero(conf); + grid_x_step_gt_zero_le_grid_x_size(conf); + grid_y_size_gt_zero(conf); + grid_y_step_gt_zero_le_grid_y_size(conf); + grid_z_size_gt_zero(conf); + grid_z_step_gt_zero_le_grid_z_size(conf); +} + +void SpatialMeshCu::init_x_grid(Config &conf) +{ + //x_volume_size = conf.mesh_config_part.grid_x_size; + //x_n_nodes = + // ceil(conf.mesh_config_part.grid_x_size / conf.mesh_config_part.grid_x_step) + 1; + //x_cell_size = x_volume_size / (x_n_nodes - 1); + //if (x_cell_size != conf.mesh_config_part.grid_x_step) { + // std::cout.precision(3); + // std::cout << "X_step was shrinked to " << x_cell_size + // << " from " << conf.mesh_config_part.grid_x_step + // << " to fit round number of cells" << std::endl; + //} + //return; +} + +void SpatialMeshCu::init_y_grid(Config &conf) +{ + //y_volume_size = conf.mesh_config_part.grid_y_size; + //y_n_nodes = + 
// ceil(conf.mesh_config_part.grid_y_size / conf.mesh_config_part.grid_y_step) + 1; + //y_cell_size = y_volume_size / (y_n_nodes - 1); + //if (y_cell_size != conf.mesh_config_part.grid_y_step) { + // std::cout.precision(3); + // std::cout << "Y_step was shrinked to " << y_cell_size + // << " from " << conf.mesh_config_part.grid_y_step + // << " to fit round number of cells." << std::endl; + //} + //return; +} + +void SpatialMeshCu::init_z_grid(Config &conf) +{ + //z_volume_size = conf.mesh_config_part.grid_z_size; + //z_n_nodes = + // ceil(conf.mesh_config_part.grid_z_size / conf.mesh_config_part.grid_z_step) + 1; + //z_cell_size = z_volume_size / (z_n_nodes - 1); + //if (z_cell_size != conf.mesh_config_part.grid_z_step) { + // std::cout.precision(3); + // std::cout << "Z_step was shrinked to " << z_cell_size + // << " from " << conf.mesh_config_part.grid_z_step + // << " to fit round number of cells." << std::endl; + //} + //return; +} + +void SpatialMeshCu::init_constants(Config & conf) +{ + n_nodes = dim3( + ceil(conf.mesh_config_part.grid_x_size / conf.mesh_config_part.grid_x_step) + 1, + ceil(conf.mesh_config_part.grid_y_size / conf.mesh_config_part.grid_y_step) + 1, + ceil(conf.mesh_config_part.grid_z_size / conf.mesh_config_part.grid_z_step) + 1 + ); + cudaMemcpyToSymbol((void*)&d_n_nodes, (void*)&n_nodes,sizeof(double3),cudaMemcpyHostToDevice); + + double3 volume_size = make_double3( + conf.mesh_config_part.grid_x_size, + conf.mesh_config_part.grid_y_size, + conf.mesh_config_part.grid_z_size + ); + cudaMemcpyToSymbol((void*)& d_volume_size, (void*)& volume_size, sizeof(double3), cudaMemcpyHostToDevice); + + double3 cell_size = make_double3( + volume_size.x / (n_nodes.x - 1), + volume_size.y / (n_nodes.y - 1), + volume_size.z / (n_nodes.z - 1) + ); + cudaMemcpyToSymbol((void*)& d_volume_size, (void*)& volume_size, sizeof(double3), cudaMemcpyHostToDevice); + + ///TODO Border constants init +} + +void SpatialMeshCu::allocate_ongrid_values() +{ + //TODO + int 
nx = n_nodes.x; + int ny = n_nodes.y; + int nz = n_nodes.z; + + size_t total_node_count = nx * ny * nz; + + cudaMalloc(&dev_node_coordinates, total_node_count); + cudaMalloc(&dev_charge_density, total_node_count); + cudaMalloc(&dev_potential, total_node_count); + cudaMalloc(&dev_electric_field, total_node_count); + + + //node_coordinates.resize(boost::extents[nx][ny][nz]); + //charge_density.resize(boost::extents[nx][ny][nz]); + //potential.resize(boost::extents[nx][ny][nz]); + //electric_field.resize(boost::extents[nx][ny][nz]); + + return; +} + +void SpatialMeshCu::fill_node_coordinates() +{ + dim3 threads = GetThreads(); + dim3 blocks = GetBlocks(threads); + + <<>> fill_coordinates(dev_node_coordinates); +} + + + +__global__ void SpatialMeshCu::fill_coordinates(double3* node_coordinates) { + + int idx = GetIdxVolume(); + + int x = threadIdx.x + blockIdx.x*blockDim.x; + int y = threadIdx.y + blockIdx.y*blockDim.y; + int z = threadIdx.z + blockIdx.z*blockDim.z; + node_coordinates[idx] = make_double3(d_volume_size.x * x, d_volume_size.y * y, d_volume_size.z * z);//(double)., +} +void SpatialMeshCu::clear_old_density_values() +{ + //std::fill(charge_density.data(), + // charge_density.data() + charge_density.num_elements(), + // 0.0); + + //return; +} + + +void SpatialMeshCu::set_boundary_conditions(Config &conf) +{ + set_boundary_conditions(conf.boundary_config_part.boundary_phi_left, + conf.boundary_config_part.boundary_phi_right, + conf.boundary_config_part.boundary_phi_top, + conf.boundary_config_part.boundary_phi_bottom, + conf.boundary_config_part.boundary_phi_near, + conf.boundary_config_part.boundary_phi_far); +} + + +void SpatialMeshCu::set_boundary_conditions(const double phi_left, const double phi_right, + const double phi_top, const double phi_bottom, + const double phi_near, const double phi_far) +{ + dim3 blockSize = dim3(16, 16, 2); + + dim3 gridSize = dim3(n_nodes.y / 16, n_nodes.z / 16, 1); + <<>> SetBoundaryConditionOrthoX(dev_potential); + + 
gridSize = dim3(n_nodes.x / 16, n_nodes.z / 16, 2); + <<>> SetBoundaryConditionOrthoY(dev_potential); + + gridSize = dim3(n_nodes.x / 16, n_nodes.y / 16, 2); + <<>> SetBoundaryConditionOrthoZ(dev_potential); + //for (int j = 0; j < ny; j++) { + // for (int k = 0; k < nz; k++) { + // potential[0][j][k] = phi_right; + // potential[nx - 1][j][k] = phi_left; + // } + //} + + //for (int i = 0; i < nx; i++) { + // for (int j = 0; j < ny; j++) { + // potential[i][j][0] = phi_near; + // potential[i][j][nz - 1] = phi_far; + // } + //} + + return; +} + +bool SpatialMeshCu::is_potential_equal_on_boundaries() +{ + //bool equal = (potential[0][2][2] == potential[x_n_nodes - 1][2][2] == + // potential[2][0][2] == potential[2][y_n_nodes - 1][2] == + // potential[2][2][0] == potential[2][2][z_n_nodes - 1]); + // possible to rewrite to avoid warnings from compiler: + // bool equal = ( potential[0][2][2] == potential[x_n_nodes-1][2][2] ); + // equal = equal and ( potential[x_n_nodes-1][2][2] == potential[2][0][2] ); + // equal = equal and ( potential[2][0][2] == potential[2][y_n_nodes-1][2] ); + // equal = equal and ( potential[2][y_n_nodes-1][2] == potential[2][2][0] ); + // equal = equal and ( potential[2][2][0] == potential[2][2][z_n_nodes-1] ); + //return equal; + return false; +} + +void SpatialMeshCu::print() +{ + print_grid(); + print_ongrid_values(); + return; +} + +void SpatialMeshCu::print_grid() +{ + //std::cout << "Grid:" << std::endl; + //std::cout << "Length: x = " << x_volume_size << ", " + // << "y = " << y_volume_size << ", " + // << "z = " << z_volume_size << std::endl; + //std::cout << "Cell size: x = " << x_cell_size << ", " + // << "y = " << y_cell_size << ", " + // << "z = " << z_cell_size << std::endl; + //std::cout << "Total nodes: x = " << x_n_nodes << ", " + // << "y = " << y_n_nodes << ", " + // << "z = " << z_n_nodes << std::endl; + //return; +} + +void SpatialMeshCu::print_ongrid_values() +{ + //int nx = x_n_nodes; + //int ny = y_n_nodes; + //int nz = 
z_n_nodes; + //std::cout << "x_node, y_node, z_node, charge_density, potential, electric_field(x,y,z)" << std::endl; + //std::cout.precision(3); + //std::cout.setf(std::ios::scientific); + //std::cout.fill(' '); + //std::cout.setf(std::ios::right); + //for (int i = 0; i < nx; i++) { + // for (int j = 0; j < ny; j++) { + // for (int k = 0; k < nz; k++) { + // std::cout << std::setw(8) << i + // << std::setw(8) << j + // << std::setw(8) << k + // << std::setw(14) << charge_density[i][j][k] + // << std::setw(14) << potential[i][j][k] + // << std::setw(14) << vec3d_x(electric_field[i][j][k]) + // << std::setw(14) << vec3d_y(electric_field[i][j][k]) + // << std::setw(14) << vec3d_z(electric_field[i][j][k]) + // << std::endl; + // } + // } + //} + //return; +} + +void SpatialMeshCu::grid_x_size_gt_zero(Config &conf) +{ + check_and_exit_if_not(conf.mesh_config_part.grid_x_size > 0, + "grid_x_size < 0"); +} + +void SpatialMeshCu::grid_x_step_gt_zero_le_grid_x_size(Config &conf) +{ + check_and_exit_if_not( + (conf.mesh_config_part.grid_x_step > 0) && + (conf.mesh_config_part.grid_x_step <= conf.mesh_config_part.grid_x_size), + "grid_x_step < 0 or grid_x_step >= grid_x_size"); +} + +void SpatialMeshCu::grid_y_size_gt_zero(Config &conf) +{ + check_and_exit_if_not(conf.mesh_config_part.grid_y_size > 0, + "grid_y_size < 0"); +} + +void SpatialMeshCu::grid_y_step_gt_zero_le_grid_y_size(Config &conf) +{ + check_and_exit_if_not( + (conf.mesh_config_part.grid_y_step > 0) && + (conf.mesh_config_part.grid_y_step <= conf.mesh_config_part.grid_y_size), + "grid_y_step < 0 or grid_y_step >= grid_y_size"); +} + +void SpatialMeshCu::grid_z_size_gt_zero(Config &conf) +{ + check_and_exit_if_not(conf.mesh_config_part.grid_z_size > 0, + "grid_z_size < 0"); +} + +void SpatialMeshCu::grid_z_step_gt_zero_le_grid_z_size(Config &conf) +{ + check_and_exit_if_not( + (conf.mesh_config_part.grid_z_step > 0) && + (conf.mesh_config_part.grid_z_step <= conf.mesh_config_part.grid_z_size), + "grid_z_step < 
0 or grid_z_step >= grid_z_size"); +} + + +void SpatialMeshCu::check_and_exit_if_not(const bool &should_be, const std::string &message) +{ + //if (!should_be) { + // std::cout << "Error: " << message << std::endl; + // exit(EXIT_FAILURE); + //} + //return; +} + +double SpatialMeshCu::node_number_to_coordinate_x(int i) +{ + //if (i >= 0 && i < x_n_nodes) { + // return i * x_cell_size; + //} + //else { + // printf("invalid node number i=%d at node_number_to_coordinate_x\n", i); + // exit(EXIT_FAILURE); + //} +} + +double SpatialMeshCu::node_number_to_coordinate_y(int j) +{ + //if (j >= 0 && j < y_n_nodes) { + // return j * y_cell_size; + //} + //else { + // printf("invalid node number j=%d at node_number_to_coordinate_y\n", j); + // exit(EXIT_FAILURE); + //} +} + +double SpatialMeshCu::node_number_to_coordinate_z(int k) +{ + //if (k >= 0 && k < z_n_nodes) { + // return k * z_cell_size; + //} + //else { + // printf("invalid node number k=%d at node_number_to_coordinate_z\n", k); + // exit(EXIT_FAILURE); + //} +} + +dim3 SpatialMeshCu::GetThreads() { + return dim3(16, 16, d_n_nodes.z / 16); +} + +dim3 SpatialMeshCu::GetBlocks(dim3 nThreads) { + return dim3(d_n_nodes.x / nThreads.x, d_n_nodes.y / nThreads.y, 16); +} + +__device__ int SpatialMeshCu::GetIdxVolume() { + //int xStepthread = 1; + int xStepBlock = blockDim.x; + + int yStepThread = d_n_nodes.x; + int yStepBlock = yStepThread * blockDim.y; + + int zStepThread = d_n_nodes.x * d_n_nodes.y; + int zStepBlock = zStepThread * blockDim.z; + + return threadIdx.x + blockIdx.x*xStepBlock + + threadIdx.y*yStepThread + blockIdx.y*yStepBlock + + threadIdx.z*zStepThread + blockIdx.z*zStepBlock; +} + +__global__ void SpatialMeshCu::SetBoundaryConditionOrthoX(double* potential) { + int xIdx = blockIdx.z*(d_n_nodes.x - 1); //0 or nodes.x-1 + + + int yStepThread = d_n_nodes.x;//x= + int yStepBlock = d_n_nodes.x * blockDim.x; + + int zStepThread = d_n_nodes.x * d_n_nodes.y; + int zStepBlock = zStepThread * blockDim.y; + + int idx 
= xIdx + + threadIdx.x*yStepThread + blockIdx.x*yStepBlock + + threadIdx.y*zStepThread + blockIdx.y*zStepBlock; + + potential[idx] = ((double)(1 - blockIdx.z))*d_bot_border + (blockIdx.z*d_up_border); + +} + +// +__global__ void SpatialMeshCu::SetBoundaryConditionOrthoX(double* potential) { + int xIdx = blockIdx.z*(d_n_nodes.x - 1); //0 or nodes.x-1 + + + int yStepThread = d_n_nodes.x;//x= + int yStepBlock = d_n_nodes.x * blockDim.x; + + int zStepThread = d_n_nodes.x * d_n_nodes.y; + int zStepBlock = zStepThread * blockDim.y; + + int idx = xIdx + + threadIdx.x*yStepThread + blockIdx.x*yStepBlock + + threadIdx.y*zStepThread + blockIdx.y*zStepBlock; + // if + potential[idx] = ((double)(1 - blockIdx.z)) * d_left_border + (blockIdx.z*d_right_border); + +} + +__global__ void SpatialMeshCu::SetBoundaryConditionOrthoY(double* potential) { + int yIdx = blockIdx.z * d_n_nodes.x*(d_n_nodes.y - 1); //0 or nodes.x-1 + + + int xStepThread = 1;//x= + int xStepBlock = blockDim.x; + + int zStepThread = d_n_nodes.x * d_n_nodes.y; + int zStepBlock = zStepThread * blockDim.y; + + int idx = yIdx + + threadIdx.x*xStepThread + blockIdx.x*xStepBlock + + threadIdx.y*zStepThread + blockIdx.y*zStepBlock; + // if + potential[idx] = ((double)(1 - blockIdx.z)) * d_bot_border + (blockIdx.z * d_up_border); + +} + +__global__ void SpatialMeshCu::SetBoundaryConditionOrthoZ(double* potential) { + int zIdx = blockIdx.z * (d_n_nodes.x * d_n_nodes.y * (d_n_nodes.z - 1)); //0 or nodes.x-1 + + + int xStepThread = 1;//x= + int xStepBlock = blockDim.x; + + int yStepThread = d_n_nodes.x; + int yStepBlock = yStepThread * blockDim.y; + + int idx = zIdx + + threadIdx.x*xStepThread + blockIdx.x*xStepBlock + + threadIdx.y*yStepThread + blockIdx.y*yStepBlock; + // if + potential[idx] = ((double)(1 - blockIdx.z)) * d_near_border + (blockIdx.z * d_far_border); + +} + +SpatialMeshCu::~SpatialMeshCu() { + cudaFree((void*)dev_node_coordinates); + cudaFree((void*)dev_potential); + cudaFree((void*)dev_charge_density); 
+ cudaFree((void*)dev_electric_field); +} diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh new file mode 100644 index 0000000..30006b0 --- /dev/null +++ b/SpatialMeshCu.cuh @@ -0,0 +1,86 @@ +#include "cuda_runtime.h" + +//thread idx block idx^ +#include "config.h" +#include +//#include "hdf5.h" +//#include "hdf5_hl.h" +class SpatialMeshCu { +public: + __constant__ double3 d_volume_size; + __constant__ double3 d_cell_size; + __constant__ dim3 d_n_nodes; + + __constant__ double d_up_border; + __constant__ double d_bot_border; + + __constant__ double d_left_border; + __constant__ double d_right_border; + + __constant__ double d_far_border; + __constant__ double d_near_border; + + + dim3 n_nodes; + double3 *dev_node_coordinates; + double *dev_charge_density; + double *dev_potential; + double3 *dev_electric_field; + + + //boost::multi_array node_coordinates; + //boost::multi_array charge_density; + //boost::multi_array potential; + //boost::multi_array electric_field; +public: + SpatialMeshCu(Config &conf); + + //Spatial_mesh(hid_t h5_spat_mesh_group); + void clear_old_density_values(); + void set_boundary_conditions(Config &conf); + bool is_potential_equal_on_boundaries(); + void print(); + //void write_to_file(hid_t hdf5_file_id); + virtual ~SpatialMeshCu(); + double node_number_to_coordinate_x(int i); + double node_number_to_coordinate_y(int j); + double node_number_to_coordinate_z(int k); +private: + // init + void check_correctness_of_related_config_fields(Config &conf); + void init_x_grid(Config &conf); + void init_y_grid(Config &conf); + void init_z_grid(Config &conf); + void init_constants(Config &conf); + void allocate_ongrid_values(); + void fill_node_coordinates(); + void set_boundary_conditions(const double phi_left, const double phi_right, + const double phi_top, const double phi_bottom, + const double phi_near, const double phi_far); + // print + void print_grid(); + void print_ongrid_values(); + // write hdf5 + //void write_hdf5_attributes(hid_t 
group_id); + //void write_hdf5_ongrid_values(hid_t group_id); + int n_of_elements_to_write_for_each_process_for_1d_dataset(int total_elements); + int data_offset_for_each_process_for_1d_dataset(int total_elements); + //void hdf5_status_check(herr_t status); + // config check + void grid_x_size_gt_zero(Config &conf); + void grid_x_step_gt_zero_le_grid_x_size(Config &conf); + void grid_y_size_gt_zero(Config &conf); + void grid_y_step_gt_zero_le_grid_y_size(Config &conf); + void grid_z_size_gt_zero(Config &conf); + void grid_z_step_gt_zero_le_grid_z_size(Config &conf); + void check_and_exit_if_not(const bool &should_be, const std::string &message); + + dim3 GetThreads(); + dim3 GetBlocks(dim3 nThreads); + + __global__ void fill_coordinates(double3* node_coordinates); + __device__ int GetIdxVolume(); + __global__ void SetBoundaryConditionOrthoX(double* potential); + __global__ void SetBoundaryConditionOrthoY(double* potential); + __global__ void SetBoundaryConditionOrthoZ(double* potential); +}; From 7ec2ad5e8190ecd20e90b90a36942622d1bfa97d Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Wed, 12 Dec 2018 01:14:07 +0700 Subject: [PATCH 02/83] MAke try --- Makefile | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 2c7781a..4f48186 100644 --- a/Makefile +++ b/Makefile @@ -4,20 +4,29 @@ SHELL:=/bin/bash -O extglob ##### Compilers #CC=clang++ CC=g++ -HDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_BSD_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security +NVCC=nvcc + +HDF5FLAGS=-I/usr/local/hdf5/include -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security WARNINGS=-Wall CFLAGS = ${HDF5FLAGS} -O2 -std=c++11 ${WARNINGS} LDFLAGS = +CUDAFLAGS= -I/usr/local/cuda10/include -std=c++11 -arch=sm30 + ### Libraries COMMONLIBS=-lm BOOSTLIBS=-lboost_program_options 
-HDF5LIBS=-L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5_hl -lhdf5 -Wl,-z,relro -lpthread -lz -ldl -lm -Wl,-rpath -Wl,/usr/lib/x86_64-linux-gnu/hdf5/serial +HDF5LIBS=-L/usr/local/hdf5/lib -lhdf5_hl -lhdf5 -Wl,-z,relro -lpthread -lz -ldl -lm -Wl,-rpath -Wl,/usr/local/hdf5/lib +CUDALIBS=-L/usr/local/cuda10/lib64/ LIBS=${COMMONLIBS} ${BOOSTLIBS} ${HDF5LIBS} ### Sources and executable CPPSOURCES=$(wildcard *.cpp) CPPHEADERS=$(wildcard *.h) +CUSOURCES=$(wildcard *.cu) + +CUOBJECTS=$(CUSOURCES:%.cu=%.o) + OBJECTS=$(CPPSOURCES:%.cpp=%.o) EXECUTABLE=ef.out MAKE=make @@ -25,9 +34,10 @@ TINYEXPR=./lib/tinyexpr TINYEXPR_OBJ=./lib/tinyexpr/tinyexpr.o SUBDIRS=doc -$(EXECUTABLE): $(OBJECTS) $(TINYEXPR) - $(CC) $(LDFLAGS) $(OBJECTS) $(TINYEXPR_OBJ) -o $@ $(LIBS) - +$(EXECUTABLE): $(OBJECTS) $(TINYEXPR) $(CUOBJECTS) + $(CC) $(LDFLAGS) $(OBJECTS) $(TINYEXPR_OBJ) $(CUOBJECTS) -o $@ $(LIBS) $(CUDALIBS) +$(CUOBJECTS):%.o%.cu + $(NVCC) $(CUDAFLAGS) -c $< -o $@ $(OBJECTS):%.o:%.cpp $(CPPHEADERS) $(CC) $(CFLAGS) -c $< -o $@ From 06feba5a1a018762d1a64f9bb8ee0bb355f5a498 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Wed, 12 Dec 2018 01:41:56 +0700 Subject: [PATCH 03/83] fix sythax --- Makefile | 8 +- SpatialMeshCu.cu | 257 +++++++++++++++++++++++----------------------- SpatialMeshCu.cuh | 23 ----- 3 files changed, 133 insertions(+), 155 deletions(-) diff --git a/Makefile b/Makefile index 4f48186..58f5319 100644 --- a/Makefile +++ b/Makefile @@ -4,14 +4,14 @@ SHELL:=/bin/bash -O extglob ##### Compilers #CC=clang++ CC=g++ -NVCC=nvcc +NVCC=/usr/local/cuda10/bin/nvcc HDF5FLAGS=-I/usr/local/hdf5/include -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security WARNINGS=-Wall CFLAGS = ${HDF5FLAGS} -O2 -std=c++11 ${WARNINGS} LDFLAGS = -CUDAFLAGS= -I/usr/local/cuda10/include -std=c++11 -arch=sm30 +CUDAFLAGS= -I/usr/local/cuda10/include -std=c++11 -arch=sm_30 ### Libraries COMMONLIBS=-lm @@ -24,10 +24,12 @@ LIBS=${COMMONLIBS} 
${BOOSTLIBS} ${HDF5LIBS} CPPSOURCES=$(wildcard *.cpp) CPPHEADERS=$(wildcard *.h) CUSOURCES=$(wildcard *.cu) +CUHEADERS=$(wildcard *.cuh) CUOBJECTS=$(CUSOURCES:%.cu=%.o) OBJECTS=$(CPPSOURCES:%.cpp=%.o) + EXECUTABLE=ef.out MAKE=make TINYEXPR=./lib/tinyexpr @@ -36,7 +38,7 @@ SUBDIRS=doc $(EXECUTABLE): $(OBJECTS) $(TINYEXPR) $(CUOBJECTS) $(CC) $(LDFLAGS) $(OBJECTS) $(TINYEXPR_OBJ) $(CUOBJECTS) -o $@ $(LIBS) $(CUDALIBS) -$(CUOBJECTS):%.o%.cu +$(CUOBJECTS):%.o:%.cu $(CUHEADERS) $(NVCC) $(CUDAFLAGS) -c $< -o $@ $(OBJECTS):%.o:%.cpp $(CPPHEADERS) $(CC) $(CFLAGS) -c $< -o $@ diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 276129d..4e65124 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -1,6 +1,109 @@ #include "SpatialMeshCu.cuh" #include "device_launch_parameters.h" + __constant__ double3 d_volume_size; + __constant__ double3 d_cell_size; + __constant__ dim3 d_n_nodes; + + __constant__ double d_up_border; + __constant__ double d_bot_border; + + __constant__ double d_left_border; + __constant__ double d_right_border; + + __constant__ double d_far_border; + __constant__ double d_near_border; + +dim3 SpatialMeshCu::GetThreads() { + return dim3(16, 16, d_n_nodes.z / 16); +} + +dim3 SpatialMeshCu::GetBlocks(dim3 nThreads) { + return dim3(d_n_nodes.x / nThreads.x, d_n_nodes.y / nThreads.y, 16); +} + +__device__ int GetIdxVolume() { + //int xStepthread = 1; + int xStepBlock = blockDim.x; + + int yStepThread = d_n_nodes.x; + int yStepBlock = yStepThread * blockDim.y; + + int zStepThread = d_n_nodes.x * d_n_nodes.y; + int zStepBlock = zStepThread * blockDim.z; + + return threadIdx.x + blockIdx.x*xStepBlock + + threadIdx.y*yStepThread + blockIdx.y*yStepBlock + + threadIdx.z*zStepThread + blockIdx.z*zStepBlock; +} + +__global__ void fill_coordinates(double3* node_coordinates) { + + int idx = GetIdxVolume(); + + int x = threadIdx.x + blockIdx.x*blockDim.x; + int y = threadIdx.y + blockIdx.y*blockDim.y; + int z = threadIdx.z + blockIdx.z*blockDim.z; + 
node_coordinates[idx] = make_double3(d_volume_size.x * x, d_volume_size.y * y, d_volume_size.z * z);//(double)., +} + + + + +__global__ void SetBoundaryConditionOrthoX(double* potential) { + int xIdx = blockIdx.z*(d_n_nodes.x - 1); //0 or nodes.x-1 + + + int yStepThread = d_n_nodes.x;//x= + int yStepBlock = d_n_nodes.x * blockDim.x; + + int zStepThread = d_n_nodes.x * d_n_nodes.y; + int zStepBlock = zStepThread * blockDim.y; + + int idx = xIdx + + threadIdx.x*yStepThread + blockIdx.x*yStepBlock + + threadIdx.y*zStepThread + blockIdx.y*zStepBlock; + + potential[idx] = ((double)(1 - blockIdx.z)) * d_left_border + (blockIdx.z*d_right_border); + +} + +__global__ void SetBoundaryConditionOrthoY(double* potential) { + int yIdx = blockIdx.z * d_n_nodes.x*(d_n_nodes.y - 1); //0 or nodes.x-1 + + + int xStepThread = 1;//x= + int xStepBlock = blockDim.x; + + int zStepThread = d_n_nodes.x * d_n_nodes.y; + int zStepBlock = zStepThread * blockDim.y; + + int idx = yIdx + + threadIdx.x*xStepThread + blockIdx.x*xStepBlock + + threadIdx.y*zStepThread + blockIdx.y*zStepBlock; + + potential[idx] = ((double)(1 - blockIdx.z)) * d_bot_border + (blockIdx.z * d_up_border); + +} + +__global__ void SetBoundaryConditionOrthoZ(double* potential) { + int zIdx = blockIdx.z * (d_n_nodes.x * d_n_nodes.y * (d_n_nodes.z - 1)); //0 or nodes.x-1 + + + int xStepThread = 1;//x= + int xStepBlock = blockDim.x; + + int yStepThread = d_n_nodes.x; + int yStepBlock = yStepThread * blockDim.y; + + int idx = zIdx + + threadIdx.x*xStepThread + blockIdx.x*xStepBlock + + threadIdx.y*yStepThread + blockIdx.y*yStepBlock; + + potential[idx] = ((double)(1 - blockIdx.z)) * d_near_border + (blockIdx.z * d_far_border); + +} + SpatialMeshCu::SpatialMeshCu(Config &conf) { check_correctness_of_related_config_fields(conf); @@ -122,20 +225,12 @@ void SpatialMeshCu::fill_node_coordinates() dim3 threads = GetThreads(); dim3 blocks = GetBlocks(threads); - <<>> fill_coordinates(dev_node_coordinates); + 
fill_coordinates<<>>(dev_node_coordinates); } -__global__ void SpatialMeshCu::fill_coordinates(double3* node_coordinates) { - - int idx = GetIdxVolume(); - int x = threadIdx.x + blockIdx.x*blockDim.x; - int y = threadIdx.y + blockIdx.y*blockDim.y; - int z = threadIdx.z + blockIdx.z*blockDim.z; - node_coordinates[idx] = make_double3(d_volume_size.x * x, d_volume_size.y * y, d_volume_size.z * z);//(double)., -} void SpatialMeshCu::clear_old_density_values() { //std::fill(charge_density.data(), @@ -164,13 +259,13 @@ void SpatialMeshCu::set_boundary_conditions(const double phi_left, const double dim3 blockSize = dim3(16, 16, 2); dim3 gridSize = dim3(n_nodes.y / 16, n_nodes.z / 16, 1); - <<>> SetBoundaryConditionOrthoX(dev_potential); + SetBoundaryConditionOrthoX <<>> (dev_potential); gridSize = dim3(n_nodes.x / 16, n_nodes.z / 16, 2); - <<>> SetBoundaryConditionOrthoY(dev_potential); + SetBoundaryConditionOrthoY <<>> (dev_potential); gridSize = dim3(n_nodes.x / 16, n_nodes.y / 16, 2); - <<>> SetBoundaryConditionOrthoZ(dev_potential); + SetBoundaryConditionOrthoZ <<>> (dev_potential); //for (int j = 0; j < ny; j++) { // for (int k = 0; k < nz; k++) { // potential[0][j][k] = phi_right; @@ -307,131 +402,35 @@ void SpatialMeshCu::check_and_exit_if_not(const bool &should_be, const std::stri double SpatialMeshCu::node_number_to_coordinate_x(int i) { - //if (i >= 0 && i < x_n_nodes) { - // return i * x_cell_size; - //} - //else { - // printf("invalid node number i=%d at node_number_to_coordinate_x\n", i); - // exit(EXIT_FAILURE); - //} + if (i >= 0 && i < n_nodes.x) { + return i * cell_size.x; + } + else { + printf("invalid node number i=%d at node_number_to_coordinate_x\n", i); + exit(EXIT_FAILURE); + } } double SpatialMeshCu::node_number_to_coordinate_y(int j) { - //if (j >= 0 && j < y_n_nodes) { - // return j * y_cell_size; - //} - //else { - // printf("invalid node number j=%d at node_number_to_coordinate_y\n", j); - // exit(EXIT_FAILURE); - //} + if (j >= 0 && j < 
n_nodes.y) { + return j * cell_size.y; + } + else { + printf("invalid node number j=%d at node_number_to_coordinate_y\n", j); + exit(EXIT_FAILURE); + } } double SpatialMeshCu::node_number_to_coordinate_z(int k) { - //if (k >= 0 && k < z_n_nodes) { - // return k * z_cell_size; - //} - //else { - // printf("invalid node number k=%d at node_number_to_coordinate_z\n", k); - // exit(EXIT_FAILURE); - //} -} - -dim3 SpatialMeshCu::GetThreads() { - return dim3(16, 16, d_n_nodes.z / 16); -} - -dim3 SpatialMeshCu::GetBlocks(dim3 nThreads) { - return dim3(d_n_nodes.x / nThreads.x, d_n_nodes.y / nThreads.y, 16); -} - -__device__ int SpatialMeshCu::GetIdxVolume() { - //int xStepthread = 1; - int xStepBlock = blockDim.x; - - int yStepThread = d_n_nodes.x; - int yStepBlock = yStepThread * blockDim.y; - - int zStepThread = d_n_nodes.x * d_n_nodes.y; - int zStepBlock = zStepThread * blockDim.z; - - return threadIdx.x + blockIdx.x*xStepBlock + - threadIdx.y*yStepThread + blockIdx.y*yStepBlock + - threadIdx.z*zStepThread + blockIdx.z*zStepBlock; -} - -__global__ void SpatialMeshCu::SetBoundaryConditionOrthoX(double* potential) { - int xIdx = blockIdx.z*(d_n_nodes.x - 1); //0 or nodes.x-1 - - - int yStepThread = d_n_nodes.x;//x= - int yStepBlock = d_n_nodes.x * blockDim.x; - - int zStepThread = d_n_nodes.x * d_n_nodes.y; - int zStepBlock = zStepThread * blockDim.y; - - int idx = xIdx + - threadIdx.x*yStepThread + blockIdx.x*yStepBlock + - threadIdx.y*zStepThread + blockIdx.y*zStepBlock; - - potential[idx] = ((double)(1 - blockIdx.z))*d_bot_border + (blockIdx.z*d_up_border); - -} - -// -__global__ void SpatialMeshCu::SetBoundaryConditionOrthoX(double* potential) { - int xIdx = blockIdx.z*(d_n_nodes.x - 1); //0 or nodes.x-1 - - - int yStepThread = d_n_nodes.x;//x= - int yStepBlock = d_n_nodes.x * blockDim.x; - - int zStepThread = d_n_nodes.x * d_n_nodes.y; - int zStepBlock = zStepThread * blockDim.y; - - int idx = xIdx + - threadIdx.x*yStepThread + blockIdx.x*yStepBlock + - 
threadIdx.y*zStepThread + blockIdx.y*zStepBlock; - // if - potential[idx] = ((double)(1 - blockIdx.z)) * d_left_border + (blockIdx.z*d_right_border); - -} - -__global__ void SpatialMeshCu::SetBoundaryConditionOrthoY(double* potential) { - int yIdx = blockIdx.z * d_n_nodes.x*(d_n_nodes.y - 1); //0 or nodes.x-1 - - - int xStepThread = 1;//x= - int xStepBlock = blockDim.x; - - int zStepThread = d_n_nodes.x * d_n_nodes.y; - int zStepBlock = zStepThread * blockDim.y; - - int idx = yIdx + - threadIdx.x*xStepThread + blockIdx.x*xStepBlock + - threadIdx.y*zStepThread + blockIdx.y*zStepBlock; - // if - potential[idx] = ((double)(1 - blockIdx.z)) * d_bot_border + (blockIdx.z * d_up_border); - -} - -__global__ void SpatialMeshCu::SetBoundaryConditionOrthoZ(double* potential) { - int zIdx = blockIdx.z * (d_n_nodes.x * d_n_nodes.y * (d_n_nodes.z - 1)); //0 or nodes.x-1 - - - int xStepThread = 1;//x= - int xStepBlock = blockDim.x; - - int yStepThread = d_n_nodes.x; - int yStepBlock = yStepThread * blockDim.y; - - int idx = zIdx + - threadIdx.x*xStepThread + blockIdx.x*xStepBlock + - threadIdx.y*yStepThread + blockIdx.y*yStepBlock; - // if - potential[idx] = ((double)(1 - blockIdx.z)) * d_near_border + (blockIdx.z * d_far_border); - + if (k >= 0 && k < n_nodes.z) { + return k * cell_size.z; + } + else { + printf("invalid node number k=%d at node_number_to_coordinate_z\n", k); + exit(EXIT_FAILURE); + } } SpatialMeshCu::~SpatialMeshCu() { diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 30006b0..e9bc0dc 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -7,19 +7,6 @@ //#include "hdf5_hl.h" class SpatialMeshCu { public: - __constant__ double3 d_volume_size; - __constant__ double3 d_cell_size; - __constant__ dim3 d_n_nodes; - - __constant__ double d_up_border; - __constant__ double d_bot_border; - - __constant__ double d_left_border; - __constant__ double d_right_border; - - __constant__ double d_far_border; - __constant__ double d_near_border; - dim3 n_nodes; double3 
*dev_node_coordinates; @@ -28,10 +15,6 @@ public: double3 *dev_electric_field; - //boost::multi_array node_coordinates; - //boost::multi_array charge_density; - //boost::multi_array potential; - //boost::multi_array electric_field; public: SpatialMeshCu(Config &conf); @@ -77,10 +60,4 @@ private: dim3 GetThreads(); dim3 GetBlocks(dim3 nThreads); - - __global__ void fill_coordinates(double3* node_coordinates); - __device__ int GetIdxVolume(); - __global__ void SetBoundaryConditionOrthoX(double* potential); - __global__ void SetBoundaryConditionOrthoY(double* potential); - __global__ void SetBoundaryConditionOrthoZ(double* potential); }; From 80158c5800301ba2cec22bac776c46745d9a9624 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Wed, 12 Dec 2018 01:41:56 +0700 Subject: [PATCH 04/83] fix sythax --- Makefile | 10 +- SpatialMeshCu.cu | 262 +++++++++++++++++++++++----------------------- SpatialMeshCu.cuh | 26 +---- 3 files changed, 140 insertions(+), 158 deletions(-) diff --git a/Makefile b/Makefile index 4f48186..dc4d211 100644 --- a/Makefile +++ b/Makefile @@ -4,30 +4,32 @@ SHELL:=/bin/bash -O extglob ##### Compilers #CC=clang++ CC=g++ -NVCC=nvcc +NVCC=/usr/local/cuda10/bin/nvcc HDF5FLAGS=-I/usr/local/hdf5/include -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security WARNINGS=-Wall CFLAGS = ${HDF5FLAGS} -O2 -std=c++11 ${WARNINGS} LDFLAGS = -CUDAFLAGS= -I/usr/local/cuda10/include -std=c++11 -arch=sm30 +CUDAFLAGS= -I/usr/local/cuda10/include -std=c++11 -arch=sm_30 ### Libraries COMMONLIBS=-lm BOOSTLIBS=-lboost_program_options HDF5LIBS=-L/usr/local/hdf5/lib -lhdf5_hl -lhdf5 -Wl,-z,relro -lpthread -lz -ldl -lm -Wl,-rpath -Wl,/usr/local/hdf5/lib -CUDALIBS=-L/usr/local/cuda10/lib64/ +CUDALIBS=-L/usr/local/cuda10/lib64/ -lcudart LIBS=${COMMONLIBS} ${BOOSTLIBS} ${HDF5LIBS} ### Sources and executable CPPSOURCES=$(wildcard *.cpp) CPPHEADERS=$(wildcard *.h) CUSOURCES=$(wildcard *.cu) +CUHEADERS=$(wildcard 
*.cuh) CUOBJECTS=$(CUSOURCES:%.cu=%.o) OBJECTS=$(CPPSOURCES:%.cpp=%.o) + EXECUTABLE=ef.out MAKE=make TINYEXPR=./lib/tinyexpr @@ -36,7 +38,7 @@ SUBDIRS=doc $(EXECUTABLE): $(OBJECTS) $(TINYEXPR) $(CUOBJECTS) $(CC) $(LDFLAGS) $(OBJECTS) $(TINYEXPR_OBJ) $(CUOBJECTS) -o $@ $(LIBS) $(CUDALIBS) -$(CUOBJECTS):%.o%.cu +$(CUOBJECTS):%.o:%.cu $(CUHEADERS) $(NVCC) $(CUDAFLAGS) -c $< -o $@ $(OBJECTS):%.o:%.cpp $(CPPHEADERS) $(CC) $(CFLAGS) -c $< -o $@ diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 276129d..6da1c38 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -1,6 +1,103 @@ #include "SpatialMeshCu.cuh" #include "device_launch_parameters.h" + __constant__ double3 d_volume_size[1]; + __constant__ double3 d_cell_size[1]; + __constant__ int3 d_n_nodes[1]; + + __constant__ double d_up_border[1]; + __constant__ double d_bot_border[1]; + + __constant__ double d_left_border[1]; + __constant__ double d_right_border[1]; + + __constant__ double d_far_border[1]; + __constant__ double d_near_border[1]; + + + +__device__ int GetIdxVolume() { + //int xStepthread = 1; + int xStepBlock = blockDim.x; + + int yStepThread = d_n_nodes[0].x; + int yStepBlock = yStepThread * blockDim.y; + + int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; + int zStepBlock = zStepThread * blockDim.z; + + return threadIdx.x + blockIdx.x*xStepBlock + + threadIdx.y*yStepThread + blockIdx.y*yStepBlock + + threadIdx.z*zStepThread + blockIdx.z*zStepBlock; +} + +__global__ void fill_coordinates(double3* node_coordinates) { + + int idx = GetIdxVolume(); + + int x = threadIdx.x + blockIdx.x*blockDim.x; + int y = threadIdx.y + blockIdx.y*blockDim.y; + int z = threadIdx.z + blockIdx.z*blockDim.z; + node_coordinates[idx] = make_double3(d_volume_size[0].x * x, d_volume_size[0].y * y, d_volume_size[0].z * z);//(double)., +} + + + + +__global__ void SetBoundaryConditionOrthoX(double* potential) { + int xIdx = blockIdx.z*(d_n_nodes[0].x - 1); //0 or nodes.x-1 + + + int yStepThread = d_n_nodes[0].x;//x= + int 
yStepBlock = d_n_nodes[0].x * blockDim.x; + + int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; + int zStepBlock = zStepThread * blockDim.y; + + int idx = xIdx + + threadIdx.x*yStepThread + blockIdx.x*yStepBlock + + threadIdx.y*zStepThread + blockIdx.y*zStepBlock; + + potential[idx] = ((double)(1 - blockIdx.z)) * d_left_border[0] + (blockIdx.z*d_right_border[0]); + +} + +__global__ void SetBoundaryConditionOrthoY(double* potential) { + int yIdx = blockIdx.z * d_n_nodes[0].x*(d_n_nodes[0].y - 1); //0 or nodes.x-1 + + + int xStepThread = 1;//x= + int xStepBlock = blockDim.x; + + int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; + int zStepBlock = zStepThread * blockDim.y; + + int idx = yIdx + + threadIdx.x*xStepThread + blockIdx.x*xStepBlock + + threadIdx.y*zStepThread + blockIdx.y*zStepBlock; + + potential[idx] = ((double)(1 - blockIdx.z)) * d_bot_border[0] + (blockIdx.z * d_up_border[0]); + +} + +__global__ void SetBoundaryConditionOrthoZ(double* potential) { + int zIdx = blockIdx.z * (d_n_nodes[0].x * d_n_nodes[0].y * (d_n_nodes[0].z - 1)); //0 or nodes.x-1 + + + int xStepThread = 1;//x= + int xStepBlock = blockDim.x; + + int yStepThread = d_n_nodes[0].x; + int yStepBlock = yStepThread * blockDim.y; + + int idx = zIdx + + threadIdx.x*xStepThread + blockIdx.x*xStepBlock + + threadIdx.y*yStepThread + blockIdx.y*yStepBlock; + + potential[idx] = ((double)(1 - blockIdx.z)) * d_near_border[0] + (blockIdx.z * d_far_border[0]); + +} + SpatialMeshCu::SpatialMeshCu(Config &conf) { check_correctness_of_related_config_fields(conf); @@ -70,26 +167,26 @@ void SpatialMeshCu::init_z_grid(Config &conf) void SpatialMeshCu::init_constants(Config & conf) { - n_nodes = dim3( + n_nodes = make_int3( ceil(conf.mesh_config_part.grid_x_size / conf.mesh_config_part.grid_x_step) + 1, ceil(conf.mesh_config_part.grid_y_size / conf.mesh_config_part.grid_y_step) + 1, ceil(conf.mesh_config_part.grid_z_size / conf.mesh_config_part.grid_z_step) + 1 ); - cudaMemcpyToSymbol((void*)&d_n_nodes, 
(void*)&n_nodes,sizeof(double3),cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(d_n_nodes, (void*)&n_nodes,sizeof(dim3),cudaMemcpyHostToDevice); double3 volume_size = make_double3( conf.mesh_config_part.grid_x_size, conf.mesh_config_part.grid_y_size, conf.mesh_config_part.grid_z_size ); - cudaMemcpyToSymbol((void*)& d_volume_size, (void*)& volume_size, sizeof(double3), cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(d_volume_size, (void*)& volume_size, sizeof(double3), cudaMemcpyHostToDevice); - double3 cell_size = make_double3( + cell_size = make_double3( volume_size.x / (n_nodes.x - 1), volume_size.y / (n_nodes.y - 1), volume_size.z / (n_nodes.z - 1) ); - cudaMemcpyToSymbol((void*)& d_volume_size, (void*)& volume_size, sizeof(double3), cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(d_volume_size, (void*)& volume_size, sizeof(double3), cudaMemcpyHostToDevice); ///TODO Border constants init } @@ -122,20 +219,12 @@ void SpatialMeshCu::fill_node_coordinates() dim3 threads = GetThreads(); dim3 blocks = GetBlocks(threads); - <<>> fill_coordinates(dev_node_coordinates); + fill_coordinates<<>>(dev_node_coordinates); } -__global__ void SpatialMeshCu::fill_coordinates(double3* node_coordinates) { - int idx = GetIdxVolume(); - - int x = threadIdx.x + blockIdx.x*blockDim.x; - int y = threadIdx.y + blockIdx.y*blockDim.y; - int z = threadIdx.z + blockIdx.z*blockDim.z; - node_coordinates[idx] = make_double3(d_volume_size.x * x, d_volume_size.y * y, d_volume_size.z * z);//(double)., -} void SpatialMeshCu::clear_old_density_values() { //std::fill(charge_density.data(), @@ -164,13 +253,13 @@ void SpatialMeshCu::set_boundary_conditions(const double phi_left, const double dim3 blockSize = dim3(16, 16, 2); dim3 gridSize = dim3(n_nodes.y / 16, n_nodes.z / 16, 1); - <<>> SetBoundaryConditionOrthoX(dev_potential); + SetBoundaryConditionOrthoX <<>> (dev_potential); gridSize = dim3(n_nodes.x / 16, n_nodes.z / 16, 2); - <<>> SetBoundaryConditionOrthoY(dev_potential); + 
SetBoundaryConditionOrthoY <<>> (dev_potential); gridSize = dim3(n_nodes.x / 16, n_nodes.y / 16, 2); - <<>> SetBoundaryConditionOrthoZ(dev_potential); + SetBoundaryConditionOrthoZ <<>> (dev_potential); //for (int j = 0; j < ny; j++) { // for (int k = 0; k < nz; k++) { // potential[0][j][k] = phi_right; @@ -307,133 +396,46 @@ void SpatialMeshCu::check_and_exit_if_not(const bool &should_be, const std::stri double SpatialMeshCu::node_number_to_coordinate_x(int i) { - //if (i >= 0 && i < x_n_nodes) { - // return i * x_cell_size; - //} - //else { - // printf("invalid node number i=%d at node_number_to_coordinate_x\n", i); - // exit(EXIT_FAILURE); - //} + if (i >= 0 && i < n_nodes.x) { + return i * cell_size.x; + } + else { + printf("invalid node number i=%d at node_number_to_coordinate_x\n", i); + exit(EXIT_FAILURE); + } + return 0; } double SpatialMeshCu::node_number_to_coordinate_y(int j) { - //if (j >= 0 && j < y_n_nodes) { - // return j * y_cell_size; - //} - //else { - // printf("invalid node number j=%d at node_number_to_coordinate_y\n", j); - // exit(EXIT_FAILURE); - //} + if (j >= 0 && j < n_nodes.y) { + return j * cell_size.y; + } + else { + printf("invalid node number j=%d at node_number_to_coordinate_y\n", j); + exit(EXIT_FAILURE); + } + return 0; } double SpatialMeshCu::node_number_to_coordinate_z(int k) { - //if (k >= 0 && k < z_n_nodes) { - // return k * z_cell_size; - //} - //else { - // printf("invalid node number k=%d at node_number_to_coordinate_z\n", k); - // exit(EXIT_FAILURE); - //} + if (k >= 0 && k < n_nodes.z) { + return k * cell_size.z; + } + else { + printf("invalid node number k=%d at node_number_to_coordinate_z\n", k); + exit(EXIT_FAILURE); + } + return 0; } - dim3 SpatialMeshCu::GetThreads() { - return dim3(16, 16, d_n_nodes.z / 16); + return dim3(16, 16, n_nodes.z / 16); } dim3 SpatialMeshCu::GetBlocks(dim3 nThreads) { - return dim3(d_n_nodes.x / nThreads.x, d_n_nodes.y / nThreads.y, 16); + return dim3(n_nodes.x / nThreads.x, n_nodes.y / 
nThreads.y, 16); } - -__device__ int SpatialMeshCu::GetIdxVolume() { - //int xStepthread = 1; - int xStepBlock = blockDim.x; - - int yStepThread = d_n_nodes.x; - int yStepBlock = yStepThread * blockDim.y; - - int zStepThread = d_n_nodes.x * d_n_nodes.y; - int zStepBlock = zStepThread * blockDim.z; - - return threadIdx.x + blockIdx.x*xStepBlock + - threadIdx.y*yStepThread + blockIdx.y*yStepBlock + - threadIdx.z*zStepThread + blockIdx.z*zStepBlock; -} - -__global__ void SpatialMeshCu::SetBoundaryConditionOrthoX(double* potential) { - int xIdx = blockIdx.z*(d_n_nodes.x - 1); //0 or nodes.x-1 - - - int yStepThread = d_n_nodes.x;//x= - int yStepBlock = d_n_nodes.x * blockDim.x; - - int zStepThread = d_n_nodes.x * d_n_nodes.y; - int zStepBlock = zStepThread * blockDim.y; - - int idx = xIdx + - threadIdx.x*yStepThread + blockIdx.x*yStepBlock + - threadIdx.y*zStepThread + blockIdx.y*zStepBlock; - - potential[idx] = ((double)(1 - blockIdx.z))*d_bot_border + (blockIdx.z*d_up_border); - -} - -// -__global__ void SpatialMeshCu::SetBoundaryConditionOrthoX(double* potential) { - int xIdx = blockIdx.z*(d_n_nodes.x - 1); //0 or nodes.x-1 - - - int yStepThread = d_n_nodes.x;//x= - int yStepBlock = d_n_nodes.x * blockDim.x; - - int zStepThread = d_n_nodes.x * d_n_nodes.y; - int zStepBlock = zStepThread * blockDim.y; - - int idx = xIdx + - threadIdx.x*yStepThread + blockIdx.x*yStepBlock + - threadIdx.y*zStepThread + blockIdx.y*zStepBlock; - // if - potential[idx] = ((double)(1 - blockIdx.z)) * d_left_border + (blockIdx.z*d_right_border); - -} - -__global__ void SpatialMeshCu::SetBoundaryConditionOrthoY(double* potential) { - int yIdx = blockIdx.z * d_n_nodes.x*(d_n_nodes.y - 1); //0 or nodes.x-1 - - - int xStepThread = 1;//x= - int xStepBlock = blockDim.x; - - int zStepThread = d_n_nodes.x * d_n_nodes.y; - int zStepBlock = zStepThread * blockDim.y; - - int idx = yIdx + - threadIdx.x*xStepThread + blockIdx.x*xStepBlock + - threadIdx.y*zStepThread + blockIdx.y*zStepBlock; - // if - 
potential[idx] = ((double)(1 - blockIdx.z)) * d_bot_border + (blockIdx.z * d_up_border); - -} - -__global__ void SpatialMeshCu::SetBoundaryConditionOrthoZ(double* potential) { - int zIdx = blockIdx.z * (d_n_nodes.x * d_n_nodes.y * (d_n_nodes.z - 1)); //0 or nodes.x-1 - - - int xStepThread = 1;//x= - int xStepBlock = blockDim.x; - - int yStepThread = d_n_nodes.x; - int yStepBlock = yStepThread * blockDim.y; - - int idx = zIdx + - threadIdx.x*xStepThread + blockIdx.x*xStepBlock + - threadIdx.y*yStepThread + blockIdx.y*yStepBlock; - // if - potential[idx] = ((double)(1 - blockIdx.z)) * d_near_border + (blockIdx.z * d_far_border); - -} - SpatialMeshCu::~SpatialMeshCu() { cudaFree((void*)dev_node_coordinates); cudaFree((void*)dev_potential); diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 30006b0..9c3127f 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -7,31 +7,15 @@ //#include "hdf5_hl.h" class SpatialMeshCu { public: - __constant__ double3 d_volume_size; - __constant__ double3 d_cell_size; - __constant__ dim3 d_n_nodes; - - __constant__ double d_up_border; - __constant__ double d_bot_border; - - __constant__ double d_left_border; - __constant__ double d_right_border; - __constant__ double d_far_border; - __constant__ double d_near_border; - - - dim3 n_nodes; + int3 n_nodes; + double3 cell_size; double3 *dev_node_coordinates; double *dev_charge_density; double *dev_potential; double3 *dev_electric_field; - //boost::multi_array node_coordinates; - //boost::multi_array charge_density; - //boost::multi_array potential; - //boost::multi_array electric_field; public: SpatialMeshCu(Config &conf); @@ -77,10 +61,4 @@ private: dim3 GetThreads(); dim3 GetBlocks(dim3 nThreads); - - __global__ void fill_coordinates(double3* node_coordinates); - __device__ int GetIdxVolume(); - __global__ void SetBoundaryConditionOrthoX(double* potential); - __global__ void SetBoundaryConditionOrthoY(double* potential); - __global__ void SetBoundaryConditionOrthoZ(double* 
potential); }; From 272946aa9f096a358cec5e9e1f450ebb6ff284cb Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Wed, 12 Dec 2018 23:34:50 +0700 Subject: [PATCH 05/83] cleaning unnecessary methods --- SpatialMeshCu.cu | 45 --------------------------------------------- SpatialMeshCu.cuh | 3 --- 2 files changed, 48 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 9bea875..aabae18 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -93,9 +93,6 @@ __global__ void SetBoundaryConditionOrthoZ(double* potential) { SpatialMeshCu::SpatialMeshCu(Config &conf) { check_correctness_of_related_config_fields(conf); - init_x_grid(conf); - init_y_grid(conf); - init_z_grid(conf); init_constants(conf); allocate_ongrid_values(); fill_node_coordinates(); @@ -194,48 +191,6 @@ void SpatialMeshCu::check_correctness_of_related_config_fields(Config &conf) { grid_z_step_gt_zero_le_grid_z_size(conf); } -void SpatialMeshCu::init_x_grid(Config &conf) { - //x_volume_size = conf.mesh_config_part.grid_x_size; - //x_n_nodes = - // ceil(conf.mesh_config_part.grid_x_size / conf.mesh_config_part.grid_x_step) + 1; - //x_cell_size = x_volume_size / (x_n_nodes - 1); - //if (x_cell_size != conf.mesh_config_part.grid_x_step) { - // std::cout.precision(3); - // std::cout << "X_step was shrinked to " << x_cell_size - // << " from " << conf.mesh_config_part.grid_x_step - // << " to fit round number of cells" << std::endl; - //} - //return; -} - -void SpatialMeshCu::init_y_grid(Config &conf) { - //y_volume_size = conf.mesh_config_part.grid_y_size; - //y_n_nodes = - // ceil(conf.mesh_config_part.grid_y_size / conf.mesh_config_part.grid_y_step) + 1; - //y_cell_size = y_volume_size / (y_n_nodes - 1); - //if (y_cell_size != conf.mesh_config_part.grid_y_step) { - // std::cout.precision(3); - // std::cout << "Y_step was shrinked to " << y_cell_size - // << " from " << conf.mesh_config_part.grid_y_step - // << " to fit round number of cells." 
<< std::endl; - //} - //return; -} - -void SpatialMeshCu::init_z_grid(Config &conf) { - //z_volume_size = conf.mesh_config_part.grid_z_size; - //z_n_nodes = - // ceil(conf.mesh_config_part.grid_z_size / conf.mesh_config_part.grid_z_step) + 1; - //z_cell_size = z_volume_size / (z_n_nodes - 1); - //if (z_cell_size != conf.mesh_config_part.grid_z_step) { - // std::cout.precision(3); - // std::cout << "Z_step was shrinked to " << z_cell_size - // << " from " << conf.mesh_config_part.grid_z_step - // << " to fit round number of cells." << std::endl; - //} - //return; -} - void SpatialMeshCu::init_constants(Config & conf) { n_nodes = make_int3( ceil( diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 64a53a7..964fbbc 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -32,9 +32,6 @@ public: private: // init void check_correctness_of_related_config_fields(Config &conf); - void init_x_grid(Config &conf); - void init_y_grid(Config &conf); - void init_z_grid(Config &conf); void init_constants(Config &conf); void copy_constants_to_device(); void allocate_ongrid_values(); From a6a8552c7606d59cf44ce8c500f7cb96ebceb4d0 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Sun, 16 Dec 2018 13:02:47 +0700 Subject: [PATCH 06/83] Cuda field solver no inner regions, no convergence --- FieldSolver.cu | 246 ++++++++++++++++++++++++++++++++ FieldSolver.cuh | 43 ++++++ SpatialMeshCu.cu | 354 +++++++++++++++++++++++++--------------------- SpatialMeshCu.cuh | 12 +- 4 files changed, 488 insertions(+), 167 deletions(-) create mode 100644 FieldSolver.cu create mode 100644 FieldSolver.cuh diff --git a/FieldSolver.cu b/FieldSolver.cu new file mode 100644 index 0000000..1bf02e5 --- /dev/null +++ b/FieldSolver.cu @@ -0,0 +1,246 @@ +#include "FieldSolver.cuh" +#include "device_launch_parameters.h" +#include "math_functions.h" +#include "math_constants.h" + +#define ABS_TOLERANCE = 1.0e-5; +#define REL_TOLERANCE = 1.0e-12; + +__constant__ double dxdxdydy[1]; +__constant__ double 
dxdxdzdz[1]; +__constant__ double dydydzdz[1]; +__constant__ double dxdxdydydzdz[1]; + +__constant__ int end[1]; + +__device__ int GetIdxVolume_NoBorder() { + //int xStepthread = 1; + int xStepBlock = blockDim.x; + int yStepThread = d_n_nodes[0].x; + int yStepBlock = yStepThread * blockDim.y; + int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; + int zStepBlock = zStepThread * blockDim.z; + return (threadIdx.x + blockIdx.x * xStepBlock) + + (threadIdx.y * yStepThread + blockIdx.y * yStepBlock) + + (threadIdx.z * zStepThread + blockIdx.z * zStepBlock); +} + +__global__ void SetPhiNextAsCurrent(double* d_phi_current, double* d_phi_next) { + int idx = GetIdxVolume_NoBorder(); + d_phi_current[idx] = d_phi_next[idx]; +} + +__global__ void ComputePhiNext(const double* d_phi_current, const double* d_charge, double* d_phi_next) { + int idx = GetIdxVolume_NoBorder(); + int offset_Dx = 1; + //todo rewrite usind device n_nodes.x/y/z + int offset_Dy = blockDim.x * gridDim.x; + int offset_Dz = offset_Dy * blockDim.y * gridDim.y; + + int prev_neibhour_idx; + int next_neibhour_idx; + + //double dxdxdydy = mesh.volume_size.x * mesh.volume_size.x * + // mesh.volume_size.y * mesh.volume_size.y; + //double dxdxdzdz = mesh.volume_size.x * mesh.volume_size.x * + // mesh.volume_size.z * mesh.volume_size.z; + //double dydydzdz = mesh.volume_size.y * mesh.volume_size.y * + // mesh.volume_size.z * mesh.volume_size.z; + + //double dxdxdydydzdz = mesh.volume_size.x * mesh.volume_size.x * + // dy * dy * dz * dz; + double denom = (double)2* (dxdxdydy[0] + dxdxdzdz[0] + dydydzdz[0]); + //// + prev_neibhour_idx = max(idx + offset_Dx,0); + next_neibhour_idx = min(idx + offset_Dx,end[0]);//dirty : can be optimized for configs where n_nodes side equals (k*POT+2) + d_phi_next[idx] = + (d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dydydzdz[0]; + + prev_neibhour_idx = max(idx + offset_Dy, 0); + next_neibhour_idx = min(idx + offset_Dy, end[0]); + d_phi_next[idx] += + 
(d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dxdxdzdz[0]; + + prev_neibhour_idx = max(idx + offset_Dz, 0); + next_neibhour_idx = min(idx + offset_Dz, end[0]); + d_phi_next[idx] += + (d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dxdxdydy[0]; + + d_phi_next[idx] += 4.0 * CUDART_PI * d_charge[idx] * dxdxdydydzdz[0]; + d_phi_next[idx] /= denom; + +} +__device__ double GradientComponent(double phi1, double phi2, double cell_side_size) { + return ((phi2 - phi1) / cell_side_size); +} + +__global__ void EvaluateFields(const double* dev_potential, double3* dev_el_field) { + int idx = GetIdxVolume_NoBorder(); + + double3 e = make_double3(0, 0, 0); + bool is_on_up_border; + bool is_on_low_border; + bool is_inside_borders; + int offset; + + offset = 1; + is_on_up_border = ((threadIdx.x == 0) && (blockIdx.x == 0)); + is_on_low_border = ((threadIdx.x == (blockDim.x - 1)) && (blockIdx.x == (gridDim.x - 1))); + is_inside_borders = !(is_on_low_border || is_on_up_border); + + e.x = -(1 / (1 + is_inside_borders)) * GradientComponent( + dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], + dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], + dev_cell_size.x); + + offset = d_n_nodex.x; + is_on_up_border = ((threadIdx.x == 0) && (blockIdx.x == 0)); + is_on_low_border = ((threadIdx.x == (blockDim.x - 1)) && (blockIdx.x == (gridDim.x - 1))); + is_inside_borders = !(is_on_low_border || is_on_up_border); + + e.y = -(1 / (1 + is_inside_borders)) * GradientComponent( + dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], + dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], + dev_cell_size.y); + + offset = d_n_nodes.y*d_n_nodes.x; + is_on_up_border = ((threadIdx.x == 0) && (blockIdx.x == 0)); + is_on_low_border = ((threadIdx.x == (blockDim.x - 1)) && (blockIdx.x == (gridDim.x - 1))); + is_inside_borders = !(is_on_low_border || is_on_up_border); + + 
e.z = -(1 / (1 + is_inside_borders)) * GradientComponent( + dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], + dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], + dev_cell_size.z); + + dev_el_field[idx] = e; + +} +FieldSolver::FieldSolver(SpatialMeshCu &mesh, Inner_regions_manager &inner_regions) :mesh(mesh) +{ + allocate_next_phi(); +} + +void FieldSolver::allocate_next_phi() +{ + size_t dim = mesh.n_nodes.x * mesh.n_nodes.y * mesh.n_nodes.z; + cudaError_t cuda_status; + + cuda_status= cudaMalloc(&dev_phi_next, dim); + +} + +void FieldSolver::eval_potential(Inner_regions_manager &inner_regions) +{ + solve_poisson_eqn_Jacobi(inner_regions); +} + +void FieldSolver::solve_poisson_eqn_Jacobi(Inner_regions_manager &inner_regions) +{ + max_Jacobi_iterations = 150; + int iter; + + //init_current_phi_from_mesh_phi(); + for (iter = 0; iter < max_Jacobi_iterations; ++iter) { + single_Jacobi_iteration(inner_regions); + if (iterative_Jacobi_solutions_converged()) { + break; + } + set_phi_next_as_phi_current(); + } + if (iter == max_Jacobi_iterations) { + printf("WARING: potential evaluation did't converge after max iterations!\n"); + } + //transfer_solution_to_mesh(); + + //return; +} +void FieldSolver::single_Jacobi_iteration(Inner_regions_manager &inner_regions) +{ + set_phi_next_at_boundaries(); + compute_phi_next_at_inner_points(); + set_phi_next_at_inner_regions(inner_regions); +} + +void FieldSolver::set_phi_next_at_boundaries() +{ + mesh.set_boundary_conditions(dev_phi_next); +} + +void FieldSolver::compute_phi_next_at_inner_points() +{ + dim3 threads = mesh.GetThreads(); + dim3 blocks = mesh.GetBlocks(threads); + cudaError_t cuda_status; + + ComputePhiNext<<>> (mesh.dev_potential, mesh.dev_charge_density, dev_phi_next); + cuda_status = cudaDeviceSynchronize(); +} + +void FieldSolver::set_phi_next_at_inner_regions(Inner_regions_manager &inner_regions) +{ + //for (auto ® : inner_regions.regions) { + // for (auto 
&node : reg.inner_nodes) { + // // todo: mark nodes at edge during construction + // // if (!node.at_domain_edge( nx, ny, nz )) { + // phi_next[node.x][node.y][node.z] = reg.potential; + // // } + // } + //} +} + + +bool FieldSolver::iterative_Jacobi_solutions_converged() +{ + //// todo: bind tol to config parameters + ////abs_tolerance = std::max( dx * dx, std::max( dy * dy, dz * dz ) ) / 5; + + //double diff; + //double rel_diff; + ////double tol; + //// + //for (int i = 0; i < nx; i++) { + // for (int j = 0; j < ny; j++) { + // for (int k = 0; k < nz; k++) { + // diff = fabs(phi_next[i][j][k] - phi_current[i][j][k]); + // rel_diff = diff / fabs(phi_current[i][j][k]); + // if (diff > abs_tolerance || rel_diff > rel_tolerance) { + // return false; + // } + // } + // } + //} + //return true; +} + + +void FieldSolver::set_phi_next_as_phi_current() +{ + dim3 threads = mesh.GetThreads(); + dim3 blocks = mesh.GetBlocks(threads); + cudaError_t cuda_status; + SetPhiNextAsCurrent <<>> (mesh.dev_potential, dev_phi_next); + cuda_status = cudaDeviceSynchronize(); +} + + +void FieldSolver::eval_fields_from_potential() +{ + dim3 threads = mesh.GetThreads(); + dim3 blocks = mesh.GetBlocks(threads); + cudaError_t cuda_status; + + EvaluateFields <<>> (mesh.dev_potential, mesh.dev_electric_field); + + cuda_status = cudaDeviceSynchronize(); + return; +} + + + + +FieldSolver::~FieldSolver() +{ + // delete phi arrays? 
+ cudaFree((void*)dev_phi_next); +} \ No newline at end of file diff --git a/FieldSolver.cuh b/FieldSolver.cuh new file mode 100644 index 0000000..09788c0 --- /dev/null +++ b/FieldSolver.cuh @@ -0,0 +1,43 @@ +#ifndef _FIELD_SOLVER_H_ +#define _FIELD_SOLVER_H_ + +#include +#include +#include "SpatialMeshCu.cuh" +#include "inner_region.h" +#include "cuda.h" +#include "cuda_runtime.h" + +class FieldSolver { +public: + FieldSolver(SpatialMeshCu &spat_mesh, Inner_regions_manager &inner_regions); + void eval_potential(Inner_regions_manager &inner_regions); + void eval_fields_from_potential(); + virtual ~FieldSolver(); +private: + SpatialMeshCu& mesh; + +private: + int max_Jacobi_iterations; + double rel_tolerance; + double abs_tolerance; + double *dev_phi_next; + //boost::multi_array phi_current; + //boost::multi_array phi_next; + void allocate_next_phi(); + void init_constants(); + // Solve potential + void solve_poisson_eqn_Jacobi(Inner_regions_manager &inner_regions); + void single_Jacobi_iteration(Inner_regions_manager &inner_regions); + void set_phi_next_at_boundaries(); + void compute_phi_next_at_inner_points(); + void set_phi_next_at_inner_regions(Inner_regions_manager &inner_regions); + bool iterative_Jacobi_solutions_converged(); + void set_phi_next_as_phi_current(); + void transfer_solution_to_spat_mesh(); + // Eval fields from potential + double boundary_difference(double phi1, double phi2, double dx); + double central_difference(double phi1, double phi2, double dx); +}; + +#endif _FIELD_SOLVER_H_ diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index aabae18..6706276 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -24,8 +24,8 @@ __device__ int GetIdxVolume() { int zStepBlock = zStepThread * blockDim.z; return threadIdx.x + blockIdx.x * xStepBlock + threadIdx.y * yStepThread - + blockIdx.y * yStepBlock + threadIdx.z * zStepThread - + blockIdx.z * zStepBlock; + + blockIdx.y * yStepBlock + threadIdx.z * zStepThread + + blockIdx.z * zStepBlock; } 
__global__ void fill_coordinates(double3* node_coordinates) { @@ -36,7 +36,7 @@ __global__ void fill_coordinates(double3* node_coordinates) { int y = threadIdx.y + blockIdx.y * blockDim.y; int z = threadIdx.z + blockIdx.z * blockDim.z; node_coordinates[idx] = make_double3(d_volume_size[0].x * x, - d_volume_size[0].y * y, d_volume_size[0].z * z); //(double)., + d_volume_size[0].y * y, d_volume_size[0].z * z); //(double)., } __global__ void SetBoundaryConditionOrthoX(double* potential) { @@ -49,10 +49,10 @@ __global__ void SetBoundaryConditionOrthoX(double* potential) { int zStepBlock = zStepThread * blockDim.y; int idx = xIdx + threadIdx.x * yStepThread + blockIdx.x * yStepBlock - + threadIdx.y * zStepThread + blockIdx.y * zStepBlock; + + threadIdx.y * zStepThread + blockIdx.y * zStepBlock; - potential[idx] = ((double) (1 - blockIdx.z)) * d_left_border[0] - + (blockIdx.z * d_right_border[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * d_left_border[0] + + (blockIdx.z * d_right_border[0]); } @@ -66,16 +66,16 @@ __global__ void SetBoundaryConditionOrthoY(double* potential) { int zStepBlock = zStepThread * blockDim.y; int idx = yIdx + threadIdx.x * xStepThread + blockIdx.x * xStepBlock - + threadIdx.y * zStepThread + blockIdx.y * zStepBlock; + + threadIdx.y * zStepThread + blockIdx.y * zStepBlock; - potential[idx] = ((double) (1 - blockIdx.z)) * d_bot_border[0] - + (blockIdx.z * d_up_border[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * d_bot_border[0] + + (blockIdx.z * d_up_border[0]); } __global__ void SetBoundaryConditionOrthoZ(double* potential) { int zIdx = blockIdx.z - * (d_n_nodes[0].x * d_n_nodes[0].y * (d_n_nodes[0].z - 1)); //0 or nodes.x-1 + * (d_n_nodes[0].x * d_n_nodes[0].y * (d_n_nodes[0].z - 1)); //0 or nodes.x-1 int xStepThread = 1; //x= int xStepBlock = blockDim.x; @@ -84,10 +84,10 @@ __global__ void SetBoundaryConditionOrthoZ(double* potential) { int yStepBlock = yStepThread * blockDim.y; int idx = zIdx + threadIdx.x * xStepThread + 
blockIdx.x * xStepBlock - + threadIdx.y * yStepThread + blockIdx.y * yStepBlock; + + threadIdx.y * yStepThread + blockIdx.y * yStepBlock; - potential[idx] = ((double) (1 - blockIdx.z)) * d_near_border[0] - + (blockIdx.z * d_far_border[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * d_near_border[0] + + (blockIdx.z * d_far_border[0]); } @@ -96,44 +96,44 @@ SpatialMeshCu::SpatialMeshCu(Config &conf) { init_constants(conf); allocate_ongrid_values(); fill_node_coordinates(); - set_boundary_conditions(conf); + set_boundary_conditions(dev_potential); } SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { herr_t status; - + cudaError_t cuda_status; volume_size = make_double3(0, 0, 0); cell_size = make_double3(0, 0, 0); n_nodes = make_int3(0, 0, 0); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_volume_size", - &volume_size.x); + &volume_size.x); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_volume_size", - &volume_size.y); + &volume_size.y); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_volume_size", - &volume_size.z); + &volume_size.z); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_cell_size", - &cell_size.x); + &cell_size.x); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_cell_size", - &cell_size.y); + &cell_size.y); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_cell_size", - &cell_size.z); + &cell_size.z); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "x_n_nodes", - &n_nodes.x); + &n_nodes.x); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "y_n_nodes", - &n_nodes.y); + &n_nodes.y); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "z_n_nodes", - &n_nodes.z); + &n_nodes.z); hdf5_status_check(status); 
allocate_ongrid_values(); @@ -146,34 +146,44 @@ SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { dim3 threads = GetThreads(); dim3 blocks = GetBlocks(threads); - fill_coordinates<<>>(dev_node_coordinates); + fill_coordinates <<< blocks, threads >>> (dev_node_coordinates); + cuda_status = cudaDeviceSynchronize(); + cuda_status_check(cuda_status); H5LTread_dataset_double(h5_spat_mesh_group, "./charge_density", - h5_tmp_buf_1); + h5_tmp_buf_1); H5LTread_dataset_double(h5_spat_mesh_group, "./potential", h5_tmp_buf_2); -// for ( int i = 0; i < dim; i++ ) { -// ( charge_density.data() )[i] = h5_tmp_buf_1[i]; -// ( potential.data() )[i] = h5_tmp_buf_2[i]; -// } + // for ( int i = 0; i < dim; i++ ) { + // ( charge_density.data() )[i] = h5_tmp_buf_1[i]; + // ( potential.data() )[i] = h5_tmp_buf_2[i]; + // } + + cuda_status = cudaMemcpy(h5_tmp_buf_1, dev_charge_density, sizeof(double) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status); - cudaMemcpy(h5_tmp_buf_1, dev_charge_density, sizeof(double) * dim, - cudaMemcpyHostToDevice); - cudaMemcpy(h5_tmp_buf_2, dev_potential, sizeof(double) * dim, - cudaMemcpyHostToDevice); + cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_potential, sizeof(double) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status); double3 *h5_tmp_vector = new double3[dim]; H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_x", - h5_tmp_buf_1); + h5_tmp_buf_1); H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_y", - h5_tmp_buf_2); + h5_tmp_buf_2); H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_z", - h5_tmp_buf_3); + h5_tmp_buf_3); for (int i = 0; i < dim; i++) { h5_tmp_vector[i] = make_double3(h5_tmp_buf_1[i], h5_tmp_buf_2[i], - h5_tmp_buf_3[i]); + h5_tmp_buf_3[i]); } + + cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_electric_field, sizeof(double3) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status); + delete[] h5_tmp_buf_1; delete[] h5_tmp_buf_2; delete[] h5_tmp_buf_3; @@ 
-193,22 +203,22 @@ void SpatialMeshCu::check_correctness_of_related_config_fields(Config &conf) { void SpatialMeshCu::init_constants(Config & conf) { n_nodes = make_int3( - ceil( - conf.mesh_config_part.grid_x_size - / conf.mesh_config_part.grid_x_step) + 1, - ceil( - conf.mesh_config_part.grid_y_size - / conf.mesh_config_part.grid_y_step) + 1, - ceil( - conf.mesh_config_part.grid_z_size - / conf.mesh_config_part.grid_z_step) + 1); + ceil( + conf.mesh_config_part.grid_x_size + / conf.mesh_config_part.grid_x_step) + 1, + ceil( + conf.mesh_config_part.grid_y_size + / conf.mesh_config_part.grid_y_step) + 1, + ceil( + conf.mesh_config_part.grid_z_size + / conf.mesh_config_part.grid_z_step) + 1); volume_size = make_double3(conf.mesh_config_part.grid_x_size, - conf.mesh_config_part.grid_y_size, - conf.mesh_config_part.grid_z_size); + conf.mesh_config_part.grid_y_size, + conf.mesh_config_part.grid_z_size); cell_size = make_double3(volume_size.x / (n_nodes.x - 1), - volume_size.y / (n_nodes.y - 1), volume_size.z / (n_nodes.z - 1)); + volume_size.y / (n_nodes.y - 1), volume_size.z / (n_nodes.z - 1)); copy_constants_to_device(); @@ -216,26 +226,42 @@ void SpatialMeshCu::init_constants(Config & conf) { } void SpatialMeshCu::copy_constants_to_device() { - cudaMemcpyToSymbol(d_n_nodes, (void*) &n_nodes, sizeof(dim3), - cudaMemcpyHostToDevice); - cudaMemcpyToSymbol(d_volume_size, (void*) &volume_size, sizeof(double3), - cudaMemcpyHostToDevice); - cudaMemcpyToSymbol(d_volume_size, (void*) &volume_size, sizeof(double3), - cudaMemcpyHostToDevice); + cudaError_t cuda_status; + + cuda_status = cudaMemcpyToSymbol(d_n_nodes, (void*)&n_nodes, sizeof(dim3), + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status); + + cuda_status = cudaMemcpyToSymbol(d_volume_size, (void*)&volume_size, sizeof(double3), + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status); + + cuda_status = cudaMemcpyToSymbol(d_volume_size, (void*)&volume_size, sizeof(double3), + cudaMemcpyHostToDevice); + 
cuda_status_check(cuda_status); + + return; } void SpatialMeshCu::allocate_ongrid_values() { - //TODO int nx = n_nodes.x; int ny = n_nodes.y; int nz = n_nodes.z; size_t total_node_count = nx * ny * nz; + cudaError_t cuda_status; + + cuda_status = cudaMalloc < double3 >(&dev_node_coordinates, total_node_count); + cuda_status_check(cuda_status); + + cuda_status = cudaMalloc(&dev_charge_density, total_node_count); + cuda_status_check(cuda_status); - cudaMalloc < double3 > (&dev_node_coordinates, total_node_count); - cudaMalloc(&dev_charge_density, total_node_count); - cudaMalloc(&dev_potential, total_node_count); - cudaMalloc < double3 > (&dev_electric_field, total_node_count); + cuda_status = cudaMalloc(&dev_potential, total_node_count); + cuda_status_check(cuda_status); + + cuda_status = cudaMalloc < double3 >(&dev_electric_field, total_node_count); + cuda_status_check(cuda_status); return; } @@ -243,8 +269,13 @@ void SpatialMeshCu::allocate_ongrid_values() { void SpatialMeshCu::fill_node_coordinates() { dim3 threads = GetThreads(); dim3 blocks = GetBlocks(threads); + cudaError_t cuda_status; + + fill_coordinates <<< blocks,threads>>> (dev_node_coordinates); + cuda_status= cudaDeviceSynchronize(); + cuda_status_check(cuda_status); - fill_coordinates<<>>(dev_node_coordinates); + return; } void SpatialMeshCu::clear_old_density_values() { @@ -255,41 +286,24 @@ void SpatialMeshCu::clear_old_density_values() { //return; } -void SpatialMeshCu::set_boundary_conditions(Config &conf) { - set_boundary_conditions(conf.boundary_config_part.boundary_phi_left, - conf.boundary_config_part.boundary_phi_right, - conf.boundary_config_part.boundary_phi_top, - conf.boundary_config_part.boundary_phi_bottom, - conf.boundary_config_part.boundary_phi_near, - conf.boundary_config_part.boundary_phi_far); -} - -void SpatialMeshCu::set_boundary_conditions(const double phi_left, - const double phi_right, const double phi_top, const double phi_bottom, - const double phi_near, const double 
phi_far) { - dim3 blockSize = dim3(16, 16, 2); +void SpatialMeshCu::set_boundary_conditions(double* d_potential) { + dim3 threads = dim3(16, 16, 2); + cudaError_t cuda_status; - dim3 gridSize = dim3(n_nodes.y / 16, n_nodes.z / 16, 1); - SetBoundaryConditionOrthoX<<>>(dev_potential); + dim3 blocks = dim3(n_nodes.y / 16, n_nodes.z / 16, 1); + SetBoundaryConditionOrthoX <<< blocks, threads >>> (d_potential); + cuda_status = cudaDeviceSynchronize(); + cuda_status_check(cuda_status); - gridSize = dim3(n_nodes.x / 16, n_nodes.z / 16, 2); - SetBoundaryConditionOrthoY<<>>(dev_potential); + blocks = dim3(n_nodes.x / 16, n_nodes.z / 16, 2); + SetBoundaryConditionOrthoY << < blocks, threads >> > (d_potential); + cuda_status = cudaDeviceSynchronize(); + cuda_status_check(cuda_status); - gridSize = dim3(n_nodes.x / 16, n_nodes.y / 16, 2); - SetBoundaryConditionOrthoZ<<>>(dev_potential); - //for (int j = 0; j < ny; j++) { - // for (int k = 0; k < nz; k++) { - // potential[0][j][k] = phi_right; - // potential[nx - 1][j][k] = phi_left; - // } - //} - - //for (int i = 0; i < nx; i++) { - // for (int j = 0; j < ny; j++) { - // potential[i][j][0] = phi_near; - // potential[i][j][nz - 1] = phi_far; - // } - //} + blocks = dim3(n_nodes.x / 16, n_nodes.y / 16, 2); + SetBoundaryConditionOrthoZ << < blocks, threads >> > (d_potential); + cuda_status = cudaDeviceSynchronize(); + cuda_status_check(cuda_status); return; } @@ -360,7 +374,7 @@ void SpatialMeshCu::write_to_file(hid_t hdf5_file_id) { herr_t status; std::string hdf5_groupname = "/SpatialMesh"; group_id = H5Gcreate(hdf5_file_id, hdf5_groupname.c_str(), H5P_DEFAULT, - H5P_DEFAULT, H5P_DEFAULT); + H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(group_id); write_hdf5_attributes(group_id); @@ -377,37 +391,38 @@ void SpatialMeshCu::write_hdf5_attributes(hid_t group_id) { std::string hdf5_current_group = "./"; status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "x_volume_size", &volume_size.x, single_element); + 
"x_volume_size", &volume_size.x, single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "y_volume_size", &volume_size.y, single_element); + "y_volume_size", &volume_size.y, single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "z_volume_size", &volume_size.z, single_element); + "z_volume_size", &volume_size.z, single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "x_cell_size", &cell_size.x, single_element); + "x_cell_size", &cell_size.x, single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "y_cell_size", &cell_size.y, single_element); + "y_cell_size", &cell_size.y, single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "z_cell_size", &cell_size.z, single_element); + "z_cell_size", &cell_size.z, single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "x_n_nodes", &n_nodes.x, single_element); + "x_n_nodes", &n_nodes.x, single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "y_n_nodes", &n_nodes.y, single_element); + "y_n_nodes", &n_nodes.y, single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "z_n_nodes", &n_nodes.z, single_element); + "z_n_nodes", &n_nodes.z, single_element); hdf5_status_check(status); } void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { hid_t filespace, dset; herr_t status; + cudaError_t cuda_status; int rank = 1; hsize_t dims[rank]; dims[0] = n_nodes.x * n_nodes.y * n_nodes.z; @@ -422,9 +437,9 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { double *nz = new double[dims[0]]; double3 *hdf5_tmp_write_data = new double3[dims[0]]; 
- cudaMemcpy(hdf5_tmp_write_data, dev_node_coordinates, - sizeof(double3) * dims[0], cudaMemcpyDeviceToHost); - + cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_node_coordinates, + sizeof(double3) * dims[0], cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status); for (unsigned int i = 0; i < dims[0]; i++) { nx[i] = hdf5_tmp_write_data[i].x; ny[i] = hdf5_tmp_write_data[i].y; @@ -432,28 +447,28 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { } dset = H5Dcreate(group_id, "./node_coordinates_x", H5T_IEEE_F64BE, - filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, - H5P_DEFAULT, nx); + H5P_DEFAULT, nx); hdf5_status_check(status); status = H5Dclose(dset); hdf5_status_check(status); dset = H5Dcreate(group_id, "./node_coordinates_y", H5T_IEEE_F64BE, - filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, - H5P_DEFAULT, ny); + H5P_DEFAULT, ny); hdf5_status_check(status); status = H5Dclose(dset); hdf5_status_check(status); dset = H5Dcreate(group_id, "./node_coordinates_z", H5T_IEEE_F64BE, - filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, - H5P_DEFAULT, nz); + H5P_DEFAULT, nz); hdf5_status_check(status); status = H5Dclose(dset); hdf5_status_check(status); @@ -464,13 +479,15 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { } { double *hdf5_tmp_write_data = new double[dims[0]]; - cudaMemcpy(hdf5_tmp_write_data, dev_charge_density, - sizeof(double) * dims[0], cudaMemcpyDeviceToHost); + cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_charge_density, + sizeof(double) * dims[0], cudaMemcpyDeviceToHost); + 
cuda_status_check(cuda_status); + dset = H5Dcreate(group_id, "./charge_density", H5T_IEEE_F64BE, - filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, - H5P_DEFAULT, hdf5_tmp_write_data); + H5P_DEFAULT, hdf5_tmp_write_data); hdf5_status_check(status); status = H5Dclose(dset); hdf5_status_check(status); @@ -478,13 +495,14 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { } { double *hdf5_tmp_write_data = new double[dims[0]]; - cudaMemcpy(hdf5_tmp_write_data, dev_potential, sizeof(double) * dims[0], - cudaMemcpyDeviceToHost); + cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_potential, sizeof(double) * dims[0], + cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status); dset = H5Dcreate(group_id, "./potential", H5T_IEEE_F64BE, filespace, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, - H5P_DEFAULT, hdf5_tmp_write_data); + H5P_DEFAULT, hdf5_tmp_write_data); hdf5_status_check(status); status = H5Dclose(dset); hdf5_status_check(status); @@ -496,8 +514,9 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { double *ey = new double[dims[0]]; double *ez = new double[dims[0]]; double3 *hdf5_tmp_write_data = new double3[dims[0]]; - cudaMemcpy(dev_node_coordinates, hdf5_tmp_write_data, - sizeof(double3) * dims[0], cudaMemcpyDeviceToHost); + cuda_status = cudaMemcpy(dev_node_coordinates, hdf5_tmp_write_data, + sizeof(double3) * dims[0], cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status); for (unsigned int i = 0; i < dims[0]; i++) { ex[i] = hdf5_tmp_write_data[i].x; @@ -505,28 +524,28 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { ez[i] = hdf5_tmp_write_data[i].z; } dset = H5Dcreate(group_id, "./electric_field_x", H5T_IEEE_F64BE, - filespace, H5P_DEFAULT, 
H5P_DEFAULT, H5P_DEFAULT); + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, - H5P_DEFAULT, ex); + H5P_DEFAULT, ex); hdf5_status_check(status); status = H5Dclose(dset); hdf5_status_check(status); dset = H5Dcreate(group_id, "./electric_field_y", H5T_IEEE_F64BE, - filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, - H5P_DEFAULT, ey); + H5P_DEFAULT, ey); hdf5_status_check(status); status = H5Dclose(dset); hdf5_status_check(status); dset = H5Dcreate(group_id, "./electric_field_z", H5T_IEEE_F64BE, - filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, - H5P_DEFAULT, ez); + H5P_DEFAULT, ez); hdf5_status_check(status); status = H5Dclose(dset); hdf5_status_check(status); @@ -541,45 +560,45 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { void SpatialMeshCu::grid_x_size_gt_zero(Config &conf) { check_and_exit_if_not(conf.mesh_config_part.grid_x_size > 0, - "grid_x_size < 0"); + "grid_x_size < 0"); } void SpatialMeshCu::grid_x_step_gt_zero_le_grid_x_size(Config &conf) { check_and_exit_if_not( - (conf.mesh_config_part.grid_x_step > 0) - && (conf.mesh_config_part.grid_x_step - <= conf.mesh_config_part.grid_x_size), - "grid_x_step < 0 or grid_x_step >= grid_x_size"); + (conf.mesh_config_part.grid_x_step > 0) + && (conf.mesh_config_part.grid_x_step + <= conf.mesh_config_part.grid_x_size), + "grid_x_step < 0 or grid_x_step >= grid_x_size"); } void SpatialMeshCu::grid_y_size_gt_zero(Config &conf) { check_and_exit_if_not(conf.mesh_config_part.grid_y_size > 0, - "grid_y_size < 0"); + "grid_y_size < 0"); } void SpatialMeshCu::grid_y_step_gt_zero_le_grid_y_size(Config &conf) { check_and_exit_if_not( - 
(conf.mesh_config_part.grid_y_step > 0) - && (conf.mesh_config_part.grid_y_step - <= conf.mesh_config_part.grid_y_size), - "grid_y_step < 0 or grid_y_step >= grid_y_size"); + (conf.mesh_config_part.grid_y_step > 0) + && (conf.mesh_config_part.grid_y_step + <= conf.mesh_config_part.grid_y_size), + "grid_y_step < 0 or grid_y_step >= grid_y_size"); } void SpatialMeshCu::grid_z_size_gt_zero(Config &conf) { check_and_exit_if_not(conf.mesh_config_part.grid_z_size > 0, - "grid_z_size < 0"); + "grid_z_size < 0"); } void SpatialMeshCu::grid_z_step_gt_zero_le_grid_z_size(Config &conf) { check_and_exit_if_not( - (conf.mesh_config_part.grid_z_step > 0) - && (conf.mesh_config_part.grid_z_step - <= conf.mesh_config_part.grid_z_size), - "grid_z_step < 0 or grid_z_step >= grid_z_size"); + (conf.mesh_config_part.grid_z_step > 0) + && (conf.mesh_config_part.grid_z_step + <= conf.mesh_config_part.grid_z_size), + "grid_z_step < 0 or grid_z_step >= grid_z_size"); } void SpatialMeshCu::check_and_exit_if_not(const bool &should_be, - const std::string &message) { + const std::string &message) { //if (!should_be) { // std::cout << "Error: " << message << std::endl; // exit(EXIT_FAILURE); @@ -590,9 +609,10 @@ void SpatialMeshCu::check_and_exit_if_not(const bool &should_be, double SpatialMeshCu::node_number_to_coordinate_x(int i) { if (i >= 0 && i < n_nodes.x) { return i * cell_size.x; - } else { + } + else { printf("invalid node number i=%d at node_number_to_coordinate_x\n", i); - exit (EXIT_FAILURE); + exit(EXIT_FAILURE); } return 0; } @@ -600,9 +620,10 @@ double SpatialMeshCu::node_number_to_coordinate_x(int i) { double SpatialMeshCu::node_number_to_coordinate_y(int j) { if (j >= 0 && j < n_nodes.y) { return j * cell_size.y; - } else { + } + else { printf("invalid node number j=%d at node_number_to_coordinate_y\n", j); - exit (EXIT_FAILURE); + exit(EXIT_FAILURE); } return 0; } @@ -610,21 +631,31 @@ double SpatialMeshCu::node_number_to_coordinate_y(int j) { double 
SpatialMeshCu::node_number_to_coordinate_z(int k) { if (k >= 0 && k < n_nodes.z) { return k * cell_size.z; - } else { + } + else { printf("invalid node number k=%d at node_number_to_coordinate_z\n", k); - exit (EXIT_FAILURE); + exit(EXIT_FAILURE); } return 0; } -void SpatialMeshCu::hdf5_status_check( herr_t status ) +void SpatialMeshCu::hdf5_status_check(herr_t status) { - if( status < 0 ){ - std::cout << "Something went wrong while writing Spatial_mesh group. Aborting." - << std::endl; - exit( EXIT_FAILURE ); - } + if (status < 0) { + std::cout << "Something went wrong while writing Spatial_mesh group. Aborting." + << std::endl; + exit(EXIT_FAILURE); + } } + +void SpatialMeshCu::cuda_status_check(cudaError_t status) +{ + if (status > 0) { + std::cout << "Cuda error: " << cudaGetErrorString(status) << std::endl; + exit(EXIT_FAILURE); + } +} + dim3 SpatialMeshCu::GetThreads() { return dim3(16, 16, n_nodes.z / 16); } @@ -632,9 +663,10 @@ dim3 SpatialMeshCu::GetThreads() { dim3 SpatialMeshCu::GetBlocks(dim3 nThreads) { return dim3(n_nodes.x / nThreads.x, n_nodes.y / nThreads.y, 16); } + SpatialMeshCu::~SpatialMeshCu() { - cudaFree((void*) dev_node_coordinates); - cudaFree((void*) dev_potential); - cudaFree((void*) dev_charge_density); - cudaFree((void*) dev_electric_field); + cudaFree((void*)dev_node_coordinates); + cudaFree((void*)dev_potential); + cudaFree((void*)dev_charge_density); + cudaFree((void*)dev_electric_field); } diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 964fbbc..74900a6 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -21,7 +21,7 @@ public: SpatialMeshCu(Config &conf); SpatialMeshCu(hid_t h5_spat_mesh_group); void clear_old_density_values(); - void set_boundary_conditions(Config &conf); + void set_boundary_conditions(double* d_potential); bool is_potential_equal_on_boundaries(); void print(); void write_to_file(hid_t hdf5_file_id); @@ -29,6 +29,8 @@ public: double node_number_to_coordinate_x(int i); double 
node_number_to_coordinate_y(int j); double node_number_to_coordinate_z(int k); + dim3 GetThreads(); + dim3 GetBlocks(dim3 nThreads); private: // init void check_correctness_of_related_config_fields(Config &conf); @@ -36,9 +38,7 @@ private: void copy_constants_to_device(); void allocate_ongrid_values(); void fill_node_coordinates(); - void set_boundary_conditions(const double phi_left, const double phi_right, - const double phi_top, const double phi_bottom, - const double phi_near, const double phi_far); + // print void print_grid(); void print_ongrid_values(); @@ -56,7 +56,7 @@ private: void grid_z_size_gt_zero(Config &conf); void grid_z_step_gt_zero_le_grid_z_size(Config &conf); void check_and_exit_if_not(const bool &should_be, const std::string &message); + //cuda + void cuda_status_check(cudaError_t status); - dim3 GetThreads(); - dim3 GetBlocks(dim3 nThreads); }; From a7dabae75038b1d6c4be109fa036b9943a59a441 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Sun, 16 Dec 2018 13:29:51 +0700 Subject: [PATCH 07/83] cleaning --- FieldSolver.cu | 12 ++++++++---- FieldSolver.cuh | 4 ---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index 1bf02e5..f14766f 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -25,6 +25,10 @@ __device__ int GetIdxVolume_NoBorder() { (threadIdx.z * zStepThread + blockIdx.z * zStepBlock); } +__device__ double GradientComponent(double phi1, double phi2, double cell_side_size) { + return ((phi2 - phi1) / cell_side_size); +} + __global__ void SetPhiNextAsCurrent(double* d_phi_current, double* d_phi_next) { int idx = GetIdxVolume_NoBorder(); d_phi_current[idx] = d_phi_next[idx]; @@ -70,9 +74,6 @@ __global__ void ComputePhiNext(const double* d_phi_current, const double* d_char d_phi_next[idx] /= denom; } -__device__ double GradientComponent(double phi1, double phi2, double cell_side_size) { - return ((phi2 - phi1) / cell_side_size); -} __global__ void EvaluateFields(const double* dev_potential, 
double3* dev_el_field) { int idx = GetIdxVolume_NoBorder(); @@ -116,6 +117,7 @@ __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_fiel dev_el_field[idx] = e; } + FieldSolver::FieldSolver(SpatialMeshCu &mesh, Inner_regions_manager &inner_regions) :mesh(mesh) { allocate_next_phi(); @@ -129,7 +131,9 @@ void FieldSolver::allocate_next_phi() cuda_status= cudaMalloc(&dev_phi_next, dim); } +void FieldSolver::init_constants() { +} void FieldSolver::eval_potential(Inner_regions_manager &inner_regions) { solve_poisson_eqn_Jacobi(inner_regions); @@ -151,7 +155,7 @@ void FieldSolver::solve_poisson_eqn_Jacobi(Inner_regions_manager &inner_regions) if (iter == max_Jacobi_iterations) { printf("WARING: potential evaluation did't converge after max iterations!\n"); } - //transfer_solution_to_mesh(); + set_phi_next_as_phi_current(); //return; } diff --git a/FieldSolver.cuh b/FieldSolver.cuh index 09788c0..37dc864 100644 --- a/FieldSolver.cuh +++ b/FieldSolver.cuh @@ -34,10 +34,6 @@ private: void set_phi_next_at_inner_regions(Inner_regions_manager &inner_regions); bool iterative_Jacobi_solutions_converged(); void set_phi_next_as_phi_current(); - void transfer_solution_to_spat_mesh(); - // Eval fields from potential - double boundary_difference(double phi1, double phi2, double dx); - double central_difference(double phi1, double phi2, double dx); }; #endif _FIELD_SOLVER_H_ From 6fd3adb56e9817d674e3437a754342c55fdff404 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Sun, 16 Dec 2018 18:27:08 +0700 Subject: [PATCH 08/83] fixes+ convergence --- FieldSolver.cu | 160 ++++++++++++++++++++++++++++------------------- FieldSolver.cuh | 6 +- SpatialMeshCu.cu | 2 +- 3 files changed, 103 insertions(+), 65 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index f14766f..36fae64 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -1,19 +1,20 @@ #include "FieldSolver.cuh" -#include "device_launch_parameters.h" -#include "math_functions.h" -#include 
"math_constants.h" + #define ABS_TOLERANCE = 1.0e-5; #define REL_TOLERANCE = 1.0e-12; -__constant__ double dxdxdydy[1]; -__constant__ double dxdxdzdz[1]; -__constant__ double dydydzdz[1]; -__constant__ double dxdxdydydzdz[1]; +__constant__ double3 d_cell_size[1]; +__constant__ int3 d_n_nodes[1]; + +__constant__ double dev_dxdxdydy[1]; +__constant__ double dev_dxdxdzdz[1]; +__constant__ double dev_dydydzdz[1]; +__constant__ double dev_dxdxdydydzdz[1]; -__constant__ int end[1]; +__constant__ int dev_end[1]; -__device__ int GetIdxVolume_NoBorder() { +__device__ int GetIdxVolume() { //int xStepthread = 1; int xStepBlock = blockDim.x; int yStepThread = d_n_nodes[0].x; @@ -30,55 +31,48 @@ __device__ double GradientComponent(double phi1, double phi2, double cell_side_s } __global__ void SetPhiNextAsCurrent(double* d_phi_current, double* d_phi_next) { - int idx = GetIdxVolume_NoBorder(); + int idx = GetIdxVolume(); d_phi_current[idx] = d_phi_next[idx]; } __global__ void ComputePhiNext(const double* d_phi_current, const double* d_charge, double* d_phi_next) { - int idx = GetIdxVolume_NoBorder(); + int idx = GetIdxVolume(); int offset_Dx = 1; //todo rewrite usind device n_nodes.x/y/z - int offset_Dy = blockDim.x * gridDim.x; - int offset_Dz = offset_Dy * blockDim.y * gridDim.y; + int offset_Dy = d_n_nodes[0].x; + int offset_Dz = d_n_nodes[0].x*d_n_nodes[0].y; int prev_neibhour_idx; int next_neibhour_idx; - //double dxdxdydy = mesh.volume_size.x * mesh.volume_size.x * - // mesh.volume_size.y * mesh.volume_size.y; - //double dxdxdzdz = mesh.volume_size.x * mesh.volume_size.x * - // mesh.volume_size.z * mesh.volume_size.z; - //double dydydzdz = mesh.volume_size.y * mesh.volume_size.y * - // mesh.volume_size.z * mesh.volume_size.z; - - //double dxdxdydydzdz = mesh.volume_size.x * mesh.volume_size.x * - // dy * dy * dz * dz; - double denom = (double)2* (dxdxdydy[0] + dxdxdzdz[0] + dydydzdz[0]); - //// - prev_neibhour_idx = max(idx + offset_Dx,0); - next_neibhour_idx = min(idx + 
offset_Dx,end[0]);//dirty : can be optimized for configs where n_nodes side equals (k*POT+2) + double denom = (double)2 * (dev_dxdxdydy[0] + dev_dxdxdzdz[0] + dev_dydydzdz[0]); + + prev_neibhour_idx = max(idx + offset_Dx, 0); + next_neibhour_idx = min(idx + offset_Dx, dev_end[0]); d_phi_next[idx] = - (d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dydydzdz[0]; + (d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dev_dydydzdz[0]; prev_neibhour_idx = max(idx + offset_Dy, 0); - next_neibhour_idx = min(idx + offset_Dy, end[0]); + next_neibhour_idx = min(idx + offset_Dy, dev_end[0]); d_phi_next[idx] += - (d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dxdxdzdz[0]; + (d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dev_dxdxdzdz[0]; prev_neibhour_idx = max(idx + offset_Dz, 0); - next_neibhour_idx = min(idx + offset_Dz, end[0]); + next_neibhour_idx = min(idx + offset_Dz, dev_end[0]); d_phi_next[idx] += - (d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dxdxdydy[0]; + (d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dev_dxdxdydy[0]; - d_phi_next[idx] += 4.0 * CUDART_PI * d_charge[idx] * dxdxdydydzdz[0]; + d_phi_next[idx] += 4.0 * CUDART_PI * d_charge[idx] * dev_dxdxdydydzdz[0]; d_phi_next[idx] /= denom; } __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_field) { - int idx = GetIdxVolume_NoBorder(); + int idx = GetIdxVolume(); double3 e = make_double3(0, 0, 0); + //assuming true=1, false =0 + //this method is hard to read due avoidance of if-else constructions on device code bool is_on_up_border; bool is_on_low_border; bool is_inside_borders; @@ -92,9 +86,9 @@ __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_fiel e.x = -(1 / (1 + is_inside_borders)) * GradientComponent( dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], dev_potential[idx - (offset*is_on_low_border) + 
(offset*is_inside_borders)], - dev_cell_size.x); + d_cell_size[0].x); - offset = d_n_nodex.x; + offset = d_n_nodes[0].x; is_on_up_border = ((threadIdx.x == 0) && (blockIdx.x == 0)); is_on_low_border = ((threadIdx.x == (blockDim.x - 1)) && (blockIdx.x == (gridDim.x - 1))); is_inside_borders = !(is_on_low_border || is_on_up_border); @@ -102,9 +96,9 @@ __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_fiel e.y = -(1 / (1 + is_inside_borders)) * GradientComponent( dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], - dev_cell_size.y); + d_cell_size[0].y); - offset = d_n_nodes.y*d_n_nodes.x; + offset = d_n_nodes[0].y*d_n_nodes[0].x; is_on_up_border = ((threadIdx.x == 0) && (blockIdx.x == 0)); is_on_low_border = ((threadIdx.x == (blockDim.x - 1)) && (blockIdx.x == (gridDim.x - 1))); is_inside_borders = !(is_on_low_border || is_on_up_border); @@ -112,15 +106,26 @@ __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_fiel e.z = -(1 / (1 + is_inside_borders)) * GradientComponent( dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], - dev_cell_size.z); + d_cell_size[0].z); dev_el_field[idx] = e; } +__global__ void AssertConvergence(const double* d_phi_current, const double* d_phi_next) { + double rel_diff; + double abs_diff; + int idx = GetIdxVolume(); + abs_diff = fabs(d_phi_next[idx] - d_phi_current[idx]); + rel_diff = abs_diff / fabs(d_phi_current[idx]); + + assert((abs_diff < ABS_TOLERANCE) || (rel_diff < REL_TOLERANCE)); +} + FieldSolver::FieldSolver(SpatialMeshCu &mesh, Inner_regions_manager &inner_regions) :mesh(mesh) { allocate_next_phi(); + copy_constants_to_device(); } void FieldSolver::allocate_next_phi() @@ -128,12 +133,44 @@ void FieldSolver::allocate_next_phi() size_t dim = mesh.n_nodes.x * mesh.n_nodes.y * 
mesh.n_nodes.z; cudaError_t cuda_status; - cuda_status= cudaMalloc(&dev_phi_next, dim); - + cuda_status = cudaMalloc(&dev_phi_next, dim); + } -void FieldSolver::init_constants() { +void FieldSolver::copy_constants_to_device() { + cudaError_t cuda_status; + + cuda_status = cudaMemcpyToSymbol(d_n_nodes, (void*)&mesh.n_nodes, sizeof(dim3), + cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(d_cell_size, (void*)&mesh.cell_size, sizeof(double3), + cudaMemcpyHostToDevice); + + double dxdxdydy = mesh.cell_size.x*mesh.cell_size.x* + mesh.cell_size.y*mesh.cell_size.y; + cuda_status = cudaMemcpyToSymbol(dev_dxdxdydy, (void*)&dxdxdydy, sizeof(double), + cudaMemcpyHostToDevice); + + double dxdxdzdz = mesh.cell_size.x*mesh.cell_size.x* + mesh.cell_size.z*mesh.cell_size.z; + cuda_status = cudaMemcpyToSymbol(dev_dxdxdzdz, (void*)&dxdxdzdz, sizeof(double), + cudaMemcpyHostToDevice); + + double dydydzdz = mesh.cell_size.y*mesh.cell_size.y* + mesh.cell_size.z*mesh.cell_size.z; + cuda_status = cudaMemcpyToSymbol(dev_dydydzdz, (void*)&dydydzdz, sizeof(double), + cudaMemcpyHostToDevice); + + double dxdxdydydzdz = mesh.cell_size.x*mesh.cell_size.x* + mesh.cell_size.y*mesh.cell_size.y* + mesh.cell_size.z*mesh.cell_size.z; + cuda_status = cudaMemcpyToSymbol(dev_dxdxdydydzdz, (void*)&dxdxdydydzdz, sizeof(double), + cudaMemcpyHostToDevice); + + int end = mesh.n_nodes.x*mesh.n_nodes.y*mesh.n_nodes.z - 1; + cuda_status = cudaMemcpyToSymbol(dev_end, (void*)&end, sizeof(int), + cudaMemcpyHostToDevice); } + void FieldSolver::eval_potential(Inner_regions_manager &inner_regions) { solve_poisson_eqn_Jacobi(inner_regions); @@ -144,7 +181,6 @@ void FieldSolver::solve_poisson_eqn_Jacobi(Inner_regions_manager &inner_regions) max_Jacobi_iterations = 150; int iter; - //init_current_phi_from_mesh_phi(); for (iter = 0; iter < max_Jacobi_iterations; ++iter) { single_Jacobi_iteration(inner_regions); if (iterative_Jacobi_solutions_converged()) { @@ -159,6 +195,7 @@ void 
FieldSolver::solve_poisson_eqn_Jacobi(Inner_regions_manager &inner_regions) //return; } + void FieldSolver::single_Jacobi_iteration(Inner_regions_manager &inner_regions) { set_phi_next_at_boundaries(); @@ -177,7 +214,7 @@ void FieldSolver::compute_phi_next_at_inner_points() dim3 blocks = mesh.GetBlocks(threads); cudaError_t cuda_status; - ComputePhiNext<<>> (mesh.dev_potential, mesh.dev_charge_density, dev_phi_next); + ComputePhiNext << > > (mesh.dev_potential, mesh.dev_charge_density, dev_phi_next); cuda_status = cudaDeviceSynchronize(); } @@ -197,24 +234,19 @@ void FieldSolver::set_phi_next_at_inner_regions(Inner_regions_manager &inner_reg bool FieldSolver::iterative_Jacobi_solutions_converged() { //// todo: bind tol to config parameters - ////abs_tolerance = std::max( dx * dx, std::max( dy * dy, dz * dz ) ) / 5; - - //double diff; - //double rel_diff; - ////double tol; - //// - //for (int i = 0; i < nx; i++) { - // for (int j = 0; j < ny; j++) { - // for (int k = 0; k < nz; k++) { - // diff = fabs(phi_next[i][j][k] - phi_current[i][j][k]); - // rel_diff = diff / fabs(phi_current[i][j][k]); - // if (diff > abs_tolerance || rel_diff > rel_tolerance) { - // return false; - // } - // } - // } - //} - //return true; + cudaError_t status; + dim3 threads = mesh.GetThreads(); + dim3 blocks = mesh.GetBlocks(threads); + AssertConvergence << > > (mesh.dev_potential,dev_phi_next); + status = cudaDeviceSynchronize(); + if (status == cudaErrorAssert) { + return false; + } + if (status == cudaSuccess) { + return true; + } + + std::cout << "Cuda error: " << cudaGetErrorString(status) << std::endl; } @@ -223,7 +255,7 @@ void FieldSolver::set_phi_next_as_phi_current() dim3 threads = mesh.GetThreads(); dim3 blocks = mesh.GetBlocks(threads); cudaError_t cuda_status; - SetPhiNextAsCurrent <<>> (mesh.dev_potential, dev_phi_next); + SetPhiNextAsCurrent << > > (mesh.dev_potential, dev_phi_next); cuda_status = cudaDeviceSynchronize(); } @@ -234,7 +266,7 @@ void 
FieldSolver::eval_fields_from_potential() dim3 blocks = mesh.GetBlocks(threads); cudaError_t cuda_status; - EvaluateFields <<>> (mesh.dev_potential, mesh.dev_electric_field); + EvaluateFields << > > (mesh.dev_potential, mesh.dev_electric_field); cuda_status = cudaDeviceSynchronize(); return; @@ -247,4 +279,6 @@ FieldSolver::~FieldSolver() { // delete phi arrays? cudaFree((void*)dev_phi_next); + cudaFree((void*)d_n_nodes); + cudaFree((void*)d_cell_size); } \ No newline at end of file diff --git a/FieldSolver.cuh b/FieldSolver.cuh index 37dc864..c0a20a0 100644 --- a/FieldSolver.cuh +++ b/FieldSolver.cuh @@ -7,6 +7,10 @@ #include "inner_region.h" #include "cuda.h" #include "cuda_runtime.h" +#include +#include "device_launch_parameters.h" +#include "math_functions.h" +#include "math_constants.h" class FieldSolver { public: @@ -25,7 +29,7 @@ private: //boost::multi_array phi_current; //boost::multi_array phi_next; void allocate_next_phi(); - void init_constants(); + void copy_constants_to_device(); // Solve potential void solve_poisson_eqn_Jacobi(Inner_regions_manager &inner_regions); void single_Jacobi_iteration(Inner_regions_manager &inner_regions); diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 6706276..a4f094a 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -236,7 +236,7 @@ void SpatialMeshCu::copy_constants_to_device() { cudaMemcpyHostToDevice); cuda_status_check(cuda_status); - cuda_status = cudaMemcpyToSymbol(d_volume_size, (void*)&volume_size, sizeof(double3), + cuda_status = cudaMemcpyToSymbol(d_cell_size, (void*)&cell_size, sizeof(double3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status); From 9d3938cf62fc25dd4d56c51eb15738ea736d34eb Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Sun, 16 Dec 2018 19:59:34 +0700 Subject: [PATCH 09/83] Spatial mesh + Field solver realised on cuda particle charge map interaction with mesh excluded inner regions excluded --- FieldSolver.cu | 21 ++- FieldSolver.cuh | 8 +- Makefile | 5 +- SpatialMeshCu.cuh 
| 4 + domain.cpp | 10 +- domain.h | 8 +- field_solver.cpp | 230 ------------------------- field_solver.h | 44 ----- inner_region.cpp | 56 +++---- inner_region.h | 40 ++--- particle_to_mesh_map.cpp | 353 ++++++++++++++++++++------------------- particle_to_mesh_map.h | 12 +- 12 files changed, 262 insertions(+), 529 deletions(-) delete mode 100644 field_solver.cpp delete mode 100644 field_solver.h diff --git a/FieldSolver.cu b/FieldSolver.cu index 36fae64..051448c 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -1,8 +1,7 @@ #include "FieldSolver.cuh" -#define ABS_TOLERANCE = 1.0e-5; -#define REL_TOLERANCE = 1.0e-12; + __constant__ double3 d_cell_size[1]; __constant__ int3 d_n_nodes[1]; @@ -14,7 +13,7 @@ __constant__ double dev_dxdxdydydzdz[1]; __constant__ int dev_end[1]; -__device__ int GetIdxVolume() { +__device__ int GetIdx() { //int xStepthread = 1; int xStepBlock = blockDim.x; int yStepThread = d_n_nodes[0].x; @@ -31,12 +30,12 @@ __device__ double GradientComponent(double phi1, double phi2, double cell_side_s } __global__ void SetPhiNextAsCurrent(double* d_phi_current, double* d_phi_next) { - int idx = GetIdxVolume(); + int idx = GetIdx(); d_phi_current[idx] = d_phi_next[idx]; } __global__ void ComputePhiNext(const double* d_phi_current, const double* d_charge, double* d_phi_next) { - int idx = GetIdxVolume(); + int idx = GetIdx(); int offset_Dx = 1; //todo rewrite usind device n_nodes.x/y/z int offset_Dy = d_n_nodes[0].x; @@ -68,7 +67,7 @@ __global__ void ComputePhiNext(const double* d_phi_current, const double* d_char } __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_field) { - int idx = GetIdxVolume(); + int idx = GetIdx(); double3 e = make_double3(0, 0, 0); //assuming true=1, false =0 @@ -115,11 +114,14 @@ __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_fiel __global__ void AssertConvergence(const double* d_phi_current, const double* d_phi_next) { double rel_diff; double abs_diff; - int idx = 
GetIdxVolume(); + double abs_tolerance = 1.0e-5; + double rel_tolerance = 1.0e-12; + int idx = GetIdx(); abs_diff = fabs(d_phi_next[idx] - d_phi_current[idx]); rel_diff = abs_diff / fabs(d_phi_current[idx]); + bool converged =((abs_diff <= abs_tolerance) || (rel_diff <= rel_tolerance)); - assert((abs_diff < ABS_TOLERANCE) || (rel_diff < REL_TOLERANCE)); + assert(converged==true); } FieldSolver::FieldSolver(SpatialMeshCu &mesh, Inner_regions_manager &inner_regions) :mesh(mesh) @@ -247,6 +249,7 @@ bool FieldSolver::iterative_Jacobi_solutions_converged() } std::cout << "Cuda error: " << cudaGetErrorString(status) << std::endl; + return false; } @@ -281,4 +284,4 @@ FieldSolver::~FieldSolver() cudaFree((void*)dev_phi_next); cudaFree((void*)d_n_nodes); cudaFree((void*)d_cell_size); -} \ No newline at end of file +} diff --git a/FieldSolver.cuh b/FieldSolver.cuh index c0a20a0..4f19c1e 100644 --- a/FieldSolver.cuh +++ b/FieldSolver.cuh @@ -1,15 +1,13 @@ -#ifndef _FIELD_SOLVER_H_ -#define _FIELD_SOLVER_H_ +#ifndef _FIELD_SOLVER_CUH_ +#define _FIELD_SOLVER_CUH_ #include #include #include "SpatialMeshCu.cuh" #include "inner_region.h" -#include "cuda.h" #include "cuda_runtime.h" #include #include "device_launch_parameters.h" -#include "math_functions.h" #include "math_constants.h" class FieldSolver { @@ -40,4 +38,4 @@ private: void set_phi_next_as_phi_current(); }; -#endif _FIELD_SOLVER_H_ +#endif /*_FIELD_SOLVER_CUH_*/ diff --git a/Makefile b/Makefile index bbe0c69..2351dae 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,8 @@ WARNINGS=-Wall CFLAGS = ${HDF5FLAGS} -O2 -std=c++11 ${WARNINGS} LDFLAGS = -CUDAFLAGS= -I/usr/local/cuda10/include -std=c++11 -arch=sm_30 +CUDAINCLUDES= -I/usr/local/cuda10/include +CUDAFLAGS= ${CUDAINCLUDES} -std=c++11 -arch=sm_30 ### Libraries COMMONLIBS=-lm @@ -41,7 +42,7 @@ $(EXECUTABLE): $(OBJECTS) $(TINYEXPR) $(CUOBJECTS) $(CUOBJECTS):%.o:%.cu $(CUHEADERS) $(NVCC) $(CUDAFLAGS) -I/usr/local/hdf5/include -c $< -o $@ $(OBJECTS):%.o:%.cpp 
$(CPPHEADERS) - $(CC) $(CFLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(CUDAINCLUDES) -c $< -o $@ .PHONY: allsubdirs $(SUBDIRS) $(TINYEXPR) clean cleansubdirs cleanall diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 74900a6..0dc568b 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -1,3 +1,6 @@ +#ifndef _SPATIAL_MESH_H_ +#define _SPATIAL_MESH_H_ + #include "cuda_runtime.h" #include "config.h" #include @@ -60,3 +63,4 @@ private: void cuda_status_check(cudaError_t status); }; +#endif /* _SPATIAL_MESH_H_ */ diff --git a/domain.cpp b/domain.cpp index aa0e63e..2af2e3e 100644 --- a/domain.cpp +++ b/domain.cpp @@ -111,8 +111,8 @@ void Domain::eval_charge_density() void Domain::eval_potential_and_fields() { - field_solver.eval_potential( spat_mesh, inner_regions ); - field_solver.eval_fields_from_potential( spat_mesh ); + field_solver.eval_potential(inner_regions ); + field_solver.eval_fields_from_potential(); return; } @@ -367,9 +367,9 @@ bool Domain::out_of_bound( const Particle &p ) bool out; out = - ( x >= spat_mesh.x_volume_size ) || ( x <= 0 ) || - ( y >= spat_mesh.y_volume_size ) || ( y <= 0 ) || - ( z >= spat_mesh.z_volume_size ) || ( z <= 0 ) ; + ( x >= spat_mesh.volume_size.x ) || ( x <= 0 ) || + ( y >= spat_mesh.volume_size.y ) || ( y <= 0 ) || + ( z >= spat_mesh.volume_size.z ) || ( z <= 0 ) ; return out; diff --git a/domain.h b/domain.h index 671abaf..3fbfc55 100644 --- a/domain.h +++ b/domain.h @@ -10,10 +10,10 @@ #include #include "config.h" #include "time_grid.h" -#include "spatial_mesh.h" +#include "SpatialMeshCu.cuh" #include "inner_region.h" #include "particle_to_mesh_map.h" -#include "field_solver.h" +#include "FieldSolver.cuh" #include "External_field.h" #include "particle_interaction_model.h" #include "particle_source.h" @@ -28,10 +28,10 @@ class Domain { //Domain() {}; public: Time_grid time_grid; - Spatial_mesh spat_mesh; + SpatialMeshCu spat_mesh; Inner_regions_manager inner_regions; Particle_to_mesh_map particle_to_mesh_map; - 
Field_solver field_solver; + FieldSolver field_solver; Particle_sources_manager particle_sources; External_fields_manager external_fields; Particle_interaction_model particle_interaction_model; diff --git a/field_solver.cpp b/field_solver.cpp deleted file mode 100644 index 58821d9..0000000 --- a/field_solver.cpp +++ /dev/null @@ -1,230 +0,0 @@ -#include "field_solver.h" - -Field_solver::Field_solver( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ) -{ - nx = spat_mesh.x_n_nodes; - ny = spat_mesh.y_n_nodes; - nz = spat_mesh.z_n_nodes; - dx = spat_mesh.x_cell_size; - dy = spat_mesh.y_cell_size; - dz = spat_mesh.z_cell_size; - - allocate_current_next_phi(); -} - -void Field_solver::allocate_current_next_phi() -{ - phi_current.resize( boost::extents[nx][ny][nz] ); - phi_next.resize( boost::extents[nx][ny][nz] ); -} - -void Field_solver::eval_potential( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ) -{ - solve_poisson_eqn_Jacobi( spat_mesh, inner_regions ); -} - -void Field_solver::solve_poisson_eqn_Jacobi( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ) -{ - max_Jacobi_iterations = 150; - int iter; - - init_current_phi_from_spat_mesh_phi( spat_mesh ); - for( iter = 0; iter < max_Jacobi_iterations; ++iter ){ - single_Jacobi_iteration( spat_mesh, inner_regions ); - if ( iterative_Jacobi_solutions_converged() ) { - break; - } - set_phi_next_as_phi_current(); - } - if ( iter == max_Jacobi_iterations ){ - printf("WARING: potential evaluation did't converge after max iterations!\n"); - } - transfer_solution_to_spat_mesh( spat_mesh ); - - return; -} - - -void Field_solver::init_current_phi_from_spat_mesh_phi( Spatial_mesh &spat_mesh ) -{ - phi_current.assign( spat_mesh.potential.data(), - spat_mesh.potential.data() + spat_mesh.potential.num_elements() ); - return; -} - -void Field_solver::single_Jacobi_iteration( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ) -{ - set_phi_next_at_boundaries(); - 
compute_phi_next_at_inner_points( spat_mesh ); - set_phi_next_at_inner_regions( inner_regions ); -} - -void Field_solver::set_phi_next_at_boundaries() -{ - for ( int j = 0; j < ny; j++ ) { - for ( int k = 0; k < nz; k++ ) { - phi_next[0][j][k] = phi_current[0][j][k]; - phi_next[nx-1][j][k] = phi_current[nx-1][j][k]; - } - } - // - for ( int i = 0; i < nx; i++ ) { - for ( int k = 0; k < nz; k++ ) { - phi_next[i][0][k] = phi_current[i][0][k]; - phi_next[i][ny-1][k] = phi_current[i][ny-1][k]; - } - } - // - for ( int i = 0; i < nx; i++ ) { - for ( int j = 0; j < ny; j++ ) { - phi_next[i][j][0] = phi_current[i][j][0]; - phi_next[i][j][nz-1] = phi_current[i][j][nz-1]; - } - } -} - -void Field_solver::compute_phi_next_at_inner_points( Spatial_mesh &spat_mesh ) -{ - double dxdxdydy = dx * dx * dy * dy; - double dxdxdzdz = dx * dx * dz * dz; - double dydydzdz = dy * dy * dz * dz; - double dxdxdydydzdz = dx * dx * dy * dy * dz * dz; - double denom = 2 * ( dxdxdydy + dxdxdzdz + dydydzdz ); - // - for ( int i = 1; i < nx - 1; i++ ) { - for ( int j = 1; j < ny - 1; j++ ) { - for ( int k = 1; k < nz - 1; k++ ) { - phi_next[i][j][k] = - ( phi_current[i-1][j][k] + phi_current[i+1][j][k] ) * dydydzdz; - phi_next[i][j][k] = phi_next[i][j][k] + - ( phi_current[i][j-1][k] + phi_current[i][j+1][k] ) * dxdxdzdz; - phi_next[i][j][k] = phi_next[i][j][k] + - ( phi_current[i][j][k-1] + phi_current[i][j][k+1] ) * dxdxdydy; - // Delta phi = - 4 * pi * rho - phi_next[i][j][k] = phi_next[i][j][k] + - 4.0 * M_PI * spat_mesh.charge_density[i][j][k] * dxdxdydydzdz; - phi_next[i][j][k] = phi_next[i][j][k] / denom; - } - } - } -} - -void Field_solver::set_phi_next_at_inner_regions( Inner_regions_manager &inner_regions ) -{ - for( auto ® : inner_regions.regions ){ - for( auto &node : reg.inner_nodes ){ - // todo: mark nodes at edge during construction - // if (!node.at_domain_edge( nx, ny, nz )) { - phi_next[node.x][node.y][node.z] = reg.potential; - // } - } - } -} - - -bool 
Field_solver::iterative_Jacobi_solutions_converged() -{ - // todo: bind tol to config parameters - //abs_tolerance = std::max( dx * dx, std::max( dy * dy, dz * dz ) ) / 5; - abs_tolerance = 1.0e-5; - rel_tolerance = 1.0e-12; - double diff; - double rel_diff; - //double tol; - // - for ( int i = 0; i < nx; i++ ) { - for ( int j = 0; j < ny; j++ ) { - for ( int k = 0; k < nz; k++ ) { - diff = fabs( phi_next[i][j][k] - phi_current[i][j][k] ); - rel_diff = diff / fabs( phi_current[i][j][k] ); - if ( diff > abs_tolerance || rel_diff > rel_tolerance ){ - return false; - } - } - } - } - return true; -} - - -void Field_solver::set_phi_next_as_phi_current() -{ - // Looks like straightforward assignment - // phi_next = phi_current - // would result in copy. - // Hopefully, it could be avoided with std::swap - std::swap( phi_current, phi_next ); -} - -void Field_solver::transfer_solution_to_spat_mesh( Spatial_mesh &spat_mesh ) -{ - spat_mesh.potential.assign( phi_next.data(), - phi_next.data() + phi_next.num_elements() ); -} - - -void Field_solver::eval_fields_from_potential( Spatial_mesh &spat_mesh ) -{ - int nx = spat_mesh.x_n_nodes; - int ny = spat_mesh.y_n_nodes; - int nz = spat_mesh.z_n_nodes; - double dx = spat_mesh.x_cell_size; - double dy = spat_mesh.y_cell_size; - double dz = spat_mesh.z_cell_size; - boost::multi_array &phi = spat_mesh.potential; - double ex, ey, ez; - // - for ( int i = 0; i < nx; i++ ) { - for ( int j = 0; j < ny; j++ ) { - for ( int k = 0; k < nz; k++ ) { - if ( i == 0 ) { - ex = - boundary_difference( phi[i][j][k], phi[i+1][j][k], dx ); - } else if ( i == nx-1 ) { - ex = - boundary_difference( phi[i-1][j][k], phi[i][j][k], dx ); - } else { - ex = - central_difference( phi[i-1][j][k], phi[i+1][j][k], dx ); - } - - if ( j == 0 ) { - ey = - boundary_difference( phi[i][j][k], phi[i][j+1][k], dy ); - } else if ( j == ny-1 ) { - ey = - boundary_difference( phi[i][j-1][k], phi[i][j][k], dy ); - } else { - ey = - central_difference( phi[i][j-1][k], 
phi[i][j+1][k], dy ); - } - - if ( k == 0 ) { - ez = - boundary_difference( phi[i][j][k], phi[i][j][k+1], dz ); - } else if ( k == nz-1 ) { - ez = - boundary_difference( phi[i][j][k-1], phi[i][j][k], dz ); - } else { - ez = - central_difference( phi[i][j][k-1], phi[i][j][k+1], dz ); - } - - spat_mesh.electric_field[i][j][k] = vec3d_init( ex, ey, ez ); - } - } - } - - return; -} - -double Field_solver::central_difference( double phi1, double phi2, double dx ) -{ - return ( (phi2 - phi1) / ( 2.0 * dx ) ); -} - -double Field_solver::boundary_difference( double phi1, double phi2, double dx ) -{ - return ( (phi2 - phi1) / dx ); -} - - -Field_solver::~Field_solver() -{ - // delete phi arrays? -} diff --git a/field_solver.h b/field_solver.h deleted file mode 100644 index 8285b53..0000000 --- a/field_solver.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _FIELD_SOLVER_H_ -#define _FIELD_SOLVER_H_ - -#include -#include -#include -#include "spatial_mesh.h" -#include "inner_region.h" - - -class Field_solver { - public: - Field_solver( Spatial_mesh &spat_mesh, Inner_regions_manager &inner_regions ); - void eval_potential( Spatial_mesh &spat_mesh, Inner_regions_manager &inner_regions ); - void eval_fields_from_potential( Spatial_mesh &spat_mesh ); - virtual ~Field_solver(); - private: - int nx, ny, nz; - double dx, dy, dz; - private: - int max_Jacobi_iterations; - double rel_tolerance; - double abs_tolerance; - boost::multi_array phi_current; - boost::multi_array phi_next; - void allocate_current_next_phi(); - // Solve potential - void solve_poisson_eqn_Jacobi( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ); - void init_current_phi_from_spat_mesh_phi( Spatial_mesh &spat_mesh ); - void single_Jacobi_iteration( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ); - void set_phi_next_at_boundaries(); - void compute_phi_next_at_inner_points( Spatial_mesh &spat_mesh ); - void set_phi_next_at_inner_regions( Inner_regions_manager &inner_regions ); - bool 
iterative_Jacobi_solutions_converged(); - void set_phi_next_as_phi_current(); - void transfer_solution_to_spat_mesh( Spatial_mesh &spat_mesh ); - // Eval fields from potential - double boundary_difference( double phi1, double phi2, double dx ); - double central_difference( double phi1, double phi2, double dx ); -}; - -#endif /* _FIELD_SOLVER_H_ */ diff --git a/inner_region.cpp b/inner_region.cpp index c5207bf..1a8a1d2 100644 --- a/inner_region.cpp +++ b/inner_region.cpp @@ -80,11 +80,11 @@ bool Inner_region::check_if_node_inside( Node_reference &node, return check_if_point_inside( node.x * dx, node.y * dy, node.z * dz ); } -void Inner_region::mark_inner_nodes( Spatial_mesh &spat_mesh ) +void Inner_region::mark_inner_nodes( SpatialMeshCu &spat_mesh ) { - int nx = spat_mesh.x_n_nodes; - int ny = spat_mesh.y_n_nodes; - int nz = spat_mesh.z_n_nodes; + int nx = spat_mesh.n_nodes.x; + int ny = spat_mesh.n_nodes.y; + int nz = spat_mesh.n_nodes.z; for ( int k = 0; k < nz; k++ ) { for ( int j = 0; j < ny; j++ ) { @@ -99,11 +99,11 @@ void Inner_region::mark_inner_nodes( Spatial_mesh &spat_mesh ) } } -void Inner_region::select_inner_nodes_not_at_domain_edge( Spatial_mesh &spat_mesh ) +void Inner_region::select_inner_nodes_not_at_domain_edge( SpatialMeshCu &spat_mesh ) { - int nx = spat_mesh.x_n_nodes; - int ny = spat_mesh.y_n_nodes; - int nz = spat_mesh.z_n_nodes; + int nx = spat_mesh.n_nodes.x; + int ny = spat_mesh.n_nodes.y; + int nz = spat_mesh.n_nodes.z; inner_nodes_not_at_domain_edge.reserve( inner_nodes.size() ); @@ -114,11 +114,11 @@ void Inner_region::select_inner_nodes_not_at_domain_edge( Spatial_mesh &spat_mes } } -void Inner_region::mark_near_boundary_nodes( Spatial_mesh &spat_mesh ) +void Inner_region::mark_near_boundary_nodes( SpatialMeshCu &spat_mesh ) { - int nx = spat_mesh.x_n_nodes; - int ny = spat_mesh.y_n_nodes; - int nz = spat_mesh.z_n_nodes; + int nx = spat_mesh.n_nodes.x; + int ny = spat_mesh.n_nodes.y; + int nz = spat_mesh.n_nodes.z; // rewrite; for( 
auto &node : inner_nodes ){ @@ -139,13 +139,13 @@ void Inner_region::mark_near_boundary_nodes( Spatial_mesh &spat_mesh ) near_boundary_nodes.end() ); } -void Inner_region::select_near_boundary_nodes_not_at_domain_edge( Spatial_mesh &spat_mesh ) +void Inner_region::select_near_boundary_nodes_not_at_domain_edge( SpatialMeshCu &spat_mesh ) { // todo: repeats with select_inner_nodes_not_at_domain_edge; // remove code duplication - int nx = spat_mesh.x_n_nodes; - int ny = spat_mesh.y_n_nodes; - int nz = spat_mesh.z_n_nodes; + int nx = spat_mesh.n_nodes.x; + int ny = spat_mesh.n_nodes.y; + int nz = spat_mesh.n_nodes.z; near_boundary_nodes_not_at_domain_edge.reserve( near_boundary_nodes.size() ); @@ -217,7 +217,7 @@ void Inner_region::hdf5_status_check( herr_t status ) Inner_region_box::Inner_region_box( Config &conf, Inner_region_box_config_part &inner_region_box_conf, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_box_conf ) { object_type = "box"; @@ -231,7 +231,7 @@ Inner_region_box::Inner_region_box( Inner_region_box::Inner_region_box( hid_t h5_inner_region_box_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_box_group_id ) { object_type = "box"; @@ -336,7 +336,7 @@ void Inner_region_box::write_hdf5_region_specific_parameters( Inner_region_sphere::Inner_region_sphere( Config &conf, Inner_region_sphere_config_part &inner_region_sphere_conf, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_sphere_conf ) { object_type = "sphere"; @@ -351,7 +351,7 @@ Inner_region_sphere::Inner_region_sphere( Inner_region_sphere::Inner_region_sphere( hid_t h5_inner_region_sphere_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_sphere_group_id ) { object_type = "sphere"; @@ -442,7 +442,7 @@ void Inner_region_sphere::write_hdf5_region_specific_parameters( 
Inner_region_cylinder::Inner_region_cylinder( Config &conf, Inner_region_cylinder_config_part &inner_region_cylinder_conf, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_cylinder_conf ) { object_type = "cylinder"; @@ -456,7 +456,7 @@ Inner_region_cylinder::Inner_region_cylinder( Inner_region_cylinder::Inner_region_cylinder( hid_t h5_inner_region_cylinder_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_cylinder_group_id ) { object_type = "cylinder"; @@ -581,7 +581,7 @@ void Inner_region_cylinder::write_hdf5_region_specific_parameters( Inner_region_tube::Inner_region_tube( Config &conf, Inner_region_tube_config_part &inner_region_tube_conf, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_tube_conf ) { object_type = "tube"; @@ -595,7 +595,7 @@ Inner_region_tube::Inner_region_tube( Inner_region_tube::Inner_region_tube( hid_t h5_inner_region_tube_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_tube_group_id ) { object_type = "tube"; @@ -728,7 +728,7 @@ void Inner_region_tube::write_hdf5_region_specific_parameters( Inner_region_tube_along_z_segment::Inner_region_tube_along_z_segment( Config &conf, Inner_region_tube_along_z_segment_config_part &inner_region_tube_along_z_segment_conf, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_tube_along_z_segment_conf ) { object_type = "tube_along_z_segment"; @@ -744,7 +744,7 @@ Inner_region_tube_along_z_segment::Inner_region_tube_along_z_segment( Inner_region_tube_along_z_segment::Inner_region_tube_along_z_segment( hid_t h5_inner_region_tube_along_z_segment_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_tube_along_z_segment_group_id ) { object_type = "tube_along_z_segment"; @@ -893,7 +893,7 @@ void 
Inner_region_tube_along_z_segment::write_hdf5_region_specific_parameters( Inner_region_cone_along_z::Inner_region_cone_along_z( Config &conf, Inner_region_cone_along_z_config_part &inner_region_cone_along_z_conf, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_cone_along_z_conf ) { object_type = "cone_along_z"; @@ -907,7 +907,7 @@ Inner_region_cone_along_z::Inner_region_cone_along_z( Inner_region_cone_along_z::Inner_region_cone_along_z( hid_t h5_inner_region_cone_along_z_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_cone_along_z_group_id ) { object_type = "cone_along_z"; diff --git a/inner_region.h b/inner_region.h index ca5b7bc..77dcaa6 100644 --- a/inner_region.h +++ b/inner_region.h @@ -9,7 +9,7 @@ #include #include #include "config.h" -#include "spatial_mesh.h" +#include "SpatialMeshCu.cuh" #include "node_reference.h" #include "particle.h" #include "vec3d.h" @@ -55,10 +55,10 @@ class Inner_region{ void write_to_file( hid_t regions_group_id ); void hdf5_status_check( herr_t status ); protected: - void mark_inner_nodes( Spatial_mesh &spat_mesh ); - void select_inner_nodes_not_at_domain_edge( Spatial_mesh &spat_mesh ); - void mark_near_boundary_nodes( Spatial_mesh &spat_mesh ); - void select_near_boundary_nodes_not_at_domain_edge( Spatial_mesh &spat_mesh ); + void mark_inner_nodes( SpatialMeshCu &spat_mesh ); + void select_inner_nodes_not_at_domain_edge( SpatialMeshCu &spat_mesh ); + void mark_near_boundary_nodes( SpatialMeshCu &spat_mesh ); + void select_near_boundary_nodes_not_at_domain_edge( SpatialMeshCu &spat_mesh ); void write_hdf5_common_parameters( hid_t current_region_group_id ); virtual void write_hdf5_region_specific_parameters( hid_t current_region_group_id ) = 0; @@ -84,9 +84,9 @@ class Inner_region_box : public Inner_region{ public: Inner_region_box( Config &conf, Inner_region_box_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + 
SpatialMeshCu &spat_mesh ); Inner_region_box( hid_t h5_inner_region_box_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_box() {}; void print() { std::cout << "Inner region: name = " << name << std::endl; @@ -122,9 +122,9 @@ class Inner_region_sphere : public Inner_region{ Inner_region_sphere( Config &conf, Inner_region_sphere_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); Inner_region_sphere( hid_t h5_inner_region_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_sphere() {}; void print() { std::cout << "Inner region: name = " << name << std::endl; @@ -161,9 +161,9 @@ class Inner_region_cylinder : public Inner_region{ Inner_region_cylinder( Config &conf, Inner_region_cylinder_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); Inner_region_cylinder( hid_t h5_inner_region_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_cylinder() {}; void print() { std::cout << "Inner region: name = " << name << std::endl; @@ -205,9 +205,9 @@ class Inner_region_tube : public Inner_region{ Inner_region_tube( Config &conf, Inner_region_tube_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); Inner_region_tube( hid_t h5_inner_region_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_tube() {}; void print() { std::cout << "Inner region: name = " << name << std::endl; @@ -249,9 +249,9 @@ class Inner_region_tube_along_z_segment : public Inner_region{ Inner_region_tube_along_z_segment( Config &conf, Inner_region_tube_along_z_segment_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); Inner_region_tube_along_z_segment( hid_t h5_inner_region_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_tube_along_z_segment() {}; void print() 
{ std::cout << "Inner region: name = " << name << std::endl; @@ -294,9 +294,9 @@ class Inner_region_cone_along_z : public Inner_region{ Inner_region_cone_along_z( Config &conf, Inner_region_cone_along_z_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); Inner_region_cone_along_z( hid_t h5_inner_region_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_cone_along_z() {}; void print() { std::cout << "Inner region: name = " << name << std::endl; @@ -336,7 +336,7 @@ class Inner_regions_manager{ public: boost::ptr_vector regions; public: - Inner_regions_manager( Config &conf, Spatial_mesh &spat_mesh ) + Inner_regions_manager( Config &conf, SpatialMeshCu &spat_mesh ) { for( auto &inner_region_conf : conf.inner_regions_config_part ){ if( Inner_region_box_config_part *box_conf = @@ -383,7 +383,7 @@ class Inner_regions_manager{ } Inner_regions_manager( hid_t h5_inner_region_group, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) { hsize_t nobj; ssize_t len; @@ -418,7 +418,7 @@ class Inner_regions_manager{ } void parse_hdf5_inner_reg( hid_t current_ir_grpid, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) { herr_t status; char object_type_cstr[50]; diff --git a/particle_to_mesh_map.cpp b/particle_to_mesh_map.cpp index cc2487b..f134e6a 100644 --- a/particle_to_mesh_map.cpp +++ b/particle_to_mesh_map.cpp @@ -1,204 +1,205 @@ #include "particle_to_mesh_map.h" // Eval charge density on grid -void Particle_to_mesh_map::weight_particles_charge_to_mesh( - Spatial_mesh &spat_mesh, Particle_sources_manager &particle_sources ) +void Particle_to_mesh_map::weight_particles_charge_to_mesh( + SpatialMeshCu &spat_mesh, Particle_sources_manager &particle_sources ) { // Rewrite: // forall particles { // find nonzero weights and corresponding nodes // charge[node] = weight(particle, node) * particle.charge // } - double dx = spat_mesh.x_cell_size; - double dy = spat_mesh.y_cell_size; - double dz = 
spat_mesh.z_cell_size; - double cell_volume = dx * dy * dz; - double volume_around_node = cell_volume; - int tlf_i, tlf_j, tlf_k; // 'tlf' = 'top_left_far' - double tlf_x_weight, tlf_y_weight, tlf_z_weight; +// double dx = spat_mesh.cell_size.x; +// double dy = spat_mesh.cell_size.y; +// double dz = spat_mesh.cell_size.z; +// double cell_volume = dx * dy * dz; +// double volume_around_node = cell_volume; +// int tlf_i, tlf_j, tlf_k; // 'tlf' = 'top_left_far' +// double tlf_x_weight, tlf_y_weight, tlf_z_weight; - for( auto& part_src: particle_sources.sources ) { - for( auto& p : part_src.particles ) { - next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); - next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); - next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); - spat_mesh.charge_density[tlf_i][tlf_j][tlf_k] += - tlf_x_weight * tlf_y_weight * tlf_z_weight - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i-1][tlf_j][tlf_k] += - ( 1.0 - tlf_x_weight ) * tlf_y_weight * tlf_z_weight - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i][tlf_j-1][tlf_k] += - tlf_x_weight * ( 1.0 - tlf_y_weight ) * tlf_z_weight - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i-1][tlf_j-1][tlf_k] += - ( 1.0 - tlf_x_weight ) * ( 1.0 - tlf_y_weight ) * tlf_z_weight - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i][tlf_j][tlf_k - 1] += - tlf_x_weight * tlf_y_weight * ( 1.0 - tlf_z_weight ) - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i-1][tlf_j][tlf_k - 1] += - ( 1.0 - tlf_x_weight ) * tlf_y_weight * ( 1.0 - tlf_z_weight ) - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i][tlf_j-1][tlf_k - 1] += - tlf_x_weight * ( 1.0 - tlf_y_weight ) * ( 1.0 - tlf_z_weight ) - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i-1][tlf_j-1][tlf_k - 1] += - ( 1.0 - tlf_x_weight ) * ( 1.0 - tlf_y_weight ) * ( 1.0 - 
tlf_z_weight ) - * p.charge / volume_around_node; - } - } +// for( auto& part_src: particle_sources.sources ) { +// for( auto& p : part_src.particles ) { +// next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); +// next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); +// next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); +// spat_mesh.charge_density[tlf_i][tlf_j][tlf_k] += +// tlf_x_weight * tlf_y_weight * tlf_z_weight +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i-1][tlf_j][tlf_k] += +// ( 1.0 - tlf_x_weight ) * tlf_y_weight * tlf_z_weight +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i][tlf_j-1][tlf_k] += +// tlf_x_weight * ( 1.0 - tlf_y_weight ) * tlf_z_weight +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i-1][tlf_j-1][tlf_k] += +// ( 1.0 - tlf_x_weight ) * ( 1.0 - tlf_y_weight ) * tlf_z_weight +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i][tlf_j][tlf_k - 1] += +// tlf_x_weight * tlf_y_weight * ( 1.0 - tlf_z_weight ) +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i-1][tlf_j][tlf_k - 1] += +// ( 1.0 - tlf_x_weight ) * tlf_y_weight * ( 1.0 - tlf_z_weight ) +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i][tlf_j-1][tlf_k - 1] += +// tlf_x_weight * ( 1.0 - tlf_y_weight ) * ( 1.0 - tlf_z_weight ) +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i-1][tlf_j-1][tlf_k - 1] += +// ( 1.0 - tlf_x_weight ) * ( 1.0 - tlf_y_weight ) * ( 1.0 - tlf_z_weight ) +// * p.charge / volume_around_node; +// } +// } return; } -Vec3d Particle_to_mesh_map::field_at_particle_position( - Spatial_mesh &spat_mesh, Particle &p ) +Vec3d Particle_to_mesh_map::field_at_particle_position( + SpatialMeshCu &spat_mesh, Particle &p ) { - double dx = spat_mesh.x_cell_size; - double dy = spat_mesh.y_cell_size; - double dz = spat_mesh.z_cell_size; - int tlf_i, tlf_j, 
tlf_k; // 'tlf' = 'top_left_far' - double tlf_x_weight, tlf_y_weight, tlf_z_weight; +// double dx = spat_mesh.x_cell_size; +// double dy = spat_mesh.y_cell_size; +// double dz = spat_mesh.z_cell_size; +// int tlf_i, tlf_j, tlf_k; // 'tlf' = 'top_left_far' +// double tlf_x_weight, tlf_y_weight, tlf_z_weight; Vec3d field_from_node, total_field; - // - next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); - next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); - next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); - // tlf +// // +// next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); +// next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); +// next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); +// // tlf total_field = vec3d_zero(); - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j][tlf_k], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // trf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // blf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // brf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k], - 1.0 - tlf_x_weight ); - 
field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // tln - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j][tlf_k-1], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // trn - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k-1], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // bln - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k-1], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // brn - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k-1], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j][tlf_k], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // trf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k], +// 1.0 - 
tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // blf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // brf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // tln +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j][tlf_k-1], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // trn +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k-1], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // bln +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k-1], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // brn +// 
field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k-1], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); // return total_field; } -Vec3d Particle_to_mesh_map::force_on_particle( - Spatial_mesh &spat_mesh, Particle &p ) +Vec3d Particle_to_mesh_map::force_on_particle( + SpatialMeshCu &spat_mesh, Particle &p ) { - double dx = spat_mesh.x_cell_size; - double dy = spat_mesh.y_cell_size; - double dz = spat_mesh.z_cell_size; - int tlf_i, tlf_j, tlf_k; // 'tlf' = 'top_left_far' - double tlf_x_weight, tlf_y_weight, tlf_z_weight; +// double dx = spat_mesh.x_cell_size; +// double dy = spat_mesh.y_cell_size; +// double dz = spat_mesh.z_cell_size; +// int tlf_i, tlf_j, tlf_k; // 'tlf' = 'top_left_far' +// double tlf_x_weight, tlf_y_weight, tlf_z_weight; Vec3d field_from_node, total_field, force; // - next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); - next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); - next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); - // tlf +// next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); +// next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); +// next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); +// // tlf total_field = vec3d_zero(); - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j][tlf_k], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // trf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k], - 1.0 - 
tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // blf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // brf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // tln - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j][tlf_k-1], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // trn - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k-1], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // bln - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k-1], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // brn - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k-1], - 
1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // - force = vec3d_times_scalar( total_field, p.charge ); - return force; +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j][tlf_k], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // trf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // blf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // brf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // tln +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j][tlf_k-1], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( 
total_field, field_from_node ); +// // trn +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k-1], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // bln +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k-1], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // brn +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k-1], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // +// force = vec3d_times_scalar( total_field, p.charge ); +// return force; + return total_field; } void Particle_to_mesh_map::next_node_num_and_weight( diff --git a/particle_to_mesh_map.h b/particle_to_mesh_map.h index 7ad78a4..ca8441b 100644 --- a/particle_to_mesh_map.h +++ b/particle_to_mesh_map.h @@ -1,20 +1,20 @@ -#include "spatial_mesh.h" +#include "SpatialMeshCu.cuh" #include "particle_source.h" #include "particle.h" #include "vec3d.h" class Particle_to_mesh_map { - public: + public: Particle_to_mesh_map() {}; virtual ~Particle_to_mesh_map() {}; public: - void weight_particles_charge_to_mesh( Spatial_mesh &spat_mesh, + void weight_particles_charge_to_mesh( SpatialMeshCu &spat_mesh, Particle_sources_manager &particle_sources ); - Vec3d field_at_particle_position( Spatial_mesh &spat_mesh, Particle &p ); - Vec3d force_on_particle( Spatial_mesh &spat_mesh, Particle &p ); + Vec3d 
field_at_particle_position( SpatialMeshCu &spat_mesh, Particle &p ); + Vec3d force_on_particle( SpatialMeshCu &spat_mesh, Particle &p ); private: - void next_node_num_and_weight( const double x, const double grid_step, + void next_node_num_and_weight( const double x, const double grid_step, int *next_node, double *weight ); }; From fc27c970a820d247993deddf9781a4a0cd296191 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Mon, 17 Dec 2018 03:46:48 +0700 Subject: [PATCH 10/83] simple set device --- main.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main.cpp b/main.cpp index 6a83786..30c62ae 100644 --- a/main.cpp +++ b/main.cpp @@ -3,6 +3,7 @@ #include #include #include "config.h" +#include "cuda_runtime.h" #include "domain.h" #include "parse_cmd_line.h" @@ -16,6 +17,8 @@ void extract_filename_prefix_and_suffix_from_h5filename( std::string h5_file, int main( int argc, char *argv[] ) { + cudaError_t status; + cudaStatus = cudaSetDevice(0); std::string config_or_h5_file; parse_cmd_line( argc, argv, config_or_h5_file ); From 9dfe30537c687d2a0516a52a3dda05192515e01b Mon Sep 17 00:00:00 2001 From: noooway Date: Wed, 19 Dec 2018 00:15:30 +0300 Subject: [PATCH 11/83] In FieldSolver ComputePhiNext: neibhour -> neighbour --- FieldSolver.cu | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index 051448c..9fdf23f 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -41,25 +41,25 @@ __global__ void ComputePhiNext(const double* d_phi_current, const double* d_char int offset_Dy = d_n_nodes[0].x; int offset_Dz = d_n_nodes[0].x*d_n_nodes[0].y; - int prev_neibhour_idx; - int next_neibhour_idx; + int prev_neighbour_idx; + int next_neighbour_idx; double denom = (double)2 * (dev_dxdxdydy[0] + dev_dxdxdzdz[0] + dev_dydydzdz[0]); - prev_neibhour_idx = max(idx + offset_Dx, 0); - next_neibhour_idx = min(idx + offset_Dx, dev_end[0]); + prev_neighbour_idx = max(idx + offset_Dx, 0); + next_neighbour_idx = min(idx 
+ offset_Dx, dev_end[0]); d_phi_next[idx] = - (d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dev_dydydzdz[0]; + (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx])*dev_dydydzdz[0]; - prev_neibhour_idx = max(idx + offset_Dy, 0); - next_neibhour_idx = min(idx + offset_Dy, dev_end[0]); + prev_neighbour_idx = max(idx + offset_Dy, 0); + next_neighbour_idx = min(idx + offset_Dy, dev_end[0]); d_phi_next[idx] += - (d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dev_dxdxdzdz[0]; + (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx])*dev_dxdxdzdz[0]; - prev_neibhour_idx = max(idx + offset_Dz, 0); - next_neibhour_idx = min(idx + offset_Dz, dev_end[0]); + prev_neighbour_idx = max(idx + offset_Dz, 0); + next_neighbour_idx = min(idx + offset_Dz, dev_end[0]); d_phi_next[idx] += - (d_phi_current[next_neibhour_idx] + d_phi_current[prev_neibhour_idx])*dev_dxdxdydy[0]; + (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx])*dev_dxdxdydy[0]; d_phi_next[idx] += 4.0 * CUDART_PI * d_charge[idx] * dev_dxdxdydydzdz[0]; d_phi_next[idx] /= denom; From 3de46aa25d62f5b638561ea3530a7c8c806afba3 Mon Sep 17 00:00:00 2001 From: noooway Date: Wed, 19 Dec 2018 00:26:41 +0300 Subject: [PATCH 12/83] In FieldSolver.cu minor formatting fixes --- FieldSolver.cu | 60 +++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index 9fdf23f..4d4f8fd 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -39,7 +39,7 @@ __global__ void ComputePhiNext(const double* d_phi_current, const double* d_char int offset_Dx = 1; //todo rewrite usind device n_nodes.x/y/z int offset_Dy = d_n_nodes[0].x; - int offset_Dz = d_n_nodes[0].x*d_n_nodes[0].y; + int offset_Dz = d_n_nodes[0].x * d_n_nodes[0].y; int prev_neighbour_idx; int next_neighbour_idx; @@ -49,17 +49,17 @@ __global__ void ComputePhiNext(const double* d_phi_current, const 
double* d_char prev_neighbour_idx = max(idx + offset_Dx, 0); next_neighbour_idx = min(idx + offset_Dx, dev_end[0]); d_phi_next[idx] = - (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx])*dev_dydydzdz[0]; + (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx]) * dev_dydydzdz[0]; prev_neighbour_idx = max(idx + offset_Dy, 0); next_neighbour_idx = min(idx + offset_Dy, dev_end[0]); d_phi_next[idx] += - (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx])*dev_dxdxdzdz[0]; + (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx]) * dev_dxdxdzdz[0]; prev_neighbour_idx = max(idx + offset_Dz, 0); next_neighbour_idx = min(idx + offset_Dz, dev_end[0]); d_phi_next[idx] += - (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx])*dev_dxdxdydy[0]; + (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx]) * dev_dxdxdydy[0]; d_phi_next[idx] += 4.0 * CUDART_PI * d_charge[idx] * dev_dxdxdydydzdz[0]; d_phi_next[idx] /= denom; @@ -70,7 +70,7 @@ __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_fiel int idx = GetIdx(); double3 e = make_double3(0, 0, 0); - //assuming true=1, false =0 + //assuming true = 1, false = 0 //this method is hard to read due avoidance of if-else constructions on device code bool is_on_up_border; bool is_on_low_border; @@ -83,8 +83,8 @@ __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_fiel is_inside_borders = !(is_on_low_border || is_on_up_border); e.x = -(1 / (1 + is_inside_borders)) * GradientComponent( - dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], - dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], + dev_potential[idx + (offset * is_on_up_border) - (offset * is_inside_borders)], + dev_potential[idx - (offset * is_on_low_border) + (offset * is_inside_borders)], d_cell_size[0].x); offset = d_n_nodes[0].x; @@ -93,18 +93,18 @@ __global__ void 
EvaluateFields(const double* dev_potential, double3* dev_el_fiel is_inside_borders = !(is_on_low_border || is_on_up_border); e.y = -(1 / (1 + is_inside_borders)) * GradientComponent( - dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], - dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], + dev_potential[idx + (offset * is_on_up_border) - (offset * is_inside_borders)], + dev_potential[idx - (offset * is_on_low_border) + (offset * is_inside_borders)], d_cell_size[0].y); - offset = d_n_nodes[0].y*d_n_nodes[0].x; + offset = d_n_nodes[0].y * d_n_nodes[0].x; is_on_up_border = ((threadIdx.x == 0) && (blockIdx.x == 0)); is_on_low_border = ((threadIdx.x == (blockDim.x - 1)) && (blockIdx.x == (gridDim.x - 1))); is_inside_borders = !(is_on_low_border || is_on_up_border); e.z = -(1 / (1 + is_inside_borders)) * GradientComponent( - dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], - dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], + dev_potential[idx + (offset * is_on_up_border) - (offset * is_inside_borders)], + dev_potential[idx - (offset * is_on_low_border) + (offset * is_inside_borders)], d_cell_size[0].z); dev_el_field[idx] = e; @@ -114,17 +114,17 @@ __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_fiel __global__ void AssertConvergence(const double* d_phi_current, const double* d_phi_next) { double rel_diff; double abs_diff; - double abs_tolerance = 1.0e-5; - double rel_tolerance = 1.0e-12; + double abs_tolerance = 1.0e-5; + double rel_tolerance = 1.0e-12; int idx = GetIdx(); abs_diff = fabs(d_phi_next[idx] - d_phi_current[idx]); rel_diff = abs_diff / fabs(d_phi_current[idx]); - bool converged =((abs_diff <= abs_tolerance) || (rel_diff <= rel_tolerance)); + bool converged = ((abs_diff <= abs_tolerance) || (rel_diff <= rel_tolerance)); assert(converged==true); } -FieldSolver::FieldSolver(SpatialMeshCu &mesh, Inner_regions_manager 
&inner_regions) :mesh(mesh) +FieldSolver::FieldSolver(SpatialMeshCu &mesh, Inner_regions_manager &inner_regions) : mesh(mesh) { allocate_next_phi(); copy_constants_to_device(); @@ -147,28 +147,28 @@ void FieldSolver::copy_constants_to_device() { cuda_status = cudaMemcpyToSymbol(d_cell_size, (void*)&mesh.cell_size, sizeof(double3), cudaMemcpyHostToDevice); - double dxdxdydy = mesh.cell_size.x*mesh.cell_size.x* - mesh.cell_size.y*mesh.cell_size.y; + double dxdxdydy = mesh.cell_size.x * mesh.cell_size.x * + mesh.cell_size.y * mesh.cell_size.y; cuda_status = cudaMemcpyToSymbol(dev_dxdxdydy, (void*)&dxdxdydy, sizeof(double), cudaMemcpyHostToDevice); - double dxdxdzdz = mesh.cell_size.x*mesh.cell_size.x* - mesh.cell_size.z*mesh.cell_size.z; + double dxdxdzdz = mesh.cell_size.x * mesh.cell_size.x * + mesh.cell_size.z * mesh.cell_size.z; cuda_status = cudaMemcpyToSymbol(dev_dxdxdzdz, (void*)&dxdxdzdz, sizeof(double), cudaMemcpyHostToDevice); - double dydydzdz = mesh.cell_size.y*mesh.cell_size.y* - mesh.cell_size.z*mesh.cell_size.z; + double dydydzdz = mesh.cell_size.y * mesh.cell_size.y * + mesh.cell_size.z * mesh.cell_size.z; cuda_status = cudaMemcpyToSymbol(dev_dydydzdz, (void*)&dydydzdz, sizeof(double), cudaMemcpyHostToDevice); - double dxdxdydydzdz = mesh.cell_size.x*mesh.cell_size.x* - mesh.cell_size.y*mesh.cell_size.y* - mesh.cell_size.z*mesh.cell_size.z; + double dxdxdydydzdz = mesh.cell_size.x * mesh.cell_size.x * + mesh.cell_size.y * mesh.cell_size.y * + mesh.cell_size.z * mesh.cell_size.z; cuda_status = cudaMemcpyToSymbol(dev_dxdxdydydzdz, (void*)&dxdxdydydzdz, sizeof(double), cudaMemcpyHostToDevice); - int end = mesh.n_nodes.x*mesh.n_nodes.y*mesh.n_nodes.z - 1; + int end = mesh.n_nodes.x * mesh.n_nodes.y * mesh.n_nodes.z - 1; cuda_status = cudaMemcpyToSymbol(dev_end, (void*)&end, sizeof(int), cudaMemcpyHostToDevice); } @@ -216,7 +216,7 @@ void FieldSolver::compute_phi_next_at_inner_points() dim3 blocks = mesh.GetBlocks(threads); cudaError_t cuda_status; - 
ComputePhiNext << <blocks, threads >> > (mesh.dev_potential, mesh.dev_charge_density, dev_phi_next); + ComputePhiNext<<<blocks, threads>>>(mesh.dev_potential, mesh.dev_charge_density, dev_phi_next); cuda_status = cudaDeviceSynchronize(); } @@ -239,7 +239,7 @@ bool FieldSolver::iterative_Jacobi_solutions_converged() cudaError_t status; dim3 threads = mesh.GetThreads(); dim3 blocks = mesh.GetBlocks(threads); - AssertConvergence << <blocks, threads >> > (mesh.dev_potential,dev_phi_next); + AssertConvergence<<<blocks, threads>>>(mesh.dev_potential,dev_phi_next); status = cudaDeviceSynchronize(); if (status == cudaErrorAssert) { return false; @@ -258,7 +258,7 @@ void FieldSolver::set_phi_next_as_phi_current() dim3 threads = mesh.GetThreads(); dim3 blocks = mesh.GetBlocks(threads); cudaError_t cuda_status; - SetPhiNextAsCurrent << <blocks, threads >> > (mesh.dev_potential, dev_phi_next); + SetPhiNextAsCurrent<<<blocks, threads>>>(mesh.dev_potential, dev_phi_next); cuda_status = cudaDeviceSynchronize(); } @@ -269,7 +269,7 @@ void FieldSolver::eval_fields_from_potential() dim3 blocks = mesh.GetBlocks(threads); cudaError_t cuda_status; - EvaluateFields << <blocks, threads >> > (mesh.dev_potential, mesh.dev_electric_field); + EvaluateFields<<<blocks, threads>>>(mesh.dev_potential, mesh.dev_electric_field); cuda_status = cudaDeviceSynchronize(); return; From 35729a8b813449cdc8b7893010b41bfe1c7ca877 Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 22 Dec 2018 13:38:52 +0300 Subject: [PATCH 13/83] In main.cpp fix undeclared cudaStatus (`cudaError_t status;` -> `cudaError_t cudaStatus;`) --- main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index 30c62ae..84669da 100644 --- a/main.cpp +++ b/main.cpp @@ -17,8 +17,8 @@ void extract_filename_prefix_and_suffix_from_h5filename( std::string h5_file, int main( int argc, char *argv[] ) { - cudaError_t status; - cudaStatus = cudaSetDevice(0); + cudaError_t cudaStatus; + cudaStatus = cudaSetDevice(0); std::string config_or_h5_file; parse_cmd_line( argc, argv, config_or_h5_file ); From 8b99ef18aa0d381b48d946b9e104e7d16269ae6b
Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Sun, 23 Dec 2018 08:25:54 +0700 Subject: [PATCH 14/83] PhiSolver fix jacobi - cuda part --- FieldSolver.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index 051448c..33e551b 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -82,24 +82,24 @@ __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_fiel is_on_low_border = ((threadIdx.x == (blockDim.x - 1)) && (blockIdx.x == (gridDim.x - 1))); is_inside_borders = !(is_on_low_border || is_on_up_border); - e.x = -(1 / (1 + is_inside_borders)) * GradientComponent( + e.x = -((double)1 / ((double)1 + is_inside_borders)) * GradientComponent( dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], d_cell_size[0].x); offset = d_n_nodes[0].x; - is_on_up_border = ((threadIdx.x == 0) && (blockIdx.x == 0)); - is_on_low_border = ((threadIdx.x == (blockDim.x - 1)) && (blockIdx.x == (gridDim.x - 1))); + is_on_up_border = ((threadIdx.y == 0) && (blockIdx.y == 0)); + is_on_low_border = ((threadIdx.y == (blockDim.y - 1)) && (blockIdx.y == (gridDim.y - 1))); is_inside_borders = !(is_on_low_border || is_on_up_border); - e.y = -(1 / (1 + is_inside_borders)) * GradientComponent( + e.y = -((double)1 / ((double)1 + is_inside_borders)) * GradientComponent( dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], d_cell_size[0].y); offset = d_n_nodes[0].y*d_n_nodes[0].x; - is_on_up_border = ((threadIdx.x == 0) && (blockIdx.x == 0)); - is_on_low_border = ((threadIdx.x == (blockDim.x - 1)) && (blockIdx.x == (gridDim.x - 1))); + is_on_up_border = ((threadIdx.z == 0) && (blockIdx.z == 0)); + is_on_low_border = ((threadIdx.z == (blockDim.z - 1)) && (blockIdx.z == (gridDim.z - 1))); is_inside_borders = !(is_on_low_border || 
is_on_up_border); e.z = -(1 / (1 + is_inside_borders)) * GradientComponent( From 9c564ec74761bdb6ff7333faa1e8995e53a363ed Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Sun, 23 Dec 2018 16:11:14 +0700 Subject: [PATCH 15/83] explicit double Z grad component on cuda --- FieldSolver.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index e68a294..c03e9e3 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -102,7 +102,7 @@ __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_fiel is_on_low_border = ((threadIdx.z == (blockDim.z - 1)) && (blockIdx.z == (gridDim.z - 1))); is_inside_borders = !(is_on_low_border || is_on_up_border); - e.z = -(1 / (1 + is_inside_borders)) * GradientComponent( + e.z = -((double)1 / ((double)1 + is_inside_borders)) * GradientComponent( dev_potential[idx + (offset * is_on_up_border) - (offset * is_inside_borders)], dev_potential[idx - (offset * is_on_low_border) + (offset * is_inside_borders)], d_cell_size[0].z); From 37f105f9db83db91814a4017ebf286f5812faccd Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Sun, 23 Dec 2018 19:45:10 +0700 Subject: [PATCH 16/83] memory access violation fix --- FieldSolver.cu | 2 ++ SpatialMeshCu.cu | 34 +++++++++++++++++++++++++++++++--- SpatialMeshCu.cuh | 9 +++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index c03e9e3..e567c2c 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -127,7 +127,9 @@ __global__ void AssertConvergence(const double* d_phi_current, const double* d_p FieldSolver::FieldSolver(SpatialMeshCu &mesh, Inner_regions_manager &inner_regions) : mesh(mesh) { allocate_next_phi(); + std::cout << "solver memory allocation"; copy_constants_to_device(); + std::cout << "solver copy constants"; } void FieldSolver::allocate_next_phi() diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index a4f094a..18ab6e6 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -220,14 
+220,14 @@ void SpatialMeshCu::init_constants(Config & conf) { cell_size = make_double3(volume_size.x / (n_nodes.x - 1), volume_size.y / (n_nodes.y - 1), volume_size.z / (n_nodes.z - 1)); - copy_constants_to_device(); - ///TODO Border constants init + copy_constants_to_device(); + copy_boundary_to_device(conf); } void SpatialMeshCu::copy_constants_to_device() { cudaError_t cuda_status; - + //mesh params cuda_status = cudaMemcpyToSymbol(d_n_nodes, (void*)&n_nodes, sizeof(dim3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status); @@ -243,6 +243,34 @@ void SpatialMeshCu::copy_constants_to_device() { return; } +void SpatialMeshCu::copy_boundary_to_device(Config &conf) { + cudaError_t cuda_status; + //boundary params + cuda_status = cudaMemcpyToSymbol(d_left_border, (void*)&conf.boundary_config_part.boundary_phi_left, + sizeof(double), cudaMemcpyHostToDevice); + cuda_status_check(cuda_status); + + cuda_status = cudaMemcpyToSymbol(d_right_border, (void*)&conf.boundary_config_part.boundary_phi_right, + sizeof(double), cudaMemcpyHostToDevice); + cuda_status_check(cuda_status); + + cuda_status = cudaMemcpyToSymbol(d_up_border, (void*)&conf.boundary_config_part.boundary_phi_top, + sizeof(double), cudaMemcpyHostToDevice); + cuda_status_check(cuda_status); + + cuda_status = cudaMemcpyToSymbol(d_bot_border, (void*)&conf.boundary_config_part.boundary_phi_bottom, + sizeof(double), cudaMemcpyHostToDevice); + cuda_status_check(cuda_status); + + cuda_status = cudaMemcpyToSymbol(d_near_border, (void*)&conf.boundary_config_part.boundary_phi_near, + sizeof(double), cudaMemcpyHostToDevice); + cuda_status_check(cuda_status); + + cuda_status = cudaMemcpyToSymbol(d_far_border, (void*)&conf.boundary_config_part.boundary_phi_far, + sizeof(double), cudaMemcpyHostToDevice); + cuda_status_check(cuda_status); +} + void SpatialMeshCu::allocate_ongrid_values() { int nx = n_nodes.x; int ny = n_nodes.y; diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 0dc568b..8e7117e 100644 --- 
a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -20,6 +20,14 @@ public: double3 *dev_electric_field; + //double border_left; + //double border_right; + //double border_top; + //double border_bottom; + //double border_near; + //double border_far; + + public: SpatialMeshCu(Config &conf); SpatialMeshCu(hid_t h5_spat_mesh_group); @@ -39,6 +47,7 @@ private: void check_correctness_of_related_config_fields(Config &conf); void init_constants(Config &conf); void copy_constants_to_device(); + void copy_boundary_to_device(Config &conf); void allocate_ongrid_values(); void fill_node_coordinates(); From 583add715ae526d44bac270030cbde39d45d648f Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Sun, 23 Dec 2018 21:03:46 +0700 Subject: [PATCH 17/83] cuda run params thread.x/y/z=4 --- SpatialMeshCu.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 18ab6e6..744cf2f 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -685,11 +685,11 @@ void SpatialMeshCu::cuda_status_check(cudaError_t status) } dim3 SpatialMeshCu::GetThreads() { - return dim3(16, 16, n_nodes.z / 16); + return dim3(4, 4, 4); } -dim3 SpatialMeshCu::GetBlocks(dim3 nThreads) { - return dim3(n_nodes.x / nThreads.x, n_nodes.y / nThreads.y, 16); +dim3 SpatialMeshCu::GetBlocks(dim3 nThreads) { + return dim3(n_nodes.x / nThreads.x, n_nodes.y / nThreads.y, n_nodes.z/nThreads.z); } SpatialMeshCu::~SpatialMeshCu() { From c1db6d95bae529520ebd227fcb98b769b84f9956 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Sun, 23 Dec 2018 22:29:29 +0700 Subject: [PATCH 18/83] spatial mesh debug message extended --- SpatialMeshCu.cu | 68 ++++++++++++++++++++++++++--------------------- SpatialMeshCu.cuh | 2 +- 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 744cf2f..42b37f5 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -102,6 +102,8 @@ SpatialMeshCu::SpatialMeshCu(Config &conf) { 
SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { herr_t status; cudaError_t cuda_status; + std::string debug_message = std::string(" reading from hdf5 "); + volume_size = make_double3(0, 0, 0); cell_size = make_double3(0, 0, 0); n_nodes = make_int3(0, 0, 0); @@ -148,7 +150,7 @@ SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { dim3 blocks = GetBlocks(threads); fill_coordinates <<< blocks, threads >>> (dev_node_coordinates); cuda_status = cudaDeviceSynchronize(); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); H5LTread_dataset_double(h5_spat_mesh_group, "./charge_density", h5_tmp_buf_1); @@ -161,11 +163,11 @@ SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { cuda_status = cudaMemcpy(h5_tmp_buf_1, dev_charge_density, sizeof(double) * dim, cudaMemcpyHostToDevice); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_potential, sizeof(double) * dim, cudaMemcpyHostToDevice); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); double3 *h5_tmp_vector = new double3[dim]; @@ -182,7 +184,7 @@ SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_electric_field, sizeof(double3) * dim, cudaMemcpyHostToDevice); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); delete[] h5_tmp_buf_1; delete[] h5_tmp_buf_2; @@ -228,17 +230,18 @@ void SpatialMeshCu::init_constants(Config & conf) { void SpatialMeshCu::copy_constants_to_device() { cudaError_t cuda_status; //mesh params + std::string debug_message = std::string(" copy constants "); cuda_status = cudaMemcpyToSymbol(d_n_nodes, (void*)&n_nodes, sizeof(dim3), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); cuda_status = cudaMemcpyToSymbol(d_volume_size, (void*)&volume_size, sizeof(double3), cudaMemcpyHostToDevice); - 
cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); cuda_status = cudaMemcpyToSymbol(d_cell_size, (void*)&cell_size, sizeof(double3), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); return; } @@ -246,29 +249,30 @@ void SpatialMeshCu::copy_constants_to_device() { void SpatialMeshCu::copy_boundary_to_device(Config &conf) { cudaError_t cuda_status; //boundary params + std::string debug_message = std::string(" copy border constants "); cuda_status = cudaMemcpyToSymbol(d_left_border, (void*)&conf.boundary_config_part.boundary_phi_left, sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); cuda_status = cudaMemcpyToSymbol(d_right_border, (void*)&conf.boundary_config_part.boundary_phi_right, sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); cuda_status = cudaMemcpyToSymbol(d_up_border, (void*)&conf.boundary_config_part.boundary_phi_top, sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); cuda_status = cudaMemcpyToSymbol(d_bot_border, (void*)&conf.boundary_config_part.boundary_phi_bottom, sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); cuda_status = cudaMemcpyToSymbol(d_near_border, (void*)&conf.boundary_config_part.boundary_phi_near, sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); cuda_status = cudaMemcpyToSymbol(d_far_border, (void*)&conf.boundary_config_part.boundary_phi_far, sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); } void SpatialMeshCu::allocate_ongrid_values() { @@ -278,18 +282,19 @@ void SpatialMeshCu::allocate_ongrid_values() { size_t 
total_node_count = nx * ny * nz; cudaError_t cuda_status; + std::string debug_message = std::string(" copy borders "); cuda_status = cudaMalloc < double3 >(&dev_node_coordinates, total_node_count); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); cuda_status = cudaMalloc(&dev_charge_density, total_node_count); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); cuda_status = cudaMalloc(&dev_potential, total_node_count); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); cuda_status = cudaMalloc < double3 >(&dev_electric_field, total_node_count); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); return; } @@ -298,10 +303,10 @@ void SpatialMeshCu::fill_node_coordinates() { dim3 threads = GetThreads(); dim3 blocks = GetBlocks(threads); cudaError_t cuda_status; - + std::string debug_message = std::string(" fill coordinates "); fill_coordinates <<< blocks,threads>>> (dev_node_coordinates); cuda_status= cudaDeviceSynchronize(); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); return; } @@ -317,21 +322,22 @@ void SpatialMeshCu::clear_old_density_values() { void SpatialMeshCu::set_boundary_conditions(double* d_potential) { dim3 threads = dim3(16, 16, 2); cudaError_t cuda_status; + std::string debug_message = std::string(" set boundary "); dim3 blocks = dim3(n_nodes.y / 16, n_nodes.z / 16, 1); SetBoundaryConditionOrthoX <<< blocks, threads >>> (d_potential); cuda_status = cudaDeviceSynchronize(); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); blocks = dim3(n_nodes.x / 16, n_nodes.z / 16, 2); - SetBoundaryConditionOrthoY << < blocks, threads >> > (d_potential); + SetBoundaryConditionOrthoY <<< blocks, threads >>> (d_potential); cuda_status = cudaDeviceSynchronize(); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); blocks = dim3(n_nodes.x / 16, 
n_nodes.y / 16, 2); - SetBoundaryConditionOrthoZ << < blocks, threads >> > (d_potential); + SetBoundaryConditionOrthoZ <<< blocks, threads >>> (d_potential); cuda_status = cudaDeviceSynchronize(); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); return; } @@ -451,6 +457,8 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { hid_t filespace, dset; herr_t status; cudaError_t cuda_status; + std::string debug_message = std::string(" write hdf5 "); + int rank = 1; hsize_t dims[rank]; dims[0] = n_nodes.x * n_nodes.y * n_nodes.z; @@ -467,7 +475,7 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { double3 *hdf5_tmp_write_data = new double3[dims[0]]; cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_node_coordinates, sizeof(double3) * dims[0], cudaMemcpyDeviceToHost); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); for (unsigned int i = 0; i < dims[0]; i++) { nx[i] = hdf5_tmp_write_data[i].x; ny[i] = hdf5_tmp_write_data[i].y; @@ -509,7 +517,7 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { double *hdf5_tmp_write_data = new double[dims[0]]; cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_charge_density, sizeof(double) * dims[0], cudaMemcpyDeviceToHost); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); dset = H5Dcreate(group_id, "./charge_density", H5T_IEEE_F64BE, filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); @@ -525,7 +533,7 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { double *hdf5_tmp_write_data = new double[dims[0]]; cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_potential, sizeof(double) * dims[0], cudaMemcpyDeviceToHost); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); dset = H5Dcreate(group_id, "./potential", H5T_IEEE_F64BE, filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); @@ -544,7 +552,7 @@ void 
SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { double3 *hdf5_tmp_write_data = new double3[dims[0]]; cuda_status = cudaMemcpy(dev_node_coordinates, hdf5_tmp_write_data, sizeof(double3) * dims[0], cudaMemcpyDeviceToHost); - cuda_status_check(cuda_status); + cuda_status_check(cuda_status, debug_message); for (unsigned int i = 0; i < dims[0]; i++) { ex[i] = hdf5_tmp_write_data[i].x; @@ -676,10 +684,10 @@ void SpatialMeshCu::hdf5_status_check(herr_t status) } } -void SpatialMeshCu::cuda_status_check(cudaError_t status) +void SpatialMeshCu::cuda_status_check(cudaError_t status, std::string &sender) { if (status > 0) { - std::cout << "Cuda error: " << cudaGetErrorString(status) << std::endl; + std::cout << "Cuda error at"<< sender <<": " << cudaGetErrorString(status) << std::endl; exit(EXIT_FAILURE); } } diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 8e7117e..686fe1b 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -69,7 +69,7 @@ private: void grid_z_step_gt_zero_le_grid_z_size(Config &conf); void check_and_exit_if_not(const bool &should_be, const std::string &message); //cuda - void cuda_status_check(cudaError_t status); + void cuda_status_check(cudaError_t status, std::string &sender); }; #endif /* _SPATIAL_MESH_H_ */ From c10d9d79b3bffd50831276b77b5d15383d7160ce Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Mon, 24 Dec 2018 00:07:47 +0700 Subject: [PATCH 19/83] constants copying fix --- SpatialMeshCu.cu | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 42b37f5..c52ff23 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -231,15 +231,15 @@ void SpatialMeshCu::copy_constants_to_device() { cudaError_t cuda_status; //mesh params std::string debug_message = std::string(" copy constants "); - cuda_status = cudaMemcpyToSymbol(d_n_nodes, (void*)&n_nodes, sizeof(dim3), + cuda_status = cudaMemcpyToSymbol(d_n_nodes, &n_nodes, sizeof(dim3), 
cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_volume_size, (void*)&volume_size, sizeof(double3), + cuda_status = cudaMemcpyToSymbol(d_volume_size, &volume_size, sizeof(double3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_cell_size, (void*)&cell_size, sizeof(double3), + cuda_status = cudaMemcpyToSymbol(d_cell_size, &cell_size, sizeof(double3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); @@ -250,27 +250,27 @@ void SpatialMeshCu::copy_boundary_to_device(Config &conf) { cudaError_t cuda_status; //boundary params std::string debug_message = std::string(" copy border constants "); - cuda_status = cudaMemcpyToSymbol(d_left_border, (void*)&conf.boundary_config_part.boundary_phi_left, + cuda_status = cudaMemcpyToSymbol(d_left_border, &conf.boundary_config_part.boundary_phi_left, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_right_border, (void*)&conf.boundary_config_part.boundary_phi_right, + cuda_status = cudaMemcpyToSymbol(d_right_border, &conf.boundary_config_part.boundary_phi_right, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_up_border, (void*)&conf.boundary_config_part.boundary_phi_top, + cuda_status = cudaMemcpyToSymbol(d_up_border, &conf.boundary_config_part.boundary_phi_top, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_bot_border, (void*)&conf.boundary_config_part.boundary_phi_bottom, + cuda_status = cudaMemcpyToSymbol(d_bot_border, &conf.boundary_config_part.boundary_phi_bottom, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_near_border, (void*)&conf.boundary_config_part.boundary_phi_near, + 
cuda_status = cudaMemcpyToSymbol(d_near_border, &conf.boundary_config_part.boundary_phi_near, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_far_border, (void*)&conf.boundary_config_part.boundary_phi_far, + cuda_status = cudaMemcpyToSymbol(d_far_border, &conf.boundary_config_part.boundary_phi_far, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); } @@ -282,7 +282,7 @@ void SpatialMeshCu::allocate_ongrid_values() { size_t total_node_count = nx * ny * nz; cudaError_t cuda_status; - std::string debug_message = std::string(" copy borders "); + std::string debug_message = std::string(" allocation "); cuda_status = cudaMalloc < double3 >(&dev_node_coordinates, total_node_count); cuda_status_check(cuda_status, debug_message); From b762ae0179c318a77b23c84709566c605c3a904b Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Mon, 24 Dec 2018 00:23:05 +0700 Subject: [PATCH 20/83] temp border double variables --- SpatialMeshCu.cu | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index c52ff23..0f90c08 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -250,27 +250,37 @@ void SpatialMeshCu::copy_boundary_to_device(Config &conf) { cudaError_t cuda_status; //boundary params std::string debug_message = std::string(" copy border constants "); - cuda_status = cudaMemcpyToSymbol(d_left_border, &conf.boundary_config_part.boundary_phi_left, + + double left_border, right_border, up_border, bot_border, near_border, far_border; + + left_border = conf.boundary_config_part.boundary_phi_left; + right_border = conf.boundary_config_part.boundary_phi_right; + up_border = conf.boundary_config_part.boundary_phi_top; + bot_border = conf.boundary_config_part.boundary_phi_bottom; + near_border = conf.boundary_config_part.boundary_phi_near; + far_border = conf.boundary_config_part.boundary_phi_far; + + 
cuda_status = cudaMemcpyToSymbol(d_left_border, &left_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_right_border, &conf.boundary_config_part.boundary_phi_right, + cuda_status = cudaMemcpyToSymbol(d_right_border, &right_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_up_border, &conf.boundary_config_part.boundary_phi_top, + cuda_status = cudaMemcpyToSymbol(d_up_border, &up_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_bot_border, &conf.boundary_config_part.boundary_phi_bottom, + cuda_status = cudaMemcpyToSymbol(d_bot_border, &bot_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_near_border, &conf.boundary_config_part.boundary_phi_near, + cuda_status = cudaMemcpyToSymbol(d_near_border, &near_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_far_border, &conf.boundary_config_part.boundary_phi_far, + cuda_status = cudaMemcpyToSymbol(d_far_border, &far_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); } From 9f7a5d778aa93baaeadf6c5364505829bddfd443 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Mon, 24 Dec 2018 01:22:14 +0700 Subject: [PATCH 21/83] debug log extended: copying constants success --- SpatialMeshCu.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 0f90c08..2b5fe99 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -242,7 +242,7 @@ void SpatialMeshCu::copy_constants_to_device() { cuda_status = cudaMemcpyToSymbol(d_cell_size, &cell_size, sizeof(double3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - + 
std::cout << "node, volume and cell size copied to constant device memory succesfully" << std::endl; return; } From 72f9db86b3344dab3a893e9451d3750774ae0e7b Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Mon, 24 Dec 2018 01:37:38 +0700 Subject: [PATCH 22/83] SpatialMeshCu.cu : explicit const void* for copying source pointer --- SpatialMeshCu.cu | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 2b5fe99..edcad3f 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -4,14 +4,14 @@ __constant__ double3 d_volume_size[1]; __constant__ double3 d_cell_size[1]; __constant__ int3 d_n_nodes[1]; -__constant__ double d_up_border[1]; -__constant__ double d_bot_border[1]; +__device__ __constant__ double d_up_border[1]; +__device__ __constant__ double d_bot_border[1]; -__constant__ double d_left_border[1]; -__constant__ double d_right_border[1]; +__device__ __constant__ double d_left_border[1]; +__device__ __constant__ double d_right_border[1]; -__constant__ double d_far_border[1]; -__constant__ double d_near_border[1]; +__device__ __constant__ double d_far_border[1]; +__device__ __constant__ double d_near_border[1]; __device__ int GetIdxVolume() { //int xStepthread = 1; @@ -242,7 +242,6 @@ void SpatialMeshCu::copy_constants_to_device() { cuda_status = cudaMemcpyToSymbol(d_cell_size, &cell_size, sizeof(double3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - std::cout << "node, volume and cell size copied to constant device memory succesfully" << std::endl; return; } @@ -260,23 +259,23 @@ void SpatialMeshCu::copy_boundary_to_device(Config &conf) { near_border = conf.boundary_config_part.boundary_phi_near; far_border = conf.boundary_config_part.boundary_phi_far; - cuda_status = cudaMemcpyToSymbol(d_left_border, &left_border, + cuda_status = cudaMemcpyToSymbol(d_left_border, (const void*) &left_border, sizeof(double), cudaMemcpyHostToDevice); 
cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_right_border, &right_border, + cuda_status = cudaMemcpyToSymbol(d_right_border, (const void*) &right_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_up_border, &up_border, + cuda_status = cudaMemcpyToSymbol(d_up_border, (const void*) &up_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_bot_border, &bot_border, + cuda_status = cudaMemcpyToSymbol(d_bot_border, (const void*) &bot_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_near_border, &near_border, + cuda_status = cudaMemcpyToSymbol(d_near_border, (const void*) &near_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); From 70debf5803e88d8c5de0413223b6b1ec456674c4 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Mon, 24 Dec 2018 02:07:10 +0700 Subject: [PATCH 23/83] non constant boundary allocation --- SpatialMeshCu.cu | 48 +++++++++++++++++++++++++++++++++-------------- SpatialMeshCu.cuh | 7 ++++++- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index edcad3f..0831b16 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -4,14 +4,14 @@ __constant__ double3 d_volume_size[1]; __constant__ double3 d_cell_size[1]; __constant__ int3 d_n_nodes[1]; -__device__ __constant__ double d_up_border[1]; -__device__ __constant__ double d_bot_border[1]; - -__device__ __constant__ double d_left_border[1]; -__device__ __constant__ double d_right_border[1]; - -__device__ __constant__ double d_far_border[1]; -__device__ __constant__ double d_near_border[1]; +//__constant__ double d_up_border[1]; +//__constant__ double d_bot_border[1]; +// +//__constant__ double d_left_border[1]; +//__constant__ double 
d_right_border[1]; +// +//__constant__ double d_far_border[1]; +//__constant__ double d_near_border[1]; __device__ int GetIdxVolume() { //int xStepthread = 1; @@ -259,27 +259,27 @@ void SpatialMeshCu::copy_boundary_to_device(Config &conf) { near_border = conf.boundary_config_part.boundary_phi_near; far_border = conf.boundary_config_part.boundary_phi_far; - cuda_status = cudaMemcpyToSymbol(d_left_border, (const void*) &left_border, + cuda_status = cudaMemcpy((void**) &d_left_border, &left_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_right_border, (const void*) &right_border, + cuda_status = cudaMemcpy((void**) &d_right_border, &right_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_up_border, (const void*) &up_border, + cuda_status = cudaMemcpy((void**) &d_up_border, &up_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_bot_border, (const void*) &bot_border, + cuda_status = cudaMemcpy((void**) &d_bot_border, &bot_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_near_border, (const void*) &near_border, + cuda_status = cudaMemcpy((void**) &d_near_border, &near_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_far_border, &far_border, + cuda_status = cudaMemcpy((void**) &d_far_border, &far_border, sizeof(double), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); } @@ -305,6 +305,26 @@ void SpatialMeshCu::allocate_ongrid_values() { cuda_status = cudaMalloc < double3 >(&dev_electric_field, total_node_count); cuda_status_check(cuda_status, debug_message); + debug_message = std::string(" borders allocation "); + + cuda_status = 
cudaMalloc < double >(&d_left_border, 1); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMalloc < double >(&d_right_border, 1); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMalloc < double >(&d_up_border, 1); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMalloc < double >(&d_bot_border, 1); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMalloc < double >(&d_near_border, 1); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMalloc < double >(&d_far_border, 1); + cuda_status_check(cuda_status, debug_message); + return; } diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 686fe1b..cbe8d4d 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -19,7 +19,12 @@ public: double *dev_potential; double3 *dev_electric_field; - + double* d_up_border; + double* d_bot_border; + double* d_left_border; + double* d_right_border; + double* d_far_border; + double* d_near_border; //double border_left; //double border_right; //double border_top; From e41004d125f032aac66d2ec5302461eee1eafe28 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Mon, 24 Dec 2018 02:17:39 +0700 Subject: [PATCH 24/83] set boundary conditions Cuda-side fix --- SpatialMeshCu.cu | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 0831b16..c260a20 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -39,7 +39,7 @@ __global__ void fill_coordinates(double3* node_coordinates) { d_volume_size[0].y * y, d_volume_size[0].z * z); //(double)., } -__global__ void SetBoundaryConditionOrthoX(double* potential) { +__global__ void SetBoundaryConditionOrthoX(double* potential, double* left_bound, double* right_bound) { int xIdx = blockIdx.z * (d_n_nodes[0].x - 1); //0 or nodes.x-1 int yStepThread = d_n_nodes[0].x; //x= @@ -51,12 +51,12 @@ __global__ void SetBoundaryConditionOrthoX(double* potential) { int idx = xIdx 
+ threadIdx.x * yStepThread + blockIdx.x * yStepBlock + threadIdx.y * zStepThread + blockIdx.y * zStepBlock; - potential[idx] = ((double)(1 - blockIdx.z)) * d_left_border[0] - + (blockIdx.z * d_right_border[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * left_bound[0] + + (blockIdx.z * right_bound[0]); } -__global__ void SetBoundaryConditionOrthoY(double* potential) { +__global__ void SetBoundaryConditionOrthoY(double* potential, double* bot_bound, double* up_bound) { int yIdx = blockIdx.z * d_n_nodes[0].x * (d_n_nodes[0].y - 1); //0 or nodes.x-1 int xStepThread = 1; //x= @@ -68,12 +68,12 @@ __global__ void SetBoundaryConditionOrthoY(double* potential) { int idx = yIdx + threadIdx.x * xStepThread + blockIdx.x * xStepBlock + threadIdx.y * zStepThread + blockIdx.y * zStepBlock; - potential[idx] = ((double)(1 - blockIdx.z)) * d_bot_border[0] - + (blockIdx.z * d_up_border[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * bot_bound[0] + + (blockIdx.z * up_bound[0]); } -__global__ void SetBoundaryConditionOrthoZ(double* potential) { +__global__ void SetBoundaryConditionOrthoZ(double* potential, double* near_bound, double* far_bound) { int zIdx = blockIdx.z * (d_n_nodes[0].x * d_n_nodes[0].y * (d_n_nodes[0].z - 1)); //0 or nodes.x-1 @@ -86,8 +86,8 @@ __global__ void SetBoundaryConditionOrthoZ(double* potential) { int idx = zIdx + threadIdx.x * xStepThread + blockIdx.x * xStepBlock + threadIdx.y * yStepThread + blockIdx.y * yStepBlock; - potential[idx] = ((double)(1 - blockIdx.z)) * d_near_border[0] - + (blockIdx.z * d_far_border[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * near_bound[0] + + (blockIdx.z * far_bound[0]); } @@ -354,17 +354,17 @@ void SpatialMeshCu::set_boundary_conditions(double* d_potential) { std::string debug_message = std::string(" set boundary "); dim3 blocks = dim3(n_nodes.y / 16, n_nodes.z / 16, 1); - SetBoundaryConditionOrthoX <<< blocks, threads >>> (d_potential); + SetBoundaryConditionOrthoX <<< blocks, threads >>> (d_potential, 
d_left_border, d_right_border); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); blocks = dim3(n_nodes.x / 16, n_nodes.z / 16, 2); - SetBoundaryConditionOrthoY <<< blocks, threads >>> (d_potential); + SetBoundaryConditionOrthoY <<< blocks, threads >>> (d_potential, d_bot_border, d_up_border); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); blocks = dim3(n_nodes.x / 16, n_nodes.y / 16, 2); - SetBoundaryConditionOrthoZ <<< blocks, threads >>> (d_potential); + SetBoundaryConditionOrthoZ <<< blocks, threads >>> (d_potential, d_near_border, d_far_border); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); From 1065f0dc0e083ccc0f0ca5fbf971d75c8c04638e Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Wed, 26 Dec 2018 13:39:19 +0700 Subject: [PATCH 25/83] reworked memory constant memory usage --- SpatialMeshCu.cu | 223 ++++++++++++++++++++++------------------------ SpatialMeshCu.cuh | 22 +++-- 2 files changed, 122 insertions(+), 123 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index c260a20..2e2cfee 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -4,23 +4,23 @@ __constant__ double3 d_volume_size[1]; __constant__ double3 d_cell_size[1]; __constant__ int3 d_n_nodes[1]; -//__constant__ double d_up_border[1]; -//__constant__ double d_bot_border[1]; -// -//__constant__ double d_left_border[1]; -//__constant__ double d_right_border[1]; -// -//__constant__ double d_far_border[1]; -//__constant__ double d_near_border[1]; +#define TOP 0 +#define BOTTOM 1 +#define LEFT 2 +#define RIGHT 3 +#define FAR 4 +#define NEAR 5 + +__constant__ double d_boundary[6]; __device__ int GetIdxVolume() { //int xStepthread = 1; int xStepBlock = blockDim.x; - int yStepThread = d_n_nodes[0].x; + int yStepThread = d_n_nodes->x; int yStepBlock = yStepThread * blockDim.y; - int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; + int zStepThread = d_n_nodes->x * d_n_nodes->y; 
int zStepBlock = zStepThread * blockDim.z; return threadIdx.x + blockIdx.x * xStepBlock + threadIdx.y * yStepThread @@ -35,28 +35,28 @@ __global__ void fill_coordinates(double3* node_coordinates) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; int z = threadIdx.z + blockIdx.z * blockDim.z; - node_coordinates[idx] = make_double3(d_volume_size[0].x * x, - d_volume_size[0].y * y, d_volume_size[0].z * z); //(double)., + node_coordinates[idx] = make_double3(d_volume_size->x * x, + d_volume_size->y * y, d_volume_size->z * z); //(double)., } -__global__ void SetBoundaryConditionOrthoX(double* potential, double* left_bound, double* right_bound) { - int xIdx = blockIdx.z * (d_n_nodes[0].x - 1); //0 or nodes.x-1 +__global__ void SetBoundaryConditionOrthoX(double* potential) { + int xIdx = blockIdx.z * (d_n_nodes->x - 1); //0 or nodes.x-1 - int yStepThread = d_n_nodes[0].x; //x= - int yStepBlock = d_n_nodes[0].x * blockDim.x; + int yStepThread = d_n_nodes->x; //x= + int yStepBlock = d_n_nodes->x * blockDim.x; - int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; + int zStepThread = d_n_nodes->x * d_n_nodes->y; int zStepBlock = zStepThread * blockDim.y; int idx = xIdx + threadIdx.x * yStepThread + blockIdx.x * yStepBlock + threadIdx.y * zStepThread + blockIdx.y * zStepBlock; - potential[idx] = ((double)(1 - blockIdx.z)) * left_bound[0] - + (blockIdx.z * right_bound[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * d_boundary[LEFT] + + (blockIdx.z * d_boundary[RIGHT]); } -__global__ void SetBoundaryConditionOrthoY(double* potential, double* bot_bound, double* up_bound) { +__global__ void SetBoundaryConditionOrthoY(double* potential) { int yIdx = blockIdx.z * d_n_nodes[0].x * (d_n_nodes[0].y - 1); //0 or nodes.x-1 int xStepThread = 1; //x= @@ -68,12 +68,12 @@ __global__ void SetBoundaryConditionOrthoY(double* potential, double* bot_bound, int idx = yIdx + threadIdx.x * xStepThread + blockIdx.x * xStepBlock + threadIdx.y * 
zStepThread + blockIdx.y * zStepBlock; - potential[idx] = ((double)(1 - blockIdx.z)) * bot_bound[0] - + (blockIdx.z * up_bound[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * d_boundary[BOTTOM] + + (blockIdx.z * d_boundary[TOP]); } -__global__ void SetBoundaryConditionOrthoZ(double* potential, double* near_bound, double* far_bound) { +__global__ void SetBoundaryConditionOrthoZ(double* potential) { int zIdx = blockIdx.z * (d_n_nodes[0].x * d_n_nodes[0].y * (d_n_nodes[0].z - 1)); //0 or nodes.x-1 @@ -86,8 +86,8 @@ __global__ void SetBoundaryConditionOrthoZ(double* potential, double* near_bound int idx = zIdx + threadIdx.x * xStepThread + blockIdx.x * xStepBlock + threadIdx.y * yStepThread + blockIdx.y * yStepBlock; - potential[idx] = ((double)(1 - blockIdx.z)) * near_bound[0] - + (blockIdx.z * far_bound[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * d_boundary[NEAR] + + (blockIdx.z * d_boundary[FAR]); } @@ -104,44 +104,44 @@ SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { cudaError_t cuda_status; std::string debug_message = std::string(" reading from hdf5 "); - volume_size = make_double3(0, 0, 0); - cell_size = make_double3(0, 0, 0); - n_nodes = make_int3(0, 0, 0); + volume_size[0] = make_double3(0, 0, 0); + cell_size[0] = make_double3(0, 0, 0); + n_nodes[0] = make_int3(0, 0, 0); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_volume_size", - &volume_size.x); + &volume_size->x); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_volume_size", - &volume_size.y); + &volume_size->y); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_volume_size", - &volume_size.z); + &volume_size->z); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_cell_size", - &cell_size.x); + &cell_size->x); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_cell_size", - &cell_size.y); + &cell_size->y); 
hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_cell_size", - &cell_size.z); + &cell_size->z); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "x_n_nodes", - &n_nodes.x); + &n_nodes->x); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "y_n_nodes", - &n_nodes.y); + &n_nodes->y); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "z_n_nodes", - &n_nodes.z); + &n_nodes->z); hdf5_status_check(status); allocate_ongrid_values(); copy_constants_to_device(); - int dim = n_nodes.x * n_nodes.y * n_nodes.z; + int dim = n_nodes[0].x * n_nodes[0].y * n_nodes->z; double *h5_tmp_buf_1 = new double[dim]; double *h5_tmp_buf_2 = new double[dim]; double *h5_tmp_buf_3 = new double[dim]; @@ -204,7 +204,7 @@ void SpatialMeshCu::check_correctness_of_related_config_fields(Config &conf) { } void SpatialMeshCu::init_constants(Config & conf) { - n_nodes = make_int3( + n_nodes[0] = make_int3( ceil( conf.mesh_config_part.grid_x_size / conf.mesh_config_part.grid_x_step) + 1, @@ -215,12 +215,12 @@ void SpatialMeshCu::init_constants(Config & conf) { conf.mesh_config_part.grid_z_size / conf.mesh_config_part.grid_z_step) + 1); - volume_size = make_double3(conf.mesh_config_part.grid_x_size, + volume_size[0] = make_double3(conf.mesh_config_part.grid_x_size, conf.mesh_config_part.grid_y_size, conf.mesh_config_part.grid_z_size); - cell_size = make_double3(volume_size.x / (n_nodes.x - 1), - volume_size.y / (n_nodes.y - 1), volume_size.z / (n_nodes.z - 1)); + cell_size[0] = make_double3(volume_size[0].x / (n_nodes[0].x - 1), + volume_size[0].y / (n_nodes[0].y - 1), volume_size[0].z / (n_nodes[0].z - 1)); copy_constants_to_device(); @@ -231,15 +231,21 @@ void SpatialMeshCu::copy_constants_to_device() { cudaError_t cuda_status; //mesh params std::string debug_message = std::string(" copy constants "); - cuda_status = cudaMemcpyToSymbol(d_n_nodes, &n_nodes, 
sizeof(dim3), + + cuda_status = cudaMalloc < int3 >(&d_n_nodes, 1); + cuda_status_check(cuda_status, debug_message); + + debug_message = std::string(" copy constants "); + + cuda_status = cudaMemcpyToSymbol(d_n_nodes, n_nodes, sizeof(int3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_volume_size, &volume_size, sizeof(double3), + cuda_status = cudaMemcpyToSymbol(d_volume_size, volume_size, sizeof(double3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_cell_size, &cell_size, sizeof(double3), + cuda_status = cudaMemcpyToSymbol(d_cell_size, cell_size, sizeof(double3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); return; @@ -248,46 +254,29 @@ void SpatialMeshCu::copy_constants_to_device() { void SpatialMeshCu::copy_boundary_to_device(Config &conf) { cudaError_t cuda_status; //boundary params - std::string debug_message = std::string(" copy border constants "); + std::string debug_message; - double left_border, right_border, up_border, bot_border, near_border, far_border; + debug_message = std::string(" copy border constants "); - left_border = conf.boundary_config_part.boundary_phi_left; - right_border = conf.boundary_config_part.boundary_phi_right; - up_border = conf.boundary_config_part.boundary_phi_top; - bot_border = conf.boundary_config_part.boundary_phi_bottom; - near_border = conf.boundary_config_part.boundary_phi_near; - far_border = conf.boundary_config_part.boundary_phi_far; + double boundary[6]; - cuda_status = cudaMemcpy((void**) &d_left_border, &left_border, - sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); + boundary[LEFT] = conf.boundary_config_part.boundary_phi_left; + boundary[RIGHT] = conf.boundary_config_part.boundary_phi_right; + boundary[TOP] = conf.boundary_config_part.boundary_phi_top; + boundary[BOTTOM] = conf.boundary_config_part.boundary_phi_bottom; + 
boundary[NEAR] = conf.boundary_config_part.boundary_phi_near; + boundary[FAR] = conf.boundary_config_part.boundary_phi_far; - cuda_status = cudaMemcpy((void**) &d_right_border, &right_border, - sizeof(double), cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(d_boundary, boundary, + sizeof(double) * 6, cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpy((void**) &d_up_border, &up_border, - sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpy((void**) &d_bot_border, &bot_border, - sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpy((void**) &d_near_border, &near_border, - sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpy((void**) &d_far_border, &far_border, - sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); } void SpatialMeshCu::allocate_ongrid_values() { - int nx = n_nodes.x; - int ny = n_nodes.y; - int nz = n_nodes.z; + int nx = n_nodes->x; + int ny = n_nodes->y; + int nz = n_nodes->z; size_t total_node_count = nx * ny * nz; cudaError_t cuda_status; @@ -305,25 +294,7 @@ void SpatialMeshCu::allocate_ongrid_values() { cuda_status = cudaMalloc < double3 >(&dev_electric_field, total_node_count); cuda_status_check(cuda_status, debug_message); - debug_message = std::string(" borders allocation "); - - cuda_status = cudaMalloc < double >(&d_left_border, 1); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMalloc < double >(&d_right_border, 1); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMalloc < double >(&d_up_border, 1); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMalloc < double >(&d_bot_border, 1); - cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMalloc < double >(&d_near_border, 
1); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMalloc < double >(&d_far_border, 1); - cuda_status_check(cuda_status, debug_message); return; } @@ -353,17 +324,17 @@ void SpatialMeshCu::set_boundary_conditions(double* d_potential) { cudaError_t cuda_status; std::string debug_message = std::string(" set boundary "); - dim3 blocks = dim3(n_nodes.y / 16, n_nodes.z / 16, 1); + dim3 blocks = dim3(n_nodes->y / 16, n_nodes->z / 16, 1); SetBoundaryConditionOrthoX <<< blocks, threads >>> (d_potential, d_left_border, d_right_border); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); - blocks = dim3(n_nodes.x / 16, n_nodes.z / 16, 2); + blocks = dim3(n_nodes->x / 16, n_nodes->z / 16, 2); SetBoundaryConditionOrthoY <<< blocks, threads >>> (d_potential, d_bot_border, d_up_border); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); - blocks = dim3(n_nodes.x / 16, n_nodes.y / 16, 2); + blocks = dim3(n_nodes->x / 16, n_nodes->y / 16, 2); SetBoundaryConditionOrthoZ <<< blocks, threads >>> (d_potential, d_near_border, d_far_border); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); @@ -454,31 +425,31 @@ void SpatialMeshCu::write_hdf5_attributes(hid_t group_id) { std::string hdf5_current_group = "./"; status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "x_volume_size", &volume_size.x, single_element); + "x_volume_size", &(volume_size->x), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "y_volume_size", &volume_size.y, single_element); + "y_volume_size", &(volume_size->y), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "z_volume_size", &volume_size.z, single_element); + "z_volume_size", &(volume_size->z), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, 
hdf5_current_group.c_str(), - "x_cell_size", &cell_size.x, single_element); + "x_cell_size", &(cell_size->x), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "y_cell_size", &cell_size.y, single_element); + "y_cell_size", &(cell_size->y), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "z_cell_size", &cell_size.z, single_element); + "z_cell_size", &(cell_size->z), single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "x_n_nodes", &n_nodes.x, single_element); + "x_n_nodes", &(n_nodes->x), single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "y_n_nodes", &n_nodes.y, single_element); + "y_n_nodes", &(n_nodes->y), single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "z_n_nodes", &n_nodes.z, single_element); + "z_n_nodes", &(n_nodes->z), single_element); hdf5_status_check(status); } @@ -490,7 +461,7 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { int rank = 1; hsize_t dims[rank]; - dims[0] = n_nodes.x * n_nodes.y * n_nodes.z; + dims[0] = n_nodes->x * n_nodes->y * n_nodes->z; filespace = H5Screate_simple(rank, dims, NULL); @@ -619,6 +590,30 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { delete[] ez; delete[] hdf5_tmp_write_data; } + double left_border[1], right_border[1], up_border[1], bot_border[1], near_border[1], far_border[1]; + cuda_status = cudaMemcpyToSymbol( left_border, d_left_border, + sizeof(double), cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMemcpyToSymbol( right_border, d_right_border, + sizeof(double), cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMemcpyToSymbol( up_border, d_up_border, + 
sizeof(double), cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMemcpyToSymbol( bot_border, d_bot_border, + sizeof(double), cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMemcpyToSymbol( near_border, d_near_border, + sizeof(double), cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMemcpyToSymbol( far_border, d_far_border, + sizeof(double), cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status, debug_message); status = H5Sclose(filespace); hdf5_status_check(status); } @@ -672,8 +667,8 @@ void SpatialMeshCu::check_and_exit_if_not(const bool &should_be, } double SpatialMeshCu::node_number_to_coordinate_x(int i) { - if (i >= 0 && i < n_nodes.x) { - return i * cell_size.x; + if (i >= 0 && i < n_nodes->x) { + return i * cell_size->x; } else { printf("invalid node number i=%d at node_number_to_coordinate_x\n", i); @@ -683,8 +678,8 @@ double SpatialMeshCu::node_number_to_coordinate_x(int i) { } double SpatialMeshCu::node_number_to_coordinate_y(int j) { - if (j >= 0 && j < n_nodes.y) { - return j * cell_size.y; + if (j >= 0 && j < n_nodes->y) { + return j * cell_size->y; } else { printf("invalid node number j=%d at node_number_to_coordinate_y\n", j); @@ -694,8 +689,8 @@ double SpatialMeshCu::node_number_to_coordinate_y(int j) { } double SpatialMeshCu::node_number_to_coordinate_z(int k) { - if (k >= 0 && k < n_nodes.z) { - return k * cell_size.z; + if (k >= 0 && k < n_nodes->z) { + return k * cell_size->z; } else { printf("invalid node number k=%d at node_number_to_coordinate_z\n", k); @@ -726,7 +721,7 @@ dim3 SpatialMeshCu::GetThreads() { } dim3 SpatialMeshCu::GetBlocks(dim3 nThreads) { - return dim3(n_nodes.x / nThreads.x, n_nodes.y / nThreads.y, n_nodes.z/nThreads.z); + return dim3(n_nodes->x / nThreads.x, n_nodes->y / nThreads.y, n_nodes->z/nThreads.z); } SpatialMeshCu::~SpatialMeshCu() { diff --git a/SpatialMeshCu.cuh 
b/SpatialMeshCu.cuh index cbe8d4d..8443fa6 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -11,20 +11,24 @@ class SpatialMeshCu { public: - int3 n_nodes; - double3 cell_size; - double3 volume_size; + int3 n_nodes[1]; + double3 cell_size[1]; + double3 volume_size[1]; double3 *dev_node_coordinates; double *dev_charge_density; double *dev_potential; double3 *dev_electric_field; - double* d_up_border; - double* d_bot_border; - double* d_left_border; - double* d_right_border; - double* d_far_border; - double* d_near_border; + double3* d_volume_size; + double3* d_cell_size; + int3* d_n_nodes; + + //double* d_up_border; + //double* d_bot_border; + //double* d_left_border; + //double* d_right_border; + //double* d_far_border; + //double* d_near_border; //double border_left; //double border_right; //double border_top; From 9509815bd6a1ab0fb84abda7b46ff6a07b6dcba8 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Wed, 26 Dec 2018 14:36:10 +0700 Subject: [PATCH 26/83] compile errors n_nodes access from inner regions --- SpatialMeshCu.cu | 4 ++-- inner_region.cpp | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 2e2cfee..0735a35 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -75,12 +75,12 @@ __global__ void SetBoundaryConditionOrthoY(double* potential) { __global__ void SetBoundaryConditionOrthoZ(double* potential) { int zIdx = blockIdx.z - * (d_n_nodes[0].x * d_n_nodes[0].y * (d_n_nodes[0].z - 1)); //0 or nodes.x-1 + * (d_n_nodes->x * d_n_nodes->y * (d_n_nodes->z - 1)); //0 or nodes.x-1 int xStepThread = 1; //x= int xStepBlock = blockDim.x; - int yStepThread = d_n_nodes[0].x; + int yStepThread = d_n_nodes->x; int yStepBlock = yStepThread * blockDim.y; int idx = zIdx + threadIdx.x * xStepThread + blockIdx.x * xStepBlock diff --git a/inner_region.cpp b/inner_region.cpp index 1a8a1d2..253665a 100644 --- a/inner_region.cpp +++ b/inner_region.cpp @@ -82,9 +82,9 @@ bool 
Inner_region::check_if_node_inside( Node_reference &node, void Inner_region::mark_inner_nodes( SpatialMeshCu &spat_mesh ) { - int nx = spat_mesh.n_nodes.x; - int ny = spat_mesh.n_nodes.y; - int nz = spat_mesh.n_nodes.z; + int nx = spat_mesh.n_nodes->x; + int ny = spat_mesh.n_nodes->y; + int nz = spat_mesh.n_nodes->z; for ( int k = 0; k < nz; k++ ) { for ( int j = 0; j < ny; j++ ) { @@ -101,9 +101,9 @@ void Inner_region::mark_inner_nodes( SpatialMeshCu &spat_mesh ) void Inner_region::select_inner_nodes_not_at_domain_edge( SpatialMeshCu &spat_mesh ) { - int nx = spat_mesh.n_nodes.x; - int ny = spat_mesh.n_nodes.y; - int nz = spat_mesh.n_nodes.z; + int nx = spat_mesh.n_nodes->x; + int ny = spat_mesh.n_nodes->y; + int nz = spat_mesh.n_nodes->z; inner_nodes_not_at_domain_edge.reserve( inner_nodes.size() ); @@ -116,9 +116,9 @@ void Inner_region::select_inner_nodes_not_at_domain_edge( SpatialMeshCu &spat_me void Inner_region::mark_near_boundary_nodes( SpatialMeshCu &spat_mesh ) { - int nx = spat_mesh.n_nodes.x; - int ny = spat_mesh.n_nodes.y; - int nz = spat_mesh.n_nodes.z; + int nx = spat_mesh.n_nodes->x; + int ny = spat_mesh.n_nodes->y; + int nz = spat_mesh.n_nodes->z; // rewrite; for( auto &node : inner_nodes ){ @@ -143,9 +143,9 @@ void Inner_region::select_near_boundary_nodes_not_at_domain_edge( SpatialMeshCu { // todo: repeats with select_inner_nodes_not_at_domain_edge; // remove code duplication - int nx = spat_mesh.n_nodes.x; - int ny = spat_mesh.n_nodes.y; - int nz = spat_mesh.n_nodes.z; + int nx = spat_mesh.n_nodes->x; + int ny = spat_mesh.n_nodes->y; + int nz = spat_mesh.n_nodes->z; near_boundary_nodes_not_at_domain_edge.reserve( near_boundary_nodes.size() ); From cc50fe82d3d0b9387fb7543d982c0b5b5c3ac04c Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Wed, 26 Dec 2018 14:40:28 +0700 Subject: [PATCH 27/83] domain compile fix --- domain.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/domain.cpp b/domain.cpp index 2af2e3e..052dcba 
100644 --- a/domain.cpp +++ b/domain.cpp @@ -367,9 +367,9 @@ bool Domain::out_of_bound( const Particle &p ) bool out; out = - ( x >= spat_mesh.volume_size.x ) || ( x <= 0 ) || - ( y >= spat_mesh.volume_size.y ) || ( y <= 0 ) || - ( z >= spat_mesh.volume_size.z ) || ( z <= 0 ) ; + ( x >= spat_mesh.volume_size->x ) || ( x <= 0 ) || + ( y >= spat_mesh.volume_size->y ) || ( y <= 0 ) || + ( z >= spat_mesh.volume_size->z ) || ( z <= 0 ) ; return out; From e4e5139a58a4ed7a023fafe93af4d56993b3e7ed Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Wed, 26 Dec 2018 14:43:30 +0700 Subject: [PATCH 28/83] spatial mesh cu - compile fix on set boundary (returning to usage of constant device memory) --- SpatialMeshCu.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 0735a35..8dd8d85 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -325,17 +325,17 @@ void SpatialMeshCu::set_boundary_conditions(double* d_potential) { std::string debug_message = std::string(" set boundary "); dim3 blocks = dim3(n_nodes->y / 16, n_nodes->z / 16, 1); - SetBoundaryConditionOrthoX <<< blocks, threads >>> (d_potential, d_left_border, d_right_border); + SetBoundaryConditionOrthoX <<< blocks, threads >>> (d_potential); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); blocks = dim3(n_nodes->x / 16, n_nodes->z / 16, 2); - SetBoundaryConditionOrthoY <<< blocks, threads >>> (d_potential, d_bot_border, d_up_border); + SetBoundaryConditionOrthoY <<< blocks, threads >>> (d_potential); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); blocks = dim3(n_nodes->x / 16, n_nodes->y / 16, 2); - SetBoundaryConditionOrthoZ <<< blocks, threads >>> (d_potential, d_near_border, d_far_border); + SetBoundaryConditionOrthoZ <<< blocks, threads >>> (d_potential); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); From 
3cdd7e0e51d76ae92497e19c54dd866008b4fbfc Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Wed, 26 Dec 2018 15:57:12 +0700 Subject: [PATCH 29/83] is write boundary condition to HDF5 needed? --- SpatialMeshCu.cu | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 8dd8d85..21d301f 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -590,30 +590,11 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { delete[] ez; delete[] hdf5_tmp_write_data; } - double left_border[1], right_border[1], up_border[1], bot_border[1], near_border[1], far_border[1]; - cuda_status = cudaMemcpyToSymbol( left_border, d_left_border, - sizeof(double), cudaMemcpyDeviceToHost); - cuda_status_check(cuda_status, debug_message); + //cuda_status = cudaMemcpyToSymbol( boundary, d_boundary, + // sizeof(double), cudaMemcpyDeviceToHost); + //cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol( right_border, d_right_border, - sizeof(double), cudaMemcpyDeviceToHost); - cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol( up_border, d_up_border, - sizeof(double), cudaMemcpyDeviceToHost); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpyToSymbol( bot_border, d_bot_border, - sizeof(double), cudaMemcpyDeviceToHost); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpyToSymbol( near_border, d_near_border, - sizeof(double), cudaMemcpyDeviceToHost); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpyToSymbol( far_border, d_far_border, - sizeof(double), cudaMemcpyDeviceToHost); - cuda_status_check(cuda_status, debug_message); status = H5Sclose(filespace); hdf5_status_check(status); } From 9878a2319f59543afee61ebab1b6604b234ea301 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Wed, 26 Dec 2018 17:24:01 +0700 Subject: [PATCH 30/83] hdf read write expession errors 
FieldSolver.cu(137): error: expression must have class type --- SpatialMeshCu.cu | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 21d301f..1d6ac2b 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -109,33 +109,33 @@ SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { n_nodes[0] = make_int3(0, 0, 0); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_volume_size", - &volume_size->x); + &(volume_size[0].x)); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_volume_size", - &volume_size->y); + &(volume_size[0].y)); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_volume_size", - &volume_size->z); + &(volume_size[0].z)); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_cell_size", - &cell_size->x); + &(cell_size[0].x)); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_cell_size", - &cell_size->y); + &(cell_size[0].y)); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_cell_size", - &cell_size->z); + &(cell_size[0].z)); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "x_n_nodes", - &n_nodes->x); + &(n_nodes[0].x)); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "y_n_nodes", - &n_nodes->y); + &(n_nodes[0].y)); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "z_n_nodes", - &n_nodes->z); + &(n_nodes[0].z)); hdf5_status_check(status); allocate_ongrid_values(); @@ -425,31 +425,31 @@ void SpatialMeshCu::write_hdf5_attributes(hid_t group_id) { std::string hdf5_current_group = "./"; status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "x_volume_size", &(volume_size->x), single_element); + "x_volume_size", &(volume_size[0].x), 
single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "y_volume_size", &(volume_size->y), single_element); + "y_volume_size", &(volume_size[0].y), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "z_volume_size", &(volume_size->z), single_element); + "z_volume_size", &(volume_size[0].z), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "x_cell_size", &(cell_size->x), single_element); + "x_cell_size", &(cell_size[0].x), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "y_cell_size", &(cell_size->y), single_element); + "y_cell_size", &(cell_size[0].y), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "z_cell_size", &(cell_size->z), single_element); + "z_cell_size", &(cell_size[0].z), single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "x_n_nodes", &(n_nodes->x), single_element); + "x_n_nodes", &(n_nodes[0].x), single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "y_n_nodes", &(n_nodes->y), single_element); + "y_n_nodes", &(n_nodes[0].y), single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "z_n_nodes", &(n_nodes->z), single_element); + "z_n_nodes", &(n_nodes[0].z), single_element); hdf5_status_check(status); } From 871a5473b404a7e2b15d8e1cf7354b8d75675276 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Wed, 26 Dec 2018 21:37:24 +0700 Subject: [PATCH 31/83] hdf5 H5LTget_attribute_(int /double) tricks --- SpatialMeshCu.cu | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/SpatialMeshCu.cu 
b/SpatialMeshCu.cu index 1d6ac2b..cc8c8b9 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -4,6 +4,7 @@ __constant__ double3 d_volume_size[1]; __constant__ double3 d_cell_size[1]; __constant__ int3 d_n_nodes[1]; +__constant__ double d_boundary[6]; #define TOP 0 #define BOTTOM 1 #define LEFT 2 @@ -11,8 +12,6 @@ __constant__ int3 d_n_nodes[1]; #define FAR 4 #define NEAR 5 -__constant__ double d_boundary[6]; - __device__ int GetIdxVolume() { //int xStepthread = 1; int xStepBlock = blockDim.x; @@ -104,44 +103,49 @@ SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { cudaError_t cuda_status; std::string debug_message = std::string(" reading from hdf5 "); - volume_size[0] = make_double3(0, 0, 0); - cell_size[0] = make_double3(0, 0, 0); - n_nodes[0] = make_int3(0, 0, 0); + double volume_sz_x, volume_sz_y, volume_sz_z; + double cell_size_x, cell_size_y, cell_size_z; + int n_nodes_x, n_nodes_y, n_nodes_z; + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_volume_size", - &(volume_size[0].x)); + &volume_sz_x); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_volume_size", - &(volume_size[0].y)); + &volume_sz_y); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_volume_size", - &(volume_size[0].z)); + &volume_sz_z); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_cell_size", - &(cell_size[0].x)); + &cell_size_x); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_cell_size", - &(cell_size[0].y)); + &cell_size_y); hdf5_status_check(status); status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_cell_size", - &(cell_size[0].z)); + &cell_size_z); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "x_n_nodes", - &(n_nodes[0].x)); + &n_nodes_x); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "y_n_nodes", - 
&(n_nodes[0].y)); + &n_nodes_y); hdf5_status_check(status); status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "z_n_nodes", - &(n_nodes[0].z)); + &n_nodes_z); hdf5_status_check(status); + volume_size[0] = make_double3(volume_sz_x, volume_sz_y, volume_sz_z); + cell_size[0] = make_double3(cell_size_x, cell_size_y, cell_size_z); + n_nodes[0] = make_int3(n_nodes_x, n_nodes_y, n_nodes_z); + allocate_ongrid_values(); copy_constants_to_device(); - int dim = n_nodes[0].x * n_nodes[0].y * n_nodes->z; + int dim = n_nodes->x * n_nodes->y * n_nodes->z; double *h5_tmp_buf_1 = new double[dim]; double *h5_tmp_buf_2 = new double[dim]; double *h5_tmp_buf_3 = new double[dim]; @@ -156,10 +160,6 @@ SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { h5_tmp_buf_1); H5LTread_dataset_double(h5_spat_mesh_group, "./potential", h5_tmp_buf_2); - // for ( int i = 0; i < dim; i++ ) { - // ( charge_density.data() )[i] = h5_tmp_buf_1[i]; - // ( potential.data() )[i] = h5_tmp_buf_2[i]; - // } cuda_status = cudaMemcpy(h5_tmp_buf_1, dev_charge_density, sizeof(double) * dim, cudaMemcpyHostToDevice); From b17cb5ff2d5b4cb238e48efceb5104b3fdde0906 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Thu, 27 Dec 2018 21:58:50 +0700 Subject: [PATCH 32/83] Cleared unecessary declarations --- SpatialMeshCu.cuh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 8443fa6..9dab368 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -19,10 +19,6 @@ public: double *dev_potential; double3 *dev_electric_field; - double3* d_volume_size; - double3* d_cell_size; - int3* d_n_nodes; - //double* d_up_border; //double* d_bot_border; //double* d_left_border; From bbce42b739d237a364814c2f7bb47d250a87f299 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Thu, 27 Dec 2018 22:04:23 +0700 Subject: [PATCH 33/83] removed error in memory allocation --- SpatialMeshCu.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SpatialMeshCu.cu 
b/SpatialMeshCu.cu index cc8c8b9..705fb57 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -230,10 +230,10 @@ void SpatialMeshCu::init_constants(Config & conf) { void SpatialMeshCu::copy_constants_to_device() { cudaError_t cuda_status; //mesh params - std::string debug_message = std::string(" copy constants "); + std::string debug_message; - cuda_status = cudaMalloc < int3 >(&d_n_nodes, 1); - cuda_status_check(cuda_status, debug_message); + //cuda_status = cudaMalloc < int3 >(&d_n_nodes, 1); + //cuda_status_check(cuda_status, debug_message); debug_message = std::string(" copy constants "); From d21c5f03471d1babc06b9933af19e9ce5dbacd92 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Thu, 27 Dec 2018 22:38:30 +0700 Subject: [PATCH 34/83] without hdf5 read --- SpatialMeshCu.cu | 170 +++++++++++++++++++++++------------------------ 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 705fb57..83f1dcc 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -99,99 +99,99 @@ SpatialMeshCu::SpatialMeshCu(Config &conf) { } SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { - herr_t status; - cudaError_t cuda_status; - std::string debug_message = std::string(" reading from hdf5 "); - - double volume_sz_x, volume_sz_y, volume_sz_z; - double cell_size_x, cell_size_y, cell_size_z; - int n_nodes_x, n_nodes_y, n_nodes_z; - - - status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_volume_size", - &volume_sz_x); - hdf5_status_check(status); - status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_volume_size", - &volume_sz_y); - hdf5_status_check(status); - status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_volume_size", - &volume_sz_z); - hdf5_status_check(status); - - status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_cell_size", - &cell_size_x); - hdf5_status_check(status); - status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_cell_size", - &cell_size_y); - 
hdf5_status_check(status); - status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_cell_size", - &cell_size_z); - hdf5_status_check(status); - - status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "x_n_nodes", - &n_nodes_x); - hdf5_status_check(status); - status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "y_n_nodes", - &n_nodes_y); - hdf5_status_check(status); - status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "z_n_nodes", - &n_nodes_z); - hdf5_status_check(status); - - volume_size[0] = make_double3(volume_sz_x, volume_sz_y, volume_sz_z); - cell_size[0] = make_double3(cell_size_x, cell_size_y, cell_size_z); - n_nodes[0] = make_int3(n_nodes_x, n_nodes_y, n_nodes_z); - - allocate_ongrid_values(); - copy_constants_to_device(); - - int dim = n_nodes->x * n_nodes->y * n_nodes->z; - double *h5_tmp_buf_1 = new double[dim]; - double *h5_tmp_buf_2 = new double[dim]; - double *h5_tmp_buf_3 = new double[dim]; - - dim3 threads = GetThreads(); - dim3 blocks = GetBlocks(threads); - fill_coordinates <<< blocks, threads >>> (dev_node_coordinates); - cuda_status = cudaDeviceSynchronize(); - cuda_status_check(cuda_status, debug_message); + //herr_t status; + //cudaError_t cuda_status; + //std::string debug_message = std::string(" reading from hdf5 "); + + //double volume_sz_x, volume_sz_y, volume_sz_z; + //double cell_size_x, cell_size_y, cell_size_z; + //int n_nodes_x, n_nodes_y, n_nodes_z; + + + //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_volume_size", + // &volume_sz_x); + //hdf5_status_check(status); + //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_volume_size", + // &volume_sz_y); + //hdf5_status_check(status); + //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_volume_size", + // &volume_sz_z); + //hdf5_status_check(status); + + //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_cell_size", + // &cell_size_x); + //hdf5_status_check(status); + //status = 
H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_cell_size", + // &cell_size_y); + //hdf5_status_check(status); + //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_cell_size", + // &cell_size_z); + //hdf5_status_check(status); + + //status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "x_n_nodes", + // &n_nodes_x); + //hdf5_status_check(status); + //status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "y_n_nodes", + // &n_nodes_y); + //hdf5_status_check(status); + //status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "z_n_nodes", + // &n_nodes_z); + //hdf5_status_check(status); + + //volume_size[0] = make_double3(volume_sz_x, volume_sz_y, volume_sz_z); + //cell_size[0] = make_double3(cell_size_x, cell_size_y, cell_size_z); + //n_nodes[0] = make_int3(n_nodes_x, n_nodes_y, n_nodes_z); + + //allocate_ongrid_values(); + //copy_constants_to_device(); + + //int dim = n_nodes->x * n_nodes->y * n_nodes->z; + //double *h5_tmp_buf_1 = new double[dim]; + //double *h5_tmp_buf_2 = new double[dim]; + //double *h5_tmp_buf_3 = new double[dim]; + + //dim3 threads = GetThreads(); + //dim3 blocks = GetBlocks(threads); + //fill_coordinates <<< blocks, threads >>> (dev_node_coordinates); + //cuda_status = cudaDeviceSynchronize(); + //cuda_status_check(cuda_status, debug_message); - H5LTread_dataset_double(h5_spat_mesh_group, "./charge_density", - h5_tmp_buf_1); - H5LTread_dataset_double(h5_spat_mesh_group, "./potential", h5_tmp_buf_2); + //H5LTread_dataset_double(h5_spat_mesh_group, "./charge_density", + // h5_tmp_buf_1); + //H5LTread_dataset_double(h5_spat_mesh_group, "./potential", h5_tmp_buf_2); - cuda_status = cudaMemcpy(h5_tmp_buf_1, dev_charge_density, sizeof(double) * dim, - cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); + //cuda_status = cudaMemcpy(h5_tmp_buf_1, dev_charge_density, sizeof(double) * dim, + // cudaMemcpyHostToDevice); + //cuda_status_check(cuda_status, debug_message); - cuda_status = 
cudaMemcpy(h5_tmp_buf_2, dev_potential, sizeof(double) * dim, - cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); + //cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_potential, sizeof(double) * dim, + // cudaMemcpyHostToDevice); + //cuda_status_check(cuda_status, debug_message); - double3 *h5_tmp_vector = new double3[dim]; - - H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_x", - h5_tmp_buf_1); - H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_y", - h5_tmp_buf_2); - H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_z", - h5_tmp_buf_3); - for (int i = 0; i < dim; i++) { - h5_tmp_vector[i] = make_double3(h5_tmp_buf_1[i], h5_tmp_buf_2[i], - h5_tmp_buf_3[i]); - } + //double3 *h5_tmp_vector = new double3[dim]; + + //H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_x", + // h5_tmp_buf_1); + //H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_y", + // h5_tmp_buf_2); + //H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_z", + // h5_tmp_buf_3); + //for (int i = 0; i < dim; i++) { + // h5_tmp_vector[i] = make_double3(h5_tmp_buf_1[i], h5_tmp_buf_2[i], + // h5_tmp_buf_3[i]); + //} - cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_electric_field, sizeof(double3) * dim, - cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); + //cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_electric_field, sizeof(double3) * dim, + // cudaMemcpyHostToDevice); + //cuda_status_check(cuda_status, debug_message); - delete[] h5_tmp_buf_1; - delete[] h5_tmp_buf_2; - delete[] h5_tmp_buf_3; - delete[] h5_tmp_vector; + //delete[] h5_tmp_buf_1; + //delete[] h5_tmp_buf_2; + //delete[] h5_tmp_buf_3; + //delete[] h5_tmp_vector; - return; + //return; } void SpatialMeshCu::check_correctness_of_related_config_fields(Config &conf) { From 6dd3e4f5238d7ac5aeeb13ede1a9b609e090330d Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Thu, 27 Dec 2018 22:55:29 +0700 Subject: [PATCH 35/83] solver fixes --- 
FieldSolver.cu | 20 +++--- SpatialMeshCu.cu | 170 +++++++++++++++++++++++------------------------ 2 files changed, 95 insertions(+), 95 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index e567c2c..cb08cde 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -149,28 +149,28 @@ void FieldSolver::copy_constants_to_device() { cuda_status = cudaMemcpyToSymbol(d_cell_size, (void*)&mesh.cell_size, sizeof(double3), cudaMemcpyHostToDevice); - double dxdxdydy = mesh.cell_size.x * mesh.cell_size.x * - mesh.cell_size.y * mesh.cell_size.y; + double dxdxdydy = mesh.cell_size->x * mesh.cell_size->x * + mesh.cell_size->y * mesh.cell_size->y; cuda_status = cudaMemcpyToSymbol(dev_dxdxdydy, (void*)&dxdxdydy, sizeof(double), cudaMemcpyHostToDevice); - double dxdxdzdz = mesh.cell_size.x * mesh.cell_size.x * - mesh.cell_size.z * mesh.cell_size.z; + double dxdxdzdz = mesh.cell_size->x * mesh.cell_size->x * + mesh.cell_size->z * mesh.cell_size->z; cuda_status = cudaMemcpyToSymbol(dev_dxdxdzdz, (void*)&dxdxdzdz, sizeof(double), cudaMemcpyHostToDevice); - double dydydzdz = mesh.cell_size.y * mesh.cell_size.y * - mesh.cell_size.z * mesh.cell_size.z; + double dydydzdz = mesh.cell_size->y * mesh.cell_size->y * + mesh.cell_size->z * mesh.cell_size->z; cuda_status = cudaMemcpyToSymbol(dev_dydydzdz, (void*)&dydydzdz, sizeof(double), cudaMemcpyHostToDevice); - double dxdxdydydzdz = mesh.cell_size.x * mesh.cell_size.x * - mesh.cell_size.y * mesh.cell_size.y * - mesh.cell_size.z * mesh.cell_size.z; + double dxdxdydydzdz = mesh.cell_size->x * mesh.cell_size->x * + mesh.cell_size->y * mesh.cell_size->y * + mesh.cell_size->z * mesh.cell_size->z; cuda_status = cudaMemcpyToSymbol(dev_dxdxdydydzdz, (void*)&dxdxdydydzdz, sizeof(double), cudaMemcpyHostToDevice); - int end = mesh.n_nodes.x * mesh.n_nodes.y * mesh.n_nodes.z - 1; + int end = mesh.n_nodes->x * mesh.n_nodes->y * mesh.n_nodes->z - 1; cuda_status = cudaMemcpyToSymbol(dev_end, (void*)&end, sizeof(int), cudaMemcpyHostToDevice); } diff 
--git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 83f1dcc..705fb57 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -99,99 +99,99 @@ SpatialMeshCu::SpatialMeshCu(Config &conf) { } SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { - //herr_t status; - //cudaError_t cuda_status; - //std::string debug_message = std::string(" reading from hdf5 "); - - //double volume_sz_x, volume_sz_y, volume_sz_z; - //double cell_size_x, cell_size_y, cell_size_z; - //int n_nodes_x, n_nodes_y, n_nodes_z; - - - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_volume_size", - // &volume_sz_x); - //hdf5_status_check(status); - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_volume_size", - // &volume_sz_y); - //hdf5_status_check(status); - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_volume_size", - // &volume_sz_z); - //hdf5_status_check(status); - - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_cell_size", - // &cell_size_x); - //hdf5_status_check(status); - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_cell_size", - // &cell_size_y); - //hdf5_status_check(status); - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_cell_size", - // &cell_size_z); - //hdf5_status_check(status); - - //status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "x_n_nodes", - // &n_nodes_x); - //hdf5_status_check(status); - //status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "y_n_nodes", - // &n_nodes_y); - //hdf5_status_check(status); - //status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "z_n_nodes", - // &n_nodes_z); - //hdf5_status_check(status); - - //volume_size[0] = make_double3(volume_sz_x, volume_sz_y, volume_sz_z); - //cell_size[0] = make_double3(cell_size_x, cell_size_y, cell_size_z); - //n_nodes[0] = make_int3(n_nodes_x, n_nodes_y, n_nodes_z); - - //allocate_ongrid_values(); - //copy_constants_to_device(); - - //int dim = n_nodes->x * n_nodes->y * n_nodes->z; - 
//double *h5_tmp_buf_1 = new double[dim]; - //double *h5_tmp_buf_2 = new double[dim]; - //double *h5_tmp_buf_3 = new double[dim]; - - //dim3 threads = GetThreads(); - //dim3 blocks = GetBlocks(threads); - //fill_coordinates <<< blocks, threads >>> (dev_node_coordinates); - //cuda_status = cudaDeviceSynchronize(); - //cuda_status_check(cuda_status, debug_message); + herr_t status; + cudaError_t cuda_status; + std::string debug_message = std::string(" reading from hdf5 "); - //H5LTread_dataset_double(h5_spat_mesh_group, "./charge_density", - // h5_tmp_buf_1); - //H5LTread_dataset_double(h5_spat_mesh_group, "./potential", h5_tmp_buf_2); + double volume_sz_x, volume_sz_y, volume_sz_z; + double cell_size_x, cell_size_y, cell_size_z; + int n_nodes_x, n_nodes_y, n_nodes_z; - //cuda_status = cudaMemcpy(h5_tmp_buf_1, dev_charge_density, sizeof(double) * dim, - // cudaMemcpyHostToDevice); - //cuda_status_check(cuda_status, debug_message); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_volume_size", + &volume_sz_x); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_volume_size", + &volume_sz_y); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_volume_size", + &volume_sz_z); + hdf5_status_check(status); - //cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_potential, sizeof(double) * dim, - // cudaMemcpyHostToDevice); - //cuda_status_check(cuda_status, debug_message); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_cell_size", + &cell_size_x); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_cell_size", + &cell_size_y); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_cell_size", + &cell_size_z); + hdf5_status_check(status); - //double3 *h5_tmp_vector = new double3[dim]; - - //H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_x", - // h5_tmp_buf_1); - 
//H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_y", - // h5_tmp_buf_2); - //H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_z", - // h5_tmp_buf_3); - //for (int i = 0; i < dim; i++) { - // h5_tmp_vector[i] = make_double3(h5_tmp_buf_1[i], h5_tmp_buf_2[i], - // h5_tmp_buf_3[i]); - //} + status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "x_n_nodes", + &n_nodes_x); + hdf5_status_check(status); + status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "y_n_nodes", + &n_nodes_y); + hdf5_status_check(status); + status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "z_n_nodes", + &n_nodes_z); + hdf5_status_check(status); - //cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_electric_field, sizeof(double3) * dim, - // cudaMemcpyHostToDevice); - //cuda_status_check(cuda_status, debug_message); + volume_size[0] = make_double3(volume_sz_x, volume_sz_y, volume_sz_z); + cell_size[0] = make_double3(cell_size_x, cell_size_y, cell_size_z); + n_nodes[0] = make_int3(n_nodes_x, n_nodes_y, n_nodes_z); - //delete[] h5_tmp_buf_1; - //delete[] h5_tmp_buf_2; - //delete[] h5_tmp_buf_3; - //delete[] h5_tmp_vector; + allocate_ongrid_values(); + copy_constants_to_device(); - //return; + int dim = n_nodes->x * n_nodes->y * n_nodes->z; + double *h5_tmp_buf_1 = new double[dim]; + double *h5_tmp_buf_2 = new double[dim]; + double *h5_tmp_buf_3 = new double[dim]; + + dim3 threads = GetThreads(); + dim3 blocks = GetBlocks(threads); + fill_coordinates <<< blocks, threads >>> (dev_node_coordinates); + cuda_status = cudaDeviceSynchronize(); + cuda_status_check(cuda_status, debug_message); + + H5LTread_dataset_double(h5_spat_mesh_group, "./charge_density", + h5_tmp_buf_1); + H5LTread_dataset_double(h5_spat_mesh_group, "./potential", h5_tmp_buf_2); + + + cuda_status = cudaMemcpy(h5_tmp_buf_1, dev_charge_density, sizeof(double) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_potential, 
sizeof(double) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status, debug_message); + + double3 *h5_tmp_vector = new double3[dim]; + + H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_x", + h5_tmp_buf_1); + H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_y", + h5_tmp_buf_2); + H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_z", + h5_tmp_buf_3); + for (int i = 0; i < dim; i++) { + h5_tmp_vector[i] = make_double3(h5_tmp_buf_1[i], h5_tmp_buf_2[i], + h5_tmp_buf_3[i]); + } + + cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_electric_field, sizeof(double3) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status, debug_message); + + delete[] h5_tmp_buf_1; + delete[] h5_tmp_buf_2; + delete[] h5_tmp_buf_3; + delete[] h5_tmp_vector; + + return; } void SpatialMeshCu::check_correctness_of_related_config_fields(Config &conf) { From 74b6c064cb9cca040e840014530ae52f3bdf1521 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Thu, 27 Dec 2018 22:55:29 +0700 Subject: [PATCH 36/83] solver fixes --- FieldSolver.cu | 22 +++--- SpatialMeshCu.cu | 170 +++++++++++++++++++++++------------------------ 2 files changed, 96 insertions(+), 96 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index e567c2c..216b853 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -134,7 +134,7 @@ FieldSolver::FieldSolver(SpatialMeshCu &mesh, Inner_regions_manager &inner_regio void FieldSolver::allocate_next_phi() { - size_t dim = mesh.n_nodes.x * mesh.n_nodes.y * mesh.n_nodes.z; + size_t dim = mesh.n_nodes->x * mesh.n_nodes->y * mesh.n_nodes->z; cudaError_t cuda_status; cuda_status = cudaMalloc(&dev_phi_next, dim); @@ -149,28 +149,28 @@ void FieldSolver::copy_constants_to_device() { cuda_status = cudaMemcpyToSymbol(d_cell_size, (void*)&mesh.cell_size, sizeof(double3), cudaMemcpyHostToDevice); - double dxdxdydy = mesh.cell_size.x * mesh.cell_size.x * - mesh.cell_size.y * mesh.cell_size.y; + double dxdxdydy = mesh.cell_size->x * 
mesh.cell_size->x * + mesh.cell_size->y * mesh.cell_size->y; cuda_status = cudaMemcpyToSymbol(dev_dxdxdydy, (void*)&dxdxdydy, sizeof(double), cudaMemcpyHostToDevice); - double dxdxdzdz = mesh.cell_size.x * mesh.cell_size.x * - mesh.cell_size.z * mesh.cell_size.z; + double dxdxdzdz = mesh.cell_size->x * mesh.cell_size->x * + mesh.cell_size->z * mesh.cell_size->z; cuda_status = cudaMemcpyToSymbol(dev_dxdxdzdz, (void*)&dxdxdzdz, sizeof(double), cudaMemcpyHostToDevice); - double dydydzdz = mesh.cell_size.y * mesh.cell_size.y * - mesh.cell_size.z * mesh.cell_size.z; + double dydydzdz = mesh.cell_size->y * mesh.cell_size->y * + mesh.cell_size->z * mesh.cell_size->z; cuda_status = cudaMemcpyToSymbol(dev_dydydzdz, (void*)&dydydzdz, sizeof(double), cudaMemcpyHostToDevice); - double dxdxdydydzdz = mesh.cell_size.x * mesh.cell_size.x * - mesh.cell_size.y * mesh.cell_size.y * - mesh.cell_size.z * mesh.cell_size.z; + double dxdxdydydzdz = mesh.cell_size->x * mesh.cell_size->x * + mesh.cell_size->y * mesh.cell_size->y * + mesh.cell_size->z * mesh.cell_size->z; cuda_status = cudaMemcpyToSymbol(dev_dxdxdydydzdz, (void*)&dxdxdydydzdz, sizeof(double), cudaMemcpyHostToDevice); - int end = mesh.n_nodes.x * mesh.n_nodes.y * mesh.n_nodes.z - 1; + int end = mesh.n_nodes->x * mesh.n_nodes->y * mesh.n_nodes->z - 1; cuda_status = cudaMemcpyToSymbol(dev_end, (void*)&end, sizeof(int), cudaMemcpyHostToDevice); } diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 83f1dcc..705fb57 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -99,99 +99,99 @@ SpatialMeshCu::SpatialMeshCu(Config &conf) { } SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { - //herr_t status; - //cudaError_t cuda_status; - //std::string debug_message = std::string(" reading from hdf5 "); - - //double volume_sz_x, volume_sz_y, volume_sz_z; - //double cell_size_x, cell_size_y, cell_size_z; - //int n_nodes_x, n_nodes_y, n_nodes_z; - - - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_volume_size", 
- // &volume_sz_x); - //hdf5_status_check(status); - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_volume_size", - // &volume_sz_y); - //hdf5_status_check(status); - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_volume_size", - // &volume_sz_z); - //hdf5_status_check(status); - - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_cell_size", - // &cell_size_x); - //hdf5_status_check(status); - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_cell_size", - // &cell_size_y); - //hdf5_status_check(status); - //status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_cell_size", - // &cell_size_z); - //hdf5_status_check(status); - - //status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "x_n_nodes", - // &n_nodes_x); - //hdf5_status_check(status); - //status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "y_n_nodes", - // &n_nodes_y); - //hdf5_status_check(status); - //status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "z_n_nodes", - // &n_nodes_z); - //hdf5_status_check(status); - - //volume_size[0] = make_double3(volume_sz_x, volume_sz_y, volume_sz_z); - //cell_size[0] = make_double3(cell_size_x, cell_size_y, cell_size_z); - //n_nodes[0] = make_int3(n_nodes_x, n_nodes_y, n_nodes_z); - - //allocate_ongrid_values(); - //copy_constants_to_device(); - - //int dim = n_nodes->x * n_nodes->y * n_nodes->z; - //double *h5_tmp_buf_1 = new double[dim]; - //double *h5_tmp_buf_2 = new double[dim]; - //double *h5_tmp_buf_3 = new double[dim]; - - //dim3 threads = GetThreads(); - //dim3 blocks = GetBlocks(threads); - //fill_coordinates <<< blocks, threads >>> (dev_node_coordinates); - //cuda_status = cudaDeviceSynchronize(); - //cuda_status_check(cuda_status, debug_message); + herr_t status; + cudaError_t cuda_status; + std::string debug_message = std::string(" reading from hdf5 "); - //H5LTread_dataset_double(h5_spat_mesh_group, "./charge_density", - // h5_tmp_buf_1); - 
//H5LTread_dataset_double(h5_spat_mesh_group, "./potential", h5_tmp_buf_2); + double volume_sz_x, volume_sz_y, volume_sz_z; + double cell_size_x, cell_size_y, cell_size_z; + int n_nodes_x, n_nodes_y, n_nodes_z; - //cuda_status = cudaMemcpy(h5_tmp_buf_1, dev_charge_density, sizeof(double) * dim, - // cudaMemcpyHostToDevice); - //cuda_status_check(cuda_status, debug_message); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_volume_size", + &volume_sz_x); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_volume_size", + &volume_sz_y); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_volume_size", + &volume_sz_z); + hdf5_status_check(status); - //cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_potential, sizeof(double) * dim, - // cudaMemcpyHostToDevice); - //cuda_status_check(cuda_status, debug_message); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_cell_size", + &cell_size_x); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_cell_size", + &cell_size_y); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_cell_size", + &cell_size_z); + hdf5_status_check(status); - //double3 *h5_tmp_vector = new double3[dim]; - - //H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_x", - // h5_tmp_buf_1); - //H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_y", - // h5_tmp_buf_2); - //H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_z", - // h5_tmp_buf_3); - //for (int i = 0; i < dim; i++) { - // h5_tmp_vector[i] = make_double3(h5_tmp_buf_1[i], h5_tmp_buf_2[i], - // h5_tmp_buf_3[i]); - //} + status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "x_n_nodes", + &n_nodes_x); + hdf5_status_check(status); + status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "y_n_nodes", + &n_nodes_y); + hdf5_status_check(status); + status = 
H5LTget_attribute_int(h5_spat_mesh_group, "./", "z_n_nodes", + &n_nodes_z); + hdf5_status_check(status); - //cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_electric_field, sizeof(double3) * dim, - // cudaMemcpyHostToDevice); - //cuda_status_check(cuda_status, debug_message); + volume_size[0] = make_double3(volume_sz_x, volume_sz_y, volume_sz_z); + cell_size[0] = make_double3(cell_size_x, cell_size_y, cell_size_z); + n_nodes[0] = make_int3(n_nodes_x, n_nodes_y, n_nodes_z); - //delete[] h5_tmp_buf_1; - //delete[] h5_tmp_buf_2; - //delete[] h5_tmp_buf_3; - //delete[] h5_tmp_vector; + allocate_ongrid_values(); + copy_constants_to_device(); - //return; + int dim = n_nodes->x * n_nodes->y * n_nodes->z; + double *h5_tmp_buf_1 = new double[dim]; + double *h5_tmp_buf_2 = new double[dim]; + double *h5_tmp_buf_3 = new double[dim]; + + dim3 threads = GetThreads(); + dim3 blocks = GetBlocks(threads); + fill_coordinates <<< blocks, threads >>> (dev_node_coordinates); + cuda_status = cudaDeviceSynchronize(); + cuda_status_check(cuda_status, debug_message); + + H5LTread_dataset_double(h5_spat_mesh_group, "./charge_density", + h5_tmp_buf_1); + H5LTread_dataset_double(h5_spat_mesh_group, "./potential", h5_tmp_buf_2); + + + cuda_status = cudaMemcpy(h5_tmp_buf_1, dev_charge_density, sizeof(double) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_potential, sizeof(double) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status, debug_message); + + double3 *h5_tmp_vector = new double3[dim]; + + H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_x", + h5_tmp_buf_1); + H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_y", + h5_tmp_buf_2); + H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_z", + h5_tmp_buf_3); + for (int i = 0; i < dim; i++) { + h5_tmp_vector[i] = make_double3(h5_tmp_buf_1[i], h5_tmp_buf_2[i], + h5_tmp_buf_3[i]); + } + + cuda_status = 
cudaMemcpy(h5_tmp_buf_2, dev_electric_field, sizeof(double3) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status, debug_message); + + delete[] h5_tmp_buf_1; + delete[] h5_tmp_buf_2; + delete[] h5_tmp_buf_3; + delete[] h5_tmp_vector; + + return; } void SpatialMeshCu::check_correctness_of_related_config_fields(Config &conf) { From 568bd5f025a1d147633cbca395f2e663674d89ac Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Thu, 27 Dec 2018 23:10:18 +0700 Subject: [PATCH 37/83] invalid argument in Copy borders to Symbol --- FieldSolver.cu | 8 ++++---- SpatialMeshCu.cu | 32 ++++++++++++++++---------------- SpatialMeshCu.cuh | 6 +++--- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index 216b853..f2601ff 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -16,9 +16,9 @@ __constant__ int dev_end[1]; __device__ int GetIdx() { //int xStepthread = 1; int xStepBlock = blockDim.x; - int yStepThread = d_n_nodes[0].x; + int yStepThread = d_n_nodes->x; int yStepBlock = yStepThread * blockDim.y; - int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; + int zStepThread = d_n_nodes->x * d_n_nodes->y; int zStepBlock = zStepThread * blockDim.z; return (threadIdx.x + blockIdx.x * xStepBlock) + (threadIdx.y * yStepThread + blockIdx.y * yStepBlock) + @@ -38,8 +38,8 @@ __global__ void ComputePhiNext(const double* d_phi_current, const double* d_char int idx = GetIdx(); int offset_Dx = 1; //todo rewrite usind device n_nodes.x/y/z - int offset_Dy = d_n_nodes[0].x; - int offset_Dz = d_n_nodes[0].x * d_n_nodes[0].y; + int offset_Dy = d_n_nodes->x; + int offset_Dz = d_n_nodes->x * d_n_nodes->y; int prev_neighbour_idx; int next_neighbour_idx; diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 705fb57..2a33464 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -138,9 +138,9 @@ SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { &n_nodes_z); hdf5_status_check(status); - volume_size[0] = make_double3(volume_sz_x, volume_sz_y, 
volume_sz_z); - cell_size[0] = make_double3(cell_size_x, cell_size_y, cell_size_z); - n_nodes[0] = make_int3(n_nodes_x, n_nodes_y, n_nodes_z); + *(volume_size) = make_double3(volume_sz_x, volume_sz_y, volume_sz_z); + *(cell_size) = make_double3(cell_size_x, cell_size_y, cell_size_z); + *(n_nodes) = make_int3(n_nodes_x, n_nodes_y, n_nodes_z); allocate_ongrid_values(); copy_constants_to_device(); @@ -204,7 +204,7 @@ void SpatialMeshCu::check_correctness_of_related_config_fields(Config &conf) { } void SpatialMeshCu::init_constants(Config & conf) { - n_nodes[0] = make_int3( + *(n_nodes) = make_int3( ceil( conf.mesh_config_part.grid_x_size / conf.mesh_config_part.grid_x_step) + 1, @@ -215,12 +215,12 @@ void SpatialMeshCu::init_constants(Config & conf) { conf.mesh_config_part.grid_z_size / conf.mesh_config_part.grid_z_step) + 1); - volume_size[0] = make_double3(conf.mesh_config_part.grid_x_size, + *(volume_size) = make_double3(conf.mesh_config_part.grid_x_size, conf.mesh_config_part.grid_y_size, conf.mesh_config_part.grid_z_size); - cell_size[0] = make_double3(volume_size[0].x / (n_nodes[0].x - 1), - volume_size[0].y / (n_nodes[0].y - 1), volume_size[0].z / (n_nodes[0].z - 1)); + *(cell_size) = make_double3(volume_size->x / (n_nodes->x - 1), + volume_size->y / (n_nodes->y - 1), volume_size->z / (n_nodes->z - 1)); copy_constants_to_device(); @@ -425,31 +425,31 @@ void SpatialMeshCu::write_hdf5_attributes(hid_t group_id) { std::string hdf5_current_group = "./"; status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "x_volume_size", &(volume_size[0].x), single_element); + "x_volume_size", &(volume_size->x), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "y_volume_size", &(volume_size[0].y), single_element); + "y_volume_size", &(volume_size->y), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "z_volume_size", 
&(volume_size[0].z), single_element); + "z_volume_size", &(volume_size->z), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "x_cell_size", &(cell_size[0].x), single_element); + "x_cell_size", &(cell_size->x), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "y_cell_size", &(cell_size[0].y), single_element); + "y_cell_size", &(cell_size->y), single_element); hdf5_status_check(status); status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), - "z_cell_size", &(cell_size[0].z), single_element); + "z_cell_size", &(cell_size->z), single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "x_n_nodes", &(n_nodes[0].x), single_element); + "x_n_nodes", &(n_nodes->x), single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "y_n_nodes", &(n_nodes[0].y), single_element); + "y_n_nodes", &(n_nodes->y), single_element); hdf5_status_check(status); status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), - "z_n_nodes", &(n_nodes[0].z), single_element); + "z_n_nodes", &(n_nodes->z), single_element); hdf5_status_check(status); } diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 9dab368..9add02f 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -11,9 +11,9 @@ class SpatialMeshCu { public: - int3 n_nodes[1]; - double3 cell_size[1]; - double3 volume_size[1]; + int3 *const n_nodes; + double3 *const cell_size; + double3 *const volume_size; double3 *dev_node_coordinates; double *dev_charge_density; double *dev_potential; From 76b0ba0cb787367e5775bdef56dd55d7ffdbdee4 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Thu, 27 Dec 2018 23:14:31 +0700 Subject: [PATCH 38/83] initialisation --- SpatialMeshCu.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SpatialMeshCu.cuh 
b/SpatialMeshCu.cuh index 9add02f..98ec87c 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -11,9 +11,9 @@ class SpatialMeshCu { public: - int3 *const n_nodes; - double3 *const cell_size; - double3 *const volume_size; + int3 *const n_nodes=&make_int3(0,0,0); + double3 *const cell_size = &make_double3(0, 0, 0); + double3 *const volume_size = &make_double3(0, 0, 0); double3 *dev_node_coordinates; double *dev_charge_density; double *dev_potential; From 632106108265038d099ef5ec3b1423976253f45b Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Thu, 27 Dec 2018 23:52:30 +0700 Subject: [PATCH 39/83] const pointers for copy to symbol --- SpatialMeshCu.cu | 67 +++++++++++++++++------------------------------- 1 file changed, 23 insertions(+), 44 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 42b37f5..8012dff 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -3,15 +3,14 @@ __constant__ double3 d_volume_size[1]; __constant__ double3 d_cell_size[1]; __constant__ int3 d_n_nodes[1]; +__constant__ double d_boundary[6]; -__constant__ double d_up_border[1]; -__constant__ double d_bot_border[1]; - -__constant__ double d_left_border[1]; -__constant__ double d_right_border[1]; - -__constant__ double d_far_border[1]; -__constant__ double d_near_border[1]; +#define TOP 0 +#define BOTTOM 1 +#define LEFT 2 +#define RIGHT 3 +#define FAR 4 +#define NEAR 5 __device__ int GetIdxVolume() { //int xStepthread = 1; @@ -51,8 +50,8 @@ __global__ void SetBoundaryConditionOrthoX(double* potential) { int idx = xIdx + threadIdx.x * yStepThread + blockIdx.x * yStepBlock + threadIdx.y * zStepThread + blockIdx.y * zStepBlock; - potential[idx] = ((double)(1 - blockIdx.z)) * d_left_border[0] - + (blockIdx.z * d_right_border[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * d_boundary[LEFT] + + (blockIdx.z * d_boundary[RIGHT]); } @@ -68,8 +67,8 @@ __global__ void SetBoundaryConditionOrthoY(double* potential) { int idx = yIdx + threadIdx.x * xStepThread + blockIdx.x * 
xStepBlock + threadIdx.y * zStepThread + blockIdx.y * zStepBlock; - potential[idx] = ((double)(1 - blockIdx.z)) * d_bot_border[0] - + (blockIdx.z * d_up_border[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * d_boundary[BOTTOM] + + (blockIdx.z * d_boundary[TOP]); } @@ -85,9 +84,8 @@ __global__ void SetBoundaryConditionOrthoZ(double* potential) { int idx = zIdx + threadIdx.x * xStepThread + blockIdx.x * xStepBlock + threadIdx.y * yStepThread + blockIdx.y * yStepBlock; - - potential[idx] = ((double)(1 - blockIdx.z)) * d_near_border[0] - + (blockIdx.z * d_far_border[0]); + potential[idx] = ((double)(1 - blockIdx.z)) * d_boundary[NEAR] + + (blockIdx.z * d_boundary[FAR]); } @@ -230,16 +228,17 @@ void SpatialMeshCu::init_constants(Config & conf) { void SpatialMeshCu::copy_constants_to_device() { cudaError_t cuda_status; //mesh params + const int3 *nodes = &n_nodes; std::string debug_message = std::string(" copy constants "); - cuda_status = cudaMemcpyToSymbol(d_n_nodes, (void*)&n_nodes, sizeof(dim3), + cuda_status = cudaMemcpyToSymbol(d_n_nodes, (const void*)nodes, sizeof(dim3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_volume_size, (void*)&volume_size, sizeof(double3), + cuda_status = cudaMemcpyToSymbol(d_volume_size, (const void*)&volume_size, sizeof(double3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMemcpyToSymbol(d_cell_size, (void*)&cell_size, sizeof(double3), + cuda_status = cudaMemcpyToSymbol(d_cell_size, (const void*)&cell_size, sizeof(double3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); @@ -250,28 +249,8 @@ void SpatialMeshCu::copy_boundary_to_device(Config &conf) { cudaError_t cuda_status; //boundary params std::string debug_message = std::string(" copy border constants "); - cuda_status = cudaMemcpyToSymbol(d_left_border, (void*)&conf.boundary_config_part.boundary_phi_left, - sizeof(double), 
cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpyToSymbol(d_right_border, (void*)&conf.boundary_config_part.boundary_phi_right, - sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpyToSymbol(d_up_border, (void*)&conf.boundary_config_part.boundary_phi_top, - sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpyToSymbol(d_bot_border, (void*)&conf.boundary_config_part.boundary_phi_bottom, - sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpyToSymbol(d_near_border, (void*)&conf.boundary_config_part.boundary_phi_near, - sizeof(double), cudaMemcpyHostToDevice); - cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpyToSymbol(d_far_border, (void*)&conf.boundary_config_part.boundary_phi_far, - sizeof(double), cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(d_boundary, (void*)&conf.boundary_config_part.boundary_phi_left, + sizeof(double)*6, cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); } @@ -320,21 +299,21 @@ void SpatialMeshCu::clear_old_density_values() { } void SpatialMeshCu::set_boundary_conditions(double* d_potential) { - dim3 threads = dim3(16, 16, 2); + dim3 threads = dim3(4, 4, 2); cudaError_t cuda_status; std::string debug_message = std::string(" set boundary "); - dim3 blocks = dim3(n_nodes.y / 16, n_nodes.z / 16, 1); + dim3 blocks = dim3(n_nodes.y / 4, n_nodes.z / 4, 1); SetBoundaryConditionOrthoX <<< blocks, threads >>> (d_potential); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); - blocks = dim3(n_nodes.x / 16, n_nodes.z / 16, 2); + blocks = dim3(n_nodes.x / 4, n_nodes.z / 4, 2); SetBoundaryConditionOrthoY <<< blocks, threads >>> (d_potential); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, 
debug_message); - blocks = dim3(n_nodes.x / 16, n_nodes.y / 16, 2); + blocks = dim3(n_nodes.x / 4, n_nodes.y / 4, 2); SetBoundaryConditionOrthoZ <<< blocks, threads >>> (d_potential); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); From 84946c6f72026a01ab5cf82152d12bab566b8755 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 00:26:09 +0700 Subject: [PATCH 40/83] boundary copying woraround --- SpatialMeshCu.cu | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 8012dff..7fcb269 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -229,15 +229,18 @@ void SpatialMeshCu::copy_constants_to_device() { cudaError_t cuda_status; //mesh params const int3 *nodes = &n_nodes; - std::string debug_message = std::string(" copy constants "); + std::string debug_message = std::string(" copy nodes number "); cuda_status = cudaMemcpyToSymbol(d_n_nodes, (const void*)nodes, sizeof(dim3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); - - cuda_status = cudaMemcpyToSymbol(d_volume_size, (const void*)&volume_size, sizeof(double3), + + const double3 *volume = &volume_size; + debug_message = std::string(" copy volume size "); + cuda_status = cudaMemcpyToSymbol(d_volume_size, (const void*)&volume, sizeof(double3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); + debug_message = std::string(" copy cell size "); cuda_status = cudaMemcpyToSymbol(d_cell_size, (const void*)&cell_size, sizeof(double3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); @@ -249,9 +252,17 @@ void SpatialMeshCu::copy_boundary_to_device(Config &conf) { cudaError_t cuda_status; //boundary params std::string debug_message = std::string(" copy border constants "); - cuda_status = cudaMemcpyToSymbol(d_boundary, (void*)&conf.boundary_config_part.boundary_phi_left, + double boundary[6]; + boundary[RIGHT] = 
conf.boundary_config_part.boundary_phi_right; + boundary[LEFT] = conf.boundary_config_part.boundary_phi_left; + boundary[TOP] = conf.boundary_config_part.boundary_phi_top; + boundary[BOTTOM] = conf.boundary_config_part.boundary_phi_bottom; + boundary[NEAR] = conf.boundary_config_part.boundary_phi_near; + boundary[FAR] = conf.boundary_config_part.boundary_phi_far; + cuda_status = cudaMemcpyToSymbol(d_boundary, (const void*)boundary, sizeof(double)*6, cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); + delete[] boundary; } void SpatialMeshCu::allocate_ongrid_values() { From c63c2dda757f8ac835e219ffeff1799cd3222ee5 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 00:43:35 +0700 Subject: [PATCH 41/83] copy constants workaround --- SpatialMeshCu.cu | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 7fcb269..dfee402 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -259,7 +259,8 @@ void SpatialMeshCu::copy_boundary_to_device(Config &conf) { boundary[BOTTOM] = conf.boundary_config_part.boundary_phi_bottom; boundary[NEAR] = conf.boundary_config_part.boundary_phi_near; boundary[FAR] = conf.boundary_config_part.boundary_phi_far; - cuda_status = cudaMemcpyToSymbol(d_boundary, (const void*)boundary, + const double *c_boundary = boundary; + cuda_status = cudaMemcpyToSymbol(d_boundary, (const void*)c_boundary, sizeof(double)*6, cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); delete[] boundary; @@ -272,18 +273,22 @@ void SpatialMeshCu::allocate_ongrid_values() { size_t total_node_count = nx * ny * nz; cudaError_t cuda_status; - std::string debug_message = std::string(" copy borders "); - cuda_status = cudaMalloc < double3 >(&dev_node_coordinates, total_node_count); + std::string debug_message = std::string(" malloc coords"); + + cuda_status = cudaMalloc < double3 >(&dev_node_coordinates, sizeof(double3) * total_node_count); 
cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMalloc(&dev_charge_density, total_node_count); + debug_message = std::string(" malloc charde density"); + cuda_status = cudaMalloc(&dev_charge_density, sizeof(double3) * total_node_count); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMalloc(&dev_potential, total_node_count); + debug_message = std::string(" malloc potential"); + cuda_status = cudaMalloc(&dev_potential, sizeof(double3) * total_node_count); cuda_status_check(cuda_status, debug_message); - cuda_status = cudaMalloc < double3 >(&dev_electric_field, total_node_count); + debug_message = std::string(" malloc field"); + cuda_status = cudaMalloc < double3 >(&dev_electric_field, sizeof(double3) * total_node_count); cuda_status_check(cuda_status, debug_message); return; From 07e7cb585dfe3465d033bfacfae242eb91469116 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 00:47:57 +0700 Subject: [PATCH 42/83] removed implicit copy direction --- SpatialMeshCu.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index dfee402..4cbbb45 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -261,7 +261,7 @@ void SpatialMeshCu::copy_boundary_to_device(Config &conf) { boundary[FAR] = conf.boundary_config_part.boundary_phi_far; const double *c_boundary = boundary; cuda_status = cudaMemcpyToSymbol(d_boundary, (const void*)c_boundary, - sizeof(double)*6, cudaMemcpyHostToDevice); + sizeof(double)*6); cuda_status_check(cuda_status, debug_message); delete[] boundary; } From b7490ba715622f4072beb7a58914670937e31209 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 01:18:57 +0700 Subject: [PATCH 43/83] boundary delete line remover --- SpatialMeshCu.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 4cbbb45..523b2b3 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -263,7 +263,6 @@ void 
SpatialMeshCu::copy_boundary_to_device(Config &conf) { cuda_status = cudaMemcpyToSymbol(d_boundary, (const void*)c_boundary, sizeof(double)*6); cuda_status_check(cuda_status, debug_message); - delete[] boundary; } void SpatialMeshCu::allocate_ongrid_values() { From 3de639fd0bd70f891b9a772771059fed2bab4eb5 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 01:28:00 +0700 Subject: [PATCH 44/83] nodes copy fix --- SpatialMeshCu.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 523b2b3..eff963e 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -230,7 +230,7 @@ void SpatialMeshCu::copy_constants_to_device() { //mesh params const int3 *nodes = &n_nodes; std::string debug_message = std::string(" copy nodes number "); - cuda_status = cudaMemcpyToSymbol(d_n_nodes, (const void*)nodes, sizeof(dim3), + cuda_status = cudaMemcpyToSymbol(d_n_nodes, (const void*)nodes, sizeof(int3), cudaMemcpyHostToDevice); cuda_status_check(cuda_status, debug_message); @@ -279,11 +279,11 @@ void SpatialMeshCu::allocate_ongrid_values() { cuda_status_check(cuda_status, debug_message); debug_message = std::string(" malloc charde density"); - cuda_status = cudaMalloc(&dev_charge_density, sizeof(double3) * total_node_count); + cuda_status = cudaMalloc(&dev_charge_density, sizeof(double) * total_node_count); cuda_status_check(cuda_status, debug_message); debug_message = std::string(" malloc potential"); - cuda_status = cudaMalloc(&dev_potential, sizeof(double3) * total_node_count); + cuda_status = cudaMalloc(&dev_potential, sizeof(double) * total_node_count); cuda_status_check(cuda_status, debug_message); debug_message = std::string(" malloc field"); From 1d60c226b6726cdbeb07e81d91ee928c62d1be89 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 01:28:00 +0700 Subject: [PATCH 45/83] nodes copy fix --- SpatialMeshCu.cu | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git 
a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 523b2b3..1894eb7 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -230,19 +230,16 @@ void SpatialMeshCu::copy_constants_to_device() { //mesh params const int3 *nodes = &n_nodes; std::string debug_message = std::string(" copy nodes number "); - cuda_status = cudaMemcpyToSymbol(d_n_nodes, (const void*)nodes, sizeof(dim3), - cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(d_n_nodes, (const void*)nodes, sizeof(int3)); cuda_status_check(cuda_status, debug_message); const double3 *volume = &volume_size; debug_message = std::string(" copy volume size "); - cuda_status = cudaMemcpyToSymbol(d_volume_size, (const void*)&volume, sizeof(double3), - cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(d_volume_size, (const void*)&volume, sizeof(double3)); cuda_status_check(cuda_status, debug_message); debug_message = std::string(" copy cell size "); - cuda_status = cudaMemcpyToSymbol(d_cell_size, (const void*)&cell_size, sizeof(double3), - cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(d_cell_size, (const void*)&cell_size, sizeof(double3)); cuda_status_check(cuda_status, debug_message); return; @@ -279,11 +276,11 @@ void SpatialMeshCu::allocate_ongrid_values() { cuda_status_check(cuda_status, debug_message); debug_message = std::string(" malloc charde density"); - cuda_status = cudaMalloc(&dev_charge_density, sizeof(double3) * total_node_count); + cuda_status = cudaMalloc(&dev_charge_density, sizeof(double) * total_node_count); cuda_status_check(cuda_status, debug_message); debug_message = std::string(" malloc potential"); - cuda_status = cudaMalloc(&dev_potential, sizeof(double3) * total_node_count); + cuda_status = cudaMalloc(&dev_potential, sizeof(double) * total_node_count); cuda_status_check(cuda_status, debug_message); debug_message = std::string(" malloc field"); From 9d9a0be751f805cb18617b3e5c5de86431766f5f Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 01:45:14 +0700 
Subject: [PATCH 46/83] fieldSolver constants copy fix --- FieldSolver.cu | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index e567c2c..2244f86 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -127,9 +127,9 @@ __global__ void AssertConvergence(const double* d_phi_current, const double* d_p FieldSolver::FieldSolver(SpatialMeshCu &mesh, Inner_regions_manager &inner_regions) : mesh(mesh) { allocate_next_phi(); - std::cout << "solver memory allocation"; + std::cout << "solver memory allocation "; copy_constants_to_device(); - std::cout << "solver copy constants"; + std::cout << " solver copy constants "; } void FieldSolver::allocate_next_phi() @@ -144,35 +144,28 @@ void FieldSolver::allocate_next_phi() void FieldSolver::copy_constants_to_device() { cudaError_t cuda_status; - cuda_status = cudaMemcpyToSymbol(d_n_nodes, (void*)&mesh.n_nodes, sizeof(dim3), - cudaMemcpyHostToDevice); - cuda_status = cudaMemcpyToSymbol(d_cell_size, (void*)&mesh.cell_size, sizeof(double3), - cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(d_n_nodes, (const void*)&mesh.n_nodes, sizeof(dim3)); + cuda_status = cudaMemcpyToSymbol(d_cell_size, (const void*)&mesh.cell_size, sizeof(double3)); double dxdxdydy = mesh.cell_size.x * mesh.cell_size.x * mesh.cell_size.y * mesh.cell_size.y; - cuda_status = cudaMemcpyToSymbol(dev_dxdxdydy, (void*)&dxdxdydy, sizeof(double), - cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(dev_dxdxdydy, (const void*)&dxdxdydy, sizeof(double)); double dxdxdzdz = mesh.cell_size.x * mesh.cell_size.x * mesh.cell_size.z * mesh.cell_size.z; - cuda_status = cudaMemcpyToSymbol(dev_dxdxdzdz, (void*)&dxdxdzdz, sizeof(double), - cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(dev_dxdxdzdz, (const void*)&dxdxdzdz, sizeof(double)); double dydydzdz = mesh.cell_size.y * mesh.cell_size.y * mesh.cell_size.z * mesh.cell_size.z; - cuda_status = cudaMemcpyToSymbol(dev_dydydzdz, 
(void*)&dydydzdz, sizeof(double), - cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(dev_dydydzdz, (const void*)&dydydzdz, sizeof(double)); double dxdxdydydzdz = mesh.cell_size.x * mesh.cell_size.x * mesh.cell_size.y * mesh.cell_size.y * mesh.cell_size.z * mesh.cell_size.z; - cuda_status = cudaMemcpyToSymbol(dev_dxdxdydydzdz, (void*)&dxdxdydydzdz, sizeof(double), - cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(dev_dxdxdydydzdz, (const void*)&dxdxdydydzdz, sizeof(double)); int end = mesh.n_nodes.x * mesh.n_nodes.y * mesh.n_nodes.z - 1; - cuda_status = cudaMemcpyToSymbol(dev_end, (void*)&end, sizeof(int), - cudaMemcpyHostToDevice); + cuda_status = cudaMemcpyToSymbol(dev_end, (const void*)&end, sizeof(int)); } void FieldSolver::eval_potential(Inner_regions_manager &inner_regions) From 1362490a24bdc15f3fa41c39fa63fa18396f06d0 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 02:11:48 +0700 Subject: [PATCH 47/83] new convergence attempt --- FieldSolver.cu | 70 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index 2244f86..3f69073 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -111,17 +111,55 @@ __global__ void EvaluateFields(const double* dev_potential, double3* dev_el_fiel } -__global__ void AssertConvergence(const double* d_phi_current, const double* d_phi_next) { +//__global__ void AssertConvergence(const double* d_phi_current, const double* d_phi_next) { +// double rel_diff; +// double abs_diff; +// double abs_tolerance = 1.0e-5; +// double rel_tolerance = 1.0e-12; +// int idx = GetIdx(); +// abs_diff = fabs(d_phi_next[idx] - d_phi_current[idx]); +// rel_diff = abs_diff / fabs(d_phi_current[idx]); +// bool converged = ((abs_diff <= abs_tolerance) || (rel_diff <= rel_tolerance)); +// +// assert(converged==true); +//} + +template +__global__ void Convergence(const double* d_phi_current, const double* d_phi_next, unsigned int 
*d_convergence) +{ + __shared__ int w_convegence[nwarps]; + unsigned int laneid = (threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.x * blockDim.y) % warpSize; + unsigned int warpid = (threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.x * blockDim.y) / warpSize; + double rel_diff; double abs_diff; double abs_tolerance = 1.0e-5; double rel_tolerance = 1.0e-12; - int idx = GetIdx(); + + int idx = GetIdxVolume(); + abs_diff = fabs(d_phi_next[idx] - d_phi_current[idx]); rel_diff = abs_diff / fabs(d_phi_current[idx]); - bool converged = ((abs_diff <= abs_tolerance) || (rel_diff <= rel_tolerance)); - assert(converged==true); + unsigned int converged = ((abs_diff <= abs_tolerance) || (rel_diff <= rel_tolerance)); + + converged = __all_sync(FULL_MASK, converged == 1 ); + + if (laneid == 0) { + w_convegence[warpid] = converged; + } + __syncthreads(); + + if (threadIdx.x == 0) { + int b_convergence = 0; +#pragma unroll + for (int i = 0; i>>(mesh.dev_potential,dev_phi_next); + + unsigned int *convergence, *d_convergence;//host,device flags + status = cudaHostAlloc((void **)&convergence, sizeof(unsigned int), cudaHostAllocMapped); + status = cudaHostGetDevicePointer((void **)&d_convergence, convergence, 0); + + int nwarps = 2; + Convergence<<>>(mesh.dev_potential, dev_phi_next, d_convergence); status = cudaDeviceSynchronize(); - if (status == cudaErrorAssert) { - return false; - } - if (status == cudaSuccess) { - return true; - } + //if (status == cudaErrorAssert) { + // return false; + //} + //if (status == cudaSuccess) { + // return true; + //} std::cout << "Cuda error: " << cudaGetErrorString(status) << std::endl; - return false; + return *convergence ==1 ; } From f382c628241141618e604efbfb2286faa0caacd2 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 02:12:35 +0700 Subject: [PATCH 48/83] resulting check inversion --- FieldSolver.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FieldSolver.cu 
b/FieldSolver.cu index 3f69073..51df978 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -288,7 +288,7 @@ bool FieldSolver::iterative_Jacobi_solutions_converged() //} std::cout << "Cuda error: " << cudaGetErrorString(status) << std::endl; - return *convergence ==1 ; + return *convergence == 0 ; } From 6f0dce69641cfcf05b7a610415a747254b9e7f90 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 02:19:23 +0700 Subject: [PATCH 49/83] const warps number in block for convergence --- FieldSolver.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index 51df978..35dff4b 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -277,7 +277,7 @@ bool FieldSolver::iterative_Jacobi_solutions_converged() status = cudaHostAlloc((void **)&convergence, sizeof(unsigned int), cudaHostAllocMapped); status = cudaHostGetDevicePointer((void **)&d_convergence, convergence, 0); - int nwarps = 2; + const int nwarps = 2; Convergence<<>>(mesh.dev_potential, dev_phi_next, d_convergence); status = cudaDeviceSynchronize(); //if (status == cudaErrorAssert) { From 89a19af314ae8b7d21387907af14b115271517bf Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 02:25:40 +0700 Subject: [PATCH 50/83] mask, GetIdx usage fix --- FieldSolver.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index 35dff4b..dea73d3 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -2,6 +2,8 @@ +#define FULL_MASK 0xffffffff +//mask for __all_sync used in convergence method __constant__ double3 d_cell_size[1]; __constant__ int3 d_n_nodes[1]; @@ -136,7 +138,7 @@ __global__ void Convergence(const double* d_phi_current, const double* d_phi_nex double abs_tolerance = 1.0e-5; double rel_tolerance = 1.0e-12; - int idx = GetIdxVolume(); + int idx = GetIdx(); abs_diff = fabs(d_phi_next[idx] - d_phi_current[idx]); rel_diff = abs_diff / fabs(d_phi_current[idx]); From cb515b927c32d6f0ba2e819327ad5cd8800a9264 
Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 02:27:24 +0700 Subject: [PATCH 51/83] bchanged->b_convergence --- FieldSolver.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index dea73d3..629361e 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -158,7 +158,7 @@ __global__ void Convergence(const double* d_phi_current, const double* d_phi_nex for (int i = 0; i Date: Fri, 28 Dec 2018 02:29:59 +0700 Subject: [PATCH 52/83] debug message hided in commentary --- FieldSolver.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index 629361e..fa06a4c 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -289,7 +289,7 @@ bool FieldSolver::iterative_Jacobi_solutions_converged() // return true; //} - std::cout << "Cuda error: " << cudaGetErrorString(status) << std::endl; + //std::cout << "Cuda error: " << cudaGetErrorString(status) << std::endl; return *convergence == 0 ; } From 429455da371e2fe165f6912ef49c22bcca38938a Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Fri, 28 Dec 2018 02:39:26 +0700 Subject: [PATCH 53/83] debug messages for write hdf --- FieldSolver.cu | 4 ++-- SpatialMeshCu.cu | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index fa06a4c..41bbab4 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -167,9 +167,9 @@ __global__ void Convergence(const double* d_phi_current, const double* d_phi_nex FieldSolver::FieldSolver(SpatialMeshCu &mesh, Inner_regions_manager &inner_regions) : mesh(mesh) { allocate_next_phi(); - std::cout << "solver memory allocation "; + //std::cout << "solver memory allocation "; copy_constants_to_device(); - std::cout << " solver copy constants "; + //std::cout << " solver copy constants "; } void FieldSolver::allocate_next_phi() diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 1894eb7..49df3c7 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ 
-463,6 +463,8 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { double *ny = new double[dims[0]]; double *nz = new double[dims[0]]; + debug_message = std::string(" write hdf5 node_coords"); + double3 *hdf5_tmp_write_data = new double3[dims[0]]; cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_node_coordinates, sizeof(double3) * dims[0], cudaMemcpyDeviceToHost); @@ -473,7 +475,7 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { nz[i] = hdf5_tmp_write_data[i].z; } - dset = H5Dcreate(group_id, "./node_coordinates_x", H5T_IEEE_F64BE, + dset = H5Dcreate(group_id, "./node_coordinates_x ", H5T_IEEE_F64BE, filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, @@ -505,6 +507,8 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { delete[] hdf5_tmp_write_data; } { + debug_message = std::string(" write hdf5 charge_density "); + double *hdf5_tmp_write_data = new double[dims[0]]; cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_charge_density, sizeof(double) * dims[0], cudaMemcpyDeviceToHost); @@ -521,10 +525,13 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { delete[] hdf5_tmp_write_data; } { + + debug_message = std::string(" write hdf5 potential "); double *hdf5_tmp_write_data = new double[dims[0]]; cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_potential, sizeof(double) * dims[0], cudaMemcpyDeviceToHost); cuda_status_check(cuda_status, debug_message); + dset = H5Dcreate(group_id, "./potential", H5T_IEEE_F64BE, filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); @@ -537,6 +544,8 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { } { + debug_message = std::string(" write hdf5 charge_density "); + double *ex = new double[dims[0]]; double *ey = new double[dims[0]]; double *ez = new double[dims[0]]; From b445c7a9d72b4ff0e83685194f64f08bfc600e28 Mon Sep 17 00:00:00 2001 From: noooway Date: Fri, 
28 Dec 2018 17:39:15 +0300 Subject: [PATCH 54/83] Fix saving of spat_mesh.electric_field to hdf5 Correct arguments in cudaMemcpy and change in debug_msg --- SpatialMeshCu.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 49df3c7..7483ab9 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -544,13 +544,13 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { } { - debug_message = std::string(" write hdf5 charge_density "); + debug_message = std::string(" write electric field to hdf5 "); double *ex = new double[dims[0]]; double *ey = new double[dims[0]]; double *ez = new double[dims[0]]; double3 *hdf5_tmp_write_data = new double3[dims[0]]; - cuda_status = cudaMemcpy(dev_node_coordinates, hdf5_tmp_write_data, + cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_electric_field, sizeof(double3) * dims[0], cudaMemcpyDeviceToHost); cuda_status_check(cuda_status, debug_message); From 4c7f2787a9580bb878f0f97911bcb4807cf12011 Mon Sep 17 00:00:00 2001 From: noooway Date: Fri, 28 Dec 2018 17:51:15 +0300 Subject: [PATCH 55/83] Remove space in hdf5 group name "./node_coordinates_x " -> "./node_coordinates_x" --- SpatialMeshCu.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 7483ab9..242b992 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -475,7 +475,7 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { nz[i] = hdf5_tmp_write_data[i].z; } - dset = H5Dcreate(group_id, "./node_coordinates_x ", H5T_IEEE_F64BE, + dset = H5Dcreate(group_id, "./node_coordinates_x", H5T_IEEE_F64BE, filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); hdf5_status_check(dset); status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, From 6b1fd396f285fd4312ebea11dae89466c4534588 Mon Sep 17 00:00:00 2001 From: noooway Date: Fri, 28 Dec 2018 18:17:25 +0300 Subject: [PATCH 56/83] In SpatialMeshCu::fill_node_coordinates minor formatting fixes 
--- SpatialMeshCu.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 242b992..1862b5c 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -295,8 +295,8 @@ void SpatialMeshCu::fill_node_coordinates() { dim3 blocks = GetBlocks(threads); cudaError_t cuda_status; std::string debug_message = std::string(" fill coordinates "); - fill_coordinates <<< blocks,threads>>> (dev_node_coordinates); - cuda_status= cudaDeviceSynchronize(); + fill_coordinates<<>>(dev_node_coordinates); + cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); return; From e2cd61f8137a7187b3d4a1eb77aa239fc7b15ad7 Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 29 Dec 2018 11:32:38 +0300 Subject: [PATCH 57/83] Correct node coordinates calculation 'd_volume_size' -> 'd_cell_size' in fill_coordinates --- SpatialMeshCu.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 1862b5c..d0f138a 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -29,13 +29,13 @@ __device__ int GetIdxVolume() { __global__ void fill_coordinates(double3* node_coordinates) { - int idx = GetIdxVolume(); + int plain_idx = GetIdxVolume(); - int x = threadIdx.x + blockIdx.x * blockDim.x; - int y = threadIdx.y + blockIdx.y * blockDim.y; - int z = threadIdx.z + blockIdx.z * blockDim.z; - node_coordinates[idx] = make_double3(d_volume_size[0].x * x, - d_volume_size[0].y * y, d_volume_size[0].z * z); //(double)., + int node_x = threadIdx.x + blockIdx.x * blockDim.x; + int node_y = threadIdx.y + blockIdx.y * blockDim.y; + int node_z = threadIdx.z + blockIdx.z * blockDim.z; + node_coordinates[plain_idx] = make_double3(d_cell_size[0].x * node_x, + d_volume_size[0].y * node_y, d_volume_size[0].z * node_z); } __global__ void SetBoundaryConditionOrthoX(double* potential) { From 48375cac4c267a520b95b112673277a04233ca86 Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 29 Dec 
2018 11:37:38 +0300 Subject: [PATCH 58/83] d_volume_size -> d_cell_size in two other dimensions --- SpatialMeshCu.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index d0f138a..00567e1 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -35,7 +35,7 @@ __global__ void fill_coordinates(double3* node_coordinates) { int node_y = threadIdx.y + blockIdx.y * blockDim.y; int node_z = threadIdx.z + blockIdx.z * blockDim.z; node_coordinates[plain_idx] = make_double3(d_cell_size[0].x * node_x, - d_volume_size[0].y * node_y, d_volume_size[0].z * node_z); + d_cell_size[0].y * node_y, d_cell_size[0].z * node_z); } __global__ void SetBoundaryConditionOrthoX(double* potential) { From 0209cc415327d48d4aaa0c3bd293c9c3aee1453e Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 29 Dec 2018 12:05:29 +0300 Subject: [PATCH 59/83] uniform formatting --- SpatialMeshCu.cu | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 00567e1..0a10c9a 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -257,8 +257,7 @@ void SpatialMeshCu::copy_boundary_to_device(Config &conf) { boundary[NEAR] = conf.boundary_config_part.boundary_phi_near; boundary[FAR] = conf.boundary_config_part.boundary_phi_far; const double *c_boundary = boundary; - cuda_status = cudaMemcpyToSymbol(d_boundary, (const void*)c_boundary, - sizeof(double)*6); + cuda_status = cudaMemcpyToSymbol(d_boundary, (const void*)c_boundary, sizeof(double) * 6); cuda_status_check(cuda_status, debug_message); } @@ -272,7 +271,7 @@ void SpatialMeshCu::allocate_ongrid_values() { std::string debug_message = std::string(" malloc coords"); - cuda_status = cudaMalloc < double3 >(&dev_node_coordinates, sizeof(double3) * total_node_count); + cuda_status = cudaMalloc(&dev_node_coordinates, sizeof(double3) * total_node_count); cuda_status_check(cuda_status, debug_message); debug_message = std::string(" malloc charde density"); @@ 
-284,7 +283,7 @@ void SpatialMeshCu::allocate_ongrid_values() { cuda_status_check(cuda_status, debug_message); debug_message = std::string(" malloc field"); - cuda_status = cudaMalloc < double3 >(&dev_electric_field, sizeof(double3) * total_node_count); + cuda_status = cudaMalloc(&dev_electric_field, sizeof(double3) * total_node_count); cuda_status_check(cuda_status, debug_message); return; From 04cf2130ea8f6a2e855504b42b50b2ac41d0a250 Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 29 Dec 2018 12:32:44 +0300 Subject: [PATCH 60/83] Reminder to determine number of threads dynamically --- SpatialMeshCu.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 0a10c9a..0e85589 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -692,6 +692,7 @@ void SpatialMeshCu::cuda_status_check(cudaError_t status, std::string &sender) } dim3 SpatialMeshCu::GetThreads() { + // todo: explicitly determine number of threads from GPU warp size return dim3(4, 4, 4); } From 505d4a64d3d3597fdf85f0e677199fb1c82b2473 Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 29 Dec 2018 12:51:41 +0300 Subject: [PATCH 61/83] Explicit functions to map between thread, volume and array indexes --- SpatialMeshCu.cu | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 0e85589..498b22f 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -12,30 +12,35 @@ __constant__ double d_boundary[6]; #define FAR 4 #define NEAR 5 -__device__ int GetIdxVolume() { - //int xStepthread = 1; +__device__ int thread_idx_to_array_idx( int3 *d_n_nodes ){ + int xStepThread = 1; int xStepBlock = blockDim.x; - int yStepThread = d_n_nodes[0].x; int yStepBlock = yStepThread * blockDim.y; - int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; int zStepBlock = zStepThread * blockDim.z; - return threadIdx.x + blockIdx.x * xStepBlock + threadIdx.y * yStepThread - + blockIdx.y * yStepBlock + threadIdx.z * 
zStepThread - + blockIdx.z * zStepBlock; + return threadIdx.x * xStepThread + blockIdx.x * xStepBlock + + threadIdx.y * yStepThread + blockIdx.y * yStepBlock + + threadIdx.z * zStepThread + blockIdx.z * zStepBlock; } -__global__ void fill_coordinates(double3* node_coordinates) { +__device__ int3 thread_idx_to_volume_idx( int3 *d_n_nodes ){ + // each thread handles single volume node + int3 vol_idx = make_int3( threadIdx.x + blockIdx.x * blockDim.x, + threadIdx.y + blockIdx.y * blockDim.y, + threadIdx.z + blockIdx.z * blockDim.z ); + return vol_idx; +} - int plain_idx = GetIdxVolume(); - int node_x = threadIdx.x + blockIdx.x * blockDim.x; - int node_y = threadIdx.y + blockIdx.y * blockDim.y; - int node_z = threadIdx.z + blockIdx.z * blockDim.z; - node_coordinates[plain_idx] = make_double3(d_cell_size[0].x * node_x, - d_cell_size[0].y * node_y, d_cell_size[0].z * node_z); +__global__ void fill_coordinates(double3* node_coordinates) { + int plain_idx = thread_idx_to_array_idx( d_n_nodes ); + int3 vol_idx = thread_idx_to_volume_idx( d_n_nodes ); + + node_coordinates[plain_idx] = make_double3(d_cell_size[0].x * vol_idx.x, + d_cell_size[0].y * vol_idx.y, + d_cell_size[0].z * vol_idx.z); } __global__ void SetBoundaryConditionOrthoX(double* potential) { From 9e5656b4f3cef85b58424b9ba2467e6a74d3a89c Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 29 Dec 2018 13:59:33 +0300 Subject: [PATCH 62/83] Attempt to fix boundary conditions --- SpatialMeshCu.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 498b22f..8427c82 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -319,7 +319,7 @@ void SpatialMeshCu::set_boundary_conditions(double* d_potential) { cudaError_t cuda_status; std::string debug_message = std::string(" set boundary "); - dim3 blocks = dim3(n_nodes.y / 4, n_nodes.z / 4, 1); + dim3 blocks = dim3(n_nodes.y / 4, n_nodes.z / 4, 2); SetBoundaryConditionOrthoX <<< blocks, threads >>> (d_potential); 
cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); From b4a2d69af287f2ed660dd013784251827234e01b Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 29 Dec 2018 14:32:09 +0300 Subject: [PATCH 63/83] Rename vol_idx -> mesh_idx --- SpatialMeshCu.cu | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 8427c82..3755ea0 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -25,22 +25,22 @@ __device__ int thread_idx_to_array_idx( int3 *d_n_nodes ){ threadIdx.z * zStepThread + blockIdx.z * zStepBlock; } -__device__ int3 thread_idx_to_volume_idx( int3 *d_n_nodes ){ +__device__ int3 thread_idx_to_mesh_idx( int3 *d_n_nodes ){ // each thread handles single volume node - int3 vol_idx = make_int3( threadIdx.x + blockIdx.x * blockDim.x, - threadIdx.y + blockIdx.y * blockDim.y, - threadIdx.z + blockIdx.z * blockDim.z ); - return vol_idx; + int3 mesh_idx = make_int3( threadIdx.x + blockIdx.x * blockDim.x, + threadIdx.y + blockIdx.y * blockDim.y, + threadIdx.z + blockIdx.z * blockDim.z ); + return mesh_idx; } __global__ void fill_coordinates(double3* node_coordinates) { int plain_idx = thread_idx_to_array_idx( d_n_nodes ); - int3 vol_idx = thread_idx_to_volume_idx( d_n_nodes ); + int3 mesh_idx = thread_idx_to_mesh_idx( d_n_nodes ); - node_coordinates[plain_idx] = make_double3(d_cell_size[0].x * vol_idx.x, - d_cell_size[0].y * vol_idx.y, - d_cell_size[0].z * vol_idx.z); + node_coordinates[plain_idx] = make_double3(d_cell_size[0].x * mesh_idx.x, + d_cell_size[0].y * mesh_idx.y, + d_cell_size[0].z * mesh_idx.z); } __global__ void SetBoundaryConditionOrthoX(double* potential) { From 46ab2acb0695b3c73c349667669b761d4de51a1d Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 29 Dec 2018 15:15:19 +0300 Subject: [PATCH 64/83] Rewrite SetBoundaryConditionOrthoX --- SpatialMeshCu.cu | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git 
a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 3755ea0..5c8f374 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -43,21 +43,16 @@ __global__ void fill_coordinates(double3* node_coordinates) { d_cell_size[0].z * mesh_idx.z); } -__global__ void SetBoundaryConditionOrthoX(double* potential) { - int xIdx = blockIdx.z * (d_n_nodes[0].x - 1); //0 or nodes.x-1 - - int yStepThread = d_n_nodes[0].x; //x= - int yStepBlock = d_n_nodes[0].x * blockDim.x; - - int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; - int zStepBlock = zStepThread * blockDim.y; - - int idx = xIdx + threadIdx.x * yStepThread + blockIdx.x * yStepBlock - + threadIdx.y * zStepThread + blockIdx.y * zStepBlock; - - potential[idx] = ((double)(1 - blockIdx.z)) * d_boundary[LEFT] - + (blockIdx.z * d_boundary[RIGHT]); +__global__ void SetBoundaryConditionsX(double* potential, int3 *d_n_nodes){ + int mesh_x = threadIdx.x * (d_n_nodes[0].x - 1); + int mesh_y = threadIdx.y + blockIdx.y * blockDim.y; + int mesh_z = threadIdx.z + blockIdx.z * blockDim.z; + + int plain_idx = mesh_x + + mesh_y * d_n_nodes[0].x + + mesh_z * d_n_nodes[0].x * d_n_nodes[0].y; + potential[plain_idx] = threadIdx.x * d_boundary[LEFT] + (1.0 - threadIdx.x) * d_boundary[RIGHT]; } __global__ void SetBoundaryConditionOrthoY(double* potential) { @@ -315,15 +310,18 @@ void SpatialMeshCu::clear_old_density_values() { } void SpatialMeshCu::set_boundary_conditions(double* d_potential) { - dim3 threads = dim3(4, 4, 2); + dim3 threads, blocks; cudaError_t cuda_status; std::string debug_message = std::string(" set boundary "); - dim3 blocks = dim3(n_nodes.y / 4, n_nodes.z / 4, 2); - SetBoundaryConditionOrthoX <<< blocks, threads >>> (d_potential); + // todo: no magic numbers + threads = dim3(2, 4, 4); + blocks = dim3(2, n_nodes.y / 4, n_nodes.z / 4); + SetBoundaryConditionsX<<>>(d_potential, d_n_nodes); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); - + + threads = dim3(4, 4, 2); blocks = dim3(n_nodes.x / 4, 
n_nodes.z / 4, 2); SetBoundaryConditionOrthoY <<< blocks, threads >>> (d_potential); cuda_status = cudaDeviceSynchronize(); From 802b392b9571de6e5fb703b0fed02e3e1f605d34 Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 29 Dec 2018 15:39:06 +0300 Subject: [PATCH 65/83] Fix n of blocks in set_boundary_conditions --- SpatialMeshCu.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 5c8f374..50fe474 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -43,7 +43,8 @@ __global__ void fill_coordinates(double3* node_coordinates) { d_cell_size[0].z * mesh_idx.z); } -__global__ void SetBoundaryConditionsX(double* potential, int3 *d_n_nodes){ +__global__ void SetBoundaryConditionsX(double* potential, int3 *d_n_nodes){ + // right: threadIdx.x = 0, left: threadIdx.x = 1 int mesh_x = threadIdx.x * (d_n_nodes[0].x - 1); int mesh_y = threadIdx.y + blockIdx.y * blockDim.y; int mesh_z = threadIdx.z + blockIdx.z * blockDim.z; @@ -316,11 +317,12 @@ void SpatialMeshCu::set_boundary_conditions(double* d_potential) { // todo: no magic numbers threads = dim3(2, 4, 4); - blocks = dim3(2, n_nodes.y / 4, n_nodes.z / 4); + blocks = dim3(1, n_nodes.y / 4, n_nodes.z / 4); SetBoundaryConditionsX<<>>(d_potential, d_n_nodes); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); + // todo: simplify threads = dim3(4, 4, 2); blocks = dim3(n_nodes.x / 4, n_nodes.z / 4, 2); SetBoundaryConditionOrthoY <<< blocks, threads >>> (d_potential); From 765ccf978e24d8752f927e23c34ac992eace1a3a Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 29 Dec 2018 15:49:56 +0300 Subject: [PATCH 66/83] Use blockIdx instead of threadIdx to determine boundary side --- SpatialMeshCu.cu | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 50fe474..f2ec89f 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -44,8 +44,8 @@ __global__ void 
fill_coordinates(double3* node_coordinates) { } __global__ void SetBoundaryConditionsX(double* potential, int3 *d_n_nodes){ - // right: threadIdx.x = 0, left: threadIdx.x = 1 - int mesh_x = threadIdx.x * (d_n_nodes[0].x - 1); + // blockIdx.x = 0 or 1; 0 - right boundary, 1 - left boundary + int mesh_x = blockIdx.x * (d_n_nodes[0].x - 1); int mesh_y = threadIdx.y + blockIdx.y * blockDim.y; int mesh_z = threadIdx.z + blockIdx.z * blockDim.z; @@ -53,7 +53,7 @@ __global__ void SetBoundaryConditionsX(double* potential, int3 *d_n_nodes){ mesh_y * d_n_nodes[0].x + mesh_z * d_n_nodes[0].x * d_n_nodes[0].y; - potential[plain_idx] = threadIdx.x * d_boundary[LEFT] + (1.0 - threadIdx.x) * d_boundary[RIGHT]; + potential[plain_idx] = blockIdx.x * d_boundary[LEFT] + (1.0 - blockIdx.x) * d_boundary[RIGHT]; } __global__ void SetBoundaryConditionOrthoY(double* potential) { @@ -316,8 +316,8 @@ void SpatialMeshCu::set_boundary_conditions(double* d_potential) { std::string debug_message = std::string(" set boundary "); // todo: no magic numbers - threads = dim3(2, 4, 4); - blocks = dim3(1, n_nodes.y / 4, n_nodes.z / 4); + threads = dim3(1, 4, 4); + blocks = dim3(2, n_nodes.y / 4, n_nodes.z / 4); SetBoundaryConditionsX<<>>(d_potential, d_n_nodes); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); From a0d122ec442f860f10ce103fdce6ec73e1463cd4 Mon Sep 17 00:00:00 2001 From: noooway Date: Sat, 29 Dec 2018 15:59:16 +0300 Subject: [PATCH 67/83] Remove d_n_nodes from SetBoundaryConditionsX argument --- SpatialMeshCu.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index f2ec89f..f86be14 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -43,7 +43,7 @@ __global__ void fill_coordinates(double3* node_coordinates) { d_cell_size[0].z * mesh_idx.z); } -__global__ void SetBoundaryConditionsX(double* potential, int3 *d_n_nodes){ +__global__ void SetBoundaryConditionsX(double* potential){ // blockIdx.x 
= 0 or 1; 0 - right boundary, 1 - left boundary int mesh_x = blockIdx.x * (d_n_nodes[0].x - 1); int mesh_y = threadIdx.y + blockIdx.y * blockDim.y; @@ -318,7 +318,7 @@ void SpatialMeshCu::set_boundary_conditions(double* d_potential) { // todo: no magic numbers threads = dim3(1, 4, 4); blocks = dim3(2, n_nodes.y / 4, n_nodes.z / 4); - SetBoundaryConditionsX<<>>(d_potential, d_n_nodes); + SetBoundaryConditionsX<<>>(d_potential); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); From 7be0487ed587b50df496133ac61a3967d0c1a2be Mon Sep 17 00:00:00 2001 From: noooway Date: Sun, 30 Dec 2018 11:15:00 +0300 Subject: [PATCH 68/83] Change Makefile to work in GoogleColab --- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 2351dae..fb6de42 100644 --- a/Makefile +++ b/Makefile @@ -4,21 +4,21 @@ SHELL:=/bin/bash -O extglob ##### Compilers #CC=clang++ CC=g++ -NVCC=/usr/local/cuda10/bin/nvcc +NVCC=nvcc -HDF5FLAGS=-I/usr/local/hdf5/include -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security +HDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security WARNINGS=-Wall CFLAGS = ${HDF5FLAGS} -O2 -std=c++11 ${WARNINGS} LDFLAGS = -CUDAINCLUDES= -I/usr/local/cuda10/include +CUDAINCLUDES= -I/usr/local/cuda/include CUDAFLAGS= ${CUDAINCLUDES} -std=c++11 -arch=sm_30 ### Libraries COMMONLIBS=-lm BOOSTLIBS=-lboost_program_options -HDF5LIBS=-L/usr/local/hdf5/lib -lhdf5_hl -lhdf5 -Wl,-z,relro -lpthread -lz -ldl -lm -Wl,-rpath -Wl,/usr/local/hdf5/lib -CUDALIBS=-L/usr/local/cuda10/lib64/ -lcudart +HDF5LIBS=-L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5_hl -lhdf5 -Wl,-z,relro -lpthread -lz -ldl -lm -Wl,-rpath -Wl,/usr/local/hdf5/lib +CUDALIBS=-L/usr/local/cuda/lib64/ -lcudart LIBS=${COMMONLIBS} ${BOOSTLIBS} ${HDF5LIBS} ### Sources and 
executable From 15db6fcf01b9eccd7ccf17ecd8496e96a7af20ca Mon Sep 17 00:00:00 2001 From: noooway Date: Sun, 30 Dec 2018 11:49:43 +0300 Subject: [PATCH 69/83] Attemp to simplify Makefile --- Makefile | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index fb6de42..4c86c45 100644 --- a/Makefile +++ b/Makefile @@ -7,19 +7,18 @@ CC=g++ NVCC=nvcc HDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security +CUDAINCLUDES=-I/usr/local/cuda/include WARNINGS=-Wall -CFLAGS = ${HDF5FLAGS} -O2 -std=c++11 ${WARNINGS} +CFLAGS = ${HDF5FLAGS} ${CUDAINCLUDES} -O2 -std=c++11 ${WARNINGS} +NVCCFLAGS= ${HDF5FLAGS} ${CUDAINCLUDES} -O2 -std=c++11 -arch=sm_30 ${WARNINGS} LDFLAGS = -CUDAINCLUDES= -I/usr/local/cuda/include -CUDAFLAGS= ${CUDAINCLUDES} -std=c++11 -arch=sm_30 - ### Libraries COMMONLIBS=-lm BOOSTLIBS=-lboost_program_options -HDF5LIBS=-L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5_hl -lhdf5 -Wl,-z,relro -lpthread -lz -ldl -lm -Wl,-rpath -Wl,/usr/local/hdf5/lib +HDF5LIBS=-L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5_hl -lhdf5 -Wl,-z,relro -lpthread -lz -ldl -lm -Wl,-rpath -Wl,/usr/lib/x86_64-linux-gnu/hdf5/serial CUDALIBS=-L/usr/local/cuda/lib64/ -lcudart -LIBS=${COMMONLIBS} ${BOOSTLIBS} ${HDF5LIBS} +LIBS=${COMMONLIBS} ${BOOSTLIBS} ${HDF5LIBS} ${CUDALIBS} ### Sources and executable CPPSOURCES=$(wildcard *.cpp) @@ -28,7 +27,6 @@ CUSOURCES=$(wildcard *.cu) CUHEADERS=$(wildcard *.cuh) CUOBJECTS=$(CUSOURCES:%.cu=%.o) - OBJECTS=$(CPPSOURCES:%.cpp=%.o) EXECUTABLE=ef.out @@ -37,12 +35,12 @@ TINYEXPR=./lib/tinyexpr TINYEXPR_OBJ=./lib/tinyexpr/tinyexpr.o SUBDIRS=doc -$(EXECUTABLE): $(OBJECTS) $(TINYEXPR) $(CUOBJECTS) - $(CC) $(LDFLAGS) $(OBJECTS) $(TINYEXPR_OBJ) $(CUOBJECTS) -o $@ $(LIBS) $(CUDALIBS) -$(CUOBJECTS):%.o:%.cu $(CUHEADERS) - $(NVCC) $(CUDAFLAGS) -I/usr/local/hdf5/include -c $< -o $@ -$(OBJECTS):%.o:%.cpp 
$(CPPHEADERS) - $(CC) $(CFLAGS) $(CUDAINCLUDES) -c $< -o $@ +$(EXECUTABLE): $(OBJECTS) $(CUOBJECTS) $(TINYEXPR) + $(CC) $(LDFLAGS) $(OBJECTS) $(CUOBJECTS) $(TINYEXPR_OBJ) -o $@ $(LIBS) +$(CUOBJECTS):%.o:%.cu + $(NVCC) $(NVCCFLAGS) -c $< -o $@ +$(OBJECTS):%.o:%.cpp + $(CC) $(CFLAGS) -c $< -o $@ .PHONY: allsubdirs $(SUBDIRS) $(TINYEXPR) clean cleansubdirs cleanall From 638c2ae74cf5e0cc0e951881d083b89dd2de765a Mon Sep 17 00:00:00 2001 From: noooway Date: Sun, 30 Dec 2018 11:58:05 +0300 Subject: [PATCH 70/83] Fix include guards for SpatialMeshCu.cuh --- SpatialMeshCu.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 686fe1b..155a8f4 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -1,5 +1,5 @@ -#ifndef _SPATIAL_MESH_H_ -#define _SPATIAL_MESH_H_ +#ifndef _SPATIAL_MESH_CUH_ +#define _SPATIAL_MESH_CUH_ #include "cuda_runtime.h" #include "config.h" @@ -72,4 +72,4 @@ private: void cuda_status_check(cudaError_t status, std::string &sender); }; -#endif /* _SPATIAL_MESH_H_ */ +#endif /* _SPATIAL_MESH_CUH_ */ From d44be2b323dcd1e3ed14a41d2af1f0e5f5fe7cf7 Mon Sep 17 00:00:00 2001 From: noooway Date: Sun, 30 Dec 2018 12:04:11 +0300 Subject: [PATCH 71/83] Try to distinguish between system and local includes --- FieldSolver.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/FieldSolver.cuh b/FieldSolver.cuh index 4f19c1e..4f0bff7 100644 --- a/FieldSolver.cuh +++ b/FieldSolver.cuh @@ -3,12 +3,12 @@ #include #include +#include +#include +#include +#include #include "SpatialMeshCu.cuh" #include "inner_region.h" -#include "cuda_runtime.h" -#include -#include "device_launch_parameters.h" -#include "math_constants.h" class FieldSolver { public: From 8128935559e3959c44512edf3565e18143897c2e Mon Sep 17 00:00:00 2001 From: noooway Date: Sun, 30 Dec 2018 12:10:41 +0300 Subject: [PATCH 72/83] Remove -fstack-protector-strong option for NVCC --- Makefile | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4c86c45..8270e70 100644 --- a/Makefile +++ b/Makefile @@ -7,10 +7,12 @@ CC=g++ NVCC=nvcc HDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security +NVCCHDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -Wformat -Werror=format-security CUDAINCLUDES=-I/usr/local/cuda/include + WARNINGS=-Wall CFLAGS = ${HDF5FLAGS} ${CUDAINCLUDES} -O2 -std=c++11 ${WARNINGS} -NVCCFLAGS= ${HDF5FLAGS} ${CUDAINCLUDES} -O2 -std=c++11 -arch=sm_30 ${WARNINGS} +NVCCFLAGS= ${NVCCHDF5FLAGS} ${CUDAINCLUDES} -O2 -std=c++11 -arch=sm_30 ${WARNINGS} LDFLAGS = ### Libraries From 9f14719fa8d99c3c1d591dcd27d412143db15067 Mon Sep 17 00:00:00 2001 From: noooway Date: Sun, 30 Dec 2018 12:13:49 +0300 Subject: [PATCH 73/83] Remove -Wformat option from nvcc --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8270e70..dd9789f 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ CC=g++ NVCC=nvcc HDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security -NVCCHDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -Wformat -Werror=format-security +NVCCHDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -Werror=format-security CUDAINCLUDES=-I/usr/local/cuda/include WARNINGS=-Wall From 1c4b4122dd1b4e1d91ee00ab42d49201d3de47a3 Mon Sep 17 00:00:00 2001 From: noooway Date: Sun, 30 Dec 2018 12:15:31 +0300 Subject: [PATCH 74/83] Remove -Werror=format-security from nvcc --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dd9789f..d0cf40d 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ CC=g++ 
NVCC=nvcc HDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security -NVCCHDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -Werror=format-security +NVCCHDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g CUDAINCLUDES=-I/usr/local/cuda/include WARNINGS=-Wall From 2b34b0c5f1113ced3fe36dd2cefd1d0a97e95bee Mon Sep 17 00:00:00 2001 From: noooway Date: Sun, 30 Dec 2018 12:17:39 +0300 Subject: [PATCH 75/83] Remove -Wall from nvcc --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d0cf40d..a65d1c1 100644 --- a/Makefile +++ b/Makefile @@ -11,8 +11,9 @@ NVCCHDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOUR CUDAINCLUDES=-I/usr/local/cuda/include WARNINGS=-Wall +NVCCWARNINGS= CFLAGS = ${HDF5FLAGS} ${CUDAINCLUDES} -O2 -std=c++11 ${WARNINGS} -NVCCFLAGS= ${NVCCHDF5FLAGS} ${CUDAINCLUDES} -O2 -std=c++11 -arch=sm_30 ${WARNINGS} +NVCCFLAGS= ${NVCCHDF5FLAGS} ${CUDAINCLUDES} -O2 -std=c++11 -arch=sm_30 ${NVCCWARNINGS} LDFLAGS = ### Libraries From 082377e161d9da9b3d428c0a3787412f55a7cb72 Mon Sep 17 00:00:00 2001 From: noooway Date: Sun, 30 Dec 2018 12:21:09 +0300 Subject: [PATCH 76/83] Distinguish between system and local includes --- SpatialMeshCu.cuh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh index 155a8f4..da6534c 100644 --- a/SpatialMeshCu.cuh +++ b/SpatialMeshCu.cuh @@ -1,12 +1,13 @@ #ifndef _SPATIAL_MESH_CUH_ #define _SPATIAL_MESH_CUH_ -#include "cuda_runtime.h" -#include "config.h" #include -#include "hdf5.h" -#include "hdf5_hl.h" -#include "device_launch_parameters.h" +#include +#include +#include +#include +#include "config.h" + class SpatialMeshCu { public: From 766d6a122ab4c0d84643b22601ba6b67cf1d9237 Mon Sep 17 00:00:00 
2001 From: noooway Date: Sun, 30 Dec 2018 12:53:50 +0300 Subject: [PATCH 77/83] Remove c_boundary in copy_boundary_to_device --- SpatialMeshCu.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index f86be14..f748f92 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -257,8 +257,7 @@ void SpatialMeshCu::copy_boundary_to_device(Config &conf) { boundary[BOTTOM] = conf.boundary_config_part.boundary_phi_bottom; boundary[NEAR] = conf.boundary_config_part.boundary_phi_near; boundary[FAR] = conf.boundary_config_part.boundary_phi_far; - const double *c_boundary = boundary; - cuda_status = cudaMemcpyToSymbol(d_boundary, (const void*)c_boundary, sizeof(double) * 6); + cuda_status = cudaMemcpyToSymbol(d_boundary, boundary, 6 * sizeof(double)); cuda_status_check(cuda_status, debug_message); } From 52e6c693faa8bfc3e143c9a3d2d52c9a58850949 Mon Sep 17 00:00:00 2001 From: noooway Date: Sun, 30 Dec 2018 13:10:27 +0300 Subject: [PATCH 78/83] Attempt to simplify boundary conditions setting --- SpatialMeshCu.cu | 61 +++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index f748f92..208bea3 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -44,7 +44,7 @@ __global__ void fill_coordinates(double3* node_coordinates) { } __global__ void SetBoundaryConditionsX(double* potential){ - // blockIdx.x = 0 or 1; 0 - right boundary, 1 - left boundary + // blockIdx.x is expected to be 0 or 1; 0 - right boundary, 1 - left boundary int mesh_x = blockIdx.x * (d_n_nodes[0].x - 1); int mesh_y = threadIdx.y + blockIdx.y * blockDim.y; int mesh_z = threadIdx.z + blockIdx.z * blockDim.z; @@ -56,38 +56,31 @@ __global__ void SetBoundaryConditionsX(double* potential){ potential[plain_idx] = blockIdx.x * d_boundary[LEFT] + (1.0 - blockIdx.x) * d_boundary[RIGHT]; } -__global__ void SetBoundaryConditionOrthoY(double* potential) { - int yIdx = blockIdx.z 
* d_n_nodes[0].x * (d_n_nodes[0].y - 1); //0 or nodes.x-1 - - int xStepThread = 1; //x= - int xStepBlock = blockDim.x; - - int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; - int zStepBlock = zStepThread * blockDim.y; - - int idx = yIdx + threadIdx.x * xStepThread + blockIdx.x * xStepBlock - + threadIdx.y * zStepThread + blockIdx.y * zStepBlock; - - potential[idx] = ((double)(1 - blockIdx.z)) * d_boundary[BOTTOM] - + (blockIdx.z * d_boundary[TOP]); +__global__ void SetBoundaryConditionsY(double* potential){ + // blockIdx.y is expected to be 0 or 1; 0 - bottom boundary, 1 - top boundary + int mesh_x = threadIdx.x + blockIdx.x * blockDim.x; + int mesh_y = blockIdx.y * (d_n_nodes[0].y - 1); + int mesh_z = threadIdx.z + blockIdx.z * blockDim.z; + + int plain_idx = mesh_x + + mesh_y * d_n_nodes[0].x + + mesh_z * d_n_nodes[0].x * d_n_nodes[0].y; + potential[plain_idx] = blockIdx.y * d_boundary[TOP] + (1.0 - blockIdx.y) * d_boundary[BOTTOM]; } -__global__ void SetBoundaryConditionOrthoZ(double* potential) { - int zIdx = blockIdx.z - * (d_n_nodes[0].x * d_n_nodes[0].y * (d_n_nodes[0].z - 1)); //0 or nodes.x-1 - int xStepThread = 1; //x= - int xStepBlock = blockDim.x; - - int yStepThread = d_n_nodes[0].x; - int yStepBlock = yStepThread * blockDim.y; - - int idx = zIdx + threadIdx.x * xStepThread + blockIdx.x * xStepBlock - + threadIdx.y * yStepThread + blockIdx.y * yStepBlock; - potential[idx] = ((double)(1 - blockIdx.z)) * d_boundary[NEAR] - + (blockIdx.z * d_boundary[FAR]); +__global__ void SetBoundaryConditionsZ(double* potential){ + // blockIdx.z is expected to be 0 or 1; 0 - near boundary, 1 - far boundary + int mesh_x = threadIdx.x + blockIdx.x * blockDim.x; + int mesh_y = threadIdx.y + blockIdx.y * blockDim.y; + int mesh_z = blockIdx.z * (d_n_nodes[0].z - 1); + + int plain_idx = mesh_x + + mesh_y * d_n_nodes[0].x + + mesh_z * d_n_nodes[0].x * d_n_nodes[0].y; + potential[plain_idx] = blockIdx.z * d_boundary[FAR] + (1.0 - blockIdx.z) * d_boundary[NEAR]; } 
SpatialMeshCu::SpatialMeshCu(Config &conf) { @@ -321,18 +314,18 @@ void SpatialMeshCu::set_boundary_conditions(double* d_potential) { cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); - // todo: simplify - threads = dim3(4, 4, 2); - blocks = dim3(n_nodes.x / 4, n_nodes.z / 4, 2); - SetBoundaryConditionOrthoY <<< blocks, threads >>> (d_potential); + threads = dim3(4, 1, 4); + blocks = dim3(n_nodes.x / 4, 2, n_nodes.z / 4); + SetBoundaryConditionsY<<<blocks, threads>>>(d_potential); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); + threads = dim3(4, 4, 1); blocks = dim3(n_nodes.x / 4, n_nodes.y / 4, 2); - SetBoundaryConditionOrthoZ <<< blocks, threads >>> (d_potential); + SetBoundaryConditionsZ<<<blocks, threads>>>(d_potential); cuda_status = cudaDeviceSynchronize(); cuda_status_check(cuda_status, debug_message); - + return; } From e47f92b75ba57a1f0714f7c84d02346a2b526156 Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Sun, 30 Dec 2018 17:14:43 +0700 Subject: [PATCH 79/83] wrong arguments order fix --- SpatialMeshCu.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 49df3c7..7038e38 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -550,7 +550,7 @@ void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { double *ey = new double[dims[0]]; double *ez = new double[dims[0]]; double3 *hdf5_tmp_write_data = new double3[dims[0]]; - cuda_status = cudaMemcpy(dev_node_coordinates, hdf5_tmp_write_data, + cuda_status = cudaMemcpy( hdf5_tmp_write_data, dev_node_coordinates, sizeof(double3) * dims[0], cudaMemcpyDeviceToHost); cuda_status_check(cuda_status, debug_message); From a8113d174916c2ed0885acbbdfda533b8d47724c Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Mon, 31 Dec 2018 17:33:58 +0700 Subject: [PATCH 80/83] PhiNext computation Signs --- FieldSolver.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu 
index 41bbab4..8e66b51 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -48,17 +48,17 @@ __global__ void ComputePhiNext(const double* d_phi_current, const double* d_char double denom = (double)2 * (dev_dxdxdydy[0] + dev_dxdxdzdz[0] + dev_dydydzdz[0]); - prev_neighbour_idx = max(idx + offset_Dx, 0); + prev_neighbour_idx = max(idx - offset_Dx, 0); next_neighbour_idx = min(idx + offset_Dx, dev_end[0]); d_phi_next[idx] = (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx]) * dev_dydydzdz[0]; - prev_neighbour_idx = max(idx + offset_Dy, 0); + prev_neighbour_idx = max(idx - offset_Dy, 0); next_neighbour_idx = min(idx + offset_Dy, dev_end[0]); d_phi_next[idx] += (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx]) * dev_dxdxdzdz[0]; - prev_neighbour_idx = max(idx + offset_Dz, 0); + prev_neighbour_idx = max(idx - offset_Dz, 0); next_neighbour_idx = min(idx + offset_Dz, dev_end[0]); d_phi_next[idx] += (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx]) * dev_dxdxdydy[0]; From f0141fabb4347f24be9628038853b613dd96e48d Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Mon, 31 Dec 2018 18:01:35 +0700 Subject: [PATCH 81/83] explicit double boundary conditions --- SpatialMeshCu.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index 208bea3..c2e26b8 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -53,7 +53,7 @@ __global__ void SetBoundaryConditionsX(double* potential){ mesh_y * d_n_nodes[0].x + mesh_z * d_n_nodes[0].x * d_n_nodes[0].y; - potential[plain_idx] = blockIdx.x * d_boundary[LEFT] + (1.0 - blockIdx.x) * d_boundary[RIGHT]; + potential[plain_idx] = (double)blockIdx.x * d_boundary[LEFT] + (1.0 - blockIdx.x) * d_boundary[RIGHT]; } __global__ void SetBoundaryConditionsY(double* potential){ @@ -66,7 +66,7 @@ __global__ void SetBoundaryConditionsY(double* potential){ mesh_y * d_n_nodes[0].x + mesh_z * d_n_nodes[0].x * d_n_nodes[0].y; - 
potential[plain_idx] = blockIdx.y * d_boundary[TOP] + (1.0 - blockIdx.y) * d_boundary[BOTTOM]; + potential[plain_idx] = (double)blockIdx.y * d_boundary[TOP] + (1.0 - blockIdx.y) * d_boundary[BOTTOM]; } @@ -80,7 +80,7 @@ __global__ void SetBoundaryConditionsZ(double* potential){ mesh_y * d_n_nodes[0].x + mesh_z * d_n_nodes[0].x * d_n_nodes[0].y; - potential[plain_idx] = blockIdx.z * d_boundary[FAR] + (1.0 - blockIdx.z) * d_boundary[NEAR]; + potential[plain_idx] = (double)blockIdx.z * d_boundary[FAR] + (1.0 - blockIdx.z) * d_boundary[NEAR]; } SpatialMeshCu::SpatialMeshCu(Config &conf) { From c83d23e9a79ec4f8af44f662b85dd9e113be1a4a Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Mon, 31 Dec 2018 18:12:49 +0700 Subject: [PATCH 82/83] 1 jacobi iteration --- FieldSolver.cu | 4 ++-- SpatialMeshCu.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index 8e66b51..cc0b711 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -46,7 +46,7 @@ __global__ void ComputePhiNext(const double* d_phi_current, const double* d_char int prev_neighbour_idx; int next_neighbour_idx; - double denom = (double)2 * (dev_dxdxdydy[0] + dev_dxdxdzdz[0] + dev_dydydzdz[0]); + double denom = 2.0 * (dev_dxdxdydy[0] + dev_dxdxdzdz[0] + dev_dydydzdz[0]); prev_neighbour_idx = max(idx - offset_Dx, 0); next_neighbour_idx = min(idx + offset_Dx, dev_end[0]); @@ -215,7 +215,7 @@ void FieldSolver::eval_potential(Inner_regions_manager &inner_regions) void FieldSolver::solve_poisson_eqn_Jacobi(Inner_regions_manager &inner_regions) { - max_Jacobi_iterations = 150; + max_Jacobi_iterations = 1; int iter; for (iter = 0; iter < max_Jacobi_iterations; ++iter) { diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu index c2e26b8..c2764bc 100644 --- a/SpatialMeshCu.cu +++ b/SpatialMeshCu.cu @@ -80,7 +80,7 @@ __global__ void SetBoundaryConditionsZ(double* potential){ mesh_y * d_n_nodes[0].x + mesh_z * d_n_nodes[0].x * d_n_nodes[0].y; - potential[plain_idx] = 
(double)blockIdx.z * d_boundary[FAR] + (1.0 - blockIdx.z) * d_boundary[NEAR]; + potential[plain_idx] = ((double)blockIdx.z) * d_boundary[FAR] + (1.0 - blockIdx.z) * d_boundary[NEAR]; } SpatialMeshCu::SpatialMeshCu(Config &conf) { From c447ea8487421338bf1b8c387da980b734f20f3d Mon Sep 17 00:00:00 2001 From: Zuev Mikhail Date: Mon, 31 Dec 2018 18:21:06 +0700 Subject: [PATCH 83/83] jacobi iter 150 again --- FieldSolver.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FieldSolver.cu b/FieldSolver.cu index cc0b711..bbab559 100644 --- a/FieldSolver.cu +++ b/FieldSolver.cu @@ -215,7 +215,7 @@ void FieldSolver::eval_potential(Inner_regions_manager &inner_regions) void FieldSolver::solve_poisson_eqn_Jacobi(Inner_regions_manager &inner_regions) { - max_Jacobi_iterations = 1; + max_Jacobi_iterations = 150; int iter; for (iter = 0; iter < max_Jacobi_iterations; ++iter) {