diff --git a/FieldSolver.cu b/FieldSolver.cu new file mode 100644 index 0000000..bbab559 --- /dev/null +++ b/FieldSolver.cu @@ -0,0 +1,328 @@ +#include "FieldSolver.cuh" + + + +#define FULL_MASK 0xffffffff +//mask for __all_sync used in convergence method + +__constant__ double3 d_cell_size[1]; +__constant__ int3 d_n_nodes[1]; + +__constant__ double dev_dxdxdydy[1]; +__constant__ double dev_dxdxdzdz[1]; +__constant__ double dev_dydydzdz[1]; +__constant__ double dev_dxdxdydydzdz[1]; + +__constant__ int dev_end[1]; + +__device__ int GetIdx() { + //int xStepthread = 1; + int xStepBlock = blockDim.x; + int yStepThread = d_n_nodes[0].x; + int yStepBlock = yStepThread * blockDim.y; + int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; + int zStepBlock = zStepThread * blockDim.z; + return (threadIdx.x + blockIdx.x * xStepBlock) + + (threadIdx.y * yStepThread + blockIdx.y * yStepBlock) + + (threadIdx.z * zStepThread + blockIdx.z * zStepBlock); +} + +__device__ double GradientComponent(double phi1, double phi2, double cell_side_size) { + return ((phi2 - phi1) / cell_side_size); +} + +__global__ void SetPhiNextAsCurrent(double* d_phi_current, double* d_phi_next) { + int idx = GetIdx(); + d_phi_current[idx] = d_phi_next[idx]; +} + +__global__ void ComputePhiNext(const double* d_phi_current, const double* d_charge, double* d_phi_next) { + int idx = GetIdx(); + int offset_Dx = 1; + //todo rewrite usind device n_nodes.x/y/z + int offset_Dy = d_n_nodes[0].x; + int offset_Dz = d_n_nodes[0].x * d_n_nodes[0].y; + + int prev_neighbour_idx; + int next_neighbour_idx; + + double denom = 2.0 * (dev_dxdxdydy[0] + dev_dxdxdzdz[0] + dev_dydydzdz[0]); + + prev_neighbour_idx = max(idx - offset_Dx, 0); + next_neighbour_idx = min(idx + offset_Dx, dev_end[0]); + d_phi_next[idx] = + (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx]) * dev_dydydzdz[0]; + + prev_neighbour_idx = max(idx - offset_Dy, 0); + next_neighbour_idx = min(idx + offset_Dy, dev_end[0]); + d_phi_next[idx] += + (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx]) * dev_dxdxdzdz[0]; + + prev_neighbour_idx = max(idx - offset_Dz, 0); + next_neighbour_idx = min(idx + offset_Dz, dev_end[0]); + d_phi_next[idx] += + (d_phi_current[next_neighbour_idx] + d_phi_current[prev_neighbour_idx]) * dev_dxdxdydy[0]; + + d_phi_next[idx] += 4.0 * CUDART_PI * d_charge[idx] * dev_dxdxdydydzdz[0]; + d_phi_next[idx] /= denom; + +} + +__global__ void EvaluateFields(const double* dev_potential, double3* dev_el_field) { + int idx = GetIdx(); + + double3 e = make_double3(0, 0, 0); + //assuming true = 1, false = 0 + //this method is hard to read due avoidance of if-else constructions on device code + bool is_on_up_border; + bool is_on_low_border; + bool is_inside_borders; + int offset; + + offset = 1; + is_on_up_border = ((threadIdx.x == 0) && (blockIdx.x == 0)); + is_on_low_border = ((threadIdx.x == (blockDim.x - 1)) && (blockIdx.x == (gridDim.x - 1))); + is_inside_borders = !(is_on_low_border || is_on_up_border); + + e.x = -((double)1 / ((double)1 + is_inside_borders)) * GradientComponent( + dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], + dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], + d_cell_size[0].x); + + offset = d_n_nodes[0].x; + is_on_up_border = ((threadIdx.y == 0) && (blockIdx.y == 0)); + is_on_low_border = ((threadIdx.y == (blockDim.y - 1)) && (blockIdx.y == (gridDim.y - 1))); + is_inside_borders = !(is_on_low_border || is_on_up_border); + + e.y = -((double)1 / ((double)1 + is_inside_borders)) * GradientComponent( + dev_potential[idx + (offset*is_on_up_border) - (offset*is_inside_borders)], + dev_potential[idx - (offset*is_on_low_border) + (offset*is_inside_borders)], + d_cell_size[0].y); + + offset = d_n_nodes[0].y*d_n_nodes[0].x; + is_on_up_border = ((threadIdx.z == 0) && (blockIdx.z == 0)); + is_on_low_border = ((threadIdx.z == (blockDim.z - 1)) && (blockIdx.z == (gridDim.z - 1))); + is_inside_borders = !(is_on_low_border || is_on_up_border); + + e.z = -((double)1 / ((double)1 + is_inside_borders)) * GradientComponent( + dev_potential[idx + (offset * is_on_up_border) - (offset * is_inside_borders)], + dev_potential[idx - (offset * is_on_low_border) + (offset * is_inside_borders)], + d_cell_size[0].z); + + dev_el_field[idx] = e; + +} + +//__global__ void AssertConvergence(const double* d_phi_current, const double* d_phi_next) { +// double rel_diff; +// double abs_diff; +// double abs_tolerance = 1.0e-5; +// double rel_tolerance = 1.0e-12; +// int idx = GetIdx(); +// abs_diff = fabs(d_phi_next[idx] - d_phi_current[idx]); +// rel_diff = abs_diff / fabs(d_phi_current[idx]); +// bool converged = ((abs_diff <= abs_tolerance) || (rel_diff <= rel_tolerance)); +// +// assert(converged==true); +//} + +template +__global__ void Convergence(const double* d_phi_current, const double* d_phi_next, unsigned int *d_convergence) +{ + __shared__ int w_convegence[nwarps]; + unsigned int laneid = (threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.x * blockDim.y) % warpSize; + unsigned int warpid = (threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.x * blockDim.y) / warpSize; + + double rel_diff; + double abs_diff; + double abs_tolerance = 1.0e-5; + double rel_tolerance = 1.0e-12; + + int idx = GetIdx(); + + abs_diff = fabs(d_phi_next[idx] - d_phi_current[idx]); + rel_diff = abs_diff / fabs(d_phi_current[idx]); + + unsigned int converged = ((abs_diff <= abs_tolerance) || (rel_diff <= rel_tolerance)); + + converged = __all_sync(FULL_MASK, converged == 1 ); + + if (laneid == 0) { + w_convegence[warpid] = converged; + } + __syncthreads(); + + if (threadIdx.x == 0) { + int b_convergence = 0; +#pragma unroll + for (int i = 0; i(&dev_phi_next, dim); + +} + +void FieldSolver::copy_constants_to_device() { + cudaError_t cuda_status; + + cuda_status = cudaMemcpyToSymbol(d_n_nodes, (const void*)&mesh.n_nodes, sizeof(dim3)); + cuda_status = cudaMemcpyToSymbol(d_cell_size, (const void*)&mesh.cell_size, sizeof(double3)); + + double dxdxdydy = mesh.cell_size.x * mesh.cell_size.x * + mesh.cell_size.y * mesh.cell_size.y; + cuda_status = cudaMemcpyToSymbol(dev_dxdxdydy, (const void*)&dxdxdydy, sizeof(double)); + + double dxdxdzdz = mesh.cell_size.x * mesh.cell_size.x * + mesh.cell_size.z * mesh.cell_size.z; + cuda_status = cudaMemcpyToSymbol(dev_dxdxdzdz, (const void*)&dxdxdzdz, sizeof(double)); + + double dydydzdz = mesh.cell_size.y * mesh.cell_size.y * + mesh.cell_size.z * mesh.cell_size.z; + cuda_status = cudaMemcpyToSymbol(dev_dydydzdz, (const void*)&dydydzdz, sizeof(double)); + + double dxdxdydydzdz = mesh.cell_size.x * mesh.cell_size.x * + mesh.cell_size.y * mesh.cell_size.y * + mesh.cell_size.z * mesh.cell_size.z; + cuda_status = cudaMemcpyToSymbol(dev_dxdxdydydzdz, (const void*)&dxdxdydydzdz, sizeof(double)); + + int end = mesh.n_nodes.x * mesh.n_nodes.y * mesh.n_nodes.z - 1; + cuda_status = cudaMemcpyToSymbol(dev_end, (const void*)&end, sizeof(int)); +} + +void FieldSolver::eval_potential(Inner_regions_manager &inner_regions) +{ + solve_poisson_eqn_Jacobi(inner_regions); +} + +void FieldSolver::solve_poisson_eqn_Jacobi(Inner_regions_manager &inner_regions) +{ + max_Jacobi_iterations = 150; + int iter; + + for (iter = 0; iter < max_Jacobi_iterations; ++iter) { + single_Jacobi_iteration(inner_regions); + if (iterative_Jacobi_solutions_converged()) { + break; + } + set_phi_next_as_phi_current(); + } + if (iter == max_Jacobi_iterations) { + printf("WARING: potential evaluation did't converge after max iterations!\n"); + } + set_phi_next_as_phi_current(); + + //return; +} + +void FieldSolver::single_Jacobi_iteration(Inner_regions_manager &inner_regions) +{ + compute_phi_next_at_inner_points(); + set_phi_next_at_boundaries(); + set_phi_next_at_inner_regions(inner_regions); +} + +void FieldSolver::set_phi_next_at_boundaries() +{ + mesh.set_boundary_conditions(dev_phi_next); +} + +void FieldSolver::compute_phi_next_at_inner_points() +{ + dim3 threads = mesh.GetThreads(); + dim3 blocks = mesh.GetBlocks(threads); + cudaError_t cuda_status; + + ComputePhiNext<<>>(mesh.dev_potential, mesh.dev_charge_density, dev_phi_next); + cuda_status = cudaDeviceSynchronize(); +} + +void FieldSolver::set_phi_next_at_inner_regions(Inner_regions_manager &inner_regions) +{ + //for (auto ® : inner_regions.regions) { + // for (auto &node : reg.inner_nodes) { + // // todo: mark nodes at edge during construction + // // if (!node.at_domain_edge( nx, ny, nz )) { + // phi_next[node.x][node.y][node.z] = reg.potential; + // // } + // } + //} +} + + +bool FieldSolver::iterative_Jacobi_solutions_converged() +{ + //// todo: bind tol to config parameters + cudaError_t status; + dim3 threads = mesh.GetThreads(); + dim3 blocks = mesh.GetBlocks(threads); + + unsigned int *convergence, *d_convergence;//host,device flags + status = cudaHostAlloc((void **)&convergence, sizeof(unsigned int), cudaHostAllocMapped); + status = cudaHostGetDevicePointer((void **)&d_convergence, convergence, 0); + + const int nwarps = 2; + Convergence<<>>(mesh.dev_potential, dev_phi_next, d_convergence); + status = cudaDeviceSynchronize(); + //if (status == cudaErrorAssert) { + // return false; + //} + //if (status == cudaSuccess) { + // return true; + //} + + //std::cout << "Cuda error: " << cudaGetErrorString(status) << std::endl; + return *convergence == 0 ; +} + + +void FieldSolver::set_phi_next_as_phi_current() +{ + dim3 threads = mesh.GetThreads(); + dim3 blocks = mesh.GetBlocks(threads); + cudaError_t cuda_status; + SetPhiNextAsCurrent<<>>(mesh.dev_potential, dev_phi_next); + cuda_status = cudaDeviceSynchronize(); +} + + +void FieldSolver::eval_fields_from_potential() +{ + dim3 threads = mesh.GetThreads(); + dim3 blocks = mesh.GetBlocks(threads); + cudaError_t cuda_status; + + EvaluateFields<<>>(mesh.dev_potential, mesh.dev_electric_field); + + cuda_status = cudaDeviceSynchronize(); + return; +} + + + + +FieldSolver::~FieldSolver() +{ + // delete phi arrays? + cudaFree((void*)dev_phi_next); + cudaFree((void*)d_n_nodes); + cudaFree((void*)d_cell_size); +} diff --git a/FieldSolver.cuh b/FieldSolver.cuh new file mode 100644 index 0000000..4f0bff7 --- /dev/null +++ b/FieldSolver.cuh @@ -0,0 +1,41 @@ +#ifndef _FIELD_SOLVER_CUH_ +#define _FIELD_SOLVER_CUH_ + +#include +#include +#include +#include +#include +#include +#include "SpatialMeshCu.cuh" +#include "inner_region.h" + +class FieldSolver { +public: + FieldSolver(SpatialMeshCu &spat_mesh, Inner_regions_manager &inner_regions); + void eval_potential(Inner_regions_manager &inner_regions); + void eval_fields_from_potential(); + virtual ~FieldSolver(); +private: + SpatialMeshCu& mesh; + +private: + int max_Jacobi_iterations; + double rel_tolerance; + double abs_tolerance; + double *dev_phi_next; + //boost::multi_array phi_current; + //boost::multi_array phi_next; + void allocate_next_phi(); + void copy_constants_to_device(); + // Solve potential + void solve_poisson_eqn_Jacobi(Inner_regions_manager &inner_regions); + void single_Jacobi_iteration(Inner_regions_manager &inner_regions); + void set_phi_next_at_boundaries(); + void compute_phi_next_at_inner_points(); + void set_phi_next_at_inner_regions(Inner_regions_manager &inner_regions); + bool iterative_Jacobi_solutions_converged(); + void set_phi_next_as_phi_current(); +}; + +#endif /*_FIELD_SOLVER_CUH_*/ diff --git a/Makefile b/Makefile index 2c7781a..a65d1c1 100644 --- a/Makefile +++ b/Makefile @@ -4,31 +4,45 @@ SHELL:=/bin/bash -O extglob ##### Compilers #CC=clang++ CC=g++ -HDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_BSD_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security +NVCC=nvcc + +HDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g -fstack-protector-strong -Wformat -Werror=format-security +NVCCHDF5FLAGS=-I/usr/include/hdf5/serial -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FORTIFY_SOURCE=2 -g +CUDAINCLUDES=-I/usr/local/cuda/include + WARNINGS=-Wall -CFLAGS = ${HDF5FLAGS} -O2 -std=c++11 ${WARNINGS} +NVCCWARNINGS= +CFLAGS = ${HDF5FLAGS} ${CUDAINCLUDES} -O2 -std=c++11 ${WARNINGS} +NVCCFLAGS= ${NVCCHDF5FLAGS} ${CUDAINCLUDES} -O2 -std=c++11 -arch=sm_30 ${NVCCWARNINGS} LDFLAGS = ### Libraries COMMONLIBS=-lm BOOSTLIBS=-lboost_program_options HDF5LIBS=-L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5_hl -lhdf5 -Wl,-z,relro -lpthread -lz -ldl -lm -Wl,-rpath -Wl,/usr/lib/x86_64-linux-gnu/hdf5/serial -LIBS=${COMMONLIBS} ${BOOSTLIBS} ${HDF5LIBS} +CUDALIBS=-L/usr/local/cuda/lib64/ -lcudart +LIBS=${COMMONLIBS} ${BOOSTLIBS} ${HDF5LIBS} ${CUDALIBS} ### Sources and executable CPPSOURCES=$(wildcard *.cpp) CPPHEADERS=$(wildcard *.h) +CUSOURCES=$(wildcard *.cu) +CUHEADERS=$(wildcard *.cuh) + +CUOBJECTS=$(CUSOURCES:%.cu=%.o) OBJECTS=$(CPPSOURCES:%.cpp=%.o) + EXECUTABLE=ef.out MAKE=make TINYEXPR=./lib/tinyexpr TINYEXPR_OBJ=./lib/tinyexpr/tinyexpr.o SUBDIRS=doc -$(EXECUTABLE): $(OBJECTS) $(TINYEXPR) - $(CC) $(LDFLAGS) $(OBJECTS) $(TINYEXPR_OBJ) -o $@ $(LIBS) - -$(OBJECTS):%.o:%.cpp $(CPPHEADERS) +$(EXECUTABLE): $(OBJECTS) $(CUOBJECTS) $(TINYEXPR) + $(CC) $(LDFLAGS) $(OBJECTS) $(CUOBJECTS) $(TINYEXPR_OBJ) -o $@ $(LIBS) +$(CUOBJECTS):%.o:%.cu + $(NVCC) $(NVCCFLAGS) -c $< -o $@ +$(OBJECTS):%.o:%.cpp $(CC) $(CFLAGS) -c $< -o $@ .PHONY: allsubdirs $(SUBDIRS) $(TINYEXPR) clean cleansubdirs cleanall diff --git a/SpatialMeshCu.cu b/SpatialMeshCu.cu new file mode 100644 index 0000000..c2764bc --- /dev/null +++ b/SpatialMeshCu.cu @@ -0,0 +1,705 @@ +#include "SpatialMeshCu.cuh" + +__constant__ double3 d_volume_size[1]; +__constant__ double3 d_cell_size[1]; +__constant__ int3 d_n_nodes[1]; +__constant__ double d_boundary[6]; + +#define TOP 0 +#define BOTTOM 1 +#define LEFT 2 +#define RIGHT 3 +#define FAR 4 +#define NEAR 5 + +__device__ int thread_idx_to_array_idx( int3 *d_n_nodes ){ + int xStepThread = 1; + int xStepBlock = blockDim.x; + int yStepThread = d_n_nodes[0].x; + int yStepBlock = yStepThread * blockDim.y; + int zStepThread = d_n_nodes[0].x * d_n_nodes[0].y; + int zStepBlock = zStepThread * blockDim.z; + + return threadIdx.x * xStepThread + blockIdx.x * xStepBlock + + threadIdx.y * yStepThread + blockIdx.y * yStepBlock + + threadIdx.z * zStepThread + blockIdx.z * zStepBlock; +} + +__device__ int3 thread_idx_to_mesh_idx( int3 *d_n_nodes ){ + // each thread handles single volume node + int3 mesh_idx = make_int3( threadIdx.x + blockIdx.x * blockDim.x, + threadIdx.y + blockIdx.y * blockDim.y, + threadIdx.z + blockIdx.z * blockDim.z ); + return mesh_idx; +} + + +__global__ void fill_coordinates(double3* node_coordinates) { + int plain_idx = thread_idx_to_array_idx( d_n_nodes ); + int3 mesh_idx = thread_idx_to_mesh_idx( d_n_nodes ); + + node_coordinates[plain_idx] = make_double3(d_cell_size[0].x * mesh_idx.x, + d_cell_size[0].y * mesh_idx.y, + d_cell_size[0].z * mesh_idx.z); +} + +__global__ void SetBoundaryConditionsX(double* potential){ + // blockIdx.x is expected to be 0 or 1; 0 - right boundary, 1 - left boundary + int mesh_x = blockIdx.x * (d_n_nodes[0].x - 1); + int mesh_y = threadIdx.y + blockIdx.y * blockDim.y; + int mesh_z = threadIdx.z + blockIdx.z * blockDim.z; + + int plain_idx = mesh_x + + mesh_y * d_n_nodes[0].x + + mesh_z * d_n_nodes[0].x * d_n_nodes[0].y; + + potential[plain_idx] = (double)blockIdx.x * d_boundary[LEFT] + (1.0 - blockIdx.x) * d_boundary[RIGHT]; +} + +__global__ void SetBoundaryConditionsY(double* potential){ + // blockIdx.y is expected to be 0 or 1; 0 - bottom boundary, 1 - top boundary + int mesh_x = threadIdx.x + blockIdx.x * blockDim.x; + int mesh_y = blockIdx.y * (d_n_nodes[0].y - 1); + int mesh_z = threadIdx.z + blockIdx.z * blockDim.z; + + int plain_idx = mesh_x + + mesh_y * d_n_nodes[0].x + + mesh_z * d_n_nodes[0].x * d_n_nodes[0].y; + + potential[plain_idx] = (double)blockIdx.y * d_boundary[TOP] + (1.0 - blockIdx.y) * d_boundary[BOTTOM]; +} + + +__global__ void SetBoundaryConditionsZ(double* potential){ + // blockIdx.z is expected to be 0 or 1; 0 - near boundary, 1 - far boundary + int mesh_x = threadIdx.x + blockIdx.x * blockDim.x; + int mesh_y = threadIdx.y + blockIdx.y * blockDim.y; + int mesh_z = blockIdx.z * (d_n_nodes[0].z - 1); + + int plain_idx = mesh_x + + mesh_y * d_n_nodes[0].x + + mesh_z * d_n_nodes[0].x * d_n_nodes[0].y; + + potential[plain_idx] = ((double)blockIdx.z) * d_boundary[FAR] + (1.0 - blockIdx.z) * d_boundary[NEAR]; +} + +SpatialMeshCu::SpatialMeshCu(Config &conf) { + check_correctness_of_related_config_fields(conf); + init_constants(conf); + allocate_ongrid_values(); + fill_node_coordinates(); + set_boundary_conditions(dev_potential); +} + +SpatialMeshCu::SpatialMeshCu(hid_t h5_spat_mesh_group) { + herr_t status; + cudaError_t cuda_status; + std::string debug_message = std::string(" reading from hdf5 "); + + volume_size = make_double3(0, 0, 0); + cell_size = make_double3(0, 0, 0); + n_nodes = make_int3(0, 0, 0); + + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_volume_size", + &volume_size.x); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_volume_size", + &volume_size.y); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_volume_size", + &volume_size.z); + hdf5_status_check(status); + + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "x_cell_size", + &cell_size.x); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "y_cell_size", + &cell_size.y); + hdf5_status_check(status); + status = H5LTget_attribute_double(h5_spat_mesh_group, "./", "z_cell_size", + &cell_size.z); + hdf5_status_check(status); + + status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "x_n_nodes", + &n_nodes.x); + hdf5_status_check(status); + status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "y_n_nodes", + &n_nodes.y); + hdf5_status_check(status); + status = H5LTget_attribute_int(h5_spat_mesh_group, "./", "z_n_nodes", + &n_nodes.z); + hdf5_status_check(status); + + allocate_ongrid_values(); + copy_constants_to_device(); + + int dim = n_nodes.x * n_nodes.y * n_nodes.z; + double *h5_tmp_buf_1 = new double[dim]; + double *h5_tmp_buf_2 = new double[dim]; + double *h5_tmp_buf_3 = new double[dim]; + + dim3 threads = GetThreads(); + dim3 blocks = GetBlocks(threads); + fill_coordinates <<< blocks, threads >>> (dev_node_coordinates); + cuda_status = cudaDeviceSynchronize(); + cuda_status_check(cuda_status, debug_message); + + H5LTread_dataset_double(h5_spat_mesh_group, "./charge_density", + h5_tmp_buf_1); + H5LTread_dataset_double(h5_spat_mesh_group, "./potential", h5_tmp_buf_2); + + // for ( int i = 0; i < dim; i++ ) { + // ( charge_density.data() )[i] = h5_tmp_buf_1[i]; + // ( potential.data() )[i] = h5_tmp_buf_2[i]; + // } + + cuda_status = cudaMemcpy(h5_tmp_buf_1, dev_charge_density, sizeof(double) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status, debug_message); + + cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_potential, sizeof(double) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status, debug_message); + + double3 *h5_tmp_vector = new double3[dim]; + + H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_x", + h5_tmp_buf_1); + H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_y", + h5_tmp_buf_2); + H5LTread_dataset_double(h5_spat_mesh_group, "./electric_field_z", + h5_tmp_buf_3); + for (int i = 0; i < dim; i++) { + h5_tmp_vector[i] = make_double3(h5_tmp_buf_1[i], h5_tmp_buf_2[i], + h5_tmp_buf_3[i]); + } + + cuda_status = cudaMemcpy(h5_tmp_buf_2, dev_electric_field, sizeof(double3) * dim, + cudaMemcpyHostToDevice); + cuda_status_check(cuda_status, debug_message); + + delete[] h5_tmp_buf_1; + delete[] h5_tmp_buf_2; + delete[] h5_tmp_buf_3; + delete[] h5_tmp_vector; + + return; +} + +void SpatialMeshCu::check_correctness_of_related_config_fields(Config &conf) { + grid_x_size_gt_zero(conf); + grid_x_step_gt_zero_le_grid_x_size(conf); + grid_y_size_gt_zero(conf); + grid_y_step_gt_zero_le_grid_y_size(conf); + grid_z_size_gt_zero(conf); + grid_z_step_gt_zero_le_grid_z_size(conf); +} + +void SpatialMeshCu::init_constants(Config & conf) { + n_nodes = make_int3( + ceil( + conf.mesh_config_part.grid_x_size + / conf.mesh_config_part.grid_x_step) + 1, + ceil( + conf.mesh_config_part.grid_y_size + / conf.mesh_config_part.grid_y_step) + 1, + ceil( + conf.mesh_config_part.grid_z_size + / conf.mesh_config_part.grid_z_step) + 1); + + volume_size = make_double3(conf.mesh_config_part.grid_x_size, + conf.mesh_config_part.grid_y_size, + conf.mesh_config_part.grid_z_size); + + cell_size = make_double3(volume_size.x / (n_nodes.x - 1), + volume_size.y / (n_nodes.y - 1), volume_size.z / (n_nodes.z - 1)); + + + copy_constants_to_device(); + copy_boundary_to_device(conf); +} + +void SpatialMeshCu::copy_constants_to_device() { + cudaError_t cuda_status; + //mesh params + const int3 *nodes = &n_nodes; + std::string debug_message = std::string(" copy nodes number "); + cuda_status = cudaMemcpyToSymbol(d_n_nodes, (const void*)nodes, sizeof(int3)); + cuda_status_check(cuda_status, debug_message); + + const double3 *volume = &volume_size; + debug_message = std::string(" copy volume size "); + cuda_status = cudaMemcpyToSymbol(d_volume_size, (const void*)&volume, sizeof(double3)); + cuda_status_check(cuda_status, debug_message); + + debug_message = std::string(" copy cell size "); + cuda_status = cudaMemcpyToSymbol(d_cell_size, (const void*)&cell_size, sizeof(double3)); + cuda_status_check(cuda_status, debug_message); + + return; +} + +void SpatialMeshCu::copy_boundary_to_device(Config &conf) { + cudaError_t cuda_status; + //boundary params + std::string debug_message = std::string(" copy border constants "); + double boundary[6]; + boundary[RIGHT] = conf.boundary_config_part.boundary_phi_right; + boundary[LEFT] = conf.boundary_config_part.boundary_phi_left; + boundary[TOP] = conf.boundary_config_part.boundary_phi_top; + boundary[BOTTOM] = conf.boundary_config_part.boundary_phi_bottom; + boundary[NEAR] = conf.boundary_config_part.boundary_phi_near; + boundary[FAR] = conf.boundary_config_part.boundary_phi_far; + cuda_status = cudaMemcpyToSymbol(d_boundary, boundary, 6 * sizeof(double)); + cuda_status_check(cuda_status, debug_message); +} + +void SpatialMeshCu::allocate_ongrid_values() { + int nx = n_nodes.x; + int ny = n_nodes.y; + int nz = n_nodes.z; + + size_t total_node_count = nx * ny * nz; + cudaError_t cuda_status; + + std::string debug_message = std::string(" malloc coords"); + + cuda_status = cudaMalloc(&dev_node_coordinates, sizeof(double3) * total_node_count); + cuda_status_check(cuda_status, debug_message); + + debug_message = std::string(" malloc charde density"); + cuda_status = cudaMalloc(&dev_charge_density, sizeof(double) * total_node_count); + cuda_status_check(cuda_status, debug_message); + + debug_message = std::string(" malloc potential"); + cuda_status = cudaMalloc(&dev_potential, sizeof(double) * total_node_count); + cuda_status_check(cuda_status, debug_message); + + debug_message = std::string(" malloc field"); + cuda_status = cudaMalloc(&dev_electric_field, sizeof(double3) * total_node_count); + cuda_status_check(cuda_status, debug_message); + + return; +} + +void SpatialMeshCu::fill_node_coordinates() { + dim3 threads = GetThreads(); + dim3 blocks = GetBlocks(threads); + cudaError_t cuda_status; + std::string debug_message = std::string(" fill coordinates "); + fill_coordinates<<>>(dev_node_coordinates); + cuda_status = cudaDeviceSynchronize(); + cuda_status_check(cuda_status, debug_message); + + return; +} + +void SpatialMeshCu::clear_old_density_values() { + //std::fill(charge_density.data(), + // charge_density.data() + charge_density.num_elements(), + // 0.0); + + //return; +} + +void SpatialMeshCu::set_boundary_conditions(double* d_potential) { + dim3 threads, blocks; + cudaError_t cuda_status; + std::string debug_message = std::string(" set boundary "); + + // todo: no magic numbers + threads = dim3(1, 4, 4); + blocks = dim3(2, n_nodes.y / 4, n_nodes.z / 4); + SetBoundaryConditionsX<<>>(d_potential); + cuda_status = cudaDeviceSynchronize(); + cuda_status_check(cuda_status, debug_message); + + threads = dim3(4, 1, 4); + blocks = dim3(n_nodes.x / 4, 2, n_nodes.z / 4); + SetBoundaryConditionsY<<>>(d_potential); + cuda_status = cudaDeviceSynchronize(); + cuda_status_check(cuda_status, debug_message); + + threads = dim3(4, 4, 1); + blocks = dim3(n_nodes.x / 4, n_nodes.y / 4, 2); + SetBoundaryConditionsZ<<>>(d_potential); + cuda_status = cudaDeviceSynchronize(); + cuda_status_check(cuda_status, debug_message); + + return; +} + +bool SpatialMeshCu::is_potential_equal_on_boundaries() { + //bool equal = (potential[0][2][2] == potential[x_n_nodes - 1][2][2] == + // potential[2][0][2] == potential[2][y_n_nodes - 1][2] == + // potential[2][2][0] == potential[2][2][z_n_nodes - 1]); + // possible to rewrite to avoid warnings from compiler: + // bool equal = ( potential[0][2][2] == potential[x_n_nodes-1][2][2] ); + // equal = equal and ( potential[x_n_nodes-1][2][2] == potential[2][0][2] ); + // equal = equal and ( potential[2][0][2] == potential[2][y_n_nodes-1][2] ); + // equal = equal and ( potential[2][y_n_nodes-1][2] == potential[2][2][0] ); + // equal = equal and ( potential[2][2][0] == potential[2][2][z_n_nodes-1] ); + //return equal; + return false; +} + +void SpatialMeshCu::print() { + print_grid(); + print_ongrid_values(); + return; +} + +void SpatialMeshCu::print_grid() { + //std::cout << "Grid:" << std::endl; + //std::cout << "Length: x = " << x_volume_size << ", " + // << "y = " << y_volume_size << ", " + // << "z = " << z_volume_size << std::endl; + //std::cout << "Cell size: x = " << x_cell_size << ", " + // << "y = " << y_cell_size << ", " + // << "z = " << z_cell_size << std::endl; + //std::cout << "Total nodes: x = " << x_n_nodes << ", " + // << "y = " << y_n_nodes << ", " + // << "z = " << z_n_nodes << std::endl; + //return; +} + +void SpatialMeshCu::print_ongrid_values() { + //int nx = x_n_nodes; + //int ny = y_n_nodes; + //int nz = z_n_nodes; + //std::cout << "x_node, y_node, z_node, charge_density, potential, electric_field(x,y,z)" << std::endl; + //std::cout.precision(3); + //std::cout.setf(std::ios::scientific); + //std::cout.fill(' '); + //std::cout.setf(std::ios::right); + //for (int i = 0; i < nx; i++) { + // for (int j = 0; j < ny; j++) { + // for (int k = 0; k < nz; k++) { + // std::cout << std::setw(8) << i + // << std::setw(8) << j + // << std::setw(8) << k + // << std::setw(14) << charge_density[i][j][k] + // << std::setw(14) << potential[i][j][k] + // << std::setw(14) << vec3d_x(electric_field[i][j][k]) + // << std::setw(14) << vec3d_y(electric_field[i][j][k]) + // << std::setw(14) << vec3d_z(electric_field[i][j][k]) + // << std::endl; + // } + // } + //} + //return; +} + +void SpatialMeshCu::write_to_file(hid_t hdf5_file_id) { + hid_t group_id; + herr_t status; + std::string hdf5_groupname = "/SpatialMesh"; + group_id = H5Gcreate(hdf5_file_id, hdf5_groupname.c_str(), H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + hdf5_status_check(group_id); + + write_hdf5_attributes(group_id); + write_hdf5_ongrid_values(group_id); + + status = H5Gclose(group_id); + hdf5_status_check(status); + return; +} + +void SpatialMeshCu::write_hdf5_attributes(hid_t group_id) { + herr_t status; + int single_element = 1; + std::string hdf5_current_group = "./"; + + status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), + "x_volume_size", &volume_size.x, single_element); + hdf5_status_check(status); + status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), + "y_volume_size", &volume_size.y, single_element); + hdf5_status_check(status); + status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), + "z_volume_size", &volume_size.z, single_element); + hdf5_status_check(status); + status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), + "x_cell_size", &cell_size.x, single_element); + hdf5_status_check(status); + status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), + "y_cell_size", &cell_size.y, single_element); + hdf5_status_check(status); + status = H5LTset_attribute_double(group_id, hdf5_current_group.c_str(), + "z_cell_size", &cell_size.z, single_element); + hdf5_status_check(status); + status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), + "x_n_nodes", &n_nodes.x, single_element); + hdf5_status_check(status); + status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), + "y_n_nodes", &n_nodes.y, single_element); + hdf5_status_check(status); + status = H5LTset_attribute_int(group_id, hdf5_current_group.c_str(), + "z_n_nodes", &n_nodes.z, single_element); + hdf5_status_check(status); +} + +void SpatialMeshCu::write_hdf5_ongrid_values(hid_t group_id) { + hid_t filespace, dset; + herr_t status; + cudaError_t cuda_status; + std::string debug_message = std::string(" write hdf5 "); + + int rank = 1; + hsize_t dims[rank]; + dims[0] = n_nodes.x * n_nodes.y * n_nodes.z; + + filespace = H5Screate_simple(rank, dims, NULL); + + // todo: without compound datasets + // there is this copying problem. + { + double *nx = new double[dims[0]]; + double *ny = new double[dims[0]]; + double *nz = new double[dims[0]]; + + debug_message = std::string(" write hdf5 node_coords"); + + double3 *hdf5_tmp_write_data = new double3[dims[0]]; + cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_node_coordinates, + sizeof(double3) * dims[0], cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status, debug_message); + for (unsigned int i = 0; i < dims[0]; i++) { + nx[i] = hdf5_tmp_write_data[i].x; + ny[i] = hdf5_tmp_write_data[i].y; + nz[i] = hdf5_tmp_write_data[i].z; + } + + dset = H5Dcreate(group_id, "./node_coordinates_x", H5T_IEEE_F64BE, + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hdf5_status_check(dset); + status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, + H5P_DEFAULT, nx); + hdf5_status_check(status); + status = H5Dclose(dset); + hdf5_status_check(status); + + dset = H5Dcreate(group_id, "./node_coordinates_y", H5T_IEEE_F64BE, + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hdf5_status_check(dset); + status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, + H5P_DEFAULT, ny); + hdf5_status_check(status); + status = H5Dclose(dset); + hdf5_status_check(status); + + dset = H5Dcreate(group_id, "./node_coordinates_z", H5T_IEEE_F64BE, + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hdf5_status_check(dset); + status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, + H5P_DEFAULT, nz); + hdf5_status_check(status); + status = H5Dclose(dset); + hdf5_status_check(status); + delete[] nx; + delete[] ny; + delete[] nz; + delete[] hdf5_tmp_write_data; + } + { + debug_message = std::string(" write hdf5 charge_density "); + + double *hdf5_tmp_write_data = new double[dims[0]]; + cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_charge_density, + sizeof(double) * dims[0], cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status, debug_message); + + dset = H5Dcreate(group_id, "./charge_density", H5T_IEEE_F64BE, + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hdf5_status_check(dset); + status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, + H5P_DEFAULT, hdf5_tmp_write_data); + hdf5_status_check(status); + status = H5Dclose(dset); + hdf5_status_check(status); + delete[] hdf5_tmp_write_data; + } + { + + debug_message = std::string(" write hdf5 potential "); + double *hdf5_tmp_write_data = new double[dims[0]]; + cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_potential, sizeof(double) * dims[0], + cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status, debug_message); + + dset = H5Dcreate(group_id, "./potential", H5T_IEEE_F64BE, filespace, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hdf5_status_check(dset); + status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, + H5P_DEFAULT, hdf5_tmp_write_data); + hdf5_status_check(status); + status = H5Dclose(dset); + hdf5_status_check(status); + delete[] hdf5_tmp_write_data; + + } + { + debug_message = std::string(" write electric field to hdf5 "); + + double *ex = new double[dims[0]]; + double *ey = new double[dims[0]]; + double *ez = new double[dims[0]]; + double3 *hdf5_tmp_write_data = new double3[dims[0]]; + cuda_status = cudaMemcpy(hdf5_tmp_write_data, dev_electric_field, + sizeof(double3) * dims[0], cudaMemcpyDeviceToHost); + cuda_status_check(cuda_status, debug_message); + + for (unsigned int i = 0; i < dims[0]; i++) { + ex[i] = hdf5_tmp_write_data[i].x; + ey[i] = hdf5_tmp_write_data[i].y; + ez[i] = hdf5_tmp_write_data[i].z; + } + dset = H5Dcreate(group_id, "./electric_field_x", H5T_IEEE_F64BE, + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hdf5_status_check(dset); + status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, + H5P_DEFAULT, ex); + hdf5_status_check(status); + status = H5Dclose(dset); + hdf5_status_check(status); + + dset = H5Dcreate(group_id, "./electric_field_y", H5T_IEEE_F64BE, + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hdf5_status_check(dset); + status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, + H5P_DEFAULT, ey); + hdf5_status_check(status); + status = H5Dclose(dset); + hdf5_status_check(status); + + dset = H5Dcreate(group_id, "./electric_field_z", H5T_IEEE_F64BE, + filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hdf5_status_check(dset); + status = H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, filespace, + H5P_DEFAULT, ez); + hdf5_status_check(status); + status = H5Dclose(dset); + hdf5_status_check(status); + delete[] ex; + delete[] ey; + delete[] ez; + delete[] hdf5_tmp_write_data; + } + status = H5Sclose(filespace); + hdf5_status_check(status); +} + +void SpatialMeshCu::grid_x_size_gt_zero(Config &conf) { + check_and_exit_if_not(conf.mesh_config_part.grid_x_size > 0, + "grid_x_size < 0"); +} + +void SpatialMeshCu::grid_x_step_gt_zero_le_grid_x_size(Config &conf) { + check_and_exit_if_not( + (conf.mesh_config_part.grid_x_step > 0) + && (conf.mesh_config_part.grid_x_step + <= conf.mesh_config_part.grid_x_size), + "grid_x_step < 0 or grid_x_step >= grid_x_size"); +} + +void SpatialMeshCu::grid_y_size_gt_zero(Config &conf) { + check_and_exit_if_not(conf.mesh_config_part.grid_y_size > 0, + "grid_y_size < 0"); +} + +void SpatialMeshCu::grid_y_step_gt_zero_le_grid_y_size(Config &conf) { + check_and_exit_if_not( + (conf.mesh_config_part.grid_y_step > 0) + && (conf.mesh_config_part.grid_y_step + <= conf.mesh_config_part.grid_y_size), + "grid_y_step < 0 or grid_y_step >= grid_y_size"); +} + +void SpatialMeshCu::grid_z_size_gt_zero(Config &conf) { + check_and_exit_if_not(conf.mesh_config_part.grid_z_size > 0, + "grid_z_size < 0"); +} + +void SpatialMeshCu::grid_z_step_gt_zero_le_grid_z_size(Config &conf) { + check_and_exit_if_not( + (conf.mesh_config_part.grid_z_step > 0) + && (conf.mesh_config_part.grid_z_step + <= conf.mesh_config_part.grid_z_size), + "grid_z_step < 0 or grid_z_step >= grid_z_size"); +} + +void SpatialMeshCu::check_and_exit_if_not(const bool &should_be, + const std::string &message) { + //if (!should_be) { + // std::cout << "Error: " << message << std::endl; + // exit(EXIT_FAILURE); + //} + //return; +} + +double SpatialMeshCu::node_number_to_coordinate_x(int i) { + if (i >= 0 && i < n_nodes.x) { + return i * cell_size.x; + } + else { + printf("invalid node number i=%d at node_number_to_coordinate_x\n", i); + exit(EXIT_FAILURE); + } + return 0; +} + +double SpatialMeshCu::node_number_to_coordinate_y(int j) { + if (j >= 0 && j < n_nodes.y) { + return j * cell_size.y; + } + else { + printf("invalid node number j=%d at node_number_to_coordinate_y\n", j); + exit(EXIT_FAILURE); + } + return 0; +} + +double SpatialMeshCu::node_number_to_coordinate_z(int k) { + if (k >= 0 && k < n_nodes.z) { + return k * cell_size.z; + } + else { + printf("invalid node number k=%d at node_number_to_coordinate_z\n", k); + exit(EXIT_FAILURE); + } + return 0; +} + +void SpatialMeshCu::hdf5_status_check(herr_t status) +{ + if (status < 0) { + std::cout << "Something went wrong while writing Spatial_mesh group. Aborting." + << std::endl; + exit(EXIT_FAILURE); + } +} + +void SpatialMeshCu::cuda_status_check(cudaError_t status, std::string &sender) +{ + if (status > 0) { + std::cout << "Cuda error at"<< sender <<": " << cudaGetErrorString(status) << std::endl; + exit(EXIT_FAILURE); + } +} + +dim3 SpatialMeshCu::GetThreads() { + // todo: explicitly determine number of threads from GPU warp size + return dim3(4, 4, 4); +} + +dim3 SpatialMeshCu::GetBlocks(dim3 nThreads) { + return dim3(n_nodes.x / nThreads.x, n_nodes.y / nThreads.y, n_nodes.z/nThreads.z); +} + +SpatialMeshCu::~SpatialMeshCu() { + cudaFree((void*)dev_node_coordinates); + cudaFree((void*)dev_potential); + cudaFree((void*)dev_charge_density); + cudaFree((void*)dev_electric_field); +} diff --git a/SpatialMeshCu.cuh b/SpatialMeshCu.cuh new file mode 100644 index 0000000..da6534c --- /dev/null +++ b/SpatialMeshCu.cuh @@ -0,0 +1,76 @@ +#ifndef _SPATIAL_MESH_CUH_ +#define _SPATIAL_MESH_CUH_ + +#include +#include +#include +#include +#include +#include "config.h" + + +class SpatialMeshCu { +public: + + int3 n_nodes; + double3 cell_size; + double3 volume_size; + double3 *dev_node_coordinates; + double *dev_charge_density; + double *dev_potential; + double3 *dev_electric_field; + + + //double border_left; + //double border_right; + //double border_top; + //double border_bottom; + //double border_near; + //double border_far; + + +public: + SpatialMeshCu(Config &conf); + SpatialMeshCu(hid_t h5_spat_mesh_group); + void clear_old_density_values(); + void set_boundary_conditions(double* d_potential); + bool is_potential_equal_on_boundaries(); + void print(); + void write_to_file(hid_t hdf5_file_id); + virtual ~SpatialMeshCu(); + double node_number_to_coordinate_x(int i); + double node_number_to_coordinate_y(int j); + double node_number_to_coordinate_z(int k); + dim3 GetThreads(); + dim3 GetBlocks(dim3 nThreads); +private: + // init + void check_correctness_of_related_config_fields(Config &conf); + void init_constants(Config &conf); + void copy_constants_to_device(); + void copy_boundary_to_device(Config &conf); + void allocate_ongrid_values(); + void fill_node_coordinates(); + + // print + void print_grid(); + void print_ongrid_values(); + // write hdf5 + void write_hdf5_attributes(hid_t group_id); + void write_hdf5_ongrid_values(hid_t group_id); + int n_of_elements_to_write_for_each_process_for_1d_dataset(int total_elements); + int data_offset_for_each_process_for_1d_dataset(int total_elements); + void hdf5_status_check(herr_t status); + // config check + void grid_x_size_gt_zero(Config &conf); + void grid_x_step_gt_zero_le_grid_x_size(Config &conf); + void grid_y_size_gt_zero(Config &conf); + void grid_y_step_gt_zero_le_grid_y_size(Config &conf); + void grid_z_size_gt_zero(Config &conf); + void grid_z_step_gt_zero_le_grid_z_size(Config &conf); + void check_and_exit_if_not(const bool &should_be, const std::string &message); + //cuda + void cuda_status_check(cudaError_t status, std::string &sender); + +}; +#endif /* _SPATIAL_MESH_CUH_ */ diff --git a/domain.cpp b/domain.cpp index aa0e63e..2af2e3e 100644 --- a/domain.cpp +++ b/domain.cpp @@ -111,8 +111,8 @@ void Domain::eval_charge_density() void Domain::eval_potential_and_fields() { - field_solver.eval_potential( spat_mesh, inner_regions ); - field_solver.eval_fields_from_potential( spat_mesh ); + field_solver.eval_potential(inner_regions ); + field_solver.eval_fields_from_potential(); return; } @@ -367,9 +367,9 @@ bool Domain::out_of_bound( const Particle &p ) bool out; out = - ( x >= spat_mesh.x_volume_size ) || ( x <= 0 ) || - ( y >= spat_mesh.y_volume_size ) || ( y <= 0 ) || - ( z >= spat_mesh.z_volume_size ) || ( z <= 0 ) ; + ( x >= spat_mesh.volume_size.x ) || ( x <= 0 ) || + ( y >= spat_mesh.volume_size.y ) || ( y <= 0 ) || + ( z >= spat_mesh.volume_size.z ) || ( z <= 0 ) ; return out; diff --git a/domain.h b/domain.h index 671abaf..3fbfc55 100644 --- a/domain.h +++ b/domain.h @@ -10,10 +10,10 @@ #include #include "config.h" #include "time_grid.h" -#include "spatial_mesh.h" +#include "SpatialMeshCu.cuh" #include "inner_region.h" #include "particle_to_mesh_map.h" -#include "field_solver.h" +#include "FieldSolver.cuh" #include "External_field.h" #include "particle_interaction_model.h" #include "particle_source.h" @@ -28,10 +28,10 @@ class Domain { //Domain() {}; public: Time_grid time_grid; - Spatial_mesh spat_mesh; + SpatialMeshCu spat_mesh; Inner_regions_manager inner_regions; Particle_to_mesh_map particle_to_mesh_map; - Field_solver field_solver; + FieldSolver field_solver; Particle_sources_manager particle_sources; External_fields_manager external_fields; Particle_interaction_model particle_interaction_model; diff --git a/field_solver.cpp b/field_solver.cpp deleted file mode 100644 index 58821d9..0000000 --- a/field_solver.cpp +++ /dev/null @@ -1,230 +0,0 @@ -#include "field_solver.h" - -Field_solver::Field_solver( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ) -{ - nx = spat_mesh.x_n_nodes; - ny = spat_mesh.y_n_nodes; - nz = spat_mesh.z_n_nodes; - dx = spat_mesh.x_cell_size; - dy = spat_mesh.y_cell_size; - dz = spat_mesh.z_cell_size; - - allocate_current_next_phi(); -} - -void Field_solver::allocate_current_next_phi() -{ - phi_current.resize( boost::extents[nx][ny][nz] ); - phi_next.resize( boost::extents[nx][ny][nz] ); -} - -void Field_solver::eval_potential( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ) -{ - solve_poisson_eqn_Jacobi( spat_mesh, inner_regions ); -} - -void Field_solver::solve_poisson_eqn_Jacobi( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ) -{ - max_Jacobi_iterations = 150; - int iter; - - init_current_phi_from_spat_mesh_phi( spat_mesh ); - for( iter = 0; iter < max_Jacobi_iterations; ++iter ){ - single_Jacobi_iteration( spat_mesh, inner_regions ); - if ( iterative_Jacobi_solutions_converged() ) { - break; - } - set_phi_next_as_phi_current(); - } - if ( iter == max_Jacobi_iterations ){ - printf("WARING: potential evaluation did't converge after max iterations!\n"); - } - transfer_solution_to_spat_mesh( spat_mesh ); - - return; -} - - -void Field_solver::init_current_phi_from_spat_mesh_phi( Spatial_mesh &spat_mesh ) -{ - phi_current.assign( spat_mesh.potential.data(), - spat_mesh.potential.data() + spat_mesh.potential.num_elements() ); - return; -} - -void Field_solver::single_Jacobi_iteration( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ) -{ - set_phi_next_at_boundaries(); - compute_phi_next_at_inner_points( spat_mesh ); - set_phi_next_at_inner_regions( inner_regions ); -} - -void Field_solver::set_phi_next_at_boundaries() -{ - for ( int j = 0; j < ny; j++ ) { - for ( int k = 0; k < nz; k++ ) { - phi_next[0][j][k] = phi_current[0][j][k]; - phi_next[nx-1][j][k] = phi_current[nx-1][j][k]; - } - } - // - for ( int i = 0; i < nx; i++ ) { - for ( int k = 0; k < nz; k++ ) { - phi_next[i][0][k] = phi_current[i][0][k]; - phi_next[i][ny-1][k] = phi_current[i][ny-1][k]; - } - } - // - for ( int i = 0; i < nx; i++ ) { - for ( int j = 0; j < ny; j++ ) { - phi_next[i][j][0] = phi_current[i][j][0]; - phi_next[i][j][nz-1] = phi_current[i][j][nz-1]; - } - } -} - -void Field_solver::compute_phi_next_at_inner_points( Spatial_mesh &spat_mesh ) -{ - double dxdxdydy = dx * dx * dy * dy; - double dxdxdzdz = dx * dx * dz * dz; - double dydydzdz = dy * dy * dz * dz; - double dxdxdydydzdz = dx * dx * dy * dy * dz * dz; - double denom = 2 * ( dxdxdydy + dxdxdzdz + dydydzdz ); - // - for ( int i = 1; i < nx - 1; i++ ) { - for ( int j = 1; j < ny - 1; j++ ) { - for ( int k = 1; k < nz - 1; k++ ) { - phi_next[i][j][k] = - ( phi_current[i-1][j][k] + phi_current[i+1][j][k] ) * dydydzdz; - phi_next[i][j][k] = phi_next[i][j][k] + - ( phi_current[i][j-1][k] + phi_current[i][j+1][k] ) * dxdxdzdz; - phi_next[i][j][k] = phi_next[i][j][k] + - ( phi_current[i][j][k-1] + phi_current[i][j][k+1] ) * dxdxdydy; - // Delta phi = - 4 * pi * rho - phi_next[i][j][k] = phi_next[i][j][k] + - 4.0 * M_PI * spat_mesh.charge_density[i][j][k] * dxdxdydydzdz; - phi_next[i][j][k] = phi_next[i][j][k] / denom; - } - } - } -} - -void Field_solver::set_phi_next_at_inner_regions( Inner_regions_manager &inner_regions ) -{ - for( auto ® : inner_regions.regions ){ - for( auto &node : reg.inner_nodes ){ - // todo: mark nodes at edge during construction - // if (!node.at_domain_edge( nx, ny, nz )) { - phi_next[node.x][node.y][node.z] = reg.potential; - // } - } - } -} - - -bool Field_solver::iterative_Jacobi_solutions_converged() -{ - // todo: bind tol to config parameters - //abs_tolerance = std::max( dx * dx, std::max( dy * dy, dz * dz ) ) / 5; - abs_tolerance = 1.0e-5; - rel_tolerance = 1.0e-12; - double diff; - double rel_diff; - //double tol; - // - for ( int i = 0; i < nx; i++ ) { - for ( int j = 0; j < ny; j++ ) { - for ( int k = 0; k < nz; k++ ) { - diff = fabs( phi_next[i][j][k] - phi_current[i][j][k] ); - rel_diff = diff / fabs( phi_current[i][j][k] ); - if ( diff > abs_tolerance || rel_diff > rel_tolerance ){ - return false; - } - } - } - } - return true; -} - - -void Field_solver::set_phi_next_as_phi_current() -{ - // Looks like straightforward assignment - // phi_next = phi_current - // would result in copy. - // Hopefully, it could be avoided with std::swap - std::swap( phi_current, phi_next ); -} - -void Field_solver::transfer_solution_to_spat_mesh( Spatial_mesh &spat_mesh ) -{ - spat_mesh.potential.assign( phi_next.data(), - phi_next.data() + phi_next.num_elements() ); -} - - -void Field_solver::eval_fields_from_potential( Spatial_mesh &spat_mesh ) -{ - int nx = spat_mesh.x_n_nodes; - int ny = spat_mesh.y_n_nodes; - int nz = spat_mesh.z_n_nodes; - double dx = spat_mesh.x_cell_size; - double dy = spat_mesh.y_cell_size; - double dz = spat_mesh.z_cell_size; - boost::multi_array &phi = spat_mesh.potential; - double ex, ey, ez; - // - for ( int i = 0; i < nx; i++ ) { - for ( int j = 0; j < ny; j++ ) { - for ( int k = 0; k < nz; k++ ) { - if ( i == 0 ) { - ex = - boundary_difference( phi[i][j][k], phi[i+1][j][k], dx ); - } else if ( i == nx-1 ) { - ex = - boundary_difference( phi[i-1][j][k], phi[i][j][k], dx ); - } else { - ex = - central_difference( phi[i-1][j][k], phi[i+1][j][k], dx ); - } - - if ( j == 0 ) { - ey = - boundary_difference( phi[i][j][k], phi[i][j+1][k], dy ); - } else if ( j == ny-1 ) { - ey = - boundary_difference( phi[i][j-1][k], phi[i][j][k], dy ); - } else { - ey = - central_difference( phi[i][j-1][k], phi[i][j+1][k], dy ); - } - - if ( k == 0 ) { - ez = - boundary_difference( phi[i][j][k], phi[i][j][k+1], dz ); - } else if ( k == nz-1 ) { - ez = - boundary_difference( phi[i][j][k-1], phi[i][j][k], dz ); - } else { - ez = - central_difference( phi[i][j][k-1], phi[i][j][k+1], dz ); - } - - spat_mesh.electric_field[i][j][k] = vec3d_init( ex, ey, ez ); - } - } - } - - return; -} - -double Field_solver::central_difference( double phi1, double phi2, double dx ) -{ - return ( (phi2 - phi1) / ( 2.0 * dx ) ); -} - -double Field_solver::boundary_difference( double phi1, double phi2, double dx ) -{ - return ( (phi2 - phi1) / dx ); -} - - -Field_solver::~Field_solver() -{ - // delete phi arrays? -} diff --git a/field_solver.h b/field_solver.h deleted file mode 100644 index 8285b53..0000000 --- a/field_solver.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _FIELD_SOLVER_H_ -#define _FIELD_SOLVER_H_ - -#include -#include -#include -#include "spatial_mesh.h" -#include "inner_region.h" - - -class Field_solver { - public: - Field_solver( Spatial_mesh &spat_mesh, Inner_regions_manager &inner_regions ); - void eval_potential( Spatial_mesh &spat_mesh, Inner_regions_manager &inner_regions ); - void eval_fields_from_potential( Spatial_mesh &spat_mesh ); - virtual ~Field_solver(); - private: - int nx, ny, nz; - double dx, dy, dz; - private: - int max_Jacobi_iterations; - double rel_tolerance; - double abs_tolerance; - boost::multi_array phi_current; - boost::multi_array phi_next; - void allocate_current_next_phi(); - // Solve potential - void solve_poisson_eqn_Jacobi( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ); - void init_current_phi_from_spat_mesh_phi( Spatial_mesh &spat_mesh ); - void single_Jacobi_iteration( Spatial_mesh &spat_mesh, - Inner_regions_manager &inner_regions ); - void set_phi_next_at_boundaries(); - void compute_phi_next_at_inner_points( Spatial_mesh &spat_mesh ); - void set_phi_next_at_inner_regions( Inner_regions_manager &inner_regions ); - bool iterative_Jacobi_solutions_converged(); - void set_phi_next_as_phi_current(); - void transfer_solution_to_spat_mesh( Spatial_mesh &spat_mesh ); - // Eval fields from potential - double boundary_difference( double phi1, double phi2, double dx ); - double central_difference( double phi1, double phi2, double dx ); -}; - -#endif /* _FIELD_SOLVER_H_ */ diff --git a/inner_region.cpp b/inner_region.cpp index c5207bf..1a8a1d2 100644 --- a/inner_region.cpp +++ b/inner_region.cpp @@ -80,11 +80,11 @@ bool Inner_region::check_if_node_inside( Node_reference &node, return check_if_point_inside( node.x * dx, node.y * dy, node.z * dz ); } -void Inner_region::mark_inner_nodes( Spatial_mesh &spat_mesh ) +void Inner_region::mark_inner_nodes( SpatialMeshCu &spat_mesh ) { - int nx = spat_mesh.x_n_nodes; - int ny = spat_mesh.y_n_nodes; - int nz = spat_mesh.z_n_nodes; + int nx = spat_mesh.n_nodes.x; + int ny = spat_mesh.n_nodes.y; + int nz = spat_mesh.n_nodes.z; for ( int k = 0; k < nz; k++ ) { for ( int j = 0; j < ny; j++ ) { @@ -99,11 +99,11 @@ void Inner_region::mark_inner_nodes( Spatial_mesh &spat_mesh ) } } -void Inner_region::select_inner_nodes_not_at_domain_edge( Spatial_mesh &spat_mesh ) +void Inner_region::select_inner_nodes_not_at_domain_edge( SpatialMeshCu &spat_mesh ) { - int nx = spat_mesh.x_n_nodes; - int ny = spat_mesh.y_n_nodes; - int nz = spat_mesh.z_n_nodes; + int nx = spat_mesh.n_nodes.x; + int ny = spat_mesh.n_nodes.y; + int nz = spat_mesh.n_nodes.z; inner_nodes_not_at_domain_edge.reserve( inner_nodes.size() ); @@ -114,11 +114,11 @@ void Inner_region::select_inner_nodes_not_at_domain_edge( Spatial_mesh &spat_mes } } -void Inner_region::mark_near_boundary_nodes( Spatial_mesh &spat_mesh ) +void Inner_region::mark_near_boundary_nodes( SpatialMeshCu &spat_mesh ) { - int nx = spat_mesh.x_n_nodes; - int ny = spat_mesh.y_n_nodes; - int nz = spat_mesh.z_n_nodes; + int nx = spat_mesh.n_nodes.x; + int ny = spat_mesh.n_nodes.y; + int nz = spat_mesh.n_nodes.z; // rewrite; for( auto &node : inner_nodes ){ @@ -139,13 +139,13 @@ void Inner_region::mark_near_boundary_nodes( Spatial_mesh &spat_mesh ) near_boundary_nodes.end() ); } -void Inner_region::select_near_boundary_nodes_not_at_domain_edge( Spatial_mesh &spat_mesh ) +void Inner_region::select_near_boundary_nodes_not_at_domain_edge( SpatialMeshCu &spat_mesh ) { // todo: repeats with select_inner_nodes_not_at_domain_edge; // remove code duplication - int nx = spat_mesh.x_n_nodes; - int ny = spat_mesh.y_n_nodes; - int nz = spat_mesh.z_n_nodes; + int nx = spat_mesh.n_nodes.x; + int ny = spat_mesh.n_nodes.y; + int nz = spat_mesh.n_nodes.z; near_boundary_nodes_not_at_domain_edge.reserve( near_boundary_nodes.size() ); @@ -217,7 +217,7 @@ void Inner_region::hdf5_status_check( herr_t status ) Inner_region_box::Inner_region_box( Config &conf, Inner_region_box_config_part &inner_region_box_conf, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_box_conf ) { object_type = "box"; @@ -231,7 +231,7 @@ Inner_region_box::Inner_region_box( Inner_region_box::Inner_region_box( hid_t h5_inner_region_box_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_box_group_id ) { object_type = "box"; @@ -336,7 +336,7 @@ void Inner_region_box::write_hdf5_region_specific_parameters( Inner_region_sphere::Inner_region_sphere( Config &conf, Inner_region_sphere_config_part &inner_region_sphere_conf, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_sphere_conf ) { object_type = "sphere"; @@ -351,7 +351,7 @@ Inner_region_sphere::Inner_region_sphere( Inner_region_sphere::Inner_region_sphere( hid_t h5_inner_region_sphere_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_sphere_group_id ) { object_type = "sphere"; @@ -442,7 +442,7 @@ void Inner_region_sphere::write_hdf5_region_specific_parameters( Inner_region_cylinder::Inner_region_cylinder( Config &conf, Inner_region_cylinder_config_part &inner_region_cylinder_conf, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_cylinder_conf ) { object_type = "cylinder"; @@ -456,7 +456,7 @@ Inner_region_cylinder::Inner_region_cylinder( Inner_region_cylinder::Inner_region_cylinder( hid_t h5_inner_region_cylinder_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_cylinder_group_id ) { object_type = "cylinder"; @@ -581,7 +581,7 @@ void Inner_region_cylinder::write_hdf5_region_specific_parameters( Inner_region_tube::Inner_region_tube( Config &conf, Inner_region_tube_config_part &inner_region_tube_conf, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_tube_conf ) { object_type = "tube"; @@ -595,7 +595,7 @@ Inner_region_tube::Inner_region_tube( Inner_region_tube::Inner_region_tube( hid_t h5_inner_region_tube_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_tube_group_id ) { object_type = "tube"; @@ -728,7 +728,7 @@ void Inner_region_tube::write_hdf5_region_specific_parameters( Inner_region_tube_along_z_segment::Inner_region_tube_along_z_segment( Config &conf, Inner_region_tube_along_z_segment_config_part &inner_region_tube_along_z_segment_conf, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_tube_along_z_segment_conf ) { object_type = "tube_along_z_segment"; @@ -744,7 +744,7 @@ Inner_region_tube_along_z_segment::Inner_region_tube_along_z_segment( Inner_region_tube_along_z_segment::Inner_region_tube_along_z_segment( hid_t h5_inner_region_tube_along_z_segment_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_tube_along_z_segment_group_id ) { object_type = "tube_along_z_segment"; @@ -893,7 +893,7 @@ void Inner_region_tube_along_z_segment::write_hdf5_region_specific_parameters( Inner_region_cone_along_z::Inner_region_cone_along_z( Config &conf, Inner_region_cone_along_z_config_part &inner_region_cone_along_z_conf, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) : Inner_region( conf, inner_region_cone_along_z_conf ) { object_type = "cone_along_z"; @@ -907,7 +907,7 @@ Inner_region_cone_along_z::Inner_region_cone_along_z( Inner_region_cone_along_z::Inner_region_cone_along_z( hid_t h5_inner_region_cone_along_z_group_id, - Spatial_mesh &spat_mesh ) : + SpatialMeshCu &spat_mesh ) : Inner_region( h5_inner_region_cone_along_z_group_id ) { object_type = "cone_along_z"; diff --git a/inner_region.h b/inner_region.h index ca5b7bc..77dcaa6 100644 --- a/inner_region.h +++ b/inner_region.h @@ -9,7 +9,7 @@ #include #include #include "config.h" -#include "spatial_mesh.h" +#include "SpatialMeshCu.cuh" #include "node_reference.h" #include "particle.h" #include "vec3d.h" @@ -55,10 +55,10 @@ class Inner_region{ void write_to_file( hid_t regions_group_id ); void hdf5_status_check( herr_t status ); protected: - void mark_inner_nodes( Spatial_mesh &spat_mesh ); - void select_inner_nodes_not_at_domain_edge( Spatial_mesh &spat_mesh ); - void mark_near_boundary_nodes( Spatial_mesh &spat_mesh ); - void select_near_boundary_nodes_not_at_domain_edge( Spatial_mesh &spat_mesh ); + void mark_inner_nodes( SpatialMeshCu &spat_mesh ); + void select_inner_nodes_not_at_domain_edge( SpatialMeshCu &spat_mesh ); + void mark_near_boundary_nodes( SpatialMeshCu &spat_mesh ); + void select_near_boundary_nodes_not_at_domain_edge( SpatialMeshCu &spat_mesh ); void write_hdf5_common_parameters( hid_t current_region_group_id ); virtual void write_hdf5_region_specific_parameters( hid_t current_region_group_id ) = 0; @@ -84,9 +84,9 @@ class Inner_region_box : public Inner_region{ public: Inner_region_box( Config &conf, Inner_region_box_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); Inner_region_box( hid_t h5_inner_region_box_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_box() {}; void print() { std::cout << "Inner region: name = " << name << std::endl; @@ -122,9 +122,9 @@ class Inner_region_sphere : public Inner_region{ Inner_region_sphere( Config &conf, Inner_region_sphere_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); Inner_region_sphere( hid_t h5_inner_region_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_sphere() {}; void print() { std::cout << "Inner region: name = " << name << std::endl; @@ -161,9 +161,9 @@ class Inner_region_cylinder : public Inner_region{ Inner_region_cylinder( Config &conf, Inner_region_cylinder_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); Inner_region_cylinder( hid_t h5_inner_region_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_cylinder() {}; void print() { std::cout << "Inner region: name = " << name << std::endl; @@ -205,9 +205,9 @@ class Inner_region_tube : public Inner_region{ Inner_region_tube( Config &conf, Inner_region_tube_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); Inner_region_tube( hid_t h5_inner_region_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_tube() {}; void print() { std::cout << "Inner region: name = " << name << std::endl; @@ -249,9 +249,9 @@ class Inner_region_tube_along_z_segment : public Inner_region{ Inner_region_tube_along_z_segment( Config &conf, Inner_region_tube_along_z_segment_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); Inner_region_tube_along_z_segment( hid_t h5_inner_region_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_tube_along_z_segment() {}; void print() { std::cout << "Inner region: name = " << name << std::endl; @@ -294,9 +294,9 @@ class Inner_region_cone_along_z : public Inner_region{ Inner_region_cone_along_z( Config &conf, Inner_region_cone_along_z_config_part &inner_region_conf, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); Inner_region_cone_along_z( hid_t h5_inner_region_group_id, - Spatial_mesh &spat_mesh ); + SpatialMeshCu &spat_mesh ); virtual ~Inner_region_cone_along_z() {}; void print() { std::cout << "Inner region: name = " << name << std::endl; @@ -336,7 +336,7 @@ class Inner_regions_manager{ public: boost::ptr_vector regions; public: - Inner_regions_manager( Config &conf, Spatial_mesh &spat_mesh ) + Inner_regions_manager( Config &conf, SpatialMeshCu &spat_mesh ) { for( auto &inner_region_conf : conf.inner_regions_config_part ){ if( Inner_region_box_config_part *box_conf = @@ -383,7 +383,7 @@ class Inner_regions_manager{ } Inner_regions_manager( hid_t h5_inner_region_group, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) { hsize_t nobj; ssize_t len; @@ -418,7 +418,7 @@ class Inner_regions_manager{ } void parse_hdf5_inner_reg( hid_t current_ir_grpid, - Spatial_mesh &spat_mesh ) + SpatialMeshCu &spat_mesh ) { herr_t status; char object_type_cstr[50]; diff --git a/main.cpp b/main.cpp index 6a83786..84669da 100644 --- a/main.cpp +++ b/main.cpp @@ -3,6 +3,7 @@ #include #include #include "config.h" +#include "cuda_runtime.h" #include "domain.h" #include "parse_cmd_line.h" @@ -16,6 +17,8 @@ void extract_filename_prefix_and_suffix_from_h5filename( std::string h5_file, int main( int argc, char *argv[] ) { + cudaError_t cudaStatus; + cudaStatus = cudaSetDevice(0); std::string config_or_h5_file; parse_cmd_line( argc, argv, config_or_h5_file ); diff --git a/particle_to_mesh_map.cpp b/particle_to_mesh_map.cpp index cc2487b..f134e6a 100644 --- a/particle_to_mesh_map.cpp +++ b/particle_to_mesh_map.cpp @@ -1,204 +1,205 @@ #include "particle_to_mesh_map.h" // Eval charge density on grid -void Particle_to_mesh_map::weight_particles_charge_to_mesh( - Spatial_mesh &spat_mesh, Particle_sources_manager &particle_sources ) +void Particle_to_mesh_map::weight_particles_charge_to_mesh( + SpatialMeshCu &spat_mesh, Particle_sources_manager &particle_sources ) { // Rewrite: // forall particles { // find nonzero weights and corresponding nodes // charge[node] = weight(particle, node) * particle.charge // } - double dx = spat_mesh.x_cell_size; - double dy = spat_mesh.y_cell_size; - double dz = spat_mesh.z_cell_size; - double cell_volume = dx * dy * dz; - double volume_around_node = cell_volume; - int tlf_i, tlf_j, tlf_k; // 'tlf' = 'top_left_far' - double tlf_x_weight, tlf_y_weight, tlf_z_weight; +// double dx = spat_mesh.cell_size.x; +// double dy = spat_mesh.cell_size.y; +// double dz = spat_mesh.cell_size.z; +// double cell_volume = dx * dy * dz; +// double volume_around_node = cell_volume; +// int tlf_i, tlf_j, tlf_k; // 'tlf' = 'top_left_far' +// double tlf_x_weight, tlf_y_weight, tlf_z_weight; - for( auto& part_src: particle_sources.sources ) { - for( auto& p : part_src.particles ) { - next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); - next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); - next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); - spat_mesh.charge_density[tlf_i][tlf_j][tlf_k] += - tlf_x_weight * tlf_y_weight * tlf_z_weight - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i-1][tlf_j][tlf_k] += - ( 1.0 - tlf_x_weight ) * tlf_y_weight * tlf_z_weight - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i][tlf_j-1][tlf_k] += - tlf_x_weight * ( 1.0 - tlf_y_weight ) * tlf_z_weight - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i-1][tlf_j-1][tlf_k] += - ( 1.0 - tlf_x_weight ) * ( 1.0 - tlf_y_weight ) * tlf_z_weight - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i][tlf_j][tlf_k - 1] += - tlf_x_weight * tlf_y_weight * ( 1.0 - tlf_z_weight ) - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i-1][tlf_j][tlf_k - 1] += - ( 1.0 - tlf_x_weight ) * tlf_y_weight * ( 1.0 - tlf_z_weight ) - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i][tlf_j-1][tlf_k - 1] += - tlf_x_weight * ( 1.0 - tlf_y_weight ) * ( 1.0 - tlf_z_weight ) - * p.charge / volume_around_node; - spat_mesh.charge_density[tlf_i-1][tlf_j-1][tlf_k - 1] += - ( 1.0 - tlf_x_weight ) * ( 1.0 - tlf_y_weight ) * ( 1.0 - tlf_z_weight ) - * p.charge / volume_around_node; - } - } +// for( auto& part_src: particle_sources.sources ) { +// for( auto& p : part_src.particles ) { +// next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); +// next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); +// next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); +// spat_mesh.charge_density[tlf_i][tlf_j][tlf_k] += +// tlf_x_weight * tlf_y_weight * tlf_z_weight +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i-1][tlf_j][tlf_k] += +// ( 1.0 - tlf_x_weight ) * tlf_y_weight * tlf_z_weight +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i][tlf_j-1][tlf_k] += +// tlf_x_weight * ( 1.0 - tlf_y_weight ) * tlf_z_weight +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i-1][tlf_j-1][tlf_k] += +// ( 1.0 - tlf_x_weight ) * ( 1.0 - tlf_y_weight ) * tlf_z_weight +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i][tlf_j][tlf_k - 1] += +// tlf_x_weight * tlf_y_weight * ( 1.0 - tlf_z_weight ) +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i-1][tlf_j][tlf_k - 1] += +// ( 1.0 - tlf_x_weight ) * tlf_y_weight * ( 1.0 - tlf_z_weight ) +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i][tlf_j-1][tlf_k - 1] += +// tlf_x_weight * ( 1.0 - tlf_y_weight ) * ( 1.0 - tlf_z_weight ) +// * p.charge / volume_around_node; +// spat_mesh.charge_density[tlf_i-1][tlf_j-1][tlf_k - 1] += +// ( 1.0 - tlf_x_weight ) * ( 1.0 - tlf_y_weight ) * ( 1.0 - tlf_z_weight ) +// * p.charge / volume_around_node; +// } +// } return; } -Vec3d Particle_to_mesh_map::field_at_particle_position( - Spatial_mesh &spat_mesh, Particle &p ) +Vec3d Particle_to_mesh_map::field_at_particle_position( + SpatialMeshCu &spat_mesh, Particle &p ) { - double dx = spat_mesh.x_cell_size; - double dy = spat_mesh.y_cell_size; - double dz = spat_mesh.z_cell_size; - int tlf_i, tlf_j, tlf_k; // 'tlf' = 'top_left_far' - double tlf_x_weight, tlf_y_weight, tlf_z_weight; +// double dx = spat_mesh.x_cell_size; +// double dy = spat_mesh.y_cell_size; +// double dz = spat_mesh.z_cell_size; +// int tlf_i, tlf_j, tlf_k; // 'tlf' = 'top_left_far' +// double tlf_x_weight, tlf_y_weight, tlf_z_weight; Vec3d field_from_node, total_field; - // - next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); - next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); - next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); - // tlf +// // +// next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); +// next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); +// next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); +// // tlf total_field = vec3d_zero(); - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j][tlf_k], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // trf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // blf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // brf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // tln - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j][tlf_k-1], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // trn - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k-1], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // bln - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k-1], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // brn - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k-1], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j][tlf_k], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // trf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // blf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // brf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // tln +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j][tlf_k-1], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // trn +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k-1], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // bln +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k-1], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // brn +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k-1], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); // return total_field; } -Vec3d Particle_to_mesh_map::force_on_particle( - Spatial_mesh &spat_mesh, Particle &p ) +Vec3d Particle_to_mesh_map::force_on_particle( + SpatialMeshCu &spat_mesh, Particle &p ) { - double dx = spat_mesh.x_cell_size; - double dy = spat_mesh.y_cell_size; - double dz = spat_mesh.z_cell_size; - int tlf_i, tlf_j, tlf_k; // 'tlf' = 'top_left_far' - double tlf_x_weight, tlf_y_weight, tlf_z_weight; +// double dx = spat_mesh.x_cell_size; +// double dy = spat_mesh.y_cell_size; +// double dz = spat_mesh.z_cell_size; +// int tlf_i, tlf_j, tlf_k; // 'tlf' = 'top_left_far' +// double tlf_x_weight, tlf_y_weight, tlf_z_weight; Vec3d field_from_node, total_field, force; // - next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); - next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); - next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); - // tlf +// next_node_num_and_weight( vec3d_x( p.position ), dx, &tlf_i, &tlf_x_weight ); +// next_node_num_and_weight( vec3d_y( p.position ), dy, &tlf_j, &tlf_y_weight ); +// next_node_num_and_weight( vec3d_z( p.position ), dz, &tlf_k, &tlf_z_weight ); +// // tlf total_field = vec3d_zero(); - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j][tlf_k], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // trf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // blf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // brf - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // tln - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j][tlf_k-1], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // trn - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k-1], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // bln - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k-1], - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // brn - field_from_node = vec3d_times_scalar( - spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k-1], - 1.0 - tlf_x_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); - field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); - total_field = vec3d_add( total_field, field_from_node ); - // - force = vec3d_times_scalar( total_field, p.charge ); - return force; +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j][tlf_k], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // trf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // blf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // brf +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // tln +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j][tlf_k-1], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // trn +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j][tlf_k-1], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // bln +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i][tlf_j - 1][tlf_k-1], +// tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // brn +// field_from_node = vec3d_times_scalar( +// spat_mesh.electric_field[tlf_i-1][tlf_j-1][tlf_k-1], +// 1.0 - tlf_x_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_y_weight ); +// field_from_node = vec3d_times_scalar( field_from_node, 1.0 - tlf_z_weight ); +// total_field = vec3d_add( total_field, field_from_node ); +// // +// force = vec3d_times_scalar( total_field, p.charge ); +// return force; + return total_field; } void Particle_to_mesh_map::next_node_num_and_weight( diff --git a/particle_to_mesh_map.h b/particle_to_mesh_map.h index 7ad78a4..ca8441b 100644 --- a/particle_to_mesh_map.h +++ b/particle_to_mesh_map.h @@ -1,20 +1,20 @@ -#include "spatial_mesh.h" +#include "SpatialMeshCu.cuh" #include "particle_source.h" #include "particle.h" #include "vec3d.h" class Particle_to_mesh_map { - public: + public: Particle_to_mesh_map() {}; virtual ~Particle_to_mesh_map() {}; public: - void weight_particles_charge_to_mesh( Spatial_mesh &spat_mesh, + void weight_particles_charge_to_mesh( SpatialMeshCu &spat_mesh, Particle_sources_manager &particle_sources ); - Vec3d field_at_particle_position( Spatial_mesh &spat_mesh, Particle &p ); - Vec3d force_on_particle( Spatial_mesh &spat_mesh, Particle &p ); + Vec3d field_at_particle_position( SpatialMeshCu &spat_mesh, Particle &p ); + Vec3d force_on_particle( SpatialMeshCu &spat_mesh, Particle &p ); private: - void next_node_num_and_weight( const double x, const double grid_step, + void next_node_num_and_weight( const double x, const double grid_step, int *next_node, double *weight ); };