Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 26 additions & 24 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
}

template <int nLayers>
GPUg() void fitTrackSeedsKernel(
GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel(
CellSeed<nLayers>* trackSeeds,
const TrackingFrameInfo** foundTrackingFrameInfo,
o2::its::TrackITSExt* tracks,
Expand Down Expand Up @@ -374,7 +374,7 @@ GPUg() void fitTrackSeedsKernel(
}

template <bool initRun, int nLayers = 7>
GPUg() void computeLayerCellNeighboursKernel(
GPUg() void __launch_bounds__(256, 1) computeLayerCellNeighboursKernel(
CellSeed<nLayers>** cellSeedArray,
int* neighboursLUT,
int* neighboursIndexTable,
Expand Down Expand Up @@ -438,7 +438,7 @@ GPUg() void computeLayerCellNeighboursKernel(
}

template <bool initRun, int nLayers>
GPUg() void computeLayerCellsKernel(
GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel(
const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
Expand Down Expand Up @@ -525,7 +525,7 @@ GPUg() void computeLayerCellsKernel(
}

template <bool initRun, int nLayers>
GPUg() void computeLayerTrackletsMultiROFKernel(
GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel(
const IndexTableUtils<nLayers>* utils,
const uint8_t* multMask,
const int layerIndex,
Expand Down Expand Up @@ -652,33 +652,35 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
}
}

GPUg() void compileTrackletsLookupTableKernel(const Tracklet* tracklets,
int* trackletsLookUpTable,
const int nTracklets)
GPUg() void __launch_bounds__(256, 1) compileTrackletsLookupTableKernel(
const Tracklet* tracklets,
int* trackletsLookUpTable,
const int nTracklets)
{
for (int currentTrackletIndex = blockIdx.x * blockDim.x + threadIdx.x; currentTrackletIndex < nTracklets; currentTrackletIndex += blockDim.x * gridDim.x) {
atomicAdd(&trackletsLookUpTable[tracklets[currentTrackletIndex].firstClusterIndex], 1);
}
}

template <bool dryRun, int nLayers = 7>
GPUg() void processNeighboursKernel(const int layer,
const int level,
CellSeed<nLayers>** allCellSeeds,
CellSeed<nLayers>* currentCellSeeds,
const int* currentCellIds,
const unsigned int nCurrentCells,
CellSeed<nLayers>* updatedCellSeeds,
int* updatedCellsIds,
int* foundSeedsTable, // auxiliary only in GPU code to compute the number of cells per iteration
const unsigned char** usedClusters, // Used clusters
int* neighbours,
int* neighboursLUT,
const TrackingFrameInfo** foundTrackingFrameInfo,
const float bz,
const float maxChi2ClusterAttachment,
const o2::base::Propagator* propagator,
const o2::base::PropagatorF::MatCorrType matCorrType)
GPUg() void __launch_bounds__(256, 1) processNeighboursKernel(
const int layer,
const int level,
CellSeed<nLayers>** allCellSeeds,
CellSeed<nLayers>* currentCellSeeds,
const int* currentCellIds,
const unsigned int nCurrentCells,
CellSeed<nLayers>* updatedCellSeeds,
int* updatedCellsIds,
int* foundSeedsTable, // auxiliary only in GPU code to compute the number of cells per iteration
const unsigned char** usedClusters, // Used clusters
int* neighbours,
int* neighboursLUT,
const TrackingFrameInfo** foundTrackingFrameInfo,
const float bz,
const float maxChi2ClusterAttachment,
const o2::base::Propagator* propagator,
const o2::base::PropagatorF::MatCorrType matCorrType)
{
constexpr float layerxX0[7] = {5.e-3f, 5.e-3f, 5.e-3f, 1.e-2f, 1.e-2f, 1.e-2f, 1.e-2f}; // Hardcoded here for the moment.
for (unsigned int iCurrentCell = blockIdx.x * blockDim.x + threadIdx.x; iCurrentCell < nCurrentCells; iCurrentCell += blockDim.x * gridDim.x) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,19 +121,19 @@ struct ITSGpuTrackingParamConfig : public o2::conf::ConfigurableParamHelper<ITSG
void maybeOverride() const;

/// Individual kernel launch parameter for each iteration
int nBlocksLayerTracklets[MaxIter] = {30, 30, 30, 30};
int nBlocksLayerTracklets[MaxIter] = {60, 60, 60, 60};
int nThreadsLayerTracklets[MaxIter] = {256, 256, 256, 256};

int nBlocksLayerCells[MaxIter] = {30, 30, 30, 30};
int nBlocksLayerCells[MaxIter] = {60, 60, 60, 60};
int nThreadsLayerCells[MaxIter] = {256, 256, 256, 256};

int nBlocksFindNeighbours[MaxIter] = {30, 30, 30, 30};
int nBlocksFindNeighbours[MaxIter] = {60, 60, 60, 60};
int nThreadsFindNeighbours[MaxIter] = {256, 256, 256, 256};

int nBlocksProcessNeighbours[MaxIter] = {30, 30, 30, 30};
int nBlocksProcessNeighbours[MaxIter] = {60, 60, 60, 60};
int nThreadsProcessNeighbours[MaxIter] = {256, 256, 256, 256};

int nBlocksTracksSeeds[MaxIter] = {30, 30, 30, 30};
int nBlocksTracksSeeds[MaxIter] = {60, 60, 60, 60};
int nThreadsTracksSeeds[MaxIter] = {256, 256, 256, 256};

O2ParamDef(ITSGpuTrackingParamConfig, "ITSGpuTrackingParam");
Expand Down