diff --git a/src/examples/PairHMM/Makefile b/src/examples/PairHMM/Makefile new file mode 100644 index 00000000..9fc875a0 --- /dev/null +++ b/src/examples/PairHMM/Makefile @@ -0,0 +1,19 @@ +CC = gcc +CFLAGS= -O3 -std=c++11 +SHARED_LIBRARIES = -L../../../build/native -lgkl_pairhmm_c -lstdc++ + +all: pairhmm clean + +pairhmm: PairHMMUnitTest.o + $(CC) -o $@ $(SHARED_LIBRARIES) $^ $(CFLAGS) + +PairHMMUnitTest.o: PairHMMUnitTest.cpp PairHMMUnitTest.h + $(CC) -c $(CFLAGS) $< + +.PHONY: clean cleanest + +clean: + rm *.o + +cleanest: clean + rm pairhmm \ No newline at end of file diff --git a/src/examples/PairHMM/PairHMMUnitTest.cpp b/src/examples/PairHMM/PairHMMUnitTest.cpp new file mode 100644 index 00000000..ac8c3d34 --- /dev/null +++ b/src/examples/PairHMM/PairHMMUnitTest.cpp @@ -0,0 +1,238 @@ +// Copyright(c) 2016-2017, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#include +#include +#include +#include +#include +#include +#include "pairhmm_common.h" + +#include "PairHMMUnitTest.h" +#include "shacc_pairhmm.h" + +using namespace std; +using namespace shacc_pairhmm; + +int g_total_pass = 0; +int g_total_fail = 0; +float g_cpu_max_perf = 0; +float g_fpga_max_perf = 0; +long g_cpu_total_time = 0; +long g_fpga_total_time = 0; +long g_total_cells = 0; + + +void initPairHMM(); +void computelikelihoodsfloat(testcase *testcases, float *expected_result); + + +void normalize(string& str, int min_value=0) { + for (int i = 0; i < str.length(); i++) { + str[i] = max(min_value, str[i] - 33); + } +} + +Batch read_batch(istream &is) { + Batch batch; + + is >> batch.num_reads >> batch.num_haps >> ws; + + batch.reads = (Read*)malloc(batch.num_reads * sizeof(Read)); + batch.haps = (Haplotype*)malloc(batch.num_haps * sizeof(Haplotype)); + batch.results = (float*)malloc(sizeof(float) * batch.num_reads * batch.num_haps); + + long total_read_length = 0; + long total_hap_length = 0; + + for (int r = 0; r < batch.num_reads; r++) { + string bases, q, i, d, c; + is >> bases >> q >> i >> d >> c >> ws; + normalize(q, 6); + normalize(i); + normalize(d); + normalize(c); + int length = bases.size(); + total_read_length += length; + + Read* read = &batch.reads[r]; + read->length = length; + read->bases = strndup(bases.c_str(), length); + read->q = strndup(q.c_str(), length); + read->i = strndup(i.c_str(), length); + read->d = strndup(d.c_str(), length); + read->c = strndup(c.c_str(), length); + } + + for (int h = 0; h < batch.num_haps; h++) { + string bases; + is >> bases >> ws; + int length = bases.size(); + total_hap_length += length; + + Haplotype* hap = &batch.haps[h]; + hap->length = length; + hap->bases = strndup(bases.c_str(), length); + } + + batch.num_cells = total_read_length * total_hap_length; + + return batch; +} + +vector read_testfile(string filename) { + istream *is; + ifstream ifs; + if (filename == "") { + printf("Reading test data from stdin"); + is = &std::cin; + } + else { + printf("Reading test data from file: %s\n", filename.c_str()); + ifs.open(filename.c_str()); + if (!ifs.is_open()) { + printf("Cannot open file : %s", filename.c_str()); + exit(0); + } + is = &ifs; + } + + vector batches; + while (!is->eof()) { + Batch batch = read_batch(*is); + batches.push_back(batch); + } + + return batches; +} + + +void check_results(Batch& batch) { + int batch_size = batch.num_reads * batch.num_haps; + vector expected_results(batch_size); + testcase testcases[batch_size]; + + testcase *tc = testcases; + for (int r = 0; r < batch.num_reads; r++) { + for (int h = 0; h < batch.num_haps; h++) { + tc->rslen = batch.reads[r].length; + tc->haplen = batch.haps[h].length; + tc->hap = batch.haps[h].bases; + tc->rs = batch.reads[r].bases; + tc->q = batch.reads[r].q; + tc->i = batch.reads[r].i; + tc->d = batch.reads[r].d; + tc->c = batch.reads[r].c; + tc++; + } + } + + for (int i = 0; i < batch_size; i++) { + computelikelihoodsfloat(&testcases[i], &expected_results[i]); + } + + return; +} + +void time_pairhmm(Batch& batch, int num_pe=0, bool check=false) { + + long batch_cells = 0; + for (int r = 0; r < batch.num_reads; r++) { + for (int h = 0; h < batch.num_haps; h++) { + batch_cells += batch.reads[r].length * batch.haps[h].length; + } + } + + g_total_cells += batch_cells; + + if (check) { + check_results(batch); + } + return; +} + +int main(int argc, char** argv) { + // disable stdout buffering + setbuf(stdout, NULL); + + initPairHMM(); + + ConvertChar::init(); + + vector batches; + + // simple arg passing through env with no checking for now + const char* env_batch = getenv("BATCH"); + int batch = env_batch ? atoi(env_batch) : -1; + + const char* env_check = getenv("CHECK"); + bool check = env_check ? (atoi(env_check) ? true : false) : false; + + #ifdef CPU_ONLY + check = true; + #endif + + const char* env_loop = getenv("LOOP"); + int loop = env_loop ? atoi(env_loop) : 1; + + const char* env_num_pe = getenv("NUM_PE"); + int num_pe = env_num_pe ? atoi(env_num_pe) : 0; + + const char* env_testfile = argv[1]; + + + // display config + printf("Environment variable values:\n"); + printf(" TESTFILE = %s\n", env_testfile); + printf(" BATCH = %s\n", batch > 0 ? env_batch : "undefined (all batches)"); + printf(" NUM_PE = %s\n", num_pe > 0 ? env_num_pe : "undefined (all PEs)"); + printf(" CHECK = %d\n", check); + printf(" LOOP = %d\n", loop); + + batches = read_testfile(env_testfile); + + // run single batch or all batches + if (batch >= 0) { + while (loop) { + // printf("Batch %d", batch); + time_pairhmm(batches[batch], num_pe, check); + loop--; + } + } + else { + while (loop) { + for (int i = 0; i < batches.size(); i++) { + // printf("Batch %d", i); + time_pairhmm(batches[i], num_pe, check); + } + loop--; + } + } + + printf("\nPairHMM completed.\n"); + +} diff --git a/src/examples/PairHMM/PairHMMUnitTest.h b/src/examples/PairHMM/PairHMMUnitTest.h new file mode 100644 index 00000000..80f77346 --- /dev/null +++ b/src/examples/PairHMM/PairHMMUnitTest.h @@ -0,0 +1,143 @@ +// *************************************************************************** +// Copyright (c) 2016-2017, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// *************************************************************************** + +#ifndef PAIRHMM_H +#define PAIRHMM_H + +#define QUOTE(x) #x +#define STR(x) QUOTE(x) + +#define MAX_READ_LENGTH 32*1024 +#define MAX_HAP_LENGTH 32*1024 + +#define MAX_NUM_RESULTS 64*1024*1024 + +#ifndef ROWS +#define ROWS 26 +#endif + +#ifndef COLS +#define COLS 8 +#endif + + +#ifndef DBG +#ifdef DEBUG +#define DBG(M, ...) printf("[AOC-DEBUG] (%s:%d) " M "\n", __FUNCTION__, __LINE__, ##__VA_ARGS__) +#else +#define DBG(M, ...) +#endif +#endif + + +typedef struct { + char base; + char position; + short hap_num; + float y_init; +} HapData; + +typedef struct { + char base; + char position; + short read_num; + float mx; // Pr(match to insert gap) + float my; // Pr(match to delete gap) + float gg; // Pr(gap to gap) + float mm_1m_qual; // Pr(match to match) * (1 - read_qual) + float mm_qual_div3; // Pr(match to match) * read_qual / 3 + float gm_1m_qual; // Pr(gap to match) * (1 - read_qual) + float gm_qual_div3; // Pr(gap to match) * read_qual / 3 +} ReadData; + +typedef struct { + float m, x, y; +} PeData; + +typedef struct { + float m, x; + bool final_result; + bool first_hap; + int read_num; + int hap_num; +} RowResults; + +typedef struct { + HapData hap_row[COLS+1]; + HapData hap_col[ROWS+2]; + ReadData reads[COLS]; + PeData pe_data[2][COLS]; + float result; + bool first_col; + bool last_row; +} PairHmmInputData; + +typedef struct { + unsigned int read_length; + unsigned int hap_length; + float y_init; + unsigned int num_rows; +} PairHmmGlobalControlData; + +typedef struct { + PeData pe_data[2][COLS]; + HapData hap_row[COLS+1]; + float result; +} PairHmmOutputData; + +typedef struct { + int result_read_num; + int result_hap_num; + float result; +} PairHmmResultData; + +typedef struct { + char id[64]; + char version[64]; + int rows; + int cols; +} PairHmmAttributes; + + +void print_pe_data(int r, int c, ReadData read, HapData hap, PeData out) { + read.base = read.base > 32 ? read.base : '.'; + hap.base = hap.base > 32 ? hap.base : '.'; + DBG("PE %d %d %c %c %.6e %.6e %.6e %d %d %d %d %d", + r, c, + read.base, hap.base, // 0, 1 + out.m, out.x, out.y, // 2, 3, 4 + read.position & FIRST, // 5 + read.position & LAST, // 6 + hap.position & FIRST, // 7 + hap.position & LAST, // 8 + read.position & LAST && hap.position & LAST // 9 + ); +} + + +#endif diff --git a/src/examples/PairHMM/README b/src/examples/PairHMM/README new file mode 100644 index 00000000..45a81bea --- /dev/null +++ b/src/examples/PairHMM/README @@ -0,0 +1,9 @@ +In order to run this example using the PairHMM C interfaceAPI, follow the steps as outlined: + +1. make sure you have the library path set by setting LD_LIBRARY_PATH to actual path of .so file in build/native/libgkl_pairhmm_c.so + +2. run make command + +3. pairhmm executable will be generated + +4. Point it to the input test via command line argumentsgit \ No newline at end of file diff --git a/src/examples/PairHMM/pairhmm_common.h b/src/examples/PairHMM/pairhmm_common.h new file mode 100644 index 00000000..f996c3c3 --- /dev/null +++ b/src/examples/PairHMM/pairhmm_common.h @@ -0,0 +1,46 @@ +#ifndef PAIRHMM_COMMON_H +#define PAIRHMM_COMMON_H + +#if defined(_MSC_VER) + #include // SIMD intrinsics for Windows +#else + #include // SIMD intrinsics for GCC +#endif + +#include +#include + +#define CAT(X,Y) X##Y +#define CONCAT(X,Y) CAT(X,Y) + +#define MIN_ACCEPTED 1e-28f +#define NUM_DISTINCT_CHARS 5 +#define AMBIG_CHAR 4 + +typedef struct { + int rslen, haplen; + const char *q, *i, *d, *c; + const char *hap, *rs; +} testcase; + +class ConvertChar { + static uint8_t conversionTable[255] ; + + public: + static void init() { + assert (NUM_DISTINCT_CHARS == 5) ; + assert (AMBIG_CHAR == 4) ; + + conversionTable['A'] = 0 ; + conversionTable['C'] = 1 ; + conversionTable['T'] = 2 ; + conversionTable['G'] = 3 ; + conversionTable['N'] = 4 ; + } + + static inline uint8_t get(uint8_t input) { + return conversionTable[input] ; + } +}; + +#endif // PAIRHMM_COMMON_H diff --git a/src/examples/PairHMM/shacc_pairhmm.h b/src/examples/PairHMM/shacc_pairhmm.h new file mode 100644 index 00000000..2f6e0b01 --- /dev/null +++ b/src/examples/PairHMM/shacc_pairhmm.h @@ -0,0 +1,38 @@ +#ifndef SHACC_PAIRHMM_H +#define SHACC_PAIRHMM_H + +#ifdef __APPLE__ +#define WEAK __attribute__((weak_import)) +#else +#define WEAK __attribute__((weak)) +#endif + +namespace shacc_pairhmm { + + struct Read { + int length; + const char* bases; + const char* q; + const char* i; + const char* d; + const char* c; + }; + + struct Haplotype { + int length; + const char* bases; + }; + + struct Batch { + int num_reads; + int num_haps; + long num_cells; + Read* reads; + Haplotype* haps; + float* results; + }; + + extern WEAK bool calculate(Batch& batch); +} + +#endif diff --git a/src/examples/SmithWaterman/Makefile b/src/examples/SmithWaterman/Makefile new file mode 100644 index 00000000..9cc9b14b --- /dev/null +++ b/src/examples/SmithWaterman/Makefile @@ -0,0 +1,19 @@ +CC = g++ +CFLAGS= -O3 +SHARED_LIBRARIES = -L../../../build/native -lgkl_smithwaterman_c + +all: swpairwise clean + +swpairwise: SmithWatermanUnitTest.o + $(CC) -o $@ $(SHARED_LIBRARIES) $^ $(CFLAGS) + +SmithWatermanUnitTest.o: SmithWatermanUnitTest.c SmithWatermanUnitTest.h + $(CC) -c $(CFLAGS) $< + +.PHONY: clean cleanest + +clean: + rm *.o + +cleanest: clean + rm swpairwise diff --git a/src/examples/SmithWaterman/README b/src/examples/SmithWaterman/README new file mode 100644 index 00000000..c6c5b463 --- /dev/null +++ b/src/examples/SmithWaterman/README @@ -0,0 +1,10 @@ +In order to run this example using the SmithWaterman C interfaceAPI, follow the steps as outlined: + +1. make sure you have the library path set by setting LD_LIBRARY_PATH to actual path of .so file in build/native/libgkl_smithwaterman_c.so + +2. run make command + +3. swpairwise executable will be generated + +4. Point it to the input test via command line arguments + diff --git a/src/examples/SmithWaterman/SmithWatermanUnitTest.c b/src/examples/SmithWaterman/SmithWatermanUnitTest.c new file mode 100644 index 00000000..548a9c39 --- /dev/null +++ b/src/examples/SmithWaterman/SmithWatermanUnitTest.c @@ -0,0 +1,189 @@ +#include +#include"SmithWatermanUnitTest.h" + +#define DEFAULT_MATCH 1 +#define DEFAULT_MISMATCH -1 +#define DEFAULT_OPEN 1 +#define DEFAULT_EXTEND 1 + +int32_t w_match, w_mismatch, w_open, w_extend; +int8_t bt = 0; +char *pairFileName; +int64_t SW_cells = 0; + +void parseCmdLine(int argc, char *argv[]) +{ + int i; + w_match = DEFAULT_MATCH; + w_mismatch = DEFAULT_MISMATCH; + w_open = DEFAULT_OPEN; + w_extend = DEFAULT_EXTEND; + + int pairFlag = 0; + for(i = 1; i < argc; i+=2) + { + if(strcmp(argv[i], "-match") == 0) + { + w_match = atoi(argv[i + 1]); + } + if(strcmp(argv[i], "-mismatch") == 0) + { + w_mismatch = atoi(argv[i + 1]); + } + if(strcmp(argv[i], "-gapo") == 0) + { + w_open = atoi(argv[i + 1]); + } + if(strcmp(argv[i], "-gape") == 0) + { + w_extend = atoi(argv[i + 1]); + } + if(strcmp(argv[i], "-pairs") == 0) + { + pairFileName = argv[i + 1]; + pairFlag = 1; + } + if(strcmp(argv[i], "-bt") == 0) + { + bt = atoi(argv[i + 1]); + } + } + if(pairFlag == 0) + { + printf("ERROR! pairFileName not specified.\n"); + exit(0); + } +} + +int loadPairs(SeqPair *seqPairArray) + { + FILE *pairFile = fopen(pairFileName, "r"); + + if(pairFile == NULL) + { + fprintf(stderr, "Could not open file: %s\n", pairFileName); + exit(0); + } + + uint8_t *seqBuf = (uint8_t *)_mm_malloc(MAX_SEQ_LEN * 2 * MAX_NUM_PAIRS * sizeof(int8_t), 64); + char overhangStrategy[20]; + + int32_t numPairs = 0; + while(numPairs < MAX_NUM_PAIRS) + { + if(!fgets((char *)(seqBuf + numPairs * 2 * MAX_SEQ_LEN), MAX_SEQ_LEN, pairFile)) + { + break; + } + if(!fgets((char *)(seqBuf + (numPairs * 2 + 1) * MAX_SEQ_LEN), MAX_SEQ_LEN, pairFile)) + { + printf("ERROR! Odd number of sequences in %s\n", pairFileName); + break; + } + if(!fgets(overhangStrategy, 20, pairFile)) + { + printf("ERROR! Overhang Strategy value of the last sequence is not available %s\n", pairFileName); + break; + } + + SeqPair sp; + // sp.id = numPairs; + sp.seq1 = seqBuf + numPairs * 2 * MAX_SEQ_LEN; + sp.seq2 = seqBuf + (numPairs * 2 + 1) * MAX_SEQ_LEN; + sp.len1 = strnlen((char *)(seqBuf + numPairs * 2 * MAX_SEQ_LEN), MAX_SEQ_LEN) - 1; + sp.len2 = strnlen((char *)(seqBuf + (numPairs * 2 + 1) * MAX_SEQ_LEN), MAX_SEQ_LEN) - 1; + + if(strncmp(overhangStrategy, "SOFTCLIP", 8) == 0) + { + sp.overhangStrategy = SOFTCLIP; + } + else if(strncmp(overhangStrategy, "INDEL", 5) == 0) + { + sp.overhangStrategy = INDEL; + } + else if(strncmp(overhangStrategy, "LEADING_INDEL", 13) == 0) + { + sp.overhangStrategy = LEADING_INDEL; + } + else if(strncmp(overhangStrategy, "IGNORE", 8) == 0) + { + sp.overhangStrategy = IGNORE; + } + else + { + sp.overhangStrategy = IGNORE; + } + //printf("len1 = %d, len2 = %d\n", sp.len1, sp.len2); + //printf("%s\n", sp.seq1); + sp.score = 0; + seqPairArray[numPairs] = sp; + numPairs++; + SW_cells += (sp.len1 * sp.len2); + } + if(numPairs == MAX_NUM_PAIRS) + { + printf("Reached max limit of number of pairs that can be processed." + "\nPotentially there are few more pairs left to be processed\n"); + } + fclose(pairFile); + return numPairs; + } + + +int main(int argc, char *argv[]) +{ + + parseCmdLine(argc, argv); + SeqPair *seqPairArray = (SeqPair *)_mm_malloc(MAX_NUM_PAIRS * sizeof(SeqPair), 64); + int32_t numPairs = loadPairs(seqPairArray); + char *cigarArray = (char *)_mm_malloc(4 * MAX_SEQ_LEN * numPairs * sizeof(char), 64); + int i; + initNative(); + + for(i = 0; i < numPairs; i++) + { + char *myCigarArray = cigarArray + i * 4 * MAX_SEQ_LEN; + SeqPair *p = seqPairArray + i; + p->cigar = myCigarArray; + + align(p->seq1, p->seq2, p->cigar, w_match, w_mismatch, w_open, w_extend, p->overhangStrategy, &(p->score), &(p->alignmentOffset)); + + } +#if 1 + if(bt) + { + for(i = 0; i < numPairs; i++) + { + printf("%s", seqPairArray[i].cigar); +#if 0 + int32_t cigarCount = seqPairArray[i].cigarCount; + int16_t *cigarArray = seqPairArray[i].cigar; + int j; + for(j = cigarCount; j >= 0; j--) + { + printf("%d", cigarArray[2 * j + 1]); + switch(cigarArray[2 * j]) + { + case MATCH: + printf("M"); + break; + case INSERT: + printf("I"); + break; + case DELETE: + printf("D"); + break; + case SOFTCLIP: + printf("S"); + break; + default: + printf("R"); + break; + } + } +#endif + printf(" %d\n", seqPairArray[i].alignmentOffset); + } + } +#endif +} diff --git a/src/examples/SmithWaterman/SmithWatermanUnitTest.h b/src/examples/SmithWaterman/SmithWatermanUnitTest.h new file mode 100644 index 00000000..66493960 --- /dev/null +++ b/src/examples/SmithWaterman/SmithWatermanUnitTest.h @@ -0,0 +1,65 @@ +#ifndef SMITHWATERMAN_COMMON_H +#define SMITHWATERMAN_COMMON_H + +#if defined(_MSC_VER) + #include // SIMD intrinsics for Windows +#else + #include // SIMD intrinsics for GCC +#endif + +#define __STDC_LIMIT_MACROS + +#include +#include +#include + + + +#define CAT(X,Y) X##Y +#define CONCAT(X,Y) CAT(X,Y) + +#define MATCH 0 +#define INSERT 1 +#define DELETE 2 +#define INSERT_EXT 4 +#define DELETE_EXT 8 +#define SOFTCLIP 9 +#define INDEL 10 +#define LEADING_INDEL 11 +#define IGNORE 12 + +typedef struct dnaSeqPair +{ + int32_t id; + uint8_t *seq1; + uint8_t *seq2; + int16_t len1, len2; + int8_t overhangStrategy; + int32_t score; + char *cigar; + int16_t cigarCount; + int16_t alignmentOffset; +}SeqPair; + +//the maximum DNA sequence length +#define MAX_SEQ_LEN 1024 +#define MAX_NUM_PAIRS 80000 +#define MATRIX_MIN_CUTOFF -100000000 +#define LOW_INIT_VALUE (INT32_MIN/2) +#define max(x, y) ((x)>(y)?(x):(y)) +#define min(x, y) ((x)<(y)?(x):(y)) +#define DUMMY1 'B' +#define DUMMY2 'D' + +int32_t w_match; + int32_t w_mismatch; + int32_t w_open; + int32_t w_extend; + + int32_t *E_; + int16_t *backTrack_; + int16_t *cigarBuf_; + + +//extern void align(char* ref, char* alt, char*cigar, int match, int mismatch, int open, int extend, int strategy, int score, int offset); +#endif \ No newline at end of file diff --git a/src/main/native/compression/IntelDeflater.cc b/src/main/native/compression/IntelDeflater.cc index 0b8b6971..42728740 100644 --- a/src/main/native/compression/IntelDeflater.cc +++ b/src/main/native/compression/IntelDeflater.cc @@ -146,36 +146,6 @@ JNIEXPORT void JNICALL Java_com_intel_gkl_compression_IntelDeflater_resetNative } -/** -* Generate Dynamic Huffman tables -* This function will be called only if we are implementing the fully dynamic huffman implementation which is not set as default in current implementation -* current implementation we use semi-dynamic huffman with tables generated using only first 64k block of stream -*/ - -JNIEXPORT void JNICALL Java_com_intel_gkl_compression_IntelDeflater_generateHuffman -(JNIEnv * env, jobject obj) { - - jbyteArray inputBuffer = (jbyteArray)env->GetObjectField(obj, FID_inputBuffer); - - jint level = env->GetIntField(obj, FID_level); - - if(level == 1) { - isal_zstream* lz_stream = (isal_zstream*)env->GetLongField(obj, FID_lz_stream); - - jbyte* input = (jbyte*)env->GetPrimitiveArrayCritical(inputBuffer, 0); - - struct isal_huff_histogram *histogram; - struct isal_hufftables *hufftables_custom; - - memset(histogram, 0, sizeof(isal_huff_histogram)); - isal_update_histogram((unsigned char*)input, 64*1024, histogram); - isal_create_hufftables(hufftables_custom, histogram); - lz_stream->hufftables = hufftables_custom; - - env->SetLongField(obj, FID_lz_stream, (jlong)lz_stream); - env->ReleasePrimitiveArrayCritical(inputBuffer, input, 0); - } -} /** * Deflate the data. diff --git a/src/main/native/pairhmm/CMakeLists.txt b/src/main/native/pairhmm/CMakeLists.txt index 2885f207..e73ae816 100644 --- a/src/main/native/pairhmm/CMakeLists.txt +++ b/src/main/native/pairhmm/CMakeLists.txt @@ -49,3 +49,13 @@ set(TARGET gkl_pairhmm_fpga) add_library(${TARGET} SHARED IntelPairHmm.cc avx_impl.cc avx512_impl.cc pairhmm_common.cc) target_link_libraries(${TARGET} gkl_pairhmm_shacc) install(TARGETS ${TARGET} DESTINATION ${CMAKE_BINARY_DIR}) + +#--------------------------------------------------------------------- +# pairhmm_c +#--------------------------------------------------------------------- +set(TARGET gkl_pairhmm_c) +add_library(${TARGET} SHARED IntelPairHmmCSource.cpp avx_impl.cc avx512_impl.cc pairhmm_common.cc) +if (APPLE) + target_link_libraries(${TARGET} gkl_pairhmm_shacc) +endif() +install(TARGETS ${TARGET} DESTINATION ${CMAKE_BINARY_DIR}) \ No newline at end of file diff --git a/src/main/native/pairhmm/IntelPairHmmCSource.cpp b/src/main/native/pairhmm/IntelPairHmmCSource.cpp new file mode 100644 index 00000000..91eb4401 --- /dev/null +++ b/src/main/native/pairhmm/IntelPairHmmCSource.cpp @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include "IntelPairHmm.h" +#include "pairhmm_common.h" +#include "avx_impl.h" +#ifndef __APPLE__ + #include "avx512_impl.h" +#endif +#include "Context.h" +#include"debug.h" +bool g_use_double; +int g_max_threads; +bool g_use_fpga; + +Context g_ctxf; +Context g_ctxd; + +float (*g_compute_full_prob_float)(testcase *tc); +double (*g_compute_full_prob_double)(testcase *tc); + +/* + * Method: initNative + */ +void initPairHMM() +{ + // set function pointers + if(is_avx512_supported()) + { +#ifndef __APPLE__ + DBG("Using CPU-supported AVX-512 instructions"); + g_compute_full_prob_float = compute_fp_avx512s; + g_compute_full_prob_double = compute_fp_avx512d; +#else + assert(false); +#endif + } + else + { + g_compute_full_prob_float = compute_fp_avxs; + g_compute_full_prob_double = compute_fp_avxd; + } + // init convert char table + ConvertChar::init(); +} + +/* Computelikelihoodsfloat method takes in one vector of input and computes the pHMM result float */ + +void computelikelihoodsfloat(testcase *testcases, float *expected_results) +{ + + float result_final = 0; + + float result_float = g_compute_full_prob_float(testcases); + + result_final = (double)(log10f(result_float) - g_ctxf.LOG10_INITIAL_CONSTANT); + + (*expected_results) = (float)result_final; + + return; + +} + +/* Computelikelihoodsdouble method takes in one vector of input and computes the pHMM result double */ + +void computelikelihoodsdouble(testcase *testcases, double *expected_results) +{ + + double result_final = 0; + double result_double = g_compute_full_prob_double(testcases); + result_final = log10(result_double) - g_ctxd.LOG10_INITIAL_CONSTANT; + + (*expected_results) = (double)result_final; + + return; + +} + diff --git a/src/main/native/smithwaterman/CMakeLists.txt b/src/main/native/smithwaterman/CMakeLists.txt index ee06c354..4fa520c2 100644 --- a/src/main/native/smithwaterman/CMakeLists.txt +++ b/src/main/native/smithwaterman/CMakeLists.txt @@ -19,9 +19,14 @@ include_directories(../common) #--------------------------------------------------------------------- set(TARGET gkl_smithwaterman) - add_library(${TARGET} SHARED IntelSmithWaterman.cc avx2_impl.cc avx512_impl.cc) install(TARGETS ${TARGET} DESTINATION ${CMAKE_BINARY_DIR}) +set(TARGET gkl_smithwaterman_c) + +add_library(${TARGET} SHARED + IntelSmithWatermanCSource.cpp avx2_impl.cc avx512_impl.cc) + +install(TARGETS ${TARGET} DESTINATION ${CMAKE_BINARY_DIR}) \ No newline at end of file diff --git a/src/main/native/smithwaterman/IntelSmithWaterman.cc b/src/main/native/smithwaterman/IntelSmithWaterman.cc index 09cabab8..40768102 100644 --- a/src/main/native/smithwaterman/IntelSmithWaterman.cc +++ b/src/main/native/smithwaterman/IntelSmithWaterman.cc @@ -20,7 +20,7 @@ static jfieldID FID_reflength; static jfieldID FID_altlength; -int32_t (*g_runSWOnePairBT)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount); +int32_t (*g_runSWOnePairBT)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount, int16_t *score); JNIEXPORT void JNICALL Java_com_intel_gkl_smithwaterman_IntelSmithWaterman_initNative (JNIEnv * env, jclass obj ) @@ -58,9 +58,10 @@ JNIEXPORT jint JNICALL Java_com_intel_gkl_smithwaterman_IntelSmithWaterman_align jint count = 0; jint offset = 0; + jint score =0; // call the low level routine - offset = g_runSWOnePairBT(match, mismatch, open, extend,(uint8_t*) reference, (uint8_t*) alternate,refLength, altLength, strategy, (char *) cigarArray, (int16_t*) &count); + offset = g_runSWOnePairBT(match, mismatch, open, extend,(uint8_t*) reference, (uint8_t*) alternate,refLength, altLength, strategy, (char *) cigarArray, (int16_t*) &count, (int16_t*) &score); // release buffers env->ReleasePrimitiveArrayCritical(ref, reference, 0); diff --git a/src/main/native/smithwaterman/IntelSmithWatermanCSource.cpp b/src/main/native/smithwaterman/IntelSmithWatermanCSource.cpp new file mode 100644 index 00000000..cfbae9a4 --- /dev/null +++ b/src/main/native/smithwaterman/IntelSmithWatermanCSource.cpp @@ -0,0 +1,60 @@ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include "avx2_impl.h" +#ifndef __APPLE__ + #include "avx512_impl.h" +#endif + +#include "IntelSmithWatermanCSource.h" +#include "smithwaterman_common.h" + +int32_t (*g_runSWOnePairBT)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount, int16_t *score); + + +void initNative() +{ + +if(is_avx512_supported()) + { + #ifndef __APPLE__ + DBG("Using CPU-supported AVX-512 instructions"); + g_runSWOnePairBT = runSWOnePairBT_fp_avx512; + + #else + assert(false); + #endif + } + else + { + g_runSWOnePairBT = runSWOnePairBT_fp_avx2; + } + return; +} + +/* performns smithwaterman alignment on one pair of ref and alt vector and computes, score and cigar string */ + +void align(char* ref, char* alt, char*cigar, int match, int mismatch, int open, int extend, int strategy, int *score, int *offset) +{ + + int count = 0; + int alignmentoffset =0; + + int refLength = strlen(ref); + int altLength = strlen(alt); + + (*offset) = g_runSWOnePairBT(match, mismatch, open, extend,(uint8_t*) ref, (uint8_t*) alt,refLength, altLength, strategy, (char *) cigar, (int16_t*) &count, (int16_t*) &score); + + + return; +} + + + diff --git a/src/main/native/smithwaterman/IntelSmithWatermanCSource.h b/src/main/native/smithwaterman/IntelSmithWatermanCSource.h new file mode 100644 index 00000000..c254dfbc --- /dev/null +++ b/src/main/native/smithwaterman/IntelSmithWatermanCSource.h @@ -0,0 +1,11 @@ +#ifdef __cplusplus +extern "C" { +#endif + +void initNative(); +void align(char* ref, char* alt, char*cigar, int match, int mismatch, int open, int extend, int strategy, int *score, int *offset); + + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/src/main/native/smithwaterman/PairWiseSW.h b/src/main/native/smithwaterman/PairWiseSW.h index 8c7a7708..dbc76851 100644 --- a/src/main/native/smithwaterman/PairWiseSW.h +++ b/src/main/native/smithwaterman/PairWiseSW.h @@ -414,12 +414,10 @@ void inline getCIGAR(SeqPair *p, int16_t *cigarBuf_, int32_t tid) } -int32_t CONCAT(runSWOnePairBT_,SIMD_ENGINE)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount) +int32_t CONCAT(runSWOnePairBT_,SIMD_ENGINE)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount, int16_t *score) { - - - int32_t w_match = match; + int32_t w_match = match; int32_t w_mismatch = mismatch; int32_t w_open = open; int32_t w_extend = extend; @@ -430,6 +428,7 @@ int32_t CONCAT(runSWOnePairBT_,SIMD_ENGINE)(int32_t match, int32_t mismatch, int SeqPair p; + p.seq1 = seq1; p.seq2 = seq2; p.len1 = len1; @@ -440,10 +439,12 @@ int32_t CONCAT(runSWOnePairBT_,SIMD_ENGINE)(int32_t match, int32_t mismatch, int smithWatermanBackTrack(&p, match, mismatch, open, extend, E_, 0); getCIGAR(&p, cigarBuf_, 0); + (*score)= p.score; + (*cigarCount) = p.cigarCount; free(E_); free(backTrack_); free(cigarBuf_); - return p.alignmentOffset; - } + return p.alignmentOffset;; +} diff --git a/src/main/native/smithwaterman/avx2_impl.cc b/src/main/native/smithwaterman/avx2_impl.cc index 26708574..c3202d2f 100644 --- a/src/main/native/smithwaterman/avx2_impl.cc +++ b/src/main/native/smithwaterman/avx2_impl.cc @@ -2,6 +2,6 @@ #include "avx2-smithwaterman.h" -int32_t (*runSWOnePairBT_fp_avx2)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount)= &runSWOnePairBT_avx2; +int32_t (*runSWOnePairBT_fp_avx2)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount, int16_t *score)= &runSWOnePairBT_avx2; diff --git a/src/main/native/smithwaterman/avx2_impl.h b/src/main/native/smithwaterman/avx2_impl.h index c06178a5..db54d5ff 100644 --- a/src/main/native/smithwaterman/avx2_impl.h +++ b/src/main/native/smithwaterman/avx2_impl.h @@ -3,7 +3,7 @@ #include "smithwaterman_common.h" -extern int32_t (*runSWOnePairBT_fp_avx2)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount); +extern int32_t (*runSWOnePairBT_fp_avx2)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount, int16_t *score); #endif //AVX2_IMPL_H diff --git a/src/main/native/smithwaterman/avx512_impl.cc b/src/main/native/smithwaterman/avx512_impl.cc index d008825a..e65e2a30 100644 --- a/src/main/native/smithwaterman/avx512_impl.cc +++ b/src/main/native/smithwaterman/avx512_impl.cc @@ -4,12 +4,11 @@ #include "avx512-smithwaterman.h" -int32_t (*runSWOnePairBT_fp_avx512)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount)= &runSWOnePairBT_avx512; - +int32_t (*runSWOnePairBT_fp_avx512)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount, int16_t *score)= &runSWOnePairBT_avx512; #else -int32_t (*runSWOnePairBT_fp_avx512)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount)= NULL; +int32_t (*runSWOnePairBT_fp_avx512)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount, int16_t *score)= NULL; #endif diff --git a/src/main/native/smithwaterman/avx512_impl.h b/src/main/native/smithwaterman/avx512_impl.h index 0894d159..31562525 100644 --- a/src/main/native/smithwaterman/avx512_impl.h +++ b/src/main/native/smithwaterman/avx512_impl.h @@ -3,7 +3,7 @@ #include "smithwaterman_common.h" -extern int32_t (*runSWOnePairBT_fp_avx512)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount); +extern int32_t (*runSWOnePairBT_fp_avx512)(int32_t match, int32_t mismatch, int32_t open, int32_t extend,uint8_t *seq1, uint8_t *seq2, int32_t len1, int32_t len2, int8_t overhangStrategy, char *cigarArray, int16_t *cigarCount, int16_t *score); #endif //AVX512_IMPL_H