From dc169e1f8ab077b64ccc3b2dfeec89ea823db80c Mon Sep 17 00:00:00 2001 From: Muhammad Saad Shamim Date: Sun, 31 Mar 2024 18:40:33 -0500 Subject: [PATCH 1/4] Update README.md --- C++/PowerMethod/README.md | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/C++/PowerMethod/README.md b/C++/PowerMethod/README.md index cb79b76..a456255 100644 --- a/C++/PowerMethod/README.md +++ b/C++/PowerMethod/README.md @@ -3,23 +3,20 @@ Very fast and memory efficient functions for the computation of the principal ei There are two ways to read the input file: either from hic file or from an ASCII file produced by juicer tools dump. **Reading from hic file:** -This package requires straw.cpp and straw.h from Aiden Lab ( https://github.com/aidenlab/straw/tree/master/C%2B%2B ). +This package requires `straw.cpp` and `straw.h` from Aiden Lab ( https://github.com/aidenlab/straw/tree/master/C%2B%2B ). To create the executable, make sure that you are using version 7 of gcc and g++, go to the PowerMethod folder and do: -**g++ -O2 -o ev.exe theEigenVector_flip_new.cpp theBestEigen.c thdMul.c hgFlipSign.c STRAW/C++/straw.cpp -I . -I STRAW/C++ -lz -lcurl -lpthread** -where STRAW is the folder containing straw (May 2022 version). -This will create an executable file **ev.exe**. -Run -./ev.exe -or -./ev.exe -h -to see the usage and options. This way you can create an eigenvector for a particular chromosome. + +`g++ -std=c++11 -O2 -o ev.exe theEigenVector_flip_new.cpp theBestEigen.c thdMul.c hgFlipSign.c STRAW/C++/straw.cpp -I . -I STRAW/C++ -lz -lcurl -lpthread` + +where STRAW is the folder containing straw (March 2024 version). +This will create an executable file `ev.exe`. +Run `./ev.exe` or `./ev.exe -h` to see the usage and options. This way you can create an eigenvector for a particular chromosome. To produce eigenvector for every chromosome in the hic file in the wiggle (wig) format, do: -**g++ -O2 -o GWevIntra.exe GWevIntra_new.cpp theBestEigen.c thdMul.c hgFlipSign.c STRAW/C++/straw.cpp -I . -I STRAW/C++ -lz -lcurl -lpthread** -Run -./GWevIntra.exe -to see te usage +`g++ -std=c++11 -O2 -o GWevIntra.exe GWevIntra_new.cpp theBestEigen.c thdMul.c hgFlipSign.c STRAW/C++/straw.cpp -I . -I STRAW/C++ -lz -lcurl -lpthread` + +Run `./GWevIntra.exe` to see the usage **Note:** if run on hg19 or hg38 genome with a resolution which is a divisor of 100,000, these two functions will flip the sign of the eigenvector if necessary so that positive sign corresponds to A and negative sighn to B compartment. @@ -27,13 +24,10 @@ to see te usage First of all run juicer tools dump to produce the file, say **dump.out**. Here you can choose either observed or oe (observed over expected) and choose the desired normalization. To create the ececutable do: -**g++ -O2 -o mainEigen.exe mainEigen.c theBestEigen.c thdMul.c -lpthread** +`g++ -O2 -o mainEigen.exe mainEigen.c theBestEigen.c thdMul.c -lpthread` this will create an executable file **mainEigen.exe**. -Run -./mainEigen.exe -or -./mainEigen.exe -h -to see the usage and options. +Run `./mainEigen.exe` or `./mainEigen.exe -h` to see the usage and options. + Since we are not reading hic file there are several new options: -**-m** an upper bound on the number of records (can get the exact value by 'wc -l infile') -**-s** the size of the chromosome (in bp); if specified, the matrix is assumed to be of dimension n by n where n is (chomosome size)/binsize rounded up; if not specified, n will be inferred from the data and may be slightly smaller than the true one. +`-m` an upper bound on the number of records (can get the exact value by 'wc -l infile') +`-s` the size of the chromosome (in bp); if specified, the matrix is assumed to be of dimension n by n where n is (chomosome size)/binsize rounded up; if not specified, n will be inferred from the data and may be slightly smaller than the true one. From 10d376e6327ad4b89f1cd2b17d7ab679bf84160b Mon Sep 17 00:00:00 2001 From: Muhammad Saad Shamim Date: Sun, 31 Mar 2024 18:41:04 -0500 Subject: [PATCH 2/4] Minor edit for isnan --- C++/PowerMethod/GWevIntra_new.cpp | 4 ++-- C++/PowerMethod/theEigenVector_flip_new.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/C++/PowerMethod/GWevIntra_new.cpp b/C++/PowerMethod/GWevIntra_new.cpp index bbcaabb..6a22938 100644 --- a/C++/PowerMethod/GWevIntra_new.cpp +++ b/C++/PowerMethod/GWevIntra_new.cpp @@ -143,7 +143,7 @@ int main(int argc, char *argv[]) { xx = (double *) malloc(m*sizeof(double)); p=0; for (q=0; qsecond.length % binsize, itr->second.length % binsize ); - if (!isnan(ev[j])) fprintf(fout,"%f\n",ev[j]); + if (!std::isnan(ev[j])) fprintf(fout,"%f\n",ev[j]); else fprintf(fout,"%s\n","0"); // else fprintf(fout,"%s\n","NaN"); } diff --git a/C++/PowerMethod/theEigenVector_flip_new.cpp b/C++/PowerMethod/theEigenVector_flip_new.cpp index 93f60c3..5ac2826 100644 --- a/C++/PowerMethod/theEigenVector_flip_new.cpp +++ b/C++/PowerMethod/theEigenVector_flip_new.cpp @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) { ii[k] = records[k].binX/binsize; jj[k] = records[k].binY/binsize; xx[k] = (double) records[k].counts; - if (isnan(xx[k])) xx[k] = 0; + if (std::isnan(xx[k])) xx[k] = 0; } clock_gettime(CLOCK_REALTIME,&t1); if (verb) printf("took %10.3f seconds for %ld records\n",((double) (t1.tv_sec - t0.tv_sec)) + ((double) (t1.tv_nsec - t0.tv_nsec))/1e9,nonZer); @@ -177,7 +177,7 @@ int main(int argc, char *argv[]) { if (strcmp(chr1,"chrY")!=0 && strcmp(chr1,"chrM")!=0 && (100000 % binsize == 0)) int junk = flipSign(genome1,ev,N,chr1,binsize); for (int j=0;j Date: Thu, 29 May 2025 14:17:57 -0400 Subject: [PATCH 3/4] bug fixes, updates --- .gitignore | 1 + C++/Lanczos/README.md | 107 +++++++++++++++++++++++++--------- C++/Lanczos/build_linux.sh | 68 +++++++++++++++++++++ C++/Lanczos/build_mac.sh | 69 ++++++++++++++++++++++ C++/Lanczos/build_windows.bat | 67 +++++++++++++++++++++ C++/Lanczos/getGWMatrix.cpp | 12 ++-- C++/Lanczos/s_dthMul.c | 1 + C++/Lanczos/s_fGW.cpp | 23 ++++++-- C++/Lanczos/s_fLan.cpp | 38 +++++++----- 9 files changed, 331 insertions(+), 55 deletions(-) create mode 100644 .gitignore create mode 100644 C++/Lanczos/build_linux.sh create mode 100755 C++/Lanczos/build_mac.sh create mode 100644 C++/Lanczos/build_windows.bat diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b883f1f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.exe diff --git a/C++/Lanczos/README.md b/C++/Lanczos/README.md index 97b7815..ea333b5 100644 --- a/C++/Lanczos/README.md +++ b/C++/Lanczos/README.md @@ -1,28 +1,79 @@ -**Lanczos Method with Selective Ortohanalization** - -These functions need liblapacke (and hence libblas and liblapack). To install them do: -sudo apt update -sudo apt install libopenblas-base -sudo apt install libopenblas-dev -sudo apt install liblapack3 -sudo apt install liblapack-dev -sudo apt install liblapacke-dev - -Then make sure that you are using version 7 of gcc and g++ - -**Rmark:** In my case the folder containing **straw** is **~/HiC/straw_may_2022**. The user should replace it by their own folder. - -To create an executable for computing few leading eigenvectors of the correlation matrix of contact matrix for a particular chromosome do: -**g++ -O2 -o Lan.exe s_fLan.cpp s_fSOLan.c s_dthMul.c hgFlipSign.c ~/HiC/straw_may_2022/C++/straw.cpp -I. -I ~/HiC/straw_may_2022/C++ -lz -lcurl -lpthread -lblas -llapack -llapacke** -Run -./Lan.exe -to see usage. -By default it uses unnormalized observed over expected (o/e) matrix. -Use -o flag to use observed matrix instead (usually not recommended) -Use -n norm to use normalized matrix; norm can be NONE (no normalization - default), VC, VC_SQRT, KR, SCALE, SCALA, etc. - -To do the above for Genome Wide (GW) contact matrix do: -**g++ -O2 -o GWev.exe s_fGW.cpp getGWMatrix.cpp s_fSOLan.c s_dthMul.c ~/HiC/straw_may_2022/C++/straw.cpp -I ~/HiC/straw_may_2022/C++ -lz -lcurl -lpthread -lblas -llapack -llapacke** -Run -./GWev.exe for usage. -By default it uses **inter**chromosomal matrix. To use the full matrix specife the -f flag +**Lanczos Method with Selective Orthogonalization** + +This package provides tools for computing leading eigenvectors of contact matrices from Hi-C data. + +## Building the Software + +### Prerequisites + +The software requires: +- C++ compiler (GCC/G++ 4.8 or later) +- OpenBLAS +- LAPACK/LAPACKE +- libcurl +- zlib +- straw library (place in `~/straw` or modify build script) + +### Platform-Specific Build Instructions + +#### Linux +```bash +# Make the script executable +chmod +x build_linux.sh +# Run the build script +./build_linux.sh +``` + +The script will automatically install required dependencies using apt-get. + +#### macOS +```bash +# Make the script executable +chmod +x build_mac.sh +# Run the build script +./build_mac.sh +``` + +The script will use Homebrew to install required dependencies. + +#### Windows +1. Install MinGW-w64 from [WinLibs](https://winlibs.com/) +2. Add MinGW-w64 bin directory to your PATH +3. Run the build script: +```cmd +build_windows.bat +``` + +You may need to modify the paths in `build_windows.bat` to match your MinGW-w64 installation. + +## Usage + +### Chromosome-Specific Analysis (Lan.exe) +```bash +./Lan.exe [options] [nv] + +Options: + -o Use observed matrix instead of observed/expected (o/e) matrix + -t Set tolerance (default: 1.0e-7) + -e Set epsilon (default: 1.0e-8) + -I Set maximum iterations (default: 200) + -n Set normalization method (default: NONE) + -T Set number of threads (default: 1) + -v Set verbosity level (default: 1) +``` + +### Genome-Wide Analysis (GWev.exe) +```bash +./GWev.exe [options] [nv] + +Options: + -f Use full matrix instead of inter-chromosomal only + -t Set tolerance (default: 1.0e-7) + -e Set epsilon (default: 1.0e-8) + -I Set maximum iterations (default: 200) + -T Set number of threads (default: 1) + -v Set verbosity level (default: 1) +``` + +## Output Format +The programs generate eigenvector files in WIG format that can be visualized in genome browsers. diff --git a/C++/Lanczos/build_linux.sh b/C++/Lanczos/build_linux.sh new file mode 100644 index 0000000..8f6f59a --- /dev/null +++ b/C++/Lanczos/build_linux.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# Exit on error +set -e + +echo "🔧 Setting up build environment..." + +# Install dependencies using apt +echo "📦 Installing dependencies..." +sudo apt-get update +sudo apt-get install -y \ + build-essential \ + gcc \ + g++ \ + libopenblas-dev \ + liblapack-dev \ + liblapacke-dev \ + libcurl4-openssl-dev \ + zlib1g-dev + +# Check if straw is available +STRAW_PATH="$HOME/straw" +if [ ! -d "$STRAW_PATH" ]; then + echo "⚠️ Warning: straw library not found at $STRAW_PATH" + echo "Please install straw library and place it in $STRAW_PATH" + echo "or modify this script with the correct path" + exit 1 +fi + +echo "🔨 Building executables..." + +# Common compiler flags +COMMON_FLAGS="-O2 -Wno-format-security -I/usr/include -I$STRAW_PATH/C++" +COMMON_LIBS="-L/usr/lib -lz -lcurl -lpthread -lopenblas -llapack -llapacke" + +# First compile straw library +echo "Building straw library..." +g++ $COMMON_FLAGS -c "$STRAW_PATH/C++/straw.cpp" -o straw.o + +# Compile Lan.exe +echo "Building Lan.exe..." +g++ $COMMON_FLAGS -std=c++11 -o Lan.exe \ + s_fLan.cpp \ + s_fSOLan.c \ + s_dthMul.c \ + hgFlipSign.c \ + straw.o \ + -I. \ + $COMMON_LIBS + +# Compile GWev.exe +echo "Building GWev.exe..." +g++ $COMMON_FLAGS -std=c++11 -o GWev.exe \ + s_fGW.cpp \ + getGWMatrix.cpp \ + s_fSOLan.c \ + s_dthMul.c \ + straw.o \ + $COMMON_LIBS + +# Clean up object files +rm -f straw.o + +echo "✅ Build completed successfully!" +echo +echo "You can now run:" +echo " ./Lan.exe - for chromosome-specific analysis" +echo " ./GWev.exe - for genome-wide analysis" \ No newline at end of file diff --git a/C++/Lanczos/build_mac.sh b/C++/Lanczos/build_mac.sh new file mode 100755 index 0000000..f1bacb9 --- /dev/null +++ b/C++/Lanczos/build_mac.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +# Exit on error +set -e + +echo "🔧 Setting up build environment..." + +# Assume Homebrew is installed + +# Install dependencies +echo "📦 Installing dependencies..." +brew install openblas +brew install lapack +brew install gcc@13 # Latest stable GCC +brew install curl +brew install zlib + +# Set up environment variables for OpenBLAS and LAPACK +export LDFLAGS="-L/opt/homebrew/opt/openblas/lib -L/opt/homebrew/opt/lapack/lib" +export CPPFLAGS="-I/opt/homebrew/opt/openblas/include -I/opt/homebrew/opt/lapack/include" + +# Check if straw is available +STRAW_PATH="../../../straw" +if [ ! -d "$STRAW_PATH" ]; then + echo "⚠️ Warning: straw library not found at $STRAW_PATH" + echo "Please install straw library and place it in $STRAW_PATH" + echo "or modify this script with the correct path" + exit 1 +fi + +echo "🔨 Building executables..." + +# Common compiler flags +COMMON_FLAGS="-O2 -Wno-format-security -I/opt/homebrew/opt/openblas/include -I/opt/homebrew/opt/lapack/include -I$STRAW_PATH/C++" +COMMON_LIBS="-L/opt/homebrew/opt/openblas/lib -L/opt/homebrew/opt/lapack/lib -lz -lcurl -lpthread -lopenblas -llapack -llapacke" + +# First compile straw library +echo "Building straw library..." +g++-13 $COMMON_FLAGS -c "$STRAW_PATH/C++/straw.cpp" -o straw.o + +# Compile Lan.exe +echo "Building Lan.exe..." +g++-13 $COMMON_FLAGS -std=c++11 -o Lan.exe \ + s_fLan.cpp \ + s_fSOLan.c \ + s_dthMul.c \ + hgFlipSign.c \ + straw.o \ + -I. \ + $COMMON_LIBS + +# Compile GWev.exe +echo "Building GWev.exe..." +g++-13 $COMMON_FLAGS -std=c++11 -o GWev.exe \ + s_fGW.cpp \ + getGWMatrix.cpp \ + s_fSOLan.c \ + s_dthMul.c \ + straw.o \ + $COMMON_LIBS + +# Clean up object files +rm -f straw.o + +echo "✅ Build completed successfully!" +echo +echo "You can now run:" +echo " ./Lan.exe - for chromosome-specific analysis" +echo " ./GWev.exe - for genome-wide analysis" \ No newline at end of file diff --git a/C++/Lanczos/build_windows.bat b/C++/Lanczos/build_windows.bat new file mode 100644 index 0000000..2415de4 --- /dev/null +++ b/C++/Lanczos/build_windows.bat @@ -0,0 +1,67 @@ +@echo off +setlocal enabledelayedexpansion + +echo 🔧 Setting up build environment... + +REM Check if MinGW-w64 is installed and in PATH +where g++ >nul 2>&1 +if %ERRORLEVEL% NEQ 0 ( + echo Error: g++ not found. Please install MinGW-w64 and add it to your PATH + echo You can download it from: https://winlibs.com/ + exit /b 1 +) + +REM Set paths - MODIFY THESE AS NEEDED +set "MINGW_PATH=C:\mingw64" +set "STRAW_PATH=%USERPROFILE%\straw" +set "OPENBLAS_PATH=%MINGW_PATH%\opt\openblas" + +REM Check if straw exists +if not exist "%STRAW_PATH%" ( + echo ⚠️ Warning: straw library not found at %STRAW_PATH% + echo Please install straw library and place it in %STRAW_PATH% + echo or modify this script with the correct path + exit /b 1 +) + +echo 🔨 Building executables... + +REM Common compiler flags +set "COMMON_FLAGS=-O2 -Wno-format-security -I%MINGW_PATH%\include -I%STRAW_PATH%\C++" +set "COMMON_LIBS=-L%MINGW_PATH%\lib -lz -lcurl -lpthread -lopenblas -llapack -llapacke" + +REM First compile straw library +echo Building straw library... +g++ %COMMON_FLAGS% -c "%STRAW_PATH%\C++\straw.cpp" -o straw.o + +REM Compile Lan.exe +echo Building Lan.exe... +g++ %COMMON_FLAGS% -std=c++11 -o Lan.exe ^ + s_fLan.cpp ^ + s_fSOLan.c ^ + s_dthMul.c ^ + hgFlipSign.c ^ + straw.o ^ + -I. ^ + %COMMON_LIBS% + +REM Compile GWev.exe +echo Building GWev.exe... +g++ %COMMON_FLAGS% -std=c++11 -o GWev.exe ^ + s_fGW.cpp ^ + getGWMatrix.cpp ^ + s_fSOLan.c ^ + s_dthMul.c ^ + straw.o ^ + %COMMON_LIBS% + +REM Clean up object files +del straw.o + +echo ✅ Build completed successfully! +echo. +echo You can now run: +echo Lan.exe - for chromosome-specific analysis +echo GWev.exe - for genome-wide analysis + +endlocal \ No newline at end of file diff --git a/C++/Lanczos/getGWMatrix.cpp b/C++/Lanczos/getGWMatrix.cpp index 4c20072..3c642a0 100755 --- a/C++/Lanczos/getGWMatrix.cpp +++ b/C++/Lanczos/getGWMatrix.cpp @@ -13,15 +13,15 @@ unsigned int getMatrix(string fname, int binsize, string norm, string ob, bool i string unit("BP"); ifstream fin; - long master = 0L; + int64_t master = 0LL; map chromosomeMap; string genomeID; - int version = 0; - long nviPosition = 0; - long nviLength = 0; - long totalFileSize; + int32_t nChrs = 0; + int32_t version = 0; + int64_t nviPosition = 0LL; + int64_t nviLength = 0LL; + int64_t totalFileSize; - int nChrs; vector chroms; vector chrLen; fin.open(fname, fstream::in); diff --git a/C++/Lanczos/s_dthMul.c b/C++/Lanczos/s_dthMul.c index 99e0bd4..c969593 100755 --- a/C++/Lanczos/s_dthMul.c +++ b/C++/Lanczos/s_dthMul.c @@ -24,6 +24,7 @@ void *Mul(void *threadid) { res[i[p]] += x[p]*v[j[p]]; res[j[p]] += x[p]*v[i[p]]; } + return NULL; } void utmvMul(unsigned int *i,unsigned int *j,float *x,long m,double *v,unsigned int k,double *res, int nth, double **rs) { diff --git a/C++/Lanczos/s_fGW.cpp b/C++/Lanczos/s_fGW.cpp index b9110b5..2126f10 100755 --- a/C++/Lanczos/s_fGW.cpp +++ b/C++/Lanczos/s_fGW.cpp @@ -15,11 +15,22 @@ unsigned int getMatrix(string fname, int binsize, string norm, string ob, bool i static void usage(const char *argv0) { - fprintf(stderr, "Usage: %s [-f (for full matrix)][-t tol][-e eps][-I max_iterations][-T threads][-v verbose] <[nv]>\n", argv0); - fprintf(stderr, " : hic file\n"); - fprintf(stderr, " : Eigenvector base name\n"); - fprintf(stderr, " : resolution in bp\n"); - fprintf(stderr, " : optional; default is 2\n"); + fprintf(stderr, "Usage: %s [options] [nv]\n\n", argv0); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -f Use full matrix instead of inter-chromosomal only\n"); + fprintf(stderr, " -t Set tolerance (default: 1.0e-7)\n"); + fprintf(stderr, " -e Set epsilon (default: 1.0e-8)\n"); + fprintf(stderr, " -I Set maximum iterations (default: 200)\n"); + fprintf(stderr, " -T Set number of threads (default: 1)\n"); + fprintf(stderr, " -v Set verbosity level (default: 1)\n"); + fprintf(stderr, " -h Show this help message\n\n"); + fprintf(stderr, "Required arguments:\n"); + fprintf(stderr, " Path to .hic file\n"); + fprintf(stderr, " Base name for output eigenvector files\n"); + fprintf(stderr, " Resolution in base pairs\n"); + fprintf(stderr, " [nv] Number of eigenvectors (optional, default: 2)\n\n"); + fprintf(stderr, "Output:\n"); + fprintf(stderr, " Creates .wig files for each eigenvector with the naming pattern: _Ev.wig\n"); } int main(int argc, char *argv[]) { @@ -125,7 +136,7 @@ int main(int argc, char *argv[]) { for (int j0=0;j0 readHeader(istream &fin, long &masterIndexPosition, string &genomeID, int &numChromosomes, int &version, long &nviPosition, long &nviLength); - int flipSign(const char *genome, double *x, int n, char *chr, int binsize); static void usage(const char *argv0) { - fprintf(stderr, "Usage: %s [-o (observed)][-t tol][-e eps][-I max_iterations][-n normalization][-T threads][-v verbose] <[nv]>\n", argv0); - fprintf(stderr, " : hic file\n"); - fprintf(stderr, " : chromosomee\n"); - fprintf(stderr, " : Eigenvectors output base name\n"); - fprintf(stderr, " : resolution in bp\n"); - fprintf(stderr, " : optional; default is 2\n"); + fprintf(stderr, "Usage: %s [options] [nv]\n\n", argv0); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -o Use observed matrix instead of observed/expected (o/e) matrix\n"); + fprintf(stderr, " -t Set tolerance (default: 1.0e-7)\n"); + fprintf(stderr, " -e Set epsilon (default: 1.0e-8)\n"); + fprintf(stderr, " -I Set maximum iterations (default: 200)\n"); + fprintf(stderr, " -n Set normalization method (default: NONE)\n"); + fprintf(stderr, " -T Set number of threads (default: 1)\n"); + fprintf(stderr, " -v Set verbosity level (default: 1)\n"); + fprintf(stderr, " -h Show this help message\n\n"); + fprintf(stderr, "Required arguments:\n"); + fprintf(stderr, " Path to .hic file\n"); + fprintf(stderr, " Chromosome name (e.g., chr1)\n"); + fprintf(stderr, " Base name for output eigenvector files\n"); + fprintf(stderr, " Resolution in base pairs\n"); + fprintf(stderr, " [nv] Number of eigenvectors (optional, default: 2)\n"); } int main(int argc, char *argv[]) { @@ -96,14 +104,14 @@ int main(int argc, char *argv[]) { printf("\n"); // chromosome map for finding matrix - long master = 0L; + int64_t master = 0LL; map chromosomeMap; string genomeID; - int numChromosomes = 0; - int version = 0; - long nviPosition = 0; - long nviLength = 0; - long totalFileSize; + int32_t numChromosomes = 0; + int32_t version = 0; + int64_t nviPosition = 0LL; + int64_t nviLength = 0LL; + int64_t totalFileSize; chromosomeMap = readHeader(fin, master, genomeID, numChromosomes, version, nviPosition, nviLength); map::iterator itr0 = chromosomeMap.find(chrom); @@ -179,7 +187,7 @@ int main(int argc, char *argv[]) { for (int j0=0;j0 Date: Thu, 29 May 2025 15:13:26 -0400 Subject: [PATCH 4/4] fix wig output --- C++/Lanczos/s_fGW.cpp | 49 +++++++++++++++++++++++------------------- C++/Lanczos/s_fLan.cpp | 32 +++++++++++++++++++++------ 2 files changed, 53 insertions(+), 28 deletions(-) diff --git a/C++/Lanczos/s_fGW.cpp b/C++/Lanczos/s_fGW.cpp index 2126f10..58769e1 100755 --- a/C++/Lanczos/s_fGW.cpp +++ b/C++/Lanczos/s_fGW.cpp @@ -146,29 +146,34 @@ int main(int argc, char *argv[]) { exit(EXIT_FAILURE); } - fprintf(fout,"track type=wiggle_0\n"); - int a = 0; - for (int b=0; b (chroms.at(b).c_str()); - char *chr1 = (char *) malloc((strlen(chr)+4)*sizeof(char)); - if (!strstr(chr,"chr")) strcpy(chr1,"chr"); - else strcpy(chr1,""); - strcat(chr1,chr); - if (strcmp(chr1,"chrMT") == 0) strcpy(chr1,"chrM"); - fprintf(fout,"chrom=%s ",chr1); - fprintf(fout,"start=1 step=%d span=%d\n",binsize,binsize); - for (int c=0;c< ((int) ceil(chrLen.at(b)/((double) binsize)));c++) { - if (c == ((int) ceil(chrLen.at(b)/((double) binsize))) - 1) fprintf(fout,"fixedStep chrom=%s start=%d step=%d span=%d\n",chr1,c*binsize+1,chrLen.at(b) % binsize, chrLen.at(b) % binsize ); - if (!std::isnan(ev[j0][a])) fprintf(fout,"%lg\n",ev[j0][a++]); - else { - fprintf(fout,"%s\n","0"); - a++; - } - } - } - + fprintf(fout, "track type=wiggle_0 name=\"Eigenvector %d\" description=\"Eigenvector %d from genome-wide analysis\"\n", j0+1, j0+1); + int a = 0; + for (int b=0; b (chroms.at(b).c_str()); + char *chr1 = (char *) malloc((strlen(chr)+4)*sizeof(char)); + if (!strstr(chr,"chr")) strcpy(chr1,"chr"); + else strcpy(chr1,""); + strcat(chr1,chr); + if (strcmp(chr1,"chrMT") == 0) strcpy(chr1,"chrM"); + + // Calculate number of bins for this chromosome + int numBins = (int) ceil(chrLen.at(b)/((double) binsize)); + + // Write all bins with uniform size + fprintf(fout, "fixedStep chrom=%s start=1 step=%d span=%d\n", chr1, binsize, binsize); + for (int c=0; c (chrom.c_str()); + char *chr1 = (char *) malloc((strlen(chr)+4)*sizeof(char)); + if (!strstr(chr,"chr")) strcpy(chr1,"chr"); + else strcpy(chr1,""); + strcat(chr1,chr); + if (strcmp(chr1,"chrMT") == 0) strcpy(chr1,"chrM"); + + // Write all bins with uniform size + fprintf(fout, "fixedStep chrom=%s start=1 step=%d span=%d\n", chr1, binsize, binsize); + for (p=0; p