Skip to content

make_ld_matrix

Oleksandr Frei edited this page Dec 5, 2016 · 13 revisions

Making single LD matrix across all chromosomes can be challenging due to memory limitations. We suggest the following approach:

import glob
import os.path
import os
import subprocess
import sys
import pandas as pd

def execute_command(command):
    print("Execute command: {}".format(command))
    print(subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()[0].decode("utf-8"))
    #print(subprocess.check_output(command.split()).decode("utf-8"))

ref = {'9m': r'C:\Users\Oleksandr\Documents\GitHub\python_convert\9279485_ref.bim',
       '2m': r'C:\Users\Oleksandr\Documents\GitHub\python_convert\2558411_ref.bim'}

tasks = [('2m', 'p8', 0.8),
         ('2m', 'p2', 0.2),
         ('2m', 'p1', 0.1),
         ('9m', 'p8', 0.8),
         ('9m', 'p2', 0.2),
         ('9m', 'p1', 0.1)]

for template, thr_name, thr_value in tasks:
    for chr in range(1, 23):  # we skip chr23 - this is intended
        file = 'E:\{0}\ALL.chr{1}.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz'.format(template, chr);
        execute_command(r'python make_ld_matrix.py --vcf {4} --ref {5} --ld_window_kb 10000 --ld_window_r2 {2} --chunksize 1000000 --savemat E:\{0}\ldmat_{0}_{1}_chr{3}.mat'.format(template, thr_name, thr_value, chr, file, ref[template]))

Later all matrices can be merged within matlab code.

function [LDmat, mafvec] = join_ld(file_pattern)
    id1 = {}; id2 = {}; mafvec = [];
    for i=1:22
        tmp = load(sprintf('%s_chr%i.mat', file_pattern, i));
        id1{i} = tmp.id1'; id2{i} = tmp.id2'; nsnp = tmp.nsnp;
        if isempty(mafvec), mafvec = nan(nsnp, 1); end;
        mafvec(~isnan(tmp.mafvec)) = tmp.mafvec(~isnan(tmp.mafvec));
    end
    LDmat = sparse(double([id1{:}]'), double([id2{:}]'), true, double(nsnp), double(nsnp));
    LDmat = LDmat | speye(double(nsnp));
    LDmat = LDmat | (LDmat - LDmat');
end


#folder = 'H:\\ftp\\ip113.ucsd.edu\\1000Genome\\phase3\\build37_released\\tmp';
folder = '/space/syn03/1/data/oleksandr/LDmatrix/tmp';
filename = 'ldmat_2m_p8'; [LDr2_p8sparse, mafvec] = join_ld(fullfile(folder, filename)); save(filename, 'LDr2_p8sparse', 'mafvec', '-v7.3'); clear('LDr2_p8sparse');
filename = 'ldmat_2m_p2'; [LDr2_p2sparse, mafvec] = join_ld(fullfile(folder, filename)); save(filename, 'LDr2_p2sparse', 'mafvec', '-v7.3'); clear('LDr2_p2sparse');
filename = 'ldmat_2m_p1'; [LDr2_p1sparse, mafvec] = join_ld(fullfile(folder, filename)); save(filename, 'LDr2_p1sparse', 'mafvec', '-v7.3'); clear('LDr2_p1sparse');

filename = 'ldmat_9m_p8'; [LDr2_p8sparse, mafvec] = join_ld(fullfile(folder, filename)); save(filename, 'LDr2_p8sparse', 'mafvec', '-v7.3'); clear('LDr2_p8sparse');
filename = 'ldmat_9m_p2'; [LDr2_p2sparse, mafvec] = join_ld(fullfile(folder, filename)); save(filename, 'LDr2_p2sparse', 'mafvec', '-v7.3'); clear('LDr2_p2sparse');
filename = 'ldmat_9m_p1'; [LDr2_p1sparse, mafvec] = join_ld(fullfile(folder, filename)); save(filename, 'LDr2_p1sparse', 'mafvec', '-v7.3'); clear('LDr2_p1sparse');

Clone this wiki locally