-
Notifications
You must be signed in to change notification settings - Fork 68
Open
Description
When I run inpainting/tests/run_tests.py I get the following error:
Loading model checkpoint...
Successfully loaded model checkpoint
ic| args: Namespace(pdb='2KL8.pdb', contigs=['A1-20,5,A26-79'], length=None, checkpoint='/home/ramon/progs/RFDesign/inpainting/weights/BFF_mix_epoch25.pt', inpaint_str=None, inpaint_seq=None, n_cycle=15, tmpl_conf='0.5', num_designs=2, res_translate=None, tie_translate=None, floating_points=None, block_rotate=None, verbose=False, out='out/2KL8_test', dump_pdb=True, dump_trb=True, dump_npz=True, dump_fasta=True, dump_all=True, input_json=None, outdir='out', prefix='2KL8_test')
On design 0
ic| args.contigs: ['A1-20,5,A26-79']
on cycle 0
Traceback (most recent call last):
File "/home/ramon/progs/RFDesign/inpainting/tests/../inpaint.py", line 435, in <module>
main()
File "/home/ramon/progs/RFDesign/inpainting/tests/../inpaint.py", line 361, in main
out = inf_method(model, msa_hot, msa_extra_hot, seq, t1d, t2d, idx_pdb, design_params['MAXCYCLE'])
File "/home/ramon/progs/RFDesign/inpainting/inf_methods.py", line 16, in classic_inference
msa_prev, pair_prev, xyz_prev = model(msa[:,i_cycle],
File "/home/ramon/miniforge3/envs/SE3-nvidia2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/home/ramon/progs/RFDesign/inpainting/model/RoseTTAFoldModel.py", line 60, in forward
msa, pair, xyz, lddt = self.extractor(msa_latent, msa_full, pair, idx,
File "/home/ramon/miniforge3/envs/SE3-nvidia2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/home/ramon/progs/RFDesign/inpainting/model/Track_module.py", line 459, in forward
xyz, state = self.init_str(idx, msa, pair)
File "/home/ramon/miniforge3/envs/SE3-nvidia2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/home/ramon/progs/RFDesign/inpainting/model/InitStrGenerator.py", line 90, in forward
Gout = self.transformer(G)
File "/home/ramon/miniforge3/envs/SE3-nvidia2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/home/ramon/miniforge3/envs/SE3-nvidia2/lib/python3.9/site-packages/torch/nn/modules/container.py", line 139, in forward
input = module(input)
File "/home/ramon/miniforge3/envs/SE3-nvidia2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/home/ramon/progs/RFDesign/inpainting/model/InitStrGenerator.py", line 35, in forward
x = self.TConv(xin, e_idx, e_attr)
File "/home/ramon/miniforge3/envs/SE3-nvidia2/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/home/ramon/miniforge3/envs/SE3-nvidia2/lib/python3.9/site-packages/torch_geometric/nn/conv/transformer_conv.py", line 147, in forward
out = self.propagate(edge_index, x=x, edge_attr=edge_attr, size=None)
File "/home/ramon/miniforge3/envs/SE3-nvidia2/lib/python3.9/site-packages/torch_geometric/nn/conv/message_passing.py", line 237, in propagate
out = self.message(**msg_kwargs)
File "/home/ramon/miniforge3/envs/SE3-nvidia2/lib/python3.9/site-packages/torch_geometric/nn/conv/transformer_conv.py", line 179, in message
alpha = softmax(alpha, index, ptr, size_i)
RuntimeError: nvrtc: error: invalid value for --gpu-architecture (-arch)
nvrtc compilation failed:
#define NAN __int_as_float(0x7fffffff)
#define POS_INFINITY __int_as_float(0x7f800000)
#define NEG_INFINITY __int_as_float(0xff800000)
template<typename T>
__device__ T maximum(T a, T b) {
return isnan(a) ? a : (a > b ? a : b);
}
template<typename T>
__device__ T minimum(T a, T b) {
return isnan(a) ? a : (a < b ? a : b);
}
extern "C" __global__
void fused_sub_exp(float* tsrc_1, float* tsrc_max_9, float* aten_exp) {
{
if (512 * blockIdx.x + threadIdx.x<24648 ? 1 : 0) {
float v = __ldg(tsrc_1 + 512 * blockIdx.x + threadIdx.x);
float v_1 = __ldg(tsrc_max_9 + 512 * blockIdx.x + threadIdx.x);
aten_exp[512 * blockIdx.x + threadIdx.x] = expf(v - v_1);
}
}
}
I had to install jaxlib 0.4, as explained in #56.
I found a similar issue reported against PyTorch (pytorch/pytorch#87595), but in my case the following snippet
import torch
a = torch.tensor([2, 2, 3]).cuda(0)
print(a.prod())
works, and returns tensor(12, device='cuda:0').
And the output of python -c "import torch;print(torch.cuda.get_device_capability(0))" is: (8, 9).
My hardware is a GeForce RTX 4070 with CUDA Version: 12.3.
Metadata
Metadata
Assignees
Labels
No labels