forked from karpathy/nanoGPT
-
Notifications
You must be signed in to change notification settings
Fork 28
Open
Description
Training... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0% -:--:--
Traceback (most recent call last):
File "train.py", line 1185, in <module>
main()
File "train.py", line 1175, in main
trainer.train()
File "train.py", line 990, in train
self.log_metrics(losses, running_mfu, current_epoch, self.tokens_trained, current_dataset)
File "train.py", line 803, in log_metrics
self.export_model_graph()
File "train.py", line 785, in export_model_graph
self.writer.add_graph(self.model, (dummy_input, dummy_targets, dummy_iter_num))
File "writer.py", line 841, in add_graph
graph(model, input_to_model, verbose, use_strict_trace)
File "_pytorch_graph.py", line 337, in graph
raise e
File "_pytorch_graph.py", line 331, in graph
trace = torch.jit.trace(model, args, strict=use_strict_trace)
File "_trace.py", line 1002, in trace
traced_func = _trace_impl(
File "_trace.py", line 698, in _trace_impl
return trace_module(
File "_trace.py", line 1278, in trace_module
module._c._create_method_from_trace(
File "module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "module.py", line 1726, in _slow_forward
result = self.forward(*input, **kwargs)
File "model.py", line 409, in forward
x = block(x, iter_num)
File "module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "module.py", line 1726, in _slow_forward
result = self.forward(*input, **kwargs)
File "model.py", line 174, in forward
return custom_forward(x)
File "model.py", line 167, in custom_forward
x = x + self.attn(self.ln_1(x), iter_num)
File "module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "module.py", line 1726, in _slow_forward
result = self.forward(*input, **kwargs)
File "attention_variations.py", line 506, in forward
outputs = selective_scan_fn(
File "selective_scan_interface.py", line 110, in selective_scan_fn
return SelectiveScanFn.apply(u, delta, A, B, C, D, z, delta_bias, delta_softplus, return_last_state)
File "function.py", line 575, in apply
return super().apply(*args, **kwargs)
File "selective_scan_interface.py", line 44, in forward
out, x, *rest = selective_scan_cuda.fwd(u, delta, A, B, C, D, z, delta_bias, delta_softplus)
RuntimeError: Expected B.scalar_type() == (!is_variable_B ? weight_type : input_type) to be true, but got false.
Reactions are currently unavailable
Metadata
Assignees
Labels
No labels