-
Notifications
You must be signed in to change notification settings - Fork 30
Perhaps the main_program is not set to ParallelExecutor. #80
Description
I0427 14:37:27.545102 21654 parallel_executor.cc:440] The Program will be executed on CUDA using ParallelExecutor, 1 cards are used, so 1 programs are executed in parallel.
I0427 14:37:27.635213 21654 build_strategy.cc:365] SeqOnlyAllReduceOps:0, num_trainers:1
I0427 14:37:27.762869 21654 parallel_executor.cc:307] Inplace strategy is enabled, when build_strategy.enable_inplace = True
I0427 14:37:27.837890 21654 parallel_executor.cc:375] Garbage collection strategy is enabled, when FLAGS_eager_delete_tensor_gb = 0
./python_paddle/lib/python3.6/site-packages/paddle/fluid/executor.py:782: UserWarning: The following exception is not an EOF exception.
"The following exception is not an EOF exception.")
Traceback (most recent call last):
File "./python_paddle/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "./python_paddle/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "test.py", line 140, in <module>
trainer.train(print_steps=print_steps)
File "./python_paddle/lib/python3.6/site-packages/paddlepalm/multihead_trainer.py", line 226, in train
rt_outputs, task_id = self.train_one_step(feed)
File "./python_paddle/lib/python3.6/site-packages/paddlepalm/multihead_trainer.py", line 282, in train_one_step
rt_outputs = self._trainers[task_id].train_one_step(batch)
File "./python_paddle/lib/python3.6/site-packages/paddlepalm/trainer.py", line 742, in train_one_step
rt_outputs = exe.run(distribute_train_prog, feed=feed, fetch_list=fetch_list)
File "./python_paddle/lib/python3.6/site-packages/paddle/fluid/executor.py", line 783, in run
six.reraise(*sys.exc_info())
File "./python_paddle/lib/python3.6/site-packages/six.py", line 693, in reraise
raise value
File "./python_paddle/lib/python3.6/site-packages/paddle/fluid/executor.py", line 778, in run
use_program_cache=use_program_cache)
File "./python_paddle/lib/python3.6/site-packages/paddle/fluid/executor.py", line 843, in _run_impl
return_numpy=return_numpy)
File "./python_paddle/lib/python3.6/site-packages/paddle/fluid/executor.py", line 677, in _run_parallel
tensors = exe.run(fetch_var_names)._move_to_list()
paddle.fluid.core_avx.EnforceNotMet:
C++ Call Stacks (More useful to developers):
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int)
1 paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const*, int)
2 paddle::framework::details::FastThreadedSSAGraphExecutor::InsertFetchOps(std::vector<std::string, std::allocator<std::string> > const&, std::vector<paddle::framework::LoDTensor, std::allocator<paddle::framework::LoDTensor> >*, std::unordered_map<std::string, std::vector<paddle::framework::details::VarHandleBase*, std::allocator<paddle::framework::details::VarHandleBase*> >, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, std::vector<paddle::framework::details::VarHandleBase*, std::allocator<paddle::framework::details::VarHandleBase*> > > > >*, std::unordered_map<paddle::framework::details::OpHandleBase*, std::atomic<int>, std::hash<paddle::framework::details::OpHandleBase*>, std::equal_to<paddle::framework::details::OpHandleBase*>, std::allocator<std::pair<paddle::framework::details::OpHandleBase* const, std::atomic<int> > > >*, std::vector<paddle::framework::details::OpHandleBase*, std::allocator<paddle::framework::details::OpHandleBase*> >*, std::vector<paddle::framework::details::OpHandleBase*, std::allocator<paddle::framework::details::OpHandleBase*> >*)
3 paddle::framework::details::FastThreadedSSAGraphExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&)
4 paddle::framework::details::ScopeBufferedMonitor::Apply(std::function<void ()> const&, bool)
5 paddle::framework::details::ScopeBufferedSSAGraphExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&)
6 paddle::framework::ParallelExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&)
Error Message Summary:
PreconditionNotMetError: Cannot find fetched variable(dvqa.tmp_1). Perhaps the main_program is not set to ParallelExecutor.
[Hint: Expected fetched_var_it != fetched_vars->end(), but received fetched_var_it == fetched_vars->end().] at (/paddle/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc:147)