diff --git a/Examples/Parareal/ERK/bm1.nml b/Examples/Parareal/ERK/bm1.nml new file mode 100644 index 00000000..0aef05cb --- /dev/null +++ b/Examples/Parareal/ERK/bm1.nml @@ -0,0 +1,22 @@ +! +! Test parameters for the nonlinear Schroedinger eq +! + +&PARAMS + + Tfin = 13.5 + nx = 1024 1024 + + nsteps = 1 + nsteps_rk = 1 1 + rk_order = 4 4 + + eq_type = 3 + ic_type = 3 + + split_damp=.true. + split_rho=0.0245436926061703 ! pi/128 + Lx=25.1327412287183 ! 8 pi + + pfasst_nml="parareal.nml" ! Use the common input file for pfasst parameters +/ diff --git a/Examples/Parareal/ERK/bm1serial.nml b/Examples/Parareal/ERK/bm1serial.nml new file mode 100644 index 00000000..78a0c3ea --- /dev/null +++ b/Examples/Parareal/ERK/bm1serial.nml @@ -0,0 +1,23 @@ +! +! Test parameters for the nonlinear Schroedinger eq +! + +&PARAMS + + Tfin = 13.5 + nx = 1024 + + nsteps = 4096 + nsteps_rk = 128 + rk_order = 4 + + eq_type = 3 + ic_type = 3 + + split_damp=.false. + split_rho=0.0245436926061703 ! pi/128 + Lx=25.1327412287183 ! 8 pi + + pfasst_nml="serial.nml" ! Use the common input file for pfasst parameters +! pfasst_nml="parareal.nml" ! Use the common input file for pfasst parameters +/ diff --git a/Examples/Parareal/ERK/nls_bm1.npy b/Examples/Parareal/ERK/nls_bm1.npy new file mode 100644 index 00000000..fbb7be4e Binary files /dev/null and b/Examples/Parareal/ERK/nls_bm1.npy differ diff --git a/Examples/Parareal/ERK/serial.nml b/Examples/Parareal/ERK/serial.nml new file mode 100644 index 00000000..ca05682d --- /dev/null +++ b/Examples/Parareal/ERK/serial.nml @@ -0,0 +1,32 @@ +&PF_PARAMS + ! These are internal pfasst variables that must be set + nlevels = 1 + + ! These are internal pfasst variables that can be reset + niters = 1 ! default is 5 - 0 leaves predictor on coarse level + + ! optional variables to control termination (defaults are 0.0) + abs_res_tol = 1.d-10 + rel_res_tol = 1.d-10 + + nnodes = 2 2 + + ! Choices to run parareal instead of PFASST + use_rk_stepper=.true. + use_sdc_sweeper=.false. + + ! These flags control the output. For fastest timings, turn them off + save_residuals=.false. + save_delta_q0=.false. + save_errors=.false. + + ! Flag for timings. For fastest timing of only full algorithm, set to 1 + save_timings = 1 + + ! Flag for saving solutions, needed for problems with no exact solution + save_solutions = 1 + + ! Directory name for output + outdir="serial" +/ + diff --git a/Examples/Parareal/ERK/src/stepper_include.f90 b/Examples/Parareal/ERK/src/stepper_include.f90 index c1023314..4d4e10e0 100644 --- a/Examples/Parareal/ERK/src/stepper_include.f90 +++ b/Examples/Parareal/ERK/src/stepper_include.f90 @@ -70,7 +70,7 @@ end subroutine destroy !F_EVAL: evaluates the nonlinear function f(t,y(t)) at y, t. subroutine f_eval(this, y, t, level_index, f) - use probin, only: eq_type,splitting + use probin, only: eq_type,split_damp ! arguments class(my_stepper_t), intent(inout) :: this class(pf_encap_t), intent(in) :: y @@ -90,7 +90,7 @@ subroutine f_eval(this, y, t, level_index, f) call f_NL(this%yvec,this%fvec,this%fft_ops%opNL,this%tmp,fft) ! include damping - if (split_damping) this%fvec= this%fvec - this%fft_ops%opDamp*this%yvec + if (split_damp) this%fvec= this%fvec - this%fft_ops%opDamp*this%yvec end subroutine f_eval subroutine compA(this, dt, i, j, F, val) diff --git a/Examples/Parareal/Make.package b/Examples/Parareal/Make.package index a0f5c582..001098af 100644 --- a/Examples/Parareal/Make.package +++ b/Examples/Parareal/Make.package @@ -9,6 +9,7 @@ endif FFLAGS += -I$(LIBPFASST)/include -I$(LIBNPY)/include LDFLAGS += -L$(LIBNPY)/libnpy_fortran_mod -lnpy + build/main_rk.o : ../src/main_rk.f90 build/probin.o build/hooks_rk.o build/level_rk.o build/stepper_$(DIM)d.o build/utils_$(DIM)d.o $(LIBPFASST)/lib/libpfasst.a build/probin.o :../src/probin.f90 build/dim_$(DIM)d.o $(LIBPFASST)/lib/libpfasst.a build/utils_$(DIM)d.o : ../src/utils_$(DIM)d.f90 build/probin.o diff --git a/Examples/Parareal/src/probin.f90 b/Examples/Parareal/src/probin.f90 index 75fac697..1dd0a322 100644 --- a/Examples/Parareal/src/probin.f90 +++ b/Examples/Parareal/src/probin.f90 @@ -36,7 +36,8 @@ module probin real(pfdp), save :: gamma ! parameters for split damping real(pfdp), save :: d0,d1,r0,r1 - logical, save :: split_damping + logical, save :: split_damp + real(pfdp), save :: split_rho character(len=32), save :: pfasst_nml character(len=64), save :: output ! directory name for output @@ -46,7 +47,7 @@ module probin integer :: ios,iostat namelist /params/ nx,ic_type, eq_type, nsteps,nsteps_rk,rk_order, dt, Tfin namelist /params/ pfasst_nml, lam1,lam2,a,b,c, nu, t00, sigma, beta, gamma, splitting - namelist /params/ kfreqx,kfreqy,kfreqz,Lx,Ly,Lz,d0,d1,r0,r1,split_damping + namelist /params/ kfreqx,kfreqy,kfreqz,Lx,Ly,Lz,d0,d1,r0,r1,split_damp,split_rho contains @@ -87,7 +88,8 @@ subroutine probin_init(pf_fname) Ly = two_pi Lz = two_pi ! Default damping parameters are 0 - split_damping=.true. + split_damp=.true. + split_rho=two_pi/256.0_pfdp d0=0.0_pfdp d1=0.0_pfdp r0=0.0_pfdp diff --git a/Examples/Parareal/src/utils_1d.f90 b/Examples/Parareal/src/utils_1d.f90 index 0d150496..a4016bea 100644 --- a/Examples/Parareal/src/utils_1d.f90 +++ b/Examples/Parareal/src/utils_1d.f90 @@ -250,6 +250,7 @@ subroutine f_NL(yvec,fvec,opNL,tmp,fft) case DEFAULT call pf_stop(__FILE__,__LINE__,'Bad case in SELECT',eq_type) end select + end subroutine f_NL end module pf_mod_zutils @@ -275,7 +276,7 @@ module pf_mod_fftops contains subroutine fftops_init(this,fft,nx) - use probin, only: d0,d1,r0,r1 + use probin, only: d0,d1,r0,r1,split_damp,split_rho class(pf_fft_ops_t), intent(inout) :: this type(pf_fft_t), pointer, intent(in) :: fft integer, intent(in) :: nx @@ -289,8 +290,6 @@ subroutine fftops_init(this,fft,nx) if (istat .ne. 0) call pf_stop(__FILE__,__LINE__,'Allocate failed ',istat) allocate(this%opNL(nx),STAT=istat) if (istat .ne. 0) call pf_stop(__FILE__,__LINE__,'Allocate failed ',istat) - allocate(this%opDamp(nx),STAT=istat) - if (istat .ne. 0) call pf_stop(__FILE__,__LINE__,'Allocate failed ',istat) call fft%make_deriv(this%ddx) ! First derivative call fft%make_lap(this%lap) ! Second derivative @@ -299,19 +298,29 @@ subroutine fftops_init(this,fft,nx) call set_ops(this%opL,this%opNL,this%ddx,this%lap) ! Create damping op - this%opDamp= d0*this%ddx**r0 + d1*abs(this%ddx)**r1 - ! add damping to linear operator - this%opL=this%opL+this%opDamp + if (split_damp) then + allocate(this%opDamp(nx),STAT=istat) + if (istat .ne. 0) call pf_stop(__FILE__,__LINE__,'Allocate failed ',istat) + ! this%opDamp= d0*this%ddx**r0 + d1*abs(this%ddx)**r1 + this%opDamp= abs(this%ddx)**2/tan(two_pi/4.0_pfdp + split_rho) + + ! add damping to linear operator + this%opL=this%opL+this%opDamp + end if deallocate(this%lap) deallocate(this%ddx) end subroutine fftops_init subroutine fftops_destroy(this) + use probin, only: split_damp class(pf_fft_ops_t), intent(inout) :: this deallocate(this%opL) deallocate(this%opNL) - deallocate(this%opDamp) + if (split_damp) then + deallocate(this%opDamp) + end if + end subroutine fftops_destroy !> Routine to return out put the solution to numpy (dimension dependent) diff --git a/Makefile.defaults b/Makefile.defaults index 5acf24ee..94c3d0d3 100644 --- a/Makefile.defaults +++ b/Makefile.defaults @@ -12,7 +12,7 @@ USE_PETSC ?= FALSE USE_HYPRE ?= FALSE USE_SUNDIALS ?= FALSE USE_AMREX ?= FALSE -USE_FFT ?= FALSE +USE_FFT ?= TRUE # File for compiler options diff --git a/Makefile.examples/Makefile.local b/Makefile.examples/Makefile.local index ee8601b8..91bc3025 100644 --- a/Makefile.examples/Makefile.local +++ b/Makefile.examples/Makefile.local @@ -7,10 +7,12 @@ AR=ar rcs FFLAGS = -Ibuild -Jinclude -cpp -ffree-line-length-none # Use this flag on newer compilers with stricter bounds checking -#FFLAGS = -fallow-argument-mismatch +# FFLAGS_EXTRA is used for F77 in building dfftpack +#FFLAGS += -fallow-argument-mismatch +#FFLAGS_EXTRA += fallow-argument-mismatch ifeq ($(DEBUG),TRUE) FFLAGS += -fcheck=all -fbacktrace -g -ffpe-trap=invalid,zero,overflow -fbounds-check -fimplicit-none -ffree-line-length-none else FFLAGS += -O3 -endif \ No newline at end of file +endif diff --git a/Makefile.examples/Makefile.local.cori b/Makefile.examples/Makefile.local.cori new file mode 100644 index 00000000..edc86917 --- /dev/null +++ b/Makefile.examples/Makefile.local.cori @@ -0,0 +1,27 @@ +# Set some compiler flags + +AR = ar rcs +ifeq ($(PE_ENV),INTEL) +#FC = mpiifort +#CC = mpiiCC +FC = ftn +LD = ftn -static +CC = cc +FFLAGS = -Ibuild -module include -cpp -static +ifeq ($(DEBUG),TRUE) +FFLAGS += -g -O0 +endif +endif + +ifeq ($(PE_ENV),CRAY) +#FC = mpiifort +#CC = mpiiCC +FC = ftn +LD = ftn +CC = cc +FFLAGS = -Ibuild -Jinclude +LDLAGS = -Ibuild -Jinclude +ifeq ($(DEBUG),TRUE) +FFLAGS += -g +endif +endif diff --git a/README.md b/README.md index 93c6a771..51e237b0 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,25 @@ Approximation Scheme in Space and Time (PFASST) algorithm. It is written in Fortran (mostly F90, with some F03), but can be interfaced with C and C++ fairly easily. +## References +- Matthew Emmett, Michael Minion: Toward an efficient parallel in time method for partial differential equations, Commun. Appl. Math. Comput. Sci. 7(1), 105-132, 2012. [http://dx.doi.org/10.2140/camcos.2012.7.105](http://dx.doi.org/10.2140/camcos.2012.7.105) +- Matthew Emmett, Michael Minion: Efficient Implementation of a Multi-Level Parallel in Time Algorithm. In: Erhel, J., Gander, M., Halpern, L., Pichot, G., Sassi, T., Widlund, O. (eds) Domain Decomposition Methods in Science and Engineering XXI. Lecture Notes in Computational Science and Engineering, vol 98. Springer, Cham, 2014. [https://doi.org/10.1007/978-3-319-05789-7_33](https://doi.org/10.1007/978-3-319-05789-7_33) + +## Documentation +We are currently writing better documentation, examples, and a tutorial, at [https://libpfasst.github.io/LibPFASST](https://libpfasst.github.io/LibPFASST). +The documentation can be edited on the gh-pages branch of this repo. + +## Requirements +- a Fortran compiler, for some external libraries like FFTW C/C++ compiler +- an MPI library +- makedepf90 for generating dependencies +- python 3.12 for the test (including numpy, matplotlib, pandas, scipy, tqdm, pytest) + +## Quickstart +- compile the library, see [https://libpfasst.github.io/LibPFASST/docs/build/html/compiling.html](https://libpfasst.github.io/LibPFASST/docs/build/html/compiling.html) +- look through the tutorials, see [https://libpfasst.github.io/LibPFASST/docs/build/html/tutorial.html](https://libpfasst.github.io/LibPFASST/docs/build/html/tutorial.html) + +## License Libpfasst Copyright (c) 2018, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy) and Sebastian Goetschel. All rights reserved. @@ -23,9 +42,7 @@ You are under no obligation whatsoever to provide any bug fixes, patches, or upg -# Documentation -We are currently writing better documentation, examples, and a tutorial, at https://libpfasst.github.io/LibPFASST. -The documentation can be edited on the gh-pages branch of this repo. + diff --git a/pf/__init__.py b/pf/__init__.py index 57f4ba0b..f54fb934 100644 --- a/pf/__init__.py +++ b/pf/__init__.py @@ -1,5 +1,5 @@ -import io -import convergence -import speedup -import pfasst -import nwc +import pf.io +import pf.convergence +import pf.speedup +import pf.pfasst +import pf.nwc diff --git a/pf/convergence.py b/pf/convergence.py index 9b147e7f..c7f25c9c 100644 --- a/pf/convergence.py +++ b/pf/convergence.py @@ -2,7 +2,8 @@ from itertools import product import numpy as np -import pylab +#import pylab +import matplotlib.pyplot as plt import pf.io @@ -57,7 +58,7 @@ def errors(reference, approximate, **kwargs): err = abs(ref - app).max() errors[step, aiter, alev] = err except: - print 'WARNING: size mismatch:', step, aiter, alev + print('WARNING: size mismatch:', step, aiter, alev) return errors, steps, iters, levels @@ -84,7 +85,7 @@ def plot(errs, steps, iters, levels, **kwargs): # if not isinstance(ax, list): # ax = [ ax ] - pylab.figure() + plt.figure() for l, level in enumerate(levels): for i, iter in enumerate(iters): @@ -92,11 +93,11 @@ def plot(errs, steps, iters, levels, **kwargs): x = steps y = [ errs[step,iter,level] for step in steps ] - pylab.subplot(1, len(levels), l+1) + plt.subplot(1, len(levels), l+1) - pylab.semilogy(x, y, **kwargs) - pylab.title('level %d' % level) - pylab.xlabel('step/processor') - pylab.ylabel('max abs. error') + plt.semilogy(x, y, **kwargs) + plt.title('level %d' % level) + plt.xlabel('step/processor') + plt.ylabel('max abs. error') # return fig, ax diff --git a/pf/nwc.py b/pf/nwc.py index 6e2e312d..d50422c9 100644 --- a/pf/nwc.py +++ b/pf/nwc.py @@ -2,7 +2,7 @@ from os import getcwd, chdir, makedirs, devnull from subprocess import call from scipy.io import FortranFile -from pfasst import PFASST +from pf.pfasst import PFASST import numpy as np class NWC(PFASST): @@ -11,7 +11,7 @@ def __init__(self, home, params=None, **kwargs): if params is None: params = Params() PFASST.__init__(self, home, params, **kwargs) - print 'Setting up NWC' + print('Setting up NWC') self.nwc = self.home + '/nwc.sh' self.cwd = getcwd() + '/' self.nwc_base_string = 'echo\nscratch_dir ./scratch\npermanent_dir ./perm\n\n'\ @@ -33,7 +33,7 @@ def __init__(self, home, params=None, **kwargs): except: raise Exception('unsuccessful NWC run, check input!') else: - print 'exact dirs already exist' + print('exact dirs already exist') self.xtrans, self.ytrans = self._parse_transforms() self.initial = self._get_nwc_dmat('initial_condition') @@ -68,7 +68,7 @@ def _create_nwc_inp(self): def _run_nwc(self): FNULL = open(devnull, 'w') chdir(self.p.exact_dir) - print '---- running nwc ----' + print('---- running nwc ----') call([self.nwc, 'nw'], stdout=FNULL) chdir(self.cwd) FNULL.close() diff --git a/pf/pfasst.py b/pf/pfasst.py index ad9a6f77..4c06792c 100755 --- a/pf/pfasst.py +++ b/pf/pfasst.py @@ -66,7 +66,7 @@ class Params(object): verbose = attr.ib(default=False, repr=False) nersc = attr.ib(default=False) dt = attr.ib(default=None) - timings = attr.ib(default=False) + timings = attr.ib(default=0) vcycle = attr.ib(default=False) tolerance = attr.ib(default=1e-12) qtype = attr.ib(default='lobatto') @@ -127,7 +127,7 @@ class MagpicardParams(Params): base_string = attr.ib(default= '&pf_params\n\tnlevels = {}\n\tniters = {}\n\t'+ \ 'nnodes = {}\n\tnsweeps_pred = {}\n\tnsweeps = {}\n\t'+ \ - 'qtype = {}\n\techo_timings = {}\n\tabs_res_tol = {}\n\t'+ \ + 'qtype = {}\n\tsave_timings = {}\n\tabs_res_tol = {}\n\t'+ \ 'rel_res_tol = {}\n\tpipeline_pred = .true.\n\t' + \ 'pfasst_pred = .true.\n\tvcycle = {}\n/\n\n'+ \ '¶ms\n\tfbase = {}\n\t'+ \ @@ -165,9 +165,9 @@ def _make_list(self): solns = '.false.' if self.timings == True: - timings = '.true.' + timings = 3 else: - timings = '.false.' + timings = 2 if self.periodic == True: periodic = '.true.' @@ -224,7 +224,7 @@ class IMKParams(Params): '&pf_params\n\tnlevels = {}\n\tniters = {}\n\t'+ \ 'nnodes = {}\n\t' + \ 'nsweeps_pred = {}\n\tnsweeps = {}\n\t'+ \ - 'qtype = {}\n\techo_timings = {}\n\tabs_res_tol = {}\n\t'+ \ + 'qtype = {}\n\tsave_timings = {}\n\tabs_res_tol = {}\n\t'+ \ 'rel_res_tol = {}\n\tpipeline_pred = .true.\n\t' + \ 'pfasst_pred = .true.\n\tvcycle = {}\n/\n\n'+ \ '¶ms\n\tfbase = {}\n\t'+ \ @@ -271,9 +271,9 @@ def _make_list(self): solns = '.false.' if self.timings == True: - timings = '.true.' + timings = 3 else: - timings = '.false.' + timings = 2 if self.periodic == True: periodic = '.true.' @@ -362,7 +362,7 @@ def __init__(self, params=None, **kwargs): self.p = params self.exe = self.p.exe - for k, v in kwargs.iteritems(): + for k, v in iter(kwargs.items()): setattr(self.p, k, v) try: @@ -418,15 +418,19 @@ def run(self, ref=False): if self.p.verbose: nodes = ' '.join(map(str, self.p.nodes)) - print '---- running pfasst: tasks={}, nodes={}, dt={} ----'.format( - self.p.tasks, nodes, self.p.dt) + print('---- running pfasst: tasks={}, nodes={}, dt={} ----'.format( + self.p.tasks, nodes, self.p.dt)) command = self._build_command() - - output = check_output(command, stderr=STDOUT) + print(command) + output = check_output(command, stderr=STDOUT, text=True) except CalledProcessError as exc: print("Status : FAIL", exc.returncode, exc.output) + except Exception as e: + print(type(e)) + print(e) else: + print(output) trajectory, total_times = self._get_trajectory_from_output( output, self.p.nsteps, ref=ref) trajectory.to_pickle(pkl_path) @@ -493,7 +497,7 @@ def _get_trajectory_from_output(self, output, nsteps, ref=False): total_times[rank] = cpu_time - if self.p.timings: + if self.p.timings > 2: timings = read_all_timings(dname='.') self._merge_timings_into(trajectory, timings) @@ -506,7 +510,8 @@ def _get_trajectory_from_output(self, output, nsteps, ref=False): last_row = len(trajectory) - 1 if ref: last_row = 0 - trajectory.set_value(last_row, 'solution', sol) + #trajectory.set_value(last_row, 'solution', sol) + trajectory.at[last_row, 'solution'] = sol return trajectory, total_times @@ -533,7 +538,7 @@ def _merge_timings_into(self, trajectory, timings): @staticmethod def _get_solution(path_to_solution): - print path_to_solution + print(path_to_solution) f = FortranFile(path_to_solution) solution = f.read_record(np.complex_) f.close() @@ -681,7 +686,7 @@ def load(self, pkl_path): except: raise else: - print 'recovering results from pickle!' + print('recovering results from pickle!') self.loc[0] = traj.loc[0] return @@ -804,7 +809,7 @@ class object where each row in the object corresponds to the parameters results.astype({'dt': np.float_, 'nsteps': np.int_, 'error': np.float_}) slope, _ = self.get_linear_fit_loglog(results.dt, results.error) - print 'slope = {}'.format(slope) + print('slope = {}'.format(slope)) return results diff --git a/pf/speedup.py b/pf/speedup.py index b6418aaf..9b208b6e 100644 --- a/pf/speedup.py +++ b/pf/speedup.py @@ -56,26 +56,26 @@ def theoretical_speedup_fixed_block(serial, parallel, nvars, nnodes, C=-1, verbo OR = np.mean([ x.delta for x in parallel if x.timer == 'restrict%d' % (l) ]) measO.append(OI + OR) - print '' - print 'steps: ', N - print 'serial iterations: ', Ks - print 'mean cost of serial sweeps: ', Y0 - print 'estimated serial cost: ', Cs - print 'actual serial cost: ', measCs - print 'parallel levels: ', L - print 'parallel iterations: ', Kp - print 'parallel processors: ', P - print 'parallel blocks: ', B - print 'max cost of any/all comm: ', C - print 'alpha ratios: ', alpha - print 'estimated cost of sweeps: ', Y - print 'actual cost of sweeps: ', measY - print 'estimated cost of overhead: ', O - print 'actual cost of overhead: ', measO - print 'estimated parallel cost: ', Cp - print 'actual parallel cost: ', measCp - print 'estimated speedup: ', Cs/Cp - print 'actual speedup: ', measCs/measCp + print('') + print('steps: ', N) + print('serial iterations: ', Ks) + print('mean cost of serial sweeps: ', Y0) + print('estimated serial cost: ', Cs) + print('actual serial cost: ', measCs) + print('parallel levels: ', L) + print('parallel iterations: ', Kp) + print('parallel processors: ', P) + print('parallel blocks: ', B) + print('max cost of any/all comm: ', C) + print('alpha ratios: ', alpha) + print('estimated cost of sweeps: ', Y) + print('actual cost of sweeps: ', measY) + print('estimated cost of overhead: ', O) + print('actual cost of overhead: ', measO) + print('estimated parallel cost: ', Cp) + print('actual parallel cost: ', measCp) + print('estimated speedup: ', Cs/Cp) + print('actual speedup: ', measCs/measCp) return Cs / Cp @@ -116,10 +116,10 @@ def echo_speedup(sdname, pdname): ptime = list(ptime.itervalues()) nproc = len(processors) - print "processors: ", nproc - print "serial time: ", stime - print "parallel time: ", max(ptime) - print "speedup (slowest): ", stime/max(ptime) - print "efficiency (slowest): ", stime/max(ptime)/nproc - print "speedup (fastest): ", stime/min(ptime) - print "efficiency (fastest): ", stime/min(ptime)/nproc + print("processors: ", nproc) + print("serial time: ", stime) + print("parallel time: ", max(ptime)) + print("speedup (slowest): ", stime/max(ptime)) + print("efficiency (slowest): ", stime/max(ptime)/nproc) + print("speedup (fastest): ", stime/min(ptime)) + print("efficiency (fastest): ", stime/min(ptime)/nproc) diff --git a/src/pf_mpi.f90 b/src/pf_mpi.f90 index 27cf8f3c..d423c791 100644 --- a/src/pf_mpi.f90 +++ b/src/pf_mpi.f90 @@ -14,25 +14,25 @@ module pf_mod_comm_mpi use pf_mod_mpi, only: MPI_REAL16, MPI_REAL8 implicit none ! For normal double precision - integer, parameter :: myMPI_Datatype=MPI_REAL8 + integer, parameter :: myMPI_Datatype=MPI_REAL8 ! For quadruple precision (see top of pf_dtype.f90) - ! integer, parameter :: myMPI_Datatype=MPI_REAL16 + ! integer, parameter :: myMPI_Datatype=MPI_REAL16 contains !> Subroutine to create an MPI based PFASST communicator using the MPI communicator *mpi_comm*. subroutine pf_mpi_create(pf_comm, mpi_comm) type(pf_comm_t), intent(out) :: pf_comm integer, intent(in) :: mpi_comm - + integer :: ierror - + pf_comm%comm = mpi_comm !! assign communicator - - + + !> assign number of processors call mpi_comm_size(mpi_comm, pf_comm%nproc, ierror) - + !> assign procedure pointers pf_comm%post => pf_mpi_post pf_comm%recv => pf_mpi_recv @@ -45,12 +45,12 @@ end subroutine pf_mpi_create !> Subroutine to set up the PFASST communicator. - !! This should be called soon after adding levels to the PFASST controller + !! This should be called soon after adding levels to the PFASST controller subroutine pf_mpi_setup(pf_comm, pf,ierror) use pf_mod_mpi, only: MPI_REQUEST_NULL use pf_mod_stop, only: pf_stop - type(pf_comm_t), intent(inout) :: pf_comm !! communicator + type(pf_comm_t), intent(inout) :: pf_comm !! communicator type(pf_pfasst_t), intent(inout) :: pf !! main pfasst structure integer, intent(inout) :: ierror !! error flag @@ -61,7 +61,7 @@ subroutine pf_mpi_setup(pf_comm, pf,ierror) allocate(pf_comm%recvreq(pf%nlevels),stat=ierror) if (ierror /=0) call pf_stop(__FILE__,__LINE__,'allocate fail, error=',ierror) allocate(pf_comm%sendreq(pf%nlevels),stat=ierror) - if (ierror /=0) call pf_stop(__FILE__,__LINE__,'allocate fail, error=',ierror) + if (ierror /=0) call pf_stop(__FILE__,__LINE__,'allocate fail, error=',ierror) pf_comm%sendreq = MPI_REQUEST_NULL pf_comm%statreq = MPI_REQUEST_NULL !Tells the first send_status not to wait for previous one to arrive @@ -105,7 +105,7 @@ subroutine pf_mpi_send_status(pf, tag,istatus,ierror, dest) integer :: message(1),isize message = istatus - + if (pf%comm%statreq /= MPI_REQUEST_NULL) then if (pf%debug) print*, 'DEBUG --',pf%rank, 'waiting in send_status with statreq',pf%comm%statreq @@ -135,7 +135,7 @@ subroutine pf_mpi_recv_status(pf, tag,istatus,ierror, source) integer :: message(1) ! Get the message - call mpi_recv(message, 1, MPI_INTEGER4,source, tag, pf%comm%comm, stat, ierror) + call mpi_recv(message, 1, MPI_INTEGER4, source, tag, pf%comm%comm, stat, ierror) istatus=message(1) if (pf%debug) print *,'DEBUG- rank=',pf%rank,'in recv_status, istatus,message', istatus, message end subroutine pf_mpi_recv_status @@ -154,7 +154,7 @@ subroutine pf_mpi_send(pf, level, tag, blocking,ierror, dest) integer :: stat(MPI_STATUS_SIZE) - + if (blocking) then call mpi_ssend(level%send, level%mpibuflen, myMPI_Datatype, & dest, tag, pf%comm%comm, stat, ierror) diff --git a/src/pf_parallel.f90 b/src/pf_parallel.f90 index 6b8da41c..4418fa49 100644 --- a/src/pf_parallel.f90 +++ b/src/pf_parallel.f90 @@ -173,7 +173,7 @@ subroutine pf_predictor(pf, t0, dt, flags) else level_index = 1 c_lev => pf%levels(1) - if(pf%save_residuals) call pf_residual(pf, f_lev%index, dt,0) + call pf_residual(pf, f_lev%index, dt,0) end if !! diff --git a/test/nagumo/.depend b/test/nagumo/.depend index 322035e2..b3306e44 100644 --- a/test/nagumo/.depend +++ b/test/nagumo/.depend @@ -2,6 +2,5 @@ ./build/hooks.o : src/hooks.f90 ./build/solutions.o ./build/probin.o ./build/solutions.o : src/solutions.f90 ./build/probin.o ./build/probin.o : src/probin.f90 -./build/pf_optimization.o : src/pf_optimization.f90 ./build/solutions.o ./build/probin.o ./build/feval.o ./build/level.o -./build/main_split.o : src/main_split.f90 ./build/solutions.o ./build/probin.o ./build/hooks.o ./build/feval.o ./build/level.o ./build/pf_optimization.o -./build/level.o : src/level.f90 ./build/feval.o ./build/solutions.o ./build/probin.o +./build/pf_optimization_flex.o : src/pf_optimization_flex.f90 ./build/solutions.o ./build/probin.o ./build/feval.o +./build/main_split.o : src/main_split.f90 ./build/solutions.o ./build/probin.o ./build/hooks.o ./build/feval.o ./build/pf_optimization_flex.o diff --git a/test/nagumo/Makefile b/test/nagumo/Makefile index 5e659f7f..35a7829e 100644 --- a/test/nagumo/Makefile +++ b/test/nagumo/Makefile @@ -10,11 +10,12 @@ EXE = main_split.exe include $(LIBPFASST)/Makefile.defaults -FSRC = feval.f90 hooks.f90 solutions.f90 probin.f90 pf_optimization.f90 main_split.f90 level.f90 +FSRC = feval.f90 hooks.f90 solutions.f90 probin.f90 pf_optimization_flex.f90 main_split.f90 CSRC = numpy.c OBJ = $(addprefix $(BUILDDIR)/,$(FSRC:.f90=.o) $(CSRC:.c=.o)) -FFLAGS += -I$(LIBPFASST)/include +FFLAGS += -I$(LIBPFASST)/include +# -O -Wall -fcheck=all -g -fbacktrace all: $(EXE) diff --git a/test/nagumo/probin.nml b/test/nagumo/probin.nml index 8995dcfa..3c1565c1 100644 --- a/test/nagumo/probin.nml +++ b/test/nagumo/probin.nml @@ -3,7 +3,7 @@ nlevels = 3 ! These are internal pfasst variables that can be reset - niters = 250 ! default is 5 + niters = 50 ! default is 5 ! Type of quadrature nodes (default is 1=Gauss-Lobatto) qtype = 1 @@ -13,10 +13,10 @@ abs_res_tol = 1e-11 rel_res_tol = 1e-11 - PFASST_pred = .false. - - save_residuals = .false. + PFASST_pred = .true. + save_residuals = .true. + save_errors = .false. / @@ -34,7 +34,7 @@ nvars = 32 64 128 nnodes = 3 5 9 - sizex = 20.0 + Lx = 20.0 dt = 0.1 Tfin = 5 diff --git a/test/nagumo/src/feval.f90 b/test/nagumo/src/feval.f90 index f682bcec..840063ff 100644 --- a/test/nagumo/src/feval.f90 +++ b/test/nagumo/src/feval.f90 @@ -3,26 +3,39 @@ module feval use pf_mod_ndarray_oc use pf_mod_restrict use pf_mod_imexQ_oc -! use pf_mod_misdcQ_oc + use pf_mod_misdcQ_oc + use pf_mod_fftpackage use probin use solutions - use pf_mod_fftpackage - implicit none +! include 'fftw3.f03' + +! real(pfdp), parameter :: pi = 3.141592653589793_pfdp !defined in probin already +! real(pfdp), parameter :: two_pi = 6.2831853071795862_pfdp + + type, extends(pf_user_level_t) :: ad_level_t + contains + procedure :: restrict + procedure :: interpolate + end type ad_level_t - type, extends(pf_imexQ_oc_t) :: ad_sweeper_t !generalize so we can use misdc as well? +type, extends(pf_imexQ_oc_t) :: ad_sweeper_t !generalize so we can use misdc as well? ! type, extends(pf_misdcQ_oc_t) :: ad_sweeper_t !generalize so we can use misdc as well? +! type(c_ptr) :: ffft, ifft type(pf_fft_t), pointer :: fft_tool - complex(pfdp), allocatable :: ddx(:), lap(:) ! operators - real(pfdp), pointer :: u(:,:) - real(pfdp), pointer :: ydesired(:,:) +! complex(pfdp), pointer :: wk(:) ! work space + complex(pfdp), allocatable :: lap(:) ! operators + real(pfdp), pointer :: u(:,:,:) + real(pfdp), pointer :: ydesired(:,:,:) real(pfdp) :: alpha + integer :: nsteps_per_rank, nproc, myrank + real(pfdp), allocatable:: newton_f(:), newton_fprime(:), delta_s(:), delta_sbar(:) contains procedure :: f_eval procedure :: f_comp - procedure :: initialize - procedure :: destroy +! final :: destroy0, destroy1 + end type ad_sweeper_t contains @@ -52,8 +65,14 @@ subroutine f_eval(this, y, t, level_index, f, piece, flags, idx, step) real(pfdp), pointer :: yvec(:), fvec(:), p(:) complex(pfdp), pointer :: wk(:) - integer :: l, loopstart, loopend + integer :: l, loopstart, loopend, thisstep, mystep + thisstep = 1 + if(present(step)) thisstep = step + mystep = (thisstep - 1 - this%myrank)/this%nproc + 1 + +! print *, this%myrank, thisstep, mystep, size(this%u) + select case (piece) case (1) ! Explicit piece select case (flags) @@ -62,25 +81,26 @@ subroutine f_eval(this, y, t, level_index, f, piece, flags, idx, step) fvec => get_array1d_oc(f, 1) yvec => get_array1d_oc(y, 1) if (do_imex .eq. 1) then - fvec = this%u(idx, :) - (1.0_pfdp/3.0_pfdp*yvec*yvec-1.0_pfdp)*yvec + fvec = this%u(mystep, idx, :) - (gamma*1.0_pfdp/3.0_pfdp*yvec*yvec-1.0_pfdp)*yvec else - fvec = this%u(idx, :) + fvec = this%u(mystep, idx, :) endif ! second component: p fvec => get_array1d_oc(f, 2) if (do_imex .eq. 1) then p => get_array1d_oc(y, 2) - fvec = (yvec - this%ydesired(idx,:)) - (yvec*yvec-1.0_pfdp)*p + fvec = (yvec - this%ydesired(mystep, idx,:)) - (gamma*yvec*yvec-1.0_pfdp)*p else - fvec = (yvec - this%ydesired(idx,:)) + fvec = (yvec - this%ydesired(mystep, idx,:)) end if case (1) fvec => get_array1d_oc(f, 1) if (do_imex .eq. 1) then yvec => get_array1d_oc(y, 1) - fvec = this%u(idx, :) - (1.0_pfdp/3.0_pfdp*yvec*yvec-1.0_pfdp)*yvec +! print *, yvec + fvec = this%u(mystep, idx, :) - (gamma*1.0_pfdp/3.0_pfdp*yvec*yvec-1.0_pfdp)*yvec else - fvec = this%u(idx, :) + fvec = this%u(mystep, idx, :) endif case (2) ! evaluate y-y_d @@ -88,9 +108,9 @@ subroutine f_eval(this, y, t, level_index, f, piece, flags, idx, step) yvec => get_array1d_oc(y, 1) if (do_imex .eq. 1) then p => get_array1d_oc(y, 2) - fvec = (yvec -this%ydesired(idx,:)) - (yvec*yvec-1.0_pfdp)*p + fvec = (yvec -this%ydesired(mystep, idx,:)) - (gamma*yvec*yvec-1.0_pfdp)*p else - fvec = (yvec -this%ydesired(idx,:)) + fvec = (yvec -this%ydesired(mystep, idx,:)) end if case default stop "ERROR in f_eval: only 0, 1, 2 allowed as flags" @@ -113,12 +133,17 @@ subroutine f_eval(this, y, t, level_index, f, piece, flags, idx, step) do l = loopstart, loopend yvec => get_array1d_oc(y, l) fvec => get_array1d_oc(f, l) - call this%fft_tool%get_wk_ptr(wk) - wk = yvec - call this%fft_tool%fftf() - wk = nu * this%lap * wk - call this%fft_tool%fftb() - fvec=real(wk) + call this%fft_tool%conv(yvec,this%lap,fvec) + +! wk => this%fft_tool%get_wk_ptr_1d() +! wk = yvec +! ! call fftw_execute_dft(this%ffft, wk, wk) +! ! wk = nu * this%lap * wk / size(wk) +! ! call fftw_execute_dft(this%ifft, wk, wk) +! call this%fft_tool%fftf() +! wk = nu * this%lap * wk +! call this%fft_tool%fftb() +! fvec = real(wk) end do case (3) ! third piece (misdc) @@ -127,20 +152,20 @@ subroutine f_eval(this, y, t, level_index, f, piece, flags, idx, step) ! first component: y fvec => get_array1d_oc(f, 1) yvec => get_array1d_oc(y, 1) - fvec = - (1.0_pfdp/3.0_pfdp*yvec*yvec-1.0_pfdp)*yvec + fvec = - (gamma*1.0_pfdp/3.0_pfdp*yvec*yvec-1.0_pfdp)*yvec ! second component: p fvec => get_array1d_oc(f, 2) p => get_array1d_oc(y, 2) - fvec = - (yvec*yvec-1.0_pfdp)*p + fvec = - (gamma*yvec*yvec-1.0_pfdp)*p case (1) fvec => get_array1d_oc(f, 1) yvec => get_array1d_oc(y, 1) - fvec = - (1.0_pfdp/3.0_pfdp*yvec*yvec-1.0_pfdp)*yvec + fvec = - (gamma*1.0_pfdp/3.0_pfdp*yvec*yvec-1.0_pfdp)*yvec case (2) fvec => get_array1d_oc(f, 2) yvec => get_array1d_oc(y, 1) p => get_array1d_oc(y, 2) - fvec = - (yvec*yvec-1.0_pfdp)*p + fvec = - (gamma*yvec*yvec-1.0_pfdp)*p case default stop "ERROR in f_eval: only 0, 1, 2 allowed as flags" end select @@ -152,6 +177,7 @@ subroutine f_eval(this, y, t, level_index, f, piece, flags, idx, step) end subroutine f_eval + ! Solve for y and return f2 also. subroutine f_comp(this, y, t, dtq, rhs, level_index, f, piece, flags) class(ad_sweeper_t), intent(inout) :: this @@ -164,13 +190,17 @@ subroutine f_comp(this, y, t, dtq, rhs, level_index, f, piece, flags) integer, intent(in ) :: piece integer, intent(in ) :: flags - real(pfdp), pointer :: s(:), rhsvec(:), fvec(:), yold(:), p(:) - complex(pfdp), pointer :: wk(:) + real(pfdp), pointer :: s(:), rhsvec(:), fvec(:), p(:) +! complex(pfdp), pointer :: wk(:) type(pf_fft_t), pointer :: fft - integer :: l, loopstart, loopend + integer :: l, loopstart, loopend, newtoniter + real(pfdp) :: delta_s_norm2, damping + fft => this%fft_tool - call fft%get_wk_ptr(wk) +! wk => fft%get_wk_ptr_1d() + + if (piece == 2) then select case (flags) @@ -191,13 +221,24 @@ subroutine f_comp(this, y, t, dtq, rhs, level_index, f, piece, flags) s => get_array1d_oc(y, l) rhsvec => get_array1d_oc(rhs, l) fvec => get_array1d_oc(f, l) - - wk = rhsvec - call fft%fftf() - wk = wk / (1.0_pfdp - nu*dtq*this%lap) - call fft%fftb() - s = real(wk) + call fft%conv(rhsvec,1.0_pfdp/(1.0_pfdp - dtq*this%lap),s) fvec = (s - rhsvec) / dtq + +! ! wk => this%wk +! wk = rhsvec +! ! call fftw_execute_dft(this%ffft, wk, wk) +! ! wk = wk / (1.0_pfdp - nu*dtq*this%lap) / size(wk) +! ! call fftw_execute_dft(this%ifft, wk, wk) +! call fft%fftf() +! wk = wk / (1.0_pfdp - nu*dtq*this%lap) +! call fft%fftb() +! s = real(wk) +! fvec = (s - rhsvec) / dtq +! ! wk = s +! ! call fftw_execute_dft(this%ffft, wk, wk) +! ! wk = nu * this%lap * wk / size(wk) +! ! call fftw_execute_dft(this%ifft, wk, wk) +! ! fvec = real(wk) end do else if (piece == 3) then select case (flags) @@ -205,35 +246,77 @@ subroutine f_comp(this, y, t, dtq, rhs, level_index, f, piece, flags) fvec => get_array1d_oc(f, 1) rhsvec => get_array1d_oc(rhs, 1) s => get_array1d_oc(y, 1) - allocate(yold(size(s))) - yold=s - s = rhsvec/(1.0_pfdp+dtq*(1.0_pfdp/3.0_pfdp*yold*yold-1.0_pfdp)) - fvec = - (1.0_pfdp/3.0_pfdp*s*s-1.0_pfdp)*s - deallocate(yold) +! allocate(yold(size(s))) +! yold=s + s = rhsvec/(1.0_pfdp+dtq*(gamma*1.0_pfdp/3.0_pfdp*s*s-1.0_pfdp)) + fvec = - (gamma*1.0_pfdp/3.0_pfdp*s*s-1.0_pfdp)*s +! deallocate(yold) fvec => get_array1d_oc(f, 2) p => get_array1d_oc(y, 2) s => get_array1d_oc(y, 1) rhsvec => get_array1d_oc(rhs, 2) - p = rhsvec/(1.0_pfdp + dtq*(s*s-1.0_pfdp)) - fvec = - (s*s-1.0_pfdp)*p - case (1) + p = rhsvec/(1.0_pfdp + dtq*(gamma*s*s-1.0_pfdp)) + fvec = - (gamma*s*s-1.0_pfdp)*p + case (1) ! solving y+dt*y*(1/3*yold*yold-1.0) = rhs fvec => get_array1d_oc(f, 1) rhsvec => get_array1d_oc(rhs, 1) s => get_array1d_oc(y, 1) - allocate(yold(size(s))) - yold=s - s = rhsvec/(1.0_pfdp+dtq*(1.0_pfdp/3.0_pfdp*yold*yold-1.0_pfdp)) - fvec = - (1.0_pfdp/3.0_pfdp*s*s-1.0_pfdp)*s - deallocate(yold) - case (2) + + ! with lagging: avoid nonlinear solve + if(lagging .eqv. .true.) then + s = rhsvec/(1.0_pfdp+dtq*(gamma*1.0_pfdp/3.0_pfdp*s*s-1.0_pfdp)) + else + ! without lagging: use plain Newton to solve, damping with natural monotonicity test + newtoniter = 0 + this%newton_f = gamma*1.0_pfdp/3.0_pfdp*dtq*s*s*s+ (1.0_pfdp-dtq)*s- rhsvec + do +! if (maxval(abs(this%newton_f)) < 1e-6) exit + this%newton_fprime = gamma*dtq*s*s+ (1.0_pfdp-dtq) + if (maxval(abs(this%newton_fprime)) < 1e-12) then + print *, "Newton: derivative is close to zero", & + maxval(abs(this%newton_f)), maxval(this%newton_fprime), minval(this%newton_fprime) + exit + end if + this%delta_s = this%newton_f/this%newton_fprime + delta_s_norm2 = sum(this%delta_s*this%delta_s) + + if (sqrt(delta_s_norm2) < 1e-12 ) exit + damping = 1 + do + s = s - damping * this%delta_s + this%newton_f = gamma*1.0_pfdp/3.0_pfdp*dtq*s*s*s+ (1.0_pfdp-dtq)*s- rhsvec + this%delta_sbar = this%newton_f/this%newton_fprime +! print *, newtoniter, damping, delta_s_norm2, sum(this%delta_sbar*this%delta_sbar) + if ( sum(this%delta_sbar*this%delta_sbar) .le. (1-damping/2.0)*delta_s_norm2 ) exit + s = s + damping * this%delta_s + damping = damping/2 + if(damping < 1e-7) exit + end do + if (damping < 1e-7) then + print *, "Newton: no damping parameter found", & + maxval(abs(this%newton_f)), maxval(this%newton_fprime), minval(this%newton_fprime) + exit + end if + newtoniter = newtoniter + 1 + if (newtoniter > max_newton_iter) then + print *, "Newton did not converge in maximum iterations", & + maxval(abs(this%newton_f)), maxval(this%newton_fprime), minval(this%newton_fprime) + exit + end if +! s = s - this%newton_f/this%newton_fprime + end do + end if + + fvec = - (gamma*1.0_pfdp/3.0_pfdp*s*s-1.0_pfdp)*s + case (2) ! solving p+dt*(y*y-1.0)p=rhs fvec => get_array1d_oc(f, 2) p => get_array1d_oc(y, 2) s => get_array1d_oc(y, 1) rhsvec => get_array1d_oc(rhs, 2) - p = rhsvec/(1.0_pfdp + dtq*(s*s-1.0_pfdp)) - fvec = - (s*s-1.0_pfdp)*p + p = rhsvec/(1.0_pfdp + dtq*(gamma*s*s-1.0_pfdp)) + fvec = - (gamma*s*s-1.0_pfdp)*p case default stop "ERROR in f3eval: only 0, 1, 2 allowed as flags" end select @@ -244,109 +327,128 @@ subroutine f_comp(this, y, t, dtq, rhs, level_index, f, piece, flags) end subroutine f_comp - subroutine sweeper_setup(sweeper, nvars, nnodes) + + subroutine setup(sweeper, nvars, nnodes, nsteps, nproc) class(pf_sweeper_t), intent(inout) :: sweeper - integer, intent(in) :: nvars, nnodes + integer, intent(in) :: nvars, nnodes, nsteps, nproc class(ad_sweeper_t), pointer :: this integer :: i +! type(c_ptr) :: wk real(pfdp) :: kx, dx this => as_ad_sweeper(sweeper) this%use_LUq = .true. + ! create in-place, complex fft plans +! wk = fftw_alloc_complex(int(nvars, c_size_t)) +! call c_f_pointer(wk, this%wk, [nvars]) allocate(this%fft_tool) - call this%fft_tool%fft_setup([nvars],1,[sizex]) + call this%fft_tool%fft_setup([nvars],1,[Lx]) + +! this%ffft = fftw_plan_dft_1d(nvars, & +! this%wk, this%wk, FFTW_FORWARD, FFTW_ESTIMATE) +! this%ifft = fftw_plan_dft_1d(nvars, & +! this%wk, this%wk, FFTW_BACKWARD, FFTW_ESTIMATE) ! create operators - allocate(this%ddx(nvars)) +! allocate(this%ddx(nvars)) allocate(this%lap(nvars)) +! do i = 1, nvars +! if (i <= nvars/2+1) then +! kx = two_pi / Lx * dble(i-1) +! else +! kx = two_pi / Lx * dble(-nvars + i - 1) +! end if +! +! this%ddx(i) = (0.0_pfdp, 1.0_pfdp) * kx +! +! if (kx**2 < 1e-13) then +! this%lap(i) = 0.0_pfdp +! else +! this%lap(i) = -kx**2 +! end if +! end do call this%fft_tool%make_lap(this%lap) - call this%fft_tool%make_deriv(this%ddx) +! call this%fft_tool%make_deriv_1d(this%ddx) + this%nproc = nproc + this%nsteps_per_rank = nsteps + this%myrank = -1 + + if(lagging .eqv. .false.) then + allocate(this%newton_f(nvars)) + allocate(this%newton_fprime(nvars)) + allocate(this%delta_s(nvars)) + allocate(this%delta_sbar(nvars)) + end if + ! allocate control and desired state - allocate(this%u(nnodes,nvars)) - allocate(this%ydesired(nnodes,nvars)) + allocate(this%u(nsteps,nnodes,nvars)) + allocate(this%ydesired(nsteps,nnodes,nvars)) !work%u = 0 !work%ydesired = 0 - end subroutine sweeper_setup + end subroutine setup + + subroutine setrank(sweeper, rank) + class(pf_sweeper_t), intent(inout) :: sweeper + integer, intent(in) :: rank + class(ad_sweeper_t), pointer :: this + this => as_ad_sweeper(sweeper) + this%myrank = rank + end subroutine setrank - subroutine initialize(this, pf,level_index) - class(ad_sweeper_t), intent(inout) :: this - type(pf_pfasst_t), intent(inout),target :: pf - integer, intent(in) :: level_index - - integer :: nx, nnodes - nx=pf%levels(level_index)%lev_shape(1) - nnodes=pf%levels(level_index)%nnodes - ! Call the imex sweeper initialize - call this%imexQ_oc_initialize(pf,level_index) + subroutine destroy(sweeper) + class(pf_sweeper_t), intent(inout) :: sweeper + class(ad_sweeper_t), pointer :: this + this => as_ad_sweeper(sweeper) +! deallocate(this%wk) +! deallocate(this%ddx) + deallocate(this%lap) +! call fftw_destroy_plan(this%ffft) +! call fftw_destroy_plan(this%ifft) +! + call this%fft_tool%fft_destroy() + deallocate(this%fft_tool) - this%use_LUq = .true. - - allocate(this%fft_tool) - call this%fft_tool%fft_setup([nx],1,[sizex]) + if(allocated(this%newton_f)) deallocate(this%newton_f) + if(allocated(this%newton_fprime)) deallocate(this%newton_fprime) + if(allocated(this%delta_s)) deallocate(this%delta_s) + if(allocated(this%delta_sbar)) deallocate(this%delta_sbar) + + deallocate(this%u) + deallocate(this%ydesired) - ! create operators - allocate(this%ddx(nx)) - allocate(this%lap(nx)) - call this%fft_tool%make_lap(this%lap) - call this%fft_tool%make_deriv(this%ddx) +! call this%destroy(lev) - ! allocate control and desired state - allocate(this%u(nnodes,nx)) - allocate(this%ydesired(nnodes,nx)) - !work%u = 0 - !work%ydesired = 0 - end subroutine initialize - + end subroutine destroy - subroutine initialize_oc(s, t0, dt, nodes, nvars, alpha) + subroutine initialize_oc(s, alpha) class(pf_sweeper_t), intent(inout) :: s - real(pfdp), intent(in) :: nodes(:), t0, dt, alpha - integer, intent(in) :: nvars + real(pfdp), intent(in) :: alpha - integer :: nnodes, i + integer :: nnodes, i, nsteps, step class(ad_sweeper_t), pointer :: sweeper sweeper => as_ad_sweeper(s) - - nnodes = size(nodes) - - do i = 1, nnodes - sweeper%u(i,:) = 0.0_pfdp -! call exact_rhs(t0+dt*nodes(i), nvars, work%u(i,:)) -! work%u(i,:) = work%u(i,:)*0.5 + + nsteps = size(sweeper%u,1) + nnodes = size(sweeper%u,2) + do step=1, nsteps + do i=1, nnodes + sweeper%u(step,i,:) = 0.0_pfdp + end do end do sweeper%alpha = alpha end subroutine initialize_oc - subroutine destroy(this,pf,level_index) - class(ad_sweeper_t), intent(inout) :: this - type(pf_pfasst_t), target, intent(inout) :: pf - integer, intent(in) :: level_index - - !> Destroy imex sweeper - call this%imexQ_oc_destroy(pf,level_index) - - !> Destroy local stuff - deallocate(this%ddx) - deallocate(this%lap) - - deallocate(this%u) - deallocate(this%ydesired) - - call this%fft_tool%fft_destroy() - deallocate(this%fft_tool) - - end subroutine destroy - - subroutine fill_ydesired_nagumo(s, pf) + subroutine fill_ydesired_nagumo(s, pf, step) class(pf_sweeper_t), intent(inout) :: s type(pf_pfasst_t), intent(inout) :: pf + integer, intent(in) :: step integer :: nnodes, i character(len=256) :: fname class(ad_sweeper_t), pointer :: sweeper @@ -354,7 +456,12 @@ subroutine fill_ydesired_nagumo(s, pf) nnodes = pf%levels(pf%nlevels)%nnodes do i = 1, nnodes - call pf%levels(pf%nlevels)%Q(i)%pack(sweeper%ydesired(i,:), 1) + call pf%levels(pf%nlevels)%Q(i)%pack(sweeper%ydesired(step,i,:), 1) +! write(fname, "('y_d','r',i0.2,'s',i0.2,'m',i0.2,'.npy')") pf%rank, step, i + write(fname, "('y_d','r',i0.2,'m',i0.2,'.npy')") (step-1)*sweeper%nproc + sweeper%myrank, i + +! call ndarray_dump_numpy(trim(pf%outdir)//c_null_char,trim(fname)//c_null_char, ' as_ad_sweeper(s) + nsteps = size(sweeper%u, 1) nnodes = size(nodes) - - call sweeper%fft_tool%get_wk_ptr(wk) - do m = 1, nnodes - sweeper%u(m,:) = 0.0_pfdp - if ( t0+dt*nodes(m) > 2.5 ) then - wk = solAt25 - call sweeper%fft_tool%fftf() - wk = nu*sweeper%lap*wk - call sweeper%fft_tool%fftb() - sweeper%u(m,:) = (1.0_pfdp/3.0_pfdp*solAt25(:)**3 - solAt25(:) -real(wk)) *0.5 - end if + allocate(wk(size(solAt25))) +! wk => sweeper%wk +! wk => sweeper%fft_tool%get_wk_ptr_1d() + wk = solAt25 +! ! call fftw_execute_dft(sweeper%ffft, wk, wk) +! ! wk = nu * sweeper%lap * wk / size(wk) +! ! call fftw_execute_dft(sweeper%ifft, wk, wk) +! call sweeper%fft_tool%fftf() +! wk = nu*sweeper%lap*wk +! call sweeper%fft_tool%fftb() + call sweeper%fft_tool%conv(wk,nu*sweeper%lap,wk) + + do n=1, nsteps + do m = 1, nnodes + thisstep = (n-1)*sweeper%nproc + sweeper%myrank + sweeper%u(n, m,:) = 0.0_pfdp + if ( t0+thisstep*dt + dt*nodes(m) > 2.5 ) then + sweeper%u(n,m,:) = (gamma*1.0_pfdp/3.0_pfdp*solAt25(:)**3 - solAt25(:) - wk(:)) *0.5 + end if + end do end do + deallocate(wk) end subroutine + subroutine dump_stuff(pf,data, fbase) + type(pf_pfasst_t), intent(inout) :: pf + real(pfdp), intent(in) :: data(:,:,:) + character(len = *), intent(in ) :: fbase + + integer :: nnodes, i, nsteps, n + character(len=256) :: fname + + nsteps = size(data, 1) + nnodes = size(data, 2) + do n = 1, nsteps + do i=1, nnodes + write(fname, "(A,'s',i0.4,'m',i0.2,'.npy')") trim(fbase), n, i!(n-1)*stepper%nproc + stepper%myrank, i + + !call ndarray_dump_numpy(trim(pf%outdir)//c_null_char,trim(fname)//c_null_char, ' as_ad_sweeper(s) + + nsteps = size(sweeper%u, 1) + nnodes = pf%levels(pf%nlevels)%nnodes + do n = 1, nsteps + do i = 1, nnodes + write(fname, "(A,'r',i0.2,'m',i0.2,'.npy')") trim(fbase), (n-1)*sweeper%nproc + sweeper%myrank, i + + !call ndarray_dump_numpy(trim(pf%outdir)//c_null_char,trim(fname)//c_null_char, ' as_ad_sweeper(s) nvars = product(pf%levels(pf%nlevels)%lev_shape) nnodes = size(nodes,1) - + nsteps = size(sweeper%u,1) + + allocate(uexact(nvars)) allocate(udiff(nvars)) allocate(ex(nnodes)) @@ -407,40 +571,54 @@ subroutine dump_exact_control(s, pf, solAt25, t0, dt, nodes, L2errorCtrl, LinfEr LinfErrorCtrl = 0.0_pfdp LinfExactCtrl = 0.0_pfdp + L2errorCtrl = 0.0 + L2exactCtrl = 0.0 - call sweeper%fft_tool%get_wk_ptr(wk) - do i = 1, nnodes -! call exact_rhs(t0+dt*nodes(i), nvars, uexact) - uexact = 0.0_pfdp - if ( t0+dt*nodes(i) > 2.5 ) then - wk = solAt25 - call sweeper%fft_tool%fftf() - wk = nu*sweeper%lap*wk - call sweeper%fft_tool%fftb() - uexact(:) = (1.0_pfdp/3.0_pfdp*solAt25(:)**3 - solAt25(:) -real(wk)) - end if - - udiff(:) = uexact(:) - sweeper%u(i,:) - - ! compute norm - ex(i) = 0.0_pfdp - diff(i) = 0.0_pfdp - do n = 1, nvars - ex(i) = ex(i) + uexact(n)**2.0 - diff(i) = diff(i) + udiff(n)**2.0 +! wk => sweeper%wk +! wk => sweeper%fft_tool%get_wk_ptr_1d() + allocate(wk(size(solAt25))) + wk = solAt25 +! call sweeper%fft_tool%fftf() +! wk = nu*sweeper%lap*wk +! call sweeper%fft_tool%fftb() + call sweeper%fft_tool%conv(wk,nu*sweeper%lap,wk) + + do step = 1, nsteps + do i = 1, nnodes + thisstep = (step-1)*sweeper%nproc + sweeper%myrank + uexact = 0.0_pfdp + if ( t0 + thisstep*dt + dt*nodes(i) > 2.5 ) then + uexact(:) = (1.0_pfdp/3.0_pfdp*solAt25(:)**3 - solAt25(:) - wk(:)) + end if + + write(fname, "('uexact','r',i0.2,'m',i0.2,'.npy')") (step-1)*sweeper%nproc + sweeper%myrank, i +! call ndarray_dump_numpy(trim(pf%outdir)//c_null_char,trim(fname)//c_null_char, ' as_ad_sweeper(s) - nnodes = size(grad,1) - nvars = size(grad,2) + nsteps = size(grad,1) + nnodes = size(grad,2) + nvars = size(grad,3) allocate(obj(nnodes)) LinftyNormGrad = 0 L2NormGradSq = 0 - do m = 1, nnodes - grad(m,:) = grad(m,:) + sweeper%alpha * sweeper%u(m,:) - LinftyNormGrad = max(maxval(abs(grad(m,:))), LinftyNormGrad) - !obj(m) = sum((grad(m,:)**2.0))*sizex/nvars - obj(m) = 0.0_pfdp - do n = 1, nvars - obj(m) = obj(m) + grad(m,n)**2.0 - end do - obj(m) = obj(m)*sizex/(nvars) - end do + do step = 1, nsteps + do m = 1, nnodes + grad(step,m,:) = grad(step,m,:) + sweeper%alpha * sweeper%u(step,m,:) + LinftyNormGrad = max(maxval(abs(grad(step,m,:))), LinftyNormGrad) + !obj(m) = sum((grad(m,:)**2.0))*Lx/nvars + obj(m) = 0.0_pfdp + do n = 1, nvars + obj(m) = obj(m) + grad(step,m,n)**2.0 + end do + obj(m) = obj(m)*Lx/(nvars) + end do - L2NormGradSq = 0.0 - do m=1, nnodes-1 - L2normGradSq = L2normGradSq + (obj(m)+obj(m+1))*(nodes(m+1)-nodes(m))*dt + do m=1, nnodes-1 + L2normGradSq = L2normGradSq + (obj(m)+obj(m+1))*(nodes(m+1)-nodes(m))*dt + end do end do - + L2NormGradSq = 0.5*L2NormGradSq !0.5 for trapezoidal rule deallocate(obj) end subroutine construct_gradient - subroutine update_control(s, delta, step) + subroutine update_control(s, delta, stepsize) class(pf_sweeper_t), intent(inout) :: s - real(pfdp), intent(in) :: delta(:,:), step + real(pfdp), intent(in) :: delta(:,:,:), stepsize - integer :: m, nnodes + integer :: m, nnodes, step, nsteps class(ad_sweeper_t), pointer :: sweeper sweeper => as_ad_sweeper(s) - nnodes = size(delta,1) - - do m = 1, nnodes - sweeper%u(m,:) = sweeper%u(m,:) + step * delta(m,:) + nnodes = size(delta,2) + nsteps = size(delta,1) + + do step = 1, nsteps + do m = 1, nnodes + sweeper%u(step,m,:) = sweeper%u(step,m,:) + stepsize * delta(step,m,:) + end do end do end subroutine update_control @@ -517,29 +701,30 @@ subroutine control_L2Q(s, dt, nodes, nvars, L2normSq) real(pfdp), intent(out) :: L2normSq real(pfdp), pointer :: obj(:) - integer :: m, n, nnodes + integer :: m, n, nnodes, step, nsteps class(ad_sweeper_t), pointer :: sweeper; sweeper => as_ad_sweeper(s) + nsteps = size(sweeper%u, 1) nnodes = size(nodes) allocate(obj(nnodes)) - do m=1, nnodes - !print *, 'sum u/nvars = ', sum(work%u(m,:))*sizex/nvars, 'sum u2/nvars = ', sum(work%u(m,:)**2.0)*sizex/nvars, & - ! maxval(work%u(m,:)), minval(work%u(m,:)) - obj(m) = 0.0_pfdp - !obj(m) = sum(work%u(m,:)**2.0)*sizex/nvars !rectangle rule - do n = 1, nvars - obj(m) = obj(m) + sweeper%u(m,n)**2.0 - end do - obj(m) = obj(m)*sizex/(nvars) - end do - L2normSq = 0.0 - do m=1, nnodes-1 - L2normSq = L2normSq + (obj(m)+obj(m+1))*(nodes(m+1)-nodes(m))*dt + + do step=1, nsteps + do m=1, nnodes + obj(m) = 0.0_pfdp + do n = 1, nvars + obj(m) = obj(m) + sweeper%u(step,m,n)**2.0 + end do + obj(m) = obj(m)*Lx/(nvars) + end do + + do m=1, nnodes-1 + L2normSq = L2normSq + (obj(m)+obj(m+1))*(nodes(m+1)-nodes(m))*dt + end do end do L2normSq = 0.5*L2normSq !0.5 for trapezoidal rule @@ -548,11 +733,11 @@ subroutine control_L2Q(s, dt, nodes, nvars, L2normSq) end subroutine control_L2Q - subroutine objective_function(s, sol, nvars, m, objective) + subroutine objective_function(s, sol, nvars, m, objective, step) ! actually just the tracking part of the objective class(pf_sweeper_t), intent(inout) :: s class(pf_encap_t), intent(in ) :: sol - integer, intent(in) :: nvars, m + integer, intent(in) :: nvars, m, step real(pfdp), intent(out) :: objective real(pfdp), pointer :: y(:), f(:) !, obj(:) @@ -565,12 +750,12 @@ subroutine objective_function(s, sol, nvars, m, objective) allocate(f(nvars)) y => get_array1d_oc(sol, 1) - f = (y -sweeper%ydesired(m,:)) + f = (y -sweeper%ydesired(step,m,:)) objective = 0.0_pfdp do n = 1, nvars objective = objective + f(n)**2 end do - objective = objective * sizex / (nvars) + objective = objective * Lx / (nvars) deallocate(f) end subroutine objective_function @@ -578,27 +763,237 @@ end subroutine objective_function ! this shouldbe in a separate module function compute_scalar_prod(f, g, nodes, dt) result(r) - real(pfdp), intent(in) :: f(:,:), g(:,:), nodes(:), dt + real(pfdp), intent(in) :: f(:,:,:), g(:,:,:), nodes(:), dt real(pfdp) :: r real(pfdp), pointer :: obj(:) - integer :: nnodes, nvars, m, i + integer :: nnodes, nvars, m, i, step, nsteps r = 0.0_pfdp - nnodes = size(f,1) - nvars = size(f,2) + nsteps = size(f,1) + nnodes = size(f,2) + nvars = size(f,3) allocate(obj(nnodes)) - do m = 1, nnodes - obj(m) = 0.0_pfdp - do i = 1, nvars - obj(m) = obj(m) + f(m,i)*g(m,i) + do step = 1, nsteps + do m = 1, nnodes + obj(m) = 0.0_pfdp + do i = 1, nvars + obj(m) = obj(m) + f(step,m,i)*g(step,m,i) + end do + obj(m) = obj(m)*Lx/(nvars) + end do + do m=1, nnodes-1 + r = r + (obj(m)+obj(m+1))*(nodes(m+1)-nodes(m))*dt end do - obj(m) = obj(m)*sizex/(nvars) - end do - do m=1, nnodes-1 - r = r + (obj(m)+obj(m+1))*(nodes(m+1)-nodes(m))*dt end do r = r * 0.5 deallocate(obj) end function compute_scalar_prod + subroutine restrict_control(sG, sF) + class(pf_sweeper_t), intent(inout) :: sG, sF + + real(pfdp), pointer :: uF(:,:,:), uG(:,:,:) + integer :: nvarF, nvarG, xrat, nnodesF, nnodesG, trat, m, nsteps, step + + class(ad_sweeper_t), pointer :: sweeperF, sweeperG + sweeperF => as_ad_sweeper(sF) + sweeperG => as_ad_sweeper(sG) + + uF => sweeperF%u + uG => sweeperG%u + + nsteps = size(uF,1) + nnodesF = size(uF,2) + nnodesG = size(uG,2) + nvarF = size(uF,3) + nvarG = size(uG,3) + + xrat = nvarF / nvarG + trat = ceiling(real(nnodesF) / real(nnodesG)) +! print *, 'restrict u', xrat, trat + + do step=1,nsteps + uG(step,:,:) = uF(step,::trat,::xrat) + end do + end subroutine restrict_control + + + subroutine restrict_ydesired(sG, sF) + class(pf_sweeper_t), intent(inout) :: sG, sF + + real(pfdp), pointer :: ydesiredF(:,:,:), ydesiredG(:,:,:) + integer :: nvarF, nvarG, xrat, nnodesF, nnodesG, trat, m, nsteps, step + + class(ad_sweeper_t), pointer :: sweeperF, sweeperG + sweeperF => as_ad_sweeper(sF) + sweeperG => as_ad_sweeper(sG) + + ydesiredF => sweeperF%ydesired + ydesiredG => sweeperG%ydesired + + nsteps = size(ydesiredF, 1) + nnodesF = size(ydesiredF,2) + nnodesG = size(ydesiredG,2) + nvarF = size(ydesiredF,3) + nvarG = size(ydesiredG,3) + + xrat = nvarF / nvarG + trat = ceiling(real(nnodesF) / real(nnodesG)) +! print *, 'restrict ydesired', xrat, trat + + do step=1,nsteps + ydesiredG(step,:,:) = ydesiredF(step,::trat,::xrat) + end do + end subroutine restrict_ydesired + + + subroutine restrict_for_adjoint(pf, t0, dt, which, step) + type(pf_pfasst_t), intent(inout) :: pf + real(pfdp), intent(in) :: t0, dt + integer, intent(in) :: which, step + real(pfdp), pointer :: tF(:), tG(:) !zF(:), zG(:) + integer :: l, m !, nnodesF, nnodesG, nvarsF, nvarsG + +! call pf%levels(pf%nlevels)%ulevel%sweeper%evaluate_all(pf%levels(pf%nlevels), t0+dt*pf%levels(pf%nlevels)%nodes, which, step) + do l = pf%nlevels, 2, -1 +! allocate(tF(pf%levels(l)%nnodes)) +! allocate(tG(pf%levels(l-1)%nnodes)) +! tF = pf%state%t0 + pf%state%dt*pf%levels(l)%nodes +! tG = pf%state%t0 + pf%state%dt*pf%levels(l-1)%nodes + call restrict_ts(pf%levels(l), pf%levels(l-1), pf%levels(l)%Q, pf%levels(l-1)%Q, t0+dt*pf%levels(l)%nodes, which) !tF +! call pf%levels(l-1)%ulevel%sweeper%evaluate_all(pf%levels(l-1), t0+dt*pf%levels(l-1)%nodes, which, step) +! deallocate(tF) +! deallocate(tG) + end do + end subroutine restrict_for_adjoint + + subroutine restrict_and_evaluate(pf, t0, dt, which, step) + type(pf_pfasst_t), intent(inout) :: pf + real(pfdp), intent(in) :: t0, dt + integer, intent(in) :: which, step + real(pfdp), pointer :: tF(:), tG(:) !zF(:), zG(:) + integer :: l, m !, nnodesF, nnodesG, nvarsF, nvarsG + +! call pf%levels(pf%nlevels)%ulevel%sweeper%evaluate_all(pf%levels(pf%nlevels), t0+dt*pf%levels(pf%nlevels)%nodes, which, step) + do l = pf%nlevels, 2, -1 +! allocate(tF(pf%levels(l)%nnodes)) +! allocate(tG(pf%levels(l-1)%nnodes)) +! tF = pf%state%t0 + pf%state%dt*pf%levels(l)%nodes +! tG = pf%state%t0 + pf%state%dt*pf%levels(l-1)%nodes +! call restrict_sdc(pf%levels(l), pf%levels(l-1), pf%levels(l)%Q, pf%levels(l-1)%Q, .false., t0+dt*pf%levels(l)%nodes, which) !tF +! call pf%levels(l-1)%ulevel%sweeper%evaluate_all(pf%levels(l-1), t0+dt*pf%levels(l-1)%nodes, which, step) + call pf_residual(pf, l, dt, which) + call restrict_time_space_fas(pf, t0, dt, l, flags=which, mystep=step) +! call save(pf%levels(l), which) +! deallocate(tF) +! deallocate(tG) + end do + end subroutine restrict_and_evaluate + + + subroutine interp1(qF, qC, adF, adC) + class(ad_sweeper_t), pointer :: adF, adC + real(pfdp), pointer, intent(inout) :: qF(:), qC(:) + + class(ad_sweeper_t), pointer :: sweeperF, sweeperC +! complex(pfdp), pointer :: wkF(:), wkC(:) + type(pf_fft_t), pointer :: fftF,fftC + + integer :: NxF, NxC, xrat,i + + sweeperC => as_ad_sweeper(adC) + sweeperF => as_ad_sweeper(adF) + fftC => sweeperC%fft_tool + fftF => sweeperF%fft_tool + + NxF = size(qF) + NxC = size(qC) + xrat = NxF / NxC + + if (xrat == 1) then + qF = qC + return + endif + + call fftC%interp(qC,fftF,qF) + +! wkF => fftF%get_wk_ptr_1d() +! wkC => fftC%get_wk_ptr_1d() +! +! wkC = qC +! call fftC%fftf() +! +! wkF = 0.0d0 +! wkF(1:NxC/2) = wkC(1:NxC/2) +! wkF(NxF-NxC/2+2:NxF) = wkC(NxC/2+2:NxC) +! ! wkF = wkF*2.0_pfdp +! +! call fftF%fftb() +! qF = real(wkF) + end subroutine interp1 + + + subroutine interpolate(this, f_lev, c_lev, f_vec, c_vec, t, flags) + class(ad_level_t), intent(inout) :: this + class(pf_level_t), intent(inout) :: f_lev + class(pf_level_t), intent(inout) :: c_lev + class(pf_encap_t), intent(inout) :: f_vec, c_vec + real(pfdp), intent(in ) :: t + integer, intent(in), optional :: flags + + integer :: which + class(ad_sweeper_t), pointer :: adF, adG + real(pfdp), pointer :: f(:), g(:) + + which = 0 + if(present(flags)) which = flags + + adG => as_ad_sweeper(c_lev%ulevel%sweeper) + adF => as_ad_sweeper(f_lev%ulevel%sweeper) + + if ((which .eq. 0) .or. (which .eq. 1)) then + f => get_array1d_oc(f_vec,1) + g => get_array1d_oc(c_vec,1) + call interp1(f, g, adF, adG) + end if + if ((which .eq. 0) .or. (which .eq. 2)) then + f => get_array1d_oc(f_vec,2) + g => get_array1d_oc(c_vec,2) + call interp1(f, g, adF, adG) + end if + end subroutine interpolate + + + subroutine restrict(this, f_lev, c_lev, f_vec, c_vec, t, flags) + class(ad_level_t), intent(inout) :: this + class(pf_level_t), intent(inout) :: f_lev + class(pf_level_t), intent(inout) :: c_lev + class(pf_encap_t), intent(inout) :: f_vec, c_vec + real(pfdp), intent(in ) :: t + integer, intent(in), optional :: flags + + real(pfdp), pointer :: f(:), g(:) + + integer :: nvarF, nvarG, xrat, which + which = 0 + if(present(flags)) which = flags + + if ((which .eq. 0) .or. (which .eq. 1)) then + f => get_array1d_oc(f_vec,1) + g => get_array1d_oc(c_vec,1) + nvarF = size(f) + nvarG = size(g) + xrat = nvarF / nvarG + g = f(::xrat) + end if + if ((which .eq. 0) .or. (which .eq. 2)) then + f => get_array1d_oc(f_vec,2) + g => get_array1d_oc(c_vec,2) + nvarF = size(f) + nvarG = size(g) + xrat = nvarF / nvarG + g = f(::xrat) + end if + end subroutine restrict + end module feval diff --git a/test/nagumo/src/hooks.f90 b/test/nagumo/src/hooks.f90 index b9e22be5..4264ee82 100644 --- a/test/nagumo/src/hooks.f90 +++ b/test/nagumo/src/hooks.f90 @@ -13,7 +13,10 @@ subroutine echo_error_hook(pf, level_index) use pf_mod_utils use solutions type(pf_pfasst_t), intent(inout) :: pf - integer, intent(in ) :: level_index + integer, intent(in) :: level_index + +! class(pf_level_t), intent(inout) :: level +! type(pf_state_t), intent(in) :: state ! real(c_double) :: yexact(product(level%shape)), pexact(product(level%shape)) real(pfdp), pointer :: qend(:), q0(:) @@ -29,11 +32,11 @@ subroutine echo_error_hook(pf, level_index) qend => get_array1d_oc(pf%levels(level_index)%Q(pf%levels(level_index)%nnodes),1) q0 => get_array1d_oc(pf%levels(level_index)%Q(1),2) t = pf%state%t0+pf%state%dt -! call exact_y(t, pf%levels(level_index)%nvars, yexact) +! call exact_y(t, level%nvars, yexact) max_y=maxval(abs(qend)) ! err_y = maxval(abs(qend-yexact)) -! call exact_p(state%t0, pf%levels(level_index)%nvars, pexact) +! call exact_p(state%t0, level%nvars, pexact) max_p = maxval(abs(q0)) ! err_p = maxval(abs(q0-pexact)) @@ -43,28 +46,19 @@ subroutine echo_error_hook(pf, level_index) ynorms(m) = pf%levels(level_index)%R(m)%norm(1) end do - !call pf_residual(pf, pf%levels(level_index), state%dt, 0) -! res= pf%levels(level_index)%residual -! print '(" rank:",i5," lev:",i5," step:",i5," t=",es10.3," t0=",es10.3," iter:",i3," Max_y:",es13.6, & -! &" Max_p:",es13.6," yres:",es13.6," pres:",es13.6," abs res:",es13.6," rel res:",es13.6)', & -! pf%rank, level%index, state%step+1, t, state%t0, state%iter, max_y, max_p, & -! maxval(abs(ynorms)), maxval(abs(pnorms)), res, pf%levels(level_index)%residual_rel res = min(pf%levels(level_index)%residual, pf%levels(level_index)%residual_rel) -! print '(" rank:",i5," lev:",i5," step:",i5," iter:",i3, & -! &" Res:",es13.6)', & -! pf%rank, pf%levels(level_index)%index, state%step+1, state%iter, & -! res - print '("rank:",i4.3," step: ",i3.3," iter: ",i4.3," level: ",i2.2," res: ",es18.10)', & - pf%rank, pf%state%step+1, pf%state%iter,level_index, res - call flush - -! un = 1000+pf%state%step+1 -! write(stepstring,"(I0.3)") pf%state%step+1 + !res = pf%levels(level_index)%residual + print '(" rank:",i5," lev:",i5," step:",i5," iter:",i3," res:",es13.6)', & + pf%rank, level_index, pf%state%step+1, pf%state%iter, res + + +! un = 1000+state%step+1 +! write(stepstring,"(I0.3)") state%step+1 ! write(Vstring,"(I0.2)") N_Vcycles ! write(Kstring,"(I0.3)") kfreq ! fout = trim(foutbase)//'N_V'//trim(Vstring)//'N_step'//trim(stepstring)//'K'//trim(Kstring)//'.m' ! open(unit=un, file = fout, status = 'unknown', action = 'write', position='append') -! write(un,*) level%level,pf%state%step+1,t,pf%state%iter,max_y, err_y, max_p, err_p, res +! write(un,*) level%level,state%step+1,t,state%iter,max_y, err_y, max_p, err_p, res ! close(un) end subroutine echo_error_hook @@ -74,7 +68,7 @@ subroutine echo_residual_hook(pf, level_index) use iso_c_binding use pf_mod_utils type(pf_pfasst_t), intent(inout) :: pf - integer, intent(in ) :: level_index + integer, intent(in) :: level_index real(pfdp), pointer :: ry(:), rp(:) diff --git a/test/nagumo/src/main_split.f90 b/test/nagumo/src/main_split.f90 index f0f47f7f..4c015fe1 100644 --- a/test/nagumo/src/main_split.f90 +++ b/test/nagumo/src/main_split.f90 @@ -4,8 +4,7 @@ program main use pf_mod_ndarray_oc use pf_mod_optimization - use pf_my_level - use feval + use feval use hooks use probin use solutions @@ -27,20 +26,26 @@ program main character(len = 64) :: shell_cmd integer :: iout,nout - real(pfdp), pointer :: gradient(:,:), prevGrad(:,:), searchDir(:,:), prevSearchDir(:,:), savedAdjoint(:,:), solAt25(:) + real(pfdp), pointer :: gradient(:,:,:), prevGrad(:,:,:), searchDir(:,:,:), prevSearchDir(:,:,:), & + savedStates(:,:,:), savedAdjoint(:,:,:), solAt25(:) real(pfdp) :: LinftyNormGrad, LinftyNormU, objective, objectiveNew, L2NormUSq, L2NormGradSq, & dx, stepSize, prevStepSize, directionTimesGradient, beta, & globObj, globObjNew, globDirXGrad, prevGlobDirXGrad, globL2NormGradSq, & num, denom, globNum, globDenom, globLinftyNormGrad, & L2errorCtrl, LinfErrorCtrl, globL2errorCtrl, globLinfErrorCtrl, & L2exactCtrl, LinfEXactCtrl, globL2exactCtrl, globLinfExactCtrl, & - abs_res_tol_save, rel_res_tol_save - logical :: stepTooSmall, predict + abs_res_tol_save, rel_res_tol_save, gamma_save, & + initialGradNorm + logical :: stepTooSmall, predict, retry integer :: itersState, itersAdjoint, root, sumItersState, sumItersAdjoint character(8) :: date character(10) :: time integer :: time_start(8), time_end(8) real(pfdp) :: time_start_sec, time_end_sec + real(pfdp), pointer :: tmp(:,:), tmpnodes(:), tmprmat(:,:) + integer :: stat(MPI_STATUS_SIZE) + integer :: nsteps_per_rank, nstepsTo25, step + ! @@ -68,7 +73,16 @@ program main call pf_mpi_create(comm, MPI_COMM_WORLD) call pf_pfasst_create(pf, comm, fname=pfasst_nml) + nsteps_per_rank = nsteps / comm%nproc + + pf%state%nsteps = nsteps + + pf%save_timings = 0 + do l = 1, pf%nlevels +! pf%levels(l)%nsweeps = nsweeps(l) +! pf%levels(l)%nsweeps_pred = nsweeps_pred(l) + pf%levels(l)%nnodes = nnodes(l) ! Allocate the user specific level object @@ -77,17 +91,28 @@ program main ! Add the sweeper to the level allocate(ad_sweeper_t::pf%levels(l)%ulevel%sweeper) - - ! Set the size of the data on this level + call pf_level_set_size(pf,l,[nvars(l)]) - end do + + call setup(pf%levels(l)%ulevel%sweeper, nvars(l), nnodes(l), nsteps_per_rank, comm%nproc) + + ! Allocate the shape array for level (here just one dimension) +! allocate(pf%levels(l)%shape(1)) +! pf%levels(l)%shape(1) = nvars(l) + ! Set the size of the send/receive buffer +! pf%levels(l)%mpibuflen = nvars(l) + end do call pf_pfasst_setup(pf) - - pf%outdir = output + + pf%state%finest_level = pf%nlevels + pf%debug = .false. + do l = 1, pf%nlevels + call setrank(pf%levels(l)%ulevel%sweeper, pf%rank) + end do ! ! ! run @@ -96,155 +121,245 @@ program main ! call pf_add_hook(pf, 3, PF_PRE_PREDICTOR, echo_error_hook) ! call pf_add_hook(pf, 3, PF_POST_PREDICTOR, echo_error_hook) ! call pf_add_hook(pf,3,PF_POST_ITERATION,echo_error_hook) - call pf_add_hook(pf,pf%nlevels,PF_POST_BLOCK,echo_error_hook) -! call pf_add_hook(pf,-1,PF_POST_SWEEP,echo_error_hook) + call pf_add_hook(pf,pf%nlevels,PF_POST_BLOCK,echo_error_hook) !STEP +! call pf_add_hook(pf,-1,PF_POST_SWEEP,echo_residual_hook) ! call pf_add_hook(pf,-1,PF_POST_ITERATION,echo_residual_hook) + if(do_imex /= 1 .and. lagging .eqv. .false.) & + write(output, "(A,'_nl')") trim(output) + + write(output, "(A,'_gamma',F0.2)") trim(output), gamma + if(pf%rank == 0) print *, 'output directory for npy: ', output + pf%outdir = output + + if (len_trim(output) > 0) then + ! Make directory for Data if it does not exist +! call ndarray_mkdir(output, len_trim(output)) +! call pf_add_hook(pf, pf%nlevels, PF_POST_STEP, ndarray_oc_dump_all_hook) +! call pf_add_hook(pf, pf%nlevels, PF_POST_STEP, ndarray_oc_dump_hook) + end if if (nsteps < comm%nproc) then nsteps = comm%nproc end if + nsteps_per_rank = nsteps / pf%comm%nproc + if(nsteps_per_rank*pf%comm%nproc /= nsteps) stop "ERROR: nsteps not an integer multiple of nproc" + if(pf%rank == 0) print *, "computing ", nsteps_per_rank, "steps per CPU" + + call initialize_results(pf) + + !call system('if [ ! -e ./Dat ]; then mkdir Dat; fi') + !shell_cmd = 'if [ ! -e ./Dat/'//trim(fbase)//' ]; then mkdir Dat/'//trim(fbase)//'; fi' + !call system(shell_cmd) + ! open output files + write (fname, "(A,I0.2,A3,I0.3,A6,I0.3,A6,I0.3)") 'Niter',pf%niters,'_Nx',nvars(pf%nlevels),'_Nstep',nsteps,'_Nproc',comm%nproc + foutbase = 'Dat/'//trim(fbase)//'/'//trim(fname) +! print *,'foutbase=',foutbase +! logfilename = trim(logfile) +! if (warmstart .eq. 1) then +! predict = .false. +! else +! predict = .true. +! endif if (warmstart .eq. 1) then predict = .false. - else - predict = .true. - endif + logfile = trim(logfile)//'_warm' + if(restart_interval < max_opt_iter) then + write (logfilename, "(A,'_rest',I0.4)") trim(logfile), restart_interval + logfile = logfilename + end if + if(compress) then + write (logfilename, "(A,'_compress',I0.12)") trim(logfile), N_digits + logfile = logfilename + write (logfilename, "(A,'_storage',I0.12)") trim(logfile), storage_level + logfile = logfilename + if(inexact_adjoint) then +logfile = trim(logfile)//'_inexAdj' +end if +end if + +else +predict = .true. +logfile = trim(logfile)//'_cold' +endif +if(adapt_res_tol) then +logfile = trim(logfile)//'_adapt' +end if + +write(logfilename, "(A,'_tol',i0.3,'_optiter',i0.4,'_Nstep',i0.3,'_Nproc',i0.3,'.log')") trim(logfile), test_no, max_opt_iter, nsteps, comm%nproc + + +! open(unit=101, file = foutbase, status = 'unknown', action = 'write') ! Output the run parameters if (pf%rank == 0) then call pf_print_options(pf, 6,.TRUE.) - nout = 6 +! fout = 'Dat/'//trim(fbase)//'/'//trim(fname)//'_params.m' +! print*, fout +! open(unit=103, file = fout, status = 'unknown', action = 'write') + do iout=2,2 !1,2 + if (iout .eq. 1) then + nout = 103 + else + nout = 6 + endif - write(nout,*) 'Nproc=',comm%nproc - write(nout,*) 'Ndim=',ndim - write(nout,*) 'Nnodes=',nnodes(1:pf%nlevels) - write(nout,*) 'Nvars=',nvars(1:pf%nlevels) - write(nout,*) 'Finterp=',Finterp - write(nout,*) 'nprob=',nprob - write(nout,*) 'nsteps=',nsteps - write(nout,*) 'dt=',dt - write(nout,*) 'nu=',nu - write(nout,*) 'v=',v - write(nout,*) 'kfreq=',kfreq - write(nout,*) 'do_spec',do_spec - write(nout,*) 'fbase=',fbase - write(nout,*) 'sizex=',sizex - if (do_imex .eq. 1) then - write(nout,*) 'use IMEX sweeper' - else - write(nout,*) 'use MISDC sweeper' - endif - if (warmstart .eq. 1) then - write(nout,*) 'warm start with previous state solution' - else - write(nout,*) 'cold start with spread initial value' - endif + write(nout,*) 'Nproc=',comm%nproc + write(nout,*) 'Ndim=',ndim + write(nout,*) 'Nnodes=',nnodes(1:pf%nlevels) + write(nout,*) 'Nvars=',nvars(1:pf%nlevels) + write(nout,*) 'Finterp=',Finterp + write(nout,*) 'nprob=',nprob + write(nout,*) 'nsteps=',nsteps + write(nout,*) 'nsteps_per_rank=',nsteps_per_rank + write(nout,*) 'dt=',dt + write(nout,*) 'nu=',nu + write(nout,*) 'v=',v + write(nout,*) 'kfreq=',kfreq + write(nout,*) 'do_spec',do_spec + write(nout,*) 'fbase=',fbase + if (do_spec .eq. 0) then + write(nout,*) 'N_Vcycles=',N_Vcycles + write(nout,*) 'Nrelax',Nrelax + write(nout,*) 'mg_interp_order=',mg_interp_order + endif + write(nout,*) 'gamma=',gamma + write(nout,*) 'logfile=',logfilename + if (do_imex .eq. 1) then + write(nout,*) 'use IMEX sweeper' + else + if( lagging .eqv. .false.) then + write(nout,*) 'use MISDC sweeper (nonlinear)' + else + write(nout,*) 'use MISDC sweeper (w/ lagging)' + endif + endif + if (warmstart .eq. 1) then + write(nout,*) 'warm start with previous state solution' + else + write(nout,*) 'cold start with spread initial value' + endif + if(adapt_res_tol) then + write(nout, *) 'adapt residual tolerance, (grad_decrease, factor)=', res_tol_graddec, res_tol_factor + end if + write(nout,*) 'regularization: alpha=', alpha + write(nout,*) 'maximum number of optimization iterations=', max_opt_iter + write(nout,*) 'optimization tolerances (gradient, objective)=', tol_grad, tol_obj + end do end if + !alpha = 1e-6 !0.0_pfdp set in probin call ndarray_oc_build(q1, pf%levels(pf%nlevels)%lev_shape) do l = 1, pf%nlevels - call initialize_oc(pf%levels(l)%ulevel%sweeper, pf%rank*dt, dt, pf%levels(l)%nodes, nvars(l), alpha) + call initialize_oc(pf%levels(l)%ulevel%sweeper, alpha) end do ! for Nagumo model: solve state equation with zero right hand side to determine natural ! solution used in objective function ! solve up to high accuracy - abs_res_tol_save = abs_res_tol - rel_res_tol_save = rel_res_tol - abs_res_tol = 1e-11 - rel_res_tol = 1e-11 + gamma_save = gamma +! gamma = 1.0_pfdp + abs_res_tol_save = pf%abs_res_tol + rel_res_tol_save = pf%rel_res_tol + if(pf%rank == 0) & + print *, abs_res_tol_save, rel_res_tol_save, gamma_save - pf%q0_style = 0 - - ! set Q, F to zero - do l = 1, pf%nlevels-1 - do m = 1, pf%levels(l)%nnodes - call pf%levels(l)%Q(m)%setval(0.0_pfdp, 0) - call pf%levels(l)%pQ(m)%setval(0.0_pfdp, 0) ! only on coarser levels, also tauQ - if(m < pf%levels(l)%nnodes) then - call pf%levels(l)%tauQ(m)%setval(0.0_pfdp, 0) - call pf%levels(l)%I(m)%setval(0.0_pfdp, 0) - end if - do p = 1,size(pf%levels(l)%F(1,:)) - call pf%levels(l)%F(m,p)%setval(0.0_pfdp, 0) - end do - end do - end do - do m = 1, pf%levels(pf%nlevels)%nnodes - call pf%levels(pf%nlevels)%Q(m)%setval(0.0_pfdp, 0) - do p = 1,size(pf%levels(pf%nlevels)%F(1,:)) - call pf%levels(pf%nlevels)%F(m,p)%setval(0.0_pfdp, 0) - end do - end do +! print *, n_digits, 1.23456789, floor(1.23456789*N_digits)/dble(N_digits) + + pf%abs_res_tol = 1e-11 + pf%rel_res_tol = 1e-11 + + pf%q0_style = 0 if (pf%rank .eq. 0) then - call initial(q1, pf%rank*dt, pf%rank*dt+dt) !dt should be given by rank !initial_rd + call initial(q1, 0.0_pfdp, dt) else q1%yflatarray = 0.0_pfdp end if call pf%levels(pf%nlevels)%q0%copy(q1, 1) if (pf%rank .eq. 0) print *, ' **** solve state with zero control ***' - pf%state%pfblock=1 - call pf_pfasst_block_oc(pf, dt, nsteps, .true., 1) - ! solution at t=2.5 has to be send to all later ranks - allocate(solAt25(nvars(pf%nlevels))) - if ( abs(pf%rank+1)*dt - 2.5 .le. 1e-6 ) then - ! here for simplicity broadcast, better: create subgroup - call pf%levels(pf%nlevels)%Q(pf%levels(pf%nlevels)%nnodes)%pack(solAt25, 1) - !root = pf%rank - end if - root = floor(2.5/dt) - 1 - call mpi_bcast(solAt25, nvars(pf%nlevels), MPI_REAL8, root, pf%comm%comm, ierror) - if (pf%rank > root) then - do m = 1, pf%levels(pf%nlevels)%nnodes - call pf%levels(pf%nlevels)%Q(m)%unpack(solAt25, 1) - !call pf%levels(pf%nlevels)%encap%setval(pf%levels(pf%nlevels)%Q(m), 0.0_pfdp, 1) - end do - end if - ! now every rank has values for the objective in Q(m) -> create y_desired with this - call fill_ydesired_nagumo(pf%levels(pf%nlevels)%ulevel%sweeper, pf) - call initialize_control(pf%levels(pf%nlevels)%ulevel%sweeper, solAt25, pf%rank*dt, dt, pf%levels(pf%nlevels)%nodes) - do l = pf%nlevels-1,1,-1 - call restrict_control(pf%levels(l)%ulevel%sweeper, pf%levels(l+1)%ulevel%sweeper) - call restrict_ydesired(pf%levels(l)%ulevel%sweeper, pf%levels(l+1)%ulevel%sweeper) + ! solution at t=2.5 has to be send to all later ranks + allocate(solAt25(nvars(pf%nlevels))) + solAt25 = 0.0_pfdp + + ! only compute up to t=2.5 + nstepsTo25=floor(2.5/dt) +! if (pf%rank .eq. 0) print *, nstepsTo25 + do step=1, nsteps_per_rank ! nstepsTo25 + pf%state%pfblock = step + call pf_pfasst_block_oc(pf, dt, step*pf%comm%nproc, .true., 1, step=(step-1)*pf%comm%nproc + pf%rank) + call fill_ydesired_nagumo(pf%levels(pf%nlevels)%ulevel%sweeper, pf, step) + if ( abs(((step-1)*pf%comm%nproc + pf%rank+1)*dt-2.5) < 1e-6) then + print *, pf%rank, ((step-1)*pf%comm%nproc + pf%rank+1)*dt + call pf%levels(pf%nlevels)%Q(pf%levels(pf%nlevels)%nnodes)%pack(solAt25, 1) + +! print *, "----state at t=2.5----" +! call pf%levels(pf%nlevels)%Q(pf%levels(pf%nlevels)%nnodes)%eprint(1) +! print *, "---------------------------" + end if + +! call pf_pfasst_block_oc(pf, dt, nsteps, .true., flags=1) + if( step < nsteps_per_rank) then + call pf%levels(pf%nlevels)%qend%pack(pf%levels(pf%nlevels)%send, 1) !< Pack away your last solution + call pf_broadcast(pf, pf%levels(pf%nlevels)%send, pf%levels(pf%nlevels)%mpibuflen, pf%comm%nproc-1) + call pf%levels(pf%nlevels)%q0%unpack(pf%levels(pf%nlevels)%send, 1) !< Everyone resets their q0 + end if end do -! call dump_control_work1(pf%levels(pf%nlevels)%ctx, pf, 'uinit') - - abs_res_tol = abs_res_tol_save - rel_res_tol = rel_res_tol_save - - ! set Q, F to zero - do l = 1, pf%nlevels-1 - do m = 1, pf%levels(l)%nnodes - call pf%levels(l)%Q(m)%setval(0.0_pfdp, 0) - call pf%levels(l)%pQ(m)%setval(0.0_pfdp, 0) ! only on coarser levels, also tauQ - if(m < pf%levels(l)%nnodes) then - call pf%levels(l)%tauQ(m)%setval(0.0_pfdp, 0) - call pf%levels(l)%I(m)%setval(0.0_pfdp, 0) + + ! determine which cpu has 2.5 as final time + ! smarter way? + do step=1,nsteps_per_rank + do m=0, pf%comm%nproc-1 + if (abs(((step-1)*pf%comm%nproc + m + 1)*dt -2.5) < 1e-6 ) then + root = m end if - do p = 1,size(pf%levels(l)%F(1,:)) - call pf%levels(l)%F(m,p)%setval(0.0_pfdp, 0) - end do end do end do - +! if ( pf%rank==pf%comm%nproc-1) then +! call pf%levels(pf%nlevels)%Q(pf%levels(pf%nlevels)%nnodes)%pack(solAt25, 1) +! end if +! root = pf%comm%nproc-1 +! print *, pf%rank, root + + call mpi_bcast(solAt25, nvars(pf%nlevels), MPI_REAL8, root, pf%comm%comm, ierror) do m = 1, pf%levels(pf%nlevels)%nnodes - call pf%levels(pf%nlevels)%Q(m)%setval(0.0_pfdp, 0) - do p = 1,size(pf%levels(pf%nlevels)%F(1,:)) - call pf%levels(pf%nlevels)%F(m,p)%setval(0.0_pfdp, 0) - end do + call pf%levels(pf%nlevels)%Q(m)%unpack(solAt25, 1) + end do + ! now every rank has values for the objective in Q(m) -> fill remaining y_desired with this + do step=1, nsteps_per_rank + if( ((step-1)*pf%comm%nproc + pf%rank+1)*dt > 2.5) then + call fill_ydesired_nagumo(pf%levels(pf%nlevels)%ulevel%sweeper, pf, step) + end if + end do + call initialize_control(pf%levels(pf%nlevels)%ulevel%sweeper, solAt25, 0.0_pfdp, dt, pf%levels(pf%nlevels)%nodes) + do l = pf%nlevels-1,1,-1 + call restrict_control(pf%levels(l)%ulevel%sweeper, pf%levels(l+1)%ulevel%sweeper) + call restrict_ydesired(pf%levels(l)%ulevel%sweeper, pf%levels(l+1)%ulevel%sweeper) end do - allocate(gradient(pf%levels(pf%nlevels)%nnodes, nvars(pf%nlevels))) - allocate(prevGrad(pf%levels(pf%nlevels)%nnodes, nvars(pf%nlevels))) - allocate(searchDir(pf%levels(pf%nlevels)%nnodes, nvars(pf%nlevels))) - allocate(prevSearchDir(pf%levels(pf%nlevels)%nnodes, nvars(pf%nlevels))) - allocate(savedAdjoint(pf%levels(pf%nlevels)%nnodes, nvars(pf%nlevels))) + pf%abs_res_tol = abs_res_tol_save + pf%rel_res_tol = rel_res_tol_save + gamma = gamma_save + + if(pf%rank == 0) & + print *, pf%abs_res_tol, pf%rel_res_tol, gamma + + ! if(pf%rank == 0) & + ! open(unit=105, file = logfilename , & + ! status = 'unknown', action = 'write') + ! if(pf%rank == 0) write(105,*) 'iter ', 'L2_grad ', 'objective ', 'stepsize ' + + allocate(gradient(nsteps_per_rank, pf%levels(pf%nlevels)%nnodes, nvars(pf%nlevels))) + allocate(prevGrad(nsteps_per_rank, pf%levels(pf%nlevels)%nnodes, nvars(pf%nlevels))) + allocate(searchDir(nsteps_per_rank, pf%levels(pf%nlevels)%nnodes, nvars(pf%nlevels))) + allocate(prevSearchDir(nsteps_per_rank, pf%levels(pf%nlevels)%nnodes, nvars(pf%nlevels))) + allocate(savedStates(nsteps_per_rank, pf%levels(pf%nlevels)%nnodes, nvars(pf%nlevels))) + allocate(savedAdjoint(nsteps_per_rank, pf%levels(pf%nlevels)%nnodes, nvars(pf%nlevels))) gradient = 0 prevGrad = 0 prevGlobDirXGrad = 0.0_pfdp @@ -257,6 +372,8 @@ program main itersState = 0 itersAdjoint = 0 + retry = .true. + call date_and_time(date=date, time=time, values=time_start) if (pf%rank .eq. 0) & print *, 'start optimization on ', date, ', ', time @@ -267,28 +384,57 @@ program main ! call mpi_barrier(pf%comm%comm, ierror) if ( k .eq. 1 ) then - call evaluate_objective(pf, q1, dt, nsteps, .true., alpha, objective, L2NormUSq, savedAdjoint) + call evaluate_objective(pf, q1, dt, nsteps_per_rank, .true., alpha, objective, L2NormUSq, savedStates) itersState = itersState + pf%state%itcnt + +! if(pf%rank == pf%comm%nproc-1) then +! print *, "----state at final time----" +! call pf%levels(pf%nlevels)%qend%eprint(1) +! print *, "---------------------------" +! call dump_stuff(pf, savedStates, "y") +! end if ! exit else objective = objectiveNew !can we return globObjNew from linesearch, avoids communication; is objective needed somewhere? end if call mpi_allreduce(objective, globObj, 1, MPI_REAL8, MPI_SUM, pf%comm%comm, ierror) if(pf%rank == 0) print *, k, 'objective (L2) = ', globObj -! exit + !exit ! call mpi_barrier(pf%comm%comm, ierror) if ( k .eq. 1 ) then ! in later iterations, this is done in linesearch - call evaluate_gradient(pf, q1, dt, nsteps, .true., gradient, LinftyNormGrad, L2NormGradSq, savedAdjoint) + call evaluate_gradient(pf, q1, dt, nsteps_per_rank, .true., gradient, LinftyNormGrad, L2NormGradSq, & + savedStates, savedAdjoint) + itersAdjoint = itersAdjoint + pf%state%itcnt + +! if(pf%rank == 0) then +! print *, "----adjoint at t=0----" +! call pf%levels(pf%nlevels)%q0%eprint(2) +! print *, "---------------------------" +! call dump_stuff(pf, savedAdjoint, "p") +! end if + end if !print *, k, pf%rank, pf%nlevels, 'objective (L2) = ', objective - !print *, k, pf%rank, pf%nlevels, 'gradient (Linfty, L2) = ', LinftyNormGrad, sqrt(L2NormGradSq) -! exit +! print *, k, pf%rank, pf%nlevels, 'gradient (Linfty, L2) = ', LinftyNormGrad, sqrt(L2NormGradSq) +! exit ! done optimizing? call mpi_allreduce(L2NormGradSq, globL2NormGradSq, 1, MPI_REAL8, MPI_SUM, pf%comm%comm, ierror) call mpi_allreduce(LinftyNormGrad, globLinftyNormGrad, 1, MPI_REAL8, MPI_MAX, pf%comm%comm, ierror) if(pf%rank == 0) print *, k, 'gradient (L2, Linf) = ', sqrt(globL2NormGradSq), globLinftyNormGrad +! if(pf%rank == 0) write(105,*) k, sqrt(globL2NormGradSq), globObj, prevStepSize + if(k == 1) then + initialGradNorm = sqrt(globL2NormGradSq) + else + if(adapt_res_tol .and. sqrt(globL2NormGradSq) < res_tol_graddec*initialGradNorm) then + initialGradNorm = sqrt(globL2NormGradSq) + pf%abs_res_tol = pf%abs_res_tol * res_tol_factor + pf%rel_res_tol = pf%rel_res_tol * res_tol_factor + if(pf%rank == 0) print *, 'adapted residual tolerances: ', pf%abs_res_tol, pf%rel_res_tol + predict = .true. + end if + end if if (sqrt(globL2NormGradSq) < tol_grad) then if(pf%rank == 0) print *, 'optimality condition satisfied (gradient norm small enough), stopping' !call write_control_work1(pf%levels(pf%nlevels)%ctx, k, "u_sdc_split_final") @@ -367,21 +513,38 @@ program main ! end do ! end do ! end if - + +! if ((.not. predict) .and. mod(k,restart_interval) == 0 ) then ! use cold start every n iterations +! predict = .true. +! else if(warmstart == 1) then +! predict = .false. +! end if + stepTooSmall = .false. ! call armijo_step(pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedAdjoint, alpha, & ! globObj, globDirXGrad, objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepSize, stepTooSmall) ! call wolfe_powell_step(pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedAdjoint, alpha, & ! globObj, globDirXGrad, objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepSize, stepTooSmall) - call strong_wolfe_step(pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedAdjoint, alpha, & + call strong_wolfe_step(pf, q1, dt, nsteps_per_rank, itersState, itersAdjoint, predict, searchDir, gradient, & + savedStates, savedAdjoint, alpha, & globObj, globDirXGrad, objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepSize, stepTooSmall) - if (stepTooSmall) exit + if (stepTooSmall) then + if (adapt_res_tol .and. retry) then + initialGradNorm = sqrt(globL2NormGradSq) + pf%abs_res_tol = pf%abs_res_tol * res_tol_factor + pf%rel_res_tol = pf%rel_res_tol * res_tol_factor + if(pf%rank == 0) print *, 'adapted residual tolerances: ', pf%abs_res_tol, pf%rel_res_tol + predict = .true. + else + exit + end if + end if ! update for next iteration !prevStepSize = 10*stepSize prevStepSize = stepSize - + end do call mpi_barrier(pf%comm%comm, ierror) @@ -395,7 +558,8 @@ program main print *, 'duration [s]: ', time_end_sec-time_start_sec end if - call dump_exact_control(pf%levels(pf%nlevels)%ulevel%sweeper, pf, solAt25, pf%rank*dt, dt, pf%levels(pf%nlevels)%nodes, & + call dump_control(pf%levels(pf%nlevels)%ulevel%sweeper, pf, 'u') + call dump_exact_control(pf%levels(pf%nlevels)%ulevel%sweeper, pf, solAt25, 0.0_pfdp, dt, pf%levels(pf%nlevels)%nodes, & L2errorCtrl, LinfErrorCtrl, L2exactCtrl, LinfExactCtrl) call mpi_allreduce(L2errorCtrl, globL2errorCtrl, 1, MPI_REAL8, MPI_SUM, pf%comm%comm, ierror) call mpi_allreduce(LinfErrorCtrl, globLinfErrorCtrl, 1, MPI_REAL8, MPI_MAX, pf%comm%comm, ierror) @@ -435,10 +599,16 @@ program main ! print *, pf%rank, "destroy q1" call ndarray_oc_destroy(q1) +! print *, pf%rank, "destroy levels" + do l = 1, pf%nlevels + call destroy(pf%levels(l)%ulevel%sweeper) + end do + ! print *, pf%rank, "destroy pf" call pf_pfasst_destroy(pf) ! print *, pf%rank, "mpi_finalize" call mpi_finalize(ierror) +! call fftw_cleanup() end program main diff --git a/test/nagumo/src/numpy.c b/test/nagumo/src/numpy.c index e56f349e..29447571 100644 --- a/test/nagumo/src/numpy.c +++ b/test/nagumo/src/numpy.c @@ -20,7 +20,7 @@ void ndarray_dump_numpy(char *dname, char *fname, char endian[5], int dim, int * FILE* fp; char errmsg[BUFLEN]; char header[256*256]; - char buf[128], shp[BUFLEN]; + char buf[BUFLEN], shp[BUFLEN]; /* open output file */ snprintf(buf, BUFLEN, "%s/%s", dname, fname); diff --git a/test/nagumo/src/pf_optimization.f90 b/test/nagumo/src/pf_optimization.f90 deleted file mode 100644 index 4e5f6fe3..00000000 --- a/test/nagumo/src/pf_optimization.f90 +++ /dev/null @@ -1,525 +0,0 @@ -module pf_mod_optimization - use pfasst - use pf_mod_dtype - use pf_mod_utils - use pf_mod_mpi - use pf_mod_ndarray_oc - use pf_mod_parallel_oc - use pf_my_level - use feval - use probin, only: solve_y - use solutions - implicit none - - real(pfdp), parameter, private :: armijoDecrease = 1e-4 - real(pfdp), parameter, private :: minStepSize = 1e-6 - real(pfdp), parameter, private :: maxStepSize = 1e6 - real(pfdp), parameter, private :: stepIncFactor = 10 - real(pfdp), parameter, private :: c2 = 0.9 - integer, parameter, private :: maxIterZoom = 20 - - -contains - - subroutine evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objective, L2NormUSq, savedAdjoint, predictAdj) - type(pf_pfasst_t), intent(inout) :: pf - type(pf_ndarray_oc_t), intent(inout) :: q1 - real(pfdp), intent(in ) :: dt, alpha - integer, intent(in ) :: nsteps - logical, intent(in ) :: predict - real(pfdp), intent( out) :: objective, L2NormUSq - real(pfdp), intent(inout) :: savedAdjoint(:,:) - logical, optional, intent(in ) :: predictAdj - integer :: m - real(pfdp), pointer :: obj(:) - real(pfdp) :: t(pf%levels(pf%nlevels)%nnodes) - logical :: predictAdjoint - - predictAdjoint = .true. - if (present(predictAdj) ) predictAdjoint = predictAdj - - pf%state%itcnt = 0 ! reset iteration count here, not inside pf_pfasst_block_oc - ! how to count skipped sweeps for state? - - solve_y = .true. - - if (pf%rank .eq. 0) then - call initial(q1, 0.0_pfdp, dt) !dt should be given by rank !initial_rd - else - q1%yflatarray = 0.0_pfdp - end if - if ((pf%rank .eq. 0) .or. predict) then - call pf%levels(pf%nlevels)%q0%copy(q1, 1) !do this only for rank 0 or predict = true? - end if - -! if (do_mixed .eq. 1 ) then -! q1%pflatarray = 0.0_pfdp -! call pf%levels(pf%nlevels)%qend%copy(q1, 2) -! if( predict .eqv. .false. .and. predictAdjoint .eqv. .true. ) then -! ! call predictor just for adjoint part -! call pf_predictor(pf, pf%rank*dt, dt, 2) -! ! do m = 1, pf%levels(pf%nlevels)%nnodes -! ! call pf%levels(pf%nlevels)%encap%unpack(pf%levels(pf%nlevels)%Q(m), savedAdjoint(m,:), 2) -! ! end do -! ! -! ! t = pf%rank*dt + dt*pf%levels(pf%nlevels)%nodes -! ! call pf%levels(pf%nlevels)%sweeper%evaluate_all(pf%levels(pf%nlevels), t, 2) -! end if -! end if - - if (pf%rank .eq. 0) print *, ' ********* solve state **************' - -! if (do_mixed .eq. 1) then -! call pf_pfasst_block(pf, dt, nsteps, predict, 0) -! ! need to save values in adjoint component of Q(m)! -! ! if(pf%rank .eq. 0) print *, 'saving adjoint' -! do m = 1, pf%levels(pf%nlevels)%nnodes -! call pf%levels(pf%nlevels)%Q(m)%pack(savedAdjoint(m,:), 2) -! end do -! ! print *, pf%rank, maxval(savedAdjoint(1,:)) -! else - call pf_pfasst_block_oc(pf, dt, nsteps, predict, 1) -! end if - -! if (pf%rank .eq. 0) print *, ' ********* compute objective **************' - - allocate(obj(pf%levels(pf%nlevels)%nnodes)) - do m = 1, pf%levels(pf%nlevels)%nnodes -! if (pf%rank .eq. 0) print *, m - call objective_function(pf%levels(pf%nlevels)%ulevel%sweeper, pf%levels(pf%nlevels)%Q(m), & - product(pf%levels(pf%nlevels)%lev_shape), m, obj(m)) - end do - objective = 0.0 - do m = 1, pf%levels(pf%nlevels)%nnodes-1 - objective = objective + & - (obj(m)+obj(m+1))*(pf%levels(pf%nlevels)%nodes(m+1)-pf%levels(pf%nlevels)%nodes(m))*dt - end do - objective = 0.5*objective !0.5 for trapezoidal rule - call control_L2Q(pf%levels(pf%nlevels)%ulevel%sweeper, dt, pf%levels(pf%nlevels)%nodes, & - product(pf%levels(pf%nlevels)%lev_shape), L2NormUSq) - objective = 0.5*objective + 0.5*alpha*L2NormUSq - deallocate(obj) - end subroutine evaluate_objective - - - - ! assumes state solution in Q(:), component 1, of pf object - ! how to do that dimension independent? this is just for 1d because of the gradient array - subroutine evaluate_gradient(pf, q1, dt, nsteps, predict, gradient, LinftyNormGrad, L2NormGradSq, savedAdjoint) - type(pf_pfasst_t), intent(inout) :: pf - type(pf_ndarray_oc_t), target, intent(inout) :: q1 - real(pfdp), intent(in ) :: dt - integer, intent(in ) :: nsteps - logical, intent(in ) :: predict - real(pfdp), intent( out) :: gradient(:,:), LinftyNormGrad, L2NormGradSq - real(pfdp), intent(inout) :: savedAdjoint(:,:) - integer :: m - integer :: dest, source, ierror, stat(MPI_STATUS_SIZE) - real(pfdp), allocatable :: terminal(:), tosend(:) - real(pfdp) :: t(pf%levels(pf%nlevels)%nnodes) - - solve_y = .false. - - pf%state%itcnt = 0 ! reset iteration count here, not inside pf_pfasst_block_oc - ! how to count skipped sweeps for state? - - allocate(terminal(size(savedAdjoint,2))) -! allocate(tosend(size(savedAdjoint,2))) - - !call initial(q1, pf%rank*dt, (pf%rank+1)*dt) - q1%pflatarray = 0.0_pfdp ! only if no final time term in objective, otherwise this is nonzero, and terminal needs to be added to this - - terminal = 0.0_pfdp - -! if (do_mixed .eq. 1 ) then -! -! if(pf%rank /= pf%state%last) then -! source = modulo(pf%rank+1, pf%comm%nproc) -! call mpi_recv(terminal, size(savedAdjoint,2), MPI_REAL8, & -! source, 1, pf%comm%comm, stat, ierror) -! ! print *, pf%rank, maxval(terminal) -! end if -! -! savedAdjoint(1,:) = savedAdjoint(1,:) + terminal -! -! if (pf%rank /= pf%state%first) then -! dest = modulo(pf%rank-1, pf%comm%nproc) -! call mpi_send(savedAdjoint(1,:), size(savedAdjoint,2), MPI_REAL8, & !mpi_send(savedAdjoint(1,:),... -! dest, 1, pf%comm%comm, stat, ierror) -! end if -! -! q1%pflatarray = terminal -! -! ! shift solution values computed simultaneously with state by 'terminal' -! ! would be sufficient to do this only if rank /= last -! call pf%levels(pf%nlevels)%Q(1)%unpack(savedAdjoint(1,:), 2) -! do m = 2, pf%levels(pf%nlevels)%nnodes -! savedAdjoint(m,:) = savedAdjoint(m,:) + terminal -! call pf%levels(pf%nlevels)%Q(m)%unpack(savedAdjoint(m,:), 2) -! savedAdjoint(m,:) = savedAdjoint(m,:) - terminal -! end do -! savedAdjoint(1,:) = savedAdjoint(m,:) - terminal ! reset, to use as initial guess in warm start -! -! t = pf%rank*dt + dt*pf%levels(pf%nlevels)%nodes -! call pf%levels(pf%nlevels)%ulevel%sweeper%evaluate_all(pf%levels(pf%nlevels), t, 2) -! -! ! restrict Q(m) from finest to coarser levels; evaluates on coarse levels -! call restrict_for_adjoint(pf, 2); -! end if - - !if (((pf%rank+1)*dt >= Tfin) .or. predict) then - ! call pf%levels(pf%nlevels)%encap%copy(pf%levels(pf%nlevels)%qend, c_loc(q1), 2) - !end if - call pf%levels(pf%nlevels)%qend%copy(q1, 2) - ! do this only for final step or predict = true? - !call restrict_for_adjoint(pf, 1) - - - if (pf%rank .eq. 0) print *, '********* solve adjoint *************' -! call mpi_barrier(pf%comm%comm, ierror) -! if (do_mixed .eq. 1) then -! call pf_pfasst_block(pf, dt, nsteps, .false., 2) ! do not predict if do_mixed - ! try predict here instead of copying: this should be similar to solving state and adjoint completely separate -! else - if(predict) pf%q0_style = 1 - call pf_pfasst_block_oc(pf, dt, nsteps, predict, 2) !predict - pf%q0_style = 0 -! end if - - if (pf%rank .eq. 0) print *, '********* compute gradient *************' - - do m = 1, pf%levels(pf%nlevels)%nnodes - call pf%levels(pf%nlevels)%Q(m)%pack(gradient(m,:), 2) - ! if (do_mixed .eq. 1 ) gradient(m,:) = gradient(m,:) + savedAdjoint(m,:) ! in this variant, we solve the full adjoint, so no adding is needed - end do - - call construct_gradient(pf%levels(pf%nlevels)%ulevel%sweeper, gradient, pf%levels(pf%nlevels)%nodes, & - LinftyNormGrad, L2NormGradSq) - -! deallocate(tosend) - deallocate(terminal) - end subroutine evaluate_gradient - - - - subroutine armijo_step(pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedAdjoint, alpha, & - globObj, globDirXGrad, objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepSize, stepTooSmall) - type(pf_pfasst_t), intent(inout) :: pf - type(pf_ndarray_oc_t), target, intent(inout) :: q1 - real(pfdp), intent(in ) :: dt, alpha, globObj, globDirXGrad - integer, intent(in ) :: nsteps - integer, intent(inout) :: itersState, itersAdjoint - logical, intent(in ) :: predict - real(pfdp), intent(in ) :: searchDir(:,:) - real(pfdp), intent( out) :: objectiveNew, L2NormUSq - real(pfdp), intent(inout) :: stepSize, LinftyNormGrad, L2NormGradSq, gradient(:,:), savedAdjoint(:,:) - logical, intent(inout) :: stepTooSmall - - real(pfdp) :: globObjNew, directionTimesGradient, globDirXGradNew - integer :: l, ierror - !real(pfdp) :: armijoDecrease ! this should be in a structure global to the module, and set in - ! ! something like init_optimization, along with other parameters - !real(pfdp) :: minStepSize - !armijoDecrease = 1e-4 - !minStepSize = 1e-6 - - do - call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, stepSize) - !restrict control - do l = pf%nlevels-1,1,-1 - call restrict_control(pf%levels(l)%ulevel%sweeper, pf%levels(l+1)%ulevel%sweeper) - end do - - call evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedAdjoint) - itersState = itersState + pf%state%itcnt - - call mpi_allreduce(objectiveNew, globObjNew, 1, MPI_REAL8, MPI_SUM, pf%comm%comm, ierror) - if(pf%rank == 0) print *, stepSize, 'objectiveNew (L2) = ', globObjNew - - if (globObjNew < globObj + armijoDecrease*stepSize*globDirXGrad) then - ! evaluate gradient to be consistent with wolfe_powell_step - call evaluate_gradient(pf, q1, dt, nsteps, predict, gradient, LinftyNormGrad, L2NormGradSq, savedAdjoint) - itersAdjoint = itersAdjoint + pf%state%itcnt - return - end if - - call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, -stepSize) - ! no need to restrict here, coarse levels get overwritten above - stepSize = 0.5 * stepSize - if (stepSize < minStepSize) then - stepTooSmall = .true. - if (pf%rank .eq. 0) print *, 'no step found, stopping' - !call write_control(pf%levels(pf%nlevels)%ctx, k, "u_sdc_split_final") - return - end if - end do - end subroutine armijo_step - - - -! subroutine zoom(stepLowIn, stepHighIn, stepSizeIn, stepSizeOut, & -! pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedAdjoint, alpha, & -! globObj, globDirXGrad, objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepTooSmall, globObjLowIn) -! type(pf_pfasst_t), intent(inout) :: pf -! type(pf_ndarray_oc_t), target, intent(inout) :: q1 -! real(pfdp), intent(in ) :: dt, alpha, globObj, globDirXGrad -! integer, intent(in ) :: nsteps -! integer, intent(inout) :: itersState, itersAdjoint -! logical, intent(in ) :: predict -! real(pfdp), intent(in ) :: searchDir(:,:) -! real(pfdp), intent( out) :: objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepSizeOut -! real(pfdp), intent(in ) :: stepSizeIn, stepLowIn, stepHighIn, globObjLowIn -! real(pfdp), intent(inout) :: gradient(:,:), savedAdjoint(:,:) -! logical, intent(inout) :: stepTooSmall -! -! real(pfdp) :: directionTimesGradient, globObjNew, globDirXGradNew -! real(pfdp) :: stepSize, stepLow, stepHigh, globObjLow -! integer :: l, ierror, iter -! -! stepSize = stepSizeIn -! stepLow = stepLowIn -! stepHigh = stepHighIn -! globObjLow = globObjLowIn -! -! iter = 0 -! if (pf%rank .eq. 0) print *, 'in zoom' -! -! do -! call update_control(pf%levels(pf%nlevels)%ctx, searchDir, -stepSize) -! stepSize = 0.5_pfdp*(stepLow+stepHigh) -! iter = iter + 1 -! if (stepSize < minStepSize .or. iter > maxIterZoom) then -! stepTooSmall = .true. -! if (pf%rank .eq. 0) print *, 'no step found (< min or > maxIter), stopping' -! !call write_control(pf%levels(pf%nlevels)%ctx, k, "u_sdc_split_final") -! stepSizeOut = stepSize -! return -! end if -! -! call update_control(pf%levels(pf%nlevels)%ctx, searchDir, stepSize) -! !restrict control -! do l = pf%nlevels-1,1,-1 -! call restrict_control(pf%levels(l)%ctx, pf%levels(l+1)%ctx) -! end do -! -! call evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedAdjoint) -! itersState = itersState + pf%state%itcnt -! call mpi_allreduce(objectiveNew, globObjNew, 1, MPI_REAL8, MPI_SUM, pf%comm%comm, ierror) -! if(pf%rank == 0) print *, stepSize, 'objectiveNew (L2) = ', globObjNew -! -! if ( (globObjNew > globObj + armijoDecrease*stepSize*globDirXGrad) .or. & -! (globObjNew >= globObjLow) ) then -! stepHigh = stepSize -! if (pf%rank .eq. 0) print *, 'set new stepHigh to stepSize; stepHigh = ', stepHigh, 'stepLow = ', stepLow -! else -! call evaluate_gradient(pf, q1, dt, nsteps, predict, gradient, LinftyNormGrad, L2NormGradSq, savedAdjoint) -! itersAdjoint = itersAdjoint + pf%state%itcnt -! directionTimesGradient = compute_scalar_prod(searchDir, gradient, pf%levels(pf%nlevels)%nodes, dt) -! call mpi_allreduce(directionTimesGradient, globDirXGradNew, 1, MPI_REAL8, MPI_SUM, & -! pf%comm%comm, ierror) -! if (pf%rank .eq. 0) print *, 'globDirXGradNew = ', globDirXGradNew -! -! -! if (abs(globDirXGradNew) <= -c2*globDirXGrad) then -! stepSizeOut = stepSize -! return -! end if -! -! if (globDirXGradNew*(stepHigh-stepLow) >= 0) then -! if(pf%rank == 0) print *, stepSize, stepHigh, stepLow -! stepHigh = stepLow -! if (pf%rank .eq. 0) print *, 'set new stepHigh to stepLow; stepHigh = ', stepHigh, 'stepLow = ', stepLow, 'stepSize = ', stepSize -! end if -! stepLow = stepSize -! if (pf%rank .eq. 0) print *, 'set new stepLow; stepHigh = ', stepHigh, 'stepLow = ', stepLow, 'stepSize = ', stepSize -! globObjLow = globObjNew -! end if -! end do -! end subroutine zoom -! -! -! subroutine wolfe_powell_step(pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedAdjoint, alpha, & -! globObj, globDirXGrad, objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepSize, stepTooSmall) -! type(pf_pfasst_t), intent(inout) :: pf -! type(pf_ndarray_oc_t), target, intent(inout) :: q1 -! real(pfdp), intent(in ) :: dt, alpha, globObj, globDirXGrad -! integer, intent(in ) :: nsteps -! integer, intent(inout) :: itersState, itersAdjoint -! logical, intent(in ) :: predict -! real(pfdp), intent(in ) :: searchDir(:,:) -! real(pfdp), intent( out) :: objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq -! real(pfdp), intent(inout) :: stepSize, gradient(:,:), savedAdjoint(:,:) -! logical, intent(inout) :: stepTooSmall -! -! integer :: l, ierror, iter -! real(pfdp) :: prevStepSize, directionTimesGradient, globDirXGradNew, globObjNew, globObjPrev -! !, maxStepSize, minStepSize, stepIncFactor, armijoDecrease, c2, -! prevStepSize = 0.0_pfdp -! !maxStepSize = 1e6 -! !minStepSize = 1e-6 -! !stepIncFactor = 10 -! !armijoDecrease = 1e-4 -! !c2 = 0.9 -! globObjPrev = globObj -! iter = 1 -! -! do -! call update_control(pf%levels(pf%nlevels)%ctx, searchDir, stepSize) -! !restrict control -! do l = pf%nlevels-1,1,-1 -! call restrict_control(pf%levels(l)%ctx, pf%levels(l+1)%ctx) -! end do -! ! call dump_control(pf%levels(pf%nlevels)%ctx, pf, 'u1') -! -! call evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedAdjoint) -! itersState = itersState + pf%state%itcnt -! call mpi_allreduce(objectiveNew, globObjNew, 1, MPI_REAL8, MPI_SUM, pf%comm%comm, ierror) -! if(pf%rank == 0) print *, stepSize, 'objectiveNew (L2) = ', globObjNew -! -! if ( (globObjNew > globObj + armijoDecrease*stepSize*globDirXGrad) .or. & -! ( globObjNew >= globObjPrev .and. iter > 1) ) then -! call zoom(prevStepSize, stepSize, stepSize, stepSize, & -! pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedAdjoint, alpha, globObj, globDirXGrad, & -! objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepTooSmall, globObjPrev) -! return -! end if -! -! if (pf%rank .eq. 0) print *, 'evaluate new gradient' -! call evaluate_gradient(pf, q1, dt, nsteps, predict, gradient, LinftyNormGrad, L2NormGradSq, savedAdjoint) -! itersAdjoint = itersAdjoint + pf%state%itcnt -! directionTimesGradient = compute_scalar_prod(searchDir, gradient, pf%levels(pf%nlevels)%nodes, dt) -! call mpi_allreduce(directionTimesGradient, globDirXGradNew, 1, MPI_REAL8, MPI_SUM, & -! pf%comm%comm, ierror) -! if (pf%rank .eq. 0) print *, 'globDirXGradNew = ', globDirXGradNew -! if (abs(globDirXGradNew) <= -c2*globDirXGrad) then -! return -! end if -! -! if (globDirXGradNew >= 0) then -! call zoom(stepSize, prevStepSize, stepSize, stepSize, & -! pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedAdjoint, alpha, globObj, globDirXGrad, & -! objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepTooSmall, globObjNew) -! return -! end if -! -! globObjPrev = globObjNew -! prevStepSize = stepSize -! stepSize = stepIncFactor*stepSize -! iter = iter + 1 -! if (stepSize > maxStepSize) then -! stepTooSmall = .true. -! if (pf%rank .eq. 0) print *, 'no step found (> max), stopping' -! !call write_control(pf%levels(pf%nlevels)%ctx, k, "u_sdc_split_final") -! return -! end if -! call update_control(pf%levels(pf%nlevels)%ctx, searchDir, -prevStepSize) -! end do -! -! end subroutine wolfe_powell_step - - - subroutine strong_wolfe_step(pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedAdjoint, alpha, & - globObj, globDirXGrad, objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepSize, stepTooSmall) - type(pf_pfasst_t), intent(inout) :: pf - type(pf_ndarray_oc_t), target, intent(inout) :: q1 - real(pfdp), intent(in ) :: dt, alpha, globObj, globDirXGrad - integer, intent(in ) :: nsteps - integer, intent(inout) :: itersState, itersAdjoint - logical, intent(in ) :: predict - real(pfdp), intent(in ) :: searchDir(:,:) - real(pfdp), intent( out) :: objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq - real(pfdp), intent(inout) :: stepSize, gradient(:,:), savedAdjoint(:,:) - logical, intent(inout) :: stepTooSmall - integer :: l, ierror - real(pfdp) :: low, high - real(pfdp) :: directionTimesGradient, globObjNew, globDirXGradNew - logical :: first - - first = .true. - - low = 0.0_pfdp - high = stepSize - !high = 1.0_pfdp - !if (stepSize > high) high = stepSize - - do - call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, high) !stepSize) - !restrict control - do l = pf%nlevels-1,1,-1 - call restrict_control(pf%levels(l)%ulevel%sweeper, pf%levels(l+1)%ulevel%sweeper) - end do -! call dump_control(pf%levels(pf%nlevels)%ctx, pf, 'u1') - - call evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedAdjoint, first) - first = .false. - - itersState = itersState + pf%state%itcnt - call mpi_allreduce(objectiveNew, globObjNew, 1, MPI_REAL8, MPI_SUM, pf%comm%comm, ierror) - if(pf%rank == 0) print *, high, 'objectiveNew (L2) = ', globObjNew ! *, stepSize, - - call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, -high) !-stepSize) - - if (globObjNew < globObj + armijoDecrease*stepSize*globDirXGrad) then - high = 2.0*high - else ! Armijo condition not satisfied, exit loop and reduce stepsize - exit - end if - end do - - do - stepSize = 0.5*(low+high) - - if (stepSize < minStepSize ) then - stepTooSmall = .true. - if (pf%rank .eq. 0) print *, 'no step found (< min), stopping' - return - end if - - call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, stepSize) - do l = pf%nlevels-1,1,-1 - call restrict_control(pf%levels(l)%ulevel%sweeper, pf%levels(l+1)%ulevel%sweeper) - end do - - call evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedAdjoint, first) - itersState = itersState + pf%state%itcnt - call mpi_allreduce(objectiveNew, globObjNew, 1, MPI_REAL8, MPI_SUM, pf%comm%comm, ierror) - if(pf%rank == 0) print *, stepSize, 'objectiveNew (L2) = ', globObjNew - - if (globObjNew >= globObj + armijoDecrease*stepSize*globDirXGrad) then - call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, -stepSize) - high = stepSize - cycle - end if - - ! now Armijo is satisfied again - call evaluate_gradient(pf, q1, dt, nsteps, predict, gradient, LinftyNormGrad, L2NormGradSq, savedAdjoint) - itersAdjoint = itersAdjoint + pf%state%itcnt - directionTimesGradient = compute_scalar_prod(searchDir, gradient, pf%levels(pf%nlevels)%nodes, dt) - call mpi_allreduce(directionTimesGradient, globDirXGradNew, 1, MPI_REAL8, MPI_SUM, & - pf%comm%comm, ierror) - - if (pf%rank .eq. 0) print *, 'globDirXGradNew = ', globDirXGradNew, '-c2*globDirXGrad', -c2*globDirXGrad - - -! if (abs(globDirXGradNew) <= -c2*globDirXGrad) then ! strong Wolfe conditions satisfied -! return -! end if - - if (globDirXGradNew < c2*globDirXGrad) then -! if(pf%rank == 0) print *, stepSize, high, low - low = stepSize - if (pf%rank .eq. 0) print *, 'set new stepLow; stepHigh = ', high, 'stepLow = ', low, 'stepSize = ', stepSize - elseif (globDirXGradNew > -c2*globDirXGrad) then - high = stepSize - if (pf%rank .eq. 0) print *, 'set new stepHigh; stepHigh = ', high, 'stepLow = ', low, 'stepSize = ', stepSize - else - exit - end if - - call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, -stepSize) - end do - - end subroutine strong_wolfe_step - -end module pf_mod_optimization - diff --git a/test/nagumo/src/pf_optimization_flex.f90 b/test/nagumo/src/pf_optimization_flex.f90 new file mode 100644 index 00000000..ef17b633 --- /dev/null +++ b/test/nagumo/src/pf_optimization_flex.f90 @@ -0,0 +1,643 @@ +module pf_mod_optimization + use pfasst + use pf_mod_dtype + use pf_mod_utils + use pf_mod_mpi + use pf_mod_ndarray_oc + use pf_mod_parallel_oc + use feval + use solutions + implicit none + + real(pfdp), parameter, private :: armijoDecrease = 1e-4 + real(pfdp), parameter, private :: minStepSize = 1e-6 + real(pfdp), parameter, private :: maxStepSize = 1e6 + real(pfdp), parameter, private :: stepIncFactor = 10 + real(pfdp), parameter, private :: c2 = 0.1 !0.9 + integer, parameter, private :: maxIterZoom = 20 + +contains + + subroutine restrict_compress_interpolate(pf, t0, dt, coarse_level, flags) + type(pf_pfasst_t), target, intent(inout) :: pf + real(pfdp), intent(in ) :: t0, dt + integer, intent(in ) :: coarse_level, flags + class(pf_level_t), pointer :: c_lev_p + class(pf_level_t), pointer :: f_lev_p + real(pfdp), allocatable :: f_times(:), c_times(:), compressed_values(:) + class(pf_encap_t), allocatable :: cf_delta(:) ! Coarse in time but fine in space + + integer :: level_index, m, j, nvars, nnodes + + ! restrict to coarse level + do level_index = pf%nlevels,coarse_level+1,-1 + f_lev_p => pf%levels(level_index); + c_lev_p => pf%levels(level_index-1) + allocate(f_times(f_lev_p%nnodes)) + f_times = t0 + dt*f_lev_p%nodes + call restrict_ts(f_lev_p, c_lev_p, f_lev_p%Q, c_lev_p%Q, f_times, flags) + deallocate(f_times) + end do + +! if(compress) then +! nvars = product(pf%levels(coarse_level)%shape) +! nnodes = pf%levels(coarse_level)%nnodes +! +! allocate(compressed_values(nvars)) +! +! do m=1, nnodes +! call pf%levels(coarse_level)%Q(m)%pack(compressed_values,flags) +! compressed_values(:) = floor(compressed_values(:)*N_digits)/dble(N_digits) +! call pf%levels(coarse_level)%Q(m)%unpack(compressed_values,flags) +! end do +! +! deallocate(compressed_values) +! end if + + ! and interpolate again + do level_index = coarse_level+1,pf%nlevels + f_lev_p => pf%levels(level_index); + c_lev_p => pf%levels(level_index-1) + call f_lev_p%ulevel%factory%create_array(cf_delta, c_lev_p%nnodes, f_lev_p%index, f_lev_p%lev_shape) + allocate(c_times(c_lev_p%nnodes)) + c_times = t0 + dt*c_lev_p%nodes + + do m = 1, c_lev_p%nnodes + call cf_delta(m)%setval(0.0_pfdp,flags) + call f_lev_p%ulevel%interpolate(f_lev_p, c_lev_p, cf_delta(m), c_lev_p%Q(m), c_times(m), flags) + end do + call pf_apply_mat(f_lev_p%Q, 1.0_pfdp, f_lev_p%tmat, cf_delta, .true., flags) + deallocate(c_times) + call f_lev_p%ulevel%factory%destroy_array(cf_delta) + end do + + end subroutine restrict_compress_interpolate + + subroutine evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objective, L2NormUSq, savedStates) + type(pf_pfasst_t), intent(inout) :: pf + type(pf_ndarray_oc_t), target, intent(inout) :: q1 + real(pfdp), intent(in ) :: dt, alpha + integer, intent(in ) :: nsteps + logical, intent(in ) :: predict + real(pfdp), intent( out) :: objective, L2NormUSq + real(pfdp), intent(inout) :: savedStates(:,:,:) !nsteps, nnodes, nvars + integer :: m, step, nnodes, thisstep, ierror, kk, level_index + real(pfdp) :: stepobjective, roundedValue + real(pfdp), pointer :: obj(:) !, savedStatesCompressed(:) + +! ! for restrict/interpolate (storage) +! class(pf_level_t), pointer :: c_lev_p +! class(pf_level_t), pointer :: f_lev_p +! real(pfdp), allocatable :: f_times(:) !! Simulation time at fine nodes +! !--- + + nnodes = pf%levels(pf%nlevels)%nnodes + allocate(obj(nnodes)) + objective = 0 + +! allocate(savedStatesCompressed(size(savedStates,3))) + + pf%state%itcnt = 0 ! reset iteration count here, not inside pf_pfasst_block_oc + + q1%yflatarray = 0.0_pfdp + q1%pflatarray = 0.0_pfdp + if (pf%rank .eq. 0) call initial(q1, 0.0_pfdp, dt) ! sequential run, this is always set at the beginning + + if ((pf%rank .eq. 0) .or. predict) then + call pf%levels(pf%nlevels)%q0%copy(q1, 1) !do this only for rank 0 or predict = true? + end if +! call pf%levels(pf%nlevels)%q0%copy(q1, 1) + if (pf%rank .eq. 0) print *, ' ********* solve state **************' + + pf%q0_style = 0 + if(.not. predict) pf%q0_style = 2 + + do step = 1, nsteps + + thisstep = (step-1)*pf%comm%nproc + pf%rank + pf%state%pfblock = step + + +! if (nsteps == 1 ) then +! call pf_pfasst_block_oc(pf, dt, step*pf%comm%nproc, predict, 1, step=thisstep) +! else + if(.not. predict) then ! warm start, load saved state values + do m = 1, nnodes + call pf%levels(pf%nlevels)%Q(m)%unpack(savedStates(step,m,:), 1) + end do + if(compress) then + call restrict_compress_interpolate(pf, thisstep*dt, dt, storage_level, 1) +! do kk=1, size(savedStates,3) +! roundedValue = floor(pf%levels(pf%nlevels)%Q(m)%yflatarray(kk)*N_digits)/N_digits +! pf%levels(pf%nlevels)%Q(m)%yflatarray(kk) = roundedValue +! end do +! savedStatesCompressed(:)= floor(savedStates(step,m,:)*N_digits)/dble(N_digits) +! call pf%levels(pf%nlevels)%Q(m)%unpack(savedStatesCompressed(:), 1) +! +! ! restrict to coarse level +! do level_index = pf%nlevels,2,-1 +! f_lev_p => pf%levels(level_index); +! c_lev_p => pf%levels(level_index-1) +! allocate(f_times(f_lev_p%nnodes)) +! f_times = thisstep*dt + dt*f_lev_p%nodes +! call restrict_ts(f_lev_p, c_lev_p, f_lev_p%Q, c_lev_p%Q, f_times, 1) +! deallocate(f_times) +! end do +! ! and interpolate again +! do level_index = 2,pf%nlevels +! call interpolate_time_space(pf, thisstep*dt, dt, level_index, .false., 1) +! end do + + end if +! else +! call pf%levels(pf%nlevels)%Q(m)%unpack(savedStates(step,m,:), 1) +! end if +! call pf%levels(pf%nlevels)%Q(m)%unpack(savedStates(step,m,:), 1) + +! end do +! ! use correct initial value at first rank, previous one at other ranks + if (pf%rank .eq. 0) then + call pf%levels(pf%nlevels)%Q(1)%copy(pf%levels(pf%nlevels)%q0, 1) +! additional: get difference between q0 and Q(1) from savedStates and add that to the other Q(m)s? can only be done on rank 0 + else +! if (pf%rank > 0) then + call pf%levels(pf%nlevels)%q0%copy(pf%levels(pf%nlevels)%Q(1), 1) + end if + ! re-evaluate function values + call pf%levels(pf%nlevels)%ulevel%sweeper%evaluate_all(pf, pf%state%finest_level, & + thisstep*dt+dt*pf%levels(pf%nlevels)%nodes, flags=1, step=thisstep+1) +! call restrict_and_evaluate(pf, thisstep*dt, dt, 1, thisstep+1) + end if + + call pf_pfasst_block_oc(pf, dt, step*pf%comm%nproc, .true., 1, step=thisstep) +! end if + + ! evaluate objective on current step + do m = 1, nnodes + call objective_function(pf%levels(pf%nlevels)%ulevel%sweeper, pf%levels(pf%nlevels)%Q(m), & + product(pf%levels(pf%nlevels)%lev_shape), m, obj(m), step) + + ! record state solution + call pf%levels(pf%nlevels)%Q(m)%pack(savedStates(step,m,:), 1) + end do + stepobjective = 0 + do m = 1, pf%levels(pf%nlevels)%nnodes-1 + stepobjective = stepobjective + & + (obj(m)+obj(m+1))*(pf%levels(pf%nlevels)%nodes(m+1)-pf%levels(pf%nlevels)%nodes(m))*dt + end do + objective = objective+stepobjective + + ! copy/broadcase qend to q0 for next step + if( step < nsteps ) then + call pf%levels(pf%nlevels)%qend%pack(pf%levels(pf%nlevels)%send, 1) !< Pack away your last solution + call pf_broadcast(pf, pf%levels(pf%nlevels)%send, pf%levels(pf%nlevels)%mpibuflen, pf%comm%nproc-1) +! if (pf%rank .eq. 0) then + call pf%levels(pf%nlevels)%q0%unpack(pf%levels(pf%nlevels)%send, 1) !< Everyone resets their q0 +! end if +! if(pf%rank == pf%comm%nproc-1) then +! call pf_mpi_send(pf, pf%levels(pf%nlevels), 111, .true., ierror, 1) +! end if +! if(pf%rank == 0) then +! call pf_mpi_recv(pf, pf%levels(pf%nlevels), 111, .true., ierror, 1) +! call pf%levels(pf%nlevels)%qend%unpack(pf%levels(pf%nlevels)%recv, 1) +! end if + end if + end do + + objective = 0.5*objective !0.5 for trapezoidal rule + call control_L2Q(pf%levels(pf%nlevels)%ulevel%sweeper, dt, pf%levels(pf%nlevels)%nodes, & + product(pf%levels(pf%nlevels)%lev_shape), L2NormUSq) + objective = 0.5*objective + 0.5*alpha*L2NormUSq +! deallocate(savedStatesCompressed) + deallocate(obj) + + end subroutine evaluate_objective + + + subroutine evaluate_gradient(pf, q1, dt, nsteps, predict, gradient, LinftyNormGrad, L2NormGradSq, savedStates, savedAdjoint) + type(pf_pfasst_t), intent(inout) :: pf + type(pf_ndarray_oc_t), target, intent(inout) :: q1 + real(pfdp), intent(in ) :: dt + integer, intent(in ) :: nsteps + logical, intent(in ) :: predict + real(pfdp), intent( out) :: gradient(:,:,:), LinftyNormGrad, L2NormGradSq + real(pfdp), intent(inout) :: savedStates(:,:,:), savedAdjoint(:,:,:) + + integer :: m, step, nnodes, thisstep, ierror + real(pfdp), pointer :: obj(:) !, savedAdjointCompressed(:) + +! allocate(savedAdjointCompressed(size(savedAdjoint,3))) + + nnodes = pf%levels(pf%nlevels)%nnodes + q1%pflatarray = 0.0_pfdp + if ((pf%rank==pf%comm%nproc-1) .or. predict) & + call pf%levels(pf%nlevels)%qend%copy(q1, 2) ! this is only done for final time + + + pf%state%itcnt = 0 ! reset iteration count here, not inside pf_pfasst_block_oc + + pf%q0_style = 1 + if(.not. predict) pf%q0_style = 2 + + + + if (pf%rank .eq. 0) print *, '********* solve adjoint *************' + + do step = nsteps, 1, -1 + + thisstep = (step-1)*pf%comm%nproc + pf%rank + pf%state%pfblock = step + +! if (nsteps == 1) then +! call pf_pfasst_block_oc(pf, dt, step*pf%comm%nproc, predict, 2, step=thisstep) +! else + ! assign savedStates to [Q(m), 1] + do m = 1, nnodes + call pf%levels(pf%nlevels)%Q(m)%unpack(savedStates(step,m,:), 1) + end do + if(inexact_adjoint) then + call restrict_compress_interpolate(pf, thisstep*dt, dt, storage_level, 1) + end if + call restrict_for_adjoint(pf, thisstep*dt, dt, 1, thisstep+1) ! save states at all levels instead of restricting? + + if(.not. predict) then + do m = 1, nnodes + call pf%levels(pf%nlevels)%Q(m)%unpack(savedAdjoint(step,m,:), 2) + end do + if(compress) then + call restrict_compress_interpolate(pf, thisstep*dt, dt, storage_level, 2) + end if +! call pf%levels(pf%nlevels)%Q(m)%unpack(savedAdjoint(step,m,:), 2) + +! ! do kk=1, size(savedStates,3) +! ! roundedValue = floor(pf%levels(pf%nlevels)%Q(m)%yflatarray(kk)*N_digits)/N_digits +! ! pf%levels(pf%nlevels)%Q(m)%yflatarray(kk) = roundedValue +! ! end do +! savedAdjointCompressed(:)= floor(savedAdjoint(step,m,:)*N_digits)/dble(N_digits) +! call pf%levels(pf%nlevels)%Q(m)%unpack(savedAdjointCompressed(:), 2) +! ! end if +! else +! call pf%levels(pf%nlevels)%Q(m)%unpack(savedAdjoint(step,m,:), 2) +! end if +! end do + if(pf%rank==pf%comm%nproc-1) then + ! this happens in sweep anyway, but not in restriction of q0/qend at start of predictor + call pf%levels(pf%nlevels)%Q(nnodes)%copy(pf%levels(pf%nlevels)%qend, 2) + else +! if(pf%rank < pf%comm%nproc-1) then + call pf%levels(pf%nlevels)%qend%copy(pf%levels(pf%nlevels)%Q(nnodes), 2) + end if + ! re-evaluate function values + call pf%levels(pf%nlevels)%ulevel%sweeper%evaluate_all(pf, pf%state%finest_level, & + thisstep*dt+dt*pf%levels(pf%nlevels)%nodes, flags=2, step=thisstep+1) +! call restrict_and_evaluate(pf, thisstep*dt, dt, 2, thisstep+1) + end if + + call pf_pfasst_block_oc(pf, dt, step*pf%comm%nproc, .true., 2, step=thisstep) +! end if + + do m = 1, pf%levels(pf%nlevels)%nnodes + call pf%levels(pf%nlevels)%Q(m)%pack(gradient(step, m, :), 2) + savedAdjoint(step,m,:) = gradient(step,m,:) + end do + +! call pf%levels(pf%nlevels)%qend%copy(pf%levels(pf%nlevels)%q0, 2) + if(step > 1) then + call pf%levels(pf%nlevels)%q0%pack(pf%levels(pf%nlevels)%send, 2) !< Pack away your last solution + call pf_broadcast(pf, pf%levels(pf%nlevels)%send, pf%levels(pf%nlevels)%mpibuflen, 0) +! if(pf%rank == pf%comm%nproc-1) then + call pf%levels(pf%nlevels)%qend%unpack(pf%levels(pf%nlevels)%send, 2) !< Everyone resets their qend +! end if +! if(pf%rank == 0) then +! call pf_mpi_send(pf, pf%levels(pf%nlevels), 111, .true., ierror, 2) +! end if +! if(pf%rank == pf%comm%nproc-1) then +! call pf_mpi_recv(pf, pf%levels(pf%nlevels), 111, .true., ierror, 2) +! call pf%levels(pf%nlevels)%q0%unpack(pf%levels(pf%nlevels)%recv, 2) +! end if + end if + end do + + call construct_gradient(pf%levels(pf%nlevels)%ulevel%sweeper, gradient, pf%levels(pf%nlevels)%nodes, & + LinftyNormGrad, L2NormGradSq) + + pf%q0_style = 0 +! deallocate(savedAdjointCompressed) + end subroutine evaluate_gradient + +! subroutine armijo_step(pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedAdjoint, alpha, & +! globObj, globDirXGrad, objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepSize, stepTooSmall) +! type(pf_pfasst_t), intent(inout) :: pf +! type(ndarray_oc), target, intent(inout) :: q1 +! real(pfdp), intent(in ) :: dt, alpha, globObj, globDirXGrad +! integer, intent(in ) :: nsteps +! integer, intent(inout) :: itersState, itersAdjoint +! logical, intent(in ) :: predict +! real(pfdp), intent(in ) :: searchDir(:,:) +! real(pfdp), intent( out) :: objectiveNew, L2NormUSq +! real(pfdp), intent(inout) :: stepSize, LinftyNormGrad, L2NormGradSq, gradient(:,:), savedAdjoint(:,:) +! logical, intent(inout) :: stepTooSmall +! +! real(pfdp) :: globObjNew, directionTimesGradient, globDirXGradNew +! integer :: l, ierror +! !real(pfdp) :: armijoDecrease ! this should be in a structure global to the module, and set in +! ! ! something like init_optimization, along with other parameters +! !real(pfdp) :: minStepSize +! !armijoDecrease = 1e-4 +! !minStepSize = 1e-6 +! +! do +! call update_control(pf%levels(pf%nlevels)%ctx, searchDir, stepSize) +! !restrict control +! do l = pf%nlevels-1,1,-1 +! call restrict_control(pf%levels(l)%ctx, pf%levels(l+1)%ctx) +! end do +! +! call evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedAdjoint) +! itersState = itersState + pf%state%itcnt +! +! call mpi_allreduce(objectiveNew, globObjNew, 1, MPI_REAL8, MPI_SUM, pf%comm%comm, ierror) +! if(pf%rank == 0) print *, stepSize, 'objectiveNew (L2) = ', globObjNew +! +! if (globObjNew < globObj + armijoDecrease*stepSize*globDirXGrad) then +! ! evaluate gradient to be consistent with wolfe_powell_step +! call evaluate_gradient(pf, q1, dt, nsteps, predict, gradient, LinftyNormGrad, L2NormGradSq, savedAdjoint) +! itersAdjoint = itersAdjoint + pf%state%itcnt +! return +! end if +! +! call update_control(pf%levels(pf%nlevels)%ctx, searchDir, -stepSize) +! ! no need to restrict here, coarse levels get overwritten above +! stepSize = 0.5 * stepSize +! if (stepSize < minStepSize) then +! stepTooSmall = .true. +! if (pf%rank .eq. 0) print *, 'no step found, stopping' +! !call write_control(pf%levels(pf%nlevels)%ctx, k, "u_sdc_split_final") +! return +! end if +! end do +! end subroutine armijo_step +! +! +! +! subroutine zoom(stepLowIn, stepHighIn, stepSizeIn, stepSizeOut, & +! pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedStates, alpha, & +! globObj, globDirXGrad, objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepTooSmall, globObjLowIn) +! type(pf_pfasst_t), intent(inout) :: pf +! type(ndarray_oc), target, intent(inout) :: q1 +! real(pfdp), intent(in ) :: dt, alpha, globObj, globDirXGrad +! integer, intent(in ) :: nsteps +! integer, intent(inout) :: itersState, itersAdjoint +! logical, intent(in ) :: predict +! real(pfdp), intent(in ) :: searchDir(:,:,:) +! real(pfdp), intent( out) :: objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepSizeOut +! real(pfdp), intent(in ) :: stepSizeIn, stepLowIn, stepHighIn, globObjLowIn +! real(pfdp), intent(inout) :: gradient(:,:,:), savedStates(:,:,:) +! logical, intent(inout) :: stepTooSmall +! +! real(pfdp) :: directionTimesGradient, directionTimesGradientNew +! real(pfdp) :: stepSize, stepLow, stepHigh, objectiveLow +! integer :: l, ierror, iter +! +! stepSize = stepSizeIn +! stepLow = stepLowIn +! stepHigh = stepHighIn +! objectiveLow = globObjLowIn +! +! iter = 0 +! if (pf%rank .eq. 0) print *, 'in zoom' +! +! do +! call update_control(pf%levels(pf%nlevels)%ctx, searchDir, -stepSize) +! stepSize = 0.5_pfdp*(stepLow+stepHigh) +! iter = iter + 1 +! if (stepSize < minStepSize .or. iter > maxIterZoom) then +! stepTooSmall = .true. +! if (pf%rank .eq. 0) print *, 'no step found (< min or > maxIter), stopping' +! !call write_control(pf%levels(pf%nlevels)%ctx, k, "u_sdc_split_final") +! stepSizeOut = stepSize +! return +! end if +! +! call update_control(pf%levels(pf%nlevels)%ctx, searchDir, stepSize) +! !restrict control +! do l = pf%nlevels-1,1,-1 +! call restrict_control(pf%levels(l)%ctx, pf%levels(l+1)%ctx) +! end do +! +! call evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedStates) +! itersState = itersState + pf%state%itcnt +! if(pf%rank == 0) print *, stepSize, 'objectiveNew (L2) = ', objectiveNew +! +! if ( (objectiveNew > globObj + armijoDecrease*stepSize*globDirXGrad) .or. & +! (objectiveNew >= objectiveLow) ) then +! stepHigh = stepSize +! if (pf%rank .eq. 0) print *, 'set new stepHigh to stepSize; stepHigh = ', stepHigh, 'stepLow = ', stepLow +! else +! call evaluate_gradient(pf, q1, dt, nsteps, predict, gradient, LinftyNormGrad, L2NormGradSq, savedStates) +! itersAdjoint = itersAdjoint + pf%state%itcnt +! directionTimesGradientNew = compute_scalar_prod(searchDir, gradient, pf%levels(pf%nlevels)%nodes, dt) +! +! if (abs(directionTimesGradientNew) <= -c2*globDirXGrad) then +! stepSizeOut = stepSize +! return +! end if +! +! if (directionTimesGradientNew*(stepHigh-stepLow) >= 0) then +! if(pf%rank == 0) print *, stepSize, stepHigh, stepLow +! stepHigh = stepLow +! if (pf%rank .eq. 0) print *, 'set new stepHigh to stepLow; stepHigh = ', stepHigh, 'stepLow = ', stepLow, 'stepSize = ', stepSize +! end if +! stepLow = stepSize +! if (pf%rank .eq. 0) print *, 'set new stepLow; stepHigh = ', stepHigh, 'stepLow = ', stepLow, 'stepSize = ', stepSize +! objectiveLow = objectiveNew +! end if +! end do +! end subroutine zoom +! +! +! subroutine wolfe_powell_step(pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedStates, alpha, & +! objective, directionTimesGradient, objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepSize, stepTooSmall) +! type(pf_pfasst_t), intent(inout) :: pf +! type(ndarray_oc), target, intent(inout) :: q1 +! real(pfdp), intent(in ) :: dt, alpha, objective, directionTimesGradient +! integer, intent(in ) :: nsteps +! integer, intent(inout) :: itersState, itersAdjoint +! logical, intent(in ) :: predict +! real(pfdp), intent(in ) :: searchDir(:,:,:) +! real(pfdp), intent( out) :: objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq +! real(pfdp), intent(inout) :: stepSize, gradient(:,:,:), savedStates(:,:,:) +! logical, intent(inout) :: stepTooSmall +! +! integer :: l, ierror, iter +! real(pfdp) :: prevStepSize, directionTimesGradientNew, prevObjective +! prevStepSize = 0.0_pfdp +! prevObjective = objective +! iter = 1 +! ! +! do +! call update_control(pf%levels(pf%nlevels)%ctx, searchDir, stepSize) +! !restrict control +! do l = pf%nlevels-1,1,-1 +! call restrict_control(pf%levels(l)%ctx, pf%levels(l+1)%ctx) +! end do +! +! call evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedStates) +! itersState = itersState + pf%state%itcnt +! if(pf%rank == 0) print *, stepSize, 'objectiveNew (L2) = ', objectiveNew +! +! if ( (objectiveNew > objective + armijoDecrease*stepSize*directionTimesGradient) .or. & +! ( objectiveNew >= prevObjective .and. iter > 1) ) then +! call zoom(prevStepSize, stepSize, stepSize, stepSize, & +! pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedStates, alpha, objective, directionTimesGradient, & +! objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepTooSmall, prevObjective) +! return +! end if +! ! +! if (pf%rank .eq. 0) print *, 'evaluate new gradient' +! call evaluate_gradient(pf, q1, dt, nsteps, predict, gradient, LinftyNormGrad, L2NormGradSq, savedStates) +! itersAdjoint = itersAdjoint + pf%state%itcnt +! directionTimesGradientNew = compute_scalar_prod(searchDir, gradient, pf%levels(pf%nlevels)%nodes, dt) +! if (pf%rank .eq. 0) print *, 'globDirXGradNew = ', directionTimesGradientNew +! if (abs(directionTimesGradientNew) <= -c2*directionTimesGradient) then +! return +! end if +! ! +! if (directionTimesGradientNew >= 0) then +! call zoom(stepSize, prevStepSize, stepSize, stepSize, & +! pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, savedStates, alpha, objective, directionTimesGradient, & +! objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, stepTooSmall, objectiveNew) +! return +! end if +! +! prevObjective = objectiveNew +! prevStepSize = stepSize +! stepSize = stepIncFactor*stepSize +! iter = iter + 1 +! if (stepSize > maxStepSize) then +! stepTooSmall = .true. +! if (pf%rank .eq. 0) print *, 'no step found (> max), stopping' +! return +! end if +! call update_control(pf%levels(pf%nlevels)%ctx, searchDir, -prevStepSize) +! end do +! +! end subroutine wolfe_powell_step + + subroutine strong_wolfe_step(pf, q1, dt, nsteps, itersState, itersAdjoint, predict, searchDir, gradient, & + savedStates, savedAdjoint, & + alpha, globObj, globDirXGrad, objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq, & + stepSize, stepTooSmall) + type(pf_pfasst_t), intent(inout) :: pf + type(pf_ndarray_oc_t), target, intent(inout) :: q1 + real(pfdp), intent(in ) :: dt, alpha, globObj, globDirXGrad + integer, intent(in ) :: nsteps + integer, intent(inout) :: itersState, itersAdjoint + logical, intent(in ) :: predict + real(pfdp), intent(in ) :: searchDir(:,:,:) + real(pfdp), intent( out) :: objectiveNew, L2NormUSq, LinftyNormGrad, L2NormGradSq + real(pfdp), intent(inout) :: stepSize, gradient(:,:,:), savedStates(:,:,:), savedAdjoint(:,:,:) + logical, intent(inout) :: stepTooSmall + integer :: l, ierror + real(pfdp) :: low, high + real(pfdp) :: directionTimesGradient, globObjNew, globDirXGradNew + logical :: first + + first = .true. + + low = 0.0_pfdp + high = stepSize + !high = 1.0_pfdp + !if (stepSize > high) high = stepSize + + do + call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, high) !stepSize) + !restrict control + do l = pf%nlevels-1,1,-1 + call restrict_control(pf%levels(l)%ulevel%sweeper, pf%levels(l+1)%ulevel%sweeper) + end do + ! call dump_control(pf%levels(pf%nlevels)%ctx, pf, 'u1') + + !call evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedAdjoint, first) + call evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedStates) + first = .false. + + itersState = itersState + pf%state%itcnt + call mpi_allreduce(objectiveNew, globObjNew, 1, MPI_REAL8, MPI_SUM, pf%comm%comm, ierror) + if(pf%rank == 0) print *, high, 'objectiveNew (L2) = ', globObjNew ! *, stepSize, + + call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, -high) !-stepSize) + + if (globObjNew < globObj + armijoDecrease*stepSize*globDirXGrad) then + high = 2.0*high + else ! Armijo condition not satisfied, exit loop and reduce stepsize + exit + end if + end do + + do + stepSize = 0.5*(low+high) + + if (stepSize < minStepSize ) then + stepTooSmall = .true. + if (pf%rank .eq. 0) print *, 'no step found (< min), stopping' + return + end if + + if (abs(low-high) < minStepSize ) then + stepTooSmall = .true. + if (pf%rank .eq. 0) print *, 'no step found (low==high), stopping' + return + end if + + + call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, stepSize) + do l = pf%nlevels-1,1,-1 + call restrict_control(pf%levels(l)%ulevel%sweeper, pf%levels(l+1)%ulevel%sweeper) + end do + + call evaluate_objective(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedStates) + !(pf, q1, dt, nsteps, predict, alpha, objectiveNew, L2NormUSq, savedAdjoint, first) + itersState = itersState + pf%state%itcnt + call mpi_allreduce(objectiveNew, globObjNew, 1, MPI_REAL8, MPI_SUM, pf%comm%comm, ierror) + if(pf%rank == 0) print *, stepSize, 'objectiveNew (L2) = ', globObjNew + + if (globObjNew >= globObj + armijoDecrease*stepSize*globDirXGrad) then + call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, -stepSize) + high = stepSize + cycle + end if + + ! now Armijo is satisfied again + !call evaluate_gradient(pf, q1, dt, nsteps, predict, gradient, LinftyNormGrad, L2NormGradSq, savedAdjoint) + call evaluate_gradient(pf, q1, dt, nsteps, predict, gradient, LinftyNormGrad, L2NormGradSq, savedStates, savedAdjoint) + itersAdjoint = itersAdjoint + pf%state%itcnt + directionTimesGradient = compute_scalar_prod(searchDir, gradient, pf%levels(pf%nlevels)%nodes, dt) + call mpi_allreduce(directionTimesGradient, globDirXGradNew, 1, MPI_REAL8, MPI_SUM, & + pf%comm%comm, ierror) + + if (pf%rank .eq. 0) print *, 'globDirXGradNew = ', globDirXGradNew, '-c2*globDirXGrad', -c2*globDirXGrad + + + ! if (abs(globDirXGradNew) <= -c2*globDirXGrad) then ! strong Wolfe conditions satisfied + ! return + ! end if + + if (globDirXGradNew < c2*globDirXGrad) then + ! if(pf%rank == 0) print *, stepSize, high, low + low = stepSize + if (pf%rank .eq. 0) print *, 'set new stepLow; stepHigh = ', high, 'stepLow = ', low, 'stepSize = ', stepSize + elseif (globDirXGradNew > -c2*globDirXGrad) then + high = stepSize + if (pf%rank .eq. 0) print *, 'set new stepHigh; stepHigh = ', high, 'stepLow = ', low, 'stepSize = ', stepSize + else + exit + end if + + call update_control(pf%levels(pf%nlevels)%ulevel%sweeper, searchDir, -stepSize) + end do + + end subroutine strong_wolfe_step + +end module pf_mod_optimization + diff --git a/test/nagumo/src/probin.f90 b/test/nagumo/src/probin.f90 index 147a20f3..69fa49df 100644 --- a/test/nagumo/src/probin.f90 +++ b/test/nagumo/src/probin.f90 @@ -1,5 +1,9 @@ module probin use pf_mod_dtype + use iso_fortran_env + + double precision, parameter :: pi = 3.141592653589793d0 +! double precision, parameter :: two_pi = 6.2831853071795862d0 integer, parameter :: maxlevs = 3 @@ -8,7 +12,7 @@ module probin character(len=64), save :: problem_type, window_type double precision, save :: v ! advection velocity - double precision, save :: sizex ! domain size + double precision, save :: Lx ! domain size double precision, save :: nu ! viscosity double precision, save :: t0 ! initial time for exact solution double precision, save :: sigma ! initial condition parameter @@ -36,15 +40,26 @@ module probin integer, save :: do_mixed ! set to 1 to sweep on state and adjoint simultaneously (but without communication in adjoint) ! integer, save :: nsweeps(maxlevs) ! Sweeps at each levels ! integer, save :: nsweeps_pred(maxlevs) ! Sweeps during predictor + integer, save :: test_no ! optimization problem parameters double precision, save :: alpha ! regularization parameter double precision, save :: tol_grad ! gradient tolerance, stop optimization if gradient norm below double precision, save :: tol_obj ! objective function tolerance, stop optimization if objective function value below integer, save :: max_opt_iter ! maximum number of optimization iterations - + integer, save :: restart_interval ! in case of warmstart, do a full prediction step every n iterations + double precision, save :: gamma ! to steer influence of nonlinearity + integer, save :: max_newton_iter ! maximum number of Newton iterations if nonlinear solver is used + logical, save :: lagging ! if false, use Newton to solve nonlinearity logical, save :: solve_y - + logical, save :: adapt_res_tol ! if true, reduce res_tol's by a given factor if gradient norm has decreased by some other factor + double precision, save :: res_tol_factor + double precision, save :: res_tol_graddec + logical, save :: compress + integer(kind=int64), save :: N_digits + integer, save :: storage_level + logical, save :: inexact_adjoint + character(len=32), save :: pfasst_nml character(len=20), save :: fbase ! base name for run character(len=64), save :: foutbase ! base name for output file @@ -59,11 +74,12 @@ module probin integer :: ios,iostat namelist /params/ Finterp, ndim, nnodes, nvars,nprob, nsteps namelist /params/ spatial_order,interp_order, mg_interp_order, do_spec, N_Vcycles,Nrelax - namelist /params/ pfasst_nml,fbase ,poutmod + namelist /params/ pfasst_nml,fbase ,poutmod, output, restart_interval namelist /params/ abs_res_tol, rel_res_tol, tol_grad, tol_obj - namelist /params/ v, nu, t0, dt, Tfin,sigma, kfreq, sizex, max_opt_iter, alpha - namelist /params/ do_imex, warmstart, do_mixed, logfile !, nsweeps, nsweeps_pred - + namelist /params/ v, nu, t0, dt, Tfin,sigma, kfreq, Lx, max_opt_iter, alpha, gamma + namelist /params/ do_imex, warmstart, do_mixed, logfile, inexact_adjoint !, nsweeps, nsweeps_pred + namelist /params/ max_newton_iter, lagging, adapt_res_tol, res_tol_factor + namelist /params/ res_tol_graddec, test_no, compress, N_digits, storage_level contains subroutine probin_init(filename) @@ -84,7 +100,7 @@ subroutine probin_init(filename) nsteps = -1 v = 0.0_pfdp - sizex = 1._pfdp + Lx = 1._pfdp nu = 0.02_pfdp sigma = 0.004_pfdp kfreq = 1 @@ -95,6 +111,10 @@ subroutine probin_init(filename) abs_res_tol = 0.0 rel_res_tol = 0.0 + adapt_res_tol = .false. + res_tol_factor = 0.1 + res_tol_graddec= 0.5 + spatial_order=2 interp_order = 2 @@ -103,6 +123,10 @@ subroutine probin_init(filename) Nrelax = 1 mg_interp_order = 2 + gamma = 1 + + test_no=0 + do_imex = 1 warmstart = 0 do_mixed = 0 @@ -110,13 +134,22 @@ subroutine probin_init(filename) max_opt_iter = 200 tol_grad = 1e-6 tol_obj = 1e-6 + restart_interval = max_opt_iter poutmod = 1 logfile = "progress.log" - - pfasst_nml=filename + pfasst_nml = filename + output = "Dat/pfasst_V/numpy" + lagging = .true. + max_newton_iter = 10 + + compress = .false. + N_digits = 100000000 ! 8 digits + storage_level = 1 + inexact_adjoint = .false. + ! nsweeps = [1, 1, 1] ! nsweeps_pred = [1, 1, 1] ! @@ -154,8 +187,6 @@ subroutine probin_init(filename) ! wtype = PF_WINDOW_RING ! end select - output = "Dat/pfasst_V/numpy" - end subroutine probin_init end module probin diff --git a/test/nagumo/src/solutions.f90 b/test/nagumo/src/solutions.f90 index 79c4faff..b491c967 100644 --- a/test/nagumo/src/solutions.f90 +++ b/test/nagumo/src/solutions.f90 @@ -1,7 +1,7 @@ module solutions use pf_mod_dtype use pf_mod_ndarray_oc - use probin, only : sizex + use probin implicit none contains @@ -16,7 +16,7 @@ subroutine initial(q0, t0, tend) q0%yflatarray = 0.0_pfdp nvars = size(q0%yflatarray) - dx = sizex/dble(nvars) + dx = Lx/dble(nvars) do i = 1, nvars x = dble(i-1)*dx if( x >= 9.0 .and. x <= 11.0 ) q0%yflatarray(i) = 1.2*sqrt(3.0) diff --git a/test/test_EXP.py b/test/test_EXP.py index d2292529..0add56b5 100644 --- a/test/test_EXP.py +++ b/test/test_EXP.py @@ -85,8 +85,8 @@ def errors(out): def test_advdiff(nodes, nu, splitting, mpi_tasks, nml,nsteps): command = 'mpirun -np {} {} {} nnodes={} nu={} splitting={} nsteps={} '.format(mpi_tasks, EXE, nml, nodes, nu, splitting, nsteps) - print command - output = subprocess.check_output(command.split()) # This will run all the tests + print(command) + output = subprocess.check_output(command.split(), text=True) # This will run all the tests try: err = errors(output) # Try to read the output for error statistics diff --git a/test/test_imk.py b/test/test_imk.py index 46665d04..7f7f189c 100644 --- a/test/test_imk.py +++ b/test/test_imk.py @@ -18,7 +18,8 @@ if exc.errno == EEXIST: pass else: - raise OSError, exc.message + print(exc.message) + raise OSError #, exc.message params = IMKParams(exe=exe, nodes=[3], nterms=[2], \ nsteps=128, tfinal=1.0, \ @@ -57,15 +58,15 @@ def test_toda(levels, vcycle, sdc, nodes, nterms): e0 = particle0 - ref_particle0 e8 = particle8 - ref_particle8 - print particle0 - print e0 - print particle8 - print e8 + print(particle0) + print(e0) + print(particle8) + print(e8) - print ref_particle8 + print(ref_particle8) e = max(e0, e8) - print 'e = {}'.format(e) + print('e = {}'.format(e)) assert abs(e) < 1e-6 diff --git a/test/test_mag_toda.py b/test/test_mag_toda.py index 5d270ee0..beb57beb 100644 --- a/test/test_mag_toda.py +++ b/test/test_mag_toda.py @@ -51,7 +51,7 @@ def make_pfasst(): """scrape the output looking for the error statement""" def errors(out): - rx = re.compile(r"error:\s*step:\s*(\d+)\s*iter:\s*(\d+)\s*level:\s*(\d+)\s*error:\s*(\S+)") + rx = re.compile(r"step:\s*(\d+)\s*iter:\s*(\d+)\s*level:\s*(\d+)\s*error:\s*(\S+)") cast = [int, int, int, float] errors = [] @@ -67,7 +67,7 @@ def errors(out): """scrape the output looking for the resid statement""" def resids(out): - rx = re.compile(r"resid:\s*time:\s*(\S+)\s*step:\s*(\d+)\s*rank:\s*(\d+)\s*iter:\s*(\d+)\s*level:\s*(\d+)\s*resid:\s*(\S+)") + rx = re.compile(r"time:\s*(\S+)\s*step:\s*(\d+)\s*rank:\s*(\d+)\s*iter:\s*(\d+)\s*level:\s*(\d+)\s*resid:\s*(\S+)") cast = [float, int, int, int,int, float] resids = [] @@ -90,11 +90,13 @@ def resids(out): def test_advdiff(nodes, mpi_tasks, nml,nsteps): command = 'mpirun -np {} {} {} nnodes={} nsteps={}'.format(mpi_tasks, EXE, nml, nodes, nsteps) - print command - output = subprocess.check_output(command.split()) # This will run all the tests + print(command) + output = subprocess.check_output(command.split(), text=True) # This will run all the tests + print(output) try: err = resids(output) # Try to read the output for error statistics + print(err) except ValueError: assert True, \ 'Expected poor convergence behavior\nunable to parse output\n {}'.format(line) diff --git a/test/test_nagumo.py b/test/test_nagumo.py index c29fbfea..fbe1c282 100644 --- a/test/test_nagumo.py +++ b/test/test_nagumo.py @@ -15,7 +15,7 @@ def make_pfasst(): pfasst = [] nml = NMLFILE.format('probin') - mpi_tasks = 32 + mpi_tasks = 1 #32 for max_opt_iter in [0, 1]: # 0: check state solution; 1: check adjoint solution pfasst.append((mpi_tasks, nml, max_opt_iter)) @@ -24,18 +24,22 @@ def make_pfasst(): """scrape the output looking for the error statement""" def errors(out): #rx = re.compile(r"error:\s*step:\s*(\d+)\s*iter:\s*(\d+)\s*level:\s*(\d+)\s*error:\s*(\S+)") - rx = re.compile(r"rank:\s*(\d+)\s*step:\s*(\d+)\s*iter:\s*(\d+)\s*level:\s*(\d+)\s*res:\s*(\S+)") + rx = re.compile(r"rank:\s*(\d+)\s*lev:\s*(\d+)\s*step:\s*(\d+)\s*iter:\s*(\d+)\s*res:\s*(\S+)") +# rank: 0 lev: 3 step: 17 iter: 2 res: 3.172733E-13 + cast = [int, int, int, int, float] errors = [] for line in out.splitlines(): + print(line) m = rx.search(line) if m: + print(m) try: errors.append(ErrorTuple(*[c(x) for c, x in zip(cast, m.groups())])) except ValueError: raise ValueError - + print(errors) return errors """set up the list of tests to do and call the routines to make the list of tests""" @@ -50,8 +54,8 @@ def errors(out): def test_nagumo(mpi_tasks, nml, max_opt_iter): command = 'mpirun -np {} {} {} max_opt_iter={}'.format(mpi_tasks, EXE, nml, max_opt_iter) - print command - output = subprocess.check_output(command.split()) # This will run all the tests + print(command) + output = subprocess.check_output(command.split(), text=True) # This will run all the tests try: err = errors(output) # Try to read the output for error statistics @@ -72,7 +76,7 @@ def test_nagumo(mpi_tasks, nml, max_opt_iter): maxstep = max([x.step for x in err if x.rank == minrank]) maxiter = max([x.iter for x in err if x.step == maxstep and x.rank == minrank]) lasterr = max([x.residual for x in err if x.step == maxstep and x.iter == maxiter and x.rank == minrank]) - #print minrank, maxstep, maxiter, lasterr + #print(minrank, maxstep, maxiter, lasterr) assert lasterr < TOL, "error: {}, tol: {}".format(lasterr, TOL) # This decides if the test was successful