Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2130,6 +2130,8 @@ This point release addresses the following issues:
- The command `singularity help` now only provides help regarding the usage of
the `singularity` command. To display an image's `help` message, use
`singularity run-help <image path>` instead
- Fixed an issue preventing containers from gracefully shutting down upon
receiving SIGTERM #947.

### Removed Deprecated Commands

Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ The following have contributed code and/or documentation to this repository.
- Tarcisio Fedrizzi <tarcisio.fedrizzi@gmail.com>
- Thomas Hamel <hmlth@t-hamel.fr>
- Tim Wright <7im.Wright@protonmail.com>
- Trevor Nichols <teb99@protonmail.com>
- Tru Huynh <tru@pasteur.fr>
- Tyson Whitehead <twhitehead@gmail.com>
- Vanessa Sochat <vsoch@users.noreply.github.com>
Expand Down
42 changes: 39 additions & 3 deletions internal/app/starter/master_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"os/signal"
"strings"
"syscall"
"time"

"github.com/sylabs/singularity/v4/internal/pkg/runtime/engine"
"github.com/sylabs/singularity/v4/internal/pkg/util/crypt"
Expand Down Expand Up @@ -201,16 +202,31 @@ func hostCleanup(cleanupSocket, imageFd int) error {
func Master(rpcSocket, masterSocket, postStartSocket, cleanupSocket, containerPid, imageFd int, e *engine.Engine) {
var status syscall.WaitStatus
fatalChan := make(chan error, 1)
var fatal error

// we could receive signal from child with CreateContainer call so we
// set the signal handler earlier to queue signals until MonitorContainer
// is called to handle them
// Use a channel size of two here, since we may receive SIGURG, which is
// used for non-cooperative goroutine preemption starting with Go 1.14.
ctx := context.Background()
ctx, cancel := context.WithCancel(ctx)
signals := make(chan os.Signal, 2)
signal.Notify(signals)

ctx := context.TODO()
go func() {
for {
select {
case sig := <-signals:
switch sig {
case syscall.SIGINT, syscall.SIGTERM:
sylog.Debugf("Container Master requested to stop: %v", sig)
cancel()
}
case <-ctx.Done():
return
}
}
}()

go createContainer(ctx, rpcSocket, containerPid, e, fatalChan)

Expand All @@ -220,9 +236,28 @@ func Master(rpcSocket, masterSocket, postStartSocket, cleanupSocket, containerPi
var err error
status, err = e.MonitorContainer(containerPid, signals)
fatalChan <- err
cancel()
}()

fatal := <-fatalChan
<-ctx.Done()

// Wait 3 seconds for container to exit gracefully
select {
case fatal := <-fatalChan:
sylog.Debugf("Container exited gracefully: %v", fatal)
case <-time.After(3 * time.Second):
sylog.Debugf("Grace period expired, sending SIGTERM to container")
syscall.Kill(containerPid, syscall.SIGTERM)

// Force kill after 3 seconds
select {
case fatal := <-fatalChan:
sylog.Debugf("Container exited after SIGTERM: %v", fatal)
case <-time.After(3 * time.Second):
sylog.Debugf("Container still alive, sending SIGKILL")
syscall.Kill(containerPid, syscall.SIGKILL)
}
}

if err := e.CleanupContainer(ctx, fatal, status); err != nil {
sylog.Errorf("Container cleanup failed: %s", err)
Expand All @@ -237,6 +272,7 @@ func Master(rpcSocket, masterSocket, postStartSocket, cleanupSocket, containerPi
}

// reset signal handlers
signal.Stop(signals)
signal.Reset()

exitCode := 0
Expand Down