From 3482b62877ff3a1bc63fa3db5e6d7272116e8500 Mon Sep 17 00:00:00 2001 From: vaLentin chernoZemski Date: Mon, 3 Nov 2025 12:52:36 +0200 Subject: [PATCH] Unknown reader errors should close reader/tailer to avoid mtail looping on broken fds. If mtail is tailing a file under a mount, and this mount is gone, the mtail reader might block on read error: bad file descriptor .... This will DoS mtail, which won't respond to standard signals and will significantly increase its CPU consumption. A better strategy is to simply close the reader/tailer on the broken fd and let the tailer do its job. Other retryable cases like syscall.ESTALE can be added in the future, but for now failing fast seems like a logical approach. --- internal/tailer/logstream/filestream.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/internal/tailer/logstream/filestream.go b/internal/tailer/logstream/filestream.go index f77b1adbf..65dd1d11e 100644 --- a/internal/tailer/logstream/filestream.go +++ b/internal/tailer/logstream/filestream.go @@ -124,8 +124,20 @@ func (fs *fileStream) stream(ctx context.Context, wg *sync.WaitGroup, waker wake } // Close this stream. return + } else { + glog.Infof("stream(%s): read error: %v", fs.sourcename, err) + // We got an unknown stream read error (not syscall.ESTALE) which we currently don't know how to handle. + // Those might be read error: (is a directory|bad file descriptor|...) etc. + // Without this patch mtail starts to loop on broken fds that are gone/incorrect, + // starts consuming a lot of CPU cycles, and can't handle signals besides SIGKILL. + // If we end up in such an unrecoverable/unhandled (for now) situation, it is better to use a fail-fast approach. + // We close the reader and the tailer for this source and let the tailer pick up the new file(s), if any, on its own later. + // Those read errors (is directory|bad file descriptor) can happen if we mtail files under mounts, and the mounts are gone. + // Those might be docker/container/etc. 
setups where mtail is running on host. + lr.Finish(ctx) + close(fs.lines) + return } - glog.Infof("stream(%s): read error: %v", fs.sourcename, err) } // If we have read no bytes and are at EOF, check for truncation and rotation.