From ecee8d6adcea22dbda6c47e2dd0c913bb01ae60b Mon Sep 17 00:00:00 2001 From: Arnaud Rebillout Date: Thu, 5 Jun 2025 14:31:56 +0700 Subject: [PATCH 1/2] scan: Log the number of files removed after the source was scanned When a *mirror* is scanned, the number of files removed is logged: ``` 2025/05/19 01:21:10.644 UTC [archive-4.kali.org] Requesting file list via rsync... 2025/05/19 01:22:42.670 UTC [archive-4.kali.org] Indexed 490848 files (490847 known), 3807 removed ``` However, when the *source* is scanned, it's not logged: ``` 2025/05/19 00:25:22.639 UTC [source] Scanning the filesystem... 2025/05/19 00:25:58.265 UTC [source] Indexing the files... 2025/05/19 00:26:02.360 UTC [source] Scanned 490848 files ``` The number of removed files can be useful information, and it's trivial to add it to the log, so let's add it. Additionally, this commit trades the term "Scanned" for "Indexed", to align with the term that is used for the *mirror* scans (above). After this commit, the logs now look like this: ``` 2025/05/19 00:25:22.639 UTC [source] Scanning the filesystem... 2025/05/19 00:25:58.265 UTC [source] Indexing the files... 
2025/05/19 00:26:02.360 UTC [source] Indexed 490848 files, 3807 removed ``` --- scan/scan.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scan/scan.go b/scan/scan.go index 621a6dfd..766171b1 100644 --- a/scan/scan.go +++ b/scan/scan.go @@ -560,7 +560,7 @@ func ScanSource(r *database.Redis, forceRehash bool, stop <-chan struct{}) (err return err } - log.Infof("[source] Scanned %d files", count) + log.Infof("[source] Indexed %d files, %d removed", count, len(toremove)) return nil } From c99df61003e6603efe2fd34a094842e478bc67ee Mon Sep 17 00:00:00 2001 From: Arnaud Rebillout Date: Fri, 6 Jun 2025 09:38:44 +0700 Subject: [PATCH 2/2] scan: Log when the result of a mirror scan is committed to the database So far, when a mirror is scanned, we have two logs: when it starts, and when it ends: ``` 2025/05/19 01:21:10.644 UTC [archive-4.kali.org] Requesting file list via rsync... 2025/05/19 01:22:42.670 UTC [archive-4.kali.org] Indexed 490848 files (490847 known), 3807 removed ``` We don't have a timestamp in the middle, to indicate that the scan is finished, and we're about to commit the result to the database. Too bad, as it's sometimes useful to know how long it took for the scan, and how long it took for the commit. And as we can see, when the source is scanned, we do have this information: ``` 2025/05/19 00:25:22.639 UTC [source] Scanning the filesystem... 2025/05/19 00:25:58.265 UTC [source] Indexing the files... 2025/05/19 00:26:02.360 UTC [source] Indexed 490848 files, 3807 removed ``` So this commit aligns the logs for mirror scan with the logs for source scan: it adds this extra log before starting the commit operation. Now the logs look like this: ``` 2025/05/19 01:21:10.644 UTC [archive-4.kali.org] Requesting file list via rsync... 2025/05/19 01:22:37.927 UTC [archive-4.kali.org] Indexing the files... 
2025/05/19 01:22:42.670 UTC [archive-4.kali.org] Indexed 490848 files (490847 known), 3807 removed ``` --- scan/scan.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scan/scan.go b/scan/scan.go index 766171b1..5e679738 100644 --- a/scan/scan.go +++ b/scan/scan.go @@ -149,6 +149,8 @@ func Scan(typ core.ScannerType, r *database.Redis, c *mirrors.Cache, url string, return nil, err } + log.Infof("[%s] Indexing the files...", name) + // Exec multi s.ScannerCommit()