From 49d6c5d830ccff9b8d411c845ce33d3dad48e8b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Fri, 30 Jan 2026 11:48:21 -0800 Subject: [PATCH 1/2] ExtractorYoutubeDL: skip YouTube super resolution videos YouTube has started upscaling older videos that are below a certain resolution. yt-dlp identifies these via the `-sr` suffix in the format ID. It's probably a good idea to avoid downloading these, and just focus on the originals instead. yt-dlp's `--dump-single-json` option ignores the `--format` argument, so we can't use that to filter out super resolution videos. Instead, we'll need to check every format we get while iterating and skip it if it ends with `-sr`. See: https://alexwlchan.net/til/2025/ignore-ai-scaled-videos/?ref=mastodon --- .../modules/extractor/ExtractorYoutubeDL.java | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java b/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java index bbfcd6d89..a28047da2 100644 --- a/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java +++ b/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java @@ -420,6 +420,11 @@ public void write(int b) throws IOException {} */ protected void streamYdlOutput(InputStream in, YoutubeDLResults results) throws IOException { TeedInputStream tee = new TeedInputStream(in, results.jsonFile); + // Since we read objects a single field at a time, we can't + // skip the entire object in one action; this lets us track + // whether we should ignore subsequent fields from the same + // object. 
+ boolean skipObject = false; try (JsonReader jsonReader = new JsonReader(new InputStreamReader(tee, "UTF-8"))) { while (true) { JsonToken nextToken = jsonReader.peek(); @@ -440,6 +445,7 @@ protected void streamYdlOutput(InputStream in, YoutubeDLResults results) throws return; case END_OBJECT: jsonReader.endObject(); + skipObject = false; break; case NAME: jsonReader.nextName(); @@ -451,8 +457,24 @@ protected void streamYdlOutput(InputStream in, YoutubeDLResults results) throws jsonReader.nextString(); break; case STRING: + if (skipObject) { + // Consume the value: peek() alone never advances the reader. + jsonReader.skipValue(); + break; + } + String value = jsonReader.nextString(); - if ("$.url".equals(jsonReader.getPath()) + // Format IDs ending with -sr are YouTube "Super Resolution" + // upscaled videos; avoid downloading these in favour of the + // original files. + // https://alexwlchan.net/til/2025/ignore-ai-scaled-videos/ + if ("$.format_id".equals(jsonReader.getPath()) + || jsonReader.getPath().matches("^\\$\\.entries\\[\\d+\\]\\.format_id$")) { + if (value.endsWith("-sr")) { + skipObject = true; + break; + } + } else if ("$.url".equals(jsonReader.getPath()) || jsonReader.getPath().matches("^\\$\\.entries\\[\\d+\\]\\.url$")) { results.videoUrls.add(value); } else if ("$.webpage_url".equals(jsonReader.getPath()) From 3e19a2487963a8542dae181ea6af810eee3315d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Misty=20De=20M=C3=A9o?= Date: Fri, 30 Jan 2026 13:08:45 -0800 Subject: [PATCH 2/2] ExtractorYoutubeDL: configuration for skipping -sr --- .../modules/extractor/ExtractorYoutubeDL.java | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java b/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java index a28047da2..5950fc7bb 100644 --- a/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java +++ b/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java @@ -207,6 +207,19 @@ public void 
setProcessArguments(List processArguments) { kp.put("processArguments", processArguments); } + { + setSkipSuperResolution(false); + } + public boolean getSkipSuperResolution() { + return (Boolean) kp.get("skipSuperResolution"); + } + /** + * Whether to skip YouTube Super Resolution upscaled videos (format IDs ending in -sr); defaults to false. + */ + public void setSkipSuperResolution(boolean skipSuperResolution) { + kp.put("skipSuperResolution",skipSuperResolution); + } + @Override public void start() { if (!isRunning) { @@ -463,12 +476,12 @@ protected void streamYdlOutput(InputStream in, YoutubeDLResults results) throws String value = jsonReader.nextString(); // Format IDs ending with -sr are YouTube "Super Resolution" - // upscaled videos; avoid downloading these in favour of the - // original files. + // upscaled videos; if requested, avoid downloading these in + // favour of the original files. // https://alexwlchan.net/til/2025/ignore-ai-scaled-videos/ if ("$.format_id".equals(jsonReader.getPath()) || jsonReader.getPath().matches("^\\$\\.entries\\[\\d+\\]\\.format_id$")) { - if (value.endsWith("-sr")) { + if (value.endsWith("-sr") && getSkipSuperResolution()) { skipObject = true; break; }