diff --git a/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java b/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java
index bbfcd6d89..5950fc7bb 100644
--- a/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java
+++ b/contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java
@@ -207,6 +207,21 @@ public void setProcessArguments(List processArguments) {
         kp.put("processArguments", processArguments);
     }
 
+    {
+        setSkipSuperResolution(false);
+    }
+    public boolean getSkipSuperResolution() {
+        return (Boolean) kp.get("skipSuperResolution");
+    }
+    /**
+     * Whether to skip YouTube "Super Resolution" upscaled videos, i.e.
+     * formats whose format_id ends with "-sr". Defaults to false, in
+     * which case such formats are treated like any other.
+     */
+    public void setSkipSuperResolution(boolean skipSuperResolution) {
+        kp.put("skipSuperResolution", skipSuperResolution);
+    }
+
     @Override
     public void start() {
         if (!isRunning) {
@@ -420,6 +435,14 @@ public void write(int b) throws IOException {}
      */
     protected void streamYdlOutput(InputStream in, YoutubeDLResults results) throws IOException {
         TeedInputStream tee = new TeedInputStream(in, results.jsonFile);
+        // Since we read objects a single field at a time, we can't
+        // skip the entire object in one action; this lets us track
+        // whether we should ignore subsequent fields from the same
+        // object.
+        // NOTE(review): only string fields read after format_id are
+        // ignored, and the END_OBJECT of a nested object also clears
+        // this flag -- confirm that is sufficient for the youtube-dl JSON.
+        boolean skipObject = false;
         try (JsonReader jsonReader = new JsonReader(new InputStreamReader(tee, "UTF-8"))) {
             while (true) {
                 JsonToken nextToken = jsonReader.peek();
@@ -440,6 +463,7 @@ protected void streamYdlOutput(InputStream in, YoutubeDLResults results) throws
                     return;
                 case END_OBJECT:
                     jsonReader.endObject();
+                    skipObject = false;
                     break;
                 case NAME:
                     jsonReader.nextName();
@@ -451,8 +475,26 @@ protected void streamYdlOutput(InputStream in, YoutubeDLResults results) throws
                     jsonReader.nextString();
                     break;
                 case STRING:
+                    if (skipObject) {
+                        // The token must still be consumed: peek() does
+                        // not advance the reader, so breaking out here
+                        // without reading would loop forever.
+                        jsonReader.nextString();
+                        break;
+                    }
+
                     String value = jsonReader.nextString();
-                    if ("$.url".equals(jsonReader.getPath())
+                    // Format IDs ending with -sr are YouTube "Super Resolution"
+                    // upscaled videos; if requested, avoid downloading these in
+                    // favour of the original files.
+                    // https://alexwlchan.net/til/2025/ignore-ai-scaled-videos/
+                    if ("$.format_id".equals(jsonReader.getPath())
+                            || jsonReader.getPath().matches("^\\$\\.entries\\[\\d+\\]\\.format_id$")) {
+                        if (value.endsWith("-sr") && getSkipSuperResolution()) {
+                            skipObject = true;
+                            break;
+                        }
+                    } else if ("$.url".equals(jsonReader.getPath())
                             || jsonReader.getPath().matches("^\\$\\.entries\\[\\d+\\]\\.url$")) {
                         results.videoUrls.add(value);
                     } else if ("$.webpage_url".equals(jsonReader.getPath())