From a136d2063d0a220a641914d4fedb375a13b16ceb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20K=C3=BCrten?= Date: Tue, 11 Aug 2020 18:36:19 +0200 Subject: [PATCH] Add method WikiClean#cleanContent() and improve Javadoc --- src/main/java/org/wikiclean/WikiClean.java | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/wikiclean/WikiClean.java b/src/main/java/org/wikiclean/WikiClean.java index 8148286..f492b7f 100644 --- a/src/main/java/org/wikiclean/WikiClean.java +++ b/src/main/java/org/wikiclean/WikiClean.java @@ -178,12 +178,27 @@ public String getWikiMarkup(String s) { /** * Cleans a Wikipedia article. - * @param page Wikipedia article + * @param page Wikipedia article wrapped in XML; the Wikitext markup (and the title, if enabled) is extracted from it * @return cleaned output */ public String clean(String page) { String content = getWikiMarkup(page); + String cleaned = cleanContent(content); + + if (withTitle) { + return getTitle(page) + "\n\n" + cleaned.trim(); + } + + return cleaned.trim(); + } + + /** + * Cleans the Wikitext markup of a Wikipedia article. + * @param content Wikipedia article content (the Wikitext markup) + * @return cleaned output + */ + public String cleanContent(String content) { if (!withFooter) { content = removeFooter(content); } @@ -217,10 +232,6 @@ public String clean(String page) { // Finally, fold multiple newlines. content = compressMultipleNewlines(content); - if (withTitle) { - return getTitle(page) + "\n\n" + content.trim(); - } - return content.trim(); }