diff --git a/src/main/java/org/wikiclean/WikiClean.java b/src/main/java/org/wikiclean/WikiClean.java index 8148286..f492b7f 100644 --- a/src/main/java/org/wikiclean/WikiClean.java +++ b/src/main/java/org/wikiclean/WikiClean.java @@ -178,12 +178,27 @@ public String getWikiMarkup(String s) { /** * Cleans a Wikipedia article. - * @param page Wikipedia article + * @param page Wikipedia article in its XML form, from which the Wikitext markup (and, when titles are enabled, the title) is extracted * @return cleaned output */ public String clean(String page) { String content = getWikiMarkup(page); + String cleaned = cleanContent(content); + + if (withTitle) { + return getTitle(page) + "\n\n" + cleaned.trim(); + } + + return cleaned.trim(); + } + + /** + * Cleans the Wikitext markup of a Wikipedia article. + * @param content Wikipedia article content (the Wikitext markup) + * @return cleaned output + */ + public String cleanContent(String content) { if (!withFooter) { content = removeFooter(content); } @@ -217,10 +232,6 @@ public String clean(String page) { // Finally, fold multiple newlines. content = compressMultipleNewlines(content); - if (withTitle) { - return getTitle(page) + "\n\n" + content.trim(); - } - return content.trim(); }