import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Chat filter that detects configured bad words inside arbitrary text.
 *
 * <p>Originally created by Pim De Witte.
 *
 * <p>Performance drastically improved by over an order of magnitude by Thomas G. P. Nappo (Jire).
 * Garbage production has been eliminated as well.
 *
 * <p>The word list is held in static, unsynchronized state; this class makes no thread-safety
 * guarantees.
 */
public class BadWords {

    /** CSV export of the word list: column 0 is the word, column 1 is a '_'-separated ignore list. */
    private static final String WORD_LIST_URL =
            "https://docs.google.com/spreadsheets/d/1hIEi2YG3ydav1E06Bzf2mQbGZ12kh2fe4ISgLg_UBuM/export?format=csv";

    /** Matches every character that is not a lower-case ASCII letter. */
    private static final Pattern NON_LETTERS = Pattern.compile("[^a-z]");

    /**
     * Bad word (spaces removed) mapped to the fragments that excuse it: when the normalized
     * message contains any of those fragments, the word is not reported.
     */
    static Map<String, String[]> words = new HashMap<>();

    /** Length of the longest key in {@link #words}; bounds the candidate substring length. */
    static int largestWordLength = 0;

    /**
     * Downloads the word list and adds its entries to {@link #words}.
     *
     * <p>Each CSV line has the form {@code word[,ignore1_ignore2_...]}. A download failure is
     * reported on stderr and leaves the current word list untouched.
     */
    public static void loadConfigs() {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new URL(WORD_LIST_URL).openConnection().getInputStream(), StandardCharsets.UTF_8))) {
            int counter = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                if (registerLine(line)) {
                    counter++;
                }
            }
            System.out.println("Loaded " + counter + " words to filter out");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses one CSV line and stores the resulting entry.
     *
     * @param line raw line from the word list
     * @return {@code true} if an entry was stored, {@code false} if the line held no word
     */
    private static boolean registerLine(String line) {
        String[] columns = line.split(",");
        if (columns.length == 0) {
            return false;
        }
        String word = columns[0].replace(" ", "");
        if (word.isEmpty()) {
            // A blank key can never match a candidate substring; do not store it.
            return false;
        }

        List<String> ignored = new ArrayList<>();
        if (columns.length > 1) {
            for (String fragment : columns[1].split("_")) {
                // "".split("_") yields [""], and every string contains "", which would
                // silently whitelist the word for all messages.
                if (!fragment.isEmpty()) {
                    ignored.add(fragment);
                }
            }
        }

        largestWordLength = Math.max(largestWordLength, word.length());
        words.put(word, ignored.toArray(new String[0]));
        return true;
    }

    /**
     * Iterates over a String input and checks whether a cuss word was found in the list, then
     * checks if the word should be ignored (e.g. bass contains the word *ss).
     *
     * <p>The input is normalized first (leetspeak undone, lower-cased, everything but ASCII
     * letters removed), so matches may span word boundaries of the original text. If the word
     * list is empty, a load is attempted before checking, on every call.
     *
     * @param input text to inspect; may be {@code null}
     * @return every match in order of appearance (duplicates included); empty if there is none
     *         or {@code input} is {@code null}
     */
    public static ArrayList<String> badWordsFound(String input) {
        if (words.isEmpty()) {
            loadConfigs();
        }
        if (input == null) {
            return new ArrayList<>();
        }

        String text = normalize(input);
        ArrayList<String> badWords = new ArrayList<>();

        for (int start = 0; start < text.length(); start++) {
            // Candidates may be as long as the longest known word (inclusive) but must not
            // run past the end of the text.
            int maxLength = Math.min(largestWordLength, text.length() - start);
            for (int length = 1; length <= maxLength; length++) {
                String candidate = text.substring(start, start + length);
                String[] ignoreList = words.get(candidate);
                if (ignoreList != null && !containsAny(text, ignoreList)) {
                    badWords.add(candidate);
                }
            }
        }

        for (String s : badWords) {
            System.out.println(s + " qualified as a bad word");
        }
        return badWords;
    }

    /** Undoes common leetspeak, lower-cases and strips everything but ASCII letters. */
    private static String normalize(String input) {
        String plain = input
                .replace('1', 'i')
                .replace('!', 'i')
                .replace('3', 'e')
                .replace('4', 'a')
                .replace('@', 'a')
                .replace('5', 's')
                .replace('7', 't')
                .replace('0', 'o')
                .replace('9', 'g');
        // Locale.ROOT keeps the mapping stable regardless of the JVM default locale
        // (e.g. Turkish would turn 'I' into a dotless i that is then stripped).
        return NON_LETTERS.matcher(plain.toLowerCase(Locale.ROOT)).replaceAll("");
    }

    /** Returns whether {@code text} contains at least one non-empty entry of {@code fragments}. */
    private static boolean containsAny(String text, String[] fragments) {
        for (String fragment : fragments) {
            if (fragment != null && !fragment.isEmpty() && text.contains(fragment)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces a message with a notice when it contains a bad word.
     *
     * @param input    message to check
     * @param username sender of the message; currently unused
     * @return {@code input} unchanged if it is clean, otherwise a fixed "blocked" notice
     */
    public static String filterText(String input, String username) {
        ArrayList<String> badWords = badWordsFound(input);
        if (!badWords.isEmpty()) {
            return "This message was blocked because a bad word was found. If you believe this word should not be blocked, please message support.";
        }
        return input;
    }

}