diff --git a/src/MorseCodeInterpreter.java b/src/MorseCodeInterpreter.java
new file mode 100644
index 0000000..e9000df
--- /dev/null
+++ b/src/MorseCodeInterpreter.java
@@ -0,0 +1,49 @@
+import token.Token;
+
+import java.util.List;
+
+/**
+ * @author Mark Nash
+ *
+ * The entry point to the Morse Code interpreter. Call "interpret" from
+ * anywhere in your code.
+ */
+public class MorseCodeInterpreter {
+
+    /**
+     * A runtime exception that happened while tokenizing or parsing a string
+     * of Morse Code. The message of the exception further specifies the issue.
+     */
+    static class MorseCodeRuntimeException extends RuntimeException {
+        MorseCodeRuntimeException(String message) {
+            super(message);
+        }
+    }
+
+    /**
+     * Reads the file, creates a list of tokens from the four token types,
+     * and parses those tokens into English.
+     *
+     * @param path A string of the file path to translate
+     * @return The interpreted string; null is returned (after the stack
+     *         trace has been printed) if the file could not be read or
+     *         contained incorrect input
+     * @throws IllegalArgumentException if path is null
+     */
+    public static String interpret(String path) {
+        if (path == null) {
+            throw new IllegalArgumentException(
+                    "Morse Code file path cannot be null.");
+        }
+        try {
+            List<Token> tokenList = new MorseCodeTokenizer(path).tokenize();
+            // Parsing stays inside the try block: an undecodable sequence is
+            // incorrect input too and has to yield null, not an exception.
+            return new MorseCodeParser(tokenList).parse();
+        } catch (Exception e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+}
diff --git a/src/MorseCodeParser.java b/src/MorseCodeParser.java
new file mode 100644
index 0000000..2101baa
--- /dev/null
+++ b/src/MorseCodeParser.java
@@ -0,0 +1,102 @@
+import token.*;
+
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * @author Mark Nash
+ *
+ * Builds the string based on the tokens that have been read in
+ */
+public class MorseCodeParser {
+
+    /**
+     * Each index is a state and the data is the character that that state
+     * corresponds to. '\0' marks a state that is not a letter.
+     */
+    private static final char[] STATES_TO_CHARS = { '\0', 'e', 't', 'i', 'a',
+            'n', 'm', 's', 'u', 'r', 'w', 'd', 'k', 'g', 'o', 'h', 'v', 'f',
+            '\0', 'l', '\0', 'p', 'j', 'b', 'x', 'c', 'y', 'z', 'q', '\0' };
+
+    /**
+     * Index of the last table entry. It is not a letter and stands in for
+     * every state past the end of the table.
+     */
+    private static final int INVALID_STATE = STATES_TO_CHARS.length - 1;
+
+    /** The tokens that have been read in. */
+    private final List<Token> tokenList;
+
+    /**
+     * Store a copy of the token list.
+     * @param tokenList The tokens to translate, in any List implementation
+     */
+    public MorseCodeParser(List<? extends Token> tokenList) {
+        // Copying instead of casting to LinkedList avoids a
+        // ClassCastException for callers that pass another kind of List.
+        this.tokenList = new LinkedList<Token>(tokenList);
+    }
+
+    /**
+     * Create meaning out of the list of tokens
+     * @return The string that the list of tokens translates to
+     * @throws MorseCodeInterpreter.MorseCodeRuntimeException if a character
+     *         token does not correspond to a letter
+     */
+    public String parse() {
+        StringBuilder result = new StringBuilder();
+        for (Token token : tokenList) {
+            if (token instanceof CharToken) {
+                result.append(decodeCharacter(token.getValue()));
+            } else if (token instanceof SpaceToken
+                    || token instanceof NewLineToken) {
+                // Spaces and newlines carry their output text themselves.
+                result.append(token.getValue());
+            }
+            // Anything else is a CharSeparatorToken. The tokenizer has
+            // already separated the characters, so nothing needs to be done
+            // with it.
+        }
+        return result.toString();
+    }
+
+    /**
+     * Determine the character from running a DFA state machine.
+     * An example of one can be found here:
+     * http://sound.whsites.net/articles/morse-f5.gif
+     * Each state is numbered starting from 0 going
+     * top to bottom, right to left. From state s, a '.' leads to state
+     * 2s + 1 and a '-' leads to state 2s + 2.
+     *
+     * @param tokenValue A string of '.' and/or '-'
+     * @return an ascii character that the Morse Code string corresponds to
+     * @throws MorseCodeInterpreter.MorseCodeRuntimeException if the string
+     *         does not correspond to a letter
+     */
+    private char decodeCharacter(String tokenValue) {
+        int currState = 0;
+        for (char c : tokenValue.toCharArray()) {
+            if (c == '.') {
+                currState = currState * 2 + 1;
+            } else if (c == '-') {
+                currState = (currState + 1) * 2;
+            }
+            // Any other character has already been dealt with during
+            // tokenizing.
+            if (currState >= INVALID_STATE) {
+                // Stop here: the state only grows from now on, and left
+                // unchecked it would overflow int on long input and index
+                // outside the table or wrap around onto a letter.
+                currState = INVALID_STATE;
+                break;
+            }
+        }
+        char parsedChar = STATES_TO_CHARS[currState];
+
+        if (parsedChar == '\0') {
+            throw new MorseCodeInterpreter.MorseCodeRuntimeException(
+                    "Illegal character: '" + tokenValue + "'");
+        }
+        return parsedChar;
+    }
+}
diff --git a/src/MorseCodeTokenizer.java b/src/MorseCodeTokenizer.java
new file mode 100644
index 0000000..b556773
--- /dev/null
+++ b/src/MorseCodeTokenizer.java
@@ -0,0 +1,113 @@
+import token.*;
+
+import java.io.*;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * @author Mark Nash
+ *
+ * Creates tokens for the whole file. The tokens are:
+ * || -> a character separator
+ * |||| -> a space
+ * '\n' -> a newline
+ * '[.-]+' -> a character
+ *
+ * The four legal characters that can show up in a file are:
+ * '.', '-', '|', '\n'
+ * A '\r' is skipped without producing a token.
+ */
+public class MorseCodeTokenizer {
+
+    /** The file reader. */
+    private final BufferedReader reader;
+
+    /** An illegal character has been found while reading the file. */
+    private static class IllegalMorseCodeCharacterException extends Exception {
+        public IllegalMorseCodeCharacterException(char offendingCharacter) {
+            super("'" + offendingCharacter + "' " + "(" +
+                    (int)offendingCharacter + ") is not a valid character. " +
+                    "Only 4 characters are accepted: '.', '-', '|', and '\\n'");
+        }
+    }
+
+    /**
+     * Opens the file and creates a reader object
+     * @param filePath The path of the file to open
+     * @throws FileNotFoundException if the file does not exist.
+     */
+    public MorseCodeTokenizer(String filePath) throws FileNotFoundException {
+        reader = new BufferedReader(new FileReader(filePath));
+    }
+
+    /**
+     * Reads the characters in the file and creates tokens out of them.
+     * The reader is closed when this method finishes, whether it returns
+     * normally or throws.
+     * @return A List of tokens from the reading of the file
+     * @throws MorseCodeInterpreter.MorseCodeRuntimeException A '|' character
+     * was read without another '|' with it
+     * @throws IllegalMorseCodeCharacterException An unaccepted character of the
+     * four characters was read
+     * @throws IOException Error closing the file or reading a character
+     */
+    public List<Token> tokenize() throws
+            MorseCodeInterpreter.MorseCodeRuntimeException,
+            IllegalMorseCodeCharacterException,
+            IOException {
+
+        LinkedList<Token> tokenList = new LinkedList<>();
+        StringBuilder currChar = new StringBuilder();
+        // try-with-resources closes the file on every exit path, including
+        // the exceptions thrown for malformed input.
+        try (BufferedReader in = reader) {
+            int intChar;
+            while ((intChar = in.read()) != -1) {
+                char c = (char)intChar;
+                switch (c) {
+                    case '.':
+                    case '-':
+                        currChar.append(c);
+                        break;
+                    case '|':
+                        // A '|' is only legal as the first half of "||".
+                        if (in.read() != '|') {
+                            throw new MorseCodeInterpreter.MorseCodeRuntimeException
+                                    ("'|' is not a valid token");
+                        }
+                        if (currChar.length() == 0 && !tokenList.isEmpty() &&
+                                tokenList.getLast() instanceof CharSeparatorToken) {
+                            // A second "||" in a row turns the separator
+                            // into a space.
+                            tokenList.removeLast();
+                            tokenList.add(new SpaceToken());
+                        } else {
+                            flushChar(currChar, tokenList);
+                            tokenList.add(new CharSeparatorToken());
+                        }
+                        break;
+                    case '\r':
+                        break;
+                    case '\n':
+                        flushChar(currChar, tokenList);
+                        tokenList.add(new NewLineToken());
+                        break;
+                    default:
+                        throw new IllegalMorseCodeCharacterException(c);
+                }
+            }
+        }
+        flushChar(currChar, tokenList);
+        return tokenList;
+    }
+
+    /** Turns a pending run of '.' and '-' into a CharToken and clears it. */
+    private static void flushChar(StringBuilder currChar,
+            List<Token> tokenList) {
+        if (currChar.length() != 0) {
+            tokenList.add(new CharToken(currChar.toString()));
+            currChar.setLength(0);
+        }
+    }
+
+}
diff --git a/src/token/CharSeparatorToken.java
b/src/token/CharSeparatorToken.java
new file mode 100644
index 0000000..82bd4f1
--- /dev/null
+++ b/src/token/CharSeparatorToken.java
@@ -0,0 +1,20 @@
+package token;
+
+/**
+ * @author Mark Nash
+ *
+ * Separates characters in a Morse Code file; its value is "||".
+ */
+public class CharSeparatorToken implements Token {
+
+    @Override
+    public String getValue() {
+        return "||";
+    }
+
+    @Override
+    public String toString() {
+        return "CHAR_SEPERATOR";
+    }
+
+}
diff --git a/src/token/CharToken.java b/src/token/CharToken.java
new file mode 100644
index 0000000..11864b8
--- /dev/null
+++ b/src/token/CharToken.java
@@ -0,0 +1,26 @@
+package token;
+
+/**
+ * @author Mark Nash
+ *
+ * Holds the string of '.' and/or '-' it was constructed with.
+ */
+public class CharToken implements Token {
+
+    private String value;
+
+    public CharToken(String value) {
+        this.value = value;
+    }
+
+    @Override
+    public String getValue() {
+        return value;
+    }
+
+    @Override
+    public String toString() {
+        return "'" + value + "'";
+    }
+
+}
diff --git a/src/token/NewLineToken.java b/src/token/NewLineToken.java
new file mode 100644
index 0000000..26d97b5
--- /dev/null
+++ b/src/token/NewLineToken.java
@@ -0,0 +1,20 @@
+package token;
+
+/**
+ * @author Mark Nash
+ *
+ * A newline token; its value is the line feed character.
+ */
+public class NewLineToken implements Token {
+
+    @Override
+    public String getValue() {
+        return "\n";
+    }
+
+    @Override
+    public String toString() {
+        return "NEW_LINE";
+    }
+
+}
diff --git a/src/token/SpaceToken.java b/src/token/SpaceToken.java
new file mode 100644
index 0000000..2738867
--- /dev/null
+++ b/src/token/SpaceToken.java
@@ -0,0 +1,20 @@
+package token;
+
+/**
+ * @author Mark Nash
+ *
+ * A token that represents a space between characters; its value is " ".
+ */
+public class SpaceToken implements Token {
+
+    @Override
+    public String getValue() {
+        return " ";
+    }
+
+    @Override
+    public String toString() {
+        return "SPACE";
+    }
+
+}
diff --git a/src/token/Token.java b/src/token/Token.java
new file mode 100644
index 0000000..da352c1
--- /dev/null
+++ b/src/token/Token.java
@@ -0,0 +1,12 @@
+package token;
+
+/**
+ * @author Mark Nash
+ *
+ * A sort of token that has a semantic meaning. The data in an instance of one
+ * of these tokens is the meaning, and the name is the token type.
+ */
+public interface Token {
+
+    String getValue();
+}
diff --git a/test/MorseCodeInterpreterTest.java b/test/MorseCodeInterpreterTest.java
new file mode 100644
index 0000000..9758a67
--- /dev/null
+++ b/test/MorseCodeInterpreterTest.java
@@ -0,0 +1,14 @@
+public class MorseCodeInterpreterTest {
+
+    public static void main(String[] args) {
+        for (String path : args) {
+            test(path);
+        }
+    }
+
+    private static void test(String path) {
+        System.out.println(path + " {\n" +
+                MorseCodeInterpreter.interpret(path) +
+                "\n}\n");
+    }
+}
diff --git a/test/alphabet.txt b/test/alphabet.txt
new file mode 100644
index 0000000..fbecea3
--- /dev/null
+++ b/test/alphabet.txt
@@ -0,0 +1 @@
+.-||-...||-.-.||-..||.||..-.||--.||....||..||.---||-.-||.-..||--||-.||---||.--.||--.-||.-.||...||-||..-||...-||.--||-..-||-.--||--..
\ No newline at end of file
diff --git a/test/given.txt b/test/given.txt
new file mode 100644
index 0000000..ec5ec69
--- /dev/null
+++ b/test/given.txt
@@ -0,0 +1,2 @@
+-..||---||--.
+....||.||.-..||.-..||---||||.--||---||.-.||.-..||-..
\ No newline at end of file
diff --git a/test/spacing.txt b/test/spacing.txt
new file mode 100644
index 0000000..581ca9d
--- /dev/null
+++ b/test/spacing.txt
@@ -0,0 +1,4 @@
+
+.-||-.
+.-||||-.
+.-||||||||-.
diff --git a/test/stress.txt b/test/stress.txt
new file mode 100644
index 0000000..bf60799
--- /dev/null
+++ b/test/stress.txt
@@ -0,0 +1,6 @@
+
+
+..
+ +.||.--.||||.-.||-.-||--||-..||.--.||||||||- +..||||....||---||.--.||.||||-.--||---||..-||||.-..||..||-.-||.||||--||-.--||||-.-.||---||-..||.