From e6f79fcfc0ec457d0322ec622a1aea68133a116a Mon Sep 17 00:00:00 2001 From: Mark Nash Date: Sat, 23 Mar 2019 10:11:05 -0400 Subject: [PATCH 1/7] The proof of concept algorithm works. Refactoring for standards and readibility. --- src/MorseCodeInterpreter.java | 65 +++++++++++++++++++++++++++++++++++ test/fle | 1 + 2 files changed, 66 insertions(+) create mode 100644 src/MorseCodeInterpreter.java create mode 100644 test/fle diff --git a/src/MorseCodeInterpreter.java b/src/MorseCodeInterpreter.java new file mode 100644 index 0000000..ee77d44 --- /dev/null +++ b/src/MorseCodeInterpreter.java @@ -0,0 +1,65 @@ +import java.io.*; + +public class MorseCodeInterpreter { + + private static final class IllegalMorseCodeCharacterException extends Exception { + public IllegalMorseCodeCharacterException() { + super("Only 4 characters are excepted: '.', '-', '|', and '\\n'"); + } + } + + private static final char[] STATES_TO_CHARS = { '\0', 'e', 't', 'i', 'a', 'n', 'm', + 's', 'u', 'r', 'w', 'd', 'k', 'g', 'o', 'h', 'v', 'f', '\0', 'l', + '\0', 'p', 'j', 'b', 'x', 'c', 'y', 'z', 'q', '\0' }; + + private static final int[][] TRANSITIONS = { + {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}, // . + {2, 4, 6, 8, 10, 12, 14, 16, 29, 29, 22, 24, 26, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}, // - + }; + + public static String interpret(String path) throws Exception { + if (path == null) { + throw new IllegalArgumentException( + "Morse Code file path cannot be null."); + } + StringBuilder returnString = new StringBuilder(); + String currLine; + BufferedReader fileReader = new BufferedReader((new FileReader(path))); + while ((currLine = fileReader.readLine()) != null) { + int currState = 0; + boolean readOnePipe = false; + for (char c : currLine.toCharArray()) { + switch (c) { + case '.': + currState = TRANSITIONS[0][currState]; + break; + case '-': + currState = TRANSITIONS[1][currState]; + break; + case '|': + if (readOnePipe) { + if (currState == 0) { + returnString.append(' '); + } else { + returnString.append(STATES_TO_CHARS[currState]); + currState = 0; + } + readOnePipe = false; + } else { + readOnePipe = true; + } + break; + default: + throw new IllegalMorseCodeCharacterException(); + } + } + returnString.append('\n'); + } + return returnString.substring(0, returnString.length() - 1); + } + + public static void main(String[] args) throws Exception{ + System.out.println(MorseCodeInterpreter.interpret("test/fle")); + } + +} diff --git a/test/fle b/test/fle new file mode 100644 index 0000000..6d98966 --- /dev/null +++ b/test/fle @@ -0,0 +1 @@ +.||..||...||....|| \ No newline at end of file From 86b6395cb6771f46c24bf9e530aef005f638cf70 Mon Sep 17 00:00:00 2001 From: Mark Nash Date: Sat, 23 Mar 2019 12:06:42 -0400 Subject: [PATCH 2/7] Refactored for testability --- src/MorseCodeInterpreter.java | 60 ++++----------------- src/MorseCodeParser.java | 67 +++++++++++++++++++++++ src/MorseCodeTokenizer.java | 88 +++++++++++++++++++++++++++++++ src/token/CharSeperatorToken.java | 9 ++++ src/token/CharToken.java | 15 ++++++ src/token/NewLineToken.java | 9 ++++ src/token/SpaceToken.java | 9 ++++ src/token/Token.java | 6 +++ 8 files changed, 214 insertions(+), 49 deletions(-) create mode 100644 src/MorseCodeParser.java create mode 100644 src/MorseCodeTokenizer.java create mode 100644 src/token/CharSeperatorToken.java create mode 100644 src/token/CharToken.java create mode 100644 src/token/NewLineToken.java create mode 100644 src/token/SpaceToken.java create mode 100644 src/token/Token.java diff --git a/src/MorseCodeInterpreter.java b/src/MorseCodeInterpreter.java index ee77d44..623199a 100644 --- a/src/MorseCodeInterpreter.java +++ b/src/MorseCodeInterpreter.java @@ -1,64 +1,26 @@ -import java.io.*; +import token.Token; + +import java.util.List; public class MorseCodeInterpreter { - private static final class IllegalMorseCodeCharacterException extends Exception { - public IllegalMorseCodeCharacterException() { - super("Only 4 characters are excepted: '.', '-', '|', and '\\n'"); + static class MorseCodeRuntimeException extends Exception { + MorseCodeRuntimeException(String message) { + super(message); } } - private static final char[] STATES_TO_CHARS = { '\0', 'e', 't', 'i', 'a', 'n', 'm', - 's', 'u', 'r', 'w', 'd', 'k', 'g', 'o', 'h', 'v', 'f', '\0', 'l', - '\0', 'p', 'j', 'b', 'x', 'c', 'y', 'z', 'q', '\0' }; - - private static final int[][] TRANSITIONS = { - {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}, // . - {2, 4, 6, 8, 10, 12, 14, 16, 29, 29, 22, 24, 26, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}, // - - }; - - public static String interpret(String path) throws Exception { + public static String interpret(String path) { if (path == null) { throw new IllegalArgumentException( "Morse Code file path cannot be null."); } - StringBuilder returnString = new StringBuilder(); - String currLine; - BufferedReader fileReader = new BufferedReader((new FileReader(path))); - while ((currLine = fileReader.readLine()) != null) { - int currState = 0; - boolean readOnePipe = false; - for (char c : currLine.toCharArray()) { - switch (c) { - case '.': - currState = TRANSITIONS[0][currState]; - break; - case '-': - currState = TRANSITIONS[1][currState]; - break; - case '|': - if (readOnePipe) { - if (currState == 0) { - returnString.append(' '); - } else { - returnString.append(STATES_TO_CHARS[currState]); - currState = 0; - } - readOnePipe = false; - } else { - readOnePipe = true; - } - break; - default: - throw new IllegalMorseCodeCharacterException(); - } - } - returnString.append('\n'); - } - return returnString.substring(0, returnString.length() - 1); + List tokenList = new MorseCodeTokenizer(path).tokenize(); + String interpreted = new MorseCodeParser(tokenList).parse(); + return interpreted; } - public static void main(String[] args) throws Exception{ + public static void main(String[] args) { System.out.println(MorseCodeInterpreter.interpret("test/fle")); } diff --git a/src/MorseCodeParser.java b/src/MorseCodeParser.java new file mode 100644 index 0000000..3a2a944 --- /dev/null +++ b/src/MorseCodeParser.java @@ -0,0 +1,67 @@ +import token.*; + +import java.util.LinkedList; +import java.util.List; + +public class MorseCodeParser { + + private LinkedList tokenList; + + private static final char[] STATES_TO_CHARS = { '\0', 'e', 't', 'i', 'a', + 'n', 'm', 's', 'u', 'r', 'w', 'd', 'k', 'g', 'o', 'h', 'v', 'f', + '\0', 'l', '\0', 'p', 'j', 'b', 'x', 'c', 'y', 'z', 'q', '\0' }; + + private static final int[][] TRANSITIONS = { + {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}, // . + {2, 4, 6, 8, 10, 12, 14, 16, 29, 29, 22, 24, 26, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}, // - + }; + + public MorseCodeParser(List tokenList) { + this.tokenList = (LinkedList)tokenList; + } + + public String parse() { + StringBuilder result = new StringBuilder(); + for (Token token : tokenList) { + if (token instanceof CharToken) { + result.append(parseCharacter((CharToken)token)); + } + else if (token instanceof SpaceToken) { + result.append(' '); + } + else if (token instanceof NewLineToken) { + result.append('\n'); + } + else { + // This is a CharSeperatorToken. The tokenizer has already + // separated the characters, so nothing needs to be done + // with this + } + } + return result.toString(); + } + + private Character parseCharacter(CharToken token) { + int currState = 0; + for (char c : token.getValue().toCharArray()) { + switch (c) { + case '.': + currState = TRANSITIONS[0][currState]; + break; + case '-': + currState = TRANSITIONS[1][currState]; + break; + default: + // this has already been dealt with during tokenizing + } + } + char parsedChar = STATES_TO_CHARS[currState]; + + if (parsedChar == '\0') { + new MorseCodeInterpreter.MorseCodeRuntimeException( + "Illegal character: '" + token.getValue() + "'" + ).printStackTrace(); + } + return parsedChar; + } +} diff --git a/src/MorseCodeTokenizer.java b/src/MorseCodeTokenizer.java new file mode 100644 index 0000000..b652bd4 --- /dev/null +++ b/src/MorseCodeTokenizer.java @@ -0,0 +1,88 @@ +import token.*; + +import java.io.*; +import java.util.LinkedList; +import java.util.List; +import java.util.NoSuchElementException; + +public class MorseCodeTokenizer { + + private Reader reader; + + private static class IllegalMorseCodeCharacterException extends Exception { + public IllegalMorseCodeCharacterException(char offendingCharacter) { + super(offendingCharacter + '(' + + String.format("%04x", (int) offendingCharacter) + ')' + + " is not a valid character. Only 4 characters are " + + "excepted: '.', '-', '|', and '\\n'"); + } + } + + public MorseCodeTokenizer(String filePath) { + try { + reader = new BufferedReader(new InputStreamReader( + new FileInputStream(filePath))); + + } + catch (FileNotFoundException fnfe) { + fnfe.printStackTrace(); + } + } + + public List tokenize() { + char c; + int charInt; + LinkedList tokenList = new LinkedList<>(); + StringBuilder currChar = new StringBuilder(); + boolean lastCharWasPipe = false; + try { + while ((charInt = reader.read()) != -1) { + c = (char)charInt; + System.out.println(c + "'" + currChar.toString() + "'" + tokenList); + switch (c) { + case '.': + case '-': + currChar.append(c); + break; + case '|': + if (lastCharWasPipe) { + Token lastToken = tokenList.getLast(); // peak + if (lastToken instanceof CharSeperatorToken) { + tokenList.removeLast(); + tokenList.add(new SpaceToken()); + } + else { + tokenList.add(new CharSeperatorToken()); + } + lastCharWasPipe = false; + } + else { + tokenList.add(new CharToken(currChar.toString())); + currChar = new StringBuilder(); + lastCharWasPipe = true; + } + break; + case '\n': + if (currChar.length() != 0) { + tokenList.add(new CharToken(currChar.toString())); + currChar = new StringBuilder(); + } + tokenList.add(new NewLineToken()); + break; + default: + throw new IllegalMorseCodeCharacterException(c); + } + } + tokenList.add(new CharToken(currChar.toString())); + } + catch (NoSuchElementException nse) { + new MorseCodeInterpreter.MorseCodeRuntimeException( + "Line cannot start with '||'").printStackTrace(); + } + catch (Exception e) { + e.printStackTrace(); + } + + return tokenList; + } +} diff --git a/src/token/CharSeperatorToken.java b/src/token/CharSeperatorToken.java new file mode 100644 index 0000000..7b7c418 --- /dev/null +++ b/src/token/CharSeperatorToken.java @@ -0,0 +1,9 @@ +package token; + +public class CharSeperatorToken extends Token { + + public String getValue() { + return "||"; + } + +} diff --git a/src/token/CharToken.java b/src/token/CharToken.java new file mode 100644 index 0000000..463cee9 --- /dev/null +++ b/src/token/CharToken.java @@ -0,0 +1,15 @@ +package token; + +public class CharToken extends Token { + + private String value; + + public CharToken(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + +} diff --git a/src/token/NewLineToken.java b/src/token/NewLineToken.java new file mode 100644 index 0000000..bef6086 --- /dev/null +++ b/src/token/NewLineToken.java @@ -0,0 +1,9 @@ +package token; + +public class NewLineToken extends Token { + + public String getValue() { + return "\n"; + } + +} diff --git a/src/token/SpaceToken.java b/src/token/SpaceToken.java new file mode 100644 index 0000000..393a0d9 --- /dev/null +++ b/src/token/SpaceToken.java @@ -0,0 +1,9 @@ +package token; + +public class SpaceToken extends Token { + + public String getValue() { + return "||||"; + } + +} diff --git a/src/token/Token.java b/src/token/Token.java new file mode 100644 index 0000000..33cee31 --- /dev/null +++ b/src/token/Token.java @@ -0,0 +1,6 @@ +package token; + +public abstract class Token { + + abstract String getValue(); +} From 6b9af09942dfcbe2e960c11a91bacc94c1fba40e Mon Sep 17 00:00:00 2001 From: Mark Nash Date: Sat, 23 Mar 2019 13:09:20 -0400 Subject: [PATCH 3/7] Fixed some testing bugs --- src/MorseCodeTokenizer.java | 84 +++++++++++++++---------------- src/token/CharSeperatorToken.java | 5 ++ src/token/CharToken.java | 5 ++ src/token/NewLineToken.java | 5 ++ src/token/SpaceToken.java | 5 ++ 5 files changed, 61 insertions(+), 43 deletions(-) diff --git a/src/MorseCodeTokenizer.java b/src/MorseCodeTokenizer.java index b652bd4..75fd642 100644 --- a/src/MorseCodeTokenizer.java +++ b/src/MorseCodeTokenizer.java @@ -7,12 +7,11 @@ public class MorseCodeTokenizer { - private Reader reader; + private BufferedReader reader; private static class IllegalMorseCodeCharacterException extends Exception { public IllegalMorseCodeCharacterException(char offendingCharacter) { - super(offendingCharacter + '(' + - String.format("%04x", (int) offendingCharacter) + ')' + + super(offendingCharacter + "(" + (int)offendingCharacter + ")" + " is not a valid character. Only 4 characters are " + "excepted: '.', '-', '|', and '\\n'"); } @@ -20,9 +19,7 @@ public IllegalMorseCodeCharacterException(char offendingCharacter) { public MorseCodeTokenizer(String filePath) { try { - reader = new BufferedReader(new InputStreamReader( - new FileInputStream(filePath))); - + reader = new BufferedReader(new FileReader(filePath)); } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); @@ -30,50 +27,51 @@ public MorseCodeTokenizer(String filePath) { } public List tokenize() { - char c; - int charInt; LinkedList tokenList = new LinkedList<>(); StringBuilder currChar = new StringBuilder(); boolean lastCharWasPipe = false; try { - while ((charInt = reader.read()) != -1) { - c = (char)charInt; - System.out.println(c + "'" + currChar.toString() + "'" + tokenList); - switch (c) { - case '.': - case '-': - currChar.append(c); - break; - case '|': - if (lastCharWasPipe) { - Token lastToken = tokenList.getLast(); // peak - if (lastToken instanceof CharSeperatorToken) { - tokenList.removeLast(); - tokenList.add(new SpaceToken()); - } - else { - tokenList.add(new CharSeperatorToken()); + String line; + while ((line = reader.readLine()) != null) { + for (char c : line.toCharArray()) { + //System.out.println(c + " '" + currChar.toString() + "' " + tokenList); + switch (c) { + case '.': + case '-': + currChar.append(c); + break; + case '|': + if (lastCharWasPipe) { + Token lastToken = tokenList.getLast(); // peak + if (lastToken instanceof CharSeperatorToken) { + tokenList.removeLast(); + tokenList.add(new SpaceToken()); + } else { + tokenList.add(new CharSeperatorToken()); + } + lastCharWasPipe = false; + } else { + if (currChar.length() != 0) { + tokenList.add(new CharToken(currChar.toString())); + currChar = new StringBuilder(); + } + lastCharWasPipe = true; } - lastCharWasPipe = false; - } - else { - tokenList.add(new CharToken(currChar.toString())); - currChar = new StringBuilder(); - lastCharWasPipe = true; - } - break; - case '\n': - if (currChar.length() != 0) { - tokenList.add(new CharToken(currChar.toString())); - currChar = new StringBuilder(); - } - tokenList.add(new NewLineToken()); - break; - default: - throw new IllegalMorseCodeCharacterException(c); + break; + default: + throw new IllegalMorseCodeCharacterException(c); + } } + if (currChar.length() != 0) { + tokenList.add(new CharToken(currChar.toString())); + currChar = new StringBuilder(); + } + tokenList.add(new NewLineToken()); + } + if (tokenList.size() > 0 && + !(tokenList.getLast() instanceof NewLineToken)) { + tokenList.add(new CharToken(currChar.toString())); } - tokenList.add(new CharToken(currChar.toString())); } catch (NoSuchElementException nse) { new MorseCodeInterpreter.MorseCodeRuntimeException( diff --git a/src/token/CharSeperatorToken.java b/src/token/CharSeperatorToken.java index 7b7c418..0d1fa45 100644 --- a/src/token/CharSeperatorToken.java +++ b/src/token/CharSeperatorToken.java @@ -6,4 +6,9 @@ public String getValue() { return "||"; } + @Override + public String toString() { + return "CHAR_SEPERATOR"; + } + } diff --git a/src/token/CharToken.java b/src/token/CharToken.java index 463cee9..908925a 100644 --- a/src/token/CharToken.java +++ b/src/token/CharToken.java @@ -12,4 +12,9 @@ public String getValue() { return value; } + @Override + public String toString() { + return "'" + getValue() + "'"; + } + } diff --git a/src/token/NewLineToken.java b/src/token/NewLineToken.java index bef6086..f3a6c90 100644 --- a/src/token/NewLineToken.java +++ b/src/token/NewLineToken.java @@ -6,4 +6,9 @@ public String getValue() { return "\n"; } + @Override + public String toString() { + return "NEW_LINE"; + } + } diff --git a/src/token/SpaceToken.java b/src/token/SpaceToken.java index 393a0d9..c7f3abf 100644 --- a/src/token/SpaceToken.java +++ b/src/token/SpaceToken.java @@ -6,4 +6,9 @@ public String getValue() { return "||||"; } + @Override + public String toString() { + return "SPACE"; + } + } From e724bb4180f317742e3bf43e2b783e406af0e2b3 Mon Sep 17 00:00:00 2001 From: Mark Nash Date: Sat, 23 Mar 2019 13:25:18 -0400 Subject: [PATCH 4/7] Simplified character parsing --- src/MorseCodeParser.java | 12 +++++------- src/MorseCodeTokenizer.java | 6 +++--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/MorseCodeParser.java b/src/MorseCodeParser.java index 3a2a944..c67e870 100644 --- a/src/MorseCodeParser.java +++ b/src/MorseCodeParser.java @@ -11,11 +11,6 @@ public class MorseCodeParser { 'n', 'm', 's', 'u', 'r', 'w', 'd', 'k', 'g', 'o', 'h', 'v', 'f', '\0', 'l', '\0', 'p', 'j', 'b', 'x', 'c', 'y', 'z', 'q', '\0' }; - private static final int[][] TRANSITIONS = { - {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}, // . - {2, 4, 6, 8, 10, 12, 14, 16, 29, 29, 22, 24, 26, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}, // - - }; - public MorseCodeParser(List tokenList) { this.tokenList = (LinkedList)tokenList; } @@ -46,15 +41,18 @@ private Character parseCharacter(CharToken token) { for (char c : token.getValue().toCharArray()) { switch (c) { case '.': - currState = TRANSITIONS[0][currState]; + currState = currState * 2 + 1; break; case '-': - currState = TRANSITIONS[1][currState]; + currState = (currState + 1) * 2; break; default: // this has already been dealt with during tokenizing } } + if (currState > 29) { + currState = 29; + } char parsedChar = STATES_TO_CHARS[currState]; if (parsedChar == '\0') { diff --git a/src/MorseCodeTokenizer.java b/src/MorseCodeTokenizer.java index 75fd642..502d579 100644 --- a/src/MorseCodeTokenizer.java +++ b/src/MorseCodeTokenizer.java @@ -11,9 +11,9 @@ public class MorseCodeTokenizer { private static class IllegalMorseCodeCharacterException extends Exception { public IllegalMorseCodeCharacterException(char offendingCharacter) { - super(offendingCharacter + "(" + (int)offendingCharacter + ")" + - " is not a valid character. Only 4 characters are " + - "excepted: '.', '-', '|', and '\\n'"); + super("'" + offendingCharacter + "' " + "(" + + (int)offendingCharacter + ") is not a valid character. " + + "Only 4 characters are excepted: '.', '-', '|', and '\\n'"); } } From 07eeda8f6e24d5326a319ba23144a94091fc9ec3 Mon Sep 17 00:00:00 2001 From: Mark Nash Date: Sat, 23 Mar 2019 13:48:51 -0400 Subject: [PATCH 5/7] Refactored to throw exceptions to main method instead of continuing --- src/MorseCodeInterpreter.java | 12 +++-- src/MorseCodeTokenizer.java | 97 +++++++++++++++++------------------ 2 files changed, 55 insertions(+), 54 deletions(-) diff --git a/src/MorseCodeInterpreter.java b/src/MorseCodeInterpreter.java index 623199a..00c6e17 100644 --- a/src/MorseCodeInterpreter.java +++ b/src/MorseCodeInterpreter.java @@ -15,9 +15,15 @@ public static String interpret(String path) { throw new IllegalArgumentException( "Morse Code file path cannot be null."); } - List tokenList = new MorseCodeTokenizer(path).tokenize(); - String interpreted = new MorseCodeParser(tokenList).parse(); - return interpreted; + List tokenList; + try { + tokenList = new MorseCodeTokenizer(path).tokenize(); + } catch (Exception e) { + e.printStackTrace(); + return null; + } + String interpretedString = new MorseCodeParser(tokenList).parse(); + return interpretedString; } public static void main(String[] args) { diff --git a/src/MorseCodeTokenizer.java b/src/MorseCodeTokenizer.java index 502d579..10f6eeb 100644 --- a/src/MorseCodeTokenizer.java +++ b/src/MorseCodeTokenizer.java @@ -17,70 +17,65 @@ public IllegalMorseCodeCharacterException(char offendingCharacter) { } } - public MorseCodeTokenizer(String filePath) { - try { - reader = new BufferedReader(new FileReader(filePath)); - } - catch (FileNotFoundException fnfe) { - fnfe.printStackTrace(); - } + public MorseCodeTokenizer(String filePath) throws FileNotFoundException { + reader = new BufferedReader(new FileReader(filePath)); } - public List tokenize() { + public List tokenize() throws + NoSuchElementException, + MorseCodeInterpreter.MorseCodeRuntimeException, + IllegalMorseCodeCharacterException, + IOException { + LinkedList tokenList = new LinkedList<>(); StringBuilder currChar = new StringBuilder(); boolean lastCharWasPipe = false; - try { - String line; - while ((line = reader.readLine()) != null) { - for (char c : line.toCharArray()) { - //System.out.println(c + " '" + currChar.toString() + "' " + tokenList); - switch (c) { - case '.': - case '-': - currChar.append(c); - break; - case '|': - if (lastCharWasPipe) { - Token lastToken = tokenList.getLast(); // peak - if (lastToken instanceof CharSeperatorToken) { - tokenList.removeLast(); - tokenList.add(new SpaceToken()); - } else { - tokenList.add(new CharSeperatorToken()); - } - lastCharWasPipe = false; + String line; + while ((line = reader.readLine()) != null) { + for (char c : line.toCharArray()) { + switch (c) { + case '.': + case '-': + currChar.append(c); + break; + case '|': + if (lastCharWasPipe) { + if (tokenList.size() == 0) { + throw new MorseCodeInterpreter. + MorseCodeRuntimeException( + "Line cannot start with '||'"); + } else if (tokenList.getLast() instanceof + CharSeperatorToken) { + tokenList.removeLast(); + tokenList.add(new SpaceToken()); } else { - if (currChar.length() != 0) { - tokenList.add(new CharToken(currChar.toString())); - currChar = new StringBuilder(); - } - lastCharWasPipe = true; + tokenList.add(new CharSeperatorToken()); } - break; - default: - throw new IllegalMorseCodeCharacterException(c); - } - } - if (currChar.length() != 0) { - tokenList.add(new CharToken(currChar.toString())); - currChar = new StringBuilder(); + lastCharWasPipe = false; + } else { + if (currChar.length() != 0) { + tokenList.add(new CharToken( + currChar.toString())); + currChar = new StringBuilder(); + } + lastCharWasPipe = true; + } + break; + default: + throw new IllegalMorseCodeCharacterException(c); } - tokenList.add(new NewLineToken()); } - if (tokenList.size() > 0 && - !(tokenList.getLast() instanceof NewLineToken)) { + if (currChar.length() != 0) { tokenList.add(new CharToken(currChar.toString())); + currChar = new StringBuilder(); } - } - catch (NoSuchElementException nse) { - new MorseCodeInterpreter.MorseCodeRuntimeException( - "Line cannot start with '||'").printStackTrace(); - } - catch (Exception e) { - e.printStackTrace(); + tokenList.add(new NewLineToken()); } + if (tokenList.size() > 0 && + !(tokenList.getLast() instanceof NewLineToken)) { + tokenList.add(new CharToken(currChar.toString())); + } return tokenList; } } From 12e8ec416b78f6c6ffbfd3d11b0b7037b22283ea Mon Sep 17 00:00:00 2001 From: Mark Nash Date: Sat, 23 Mar 2019 22:04:05 -0400 Subject: [PATCH 6/7] Commented and tested --- src/MorseCodeInterpreter.java | 23 ++++-- src/MorseCodeParser.java | 42 ++++++++--- src/MorseCodeTokenizer.java | 75 ++++++++++++------- ...atorToken.java => CharSeparatorToken.java} | 8 +- src/token/CharToken.java | 10 ++- src/token/NewLineToken.java | 8 +- src/token/SpaceToken.java | 10 ++- src/token/Token.java | 10 ++- test/MorseCodeInterpreterTest.java | 14 ++++ test/alphabet.txt | 1 + test/fle | 1 - test/given.txt | 2 + test/stress.txt | 6 ++ 13 files changed, 160 insertions(+), 50 deletions(-) rename src/token/{CharSeperatorToken.java => CharSeparatorToken.java} (53%) create mode 100644 test/MorseCodeInterpreterTest.java create mode 100644 test/alphabet.txt delete mode 100644 test/fle create mode 100644 test/given.txt create mode 100644 test/stress.txt diff --git a/src/MorseCodeInterpreter.java b/src/MorseCodeInterpreter.java index 00c6e17..e9000df 100644 --- a/src/MorseCodeInterpreter.java +++ b/src/MorseCodeInterpreter.java @@ -2,14 +2,31 @@ import java.util.List; +/** + * @author Mark Nash + * + * The entry point to Morse Code interpreter. Call "interpret" from anywhere + * in your code. + */ public class MorseCodeInterpreter { - static class MorseCodeRuntimeException extends Exception { + /** + * A runtime exception that happened while tokenizing a string of + * Morse Code. The message of the exception further specifies the issue. + */ + static class MorseCodeRuntimeException extends RuntimeException { MorseCodeRuntimeException(String message) { super(message); } } + /** + * Reads the file, creates a list of tokens from the four token types, + * parses those tokens into English + * @param path A string of the file path to translate + * @return The interpreted string, null is returned if the file contained + * incorrect input + */ public static String interpret(String path) { if (path == null) { throw new IllegalArgumentException( @@ -26,8 +43,4 @@ public static String interpret(String path) { return interpretedString; } - public static void main(String[] args) { - System.out.println(MorseCodeInterpreter.interpret("test/fle")); - } - } diff --git a/src/MorseCodeParser.java b/src/MorseCodeParser.java index c67e870..2101baa 100644 --- a/src/MorseCodeParser.java +++ b/src/MorseCodeParser.java @@ -3,32 +3,47 @@ import java.util.LinkedList; import java.util.List; +/** + * @author Mark Nash + * + * Builds the string based on the tokens that have been read in + */ public class MorseCodeParser { + /** The tokens that have been read in. */ private LinkedList tokenList; + /** + * Each index is a state and the data is the character that that state + * corresponds to. + */ private static final char[] STATES_TO_CHARS = { '\0', 'e', 't', 'i', 'a', 'n', 'm', 's', 'u', 'r', 'w', 'd', 'k', 'g', 'o', 'h', 'v', 'f', '\0', 'l', '\0', 'p', 'j', 'b', 'x', 'c', 'y', 'z', 'q', '\0' }; + /** Store the token list. */ public MorseCodeParser(List tokenList) { this.tokenList = (LinkedList)tokenList; } + /** + * Create meaning out of the list of tokens + * @return The string that the list of tokens translates to + */ public String parse() { StringBuilder result = new StringBuilder(); for (Token token : tokenList) { if (token instanceof CharToken) { - result.append(parseCharacter((CharToken)token)); + result.append(decodeCharacter(token.getValue())); } else if (token instanceof SpaceToken) { - result.append(' '); + result.append(token.getValue()); } else if (token instanceof NewLineToken) { - result.append('\n'); + result.append(token.getValue()); } else { - // This is a CharSeperatorToken. The tokenizer has already + // This is a CharSeparatorToken. The tokenizer has already // separated the characters, so nothing needs to be done // with this } @@ -36,9 +51,19 @@ else if (token instanceof NewLineToken) { return result.toString(); } - private Character parseCharacter(CharToken token) { + /** + * Determine the character from running a DFA state machine. + * An example of one can be found here: + * http://sound.whsites.net/articles/morse-f5.gif + * Each state is numbered starting from 0 going + * top to bottom, right to left. + * + * @param tokenValue A string of '.' and or '-' + * @return an ascii character that the Morse Code string corresponds to + */ + private char decodeCharacter(String tokenValue) { int currState = 0; - for (char c : token.getValue().toCharArray()) { + for (char c : tokenValue.toCharArray()) { switch (c) { case '.': currState = currState * 2 + 1; @@ -56,9 +81,8 @@ private Character parseCharacter(CharToken token) { char parsedChar = STATES_TO_CHARS[currState]; if (parsedChar == '\0') { - new MorseCodeInterpreter.MorseCodeRuntimeException( - "Illegal character: '" + token.getValue() + "'" - ).printStackTrace(); + throw new MorseCodeInterpreter.MorseCodeRuntimeException( + "Illegal character: '" + tokenValue + "'"); } return parsedChar; } diff --git a/src/MorseCodeTokenizer.java b/src/MorseCodeTokenizer.java index 10f6eeb..a601cc3 100644 --- a/src/MorseCodeTokenizer.java +++ b/src/MorseCodeTokenizer.java @@ -3,12 +3,25 @@ import java.io.*; import java.util.LinkedList; import java.util.List; -import java.util.NoSuchElementException; +/** + * @author Mark Nash + * + * Creates tokens for the whole file. The tokens are: + * || -> a character seperator + * |||| -> a space + * '\n' -> a newline + * '[.-]+' -> a character + * + * The four legal characters that can show up in a file are: + * '.', '-', '|', '\n' + */ public class MorseCodeTokenizer { + /** The file reader. */ private BufferedReader reader; + /** An illegal character has been found while reading the file. */ private static class IllegalMorseCodeCharacterException extends Exception { public IllegalMorseCodeCharacterException(char offendingCharacter) { super("'" + offendingCharacter + "' " + "(" + @@ -17,48 +30,57 @@ public IllegalMorseCodeCharacterException(char offendingCharacter) { } } + /** + * Opens the file and creates a reader object + * @param filePath The path of the file to open + * @throws FileNotFoundException if the file does not exist. + */ public MorseCodeTokenizer(String filePath) throws FileNotFoundException { reader = new BufferedReader(new FileReader(filePath)); } + /** + * Reads the characters in the file and creates tokens out of them. + * @return A List of tokens from the reading of the file + * @throws MorseCodeInterpreter.MorseCodeRuntimeException A '|' character + * was read without another '|' with it + * @throws IllegalMorseCodeCharacterException An unaccepted character of the + * four characters was read + * @throws IOException Error closing the file or reading a line + */ public List tokenize() throws - NoSuchElementException, MorseCodeInterpreter.MorseCodeRuntimeException, IllegalMorseCodeCharacterException, IOException { LinkedList tokenList = new LinkedList<>(); StringBuilder currChar = new StringBuilder(); - boolean lastCharWasPipe = false; String line; while ((line = reader.readLine()) != null) { - for (char c : line.toCharArray()) { + char[] charsOfLine = line.toCharArray(); + for (int i = 0; i < charsOfLine.length; i++) { + char c = charsOfLine[i]; switch (c) { case '.': case '-': currChar.append(c); break; case '|': - if (lastCharWasPipe) { - if (tokenList.size() == 0) { - throw new MorseCodeInterpreter. - MorseCodeRuntimeException( - "Line cannot start with '||'"); - } else if (tokenList.getLast() instanceof - CharSeperatorToken) { - tokenList.removeLast(); - tokenList.add(new SpaceToken()); - } else { - tokenList.add(new CharSeperatorToken()); - } - lastCharWasPipe = false; + if (i++ == charsOfLine.length || + charsOfLine[i] != '|') { + throw new MorseCodeInterpreter. + MorseCodeRuntimeException( + "'|' is not a valid token"); + } + if (tokenList.size() > 0 && currChar.length() == 0 && + tokenList.getLast() + instanceof CharSeparatorToken) { + tokenList.removeLast(); + tokenList.add(new SpaceToken()); } else { - if (currChar.length() != 0) { - tokenList.add(new CharToken( - currChar.toString())); - currChar = new StringBuilder(); - } - lastCharWasPipe = true; + tokenList.add(new CharToken(currChar.toString())); + currChar = new StringBuilder(); + tokenList.add(new CharSeparatorToken()); } break; default: @@ -72,10 +94,9 @@ public List tokenize() throws tokenList.add(new NewLineToken()); } - if (tokenList.size() > 0 && - !(tokenList.getLast() instanceof NewLineToken)) { - tokenList.add(new CharToken(currChar.toString())); - } + tokenList.removeLast(); // the guaranteed extraneous newline + + reader.close(); return tokenList; } } diff --git a/src/token/CharSeperatorToken.java b/src/token/CharSeparatorToken.java similarity index 53% rename from src/token/CharSeperatorToken.java rename to src/token/CharSeparatorToken.java index 0d1fa45..82bd4f1 100644 --- a/src/token/CharSeperatorToken.java +++ b/src/token/CharSeparatorToken.java @@ -1,7 +1,13 @@ package token; -public class CharSeperatorToken extends Token { +/** + * @author Mark Nash + * + * Separates characters in a Morse Code file. + */ +public class CharSeparatorToken implements Token { + @Override public String getValue() { return "||"; } diff --git a/src/token/CharToken.java b/src/token/CharToken.java index 908925a..11864b8 100644 --- a/src/token/CharToken.java +++ b/src/token/CharToken.java @@ -1,6 +1,11 @@ package token; -public class CharToken extends Token { +/** + * @author Mark Nash + * + * Holds a string of '.' and/or '-'. + */ +public class CharToken implements Token { private String value; @@ -8,13 +13,14 @@ public CharToken(String value) { this.value = value; } + @Override public String getValue() { return value; } @Override public String toString() { - return "'" + getValue() + "'"; + return "'" + value + "'"; } } diff --git a/src/token/NewLineToken.java b/src/token/NewLineToken.java index f3a6c90..26d97b5 100644 --- a/src/token/NewLineToken.java +++ b/src/token/NewLineToken.java @@ -1,7 +1,13 @@ package token; -public class NewLineToken extends Token { +/** + * @author Mark Nash + * + * A newline token. + */ +public class NewLineToken implements Token { + @Override public String getValue() { return "\n"; } diff --git a/src/token/SpaceToken.java b/src/token/SpaceToken.java index c7f3abf..2738867 100644 --- a/src/token/SpaceToken.java +++ b/src/token/SpaceToken.java @@ -1,9 +1,15 @@ package token; -public class SpaceToken extends Token { +/** + * @author Mark Nash + * + * A token that represents a space between characters. + */ +public class SpaceToken implements Token { + @Override public String getValue() { - return "||||"; + return " "; } @Override diff --git a/src/token/Token.java b/src/token/Token.java index 33cee31..da352c1 100644 --- a/src/token/Token.java +++ b/src/token/Token.java @@ -1,6 +1,12 @@ package token; -public abstract class Token { +/** + * @author Mark Nash + * + * A sort of token that has a semantic meaning. The data in an instance of one + * of these tokens is the meaning, and the name is the token type. + */ +public interface Token { - abstract String getValue(); + String getValue(); } diff --git a/test/MorseCodeInterpreterTest.java b/test/MorseCodeInterpreterTest.java new file mode 100644 index 0000000..9758a67 --- /dev/null +++ b/test/MorseCodeInterpreterTest.java @@ -0,0 +1,14 @@ +public class MorseCodeInterpreterTest { + + public static void main(String[] args) { + for (String path : args) { + test(path); + } + } + + private static void test(String path) { + System.out.println(path + " {\n" + + MorseCodeInterpreter.interpret(path) + + "\n}\n"); + } +} diff --git a/test/alphabet.txt b/test/alphabet.txt new file mode 100644 index 0000000..d9f7cfa --- /dev/null +++ b/test/alphabet.txt @@ -0,0 +1 @@ +.-||-...||-.-.||-..||.||..-.||--.||....||..||.---||-.-||.-..||--||-.||---||.--.||--.-||.-.||...||-||..-||...-||.--||-..-||-.--||--.. diff --git a/test/fle b/test/fle deleted file mode 100644 index 6d98966..0000000 --- a/test/fle +++ /dev/null @@ -1 +0,0 @@ -.||..||...||....|| \ No newline at end of file diff --git a/test/given.txt b/test/given.txt new file mode 100644 index 0000000..ec5ec69 --- /dev/null +++ b/test/given.txt @@ -0,0 +1,2 @@ +-..||---||--. +....||.||.-..||.-..||---||||.--||---||.-.||.-..||-.. \ No newline at end of file diff --git a/test/stress.txt b/test/stress.txt new file mode 100644 index 0000000..e5a2b65 --- /dev/null +++ b/test/stress.txt @@ -0,0 +1,6 @@ + + +.. + +.||.--.||||.-.||-.-||--||-..||.--.||||- +..||||....||---||.--.||.||||-.--||---||..-||||.-..||..||-.-||.||||--||-.--||||-.-.||---||-..||. \ No newline at end of file From 9c8ebc2169e8ee649859e3c26408d7ee8c95c709 Mon Sep 17 00:00:00 2001 From: Mark Nash Date: Sat, 23 Mar 2019 22:53:31 -0400 Subject: [PATCH 7/7] Fixed new line issue and chained space issue --- src/MorseCodeTokenizer.java | 77 ++++++++++++++++++++----------------- test/alphabet.txt | 2 +- test/spacing.txt | 4 ++ test/stress.txt | 4 +- 4 files changed, 48 insertions(+), 39 deletions(-) create mode 100644 test/spacing.txt diff --git a/src/MorseCodeTokenizer.java b/src/MorseCodeTokenizer.java index a601cc3..b556773 100644 --- a/src/MorseCodeTokenizer.java +++ b/src/MorseCodeTokenizer.java @@ -46,7 +46,7 @@ public MorseCodeTokenizer(String filePath) throws FileNotFoundException { * was read without another '|' with it * @throws IllegalMorseCodeCharacterException An unaccepted character of the * four characters was read - * @throws IOException Error closing the file or reading a line + * @throws IOException Error closing the file or reading a character */ public List tokenize() throws MorseCodeInterpreter.MorseCodeRuntimeException, @@ -55,48 +55,53 @@ public List tokenize() throws LinkedList tokenList = new LinkedList<>(); StringBuilder currChar = new StringBuilder(); - String line; - while ((line = reader.readLine()) != null) { - char[] charsOfLine = line.toCharArray(); - for (int i = 0; i < charsOfLine.length; i++) { - char c = charsOfLine[i]; - switch (c) { - case '.': - case '-': - currChar.append(c); - break; - case '|': - if (i++ == charsOfLine.length || - charsOfLine[i] != '|') { - throw new MorseCodeInterpreter. - MorseCodeRuntimeException( - "'|' is not a valid token"); - } - if (tokenList.size() > 0 && currChar.length() == 0 && - tokenList.getLast() - instanceof CharSeparatorToken) { - tokenList.removeLast(); - tokenList.add(new SpaceToken()); - } else { + int intChar; + char c; + while ((intChar = reader.read()) != -1) { + c = (char)intChar; + switch (c) { + case '.': + case '-': + currChar.append(c); + break; + case '|': + intChar = reader.read(); + c = (char)intChar; + if (intChar == -1 || c != '|') { + throw new MorseCodeInterpreter.MorseCodeRuntimeException + ("'|' is not a valid token"); + } + if (tokenList.size() > 0 && currChar.length() == 0 && + tokenList.getLast() instanceof CharSeparatorToken) { + tokenList.removeLast(); + tokenList.add(new SpaceToken()); + } else { + if (currChar.length() != 0) { tokenList.add(new CharToken(currChar.toString())); currChar = new StringBuilder(); - tokenList.add(new CharSeparatorToken()); } - break; - default: - throw new IllegalMorseCodeCharacterException(c); - } - } - if (currChar.length() != 0) { - tokenList.add(new CharToken(currChar.toString())); - currChar = new StringBuilder(); + tokenList.add(new CharSeparatorToken()); + } + break; + case '\r': + break; + case '\n': + if (currChar.length() != 0) { + tokenList.add(new CharToken(currChar.toString())); + currChar = new StringBuilder(); + } + tokenList.add(new NewLineToken()); + break; + default: + throw new IllegalMorseCodeCharacterException(c); } - tokenList.add(new NewLineToken()); } - - tokenList.removeLast(); // the guaranteed extraneous newline + if (currChar.length() != 0) { + tokenList.add(new CharToken(currChar.toString())); + } reader.close(); return tokenList; } + } diff --git a/test/alphabet.txt b/test/alphabet.txt index d9f7cfa..fbecea3 100644 --- a/test/alphabet.txt +++ b/test/alphabet.txt @@ -1 +1 @@ -.-||-...||-.-.||-..||.||..-.||--.||....||..||.---||-.-||.-..||--||-.||---||.--.||--.-||.-.||...||-||..-||...-||.--||-..-||-.--||--.. +.-||-...||-.-.||-..||.||..-.||--.||....||..||.---||-.-||.-..||--||-.||---||.--.||--.-||.-.||...||-||..-||...-||.--||-..-||-.--||--.. \ No newline at end of file diff --git a/test/spacing.txt b/test/spacing.txt new file mode 100644 index 0000000..581ca9d --- /dev/null +++ b/test/spacing.txt @@ -0,0 +1,4 @@ + +.-||-. +.-||||-. +.-||||||||-. diff --git a/test/stress.txt b/test/stress.txt index e5a2b65..bf60799 100644 --- a/test/stress.txt +++ b/test/stress.txt @@ -2,5 +2,5 @@ .. -.||.--.||||.-.||-.-||--||-..||.--.||||- -..||||....||---||.--.||.||||-.--||---||..-||||.-..||..||-.-||.||||--||-.--||||-.-.||---||-..||. \ No newline at end of file +.||.--.||||.-.||-.-||--||-..||.--.||||||||- +..||||....||---||.--.||.||||-.--||---||..-||||.-..||..||-.-||.||||--||-.--||||-.-.||---||-..||.