1 change: 1 addition & 0 deletions .gitignore
@@ -1,2 +1,3 @@
 *.sublime-*
 node_modules
+lib
2 changes: 2 additions & 0 deletions .npmignore
@@ -0,0 +1,2 @@
*.sublime-*
node_modules
3 changes: 2 additions & 1 deletion .travis.yml
@@ -1,4 +1,5 @@
 language: node_js
 node_js:
-  - 6
+  - 8
   - 8
+  - 10
6 changes: 3 additions & 3 deletions README.md
@@ -15,7 +15,7 @@ npm install sentence-tokenizer
 Require the module:

 ```js
-var Tokenizer = require('sentence-tokenizer');
+var Tokenizer = require('sentence-tokenizer').Tokenizer;
 ```

 Instantiate a tokenizer, with the name of the utterer:
@@ -27,13 +27,13 @@ var tokenizer = new Tokenizer('Chuck');
 Set the entry to work on:

 ```js
-tokenizer.setEntry("This is an entry. Possibly composed of several sentences.");
+tokenizer.entry = "This is an entry. Possibly composed of several sentences.";
 ```

 Get the sentences:

 ```js
-console.log(tokenizer.getSentences());
+console.log(tokenizer.sentences);
 ```

 Which should produce:
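The README's expected output is cut off in this diff. As a check on the new accessor-based API, here is a minimal sketch; the printed array is what the `sentences` getter in src/tokenizer.ts produces for this entry, not text quoted from the README:

```ts
import { Tokenizer } from 'sentence-tokenizer';

const tokenizer = new Tokenizer('Chuck');

// The setter replaces setEntry(); assigning also resets the cached sentences.
tokenizer.entry = 'This is an entry. Possibly composed of several sentences.';

// The getter replaces getSentences().
console.log(tokenizer.sentences);
// [ 'This is an entry.', 'Possibly composed of several sentences.' ]
```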
10 changes: 0 additions & 10 deletions lib/tokenizer.d.ts

This file was deleted.

78 changes: 0 additions & 78 deletions lib/tokenizer.js

This file was deleted.

12 changes: 12 additions & 0 deletions package-lock.json

Some generated files are not rendered by default.

11 changes: 9 additions & 2 deletions package.json
@@ -6,7 +6,12 @@
   "scripts": {
     "lint": "eslint lib",
     "test": "mocha",
-    "test-w": "mocha -w"
+    "test:w": "mocha -w",
+    "build": "tsc",
+    "build:w": "tsc --watch",
+    "pretest": "npm run build",
+    "prepublish": "npm run build",
+    "postversion": "git push && git push --tags"
   },
   "homepage": "http://github.com/parmentf/node-sentence-tokenizer",
   "repository": {
@@ -24,7 +29,9 @@
     "debug": "4.1.0"
   },
   "devDependencies": {
+    "@types/node": "10.12.11",
     "eslint": "5.9.0",
-    "mocha": "5.2.0"
+    "mocha": "5.2.0",
+    "typescript": "3.2.1"
   }
 }
61 changes: 61 additions & 0 deletions src/tokenizer.ts
@@ -0,0 +1,61 @@
const debug = require('debug')('tokenizer');

// Trim, and collapse a double space into a single one.
const compact = (str: string): string => str.trim().replace('  ', ' ');

export class Tokenizer {
    username: string
    botname: string
    protected _entry: string
    protected _sentences: string[]

    constructor(username: string = 'Guy', botname: string = 'ECTOR') {
        this.username = username
        this.botname = botname
        this._entry = '';
        this._sentences = [];
    }

    // Setting a new entry resets the cached sentences.
    set entry(value: string) {
        this._entry = compact(value)
        this._sentences = []
    }

    // Split the entry into sentences.
    get sentences(): string[] {
        // this.sentences = this.entry.split(/[\.!]\s/);
        if (!this._entry) return [];
        const words: string[] = this._entry.split(' ');
        // Words that close a sentence (end with ., ! or ?).
        const endingWords = words.filter((w: string): boolean =>
            w.endsWith('.') || w.endsWith('!') || w.endsWith('?')
        );

        const botnameRegExp = new RegExp("\\W?" + this.botname.normalize() + "\\W?");
        const usernameRegExp = new RegExp("\\W?" + this.username.normalize() + "\\W?");
        this._sentences = [];
        let lastSentence: string = words[0];
        words.reduce((prev, cur: string): string => {
            const curNormalized: string = cur.normalize();
            let curReplaced: string = cur;
            // Swap the bot's or the user's name for a placeholder.
            if (curNormalized.search(botnameRegExp) !== -1) {
                curReplaced = cur.replace(this.botname, "{yourname}");
            }
            else if (curNormalized.search(usernameRegExp) !== -1) {
                curReplaced = cur.replace(this.username, "{myname}");
            }

            // The previous word closed a sentence: flush it and start a new one.
            if (endingWords.indexOf(prev) !== -1) {
                this._sentences.push(compact(lastSentence));
                lastSentence = "";
            }
            lastSentence = lastSentence + " " + curReplaced;
            return cur;
        });
        this._sentences.push(compact(lastSentence));
        return this._sentences;
    }

    // Get the tokens of one sentence.
    getTokens(sentenceIndex: number = 0): string[] {
        return this._sentences[sentenceIndex].split(' ');
    }
}
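The name-replacement branches are easiest to follow with a concrete input. This sketch mirrors the 'Names' case in test/test.js; the first expected string comes straight from that test's assertion, and the second follows from the username branch of the getter:

```ts
import { Tokenizer } from 'sentence-tokenizer';

// 'François' is the utterer's name; the bot name defaults to 'ECTOR'.
const tokenizer = new Tokenizer('François');
tokenizer.entry = "Salut ECTOR. Je m'appelle François.";

// Both names are swapped for placeholders while the entry is split.
console.log(tokenizer.sentences);
// [ 'Salut {yourname}.', "Je m'appelle {myname}." ]

// getTokens() splits one already-computed sentence on spaces.
console.log(tokenizer.getTokens(0));
// [ 'Salut', '{yourname}.' ]
```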
30 changes: 15 additions & 15 deletions test/test.js
@@ -5,7 +5,7 @@
 var debug = require('debug')('tokenizer:test');
 var assert = require('assert');

-var Tokenizer = require('../lib/tokenizer');
+var Tokenizer = require('../lib/tokenizer').Tokenizer;

 describe('Tokenizer creations', function () {
     describe('No botname', function () {
@@ -35,8 +35,8 @@ describe('Sentences token', function () {
         " N'est-ce pas ? " +
         " Et avec une URL en plus, c'est mieux: http://google.com." +
         " Mais il nous manque encore un mail: gg@gggg.kk";
-    tokenizer.setEntry(entry);
-    var sentences = tokenizer.getSentences();
+    tokenizer.entry = entry;
+    var sentences = tokenizer.sentences;

     it("should get 4 sentences", function () {
         assert.equal(sentences.length, 4);
@@ -71,8 +71,8 @@ describe('Sentences token', function () {
     describe('Two sentences', function () {
         var entry = "Salut." +
             " Hello.";
-        tokenizer.setEntry(entry);
-        var sentences = tokenizer.getSentences();
+        tokenizer.entry = entry;
+        var sentences = tokenizer.sentences;

         it("should get 2 sentences", function () {
             assert.equal(sentences.length, 2);
@@ -83,8 +83,8 @@ describe('Sentences token', function () {
         debug('Only one sentence!');
         var entry = "Hello.";
         var tokenizer2 = new Tokenizer('François');
-        tokenizer2.setEntry(entry);
-        var sentences = tokenizer2.getSentences();
+        tokenizer2.entry = entry;
+        var sentences = tokenizer2.sentences;

         it('should get one sentence', function () {
             assert.equal(sentences.length, 1);
@@ -97,8 +97,8 @@ describe('Sentences token', function () {

     describe('Empty sentence', function () {
         var entry = " ";
-        tokenizer.setEntry(entry);
-        var sentences = tokenizer.getSentences();
+        tokenizer.entry = entry;
+        var sentences = tokenizer.sentences;

         it('should handle gracefully', function () {
             assert.equal(sentences.length, 0);
@@ -107,8 +107,8 @@ describe('Sentences token', function () {

     describe('False end', function () {
         var entry = "Bon sang ce n'est pas ça. Bon sang";
-        tokenizer.setEntry(entry);
-        var sentences = tokenizer.getSentences();
+        tokenizer.entry = entry;
+        var sentences = tokenizer.sentences;

         it('should produce only 2 sentences', function () {
             assert.equal(sentences.length, 2);
@@ -117,8 +117,8 @@ describe('Sentences token', function () {

     describe('Names', function () {
         var entry = "Salut ECTOR. Je m'appelle François.";
-        tokenizer.setEntry(entry);
-        var sentences = tokenizer.getSentences();
+        tokenizer.entry = entry;
+        var sentences = tokenizer.sentences;

         it('botname replaced', function () {
             assert.equal(sentences[0], 'Salut {yourname}.');
@@ -136,8 +136,8 @@ describe('Word tokens', function() {
         " Je suis fort aise que tu m'écoutes." +
         " Très!!!" +
         " Appelle-moi François, si tu veux...";
-    tokenizer.setEntry(entry);
-    tokenizer.getSentences();
+    tokenizer.entry = entry;
+    var sentences = tokenizer.sentences; // eslint-disable-line no-unused-vars

     describe('First sentence', function () {
         var tokens = tokenizer.getTokens(0);
15 changes: 15 additions & 0 deletions tsconfig.json
@@ -0,0 +1,15 @@
{
    "compileOnSave": true,
    "compilerOptions": {
        "declaration": true,
        "module": "commonjs",
        "noFallthroughCasesInSwitch": true,
        "noImplicitReturns": true,
        "outDir": "./lib",
        "sourceMap": true,
        "target": "es2018",
    },
    "include": [
        "./src/**/*"
    ]
}
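With `declaration: true` and `outDir: "./lib"`, `npm run build` (wired into `pretest` and `prepublish` above) regenerates the deleted lib/tokenizer.js and lib/tokenizer.d.ts on demand, which is why `lib` is now git-ignored but absent from .npmignore. A hypothetical consumer, assuming TypeScript resolves the emitted declaration file next to the package's main entry:

```ts
import { Tokenizer } from 'sentence-tokenizer';

const tokenizer = new Tokenizer('Ada'); // 'Ada' is a made-up utterer name
tokenizer.entry = 'One sentence. Two sentences.';

// Typed as string[] via the generated lib/tokenizer.d.ts.
const sentences: string[] = tokenizer.sentences;
console.log(sentences);
```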