From 0f3056e7c600d4dfcca04e4fad75713e14e979e7 Mon Sep 17 00:00:00 2001
From: Yuelin Zhang <zyuelin@amazon.com>
Date: Sat, 4 Aug 2018 19:27:07 -0700
Subject: [PATCH 1/5] Migrate PR 11935

Migrate PR 11935 here.
Revise code based on code review:
1. set constructor variables for each class
2. add doc strings
3. improve coding style
4. add README
---
 .gitignore                                    |   7 +
 .../PredictLabels/.idea/PredictLabels.iml     |  11 +
 mxnet-bot/PredictLabels/.idea/misc.xml        |   7 +
 mxnet-bot/PredictLabels/.idea/modules.xml     |   8 +
 mxnet-bot/PredictLabels/.idea/workspace.xml   | 212 ++++++++++++++++++
 mxnet-bot/PredictLabels/DataFetcher.py        | 135 +++++++++++
 mxnet-bot/PredictLabels/Dockerfile            |  26 +++
 mxnet-bot/PredictLabels/Dockerrun.aws.json    |   7 +
 mxnet-bot/PredictLabels/Predictor.py          | 132 +++++++++++
 mxnet-bot/PredictLabels/README.md             |  25 +++
 mxnet-bot/PredictLabels/SentenceParser.py     | 134 +++++++++++
 mxnet-bot/PredictLabels/Trainer.py            |  87 +++++++
 mxnet-bot/PredictLabels/application.py        | 119 ++++++++++
 mxnet-bot/PredictLabels/cron.yaml             |  21 ++
 mxnet-bot/PredictLabels/plot_piechart.py      |  47 ++++
 mxnet-bot/PredictLabels/requirements.txt      |  33 +++
 mxnet-bot/PredictLabels/stopwords.txt         |   1 +
 mxnet-bot/PredictLabels/test_datafetcher.py   | 116 ++++++++++
 mxnet-bot/PredictLabels/test_predictor.py     |  95 ++++++++
 mxnet-bot/PredictLabels/test_sentenceparse.py |  66 ++++++
 mxnet-bot/PredictLabels/test_trainer.py       |  66 ++++++
 21 files changed, 1355 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 mxnet-bot/PredictLabels/.idea/PredictLabels.iml
 create mode 100644 mxnet-bot/PredictLabels/.idea/misc.xml
 create mode 100644 mxnet-bot/PredictLabels/.idea/modules.xml
 create mode 100644 mxnet-bot/PredictLabels/.idea/workspace.xml
 create mode 100644 mxnet-bot/PredictLabels/DataFetcher.py
 create mode 100644 mxnet-bot/PredictLabels/Dockerfile
 create mode 100755 mxnet-bot/PredictLabels/Dockerrun.aws.json
 create mode 100644 mxnet-bot/PredictLabels/Predictor.py
 create mode 100644 mxnet-bot/PredictLabels/README.md
 create mode 100644 mxnet-bot/PredictLabels/SentenceParser.py
 create mode 100644 mxnet-bot/PredictLabels/Trainer.py
 create mode 100644 mxnet-bot/PredictLabels/application.py
 create mode 100644 mxnet-bot/PredictLabels/cron.yaml
 create mode 100644 mxnet-bot/PredictLabels/plot_piechart.py
 create mode 100644 mxnet-bot/PredictLabels/requirements.txt
 create mode 100644 mxnet-bot/PredictLabels/stopwords.txt
 create mode 100644 mxnet-bot/PredictLabels/test_datafetcher.py
 create mode 100644 mxnet-bot/PredictLabels/test_predictor.py
 create mode 100644 mxnet-bot/PredictLabels/test_sentenceparse.py
 create mode 100644 mxnet-bot/PredictLabels/test_trainer.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6d9b737
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+# Pycharm Project
+.idea/
+
+# OSX Stuff
+.DS_Store
+.DS_Store
+mxnet-bot/.DS_Store
diff --git a/mxnet-bot/PredictLabels/.idea/PredictLabels.iml b/mxnet-bot/PredictLabels/.idea/PredictLabels.iml
new file mode 100644
index 0000000..6711606
--- /dev/null
+++ b/mxnet-bot/PredictLabels/.idea/PredictLabels.iml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/.idea/misc.xml b/mxnet-bot/PredictLabels/.idea/misc.xml
new file mode 100644
index 0000000..7a5c067
--- /dev/null
+++ b/mxnet-bot/PredictLabels/.idea/misc.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
+  <component name="PyCharmProfessionalAdvertiser">
+    <option name="shown" value="true" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/.idea/modules.xml b/mxnet-bot/PredictLabels/.idea/modules.xml
new file mode 100644
index 0000000..18fdcdd
--- /dev/null
+++ b/mxnet-bot/PredictLabels/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/PredictLabels.iml" filepath="$PROJECT_DIR$/.idea/PredictLabels.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/.idea/workspace.xml b/mxnet-bot/PredictLabels/.idea/workspace.xml
new file mode 100644
index 0000000..15f95e5
--- /dev/null
+++ b/mxnet-bot/PredictLabels/.idea/workspace.xml
@@ -0,0 +1,212 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ChangeListManager">
+    <list default="true" id="028f7063-2c5d-4a6b-be24-591f81619277" name="Default" comment="" />
+    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
+    <option name="TRACKING_ENABLED" value="true" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FileEditorManager">
+    <leaf>
+      <file leaf-file-name="application.py" pinned="false" current-in-tab="true">
+        <entry file="file://$PROJECT_DIR$/application.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="243">
+              <caret line="28" column="14" lean-forward="true" selection-start-line="28" selection-start-column="14" selection-end-line="28" selection-end-column="14" />
+              <folding>
+                <element signature="e#920#972#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+    </leaf>
+  </component>
+  <component name="IdeDocumentHistory">
+    <option name="CHANGED_PATHS">
+      <list>
+        <option value="$PROJECT_DIR$/DataFetcher.py" />
+        <option value="$PROJECT_DIR$/plot_piechart.py" />
+        <option value="$PROJECT_DIR$/Predictor.py" />
+        <option value="$PROJECT_DIR$/application.py" />
+        <option value="$PROJECT_DIR$/test_trainer.py" />
+        <option value="$PROJECT_DIR$/test_datafetcher.py" />
+        <option value="$PROJECT_DIR$/test_predictor.py" />
+        <option value="$PROJECT_DIR$/test_sentenceparse.py" />
+        <option value="$PROJECT_DIR$/Trainer.py" />
+        <option value="$PROJECT_DIR$/SentenceParser.py" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectFrameBounds">
+    <option name="y" value="23" />
+    <option name="width" value="1024" />
+    <option name="height" value="548" />
+  </component>
+  <component name="ProjectView">
+    <navigator proportions="" version="1">
+      <foldersAlwaysOnTop value="true" />
+    </navigator>
+    <panes>
+      <pane id="ProjectPane">
+        <subPane>
+          <expand>
+            <path>
+              <item name="PredictLabels" type="b2602c69:ProjectViewProjectNode" />
+              <item name="PredictLabels" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="PredictLabels" type="b2602c69:ProjectViewProjectNode" />
+              <item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
+            </path>
+          </expand>
+          <select />
+        </subPane>
+      </pane>
+      <pane id="Course" />
+      <pane id="Scope" />
+    </panes>
+  </component>
+  <component name="PropertiesComponent">
+    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
+    <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
+  </component>
+  <component name="RunDashboard">
+    <option name="ruleStates">
+      <list>
+        <RuleState>
+          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
+        </RuleState>
+        <RuleState>
+          <option name="name" value="StatusDashboardGroupingRule" />
+        </RuleState>
+      </list>
+    </option>
+  </component>
+  <component name="SvnConfiguration">
+    <configuration />
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="028f7063-2c5d-4a6b-be24-591f81619277" name="Default" comment="" />
+      <created>1533604709424</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1533604709424</updated>
+    </task>
+    <servers />
+  </component>
+  <component name="ToolWindowManager">
+    <frame x="0" y="23" width="1024" height="548" extended-state="0" />
+    <editor active="true" />
+    <layout>
+      <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.25" />
+      <window_info anchor="bottom" id="TODO" order="6" />
+      <window_info anchor="bottom" id="Event Log" side_tool="true" />
+      <window_info anchor="bottom" id="Run" order="2" />
+      <window_info anchor="bottom" id="Version Control" show_stripe_button="false" />
+      <window_info anchor="bottom" id="Python Console" />
+      <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
+      <window_info anchor="bottom" id="Terminal" />
+      <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
+      <window_info id="Favorites" side_tool="true" />
+      <window_info anchor="bottom" id="Find" order="1" />
+      <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
+      <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
+      <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
+      <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
+      <window_info anchor="bottom" id="Message" order="0" />
+      <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
+    </layout>
+  </component>
+  <component name="VcsContentAnnotationSettings">
+    <option name="myLimit" value="2678400000" />
+  </component>
+  <component name="editorHistoryManager">
+    <entry file="file://$PROJECT_DIR$/test_datafetcher.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="203">
+          <caret line="25" column="50" lean-forward="true" selection-start-line="25" selection-start-column="50" selection-end-line="25" selection-end-column="50" />
+          <folding>
+            <element signature="e#786#801#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/Trainer.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="230">
+          <caret line="21" column="27" selection-start-line="21" selection-start-column="27" selection-end-line="21" selection-end-column="27" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/test_trainer.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="194">
+          <caret line="21" column="17" selection-start-line="21" selection-start-column="17" selection-end-line="21" selection-end-column="17" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/test_predictor.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="231">
+          <caret line="22" column="15" selection-start-line="22" selection-start-column="15" selection-end-line="22" selection-end-column="15" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/DataFetcher.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="1760">
+          <caret line="123" selection-start-line="123" selection-end-line="123" />
+          <folding>
+            <element signature="e#849#886#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/test_sentenceparse.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="311">
+          <caret line="22" column="19" lean-forward="true" selection-start-line="22" selection-start-column="19" selection-end-line="22" selection-end-column="19" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/Predictor.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="133">
+          <caret line="17" column="46" lean-forward="true" selection-start-line="17" selection-start-column="46" selection-end-line="17" selection-end-column="46" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/plot_piechart.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="630">
+          <caret line="42" column="38" selection-start-line="42" selection-start-column="38" selection-end-line="42" selection-end-column="38" />
+          <folding>
+            <element signature="e#786#801#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/SentenceParser.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="115">
+          <caret line="18" column="29" selection-start-line="18" selection-start-column="29" selection-end-line="18" selection-end-column="29" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/application.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="243">
+          <caret line="28" column="14" lean-forward="true" selection-start-line="28" selection-start-column="14" selection-end-line="28" selection-end-column="14" />
+          <folding>
+            <element signature="e#920#972#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+  </component>
+</project>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/DataFetcher.py b/mxnet-bot/PredictLabels/DataFetcher.py
new file mode 100644
index 0000000..cef3ede
--- /dev/null
+++ b/mxnet-bot/PredictLabels/DataFetcher.py
@@ -0,0 +1,135 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script serves to fetch GitHub issues into a json file
+from __future__ import print_function
+import os
+import requests
+import json
+import re
+import pandas as pd
+import logging
+
+
+class DataFetcher:
+
+    def __init__(self,
+                 github_user=os.environ.get("github_user"),
+                 github_oauth_token=os.environ.get("github_oauth_token"),
+                 repo=os.environ.get("repo")):
+        """
+        This DataFetcher serves to fetch issues data
+        Args (each default is read from the environment once, when this method is defined):
+            github_user(str): the github id. e.g. "CathyZhang0822"
+            github_oauth_token(str): the github oauth token, sent with github_user as the auth pair of each request
+            repo(str): the repo name, placed after https://api.github.com/repos/ in request urls
+        """
+        self.github_user = github_user
+        self.github_oauth_token = github_oauth_token
+        self.repo = repo
+        self.auth = (self.github_user, self.github_oauth_token)
+        self.json_data = None
+
+    def cleanstr(self, raw_string, sub_string):
+        """
+        Replace every non-alphanumeric character of raw_string with
+        sub_string, then return the whole result in lower case
+        """
+        replaced = re.sub("[^0-9a-zA-Z]", sub_string, raw_string)
+        return replaced.lower()
+
+    def count_pages(self, state):
+        """
+        Count the pages of issues for the given state ("open"/"closed"/"all"): 1 when the
+        response has no "link" header, else the 3rd-from-last token of the cleaned header
+        """
+        url = 'https://api.github.com/repos/%s/issues' % self.repo
+        response = requests.get(url, {'state': state},
+                                auth=self.auth)
+        assert response.status_code == 200, "Authorization failed"
+        if "link" not in response.headers:
+            return 1
+        return int(self.cleanstr(response.headers['link'], " ").split()[-3])
+    
+    def fetch_issues(self, issue_nums):
+        """
+        This method is to fetch issues data, one GitHub request per issue
+        issue_nums: a non-empty list of issue ids
+        return issues' data in a pandas dataframe with id/title/body columns
+        """
+        assert issue_nums != [], "Empty Input!"
+        logging.info("Reading issues:{}".format(", ".join([str(num) for num in issue_nums])))
+        data = []
+        for number in issue_nums:
+            url = 'https://api.github.com/repos/' + self.repo + '/issues/' + str(number)
+            response = requests.get(url, auth=self.auth)
+            item = response.json()
+            assert 'title' in item, "{} issues doesn't exist!".format(str(number))
+            data += [{'id': str(number), 'title': item['title'], 'body': item['body']}]
+        return pd.DataFrame(data)
+
+    def data2json(self, state, labels=None, other_labels=False):
+        """
+        Store issues' data (pull requests are skipped) into a json file, return json file's name
+        state can be either "open"/"closed"/"all"
+        labels is a list of target labels we are interested in; None keeps every issue
+        other_labels: True records all labels of a matched issue, False only the first matched target label
+        """
+        assert state in set(['all', 'open', 'closed']), "Invalid State!"
+        logging.info("Reading {} issues..".format(state))
+        pages = self.count_pages(state)
+        data = []
+        for x in range(1, pages+1):
+            url = 'https://api.github.com/repos/' + self.repo + '/issues?page=' + str(x) \
+                  + '&per_page=30'.format(repo=self.repo)
+            response = requests.get(url,
+                                    {'state': state,
+                                     'base': 'master',
+                                     'sort': 'created'},
+                                    auth=self.auth)
+            for item in response.json():
+                if "pull_request" in item:
+                    continue
+                if "labels" in item:
+                    issue_labels=list(set([item['labels'][i]['name'] for i in range(len(item['labels']))]))
+                else:
+                    continue
+                if labels is not None:
+                    # keep an issue only if it carries at least one target label
+                    for label in labels:
+                        if label in issue_labels:
+                            if other_labels:
+                                # besides target labels, we still want other labels
+                                data += [{'id': item['number'],'title': item['title'], 'body': item['body'], 'labels': issue_labels}]
+                            else:
+                                # only record the target label; the feature-like labels are merged into "Feature"
+                                if(label in set(["Feature", "Call for Contribution", "Feature request"])):
+                                    label = "Feature"
+                                data += [{'id': item['number'], 'title': item['title'], 'body': item['body'], 'labels': label}]
+                            # the break stops at the first matching target label, so each issue is recorded once
+                            break
+                else:
+                    # no target labels given: record every issue with all of its labels
+                    data += [{'id': item['number'], 'title': item['title'], 'body': item['body'], 'labels': issue_labels}]
+        self.json_data = data
+        s_labels = "_".join(labels) if labels is not None else "all_labels"
+        filename = "{}_data.json_{}".format(state,s_labels)
+        logging.info("Writing json file..")
+        with open(filename, 'w') as write_file:
+            json.dump(data, write_file)
+        logging.info("{} json file is ready!".format(filename))
+        return filename
diff --git a/mxnet-bot/PredictLabels/Dockerfile b/mxnet-bot/PredictLabels/Dockerfile
new file mode 100644
index 0000000..16f9c31
--- /dev/null
+++ b/mxnet-bot/PredictLabels/Dockerfile
@@ -0,0 +1,26 @@
+FROM python:3.6.6
+
+# Update packages
+RUN apt-get update
+
+# Install prerequisite for matplotlib
+RUN apt-get -y install libxft-dev libfreetype6 libfreetype6-dev
+
+# Bundle app source
+COPY . /src
+
+EXPOSE 8000
+WORKDIR /src
+
+#install Python modules
+RUN pip install -r requirements.txt
+
+# Environment Variables
+ENV github_user your_github_id
+ENV github_oauth_token your_github_read_only_token
+ENV repo repo_name
+
+# Run it
+ENTRYPOINT ["python", "application.py"]
+
+
diff --git a/mxnet-bot/PredictLabels/Dockerrun.aws.json b/mxnet-bot/PredictLabels/Dockerrun.aws.json
new file mode 100755
index 0000000..73464c7
--- /dev/null
+++ b/mxnet-bot/PredictLabels/Dockerrun.aws.json
@@ -0,0 +1,7 @@
+{
+  "AWSEBDockerrunVersion": "1",
+  "Logging": "/tmp/sample-app",
+  "Image": {
+  	"Update": "false"
+  }
+}
diff --git a/mxnet-bot/PredictLabels/Predictor.py b/mxnet-bot/PredictLabels/Predictor.py
new file mode 100644
index 0000000..c2db5d2
--- /dev/null
+++ b/mxnet-bot/PredictLabels/Predictor.py
@@ -0,0 +1,132 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from sklearn.preprocessing import LabelEncoder
+from SentenceParser import SentenceParser
+from DataFetcher import DataFetcher
+import numpy as np
+import pickle
+import re
+import logging
+
+
+class Predictor:
+    # label -> title tokens that trigger it in rule_based(); NOTE(review): "coda" looks like a typo of "cuda" - confirm
+    keywords = {"ci": ["ci", "ccache", "jenkins"],
+                "flaky": ["flaky"],
+                "gluon": ["gluon"],
+                "coda": ["cuda", "cudnn"],
+                "scala": ["scala"],
+                "mkldnn": ["mkldnn", "mkl"],
+                "onnx": ["onnx"]}
+
+    def __init__(self):
+        """
+        Predictor applies rule-based and ML algorithms to predict labels; reload() fills the model attributes
+        """
+        self.tv = None
+        self.labels = None
+        self.clf = None
+        self.reload()
+
+    def reload(self):
+        """Load the pickled vectorizer, labels and classifier from /tmp, closing each file after reading"""
+        # pickle can run arbitrary code while loading: these files must come from a trusted trainer
+        model_files = (("tv", "/tmp/Vectorizer.p"), ("labels", "/tmp/Labels.p"), ("clf", "/tmp/Classifier.p"))
+        for attribute, path in model_files:
+            with open(path, "rb") as model_file:
+                setattr(self, attribute, pickle.load(model_file))
+
+    def tokenize(self, row):
+        """
+        Split a sentence into its distinct lower-cased alphanumeric words
+        Args:
+            row(string): a sentence
+        Return:
+            words(set): the unique words of the sentence
+        """
+        normalized = re.sub('[^a-zA-Z0-9]', ' ', row)
+        lowered_words = normalized.lower().split()
+        return set(lowered_words)
+
+    def rule_based(self, issues):
+        """
+        This method applies keyword rules to every issue's title to predict labels
+        Args:
+            issues(list): a list of issue numbers
+        Return:
+            rule_based_predictions(list of lists): per issue, each matched label once
+        """
+        df_test = DataFetcher().fetch_issues(issues)
+        rule_based_predictions = []
+        for i in range(len(issues)):
+            title = df_test.loc[i, 'title']
+            lowered_title = title.lower()
+            single_issue_predictions = []
+            if "feature request" in lowered_title:
+                single_issue_predictions.append("Feature")
+            if "c++" in lowered_title:
+                single_issue_predictions.append("C++")
+            tokens = self.tokenize(title)
+            for label, label_keywords in self.keywords.items():
+                # any() adds a label once even when several of its keywords match
+                if any(keyword in tokens for keyword in label_keywords):
+                    single_issue_predictions.append(label)
+            rule_based_predictions.append(single_issue_predictions)
+        return rule_based_predictions
+
+    def ml_predict(self, issues, threshold=0.3):
+        """
+        This method applies the loaded vectorizer and classifier to predict labels
+        Args:
+            issues(list): a list of issue numbers
+            threshold(float): a label is kept only if its probability is greater than this
+        Return:
+            ml_predictions(list of lists): per issue, at most 2 labels, most probable first
+        """
+        # step1: fetch data
+        DF = DataFetcher()
+        df_test = DF.fetch_issues(issues)
+        # step2: data cleaning
+        SP = SentenceParser()
+        SP.data = df_test
+        SP.clean_body('body', True, True)
+        SP.merge_column(['title', 'title', 'title', 'body'], 'train')
+        test_text=SP.process_text('train', True, False, True)
+        # step3: vectorize the text; le.classes_ names the class indices (assumes clf was trained on the same encoding)
+        test_data_tfidf = self.tv.transform(test_text).toarray()
+        le = LabelEncoder()
+        le.fit_transform(self.labels)
+        # step4: classification
+        probs = self.clf.predict_proba(test_data_tfidf)
+        # take the 2 most probable classes per issue, then keep those which exceed threshold
+        best_n = np.argsort(probs, axis=1)[:, -2:]
+        ml_predictions=[]
+        for i in range(len(best_n)):
+            # log example: INFO:Predictor:issue:11919,Performance:0.47353076240017744,Question:0.2440056213336274
+            logging.info("issue:{}, {}:{}, {}:{}".format(str(issues[i]), str(le.classes_[best_n[i][-1]]), str(probs[i][best_n[i][-1]]),
+                        str(le.classes_[best_n[i][-2]]), str(probs[i][best_n[i][-2]])))
+            single_issue_predictions = [le.classes_[best_n[i][j]] for j in range(-1, -3, -1) if probs[i][best_n[i][j]] > threshold]
+            ml_predictions.append(single_issue_predictions)
+        return ml_predictions
+
+    def predict(self, issues):
+        """Return, per issue, the de-duplicated union of the rule-based and machine learning labels"""
+        rule_labels = self.rule_based(issues)
+        ml_labels = self.ml_predict(issues)
+        merged = [list(set(rule_labels[i] + ml)) for i, ml in enumerate(ml_labels)]
+        return merged
diff --git a/mxnet-bot/PredictLabels/README.md b/mxnet-bot/PredictLabels/README.md
new file mode 100644
index 0000000..22f5537
--- /dev/null
+++ b/mxnet-bot/PredictLabels/README.md
@@ -0,0 +1,25 @@
+# Elastic Beanstalk Web Server
+
+A web server built on [AWS Elastic Beanstalk](https://aws.amazon.com/elasticbeanstalk/) which responds to GET/POST requests and maintains itself. It has 2 main features:
+  * Train models: it will retrain Machine Learning models every 24 hours automatically using latest data.
+  * Predict labels: once it receives a GET/POST request with issue IDs, it will send predictions back.
+
+## Set up
+*Make sure you are in this directory.*
+* Configure Dockerfile: in `Dockerfile`, set the three `ENV` variables to your real `github_user`, `github_oauth_token` (READ-only token) and `repo`.
+* Open terminal, run:
+```bash
+zip eb.zip application.py cron.yaml DataFetcher.py \
+Dockerfile Dockerrun.aws.json plot_piechart.py Predictor.py SentenceParser.py Trainer.py \
+requirements.txt stopwords.txt
+```
+It will zip all needed files into `eb.zip`
+* Manually create a new Elastic Beanstalk application.
+    1. Go to AWS Elastic Beanstalk console, click ***Create New Application***. Fill in *Application Name* and *Description*, click ***Create***.
+    2. Under ***Select environment tier***, select ***Web server environment***, click ***Select***.
+    3. Under **Base configuration**, select **Preconfigured platform**. In its dropdown, select **Docker**. Then select ***Upload your code***, upload `eb.zip`.
+    4. Click ***Configure more options***. Modify Instances: in the dropdown of Instance type, select t2.large. Click ***Create Environment*** (No need to select a security group, EB will create one.)
+    5. It will take about 2 minutes to setup the environment. 
+    6. Once the environment is setup, it will take 5-10 minutes to generate models. 
+    7. Write down URL. (ie: http://labelbot-env.pgc55xzpte.us-east-1.elasticbeanstalk.com)
+    
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/SentenceParser.py b/mxnet-bot/PredictLabels/SentenceParser.py
new file mode 100644
index 0000000..786d4a9
--- /dev/null
+++ b/mxnet-bot/PredictLabels/SentenceParser.py
@@ -0,0 +1,134 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script serves to do data cleaning
+from bs4 import BeautifulSoup
+import nltk
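+# work around ssl certificate errors -- NOTE: this turns off HTTPS certificate verification for the default stdlib context, process-wide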
+import ssl
+try:
+    _create_unverified_https_context = ssl._create_unverified_context
+except AttributeError:
+    pass
+else:
+    ssl._create_default_https_context = _create_unverified_https_context
+import os.path
+import pandas as pd
+import re
+import sys
+import logging
+
+
+class SentenceParser:
+    """Load issue data into a pandas DataFrame (self.data) and clean its text columns."""
+    regex_str = [
+        r'<[^>]+>',                                                                     # HTML tags
+        r'(?:@[\w_]+)',                                                                 # @-mentions
+        r"(?:\#+[\w_]+[\w\'_\-]*[\w_]+)",                                               # hash-tags
+        r'http[s]?://(?:[a-z]|[0-9]|[$-_@.&amp;+]|[!*\(\),]|(?:%[0-9a-f][0-9a-f]))+',   # URLs
+        r'(?:(?:\d+,?)+(?:\.?\d+)?)',                                                   # numbers
+        r"(?:[a-z][a-z'\-_]+[a-z])",                                                    # words with - and '
+        r'(?:[\w_]+)',                                                                  # other words
+        r'(?:\S)'                                                                       # anything else
+    ]
+    # English stopwords, read from the working directory when the class body is executed.
+    # The with-block already closes the file, so no explicit close() call is needed.
+    with open('stopwords.txt') as file:
+        stopwords = file.read().split()
+
+    def __init__(self):
+        """
+        Create a parser with no data loaded yet, a Porter stemmer and the stopword set
+        """
+        self.data = None
+        # extract words stem
+        self.porter = nltk.PorterStemmer()
+        # a set of stopwords
+        self.stops = set(self.stopwords)
+
+    def read_file(self, filepath, filetype, encod='ISO-8859-1', header=None):
+        """
+        Read a csv/json/xlsx file into self.data; exits the process if the file is missing or the type is unknown
+        """
+        logging.info('Start reading File')
+        if not os.path.isfile(filepath):
+            logging.error("File Not Exist!")
+            sys.exit()
+        if filetype == 'csv':
+            df = pd.read_csv(filepath, encoding=encod, header=header)
+        elif filetype == 'json':
+            df = pd.read_json(filepath, encoding=encod, lines=False)
+        elif filetype == 'xlsx':
+            df = pd.read_excel(filepath, encoding=encod, header=header)
+        else:
+            logging.error("Extension Type not Accepted!")
+            sys.exit()
+
+        logging.debug(df)
+        self.data = df
+
+    def merge_column(self, columns, name):
+        """
+        Concatenate the given columns, each preceded by a space, into a new column `name` of self.data
+        """
+        logging.info('Merge headers %s to %s', str(columns), name)
+        self.data[name] = ''
+        for header in columns:
+            self.data[name] += ' ' + self.data[header]
+
+    def clean_body(self, column, remove_template=True, remove_code=True):
+        """
+        Strip the issue template and fenced code blocks from every row of `column`, in place
+        """
+        logging.info("Start Removing Templates..")
+        for i in range(len(self.data)):
+            # drop everything from the 'Environment info' heading onwards
+            if remove_template and "## Environment info" in self.data[column][i]:
+                index = self.data.loc[i, column].find("## Environment info")
+                self.data.loc[i, column] = self.data.loc[i, column][:index]
+            # remove code
+            if remove_code and "```" in self.data[column][i]:
+                # pieces at even positions lie outside the ``` fences; slicing avoids reusing `i`,
+                # which a Python 2 list comprehension would overwrite before the .loc write below
+                self.data.loc[i, column] = " ".join(self.data[column][i].split("```")[::2])
+
+    def process_text(self, column, remove_symbol=True, remove_stopwords=False, stemming=False):
+        """
+        Normalise whitespace and strip URLs in self.data[column] (in place), then return the cleaned rows as a list
+        """
+        logging.info("Start Data Cleaning...")
+        # collapse newlines/tabs; regex=True is explicit because newer pandas defaults to literal matching
+        self.data[column] = self.data[column].str.replace(r'[\n\r\t]+', ' ', regex=True)
+        # remove URLs
+        self.data[column] = self.data[column].str.replace(self.regex_str[3], ' ', regex=True)
+        tempcol = self.data[column].values.tolist()
+
+        for i in range(len(tempcol)):
+            row = BeautifulSoup(tempcol[i], 'html.parser').get_text().lower()
+            # keep ASCII letters only; digits and punctuation become spaces
+            if remove_symbol:
+                row = re.sub('[^a-zA-Z]', ' ', row)
+            words = row.split()
+            # drop stopwords and plain numbers
+            if remove_stopwords:
+                words = [w for w in words if w not in self.stops and not w.replace('.', '', 1).isdigit()]
+            # extract words stem
+            if stemming:
+                words = [self.porter.stem(w) for w in words]
+            row = ' '.join(words)
+            tempcol[i] = row.lower()
+        return tempcol
diff --git a/mxnet-bot/PredictLabels/Trainer.py b/mxnet-bot/PredictLabels/Trainer.py
new file mode 100644
index 0000000..c465c6a
--- /dev/null
+++ b/mxnet-bot/PredictLabels/Trainer.py
@@ -0,0 +1,87 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script is served to train Machine Learning models
+from DataFetcher import DataFetcher
+from SentenceParser import SentenceParser
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.svm import SVC
+from sklearn.preprocessing import LabelEncoder
+import pickle
+import logging
+
+
+class Trainer:
+    """Fetch labelled issues, fit a TF-IDF vectorizer and an SVC on them, and pickle the results to /tmp."""
+    def __init__(self):
+        """
+        Trainer is to train issues using Machine Learning methods.
+        self.labels(list): a list of target labels
+        self.tv: TFIDF model (1- to 3-grams, max_features = 10000)
+        self.clf: Classifier (SVC, kernel = 'rbf')
+        """
+        self.labels = ["Performance", "Test", "Question",
+                       "Feature request", "Call for contribution",
+                       "Feature", "Example", "Doc",
+                       "Installation", "Build", "Bug"]
+        self.tv = TfidfVectorizer(min_df=0.00009, ngram_range=(1, 3), max_features=10000)
+        self.clf = SVC(gamma=0.5, C=100, probability=True)
+
+    def train(self):
+        """
+        Fetch issues, clean them, fit the vectorizer and classifier, and save the models under /tmp.
+        """
+        logging.info("Start training issues of general labels")
+        # Step1: Fetch issues with general labels
+        logging.info("Fetching Data..")
+        DF = DataFetcher()
+        filename = DF.data2json('all', self.labels, False)
+        # Step2: Clean data
+        logging.info("Cleaning Data..")
+        SP = SentenceParser()
+        SP.read_file(filename, 'json')
+        SP.clean_body('body', True, True)
+        SP.merge_column(['title', 'title', 'title', 'body'], 'train')  # title listed 3x, presumably to weight it -- confirm
+        text = SP.process_text('train', True, False, True)
+        # Step3: Word Embedding
+        logging.info("Word Embedding..")
+        tv = self.tv
+        X = tv.fit_transform(text).toarray()
+        # Labels
+        labels = SP.data['labels']
+        le = LabelEncoder()
+        Y = le.fit_transform(labels)
+        # Step4: Train Classifier
+        # SVC, kernel = 'rbf'
+        logging.info("Training Data..")
+        clf = self.clf
+        clf.fit(X, Y)
+        # Step5: save models; each file is closed (and therefore flushed) before anything reads it back
+        logging.info("Saving Models..")
+        with open("/tmp/Vectorizer.p", "wb") as f:
+            pickle.dump(tv, f)
+        with open("/tmp/Classifier.p", "wb") as f:
+            pickle.dump(clf, f)
+        with open("/tmp/Labels.p", "wb") as f:
+            pickle.dump(labels, f)
+        logging.info("Completed!")
+        return
+
+
+
+
+
diff --git a/mxnet-bot/PredictLabels/application.py b/mxnet-bot/PredictLabels/application.py
new file mode 100644
index 0000000..1aec229
--- /dev/null
+++ b/mxnet-bot/PredictLabels/application.py
@@ -0,0 +1,119 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This is a web server built based on Flask framework and AWS Elastic Beanstalk service 
+# It will response to http GET/POST requests
+from flask import Flask, jsonify, request, send_file
+from apscheduler.schedulers.background import BackgroundScheduler
+from apscheduler.triggers.interval import IntervalTrigger
+from Predictor import Predictor
+from Trainer import Trainer
+import plot_piechart
+import timeit
+import atexit
+import logging
+import os.path
+
+logging.getLogger().setLevel(logging.INFO)
+
+application = Flask(__name__)
+
+if not os.path.exists('/tmp/Classifier.p'):
+    trainer = Trainer()
+    trainer.train()
+predictor = Predictor()
+
+
+@application.route('/')
+def index():
+    """GET '/': return a fixed greeting string."""
+    return "Hello!  -Bot"
+
+
+# GET '/issues/<issue>'
+# return predictions of an issue
+@application.route('/issues/<issue>')
+def get_prediction(issue):
+    predicted_labels = predictor.predict([issue])[0]
+    return " ".join(predicted_labels)
+
+
+# POST '/predict'
+# return predictions of issues
+@application.route('/predict', methods=['POST'])
+def predict():
+    """Return predicted labels for each issue number in a JSON body like {"issues": [1, 2, 3]}."""
+    payload = request.get_json(silent=True)
+    if not isinstance(payload, dict) or not isinstance(payload.get("issues"), list):
+        # a missing or malformed body used to surface as an unhandled exception (HTTP 500)
+        return jsonify({"error": 'expected a JSON body like {"issues": [1, 2, 3]}'}), 400
+    issues = payload["issues"]
+    predictions = predictor.predict(issues)
+    # pair every issue number with its prediction, preserving request order
+    return jsonify([{"number": n, "predictions": p} for n, p in zip(issues, predictions)])
+
+
+# POST '/draw'
+# return an image's binary code
+@application.route('/draw', methods=['POST'])
+def plot():
+    """Draw a pie chart from a JSON body {"fracs": [...], "labels": [...]} and send it back as a PNG."""
+    # client side: requests.post(url,json={"fracs":[], "labels":[]})
+    payload = request.get_json()
+    # draw_pie saves the chart under /tmp and returns the file path
+    image_path = plot_piechart.draw_pie(payload["fracs"], payload["labels"])
+    return send_file(image_path, mimetype='image/png')
+
+
+# helper function
+def train_models():
+    """Retrain the models, have the shared predictor reload them, and log how long training took."""
+    started = timeit.default_timer()
+    Trainer().train()
+    elapsed = int(timeit.default_timer() - started)
+    # reload models
+    predictor.reload()
+    minutes, seconds = divmod(elapsed, 60)
+    logging.info("Training completed! Time cost: {} min, {} seconds".format(minutes, seconds))
+    return
+
+
+# Once the server is running, it will retrain ML models every 24 hours.
+# Deliberately not a before_first_request hook too: the initialize() call below already starts it.
+def initialize():
+    """Start one background scheduler that runs train_models every 24 hours until exit."""
+    scheduler = BackgroundScheduler()
+    scheduler.start()
+    scheduler.add_job(
+        func=train_models,
+        trigger=IntervalTrigger(hours=24),
+        id='Training_Job',
+        name='Update models every 24 hours',
+        replace_existing=True)
+    atexit.register(scheduler.shutdown)  # shut the scheduler down when the app exits
+
+
+initialize()
+
+
+# run the app.
+if __name__ == "__main__":
+    # The Werkzeug debugger allows arbitrary code execution, so it must not be on by
+    # default while listening on 0.0.0.0; opt in for local work with FLASK_DEBUG=1.
+    application.debug = os.environ.get('FLASK_DEBUG') == '1'
+    application.threaded = True
+    application.run('0.0.0.0', 8000)
diff --git a/mxnet-bot/PredictLabels/cron.yaml b/mxnet-bot/PredictLabels/cron.yaml
new file mode 100644
index 0000000..f47da88
--- /dev/null
+++ b/mxnet-bot/PredictLabels/cron.yaml
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+version: 1
+cron: 
+  - name: "task1"
+    url: "/scheduled"
+    schedule: "* * * * *"
diff --git a/mxnet-bot/PredictLabels/plot_piechart.py b/mxnet-bot/PredictLabels/plot_piechart.py
new file mode 100644
index 0000000..6f3cd20
--- /dev/null
+++ b/mxnet-bot/PredictLabels/plot_piechart.py
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime
+import matplotlib
+# set 'agg' as matplotlib backend
+matplotlib.use('agg', warn=False, force=True)
+from matplotlib import pyplot as plt
+import logging
+
+
+def make_autopct(values):
+    """Build an autopct callback that labels a wedge with its percentage and absolute count."""
+    def my_autopct(pct):
+        # scale the wedge's percentage back to a count out of sum(values)
+        count = int(round(pct * sum(values) / 100.0))
+        return '{p:.2f}% ({v:d})'.format(p=pct, v=count)
+    return my_autopct
+
+
+def draw_pie(fracs, labels):
+    """
+    Plot the pie chart of labels, save it as a PNG in the '/tmp/' folder and return the file path
+    """
+    logging.info("Drawing the pie chart..")
+    fig = plt.figure()
+    plt.pie(fracs, labels=labels, autopct=make_autopct(fracs), shadow=True)
+    plt.title("Top 10 labels for newly opened issues")
+    now = datetime.datetime.today()  # one timestamp, so the date and time parts cannot straddle midnight
+    pic_path = "/tmp/piechart_{}_{}.png".format(now.date(), now.time())
+    fig.savefig(pic_path)
+    plt.close(fig)  # pyplot keeps every open figure alive; close it so a long-running server does not leak
+    return pic_path
diff --git a/mxnet-bot/PredictLabels/requirements.txt b/mxnet-bot/PredictLabels/requirements.txt
new file mode 100644
index 0000000..c587d93
--- /dev/null
+++ b/mxnet-bot/PredictLabels/requirements.txt
@@ -0,0 +1,33 @@
+APScheduler==3.5.1
+beautifulsoup4==4.6.0
+boto3==1.7.59
+botocore==1.10.59
+bs4==0.0.1
+certifi==2018.4.16
+chardet==3.0.4
+click==6.7
+cycler==0.10.0
+DateTime==4.2
+docutils==0.14
+Flask==1.0.2
+idna==2.7
+itsdangerous==0.24
+Jinja2==2.10
+jmespath==0.9.3
+kiwisolver==1.0.1
+matplotlib==2.2.2
+MarkupSafe==1.0
+nltk==3.3
+numpy==1.14.5
+pandas==0.23.3
+pyparsing==2.2.0
+python-dateutil==2.7.3
+pytz==2018.5
+requests==2.19.1
+scikit-learn==0.19.2
+scipy==1.1.0
+six==1.11.0
+sklearn==0.0
+urllib3==1.23
+Werkzeug==0.14.1
+zope.interface==4.5.0
diff --git a/mxnet-bot/PredictLabels/stopwords.txt b/mxnet-bot/PredictLabels/stopwords.txt
new file mode 100644
index 0000000..c41ef55
--- /dev/null
+++ b/mxnet-bot/PredictLabels/stopwords.txt
@@ -0,0 +1 @@
+i me my myself we our ours ourselves you you're you've you'll you'd your yours yourself yourselves he him his himself she she's her hers herself it it's its itself they them their theirs themselves what which who whom this that that'll these those am is are was were be been being have has had having do does did doing a an the and but if or because as until while of at by for with about against between into through during before after above below to from up down in out on off over under again further then once here there when where why how all any both each few more most other some such no nor not only own same so than too very s t can will just don don't should should've now d ll m o re ve y ain aren aren't couldn couldn't didn didn't doesn doesn't hadn hadn't hasn hasn't haven haven't isn isn't ma mightn mightn't mustn mustn't needn needn't shan shan't shouldn shouldn't wasn wasn't weren weren't won won't wouldn wouldn't
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/test_datafetcher.py b/mxnet-bot/PredictLabels/test_datafetcher.py
new file mode 100644
index 0000000..588b000
--- /dev/null
+++ b/mxnet-bot/PredictLabels/test_datafetcher.py
@@ -0,0 +1,116 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import requests
+import boto3
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+from DataFetcher import DataFetcher
+import unittest
+import pandas as pd
+from DataFetcher import DataFetcher
+from pandas.util.testing import assert_frame_equal
+# some version issue
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+# test coverage: 93%
+class TestLabelBot(unittest.TestCase):
+
+    def setUp(self):
+        self.df = DataFetcher()
+        self.df.repo = "apache/incubator-mxnet"
+        self.df.github_user = "cathy"
+        self.df.github_oauth_token = "123"
+
+    def tearDown(self):
+        pass
+
+    def test_cleanstr(self):
+        new_string = self.df.cleanstr("a_b", "")
+        self.assertEqual(new_string, "ab")
+
+    def test_count_pages(self):
+        with patch('DataFetcher.requests.get') as mocked_get:
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = [{ "body":"issue's body",
+                                                "created_at":"2018-07-28T18:27:17Z",
+                                                "comments":"0",
+                                                "number":11925,
+                                                "labels":[{'name':'Doc'}],
+                                                "state":"open",
+                                                "title":"issue's title",
+                                                "html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+                                              },
+                                              { "body":"issue's body",
+                                                "created_at":"2018-07-28T18:27:17Z",
+                                                "comments":"0",
+                                                "number":11924,
+                                                "labels":[],
+                                                "state":"closed",
+                                                "title":"issue's title",
+                                                "html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }]
+            page = self.df.count_pages('all')
+            self.assertEqual(page,1)
+
+    def test_fetch_issues(self):
+        with patch('DataFetcher.requests.get') as mocked_get:
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = { "body":"issue's body",
+                                                "created_at":"2018-07-28T18:27:17Z",
+                                                "comments":"0",
+                                                "number":11925,
+                                                "labels":[{'name':'Feature'}],
+                                                "state":"open",
+                                                "title":"issue's title",
+                                                "html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }
+            data = self.df.fetch_issues([11925])
+            expected_data = [{'id':"11925", 'title':"issue's title",'body':"issue's body"}]
+            assert_frame_equal(data, pd.DataFrame(expected_data))
+
+    def test_data2json(self):
+        with patch('DataFetcher.requests.get') as mocked_get:
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = [{ "body":"issue's body",
+                                                "created_at":"2018-07-28T18:27:17Z",
+                                                "comments":"0",
+                                                "number":11925,
+                                                "labels":[{'name':'Feature'}],
+                                                "state":"open",
+                                                "title":"issue's title",
+                                                "html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+                                              },
+                                              { "body":"issue's body",
+                                                "created_at":"2018-07-28T18:27:17Z",
+                                                "comments":"0",
+                                                "number":11924,
+                                                "labels":[],
+                                                "state":"closed",
+                                                "title":"issue's title",
+                                                "html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }]
+            self.df.data2json('all', labels=["Feature"], other_labels=False)
+            expected_data = [{'id': 11925, 'title': "issue's title", 'body': "issue's body", 'labels': 'Feature'}]
+            self.assertEqual(expected_data, self.df.json_data)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mxnet-bot/PredictLabels/test_predictor.py b/mxnet-bot/PredictLabels/test_predictor.py
new file mode 100644
index 0000000..3580da8
--- /dev/null
+++ b/mxnet-bot/PredictLabels/test_predictor.py
@@ -0,0 +1,95 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import requests
+import boto3
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+from DataFetcher import DataFetcher
+import unittest
+from Predictor import Predictor
+# some version issue
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+
+# test coverage: 100%
+class TestLabelBot(unittest.TestCase):
+
+    def setUp(self):
+        self.pr = Predictor()
+
+    def tearDown(self):
+        pass
+
+    def test_tokenize(self):
+        words = self.pr.tokenize("hello_world")
+        self.assertEqual(words, set(['hello','world']))
+
+    def test_rule_based(self):
+        with patch('DataFetcher.requests.get') as mocked_get:
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = {
+                                                "body": "issue's body",
+                                                "created_at": "2018-07-28T18:27:17Z",
+                                                "comments": "0",
+                                                "number": 11925,
+                                                "labels": [{'name': 'Doc'}],
+                                                "state": "open",
+                                                "title": "a feature requests for scala package",
+                                                "html_url": "https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }
+            predictions = self.pr.rule_based([11925])
+            self.assertEqual([['Feature','scala']], predictions)
+
+    def test_ml_predict(self):
+        with patch('DataFetcher.requests.get') as mocked_get:
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = {
+                                                "body": "test",
+                                                "created_at": "2018-07-28T18:27:17Z",
+                                                "comments": "0",
+                                                "number": 11925,
+                                                "labels": [{'name': 'Doc'}],
+                                                "state": "open",
+                                                "title": "a feature requests for scala package",
+                                                "html_url": "https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }
+            predictions = self.pr.ml_predict([11925])
+            self.assertEqual([['Feature']], predictions)
+
+    def test_predict(self):
+        with patch('DataFetcher.requests.get') as mocked_get:
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = {
+                                                "body": "test",
+                                                "created_at": "2018-07-28T18:27:17Z",
+                                                "comments": "0",
+                                                "number": 11925,
+                                                "labels": [{'name': 'Doc'}],
+                                                "state": "open",
+                                                "title": "a feature requests for scala package",
+                                                "html_url": "https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }
+            predictions = self.pr.predict([11925])
+            self.assertEqual([['Feature', 'scala']], predictions)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mxnet-bot/PredictLabels/test_sentenceparse.py b/mxnet-bot/PredictLabels/test_sentenceparse.py
new file mode 100644
index 0000000..a81b3c4
--- /dev/null
+++ b/mxnet-bot/PredictLabels/test_sentenceparse.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import requests
+import boto3
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+import unittest
+import pandas as pd
+from SentenceParser import SentenceParser
+from pandas.util.testing import assert_frame_equal
+# some version issue
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+# test coverage: 88%
+class TestSentenceParser(unittest.TestCase):
+
+    def setUp(self):
+        self.sp = SentenceParser()
+        self.sp.data = pd.DataFrame([{'id': 11925, 'title': "issue's title",
+                                      'body': " bug ``` import pandas``` ## Environment info",
+                                      'labels': ['Doc']}])
+
+    def test_read_file(self):
+        self.sp.read_file('all_data.json_Feature', 'json')
+        expected_data = [{'id': 11925, 'title': "issue's title", 'body': "issue's body", 'labels': ['Doc']},
+                         {'id': 11924, 'title': "issue's title", 'body': "issue's body", 'labels': []}]
+        assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
+
+    def test_merge_column(self):
+        self.sp.merge_column(['title', 'body'], 'train')
+        expected_data = [{'id': 11925, 'title': "issue's title", 'body': " bug ``` import pandas``` ## Environment info",
+                          'labels': ['Doc'],
+                          'train': " issue's title  bug ``` import pandas``` ## Environment info"}]
+        assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
+
+    def test_clean_body(self):
+        self.sp.clean_body('body', True, True)
+        expected_data = [{'id': 11925, 'title': "issue's title", 'body': " bug   ", 'labels': ['Doc']}]
+        assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
+
+    def test_process_text(self):
+        data = self.sp.process_text('body', True, True, True)
+        expected_data = ['bug import panda environ info']
+        self.assertEqual(data, expected_data)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mxnet-bot/PredictLabels/test_trainer.py b/mxnet-bot/PredictLabels/test_trainer.py
new file mode 100644
index 0000000..0abdace
--- /dev/null
+++ b/mxnet-bot/PredictLabels/test_trainer.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import unittest
+import requests
+import boto3
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+from DataFetcher import DataFetcher
+from Trainer import Trainer
+# some version issue
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+
+# test coverage: 100%
+class TestTrainer(unittest.TestCase):
+
+    def setUp(self):
+        self.trainer = Trainer()
+
+    def test_train(self):
+        with patch('DataFetcher.requests.get') as mocked_get:
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = [{
+												"body": "I was looking at the mxnet.\
+                                                metric source code and documentation",
+                                                "created_at": "2018-07-28T18:27:17Z",
+                                                "comments": "0",
+                                                "number": 11925,
+                                                "labels": [{'name':'Doc'}],
+                                                "state": "open",
+                                                "title": "Confusion in documentation/implementation of F1, MCC metrics",
+                                                "html_url": "https://github.com/apache/incubator-mxnet/issues/11925",
+                                              },
+                                              { "body": "I train a CNN with python under mxnet gluon mys C++ code crash when i call MXPredsetInput.",
+                                                "created_at": "2018-07-28T18:27:17Z",
+                                                "comments": "0",
+                                                "number": 11924,
+                                                "labels": [{'name':'Bug'}],
+                                                "state": "closed",
+                                                "title": "Issue in exporting gluon model",
+                                                "html_url": "https://github.com/apache/incubator-mxnet/issues/11924",
+                                              }]
+            self.trainer.train()
+
+
+if __name__ == "__main__":
+    unittest.main()

From 7f5a0530c71e66262b1d1c1f68f216beea3fa2c7 Mon Sep 17 00:00:00 2001
From: Yuelin Zhang <zyuelin@amazon.com>
Date: Sat, 4 Aug 2018 19:27:07 -0700
Subject: [PATCH 2/5] Migrate PR 11935

Migrate PR 11935 here.
Revise code based on code review:
1. set constructor variabls for each class
2. add doc strings
3. improve coding style
3. add README
---
 .gitignore                                    |   6 +-
 .../PredictLabels/.idea/PredictLabels.iml     |  11 +
 mxnet-bot/PredictLabels/.idea/misc.xml        |   7 +
 mxnet-bot/PredictLabels/.idea/modules.xml     |   8 +
 mxnet-bot/PredictLabels/.idea/workspace.xml   | 212 ++++++++++++++++++
 mxnet-bot/PredictLabels/DataFetcher.py        | 135 +++++++++++
 mxnet-bot/PredictLabels/Dockerfile            |  26 +++
 mxnet-bot/PredictLabels/Dockerrun.aws.json    |   7 +
 mxnet-bot/PredictLabels/Predictor.py          | 132 +++++++++++
 mxnet-bot/PredictLabels/README.md             |  25 +++
 mxnet-bot/PredictLabels/SentenceParser.py     | 134 +++++++++++
 mxnet-bot/PredictLabels/Trainer.py            |  87 +++++++
 mxnet-bot/PredictLabels/application.py        | 119 ++++++++++
 mxnet-bot/PredictLabels/cron.yaml             |  21 ++
 mxnet-bot/PredictLabels/plot_piechart.py      |  47 ++++
 mxnet-bot/PredictLabels/requirements.txt      |  33 +++
 mxnet-bot/PredictLabels/stopwords.txt         |   1 +
 mxnet-bot/PredictLabels/test_datafetcher.py   | 116 ++++++++++
 mxnet-bot/PredictLabels/test_predictor.py     |  95 ++++++++
 mxnet-bot/PredictLabels/test_sentenceparse.py |  66 ++++++
 mxnet-bot/PredictLabels/test_trainer.py       |  66 ++++++
 21 files changed, 1352 insertions(+), 2 deletions(-)
 create mode 100644 mxnet-bot/PredictLabels/.idea/PredictLabels.iml
 create mode 100644 mxnet-bot/PredictLabels/.idea/misc.xml
 create mode 100644 mxnet-bot/PredictLabels/.idea/modules.xml
 create mode 100644 mxnet-bot/PredictLabels/.idea/workspace.xml
 create mode 100644 mxnet-bot/PredictLabels/DataFetcher.py
 create mode 100644 mxnet-bot/PredictLabels/Dockerfile
 create mode 100755 mxnet-bot/PredictLabels/Dockerrun.aws.json
 create mode 100644 mxnet-bot/PredictLabels/Predictor.py
 create mode 100644 mxnet-bot/PredictLabels/README.md
 create mode 100644 mxnet-bot/PredictLabels/SentenceParser.py
 create mode 100644 mxnet-bot/PredictLabels/Trainer.py
 create mode 100644 mxnet-bot/PredictLabels/application.py
 create mode 100644 mxnet-bot/PredictLabels/cron.yaml
 create mode 100644 mxnet-bot/PredictLabels/plot_piechart.py
 create mode 100644 mxnet-bot/PredictLabels/requirements.txt
 create mode 100644 mxnet-bot/PredictLabels/stopwords.txt
 create mode 100644 mxnet-bot/PredictLabels/test_datafetcher.py
 create mode 100644 mxnet-bot/PredictLabels/test_predictor.py
 create mode 100644 mxnet-bot/PredictLabels/test_sentenceparse.py
 create mode 100644 mxnet-bot/PredictLabels/test_trainer.py

diff --git a/.gitignore b/.gitignore
index 1279279..83aa10e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
-
-mxnet-bot/.DS_Store
+# Pycharm Project
+.idea/
+# OSX Stuff
 .DS_Store
+mxnet-bot/.DS_Store
diff --git a/mxnet-bot/PredictLabels/.idea/PredictLabels.iml b/mxnet-bot/PredictLabels/.idea/PredictLabels.iml
new file mode 100644
index 0000000..6711606
--- /dev/null
+++ b/mxnet-bot/PredictLabels/.idea/PredictLabels.iml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/.idea/misc.xml b/mxnet-bot/PredictLabels/.idea/misc.xml
new file mode 100644
index 0000000..7a5c067
--- /dev/null
+++ b/mxnet-bot/PredictLabels/.idea/misc.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
+  <component name="PyCharmProfessionalAdvertiser">
+    <option name="shown" value="true" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/.idea/modules.xml b/mxnet-bot/PredictLabels/.idea/modules.xml
new file mode 100644
index 0000000..18fdcdd
--- /dev/null
+++ b/mxnet-bot/PredictLabels/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/PredictLabels.iml" filepath="$PROJECT_DIR$/.idea/PredictLabels.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/.idea/workspace.xml b/mxnet-bot/PredictLabels/.idea/workspace.xml
new file mode 100644
index 0000000..15f95e5
--- /dev/null
+++ b/mxnet-bot/PredictLabels/.idea/workspace.xml
@@ -0,0 +1,212 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ChangeListManager">
+    <list default="true" id="028f7063-2c5d-4a6b-be24-591f81619277" name="Default" comment="" />
+    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
+    <option name="TRACKING_ENABLED" value="true" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FileEditorManager">
+    <leaf>
+      <file leaf-file-name="application.py" pinned="false" current-in-tab="true">
+        <entry file="file://$PROJECT_DIR$/application.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="243">
+              <caret line="28" column="14" lean-forward="true" selection-start-line="28" selection-start-column="14" selection-end-line="28" selection-end-column="14" />
+              <folding>
+                <element signature="e#920#972#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+    </leaf>
+  </component>
+  <component name="IdeDocumentHistory">
+    <option name="CHANGED_PATHS">
+      <list>
+        <option value="$PROJECT_DIR$/DataFetcher.py" />
+        <option value="$PROJECT_DIR$/plot_piechart.py" />
+        <option value="$PROJECT_DIR$/Predictor.py" />
+        <option value="$PROJECT_DIR$/application.py" />
+        <option value="$PROJECT_DIR$/test_trainer.py" />
+        <option value="$PROJECT_DIR$/test_datafetcher.py" />
+        <option value="$PROJECT_DIR$/test_predictor.py" />
+        <option value="$PROJECT_DIR$/test_sentenceparse.py" />
+        <option value="$PROJECT_DIR$/Trainer.py" />
+        <option value="$PROJECT_DIR$/SentenceParser.py" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectFrameBounds">
+    <option name="y" value="23" />
+    <option name="width" value="1024" />
+    <option name="height" value="548" />
+  </component>
+  <component name="ProjectView">
+    <navigator proportions="" version="1">
+      <foldersAlwaysOnTop value="true" />
+    </navigator>
+    <panes>
+      <pane id="ProjectPane">
+        <subPane>
+          <expand>
+            <path>
+              <item name="PredictLabels" type="b2602c69:ProjectViewProjectNode" />
+              <item name="PredictLabels" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="PredictLabels" type="b2602c69:ProjectViewProjectNode" />
+              <item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
+            </path>
+          </expand>
+          <select />
+        </subPane>
+      </pane>
+      <pane id="Course" />
+      <pane id="Scope" />
+    </panes>
+  </component>
+  <component name="PropertiesComponent">
+    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
+    <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
+  </component>
+  <component name="RunDashboard">
+    <option name="ruleStates">
+      <list>
+        <RuleState>
+          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
+        </RuleState>
+        <RuleState>
+          <option name="name" value="StatusDashboardGroupingRule" />
+        </RuleState>
+      </list>
+    </option>
+  </component>
+  <component name="SvnConfiguration">
+    <configuration />
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="028f7063-2c5d-4a6b-be24-591f81619277" name="Default" comment="" />
+      <created>1533604709424</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1533604709424</updated>
+    </task>
+    <servers />
+  </component>
+  <component name="ToolWindowManager">
+    <frame x="0" y="23" width="1024" height="548" extended-state="0" />
+    <editor active="true" />
+    <layout>
+      <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.25" />
+      <window_info anchor="bottom" id="TODO" order="6" />
+      <window_info anchor="bottom" id="Event Log" side_tool="true" />
+      <window_info anchor="bottom" id="Run" order="2" />
+      <window_info anchor="bottom" id="Version Control" show_stripe_button="false" />
+      <window_info anchor="bottom" id="Python Console" />
+      <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
+      <window_info anchor="bottom" id="Terminal" />
+      <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
+      <window_info id="Favorites" side_tool="true" />
+      <window_info anchor="bottom" id="Find" order="1" />
+      <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
+      <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
+      <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
+      <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
+      <window_info anchor="bottom" id="Message" order="0" />
+      <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
+    </layout>
+  </component>
+  <component name="VcsContentAnnotationSettings">
+    <option name="myLimit" value="2678400000" />
+  </component>
+  <component name="editorHistoryManager">
+    <entry file="file://$PROJECT_DIR$/test_datafetcher.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="203">
+          <caret line="25" column="50" lean-forward="true" selection-start-line="25" selection-start-column="50" selection-end-line="25" selection-end-column="50" />
+          <folding>
+            <element signature="e#786#801#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/Trainer.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="230">
+          <caret line="21" column="27" selection-start-line="21" selection-start-column="27" selection-end-line="21" selection-end-column="27" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/test_trainer.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="194">
+          <caret line="21" column="17" selection-start-line="21" selection-start-column="17" selection-end-line="21" selection-end-column="17" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/test_predictor.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="231">
+          <caret line="22" column="15" selection-start-line="22" selection-start-column="15" selection-end-line="22" selection-end-column="15" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/DataFetcher.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="1760">
+          <caret line="123" selection-start-line="123" selection-end-line="123" />
+          <folding>
+            <element signature="e#849#886#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/test_sentenceparse.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="311">
+          <caret line="22" column="19" lean-forward="true" selection-start-line="22" selection-start-column="19" selection-end-line="22" selection-end-column="19" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/Predictor.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="133">
+          <caret line="17" column="46" lean-forward="true" selection-start-line="17" selection-start-column="46" selection-end-line="17" selection-end-column="46" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/plot_piechart.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="630">
+          <caret line="42" column="38" selection-start-line="42" selection-start-column="38" selection-end-line="42" selection-end-column="38" />
+          <folding>
+            <element signature="e#786#801#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/SentenceParser.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="115">
+          <caret line="18" column="29" selection-start-line="18" selection-start-column="29" selection-end-line="18" selection-end-column="29" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/application.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="243">
+          <caret line="28" column="14" lean-forward="true" selection-start-line="28" selection-start-column="14" selection-end-line="28" selection-end-column="14" />
+          <folding>
+            <element signature="e#920#972#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+  </component>
+</project>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/DataFetcher.py b/mxnet-bot/PredictLabels/DataFetcher.py
new file mode 100644
index 0000000..cef3ede
--- /dev/null
+++ b/mxnet-bot/PredictLabels/DataFetcher.py
@@ -0,0 +1,135 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script fetches GitHub issues and stores them in a json file
+from __future__ import print_function
+import os
+import requests
+import json
+import re
+import pandas as pd
+import logging
+
+
+class DataFetcher:
+
+    def __init__(self,
+                 github_user=os.environ.get("github_user"),
+                 github_oauth_token=os.environ.get("github_oauth_token"),
+                 repo=os.environ.get("repo")):
+        """
+        This DataFetcher serves to fetch issues data
+        Args:
+            github_user(str): the github id. ie: "CathyZhang0822"
+            github_oauth_token(str): the github oauth token, paired with github_user to realize authorization
+            repo(str): the repo name
+        """
+        self.github_user = github_user
+        self.github_oauth_token = github_oauth_token
+        self.repo = repo
+        self.auth = (self.github_user, self.github_oauth_token)
+        self.json_data = None
+
+    def cleanstr(self, raw_string, sub_string):
+        """
+        Replace every non-alphanumeric character of raw_string with
+        sub_string and return the result in lower case
+        """
+        clean = re.sub("[^0-9a-zA-Z]", sub_string, raw_string)
+        return clean.lower()
+
+    def count_pages(self, state):
+        """
+        Count the pages of issues for state ("open"/"closed"/"all"), read from
+        the response's "link" header; 1 is returned when the header is absent
+        """
+        url = 'https://api.github.com/repos/%s/issues' % self.repo
+        response = requests.get(url, {'state': state},
+                                auth=self.auth)
+        assert response.status_code == 200, "Authorization failed"
+        if "link" not in response.headers:
+            return 1
+        return int(self.cleanstr(response.headers['link'], " ").split()[-3])
+    
+    def fetch_issues(self, issue_nums):
+        """
+        Fetch the title and body of every issue listed in issue_nums
+        issue_nums: a list of issue ids
+        return a pandas dataframe with the columns id, title and body
+        """
+        assert issue_nums != [], "Empty Input!"
+        logging.info("Reading issues:{}".format(", ".join([str(num) for num in issue_nums])))
+        data = []
+        for number in issue_nums:
+            url = 'https://api.github.com/repos/' + self.repo + '/issues/' + str(number)
+            response = requests.get(url, auth=self.auth)
+            item = response.json()
+            assert 'title' in item, "{} issues doesn't exist!".format(str(number))
+            data += [{'id': str(number), 'title': item['title'], 'body': item['body']}]
+        return pd.DataFrame(data)
+
+    def data2json(self, state, labels=None, other_labels=False):
+        """
+        This method is to store issues' data into a json file, return json file's name
+        state can be either "open"/"closed"/"all"
+        labels is a list of target labels we are interested in
+        other_labels can be either "True"/"False"
+        """
+        assert state in set(['all', 'open', 'closed']), "Invalid State!"
+        logging.info("Reading {} issues..".format(state))
+        pages = self.count_pages(state)
+        data = []
+        for x in range(1, pages+1):
+            url = 'https://api.github.com/repos/' + self.repo + '/issues?page=' + str(x) \
+                  + '&per_page=30'.format(repo=self.repo)
+            response = requests.get(url,
+                                    {'state': state,
+                                     'base': 'master',
+                                     'sort': 'created'},
+                                    auth=self.auth)
+            for item in response.json():
+                if "pull_request" in item:
+                    continue
+                if "labels" in item:
+                    issue_labels=list(set([item['labels'][i]['name'] for i in range(len(item['labels']))]))
+                else:
+                    continue
+                if labels is not None:
+                    # fetch issue which has at least one target label
+                    for label in labels:
+                        if label in issue_labels:
+                            if other_labels:
+                                # besides target labels, we still want other labels
+                                data += [{'id': item['number'],'title': item['title'], 'body': item['body'], 'labels': issue_labels}]
+                            else:
+                                # only record target labels
+                                if(label in set(["Feature", "Call for Contribution", "Feature request"])):
+                                    label = "Feature"
+                                data += [{'id': item['number'], 'title': item['title'], 'body': item['body'], 'labels': label}]
+                            # if have this break, then we only pick up the first target label
+                            break
+                else:
+                    # fetch all issues
+                    data += [{'id': item['number'], 'title': item['title'], 'body': item['body'], 'labels': issue_labels}]
+        self.json_data = data
+        s_labels = "_".join(labels) if labels is not None else "all_labels"
+        filename = "{}_data.json_{}".format(state,s_labels)
+        logging.info("Writing json file..")
+        with open(filename, 'w') as write_file:
+            json.dump(data, write_file)
+        logging.info("{} json file is ready!".format(filename))
+        return filename
diff --git a/mxnet-bot/PredictLabels/Dockerfile b/mxnet-bot/PredictLabels/Dockerfile
new file mode 100644
index 0000000..16f9c31
--- /dev/null
+++ b/mxnet-bot/PredictLabels/Dockerfile
@@ -0,0 +1,26 @@
+FROM python:3.6.6
+
+# Update packages
+RUN apt-get update
+
+# Install prerequisite for matplotlib
+RUN apt-get -y install libxft-dev libfreetype6 libfreetype6-dev
+
+# Bundle app source
+COPY . /src
+
+EXPOSE 8000
+WORKDIR /src
+
+#install Python modules
+RUN pip install -r requirements.txt
+
+# Environment Variables
+ENV github_user your_github_id
+ENV github_oauth_token your_github_read_only_token
+ENV repo repo_name
+
+# Run it
+ENTRYPOINT ["python", "application.py"]
+
+
diff --git a/mxnet-bot/PredictLabels/Dockerrun.aws.json b/mxnet-bot/PredictLabels/Dockerrun.aws.json
new file mode 100755
index 0000000..73464c7
--- /dev/null
+++ b/mxnet-bot/PredictLabels/Dockerrun.aws.json
@@ -0,0 +1,7 @@
+{
+  "AWSEBDockerrunVersion": "1",
+  "Logging": "/tmp/sample-app",
+  "Image": {
+  	"Update": "false"
+  }
+}
diff --git a/mxnet-bot/PredictLabels/Predictor.py b/mxnet-bot/PredictLabels/Predictor.py
new file mode 100644
index 0000000..c2db5d2
--- /dev/null
+++ b/mxnet-bot/PredictLabels/Predictor.py
@@ -0,0 +1,132 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from sklearn.preprocessing import LabelEncoder
+from SentenceParser import SentenceParser
+from DataFetcher import DataFetcher
+import numpy as np
+import pickle
+import re
+import logging
+
+
+class Predictor:
+    # keywords will be used to apply rule-based algorithms
+    keywords = {"ci": ["ci", "ccache", "jenkins"],
+                "flaky": ["flaky"],
+                "gluon": ["gluon"],
+                "coda": ["cuda", "cudnn"],
+                "scala": ["scala"],
+                "mkldnn": ["mkldnn, mkl"],
+                "onnx": ["onnx"]}
+
+    def __init__(self):
+        """
+        Predictor serves to apply rule-based and ML algorithms to predict labels
+        """
+        self.tv = None
+        self.labels = None
+        self.clf = None
+        self.reload()
+
+    def reload(self):
+        """
+        This method is to load models
+        """
+        self.tv = pickle.load(open("/tmp/Vectorizer.p", "rb"))
+        self.labels = pickle.load(open("/tmp/Labels.p", "rb"))
+        self.clf = pickle.load(open("/tmp/Classifier.p", "rb"))
+
+    def tokenize(self, row):
+        """
+        This method is to tokenize a sentence into its unique lower-case alphanumeric words
+        Args:
+            row(string): a sentence
+        Return:
+            words(set): the distinct words of the sentence
+        """
+        row = re.sub('[^a-zA-Z0-9]', ' ', row).lower()
+        words = set(row.split())
+        return words
+
+    def rule_based(self, issues):
+        """
+        This method applies rule_based algorithms to predict labels
+        Args:
+            issues(list): a list of issue numbers
+        Return:
+            rule_based_predictions(list of lists): labels which satisfy rules
+        """
+        DF = DataFetcher()
+        df_test = DF.fetch_issues(issues)
+        rule_based_predictions = []
+        for i in range(len(issues)):
+            # extract every issue's title
+            row = df_test.loc[i, 'title']
+            # apply rule-based algorithms
+            single_issue_predictions = []
+            if "feature request" in row.lower(): single_issue_predictions.append("Feature")
+            if "c++" in row.lower(): single_issue_predictions.append("C++")
+            tokens = self.tokenize(row)
+            for k, v in self.keywords.items():
+                for keyword in v:
+                    if keyword in tokens:
+                        single_issue_predictions.append(k)
+            rule_based_predictions.append(single_issue_predictions)
+        return rule_based_predictions
+
+    def ml_predict(self, issues, threshold=0.3):
+        """
+        This method applies machine learning algorithms to predict labels
+        Args:
+            issues(list): a list of issue numbers
+            threshold(float): threshold of probability
+        Return:
+            ml_predictions(list of lists): predictions
+        """
+        # step1: fetch data
+        DF = DataFetcher()
+        df_test = DF.fetch_issues(issues)
+        # step2: data cleaning
+        SP = SentenceParser()
+        SP.data = df_test
+        SP.clean_body('body', True, True)
+        SP.merge_column(['title', 'title', 'title', 'body'], 'train')
+        test_text=SP.process_text('train', True, False, True)
+        # step3: word embedding
+        test_data_tfidf = self.tv.transform(test_text).toarray()
+        le = LabelEncoder()
+        le.fit_transform(self.labels)
+        # step4: classification
+        probs = self.clf.predict_proba(test_data_tfidf)
+        # pick up top 2 predictions which exceeds threshold
+        best_n = np.argsort(probs, axis=1)[:, -2:]
+        ml_predictions=[]
+        for i in range(len(best_n)):
+            # INFO:Predictor:issue:11919,Performance:0.47353076240017744,Question:0.2440056213336274
+            logging.info("issue:{}, {}:{}, {}:{}".format(str(issues[i]), str(le.classes_[best_n[i][-1]]), str(probs[i][best_n[i][-1]]),
+                        str(le.classes_[best_n[i][-2]]), str(probs[i][best_n[i][-2]])))
+            single_issue_predictions = [le.classes_[best_n[i][j]] for j in range(-1, -3, -1) if probs[i][best_n[i][j]] > threshold]
+            ml_predictions.append(single_issue_predictions)
+        return ml_predictions
+
+    def predict(self, issues):
+        # return predictions of both rule_base algorithms and machine learning methods
+        rule_based_predictions = self.rule_based(issues)
+        ml_predictions = self.ml_predict(issues)
+        predictions = [list(set(rule_based_predictions[i]+ml_predictions[i])) for i in range(len(ml_predictions))]
+        return predictions
diff --git a/mxnet-bot/PredictLabels/README.md b/mxnet-bot/PredictLabels/README.md
new file mode 100644
index 0000000..22f5537
--- /dev/null
+++ b/mxnet-bot/PredictLabels/README.md
@@ -0,0 +1,25 @@
+# Elastic Beanstalk Web Server
+
+A web server built on [AWS Elastic Beanstalk](https://aws.amazon.com/elasticbeanstalk/) that responds to GET/POST requests and maintains itself by retraining its models. It has two main features:
+  * Train models: it automatically retrains the Machine Learning models every 24 hours using the latest data.
+  * Predict labels: when it receives a GET/POST request containing issue IDs, it sends the predicted labels back.
+
+## Set up
+*Make sure you are in this directory (`mxnet-bot/PredictLabels`).*
+* Configure the Dockerfile: in `Dockerfile`, set the environment variables (last 3 lines) to your real `github_user`, `github_oauth_token` (a read-only token) and `repo`.
+* Open terminal, run:
+```bash
+zip eb.zip application.py cron.yaml DataFetcher.py \
+Dockerfile Dockerrun.aws.json plot_piechart.py Predictor.py SentenceParser.py Trainer.py \
+requirements.txt stopwords.txt
+```
+This zips all the needed files into `eb.zip`.
+* Manually create a new Elastic Beanstalk application.
+    1. Go to AWS Elastic Beanstalk console, click ***Create New Application***. Fill in *Application Name* and *Description*, click ***Create***.
+    2. Under ***Select environment tier***, select ***Web server environment***, click ***Select***.
+    3. Under **Base configuration**, select **Preconfigured platform**. In its dropdown, select **Docker**. Then select ***Upload your code***, upload `eb.zip`.
+    4. Click ***Configure more options***. Modify **Instances**: in the **Instance type** dropdown, select t2.large. Click ***Create Environment***. (No need to select a security group; EB will create one.)
+    5. It will take about 2 minutes to set up the environment.
+    6. Once the environment is set up, it will take 5-10 minutes to generate the models.
+    7. Write down the URL (e.g. http://labelbot-env.pgc55xzpte.us-east-1.elasticbeanstalk.com).
+    
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/SentenceParser.py b/mxnet-bot/PredictLabels/SentenceParser.py
new file mode 100644
index 0000000..786d4a9
--- /dev/null
+++ b/mxnet-bot/PredictLabels/SentenceParser.py
@@ -0,0 +1,134 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script serves to do data cleaning
+from bs4 import BeautifulSoup
+import nltk
+# fix ssl certificate errors
+import ssl
+try:
+    _create_unverified_https_context = ssl._create_unverified_context
+except AttributeError:
+    pass
+else:
+    ssl._create_default_https_context = _create_unverified_https_context
+import os.path
+import pandas as pd
+import re
+import sys
+import logging
+
+
+class SentenceParser:
+
+    regex_str = [
+        r'<[^>]+>',                                                                     # HTML tags
+        r'(?:@[\w_]+)',                                                                 # @-mentions
+        r"(?:\#+[\w_]+[\w\'_\-]*[\w_]+)",                                               # hash-tags
+        r'http[s]?://(?:[a-z]|[0-9]|[$-_@.&amp;+]|[!*\(\),]|(?:%[0-9a-f][0-9a-f]))+',   # URLs
+        r'(?:(?:\d+,?)+(?:\.?\d+)?)',                                                   # numbers
+        r"(?:[a-z][a-z'\-_]+[a-z])",                                                    # words with - and '
+        r'(?:[\w_]+)',                                                                  # other words
+        r'(?:\S)'                                                                       # anything else
+    ]
+    # English stopwords, read once while the class body executes (the path is relative to the working directory).
+    # The `with` statement closes the file on exit, so no explicit close() call is needed afterwards.
+    with open('stopwords.txt') as stopwords_file:
+        stopwords = stopwords_file.read().split()
+
+    def __init__(self):
+        """
+        SentenceParser cleans text columns of the pandas DataFrame held in self.data
+        """
+        self.data = None
+        # extract words stem
+        self.porter = nltk.PorterStemmer()
+        # a set of stopwords
+        self.stops = set(self.stopwords)
+
+    def read_file(self, filepath, filetype, encod='ISO-8859-1', header=None):
+        """
+        Load a csv/json/xlsx file into self.data; exits the process if the file is missing or the type is unknown
+        """
+        logging.info('Start reading File')
+        if not os.path.isfile(filepath):
+            logging.error("File Not Exist!")
+            sys.exit(1)  # non-zero status, so the error exit is not reported as success
+        if filetype == 'csv':
+            df = pd.read_csv(filepath, encoding=encod, header=header)
+        elif filetype == 'json':
+            df = pd.read_json(filepath, encoding=encod, lines=False)
+        elif filetype == 'xlsx':
+            df = pd.read_excel(filepath, encoding=encod, header=header)
+        else:
+            logging.error("Extension Type not Accepted!")
+            sys.exit(1)  # non-zero status, so the error exit is not reported as success
+
+        logging.debug(df)
+        self.data = df
+
+    def merge_column(self, columns, name):
+        """
+        Concatenate the given columns, each preceded by one space, into a new column `name` of self.data
+        """
+        logging.info('Merge headers %s to %s', str(columns), name)
+        self.data[name] = ''
+        for header in columns:
+            self.data[name] += ' ' + self.data[header]
+
+    def clean_body(self, column, remove_template=True, remove_code=True):
+        """
+        Strip the issue-template tail and the fenced code blocks from every row of self.data[column], in place
+        """
+        logging.info("Start Removing Templates..")
+        for i in range(len(self.data)):
+            # drop everything from the '## Environment info' heading onwards
+            if remove_template and "## Environment info" in self.data[column][i]:
+                index = self.data.loc[i, column].find("## Environment info")
+                self.data.loc[i, column] = self.data.loc[i, column][:index]
+            # remove code: after splitting on ``` the even-indexed pieces are the text outside the fences
+            if remove_code and "```" in self.data[column][i]:
+                sample = self.data[column][i].split("```")
+                # a slice, not a comprehension over `i`: on Python 2 that would rebind the row index used below
+                self.data.loc[i, column] = " ".join(sample[::2])
+
+    def process_text(self, column, remove_symbol=True, remove_stopwords=False, stemming=False):
+        """
+        Replace whitespace runs and URLs in self.data[column] in place, then return the cleaned rows as a list
+        """
+        logging.info("Start Data Cleaning...")
+        # collapse newlines/tabs; regex=True is explicit because newer pandas treats the pattern literally by default
+        self.data[column] = self.data[column].str.replace(r'[\n\r\t]+', ' ', regex=True)
+        # remove URLs
+        self.data[column] = self.data[column].str.replace(self.regex_str[3], ' ', regex=True)
+        tempcol = self.data[column].values.tolist()
+
+        for i, text in enumerate(tempcol):
+            row = BeautifulSoup(text, 'html.parser').get_text().lower()
+            # remove symbols
+            if remove_symbol:
+                row = re.sub('[^a-zA-Z]', ' ', row)
+            words = row.split()
+            # remove stopwords
+            if remove_stopwords:
+                words = [w for w in words if w not in self.stops and not w.replace('.', '', 1).isdigit()]
+            # extract words stem
+            if stemming:
+                words = [self.porter.stem(w) for w in words]
+            row = ' '.join(words)
+            tempcol[i] = row.lower()
+        return tempcol
diff --git a/mxnet-bot/PredictLabels/Trainer.py b/mxnet-bot/PredictLabels/Trainer.py
new file mode 100644
index 0000000..c465c6a
--- /dev/null
+++ b/mxnet-bot/PredictLabels/Trainer.py
@@ -0,0 +1,87 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script trains the Machine Learning models
+from DataFetcher import DataFetcher
+from SentenceParser import SentenceParser
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.svm import SVC
+from sklearn.preprocessing import LabelEncoder
+import pickle
+import logging
+
+
+class Trainer:
+
+    def __init__(self):
+        """
+        Trainer is to train issues using Machine Learning methods.
+        self.labels(list): a list of target labels
+        self.tv: TFIDF model (1- to 3-grams, max_features = 10000)
+        self.clf: Classifier (SVC, kernel = 'rbf')
+        """
+        self.labels = ["Performance", "Test", "Question",
+                       "Feature request", "Call for contribution",
+                       "Feature", "Example", "Doc",
+                       "Installation", "Build", "Bug"]
+        self.tv = TfidfVectorizer(min_df=0.00009, ngram_range=(1, 3), max_features=10000)
+        self.clf = SVC(gamma=0.5, C=100, probability=True)
+
+    def train(self):
+        """
+        Fetch the labelled issues, fit the TF-IDF vectorizer and the classifier, and pickle the models into /tmp.
+        """
+        logging.info("Start training issues of general labels")
+        # Step1: Fetch issues with general labels
+        logging.info("Fetching Data..")
+        DF = DataFetcher()
+        filename = DF.data2json('all', self.labels, False)
+        # Step2: Clean data
+        logging.info("Cleaning Data..")
+        SP = SentenceParser()
+        SP.read_file(filename, 'json')
+        SP.clean_body('body', True, True)
+        SP.merge_column(['title', 'title', 'title', 'body'], 'train')
+        text = SP.process_text('train', True, False, True)
+        # Step3: Word Embedding
+        logging.info("Word Embedding..")
+        tv = self.tv
+        X = tv.fit_transform(text).toarray()
+        # Labels: the 'labels' column of the cleaned data, encoded as integers for the classifier
+        labels = SP.data['labels']
+        le = LabelEncoder()
+        Y = le.fit_transform(labels)
+        # Step4: Train Classifier
+        # SVC, kernel = 'rbf'
+        logging.info("Training Data..")
+        clf = self.clf
+        clf.fit(X, Y)
+        # Step5: save models
+        # each file is opened in a `with` block so it is flushed and closed before anything reloads it
+        logging.info("Saving Models..")
+        for path, model in (("/tmp/Vectorizer.p", tv),
+                            ("/tmp/Classifier.p", clf),
+                            ("/tmp/Labels.p", labels)):
+            with open(path, "wb") as model_file:
+                pickle.dump(model, model_file)
+        logging.info("Completed!")
+        return
+
+
+
+
+
diff --git a/mxnet-bot/PredictLabels/application.py b/mxnet-bot/PredictLabels/application.py
new file mode 100644
index 0000000..1aec229
--- /dev/null
+++ b/mxnet-bot/PredictLabels/application.py
@@ -0,0 +1,119 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This is a web server built on the Flask framework and the AWS Elastic Beanstalk service.
+# It responds to HTTP GET/POST requests.
+from flask import Flask, jsonify, request, send_file
+from apscheduler.schedulers.background import BackgroundScheduler
+from apscheduler.triggers.interval import IntervalTrigger
+from Predictor import Predictor
+from Trainer import Trainer
+import plot_piechart
+import timeit
+import atexit
+import logging
+import os.path
+
+logging.getLogger().setLevel(logging.INFO)
+
+application = Flask(__name__)
+# Train at import time when no pickled classifier exists yet (presumably Predictor() loads it -- confirm).
+if not os.path.exists('/tmp/Classifier.p'):
+    trainer = Trainer()
+    trainer.train()
+predictor = Predictor()
+
+
+# GET '/' -- respond with a fixed greeting string
+@application.route('/')
+def index():
+    return "Hello!  -Bot"
+
+
+# GET '/issues/<issue>'
+# Respond with the labels predicted for one issue, joined by single spaces.
+@application.route('/issues/<issue>')
+def get_prediction(issue):
+    labels_per_issue = predictor.predict([issue])
+    return " ".join(labels_per_issue[0])
+
+
+# POST '/predict'
+# Respond with the predicted labels of several issues.
+# The request body is JSON of the form {"issues":[1,2,3]}; the reply is a JSON
+# list holding one {"number": ..., "predictions": [...]} object per requested issue.
+@application.route('/predict', methods=['POST'])
+def predict():
+    issue_numbers = request.get_json()["issues"]
+    labels = predictor.predict(issue_numbers)
+    response = [
+        {"number": issue_numbers[pos], "predictions": labels[pos]}
+        for pos in range(len(issue_numbers))
+    ]
+    return jsonify(response)
+
+
+# POST '/draw'
+# Render a pie chart and respond with the PNG file.
+# Example client call: requests.post(url,json={"fracs":[], "labels":[]})
+# "fracs" and "labels" are handed unchanged to plot_piechart.draw_pie.
+@application.route('/draw', methods=['POST'])
+def plot():
+    payload = request.get_json()
+    # draw_pie saves the chart to disk and returns the path of the image
+    image_path = plot_piechart.draw_pie(payload["fracs"], payload["labels"])
+    return send_file(image_path, mimetype='image/png')
+
+
+# Scheduler job: retrain the models, then make the live predictor pick them up.
+def train_models():
+    started_at = timeit.default_timer()
+    Trainer().train()
+    finished_at = timeit.default_timer()
+    # reload models
+    predictor.reload()
+    # only the training itself is timed, not the reload
+    elapsed = int(finished_at - started_at)
+    minutes, seconds = divmod(elapsed, 60)
+    logging.info("Training completed! Time cost: {} min, {} seconds".format(str(minutes), str(seconds)))
+
+
+# Retrain the ML models every 24 hours. Started once by the explicit call below; it is deliberately not also a
+# before_first_request hook, because that ran it again on the first request and started a second scheduler/job.
+def initialize():
+    scheduler = BackgroundScheduler()
+    scheduler.start()
+    scheduler.add_job(
+        func=train_models,
+        trigger=IntervalTrigger(hours=24),
+        id='Training_Job',
+        name='Update models every 24 hours',
+        replace_existing=True)
+    # Shut down the scheduler when exiting the app
+    atexit.register(scheduler.shutdown)
+
+
+initialize()
+
+
+# Run the Flask development server when this file is executed directly.
+if __name__ == "__main__":
+    # NOTE(review): debug mode combined with binding to 0.0.0.0 makes the debug output reachable from the network.
+    # This 'application.debug = True' should be removed before deploying a production app.
+    application.debug = True
+    application.threaded = True
+    application.run('0.0.0.0', 8000)
diff --git a/mxnet-bot/PredictLabels/cron.yaml b/mxnet-bot/PredictLabels/cron.yaml
new file mode 100644
index 0000000..f47da88
--- /dev/null
+++ b/mxnet-bot/PredictLabels/cron.yaml
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+version: 1
+cron: 
+  - name: "task1"
+    url: "/scheduled"
+    schedule: "* * * * *"
diff --git a/mxnet-bot/PredictLabels/plot_piechart.py b/mxnet-bot/PredictLabels/plot_piechart.py
new file mode 100644
index 0000000..6f3cd20
--- /dev/null
+++ b/mxnet-bot/PredictLabels/plot_piechart.py
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime
+import matplotlib
+# set 'agg' as matplotlib backend
+matplotlib.use('agg', warn=False, force=True)
+from matplotlib import pyplot as plt
+import logging
+
+
+def make_autopct(values):
+    # Build the autopct callback for plt.pie: it labels a slice with its percentage and its absolute count.
+    def my_autopct(pct):
+        # pct is a percentage of sum(values); scale it back to a count and round to the nearest integer
+        count = int(round(sum(values) * pct / 100.0))
+        return '{p:.2f}% ({v:d})'.format(p=pct, v=count)
+    return my_autopct
+
+
+def draw_pie(fracs, labels):
+    """
+    Plot the pie chart of labels, save it as a PNG in the '/tmp/' folder and return the path of that file
+    """
+    logging.info("Drawing the pie chart..")
+    fig = plt.figure()
+    plt.pie(fracs, labels=labels, autopct=make_autopct(fracs), shadow=True)
+    plt.title("Top 10 labels for newly opened issues")
+    now = datetime.datetime.today()  # read the clock once, so date and time cannot straddle midnight
+    pic_path = "/tmp/piechart_{}_{}.png".format(str(now.date()), str(now.time()))
+    fig.savefig(pic_path)
+    plt.close(fig)  # pyplot keeps every figure in memory until it is closed explicitly
+    return pic_path
diff --git a/mxnet-bot/PredictLabels/requirements.txt b/mxnet-bot/PredictLabels/requirements.txt
new file mode 100644
index 0000000..c587d93
--- /dev/null
+++ b/mxnet-bot/PredictLabels/requirements.txt
@@ -0,0 +1,33 @@
+APScheduler==3.5.1
+beautifulsoup4==4.6.0
+boto3==1.7.59
+botocore==1.10.59
+bs4==0.0.1
+certifi==2018.4.16
+chardet==3.0.4
+click==6.7
+cycler==0.10.0
+DateTime==4.2
+docutils==0.14
+Flask==1.0.2
+idna==2.7
+itsdangerous==0.24
+Jinja2==2.10
+jmespath==0.9.3
+kiwisolver==1.0.1
+matplotlib==2.2.2
+MarkupSafe==1.0
+nltk==3.3
+numpy==1.14.5
+pandas==0.23.3
+pyparsing==2.2.0
+python-dateutil==2.7.3
+pytz==2018.5
+requests==2.19.1
+scikit-learn==0.19.2
+scipy==1.1.0
+six==1.11.0
+sklearn==0.0
+urllib3==1.23
+Werkzeug==0.14.1
+zope.interface==4.5.0
diff --git a/mxnet-bot/PredictLabels/stopwords.txt b/mxnet-bot/PredictLabels/stopwords.txt
new file mode 100644
index 0000000..c41ef55
--- /dev/null
+++ b/mxnet-bot/PredictLabels/stopwords.txt
@@ -0,0 +1 @@
+i me my myself we our ours ourselves you you're you've you'll you'd your yours yourself yourselves he him his himself she she's her hers herself it it's its itself they them their theirs themselves what which who whom this that that'll these those am is are was were be been being have has had having do does did doing a an the and but if or because as until while of at by for with about against between into through during before after above below to from up down in out on off over under again further then once here there when where why how all any both each few more most other some such no nor not only own same so than too very s t can will just don don't should should've now d ll m o re ve y ain aren aren't couldn couldn't didn didn't doesn doesn't hadn hadn't hasn hasn't haven haven't isn isn't ma mightn mightn't mustn mustn't needn needn't shan shan't shouldn shouldn't wasn wasn't weren weren't won won't wouldn wouldn't
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/test_datafetcher.py b/mxnet-bot/PredictLabels/test_datafetcher.py
new file mode 100644
index 0000000..588b000
--- /dev/null
+++ b/mxnet-bot/PredictLabels/test_datafetcher.py
@@ -0,0 +1,116 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import requests
+import boto3
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+from DataFetcher import DataFetcher
+import unittest
+import pandas as pd
+from DataFetcher import DataFetcher
+from pandas.util.testing import assert_frame_equal
+# some version issue
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+# test coverage: 93%
+class TestLabelBot(unittest.TestCase):  # unit tests for DataFetcher
+
+    def setUp(self):
+        self.df = DataFetcher()
+        self.df.repo = "apache/incubator-mxnet"
+        self.df.github_user = "cathy"  # dummy credentials; the fetching tests patch requests.get
+        self.df.github_oauth_token = "123"
+
+    def tearDown(self):
+        pass
+
+    def test_cleanstr(self):
+        new_string = self.df.cleanstr("a_b", "")  # the underscore is expected to be replaced by ""
+        self.assertEqual(new_string, "ab")
+
+    def test_count_pages(self):
+        with patch('DataFetcher.requests.get') as mocked_get:  # no real HTTP request is sent
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = [{ "body":"issue's body",
+                                                "created_at":"2018-07-28T18:27:17Z",
+                                                "comments":"0",
+                                                "number":11925,
+                                                "labels":[{'name':'Doc'}],
+                                                "state":"open",
+                                                "title":"issue's title",
+                                                "html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+                                              },
+                                              { "body":"issue's body",
+                                                "created_at":"2018-07-28T18:27:17Z",
+                                                "comments":"0",
+                                                "number":11924,
+                                                "labels":[],
+                                                "state":"closed",
+                                                "title":"issue's title",
+                                                "html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }]
+            page = self.df.count_pages('all')
+            self.assertEqual(page,1)  # the mocked response (two issues, highest number 11925) must count as one page
+
+    def test_fetch_issues(self):
+        with patch('DataFetcher.requests.get') as mocked_get:  # no real HTTP request is sent
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = { "body":"issue's body",
+                                                "created_at":"2018-07-28T18:27:17Z",
+                                                "comments":"0",
+                                                "number":11925,
+                                                "labels":[{'name':'Feature'}],
+                                                "state":"open",
+                                                "title":"issue's title",
+                                                "html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }
+            data = self.df.fetch_issues([11925])
+            expected_data = [{'id':"11925", 'title':"issue's title",'body':"issue's body"}]  # id is expected as a string
+            assert_frame_equal(data, pd.DataFrame(expected_data))
+
+    def test_data2json(self):
+        with patch('DataFetcher.requests.get') as mocked_get:  # no real HTTP request is sent
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = [{ "body":"issue's body",
+                                                "created_at":"2018-07-28T18:27:17Z",
+                                                "comments":"0",
+                                                "number":11925,
+                                                "labels":[{'name':'Feature'}],
+                                                "state":"open",
+                                                "title":"issue's title",
+                                                "html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+                                              },
+                                              { "body":"issue's body",
+                                                "created_at":"2018-07-28T18:27:17Z",
+                                                "comments":"0",
+                                                "number":11924,
+                                                "labels":[],
+                                                "state":"closed",
+                                                "title":"issue's title",
+                                                "html_url":"https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }]
+            self.df.data2json('all', labels=["Feature"], other_labels=False)
+            expected_data = [{'id': 11925, 'title': "issue's title", 'body': "issue's body", 'labels': 'Feature'}]  # the unlabelled issue 11924 is expected to be left out
+            self.assertEqual(expected_data, self.df.json_data)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mxnet-bot/PredictLabels/test_predictor.py b/mxnet-bot/PredictLabels/test_predictor.py
new file mode 100644
index 0000000..3580da8
--- /dev/null
+++ b/mxnet-bot/PredictLabels/test_predictor.py
@@ -0,0 +1,95 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import requests
+import boto3
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+from DataFetcher import DataFetcher
+import unittest
+from Predictor import Predictor
+# some version issue
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+
+# test coverage: 100%
+class TestLabelBot(unittest.TestCase):  # unit tests for Predictor
+
+    def setUp(self):
+        self.pr = Predictor()  # NOTE(review): presumably needs the pickled models under /tmp -- confirm
+
+    def tearDown(self):
+        pass
+
+    def test_tokenize(self):
+        words = self.pr.tokenize("hello_world")  # the underscore is expected to act as a word separator
+        self.assertEqual(words, set(['hello','world']))
+
+    def test_rule_based(self):
+        with patch('DataFetcher.requests.get') as mocked_get:  # no real HTTP request is sent
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = {
+                                                "body": "issue's body",
+                                                "created_at": "2018-07-28T18:27:17Z",
+                                                "comments": "0",
+                                                "number": 11925,
+                                                "labels": [{'name': 'Doc'}],
+                                                "state": "open",
+                                                "title": "a feature requests for scala package",
+                                                "html_url": "https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }
+            predictions = self.pr.rule_based([11925])
+            self.assertEqual([['Feature','scala']], predictions)  # presumably derived from the title keywords
+
+    def test_ml_predict(self):
+        with patch('DataFetcher.requests.get') as mocked_get:  # no real HTTP request is sent
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = {
+                                                "body": "test",
+                                                "created_at": "2018-07-28T18:27:17Z",
+                                                "comments": "0",
+                                                "number": 11925,
+                                                "labels": [{'name': 'Doc'}],
+                                                "state": "open",
+                                                "title": "a feature requests for scala package",
+                                                "html_url": "https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }
+            predictions = self.pr.ml_predict([11925])
+            self.assertEqual([['Feature']], predictions)  # NOTE(review): the expected label depends on the trained model in use
+
+    def test_predict(self):
+        with patch('DataFetcher.requests.get') as mocked_get:  # no real HTTP request is sent
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = {
+                                                "body": "test",
+                                                "created_at": "2018-07-28T18:27:17Z",
+                                                "comments": "0",
+                                                "number": 11925,
+                                                "labels": [{'name': 'Doc'}],
+                                                "state": "open",
+                                                "title": "a feature requests for scala package",
+                                                "html_url": "https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }
+            predictions = self.pr.predict([11925])
+            self.assertEqual([['Feature', 'scala']], predictions)  # asserts the exact label order, not only the label set
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mxnet-bot/PredictLabels/test_sentenceparse.py b/mxnet-bot/PredictLabels/test_sentenceparse.py
new file mode 100644
index 0000000..a81b3c4
--- /dev/null
+++ b/mxnet-bot/PredictLabels/test_sentenceparse.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import requests
+import boto3
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+import unittest
+import pandas as pd
+from SentenceParser import SentenceParser
+from pandas.util.testing import assert_frame_equal
+# some version issue
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+# test coverage: 88%
+class TestSentenceParser(unittest.TestCase):  # unit tests for SentenceParser
+
+    def setUp(self):  # fresh parser per test, preloaded with a one-row DataFrame
+        self.sp = SentenceParser()
+        self.sp.data = pd.DataFrame([{'id': 11925, 'title': "issue's title",
+                                      'body': " bug ``` import pandas``` ## Environment info",
+                                      'labels': ['Doc']}])
+
+    def test_read_file(self):  # reads the 'all_data.json_Feature' fixture (relative path) and compares the loaded frame
+        self.sp.read_file('all_data.json_Feature', 'json')
+        expected_data = [{'id': 11925, 'title': "issue's title", 'body': "issue's body", 'labels': ['Doc']},
+                         {'id': 11924, 'title': "issue's title", 'body': "issue's body", 'labels': []}]
+        assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
+
+    def test_merge_column(self):  # 'title' and 'body' are expected to be joined into a new 'train' column
+        self.sp.merge_column(['title', 'body'], 'train')
+        expected_data = [{'id': 11925, 'title': "issue's title", 'body': " bug ``` import pandas``` ## Environment info",
+                          'labels': ['Doc'],
+                          'train': " issue's title  bug ``` import pandas``` ## Environment info"}]
+        assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
+
+    def test_clean_body(self):  # expects the code fence and the "## Environment info" text to be stripped from 'body'
+        self.sp.clean_body('body', True, True)
+        expected_data = [{'id': 11925, 'title': "issue's title", 'body': " bug   ", 'labels': ['Doc']}]
+        assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
+
+    def test_process_text(self):  # expects one lower-cased, stemmed string for the single fixture row
+        data = self.sp.process_text('body', True, True, True)
+        expected_data = ['bug import panda environ info']
+        self.assertEqual(data, expected_data)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mxnet-bot/PredictLabels/test_trainer.py b/mxnet-bot/PredictLabels/test_trainer.py
new file mode 100644
index 0000000..0abdace
--- /dev/null
+++ b/mxnet-bot/PredictLabels/test_trainer.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import unittest
+import requests
+import boto3
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+from DataFetcher import DataFetcher
+from Trainer import Trainer
+# some version issue
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+
+# test coverage: 100%
+class TestTrainer(unittest.TestCase):
+    """Smoke test: Trainer.train() must run to completion on two mocked GitHub issues."""
+
+    def setUp(self):
+        self.trainer = Trainer()
+
+    def test_train(self):
+        with patch('DataFetcher.requests.get') as mocked_get:  # serve the canned issues below instead of a real HTTP call
+            mocked_get.return_value.status_code = 200
+            mocked_get.return_value.json.return_value = [{
+                                                "body": "I was looking at the mxnet.metric source code and documentation",
+                                                "created_at": "2018-07-28T18:27:17Z",
+                                                "comments": "0",
+                                                "number": 11925,
+                                                "labels": [{'name': 'Doc'}],
+                                                "state": "open",
+                                                "title": "Confusion in documentation/implementation of F1, MCC metrics",
+                                                "html_url": "https://github.com/apache/incubator-mxnet/issues/11925",
+                                              }, {
+                                                "body": "I train a CNN with python under mxnet gluon mys C++ code crash when i call MXPredsetInput.",
+                                                "created_at": "2018-07-28T18:27:17Z",
+                                                "comments": "0",
+                                                "number": 11924,
+                                                "labels": [{'name': 'Bug'}],
+                                                "state": "closed",
+                                                "title": "Issue in exporting gluon model",
+                                                "html_url": "https://github.com/apache/incubator-mxnet/issues/11924",
+                                              }]
+            self.trainer.train()  # no assertion: the test only checks that training does not raise
+
+
+if __name__ == "__main__":
+    unittest.main()

From 9db9d5123ee14fc84ed7db0957337f4cd50c9f59 Mon Sep 17 00:00:00 2001
From: Yuelin Zhang <zyuelin@amazon.com>
Date: Wed, 8 Aug 2018 01:01:07 -0700
Subject: [PATCH 3/5] revise code

1. Store models in tempfile-managed files instead of hard-coded /tmp paths
2. Break one-line `if` statements onto separate lines
---
 .../PredictLabels/.idea/PredictLabels.iml     |  11 -
 mxnet-bot/PredictLabels/.idea/misc.xml        |   7 -
 mxnet-bot/PredictLabels/.idea/modules.xml     |   8 -
 mxnet-bot/PredictLabels/.idea/workspace.xml   | 212 ------------------
 mxnet-bot/PredictLabels/Predictor.py          |  16 +-
 mxnet-bot/PredictLabels/README.md             |   2 +-
 mxnet-bot/PredictLabels/Trainer.py            |  42 +++-
 mxnet-bot/PredictLabels/application.py        |  11 +-
 mxnet-bot/PredictLabels/test_predictor.py     |   7 +
 9 files changed, 54 insertions(+), 262 deletions(-)
 delete mode 100644 mxnet-bot/PredictLabels/.idea/PredictLabels.iml
 delete mode 100644 mxnet-bot/PredictLabels/.idea/misc.xml
 delete mode 100644 mxnet-bot/PredictLabels/.idea/modules.xml
 delete mode 100644 mxnet-bot/PredictLabels/.idea/workspace.xml

diff --git a/mxnet-bot/PredictLabels/.idea/PredictLabels.iml b/mxnet-bot/PredictLabels/.idea/PredictLabels.iml
deleted file mode 100644
index 6711606..0000000
--- a/mxnet-bot/PredictLabels/.idea/PredictLabels.iml
+++ /dev/null
@@ -1,11 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-  <component name="TestRunnerService">
-    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
-  </component>
-</module>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/.idea/misc.xml b/mxnet-bot/PredictLabels/.idea/misc.xml
deleted file mode 100644
index 7a5c067..0000000
--- a/mxnet-bot/PredictLabels/.idea/misc.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
-  <component name="PyCharmProfessionalAdvertiser">
-    <option name="shown" value="true" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/.idea/modules.xml b/mxnet-bot/PredictLabels/.idea/modules.xml
deleted file mode 100644
index 18fdcdd..0000000
--- a/mxnet-bot/PredictLabels/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/PredictLabels.iml" filepath="$PROJECT_DIR$/.idea/PredictLabels.iml" />
-    </modules>
-  </component>
-</project>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/.idea/workspace.xml b/mxnet-bot/PredictLabels/.idea/workspace.xml
deleted file mode 100644
index 15f95e5..0000000
--- a/mxnet-bot/PredictLabels/.idea/workspace.xml
+++ /dev/null
@@ -1,212 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ChangeListManager">
-    <list default="true" id="028f7063-2c5d-4a6b-be24-591f81619277" name="Default" comment="" />
-    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
-    <option name="TRACKING_ENABLED" value="true" />
-    <option name="SHOW_DIALOG" value="false" />
-    <option name="HIGHLIGHT_CONFLICTS" value="true" />
-    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
-    <option name="LAST_RESOLUTION" value="IGNORE" />
-  </component>
-  <component name="FileEditorManager">
-    <leaf>
-      <file leaf-file-name="application.py" pinned="false" current-in-tab="true">
-        <entry file="file://$PROJECT_DIR$/application.py">
-          <provider selected="true" editor-type-id="text-editor">
-            <state relative-caret-position="243">
-              <caret line="28" column="14" lean-forward="true" selection-start-line="28" selection-start-column="14" selection-end-line="28" selection-end-column="14" />
-              <folding>
-                <element signature="e#920#972#0" expanded="true" />
-              </folding>
-            </state>
-          </provider>
-        </entry>
-      </file>
-    </leaf>
-  </component>
-  <component name="IdeDocumentHistory">
-    <option name="CHANGED_PATHS">
-      <list>
-        <option value="$PROJECT_DIR$/DataFetcher.py" />
-        <option value="$PROJECT_DIR$/plot_piechart.py" />
-        <option value="$PROJECT_DIR$/Predictor.py" />
-        <option value="$PROJECT_DIR$/application.py" />
-        <option value="$PROJECT_DIR$/test_trainer.py" />
-        <option value="$PROJECT_DIR$/test_datafetcher.py" />
-        <option value="$PROJECT_DIR$/test_predictor.py" />
-        <option value="$PROJECT_DIR$/test_sentenceparse.py" />
-        <option value="$PROJECT_DIR$/Trainer.py" />
-        <option value="$PROJECT_DIR$/SentenceParser.py" />
-      </list>
-    </option>
-  </component>
-  <component name="ProjectFrameBounds">
-    <option name="y" value="23" />
-    <option name="width" value="1024" />
-    <option name="height" value="548" />
-  </component>
-  <component name="ProjectView">
-    <navigator proportions="" version="1">
-      <foldersAlwaysOnTop value="true" />
-    </navigator>
-    <panes>
-      <pane id="ProjectPane">
-        <subPane>
-          <expand>
-            <path>
-              <item name="PredictLabels" type="b2602c69:ProjectViewProjectNode" />
-              <item name="PredictLabels" type="462c0819:PsiDirectoryNode" />
-            </path>
-            <path>
-              <item name="PredictLabels" type="b2602c69:ProjectViewProjectNode" />
-              <item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
-            </path>
-          </expand>
-          <select />
-        </subPane>
-      </pane>
-      <pane id="Course" />
-      <pane id="Scope" />
-    </panes>
-  </component>
-  <component name="PropertiesComponent">
-    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
-    <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
-  </component>
-  <component name="RunDashboard">
-    <option name="ruleStates">
-      <list>
-        <RuleState>
-          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
-        </RuleState>
-        <RuleState>
-          <option name="name" value="StatusDashboardGroupingRule" />
-        </RuleState>
-      </list>
-    </option>
-  </component>
-  <component name="SvnConfiguration">
-    <configuration />
-  </component>
-  <component name="TaskManager">
-    <task active="true" id="Default" summary="Default task">
-      <changelist id="028f7063-2c5d-4a6b-be24-591f81619277" name="Default" comment="" />
-      <created>1533604709424</created>
-      <option name="number" value="Default" />
-      <option name="presentableId" value="Default" />
-      <updated>1533604709424</updated>
-    </task>
-    <servers />
-  </component>
-  <component name="ToolWindowManager">
-    <frame x="0" y="23" width="1024" height="548" extended-state="0" />
-    <editor active="true" />
-    <layout>
-      <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.25" />
-      <window_info anchor="bottom" id="TODO" order="6" />
-      <window_info anchor="bottom" id="Event Log" side_tool="true" />
-      <window_info anchor="bottom" id="Run" order="2" />
-      <window_info anchor="bottom" id="Version Control" show_stripe_button="false" />
-      <window_info anchor="bottom" id="Python Console" />
-      <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
-      <window_info anchor="bottom" id="Terminal" />
-      <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
-      <window_info id="Favorites" side_tool="true" />
-      <window_info anchor="bottom" id="Find" order="1" />
-      <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
-      <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
-      <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
-      <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
-      <window_info anchor="bottom" id="Message" order="0" />
-      <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
-    </layout>
-  </component>
-  <component name="VcsContentAnnotationSettings">
-    <option name="myLimit" value="2678400000" />
-  </component>
-  <component name="editorHistoryManager">
-    <entry file="file://$PROJECT_DIR$/test_datafetcher.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="203">
-          <caret line="25" column="50" lean-forward="true" selection-start-line="25" selection-start-column="50" selection-end-line="25" selection-end-column="50" />
-          <folding>
-            <element signature="e#786#801#0" expanded="true" />
-          </folding>
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://$PROJECT_DIR$/Trainer.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="230">
-          <caret line="21" column="27" selection-start-line="21" selection-start-column="27" selection-end-line="21" selection-end-column="27" />
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://$PROJECT_DIR$/test_trainer.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="194">
-          <caret line="21" column="17" selection-start-line="21" selection-start-column="17" selection-end-line="21" selection-end-column="17" />
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://$PROJECT_DIR$/test_predictor.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="231">
-          <caret line="22" column="15" selection-start-line="22" selection-start-column="15" selection-end-line="22" selection-end-column="15" />
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://$PROJECT_DIR$/DataFetcher.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="1760">
-          <caret line="123" selection-start-line="123" selection-end-line="123" />
-          <folding>
-            <element signature="e#849#886#0" expanded="true" />
-          </folding>
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://$PROJECT_DIR$/test_sentenceparse.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="311">
-          <caret line="22" column="19" lean-forward="true" selection-start-line="22" selection-start-column="19" selection-end-line="22" selection-end-column="19" />
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://$PROJECT_DIR$/Predictor.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="133">
-          <caret line="17" column="46" lean-forward="true" selection-start-line="17" selection-start-column="46" selection-end-line="17" selection-end-column="46" />
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://$PROJECT_DIR$/plot_piechart.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="630">
-          <caret line="42" column="38" selection-start-line="42" selection-start-column="38" selection-end-line="42" selection-end-column="38" />
-          <folding>
-            <element signature="e#786#801#0" expanded="true" />
-          </folding>
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://$PROJECT_DIR$/SentenceParser.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="115">
-          <caret line="18" column="29" selection-start-line="18" selection-start-column="29" selection-end-line="18" selection-end-column="29" />
-        </state>
-      </provider>
-    </entry>
-    <entry file="file://$PROJECT_DIR$/application.py">
-      <provider selected="true" editor-type-id="text-editor">
-        <state relative-caret-position="243">
-          <caret line="28" column="14" lean-forward="true" selection-start-line="28" selection-start-column="14" selection-end-line="28" selection-end-column="14" />
-          <folding>
-            <element signature="e#920#972#0" expanded="true" />
-          </folding>
-        </state>
-      </provider>
-    </entry>
-  </component>
-</project>
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/Predictor.py b/mxnet-bot/PredictLabels/Predictor.py
index c2db5d2..9b27fab 100644
--- a/mxnet-bot/PredictLabels/Predictor.py
+++ b/mxnet-bot/PredictLabels/Predictor.py
@@ -22,6 +22,7 @@
 import pickle
 import re
 import logging
+import os
 
 
 class Predictor:
@@ -41,15 +42,14 @@ def __init__(self):
         self.tv = None
         self.labels = None
         self.clf = None
-        self.reload()
 
-    def reload(self):
+    def reload(self, tv_file, clf_file, labels_file):
         """
         This method is to load models
         """
-        self.tv = pickle.load(open("/tmp/Vectorizer.p", "rb"))
-        self.labels = pickle.load(open("/tmp/Labels.p", "rb"))
-        self.clf = pickle.load(open("/tmp/Classifier.p", "rb"))
+        self.tv = pickle.load(open(tv_file, "rb"))
+        self.clf = pickle.load(open(clf_file, "rb"))
+        self.labels = pickle.load(open(labels_file, "rb"))
 
     def tokenize(self, row):
         """
@@ -79,8 +79,10 @@ def rule_based(self, issues):
             row = df_test.loc[i, 'title']
             # apply rule-based algorithms
             single_issue_predictions = []
-            if "feature request" in row.lower(): single_issue_predictions.append("Feature")
-            if "c++" in row.lower(): single_issue_predictions.append("C++")
+            if "feature request" in row.lower():
+                single_issue_predictions.append("Feature")
+            if "c++" in row.lower():
+                single_issue_predictions.append("C++")
             tokens = self.tokenize(row)
             for k, v in self.keywords.items():
                 for keyword in v:
diff --git a/mxnet-bot/PredictLabels/README.md b/mxnet-bot/PredictLabels/README.md
index 22f5537..e2a63a1 100644
--- a/mxnet-bot/PredictLabels/README.md
+++ b/mxnet-bot/PredictLabels/README.md
@@ -19,7 +19,7 @@ It will zip all needed files into `eb.zip`
     2. Under ***Select environment tier***, select ***Web server environment***, click ***Select***.
     3. Under **Base configuration**, select **Preconfigured platform**. In its dropdown, select **Docker**. Then select ***Upload your code***, upload `eb.zip`.
     4. Click ***Configure more options***. Modify Intances, in the dropdown of Instance type, select t2.large. Click ***Create Environment*** (No need to select a security group, EB will create one.)
-    5. It will take about 2 minutes to setup the environment. 
+    5. It will take about 10 minutes to set up the environment.
     6. Once the environment is setup, it will take 5-10 minutes to generate models. 
     7. Write down URL. (ie: http://labelbot-env.pgc55xzpte.us-east-1.elasticbeanstalk.com)
     
\ No newline at end of file
diff --git a/mxnet-bot/PredictLabels/Trainer.py b/mxnet-bot/PredictLabels/Trainer.py
index c465c6a..66f7558 100644
--- a/mxnet-bot/PredictLabels/Trainer.py
+++ b/mxnet-bot/PredictLabels/Trainer.py
@@ -21,29 +21,48 @@
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.svm import SVC
 from sklearn.preprocessing import LabelEncoder
+import tempfile
 import pickle
 import logging
 
 
 class Trainer:
+    # target labels that we are interested in
+    labels = ["Performance", "Test", "Question",
+               "Feature request", "Call for contribution",
+               "Feature", "Example", "Doc",
+               "Installation", "Build", "Bug"]
 
-    def __init__(self):
+    def __init__(self, 
+                 tv=TfidfVectorizer(min_df=0.00009, ngram_range=(1, 3), max_features=10000), 
+                 clf=SVC(gamma=0.5, C=100, probability=True),
+                 tmp_tv_file=tempfile.NamedTemporaryFile(),
+                 tmp_clf_file=tempfile.NamedTemporaryFile(),
+                 tmp_labels_file=tempfile.NamedTemporaryFile()):
         """
         Trainer is to train issues using Machine Learning methods.
         self.labels(list): a list of target labels
         self.tv: TFIDF model (trigram, max_features = 10000)
         self.clf: Classifier (SVC, kenerl = 'rbf')
+        self.tmp_tv_file: tempfile to store Vectorizer
+        self.tmp_clf_file: tempfile to store Classifier
+        self.tmp_labels_file: tempfile to store Labels
         """
-        self.labels = ["Performance", "Test", "Question",
-                       "Feature request", "Call for contribution",
-                       "Feature", "Example", "Doc",
-                       "Installation", "Build", "Bug"]
-        self.tv = TfidfVectorizer(min_df=0.00009, ngram_range=(1, 3), max_features=10000)
-        self.clf = SVC(gamma=0.5, C=100, probability=True)
+        self.tv = tv
+        self.clf = clf
+        self.tmp_tv_file = tmp_tv_file
+        self.tmp_clf_file = tmp_clf_file
+        self.tmp_labels_file = tmp_labels_file
 
     def train(self):
         """
         This method is to train and save models.
+        It has 5 steps:
+        1. Fetch issues
+        2. Clean data
+        3. Word embedding
+        4. Train models
+        5. Save models
         """
         logging.info("Start training issues of general labels")
         # Step1: Fetch issues with general labels
@@ -75,11 +94,12 @@ def train(self):
         clf.fit(X, Y)
         # Step5: save models
         logging.info("Saving Models..")
-        pickle.dump(tv, open("/tmp/Vectorizer.p", "wb"))
-        pickle.dump(clf, open("/tmp/Classifier.p", "wb"))
-        pickle.dump(labels, open("/tmp/Labels.p", "wb"))
+        pickle.dump(tv, open(self.tmp_tv_file.name, 'wb'))
+        pickle.dump(clf, open(self.tmp_clf_file.name, 'wb'))
+        pickle.dump(labels, open(self.tmp_labels_file.name, 'wb'))
+        tmp_files = {"tv_file":self.tmp_tv_file.name, 'clf_file':self.tmp_clf_file.name, 'labels_file':self.tmp_labels_file.name}
         logging.info("Completed!")
-        return
+        return tmp_files
 
 
 
diff --git a/mxnet-bot/PredictLabels/application.py b/mxnet-bot/PredictLabels/application.py
index 1aec229..88df384 100644
--- a/mxnet-bot/PredictLabels/application.py
+++ b/mxnet-bot/PredictLabels/application.py
@@ -32,9 +32,6 @@
 
 application = Flask(__name__)
 
-if not os.path.exists('/tmp/Classifier.p'):
-    trainer = Trainer()
-    trainer.train()
 predictor = Predictor()
 
 
@@ -83,10 +80,12 @@ def plot():
 def train_models():
     start = timeit.default_timer()
     trainer = Trainer()
-    trainer.train()
+    tmp_files = trainer.train()
     stop = timeit.default_timer()
     # reload models
-    predictor.reload()
+    predictor.reload(tv_file=tmp_files['tv_file'], 
+                     clf_file=tmp_files['clf_file'],
+                     labels_file=tmp_files['labels_file'])
     time = int(stop - start)
     logging.info("Training completed! Time cost: {} min, {} seconds".format(str(int(time/60)), str(time%60)))
     return 
@@ -106,6 +105,8 @@ def initialize():
     # Shut down the scheduler when exiting the app
     atexit.register(lambda: scheduler.shutdown())
 
+# train initial models
+train_models()
 
 initialize()
 
diff --git a/mxnet-bot/PredictLabels/test_predictor.py b/mxnet-bot/PredictLabels/test_predictor.py
index 3580da8..938a1cb 100644
--- a/mxnet-bot/PredictLabels/test_predictor.py
+++ b/mxnet-bot/PredictLabels/test_predictor.py
@@ -56,9 +56,13 @@ def test_rule_based(self):
                                                 "html_url": "https://github.com/apache/incubator-mxnet/issues/11925",
                                               }
             predictions = self.pr.rule_based([11925])
+            print(predictions)
             self.assertEqual([['Feature','scala']], predictions)
 
     def test_ml_predict(self):
+        self.pr.reload(tv_file='Vectorizer.p', 
+                       clf_file='Classifier.p',
+                       labels_file='Labels.p')
         with patch('DataFetcher.requests.get') as mocked_get:
             mocked_get.return_value.status_code = 200
             mocked_get.return_value.json.return_value = {
@@ -75,6 +79,9 @@ def test_ml_predict(self):
             self.assertEqual([['Feature']], predictions)
 
     def test_predict(self):
+        self.pr.reload(tv_file='Vectorizer.p', 
+                       clf_file='Classifier.p',
+                       labels_file='Labels.p')
         with patch('DataFetcher.requests.get') as mocked_get:
             mocked_get.return_value.status_code = 200
             mocked_get.return_value.json.return_value = {

From 02021329867a260584e12f875b7bb325368437e2 Mon Sep 17 00:00:00 2001
From: Yuelin Zhang <zyuelin@amazon.com>
Date: Wed, 8 Aug 2018 12:34:56 -0700
Subject: [PATCH 4/5] revise code

1. Store all model files in a single tempfile.TemporaryDirectory
2. Use `with` statements to open files
---
 mxnet-bot/PredictLabels/Predictor.py   | 12 ++++++++----
 mxnet-bot/PredictLabels/Trainer.py     | 23 +++++++++++------------
 mxnet-bot/PredictLabels/application.py |  6 ++----
 3 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/mxnet-bot/PredictLabels/Predictor.py b/mxnet-bot/PredictLabels/Predictor.py
index 84436a2..e1da3ea 100644
--- a/mxnet-bot/PredictLabels/Predictor.py
+++ b/mxnet-bot/PredictLabels/Predictor.py
@@ -22,6 +22,7 @@
 import pickle
 import re
 import logging
+import os
 
 
 class Predictor:
@@ -42,13 +43,16 @@ def __init__(self):
         self.labels = None
         self.clf = None
 
-    def reload(self, tv_file, clf_file, labels_file):
+    def reload(self, tmp_dir):
         """
         This method is to load models
         """
-        self.tv = pickle.load(open(tv_file, "rb"))
-        self.clf = pickle.load(open(clf_file, "rb"))
-        self.labels = pickle.load(open(labels_file, "rb"))
+        with open(os.path.join(tmp_dir.name,'Vectorizer.p'), "rb") as tv:
+            self.tv = pickle.load(tv)
+        with open(os.path.join(tmp_dir.name,'Classifier.p'), "rb") as clf:
+            self.clf = pickle.load(clf)
+        with open(os.path.join(tmp_dir.name,'Labels.p'), "rb") as labels:
+            self.labels = pickle.load(labels)
 
     def tokenize(self, row):
         """
diff --git a/mxnet-bot/PredictLabels/Trainer.py b/mxnet-bot/PredictLabels/Trainer.py
index 87fc4bb..839709d 100644
--- a/mxnet-bot/PredictLabels/Trainer.py
+++ b/mxnet-bot/PredictLabels/Trainer.py
@@ -24,6 +24,7 @@
 import tempfile
 import pickle
 import logging
+import os
 
 
 class Trainer:
@@ -36,9 +37,8 @@ class Trainer:
     def __init__(self, 
                  tv=TfidfVectorizer(min_df=0.00009, ngram_range=(1, 3), max_features=10000), 
                  clf=SVC(gamma=0.5, C=100, probability=True),
-                 tmp_tv_file=tempfile.NamedTemporaryFile(),
-                 tmp_clf_file=tempfile.NamedTemporaryFile(),
-                 tmp_labels_file=tempfile.NamedTemporaryFile()):
+                 tmp_dir = tempfile.TemporaryDirectory()
+                 ):
         """
         Trainer is to train issues using Machine Learning methods.
         self.labels(list): a list of target labels
@@ -50,10 +50,7 @@ def __init__(self,
         """
         self.tv = tv
         self.clf = clf
-        self.tmp_tv_file = tmp_tv_file
-        self.tmp_clf_file = tmp_clf_file
-        self.tmp_labels_file = tmp_labels_file
-
+        self.tmp_dir = tmp_dir
 
     def train(self):
         """
@@ -95,9 +92,11 @@ def train(self):
         clf.fit(X, Y)
         # Step5: save models
         logging.info("Saving Models..")
-        pickle.dump(tv, open(self.tmp_tv_file.name, 'wb'))
-        pickle.dump(clf, open(self.tmp_clf_file.name, 'wb'))
-        pickle.dump(labels, open(self.tmp_labels_file.name, 'wb'))
-        tmp_files = {"tv_file": self.tmp_tv_file.name, 'clf_file': self.tmp_clf_file.name, 'labels_file': self.tmp_labels_file.name}
+        with open(os.path.join(self.tmp_dir.name,'Vectorizer.p'), 'wb') as tv_file:
+            pickle.dump(tv, tv_file)
+        with open(os.path.join(self.tmp_dir.name,'Classifier.p'), 'wb') as clf_file:
+            pickle.dump(clf, clf_file)
+        with open(os.path.join(self.tmp_dir.name,'Labels.p'), 'wb') as labels_file:
+            pickle.dump(labels, labels_file)
         logging.info("Completed!")
-        return tmp_files
+        return self.tmp_dir
diff --git a/mxnet-bot/PredictLabels/application.py b/mxnet-bot/PredictLabels/application.py
index 0c8e557..1103a69 100644
--- a/mxnet-bot/PredictLabels/application.py
+++ b/mxnet-bot/PredictLabels/application.py
@@ -80,12 +80,10 @@ def plot():
 def train_models():
     start = timeit.default_timer()
     trainer = Trainer()
-    tmp_files = trainer.train()
+    tmp_dir = trainer.train()
     stop = timeit.default_timer()
     # reload models
-    predictor.reload(tv_file=tmp_files['tv_file'], 
-                     clf_file=tmp_files['clf_file'],
-                     labels_file=tmp_files['labels_file'])
+    predictor.reload(tmp_dir=tmp_dir)
     time = int(stop - start)
     logging.info("Training completed! Time cost: {} min, {} seconds".format(str(int(time/60)), str(time % 60)))
     return 

From fa72a714e15babad3deb90490355a7778ef8e3e4 Mon Sep 17 00:00:00 2001
From: Yuelin Zhang <zyuelin@amazon.com>
Date: Thu, 9 Aug 2018 16:32:17 -0700
Subject: [PATCH 5/5] correct typos

Correct typos in the rule-based keyword labels (e.g. "coda" -> "Cuda") and capitalize the label names.
---
 mxnet-bot/PredictLabels/Predictor.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mxnet-bot/PredictLabels/Predictor.py b/mxnet-bot/PredictLabels/Predictor.py
index e1da3ea..56a1627 100644
--- a/mxnet-bot/PredictLabels/Predictor.py
+++ b/mxnet-bot/PredictLabels/Predictor.py
@@ -27,13 +27,13 @@
 
 class Predictor:
     # keywords will be used to apply rule-based algorithms
-    keywords = {"ci": ["ci", "ccache", "jenkins"],
-                "flaky": ["flaky"],
-                "gluon": ["gluon"],
-                "coda": ["cuda", "cudnn"],
-                "scala": ["scala"],
+    keywords = {"CI": ["ci", "ccache", "jenkins"],
+                "Flaky": ["flaky"],
+                "Gluon": ["gluon"],
+                "Cuda": ["cuda", "cudnn"],
+                "Scala": ["scala"],
                 "mkldnn": ["mkldnn, mkl"],
-                "onnx": ["onnx"]}
+                "ONNX": ["onnx"]}
 
     def __init__(self):
         """