theodore-ando · WardLT · Sep 19, 2018 · Sep 21, 2018 · Sep 21, 2018 · Sep 26, 2018
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,14 @@
+language: python
+python:
+- "3.6"
+cache: pip
+install:
+- pip install --upgrade pip
+- pip install -e .
+- pip install -r requirements.txt -U --upgrade-strategy eager
+- pip install -r test-requirements.txt
+script:
+- flake8 .
+- travis_wait 50 nosetests --with-coverage --cover-package=active_learning
+after_success:
+- coveralls
diff --git a/README.md b/README.md
@@ -1,23 +1,23 @@
 # Active Learning for Python
+[![Build Status](https://travis-ci.org/globus-labs/active-learning.svg?branch=master)](https://travis-ci.org/globus-labs/active-learning)
+[![Coverage Status](https://coveralls.io/repos/github/globus-labs/active-learning/badge.svg?branch=master)](https://coveralls.io/github/globus-labs/active-learning?branch=master)
 
-This is a toolkit for active learning in python designed to be used in conjunction 
-with scikit-learn models.  Its structure comes from Roman Garnett's active learning [toolbox for Matlab](https://github.com/rmgarnett/active_learning).
+A toolkit for active learning in Python, designed to be used in conjunction with scikit-learn models.
 
 ## Installation
 
-You can install by cloning with `git clone https://github.com/theodore-ando/active-learning` followed
+You can install by cloning with `git clone https://github.com/globus-labs/active-learning` followed
 by `pip install -e ./active-learning`
 
-## Basic usage
+## Usage
 
-See [example.ipynb](example.ipynb) for the basic usage of the API and a simple comparison of some query strategies.
-[example_live.ipynb](example_live.ipynb) shows how easy it is to integrate a real person into the labeling loop. 
+Examples and tutorials TBD.
 
-## Advanced Usage
+## See Also 
 
-In the works.
+Roman Garnett's active learning [toolbox for Matlab](https://github.com/rmgarnett/active_learning).
 
-# License
+## License
 
 Copyright 2018 Theodore Ando
 
@@ -31,4 +31,4 @@ Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
-limitations under the License.
+limitations under the License.
diff --git a/active_learning/__init__.py b/active_learning/__init__.py
@@ -1,6 +0,0 @@
-from . import query_strats
-from . import scoring
-from . import selectors
-from . import utils
-
-name = "active-learning"

diff --git a/active_learning/active_learning.py b/active_learning/active_learning.py
diff --git a/active_learning/objective.py b/active_learning/objective.py
@@ -0,0 +1,32 @@
+"""Objective functions used in defining an active learning problem"""
+
+from typing import List
+import numpy as np
+
+
+class ObjectiveFunction:
+    """Base class for objective functions that score regression outputs
+
+    Subclasses override :meth:`score`; the implementation here only raises
+    ``NotImplementedError``."""
+
+    def score(self, y: List, y_uncert: List = None) -> List[float]:
+        """Generate the objective function score
+
+        Args:
+            y (list): Values to be scored, one per entry
+            y_uncert (list): Any kind of uncertainty values, optional
+        Returns:
+            ([float]): Scores where minimal values are preferred
+        Raises:
+            NotImplementedError: Always, as this base method must be overridden
+        """
+        raise NotImplementedError
+
+
+class Maximize(ObjectiveFunction):
+    """Objective function that prefers the largest scalar value"""
+
+    def score(self, y: List, y_uncert: List = None) -> List[float]:
+        """Negate the values so that the largest ones receive the lowest scores
+
+        Args:
+            y (list): Values to be scored
+            y_uncert (list): Not used by this objective
+        Returns:
+            Result of multiplying ``y`` by -1
+        """
+        negated = np.multiply(y, -1)
+        return negated
+
+
+class Minimize(ObjectiveFunction):
+    """Find the minimum scalar value"""
+
+    def score(self, y: List, y_uncert: List = None) -> List[float]:
+        """Return the values as the scores, without modification
+
+        Args:
+            y (list): Values to be scored
+            y_uncert (list): Not used by this objective
+        Returns:
+            The same ``y`` object that was passed in
+        """
+        return y
diff --git a/active_learning/problem.py b/active_learning/problem.py
@@ -0,0 +1,103 @@
+"""Classes and methods related to defining an active learning problem"""
+
+from .objective import ObjectiveFunction, Minimize
+from typing import List, Tuple, Union
+import numpy as np
+
+
+class ActiveLearningProblem:
+    """Class for defining an active learning problem.
+
+    The main point in defining an active learning problem is to define the total search space,
+    which points in this space have already been labeled, and what those labels are.
+
+    Optionally, you can define the budget of how many points are left to label.
+    """
+
+    def __init__(self, points, labeled_ixs: List[int], labels,
+                 budget=None, target_label=1, objective_fun: ObjectiveFunction = Minimize()):
+        """Set up the active learning problem
+
+        Args:
+            points (ndarray): Coordinates of all points in the search space
+            labeled_ixs ([int]): Indices of points that have been labeled
+            labels (ndarray): Labels for the labeled points, in same order as labeled_ixs
+            budget (int): How many entries are budgeted to be labeled (default: all of them)
+            target_label (int): Index of the desired class, used in classification problems
+            objective_fun (ObjectiveFunction): Objective function, used in regression problems
+        """
+
+        # TODO: Add batch size and support for grouping points together -lw
+        self.points = points
+        # Copy both sequences: `add_label` appends to them, which must not alter the
+        #  caller's objects and must also work if an ndarray or tuple was passed in
+        self.labeled_ixs = list(labeled_ixs)
+        self.labels = list(labels)
+        self.target_label = target_label
+        self.objective_fun = objective_fun
+
+        # Set the budget
+        self.budget = budget
+        if budget is None:
+            # By default, every point that is not yet labeled may be labeled
+            self.budget = len(points) - len(self.labeled_ixs)
+
+    @classmethod
+    def from_labeled_and_unlabled(cls, labeled_points, labels, unlabeled_points, **kwargs):
+        """Construct an active learning problem from labeled and unlabeled points
+
+        The labeled points are stacked before the unlabeled ones, so they occupy
+        the first ``len(labeled_points)`` indices of the search space.
+
+        Args:
+            labeled_points (ndarray): Coordinates of points with labels
+            labels (ndarray): Labels of those points
+            unlabeled_points (ndarray): Points that could possibly be labeled
+            **kwargs: Passed on to the class constructor
+        Returns:
+            (ActiveLearningProblem): The new problem definition
+        """
+
+        points = np.vstack((labeled_points, unlabeled_points))
+        labeled_ixs = list(range(len(labeled_points)))
+
+        return cls(points, labeled_ixs, labels, **kwargs)
+
+    def get_unlabeled_ixs(self) -> List[int]:
+        """Get a list of the unlabeled indices
+
+        Returns:
+            ([int]): Unlabeled indices, in ascending order
+        """
+        # Sort because the iteration order of a set is not guaranteed
+        return sorted(
+            set(range(len(self.points))).difference(self.labeled_ixs)
+        )
+
+    def get_labeled_ixs(self) -> List[int]:
+        """Get a list of the labeled indices
+
+        Returns:
+            ([int]): Copy of the labeled indices, in the order they were labeled
+        """
+        return list(self.labeled_ixs)
+
+    def add_label(self, ind: int, label: float):
+        """Add a label to the labeled set
+
+        Args:
+            ind (int): Index of point to label
+            label (float): Label of that point
+        Raises:
+            AttributeError: If the point already has a label
+        """
+
+        if ind in self.labeled_ixs:
+            raise AttributeError('Index already included in labeled set')
+        self.labeled_ixs.append(ind)
+        self.labels.append(label)
+
+    def get_labeled_points(self) -> Tuple[np.ndarray, List[Union[float, int]]]:
+        """Get the labeled points and their labels
+
+        Returns:
+            - (ndarray): Coordinates of all points with labels
+            - (list): Labels for all labeled points, in the same order as the coordinates
+        """
+        return self.points[self.labeled_ixs], self.labels
+
+    def get_unlabeled_points(self) -> np.ndarray:
+        """Get the coordinates of all unlabeled points
+
+        Returns:
+            (ndarray): Coordinates of all unlabeled points, ordered as in `get_unlabeled_ixs`
+        """
+        return self.points[self.get_unlabeled_ixs()]
diff --git a/active_learning/query_strats/__init__.py b/active_learning/query_strats/__init__.py
@@ -1,13 +1,5 @@
+"""General active learning querying strategies"""
 
-from .argmax import argmax
+from .random_sampling import RandomQuery
 
-from .active_search import active_search
-from .batch_active_search import seq_sim_batch
-from .mcal_regression import mcal_regression
-from .random_sampling import random_sampling
-from .rfr_balanced import rfr_balanced
-from .greedy import greedy
-from .greedy_regression import greedy_regression
-from .rfr_variance import rfr_variance
-from .three_ds import three_ds
-from .uncertainty_sampling import uncertainty_sampling
+__all__ = ['RandomQuery']