Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions py_stringmatching/tests/test_sim_Affine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# coding=utf-8

from __future__ import unicode_literals

import math
import unittest

from nose.tools import *
from py_stringmatching.similarity_measure.affine import Affine

class AffineTestCases(unittest.TestCase):
    """Unit tests for the ``Affine`` similarity measure.

    Covers raw-score computation on valid input (including non-ASCII and
    byte-string input), the getter/setter pairs for ``gap_start``,
    ``gap_continuation`` and ``sim_func``, and rejection of non-string
    input with ``TypeError``.
    """

    def setUp(self):
        # One default-configured measure plus two customised variants that
        # are shared by the tests below.
        self.affine = Affine()
        self.affine_with_params1 = Affine(gap_start=2, gap_continuation=0.5)
        # Simple 0/1 character-match function; also used to verify that
        # get_sim_func() hands back the very same callable.
        self.sim_func = lambda s1, s2: (int(1 if s1 == s2 else 0))
        self.affine_with_params2 = Affine(gap_continuation=0.2, sim_func=self.sim_func)

    # ---------------------- valid input --------------------------------

    def test_valid_input(self):
        self.assertAlmostEqual(self.affine.get_raw_score('dva', 'deeva'), 1.5)
        self.assertAlmostEqual(self.affine_with_params1.get_raw_score('dva', 'deeve'), -0.5)
        self.assertAlmostEqual(
            self.affine_with_params2.get_raw_score('AAAGAATTCA', 'AAATCA'),
            4.4)
        self.assertAlmostEqual(self.affine_with_params2.get_raw_score(' ', ' '), 1)
        # An empty first string is expected to score exactly 0.
        self.assertEqual(self.affine.get_raw_score('', 'deeva'), 0)

    def test_valid_input_non_ascii(self):
        # The same accented string supplied as an explicit unicode literal,
        # a plain literal (unicode via unicode_literals) and UTF-8 bytes
        # must all produce the same score.
        self.assertAlmostEqual(self.affine.get_raw_score(u'dva', u'dáóva'), 1.5)
        self.assertAlmostEqual(self.affine.get_raw_score('dva', 'dáóva'), 1.5)
        self.assertAlmostEqual(self.affine.get_raw_score('dva', b'd\xc3\xa1\xc3\xb3va'), 1.5)

    # ---------------------- getters ------------------------------------

    def test_get_gap_start(self):
        self.assertEqual(self.affine_with_params1.get_gap_start(), 2)

    def test_get_gap_continuation(self):
        self.assertEqual(self.affine_with_params2.get_gap_continuation(), 0.2)

    def test_get_sim_func(self):
        self.assertEqual(self.affine_with_params2.get_sim_func(), self.sim_func)

    # ---------------------- setters ------------------------------------
    # Each setter test checks the value before and after the change and
    # confirms that the new setting is reflected in the computed score.

    def test_set_gap_start(self):
        af = Affine(gap_start=1)
        self.assertEqual(af.get_gap_start(), 1)
        self.assertAlmostEqual(af.get_raw_score('dva', 'deeva'), 1.5)
        self.assertEqual(af.set_gap_start(2), True)
        self.assertEqual(af.get_gap_start(), 2)
        self.assertAlmostEqual(af.get_raw_score('dva', 'deeva'), 0.5)

    def test_set_gap_continuation(self):
        af = Affine(gap_continuation=0.3)
        self.assertEqual(af.get_gap_continuation(), 0.3)
        self.assertAlmostEqual(af.get_raw_score('dva', 'deeva'), 1.7)
        self.assertEqual(af.set_gap_continuation(0.7), True)
        self.assertEqual(af.get_gap_continuation(), 0.7)
        self.assertAlmostEqual(af.get_raw_score('dva', 'deeva'), 1.3)

    def test_set_sim_func(self):
        fn1 = lambda s1, s2: (int(1 if s1 == s2 else 0))
        fn2 = lambda s1, s2: (int(2 if s1 == s2 else -1))
        af = Affine(sim_func=fn1)
        self.assertEqual(af.get_sim_func(), fn1)
        self.assertAlmostEqual(af.get_raw_score('dva', 'deeva'), 1.5)
        self.assertEqual(af.set_sim_func(fn2), True)
        self.assertEqual(af.get_sim_func(), fn2)
        self.assertAlmostEqual(af.get_raw_score('dva', 'deeva'), 4.5)

    # ---------------------- invalid input ------------------------------
    # unittest's own assertRaises is used so that the expected exception is
    # scoped to the exact call under test and the checks do not depend on a
    # name pulled in through a wildcard import.

    def test_invalid_input1_raw_score(self):
        with self.assertRaises(TypeError):
            self.affine.get_raw_score(None, 'MARHTA')

    def test_invalid_input2_raw_score(self):
        with self.assertRaises(TypeError):
            self.affine.get_raw_score('MARHTA', None)

    def test_invalid_input3_raw_score(self):
        with self.assertRaises(TypeError):
            self.affine.get_raw_score('MARHTA', 12.90)

    def test_invalid_input4_raw_score(self):
        with self.assertRaises(TypeError):
            self.affine.get_raw_score(12.90, 'MARTHA')

    def test_invalid_input5_raw_score(self):
        with self.assertRaises(TypeError):
            self.affine.get_raw_score(None, None)

    def test_invalid_input6_raw_score(self):
        with self.assertRaises(TypeError):
            self.affine.get_raw_score(12.90, 12.90)
115 changes: 115 additions & 0 deletions py_stringmatching/tests/test_sim_BagDistance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# coding=utf-8

from __future__ import unicode_literals

import math
import unittest

from nose.tools import *
from py_stringmatching.similarity_measure.bag_distance import BagDistance

class BagDistanceTestCases(unittest.TestCase):
    """Unit tests for the ``BagDistance`` measure.

    Checks ``get_raw_score`` and ``get_sim_score`` on a shared set of string
    pairs and verifies that both methods reject non-string input with
    ``TypeError``.
    """

    def setUp(self):
        self.bd = BagDistance()

    # ---------------------- valid input --------------------------------

    def test_valid_input_raw_score(self):
        # One side empty, or both.
        self.assertEqual(self.bd.get_raw_score('a', ''), 1)
        self.assertEqual(self.bd.get_raw_score('', 'a'), 1)
        self.assertEqual(self.bd.get_raw_score('abc', ''), 3)
        self.assertEqual(self.bd.get_raw_score('', 'abc'), 3)
        self.assertEqual(self.bd.get_raw_score('', ''), 0)
        # Identical strings.
        self.assertEqual(self.bd.get_raw_score('a', 'a'), 0)
        self.assertEqual(self.bd.get_raw_score('abc', 'abc'), 0)
        # Second string has extra characters.
        self.assertEqual(self.bd.get_raw_score('a', 'ab'), 1)
        self.assertEqual(self.bd.get_raw_score('b', 'ab'), 1)
        self.assertEqual(self.bd.get_raw_score('ac', 'abc'), 1)
        self.assertEqual(self.bd.get_raw_score('abcdefg', 'xabxcdxxefxgx'), 6)
        # First string has extra characters.
        self.assertEqual(self.bd.get_raw_score('ab', 'a'), 1)
        self.assertEqual(self.bd.get_raw_score('ab', 'b'), 1)
        self.assertEqual(self.bd.get_raw_score('abc', 'ac'), 1)
        self.assertEqual(self.bd.get_raw_score('xabxcdxxefxgx', 'abcdefg'), 6)
        # Equal length, some characters differ.
        self.assertEqual(self.bd.get_raw_score('a', 'b'), 1)
        self.assertEqual(self.bd.get_raw_score('ab', 'ac'), 1)
        self.assertEqual(self.bd.get_raw_score('ac', 'bc'), 1)
        self.assertEqual(self.bd.get_raw_score('abc', 'axc'), 1)
        self.assertEqual(self.bd.get_raw_score('xabxcdxxefxgx', '1ab2cd34ef5g6'), 6)
        # Assorted word pairs.
        self.assertEqual(self.bd.get_raw_score('example', 'samples'), 2)
        self.assertEqual(self.bd.get_raw_score('sturgeon', 'urgently'), 2)
        self.assertEqual(self.bd.get_raw_score('bag_distance', 'frankenstein'), 6)
        self.assertEqual(self.bd.get_raw_score('distance', 'difference'), 5)
        self.assertEqual(self.bd.get_raw_score('java was neat', 'scala is great'), 6)

    def test_valid_input_sim_score(self):
        # Boundary results (exactly 0.0 or 1.0) are compared exactly; every
        # expectation written as a float quotient is compared with
        # assertAlmostEqual so the test does not hinge on bit-identical
        # floating-point arithmetic.
        self.assertEqual(self.bd.get_sim_score('a', ''), 0.0)
        self.assertEqual(self.bd.get_sim_score('', 'a'), 0.0)
        self.assertEqual(self.bd.get_sim_score('abc', ''), 0.0)
        self.assertEqual(self.bd.get_sim_score('', 'abc'), 0.0)
        self.assertEqual(self.bd.get_sim_score('', ''), 1.0)
        self.assertEqual(self.bd.get_sim_score('a', 'a'), 1.0)
        self.assertEqual(self.bd.get_sim_score('abc', 'abc'), 1.0)
        self.assertAlmostEqual(self.bd.get_sim_score('a', 'ab'), 1.0 - (1.0/2.0))
        self.assertAlmostEqual(self.bd.get_sim_score('b', 'ab'), 1.0 - (1.0/2.0))
        self.assertAlmostEqual(self.bd.get_sim_score('ac', 'abc'), 1.0 - (1.0/3.0))
        self.assertAlmostEqual(self.bd.get_sim_score('abcdefg', 'xabxcdxxefxgx'), 1.0 - (6.0/13.0))
        self.assertAlmostEqual(self.bd.get_sim_score('ab', 'a'), 1.0 - (1.0/2.0))
        self.assertAlmostEqual(self.bd.get_sim_score('ab', 'b'), 1.0 - (1.0/2.0))
        self.assertAlmostEqual(self.bd.get_sim_score('abc', 'ac'), 1.0 - (1.0/3.0))
        self.assertAlmostEqual(self.bd.get_sim_score('xabxcdxxefxgx', 'abcdefg'), 1.0 - (6.0/13.0))
        self.assertEqual(self.bd.get_sim_score('a', 'b'), 0.0)
        self.assertAlmostEqual(self.bd.get_sim_score('ab', 'ac'), 1.0 - (1.0/2.0))
        self.assertAlmostEqual(self.bd.get_sim_score('ac', 'bc'), 1.0 - (1.0/2.0))
        self.assertAlmostEqual(self.bd.get_sim_score('abc', 'axc'), 1.0 - (1.0/3.0))
        self.assertAlmostEqual(self.bd.get_sim_score('xabxcdxxefxgx', '1ab2cd34ef5g6'), 1.0 - (6.0/13.0))
        self.assertAlmostEqual(self.bd.get_sim_score('example', 'samples'), 1.0 - (2.0/7.0))
        self.assertAlmostEqual(self.bd.get_sim_score('sturgeon', 'urgently'), 1.0 - (2.0/8.0))
        self.assertAlmostEqual(self.bd.get_sim_score('bag_distance', 'frankenstein'), 1.0 - (6.0/12.0))
        self.assertAlmostEqual(self.bd.get_sim_score('distance', 'difference'), 1.0 - (5.0/10.0))
        self.assertAlmostEqual(self.bd.get_sim_score('java was neat', 'scala is great'), 1.0 - (6.0/14.0))

    # ---------------------- invalid input ------------------------------
    # unittest's own assertRaises is used so that the expected exception is
    # scoped to the exact call under test and the checks do not depend on a
    # name pulled in through a wildcard import.

    def test_invalid_input1_raw_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_raw_score('a', None)

    def test_invalid_input2_raw_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_raw_score(None, 'b')

    def test_invalid_input3_raw_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_raw_score(None, None)

    def test_invalid_input4_raw_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_raw_score('MARHTA', 12.90)

    def test_invalid_input5_raw_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_raw_score(12.90, 'MARTHA')

    def test_invalid_input6_raw_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_raw_score(12.90, 12.90)

    def test_invalid_input1_sim_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_sim_score('a', None)

    def test_invalid_input2_sim_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_sim_score(None, 'b')

    def test_invalid_input3_sim_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_sim_score(None, None)

    def test_invalid_input4_sim_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_sim_score('MARHTA', 12.90)

    def test_invalid_input5_sim_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_sim_score(12.90, 'MARTHA')

    def test_invalid_input6_sim_score(self):
        with self.assertRaises(TypeError):
            self.bd.get_sim_score(12.90, 12.90)
98 changes: 98 additions & 0 deletions py_stringmatching/tests/test_sim_Cosine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# coding=utf-8

from __future__ import unicode_literals

import math
import unittest

from nose.tools import *

from py_stringmatching.similarity_measure.cosine import Cosine

class CosineTestCases(unittest.TestCase):
    """Unit tests for the ``Cosine`` similarity measure.

    Checks ``get_raw_score`` and ``get_sim_score`` on token lists and sets,
    and verifies that both methods raise ``TypeError`` for ``None``,
    integers and plain strings.
    """

    def setUp(self):
        self.cos = Cosine()

    # ---------------------- valid input --------------------------------
    # Expectations built from math.sqrt are compared with assertAlmostEqual
    # so the tests do not hinge on bit-identical floating-point arithmetic;
    # the exact boundary values 0.0 and 1.0 are still compared exactly.

    def test_valid_input_raw_score(self):
        self.assertAlmostEqual(self.cos.get_raw_score(['data', 'science'], ['data']),
                               1.0 / (math.sqrt(2) * math.sqrt(1)))
        self.assertAlmostEqual(self.cos.get_raw_score(['data', 'science'], ['science', 'good']),
                               1.0 / (math.sqrt(2) * math.sqrt(2)))
        self.assertEqual(self.cos.get_raw_score([], ['data']), 0.0)
        # Lists containing a repeated token, passed in either argument order.
        self.assertAlmostEqual(self.cos.get_raw_score(['data', 'data', 'science'], ['data', 'management']),
                               1.0 / (math.sqrt(2) * math.sqrt(2)))
        self.assertAlmostEqual(self.cos.get_raw_score(['data', 'management'], ['data', 'data', 'science']),
                               1.0 / (math.sqrt(2) * math.sqrt(2)))
        # Two empty collections are expected to score 1.0.
        self.assertEqual(self.cos.get_raw_score([], []), 1.0)
        self.assertEqual(self.cos.get_raw_score(set(), set()), 1.0)
        self.assertAlmostEqual(self.cos.get_raw_score({1, 1, 2, 3, 4}, {2, 3, 4, 5, 6, 7, 7, 8}),
                               3.0 / (math.sqrt(4) * math.sqrt(7)))

    def test_valid_input_sim_score(self):
        self.assertAlmostEqual(self.cos.get_sim_score(['data', 'science'], ['data']),
                               1.0 / (math.sqrt(2) * math.sqrt(1)))
        self.assertAlmostEqual(self.cos.get_sim_score(['data', 'science'], ['science', 'good']),
                               1.0 / (math.sqrt(2) * math.sqrt(2)))
        self.assertEqual(self.cos.get_sim_score([], ['data']), 0.0)
        self.assertAlmostEqual(self.cos.get_sim_score(['data', 'data', 'science'], ['data', 'management']),
                               1.0 / (math.sqrt(2) * math.sqrt(2)))
        self.assertAlmostEqual(self.cos.get_sim_score(['data', 'management'], ['data', 'data', 'science']),
                               1.0 / (math.sqrt(2) * math.sqrt(2)))
        self.assertEqual(self.cos.get_sim_score([], []), 1.0)
        self.assertEqual(self.cos.get_sim_score(set(), set()), 1.0)
        self.assertAlmostEqual(self.cos.get_sim_score({1, 1, 2, 3, 4}, {2, 3, 4, 5, 6, 7, 7, 8}),
                               3.0 / (math.sqrt(4) * math.sqrt(7)))

    # ---------------------- invalid input ------------------------------
    # unittest's own assertRaises is used so that the expected exception is
    # scoped to the exact call under test and the checks do not depend on a
    # name pulled in through a wildcard import.

    def test_invalid_input1_raw_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_raw_score(1, 1)

    def test_invalid_input2_raw_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_raw_score(None, ['b'])

    def test_invalid_input3_raw_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_raw_score(None, None)

    def test_invalid_input4_raw_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_raw_score(['a'], None)

    def test_invalid_input5_raw_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_raw_score(['MARHTA'], 'MARTHA')

    def test_invalid_input6_raw_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_raw_score('MARHTA', ['MARTHA'])

    def test_invalid_input7_raw_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_raw_score('MARTHA', 'MARTHA')

    def test_invalid_input1_sim_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_sim_score(1, 1)

    def test_invalid_input2_sim_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_sim_score(None, ['b'])

    def test_invalid_input3_sim_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_sim_score(None, None)

    def test_invalid_input4_sim_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_sim_score(['a'], None)

    def test_invalid_input5_sim_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_sim_score(['MARHTA'], 'MARTHA')

    def test_invalid_input6_sim_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_sim_score('MARHTA', ['MARTHA'])

    def test_invalid_input7_sim_score(self):
        with self.assertRaises(TypeError):
            self.cos.get_sim_score('MARTHA', 'MARTHA')
Loading