From aecb32e1bc872f66ac7d043482583c10f70aee2d Mon Sep 17 00:00:00 2001 From: Louis MARTIN Date: Fri, 18 Jan 2019 02:35:17 -0800 Subject: [PATCH] Fix error in formula for deletion computation To compute the correct deletions ("delgramcountergood_rep"), we need to take the intersection between the deletions in predictions and the deletions in references. However, the code computed the difference between the deletions in predictions and the counts in references (not even the deletions in references). --- SARI.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/SARI.py b/SARI.py index fc07ca9..d2f627b 100644 --- a/SARI.py +++ b/SARI.py @@ -70,8 +70,8 @@ def SARIngram(sgrams, cgrams, rgramslist, numref): # DELETION delgramcounter_rep = sgramcounter_rep - cgramcounter_rep - delgramcountergood_rep = delgramcounter_rep - rgramcounter delgramcounterall_rep = sgramcounter_rep - rgramcounter + delgramcountergood_rep = delgramcounter_rep & rgramcounterall_rep deltmpscore1 = 0 deltmpscore2 = 0 for delgram in delgramcountergood_rep: @@ -82,9 +82,11 @@ def SARIngram(sgrams, cgrams, rgramslist, numref): delscore_precision = deltmpscore1 / len(delgramcounter_rep) delscore_recall = 0 if len(delgramcounterall_rep) > 0: - delscore_recall = deltmpscore1 / len(delgramcounterall_rep) + # Deletions recall is not actually used for SARI (only deletions precision) + delscore_recall = deltmpscore2 / len(delgramcounterall_rep) delscore = 0 if delscore_precision > 0 or delscore_recall > 0: + # This F1 is not actually used for SARI (only deletions precision) delscore = 2 * delscore_precision * delscore_recall / (delscore_precision + delscore_recall)