From e7c6b059b3cca113b38d4a693562e58eb95daa73 Mon Sep 17 00:00:00 2001 From: Daniel Swanson Date: Wed, 20 Jul 2022 21:54:08 -0500 Subject: [PATCH 1/2] allow :deprel as annotation type --- src/midas_loop/server/nlp/common.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/midas_loop/server/nlp/common.clj b/src/midas_loop/server/nlp/common.clj index 199e8a2..133a31b 100644 --- a/src/midas_loop/server/nlp/common.clj +++ b/src/midas_loop/server/nlp/common.clj @@ -112,6 +112,6 @@ tx)) (defn write-probas [node key token-probas-pairs] - (when-not (#{:sentence/probas :xpos/probas :upos/probas :head/probas} key) + (when-not (#{:sentence/probas :xpos/probas :upos/probas :head/probas :deprel/probas} key) (throw (ex-info "Invalid probas key:" {:key key}))) (-write-probas node key token-probas-pairs)) From ae6e8e38db44ee8917a47a9f9b36f883ad675f4e Mon Sep 17 00:00:00 2001 From: Daniel Swanson Date: Sun, 7 Aug 2022 16:02:24 -0500 Subject: [PATCH 2/2] more :deprel stuff --- src/midas_loop/routes/conllu/document.clj | 14 +++++++++++--- src/midas_loop/server/nlp/common.clj | 2 +- src/midas_loop/xtdb/queries/document.clj | 15 ++++++++++++--- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/midas_loop/routes/conllu/document.clj b/src/midas_loop/routes/conllu/document.clj index 15cbf35..2108309 100644 --- a/src/midas_loop/routes/conllu/document.clj +++ b/src/midas_loop/routes/conllu/document.clj @@ -20,12 +20,16 @@ "upos-gold-dec" '[?ugr :desc] "head-gold-inc" '[?hgr :asc] "head-gold-dec" '[?hgr :desc] + "deprel-gold-inc" '[?dgr :asc] + "deprel-gold-dec" '[?dgr :desc] "xpos-mean-top-proba-inc" '[?xmtp :asc] "xpos-mean-top-proba-dec" '[?xmtp :desc] "upos-mean-top-proba-inc" '[?umtp :asc] "upos-mean-top-proba-dec" '[?umtp :desc] "head-mean-top-proba-inc" '[?hmtp :asc] - "head-mean-top-proba-dec" '[?hmtp :desc]}) + "head-mean-top-proba-dec" '[?hmtp :desc] + "deprel-mean-top-proba-inc" '[?dmtp :asc] + "deprel-mean-top-proba-dec" '[?dmtp :desc]}) 
(defn document-query [{:keys [node] :as req}] (let [{:keys [limit offset order-by]} (-> req :parameters :query)] @@ -41,7 +45,7 @@ (bad-request (str "order-by parameter must be one of the following: " sort-set)) :else - (let [query {:find '[(distinct ?d) ?id ?dn ?tc ?sc ?xgr ?ugr ?hgr ?xmtp ?umtp ?hmtp ?smtp] + (let [query {:find '[(distinct ?d) ?id ?dn ?tc ?sc ?xgr ?ugr ?hgr ?dgr ?xmtp ?umtp ?hmtp ?dmtp ?smtp] :where '[[?d :document/id ?id] [?d :document/name ?dn] [?d :document/sentences ?s] @@ -50,9 +54,11 @@ [?d :document/*xpos-gold-rate ?xgr] [?d :document/*upos-gold-rate ?ugr] [?d :document/*head-gold-rate ?hgr] + [?d :document/*deprel-gold-rate ?dgr] [?d :document/*xpos-mean-top-proba ?xmtp] [?d :document/*upos-mean-top-proba ?umtp] [?d :document/*head-mean-top-proba ?hmtp] + [?d :document/*deprel-mean-top-proba ?dmtp] [?d :document/*sentence-mean-top-proba ?smtp]] :order-by [(sort-map order-by)] :limit limit @@ -60,7 +66,7 @@ count-query {:find '[(count ?d)] :where '[[?d :document/id]]} result (xt/q (xt/db node) query)] - (ok {:docs (mapv (fn [[_ id name tcount scount xgr ugr hgr xmtp umtp hmtp smtp :as vals]] + (ok {:docs (mapv (fn [[_ id name tcount scount xgr ugr hgr dgr xmtp umtp hmtp dmtp smtp :as vals]] {:id id :name name :sentence_count scount @@ -68,9 +74,11 @@ :xpos_gold_rate xgr :upos_gold_rate ugr :head_gold_rate hgr + :deprel_gold_rate dgr :xpos_mean_top_proba (if (= -1 xmtp) nil xmtp) :upos_mean_top_proba (if (= -1 umtp) nil umtp) :head_mean_top_proba (if (= -1 hmtp) nil hmtp) + :deprel_mean_top_proba (if (= -1 dmtp) nil dmtp) :sentence_mean_top_proba (if (= -1 smtp) nil smtp)}) result) :total (ffirst (xt/q (xt/db node) count-query))})))))) diff --git a/src/midas_loop/server/nlp/common.clj b/src/midas_loop/server/nlp/common.clj index 133a31b..2949f30 100644 --- a/src/midas_loop/server/nlp/common.clj +++ b/src/midas_loop/server/nlp/common.clj @@ -37,7 +37,7 @@ (predict-prob-dists [this node sentence])) (s/def ::url valid-url?) 
-(s/def ::anno-type #{:sentence :xpos :upos :head-deprel}) +(s/def ::anno-type #{:sentence :xpos :upos :head-deprel :deprel}) (s/def ::type #{:http}) (s/def ::http-config (s/keys :req-un [::url ::type])) ;; Maybe extend with other methods in the future diff --git a/src/midas_loop/xtdb/queries/document.clj b/src/midas_loop/xtdb/queries/document.clj index 7b109bc..b5517e3 100644 --- a/src/midas_loop/xtdb/queries/document.clj +++ b/src/midas_loop/xtdb/queries/document.clj @@ -101,19 +101,28 @@ [?head :head/quality "gold"]] :in [?d]} ?d) - ?head-gold]] + ?head-gold] + [(q {:find [?deprel] + :where [[?d :document/sentences ?s] + [?s :sentence/tokens ?t] + [?t :token/deprel ?deprel] + [?deprel :deprel/quality "gold"]] + :in [?d]} + ?d) ?deprel-gold]] :in '[?id]}] (let [res (xt/q (xt/db node) query document-id) - [scount tcount xgr ugr hgr] (first res) + [scount tcount xgr ugr hgr dgr] (first res) stats {:document/*sentence-count scount :document/*token-count tcount :document/*xpos-gold-rate (/ xgr tcount) :document/*upos-gold-rate (/ ugr tcount) :document/*head-gold-rate (/ hgr tcount) + :document/*deprel-gold-rate (/ dgr tcount) :document/*xpos-mean-top-proba (calculate-probas-stats node document-id "xpos") :document/*upos-mean-top-proba (calculate-probas-stats node document-id "upos") :document/*head-mean-top-proba (calculate-probas-stats node document-id "head") + :document/*deprel-mean-top-proba (calculate-probas-stats node document-id "deprel") :document/*sentence-mean-top-proba (calculate-sentence-probas-stats node document-id)}] (when-not (= 1 (count res)) (throw (ex-info "ID produced a result set that did not have exactly one member!" {:document-id document-id}))) - (cxe/put node (merge (cxe/entity node document-id) stats))))) \ No newline at end of file + (cxe/put node (merge (cxe/entity node document-id) stats)))))