From 7350a2737e4950146c59926a566e4801ba826710 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 8 Aug 2025 00:32:40 +0100 Subject: [PATCH 01/44] Renewed reporting --- test2text/pages/report.py | 273 +++++++++++++++++++++++++------------- 1 file changed, 181 insertions(+), 92 deletions(-) diff --git a/test2text/pages/report.py b/test2text/pages/report.py index a9eaecb..e61eb9a 100644 --- a/test2text/pages/report.py +++ b/test2text/pages/report.py @@ -1,100 +1,189 @@ from itertools import groupby - +import numpy as np import streamlit as st -from test2text.services.db import DbClient - -def add_new_line(summary): - return summary.replace("\n", "
") +from test2text.services.db import DbClient +from test2text.services.utils import unpack_float32 +from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_vectors_2d + def make_a_report(): - st.header("Test2Text Report") - - db = DbClient("./private/requirements.db") - - st.subheader("Table of Contents") - - data = db.conn.execute(""" - SELECT - Requirements.id as req_id, - Requirements.external_id as req_external_id, - Requirements.summary as req_summary, - - Annotations.id as anno_id, - Annotations.summary as anno_summary, - - AnnotationsToRequirements.cached_distance as distance, - - TestCases.id as case_id, - TestCases.test_script as test_script, - TestCases.test_case as test_case - FROM - Requirements - JOIN AnnotationsToRequirements ON Requirements.id = AnnotationsToRequirements.requirement_id - JOIN Annotations ON Annotations.id = AnnotationsToRequirements.annotation_id - JOIN CasesToAnnos ON Annotations.id = CasesToAnnos.annotation_id - JOIN TestCases ON TestCases.id = CasesToAnnos.case_id - ORDER BY - Requirements.id, AnnotationsToRequirements.cached_distance, TestCases.id - """) - - current_annotations = {} - current_test_scripts = set() - - def write_requirement(req_id, req_external_id, req_summary, - current_annotations: set[tuple], current_test_scripts: set): - if req_id is None and req_external_id is None: - return False - - with st.expander(f"#{req_id} Requirement {req_external_id}"): - st.subheader(f"Requirement {req_external_id}") - st.html(f"

{add_new_line(req_summary)}

") - st.subheader("Annotations") - anno, summary, dist = st.columns(3) - with anno: - st.write("Annonation's id") - with summary: - st.write("Summary") - with dist: - st.write("Distance") - for anno_id, anno_summary, distance in current_annotations: - anno, summary, dist = st.columns(3) - with anno: - st.write(f"{anno_id}") - with summary: - st.html( - f"{add_new_line(anno_summary)}" - ) - with dist: - st.write(round(distance, 2)) - - st.subheader("Test Scripts") - for test_script in current_test_scripts: - st.markdown(f"- {test_script}") - - progress_bar = st.empty() - rows = data.fetchall() - if not rows: - st.error("There is no data to inspect.\nPlease upload annotations.") - return None - max_progress = len(rows) - index = 0 - for (req_id, req_external_id, req_summary), group in groupby(rows, lambda x: x[0:3]): - current_annotations = set() - current_test_scripts = set() - index += 1 - for _, _, _, anno_id, anno_summary, distance, case_id, test_script, test_case in group: - current_annotations.add((anno_id, anno_summary, distance)) - current_test_scripts.add(test_script) - write_requirement(req_id=req_id, req_external_id=req_external_id, req_summary=req_summary, - current_annotations=current_annotations, current_test_scripts=current_test_scripts) - - - progress_bar.progress(round(index*100/max_progress), text="Processing...") - progress_bar.empty() - db.conn.close() - + st.header("Test2Text Report") + + + db = DbClient("./private/requirements.db") + + + def write_annotations(current_annotations: set[tuple]): + anno, summary, dist = st.columns(3) + with anno: + st.write("Annonation's id") + with summary: + st.write("Summary") + with dist: + st.write("Distance") + for anno_id, anno_summary, _, distance in current_annotations: + anno, summary, dist = st.columns(3) + with anno: + st.write(f"{anno_id}") + with summary: + st.write(anno_summary) + with dist: + st.write(round(distance, 2)) + + with st.container(border=True): + st.subheader("Filter requirements") + with st.expander("🔍 Filters"): + r_id, summary, embed = st.columns(3) + with r_id: + filter_id = st.text_input("ID", value="", key="filter_id") + st.info("Filter by external ID") + with summary: + filter_summary = st.text_input("Text content", value="", key="filter_summary") + st.info("Search concrete phrases using SQL like expressions") + with embed: + filter_embedding = st.text_input("Smart rearch", value="", key="filter_embedding") + st.info("Search using embeddings") + + where_clauses = [] + params = [] + + if filter_id.strip(): + where_clauses.append("Requirements.id = ?") + params.append(filter_id.strip()) + + if filter_summary.strip(): + where_clauses.append("Requirements.summary LIKE ?") + params.append(f"%{filter_summary.strip()}%") + + # TODO embeddings фильтр не реализован + if filter_embedding.strip(): + st.info("Фильтрация по embeddings не реализована в демо. Используйте другие фильтры.") + + where_sql = "" + if where_clauses: + where_sql = f"WHERE {' AND '.join(where_clauses)}" + + + with st.container(border=True): + st.session_state.update({"req_form_submitting": True}) + sql = f""" + SELECT + Requirements.id as req_id, + Requirements.external_id as req_external_id, + Requirements.summary as req_summary + FROM + Requirements + {where_sql} + ORDER BY + Requirements.id + """ + data = db.conn.execute(sql, params) + + requirements_dict = {f"#{req_id} Requirement {req_external_id}": req_id for (req_id, req_external_id, _) in data.fetchall()} + st.subheader("Choose 1 of filtered requirements") + option = st.selectbox( + "Choose a requirement to work with", + requirements_dict.keys(), + key="filter_req_id" + ) + + if option: + clause = "Requirements.id = ?" + if clause in where_clauses: + idx = where_clauses.index(clause) + params.insert(idx, requirements_dict[option]) + else: + where_clauses.append(clause) + params.append(requirements_dict[option]) + where_sql = "" + if where_clauses: + where_sql = f"WHERE {' AND '.join(where_clauses)}" + + sql = f""" + SELECT + Requirements.id as req_id, + Requirements.external_id as req_external_id, + Requirements.summary as req_summary, + Requirements.embedding as req_embedding, + + Annotations.id as anno_id, + Annotations.summary as anno_summary, + Annotations.embedding as anno_embedding, + + AnnotationsToRequirements.cached_distance as distance, + + TestCases.id as case_id, + TestCases.test_script as test_script, + TestCases.test_case as test_case + FROM + Requirements + JOIN AnnotationsToRequirements ON Requirements.id = AnnotationsToRequirements.requirement_id + JOIN Annotations ON Annotations.id = AnnotationsToRequirements.annotation_id + JOIN CasesToAnnos ON Annotations.id = CasesToAnnos.annotation_id + JOIN TestCases ON TestCases.id = CasesToAnnos.case_id + {where_sql} + ORDER BY + Requirements.id, AnnotationsToRequirements.cached_distance, TestCases.id + """ + + rows = data.fetchall() + if not rows: + st.error("There is no data to inspect.\n" + "Please upload annotations and requirements.") + return None + + st.subheader("Filter Test cases") + + with st.expander("🔍 Filters"): + radius, limit = st.columns(2) + with radius: + filter_radius = st.number_input("Insert a radius", key="filter_radius") + st.info("Max distance to annotation") + with limit: + filter_limit = st.text_input("Limit", value="", key="filter_limit") + st.info("Limit of selected test cases") + + for (req_id, req_external_id, req_summary, req_embedding), group in groupby(rows, lambda x: x[0:4]): + with st.container(border=True): + st.subheader(f" Inspect Requirement {req_external_id}") + st.write(req_summary) + current_test_cases = dict() + for _, _, _, _, anno_id, anno_summary, anno_embedding, distance, case_id, test_script, test_case in group: + current_annotation = current_test_cases.get(test_case, set()) + current_test_cases.update({test_case: current_annotation}) + current_test_cases[test_case].add((anno_id, anno_summary, anno_embedding, distance)) + + t_cs, anno, viz = st.columns(3) + with t_cs: + with st.container(border=True): + st.write("Test Cases") + st.markdown(""" + + """, unsafe_allow_html=True) + st.radio("", current_test_cases.keys(), key="radio_choice") + if st.session_state["radio_choice"]: + with anno: + with st.container(border=True): + st.write("Annotations") + write_annotations(current_annotations=current_test_cases[st.session_state["radio_choice"]]) + + with viz: + with st.container(border=True): + pass + #req_dot = np.array(unpack_float32(req_embedding)) TODO + #plot_vectors_2d(minifold_vectors_2d(np.array([req_dot])), "Requirements") + + db.conn.close() + + if __name__ == "__main__": - make_a_report() + make_a_report() \ No newline at end of file From e040979081268a8fef1ad6acc9af82ce04531cbc Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 8 Aug 2025 13:59:34 +0100 Subject: [PATCH 02/44] Fixed visualization in plotting of two graphics in 2d --- .../visualisation/visualize_vectors.py | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/test2text/services/visualisation/visualize_vectors.py b/test2text/services/visualisation/visualize_vectors.py index e86c7ce..7247cfe 100644 --- a/test2text/services/visualisation/visualize_vectors.py +++ b/test2text/services/visualisation/visualize_vectors.py @@ -53,7 +53,18 @@ def extract_requirement_vectors(db: DbClient): def minifold_vectors_2d(vectors: np.array): - tsne = TSNE(n_components=2, random_state=0) + """ + Reduces high-dimensional vectors to 2D using TSNE. + Handles cases where the number of samples is too small for TSNE by returning the input as-is. + """ + n_samples = vectors.shape[0] + # TSNE requires perplexity < n_samples + if n_samples < 2: + # Not enough samples for TSNE, just return the original (reshaped to 2D if needed) + return vectors.reshape(n_samples, -1)[:, :2] + # Set perplexity to a safe value + perplexity = min(30, max(1, (n_samples - 1) // 3)) + tsne = TSNE(n_components=2, random_state=0, perplexity=perplexity) vectors_2d = tsne.fit_transform(vectors) return vectors_2d @@ -79,10 +90,16 @@ def plot_vectors_3d(vectors_3d: np.array, title): st.plotly_chart(fig, use_container_width=True) -def plot_2_sets_in_one_2d(first_set_of_vec, second_set_of_vec, first_title, second_title): +def plot_2_sets_in_one_2d(first_set_of_vec, second_set_of_vec, first_title, second_title, first_color="blue", second_color="green"): fig = go.Figure() - fig.add_trace(go.Scatter(x=first_set_of_vec[:, 0], y=first_set_of_vec[:, 1], mode='markers', name={first_title})) - fig.add_trace(go.Scatter(x=second_set_of_vec[:, 0], y=second_set_of_vec[:, 1], mode='markers', name={second_title})) + fig.add_trace(go.Scatter(x=first_set_of_vec[:, 0], y=first_set_of_vec[:, 1], + mode='markers', + name=first_title, + marker=dict(color=f"{first_color}"))) + fig.add_trace(go.Scatter(x=second_set_of_vec[:, 0], y=second_set_of_vec[:, 1], + mode='markers', + name=second_title, + marker=dict(color=f"{second_color}"))) fig.update_layout(title=f"{first_title} vs {second_title}", xaxis_title='X', yaxis_title='Y') st.plotly_chart(fig) From 821eae715f99c235de43c3f25bfb8385e64149fc Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 8 Aug 2025 14:00:28 +0100 Subject: [PATCH 03/44] Fixed visualization in plotting of two graphics in 2d and added test case filters --- test2text/pages/report.py | 69 ++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/test2text/pages/report.py b/test2text/pages/report.py index e61eb9a..92feb9c 100644 --- a/test2text/pages/report.py +++ b/test2text/pages/report.py @@ -4,17 +4,13 @@ from test2text.services.db import DbClient from test2text.services.utils import unpack_float32 -from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_vectors_2d +from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_vectors_2d, \ + plot_2_sets_in_one_2d - def make_a_report(): - - st.header("Test2Text Report") - - db = DbClient("./private/requirements.db") - + st.header("Test2Text Report") def write_annotations(current_annotations: set[tuple]): anno, summary, dist = st.columns(3) @@ -91,6 +87,7 @@ def write_annotations(current_annotations: set[tuple]): ) if option: + clause = "Requirements.id = ?" if clause in where_clauses: idx = where_clauses.index(clause) @@ -98,10 +95,40 @@ def write_annotations(current_annotations: set[tuple]): else: where_clauses.append(clause) params.append(requirements_dict[option]) + + st.subheader("Filter Test cases") + + with st.expander("🔍 Filters"): + radius, limit = st.columns(2) + with radius: + filter_radius = st.number_input("Insert a radius", + value=0.00, + step=0.01, + key="filter_radius") + st.info("Max distance to annotation") + with limit: + filter_limit = st.number_input( + "Test case limit to show", + min_value=1, + max_value=15, + value=15, + step=1, + key="filter_limit" + ) + st.info("Limit of selected test cases") + + if filter_radius: + where_clauses.append("distance >= ?") + params.append(f"{filter_radius}") + + if filter_limit: + params.append(f"{filter_limit}") + where_sql = "" if where_clauses: where_sql = f"WHERE {' AND '.join(where_clauses)}" + sql = f""" SELECT Requirements.id as req_id, @@ -127,27 +154,18 @@ def write_annotations(current_annotations: set[tuple]): {where_sql} ORDER BY Requirements.id, AnnotationsToRequirements.cached_distance, TestCases.id + LIMIT ? """ - + data = db.conn.execute(sql, params) rows = data.fetchall() if not rows: st.error("There is no data to inspect.\n" "Please upload annotations and requirements.") return None - st.subheader("Filter Test cases") - - with st.expander("🔍 Filters"): - radius, limit = st.columns(2) - with radius: - filter_radius = st.number_input("Insert a radius", key="filter_radius") - st.info("Max distance to annotation") - with limit: - filter_limit = st.text_input("Limit", value="", key="filter_limit") - st.info("Limit of selected test cases") for (req_id, req_external_id, req_summary, req_embedding), group in groupby(rows, lambda x: x[0:4]): - with st.container(border=True): + with st.container(): st.subheader(f" Inspect Requirement {req_external_id}") st.write(req_summary) current_test_cases = dict() @@ -178,9 +196,16 @@ def write_annotations(current_annotations: set[tuple]): with viz: with st.container(border=True): - pass - #req_dot = np.array(unpack_float32(req_embedding)) TODO - #plot_vectors_2d(minifold_vectors_2d(np.array([req_dot])), "Requirements") + req_dot = np.array(unpack_float32(req_embedding)) + anno_embeddings = [ + unpack_float32(anno_emb) + for _, _, anno_emb,_ in current_test_cases[st.session_state["radio_choice"]] + ] + + anno_embeddings_np = np.array(anno_embeddings) + plot_2_sets_in_one_2d(minifold_vectors_2d(np.array([req_dot])), + minifold_vectors_2d(anno_embeddings_np), + "Requirements", "Annotations", first_color="red", second_color="green") db.conn.close() From f49bcb84be8a267a00fb22887c209a57e8a8a6f5 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 8 Aug 2025 14:29:04 +0100 Subject: [PATCH 04/44] Renamed report.py --- main.py | 6 +++--- test2text/pages/{report.py => report_by_req.py} | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) rename test2text/pages/{report.py => report_by_req.py} (99%) diff --git a/main.py b/main.py index f7f5bc4..85c2213 100644 --- a/main.py +++ b/main.py @@ -3,7 +3,7 @@ from test2text.pages.upload.annotations import show_annotations from test2text.pages.upload.requirements import show_requirements from test2text.services.embeddings.cache_distances import show_distances_histogram -from test2text.pages.report import make_a_report +from test2text.pages.report_by_req import make_a_report from test2text.services.visualisation.visualize_vectors import visualize_vectors @@ -41,14 +41,14 @@ def add_logo(): title="Requirements", icon=":material/database_upload:") cache_distances = st.Page(show_distances_histogram, title="Cache Distances", icon=":material/cached:") - report = st.Page(make_a_report, + report_by_req = st.Page(make_a_report, title="Report", icon=":material/publish:") visualization = st.Page(visualize_vectors, title="Visualize Vectors", icon=":material/dataset:") pages = { "Upload": [annotations, requirements], "Update": [cache_distances], - "Inspect": [report, visualization], + "Inspect": [report_by_req, visualization], } pg = st.navigation(pages) diff --git a/test2text/pages/report.py b/test2text/pages/report_by_req.py similarity index 99% rename from test2text/pages/report.py rename to test2text/pages/report_by_req.py index 92feb9c..ea9d6f0 100644 --- a/test2text/pages/report.py +++ b/test2text/pages/report_by_req.py @@ -165,6 +165,7 @@ def write_annotations(current_annotations: set[tuple]): for (req_id, req_external_id, req_summary, req_embedding), group in groupby(rows, lambda x: x[0:4]): + st.divider() with st.container(): st.subheader(f" Inspect Requirement {req_external_id}") st.write(req_summary) From e28020f927f06f672a25eb1ad48ae63a3072966d Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 8 Aug 2025 16:36:11 +0100 Subject: [PATCH 05/44] added reporting by selecting test cases --- main.py | 7 +- test2text/pages/report_by_tc.py | 217 ++++++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+), 2 deletions(-) create mode 100644 test2text/pages/report_by_tc.py diff --git a/main.py b/main.py index 85c2213..799a257 100644 --- a/main.py +++ b/main.py @@ -4,6 +4,7 @@ from test2text.pages.upload.requirements import show_requirements from test2text.services.embeddings.cache_distances import show_distances_histogram from test2text.pages.report_by_req import make_a_report +from test2text.pages.report_by_tc import make_a_tc_report from test2text.services.visualisation.visualize_vectors import visualize_vectors @@ -42,13 +43,15 @@ def add_logo(): cache_distances = st.Page(show_distances_histogram, title="Cache Distances", icon=":material/cached:") report_by_req = st.Page(make_a_report, - title="Report", icon=":material/publish:") + title="Requirement's Report", icon=":material/publish:") + report_by_tc = st.Page(make_a_tc_report, + title="Test cases Report", icon=":material/publish:") visualization = st.Page(visualize_vectors, title="Visualize Vectors", icon=":material/dataset:") pages = { "Upload": [annotations, requirements], "Update": [cache_distances], - "Inspect": [report_by_req, visualization], + "Inspect": [report_by_req, report_by_tc, visualization], } pg = st.navigation(pages) diff --git a/test2text/pages/report_by_tc.py b/test2text/pages/report_by_tc.py new file mode 100644 index 0000000..4f9f7af --- /dev/null +++ b/test2text/pages/report_by_tc.py @@ -0,0 +1,217 @@ +from itertools import groupby +import numpy as np +import streamlit as st + +from test2text.services.db import DbClient +from test2text.services.utils import unpack_float32 +from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_vectors_2d, \ + plot_2_sets_in_one_2d + + +def make_a_tc_report(): + db = DbClient("./private/requirements.db") + st.header("Test2Text Report") + + def write_requirements(current_requirements: set[tuple]): + req, summary, dist = st.columns(3) + with req: + st.write("Requirements's id") + with summary: + st.write("Summary") + with dist: + st.write("Distance") + + for req_id, req_external_id, req_summary, _, distance in current_requirements: + req, summary, dist = st.columns(3) + with req: + st.write(f"#{req_id} Requirement {req_external_id}") + with summary: + st.write(req_summary) + with dist: + st.write(distance) + + with st.container(border=True): + st.subheader("Filter test cases") + with st.expander("🔍 Filters"): + r_id, summary, embed = st.columns(3) + with r_id: + filter_id = st.text_input("ID", value="", key="filter_id") + st.info("Filter by external ID") + with summary: + filter_summary = st.text_input("Text content", value="", key="filter_summary") + st.info("Search concrete phrases using SQL like expressions") + with embed: + filter_embedding = st.text_input("Smart rearch", value="", key="filter_embedding") + st.info("Search using embeddings") + + where_clauses = [] + params = [] + + if filter_id.strip(): + where_clauses.append("Testcases.id = ?") + params.append(filter_id.strip()) + + if filter_summary.strip(): + where_clauses.append("Testcases.test_case LIKE ?") + params.append(f"%{filter_summary.strip()}%") + + # TODO embeddings фильтр не реализован + if filter_embedding.strip(): + st.info("Фильтрация по embeddings не реализована в демо. Используйте другие фильтры.") + + where_sql = "" + if where_clauses: + where_sql = f"WHERE {' AND '.join(where_clauses)}" + + + with st.container(border=True): + st.session_state.update({"tc_form_submitting": True}) + sql = f""" + SELECT + TestCases.id as case_id, + TestCases.test_script as test_script, + TestCases.test_case as test_case + FROM + TestCases + {where_sql} + ORDER BY + TestCases.id + """ + data = db.conn.execute(sql, params) + + tc_dict = {f"#{tc_id} Testcase {test_case}": tc_id for (tc_id, _, test_case) in data.fetchall()} + st.subheader("Choose ONE of filtered test casees") + option = st.selectbox( + "Choose a requirement to work with", + tc_dict.keys(), + key="filter_tc_id" + ) + + if option: + + clause = "Testcases.id = ?" + if clause in where_clauses: + idx = where_clauses.index(clause) + params.insert(idx, tc_dict[option]) + else: + where_clauses.append(clause) + params.append(tc_dict[option]) + + st.subheader("Filter Requirements") + + with st.expander("🔍 Filters"): + radius, limit = st.columns(2) + with radius: + filter_radius = st.number_input("Insert a radius", + value=0.00, + step=0.01, + key="filter_radius") + st.info("Max distance to annotation") + with limit: + filter_limit = st.number_input( + "Requirement's limit to show", + min_value=1, + max_value=15, + value=15, + step=1, + key="filter_limit" + ) + st.info("Limit of selected requirements") + + if filter_radius: + where_clauses.append("distance >= ?") + params.append(f"{filter_radius}") + + if filter_limit: + params.append(f"{filter_limit}") + + where_sql = "" + if where_clauses: + where_sql = f"WHERE {' AND '.join(where_clauses)}" + + + sql = f""" + SELECT + TestCases.id as case_id, + TestCases.test_script as test_script, + TestCases.test_case as test_case, + + Annotations.id as anno_id, + Annotations.summary as anno_summary, + Annotations.embedding as anno_embedding, + + AnnotationsToRequirements.cached_distance as distance, + + Requirements.id as req_id, + Requirements.external_id as req_external_id, + Requirements.summary as req_summary, + Requirements.embedding as req_embedding + FROM + TestCases + JOIN CasesToAnnos ON TestCases.id = CasesToAnnos.case_id + JOIN Annotations ON Annotations.id = CasesToAnnos.annotation_id + JOIN AnnotationsToRequirements ON Annotations.id = AnnotationsToRequirements.annotation_id + JOIN Requirements ON Requirements.id = AnnotationsToRequirements.requirement_id + {where_sql} + ORDER BY + case_id, distance, req_id + LIMIT ? + """ + data = db.conn.execute(sql, params) + rows = data.fetchall() + if not rows: + st.error("There is no data to inspect.\n" + "Please upload annotations and requirements.") + return None + + + for (tc_id, test_script, test_case), group in groupby(rows, lambda x: x[0:3]): + st.divider() + with st.container(): + st.subheader(f"Inspect #{tc_id} Test case {test_case}") + current_annotations = dict() + for _, _, _, anno_id, anno_summary, anno_embedding, distance, req_id, req_external_id, req_summary, req_embedding in group: + current_annotation = (anno_id, anno_summary, anno_embedding) + current_reqs = current_annotations.get(current_annotation, set()) + current_annotations.update({current_annotation: current_reqs}) + current_annotations[current_annotation].add((req_id, req_external_id, req_summary, req_embedding, distance)) + + t_cs, anno, viz = st.columns(3) + with t_cs: + with st.container(border=True): + st.write("Annotations") + st.markdown(""" + + """, unsafe_allow_html=True) + reqs_by_anno = {f"#{anno_id} Annotation {anno_summary}": (anno_id, anno_summary, anno_embedding) for (anno_id, anno_summary, anno_embedding) in current_annotations.keys()} + radio_choice = st.radio("", reqs_by_anno.keys(), key="radio_choice") + if radio_choice: + with anno: + with st.container(border=True): + st.write("Requirements") + write_requirements(current_annotations[reqs_by_anno[radio_choice]]) + + with viz: + with st.container(border=True): + anno_dot = np.array(unpack_float32(anno_embedding)) + req_embeddings = [ + unpack_float32(req_emb) + for _, _, _, req_emb,_ in current_annotations[reqs_by_anno[radio_choice]] + ] + + req_embeddings_np = np.array(req_embeddings) + plot_2_sets_in_one_2d(minifold_vectors_2d(np.array([anno_dot])), + minifold_vectors_2d(req_embeddings_np), + "Annotations", "Requirements",first_color="red", second_color="green") + + db.conn.close() + + +if __name__ == "__main__": + make_a_tc_report() \ No newline at end of file From 10d32a03785d4c860ca22031de15027de2dbfae5 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 8 Aug 2025 17:11:00 +0100 Subject: [PATCH 06/44] added folder for reports --- test2text/pages/reports/__init__.py | 0 test2text/pages/{ => reports}/report_by_req.py | 0 test2text/pages/{ => reports}/report_by_tc.py | 5 +++-- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 test2text/pages/reports/__init__.py rename test2text/pages/{ => reports}/report_by_req.py (100%) rename test2text/pages/{ => reports}/report_by_tc.py (97%) diff --git a/test2text/pages/reports/__init__.py b/test2text/pages/reports/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test2text/pages/report_by_req.py b/test2text/pages/reports/report_by_req.py similarity index 100% rename from test2text/pages/report_by_req.py rename to test2text/pages/reports/report_by_req.py diff --git a/test2text/pages/report_by_tc.py b/test2text/pages/reports/report_by_tc.py similarity index 97% rename from test2text/pages/report_by_tc.py rename to test2text/pages/reports/report_by_tc.py index 4f9f7af..efd581e 100644 --- a/test2text/pages/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -168,7 +168,8 @@ def write_requirements(current_requirements: set[tuple]): for (tc_id, test_script, test_case), group in groupby(rows, lambda x: x[0:3]): st.divider() with st.container(): - st.subheader(f"Inspect #{tc_id} Test case {test_case}") + st.subheader(f"Inspect #{tc_id} Test case '{test_case}'") + st.write(f"From test script {test_script}") current_annotations = dict() for _, _, _, anno_id, anno_summary, anno_embedding, distance, req_id, req_external_id, req_summary, req_embedding in group: current_annotation = (anno_id, anno_summary, anno_embedding) @@ -189,7 +190,7 @@ def write_requirements(current_requirements: set[tuple]): } """, unsafe_allow_html=True) - reqs_by_anno = {f"#{anno_id} Annotation {anno_summary}": (anno_id, anno_summary, anno_embedding) for (anno_id, anno_summary, anno_embedding) in current_annotations.keys()} + reqs_by_anno = {f"#{anno_id} Annotation '{anno_summary}'": (anno_id, anno_summary, anno_embedding) for (anno_id, anno_summary, anno_embedding) in current_annotations.keys()} radio_choice = st.radio("", reqs_by_anno.keys(), key="radio_choice") if radio_choice: with anno: From 71378f1651ca645256a21e2dad2f71ed2c161acb Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 8 Aug 2025 17:12:09 +0100 Subject: [PATCH 07/44] added documentation page that describes how to use it. --- main.py | 9 +++-- test2text/pages/documentation.py | 61 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 test2text/pages/documentation.py diff --git a/main.py b/main.py index 799a257..d7b3f5e 100644 --- a/main.py +++ b/main.py @@ -1,10 +1,11 @@ import streamlit as st +from test2text.pages.documentation import show_documentation from test2text.pages.upload.annotations import show_annotations from test2text.pages.upload.requirements import show_requirements from test2text.services.embeddings.cache_distances import show_distances_histogram -from test2text.pages.report_by_req import make_a_report -from test2text.pages.report_by_tc import make_a_tc_report +from test2text.pages.reports.report_by_req import make_a_report +from test2text.pages.reports.report_by_tc import make_a_tc_report from test2text.services.visualisation.visualize_vectors import visualize_vectors @@ -36,6 +37,9 @@ def add_logo(): st.set_page_config(page_title="Test2Text App", layout="wide", initial_sidebar_state="auto") add_logo() + about = st.Page(show_documentation, + title="About application", icon=":material/info:") + annotations = st.Page(show_annotations, title="Annotations", icon=":material/database_upload:") requirements = st.Page(show_requirements, @@ -49,6 +53,7 @@ def add_logo(): visualization = st.Page(visualize_vectors, title="Visualize Vectors", icon=":material/dataset:") pages = { + "Home": [about], "Upload": [annotations, requirements], "Update": [cache_distances], "Inspect": [report_by_req, report_by_tc, visualization], diff --git a/test2text/pages/documentation.py b/test2text/pages/documentation.py new file mode 100644 index 0000000..945ab9f --- /dev/null +++ b/test2text/pages/documentation.py @@ -0,0 +1,61 @@ +import streamlit as st + +def show_documentation(): + st.markdown(""" + # Test2Text Application Documentation + + ## About the Application + + **Test2Text** is a tool for showing saved test cases, requirements, and annotations, as well as for generating reports and analyzing requirements coverage by tests. + The application helps automate working with test requirements and provides a convenient interface for analyzing the relationships between test cases and requirements. + + --- + + ## Application Pages Overview + + ### 1. **About application** + - **Description:** This page contains the user guide, a description of all pages, and instructions for working with the application. + - **How to use:** Simply read the description to understand the purpose of the application. + + ### 2. **Annotations** + - **Description:** Work with annotations that link requirements and test cases. + - **How to use:** + - View existing annotations. + - Add new annotations to link requirements and test cases. + + ### 3. **Requirements** + - **Description:** View selected requirements. + - **How to use:** + - Browse the list of requirements. + - Add new requirements. + - Link requirements with annotations and test cases. + + ### 4. **Reports** + - **Description:** Generate reports on test cases, requirements, and their relationships. + - **How to use:** + - Select the desired report type (e.g., by test case or by requirement). + - Use filters to refine the report. + - Analyze selected requirements or test cases by showed and plotted distances. + + ### 5. **Cache distances** + - **Description:** Update distances by embeddings (vector representations) for intelligent matching of requirements and annotations. + - **How to use:** + - Enter a search query or embedding. + - Get relevant results based on vector search. + + ### 6. **Visualize vectors** + - **Description:** Visualise distances by embeddings (vector representations) of requirements and annotations. + - **How to use:** + - Run script that will get all the data from database and will plot it to 2d and 3d graphics. + --- + + ## Usage Tips + + - Use filters and search for quick access to the information you need. + - Link test cases with requirements via annotations for better coverage analysis. + - Regularly review reports to monitor the quality of your tests. + - Refer to the "Documentation" page for help on using the application. + + --- + """ + ) \ No newline at end of file From 0e78f3ff5b751a04ac4311790789c0bb47a2692c Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Mon, 11 Aug 2025 10:55:54 +0100 Subject: [PATCH 08/44] deleted unused import --- test2text/pages/reports/report_by_req.py | 3 +-- test2text/pages/reports/report_by_tc.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index ea9d6f0..0c860cc 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -4,8 +4,7 @@ from test2text.services.db import DbClient from test2text.services.utils import unpack_float32 -from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_vectors_2d, \ - plot_2_sets_in_one_2d +from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_2_sets_in_one_2d def make_a_report(): diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index efd581e..752b2cc 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -4,8 +4,7 @@ from test2text.services.db import DbClient from test2text.services.utils import unpack_float32 -from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_vectors_2d, \ - plot_2_sets_in_one_2d +from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_2_sets_in_one_2d def make_a_tc_report(): From f8a4a9a3cff09388518956152cc233d86e553076 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Mon, 11 Aug 2025 11:53:51 +0100 Subject: [PATCH 09/44] added 3D visualization --- test2text/pages/reports/report_by_req.py | 30 +++++++++++-------- test2text/pages/reports/report_by_tc.py | 28 ++++++++++------- .../visualisation/visualize_vectors.py | 9 ++++-- 3 files changed, 41 insertions(+), 26 deletions(-) diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 0c860cc..d54a0d5 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -4,7 +4,8 @@ from test2text.services.db import DbClient from test2text.services.utils import unpack_float32 -from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_2_sets_in_one_2d +from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_2_sets_in_one_2d, \ + minifold_vectors_3d, plot_2_sets_in_one_3d def make_a_report(): @@ -177,7 +178,6 @@ def write_annotations(current_annotations: set[tuple]): t_cs, anno, viz = st.columns(3) with t_cs: with st.container(border=True): - st.write("Test Cases") st.markdown(""" """, unsafe_allow_html=True) - st.radio("", current_test_cases.keys(), key="radio_choice") + st.radio("Test Cases", current_test_cases.keys(), key="radio_choice") if st.session_state["radio_choice"]: with anno: with st.container(border=True): - st.write("Annotations") + st.write("Annotations for chosen test case") write_annotations(current_annotations=current_test_cases[st.session_state["radio_choice"]]) with viz: with st.container(border=True): - req_dot = np.array(unpack_float32(req_embedding)) + st.write("Visualization") + select = st.selectbox("Choose type of visualization", ["2D", "3D"]) anno_embeddings = [ unpack_float32(anno_emb) - for _, _, anno_emb,_ in current_test_cases[st.session_state["radio_choice"]] + for _, _, anno_emb, _ in current_test_cases[st.session_state["radio_choice"]] ] - - anno_embeddings_np = np.array(anno_embeddings) - plot_2_sets_in_one_2d(minifold_vectors_2d(np.array([req_dot])), - minifold_vectors_2d(anno_embeddings_np), - "Requirements", "Annotations", first_color="red", second_color="green") - + requirement_vectors = np.array([np.array(unpack_float32(req_embedding))]) + annotation_vectors = np.array(anno_embeddings) + if select == "2D": + + plot_2_sets_in_one_2d(minifold_vectors_2d(requirement_vectors), + minifold_vectors_2d(annotation_vectors), + "Requirement", "Annotations", first_color="red", second_color="green") + else: + reqs_vectors_3d = minifold_vectors_3d(requirement_vectors) + anno_vectors_3d = minifold_vectors_3d(annotation_vectors) + plot_2_sets_in_one_3d(reqs_vectors_3d, anno_vectors_3d, "Requerement", "Annotations") db.conn.close() diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 752b2cc..339e92f 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -4,7 +4,8 @@ from test2text.services.db import DbClient from test2text.services.utils import unpack_float32 -from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_2_sets_in_one_2d +from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_2_sets_in_one_2d, \ + minifold_vectors_3d, plot_2_sets_in_one_3d def make_a_tc_report(): @@ -14,7 +15,7 @@ def make_a_tc_report(): def write_requirements(current_requirements: set[tuple]): req, summary, dist = st.columns(3) with req: - st.write("Requirements's id") + st.write("Requirement") with summary: st.write("Summary") with dist: @@ -179,7 +180,6 @@ def write_requirements(current_requirements: set[tuple]): t_cs, anno, viz = st.columns(3) with t_cs: with st.container(border=True): - st.write("Annotations") st.markdown(""" - """, unsafe_allow_html=True) - st.radio("Test Cases", current_test_cases.keys(), key="radio_choice") + + """, unsafe_allow_html=True) + if st.session_state["radio_choice"]: with anno: with st.container(border=True): - st.write("Annotations for chosen test case") + st.write("Annotations") + st.info("List of Annotations for chosen Test case") write_annotations(current_annotations=current_test_cases[st.session_state["radio_choice"]]) - with viz: with st.container(border=True): st.write("Visualization") diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 339e92f..2931404 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -180,23 +180,26 @@ def write_requirements(current_requirements: set[tuple]): t_cs, anno, viz = st.columns(3) with t_cs: with st.container(border=True): - st.markdown(""" - - """, unsafe_allow_html=True) + st.write("Annotations") + st.info("Annotations linked to chosen Test case") reqs_by_anno = {f"#{anno_id} Annotation '{anno_summary}'": (anno_id, anno_summary, anno_embedding) for (anno_id, anno_summary, anno_embedding) in current_annotations.keys()} - radio_choice = st.radio("", reqs_by_anno.keys(), key="radio_choice") + radio_choice = st.radio("Annotation's id + summary", reqs_by_anno.keys(), key="radio_choice") + st.markdown(""" + + """, unsafe_allow_html=True) + if radio_choice: with anno: with st.container(border=True): st.write("Requirements") + st.info("Found Requirements for chosen annotation") write_requirements(current_annotations[reqs_by_anno[radio_choice]]) - with viz: with st.container(border=True): st.write("Visualization") From 1dae49922880ba131f577edd781e239260f6d6d8 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Mon, 11 Aug 2025 20:02:09 +0100 Subject: [PATCH 11/44] some small changes in texts --- test2text/pages/documentation.py | 6 +++--- test2text/pages/reports/report_by_req.py | 4 ++-- test2text/pages/reports/report_by_tc.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/test2text/pages/documentation.py b/test2text/pages/documentation.py index 945ab9f..c1ebc63 100644 --- a/test2text/pages/documentation.py +++ b/test2text/pages/documentation.py @@ -50,12 +50,12 @@ def show_documentation(): --- ## Usage Tips - - - Use filters and search for quick access to the information you need. + + - Upload annotations and requirements to the app's database. - Link test cases with requirements via annotations for better coverage analysis. + - Use filters and search for quick access to the information you need. - Regularly review reports to monitor the quality of your tests. - Refer to the "Documentation" page for help on using the application. - --- """ ) \ No newline at end of file diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 54e99a1..46f35dc 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -159,8 +159,8 @@ def write_annotations(current_annotations: set[tuple]): data = db.conn.execute(sql, params) rows = data.fetchall() if not rows: - st.error("There is no data to inspect.\n" - "Please upload annotations and requirements.") + st.error("There is no requested data to inspect.\n" + "Please check filters, completeness of the data or upload new annotations and requirements.") return None diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 2931404..86bdd9a 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -160,8 +160,8 @@ def write_requirements(current_requirements: set[tuple]): data = db.conn.execute(sql, params) rows = data.fetchall() if not rows: - st.error("There is no data to inspect.\n" - "Please upload annotations and requirements.") + st.error("There is no requested data to inspect.\n" + "Please check filters, completeness of the data or upload new annotations and requirements.") return None From f1c29fca80483f82774566d4b7d918aff0b049b7 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:59:54 +0100 Subject: [PATCH 12/44] added embedding's filter for requirements --- test2text/pages/reports/report_by_req.py | 31 ++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 46f35dc..029b90f 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -1,13 +1,17 @@ from itertools import groupby import numpy as np import streamlit as st +from sqlite_vec import serialize_float32 from test2text.services.db import DbClient +from test2text.services.embeddings.embed import embed_requirement from test2text.services.utils import unpack_float32 from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_2_sets_in_one_2d, \ minifold_vectors_3d, plot_2_sets_in_one_3d + + def make_a_report(): db = DbClient("./private/requirements.db") st.header("Test2Text Report") @@ -54,9 +58,17 @@ def write_annotations(current_annotations: set[tuple]): where_clauses.append("Requirements.summary LIKE ?") params.append(f"%{filter_summary.strip()}%") - # TODO embeddings фильтр не реализован + + distance_sql = "" + distance_order_sql = "" + query_embedding_bytes = None if filter_embedding.strip(): - st.info("Фильтрация по embeddings не реализована в демо. Используйте другие фильтры.") + query_embedding = embed_requirement(filter_embedding.strip()) + query_embedding_bytes = serialize_float32(query_embedding) + distance_sql = (", " + "vec_distance_L2(embedding, ?) AS distance") + distance_order_sql = "distance ASC, " + where_sql = "" if where_clauses: @@ -70,15 +82,20 @@ def write_annotations(current_annotations: set[tuple]): Requirements.id as req_id, Requirements.external_id as req_external_id, Requirements.summary as req_summary + {distance_sql} FROM Requirements {where_sql} ORDER BY - Requirements.id + {distance_order_sql}Requirements.id """ - data = db.conn.execute(sql, params) + data = db.conn.execute(sql, params + [query_embedding_bytes] if distance_sql else params) + if distance_sql: + requirements_dict = {f"#{req_id} Requirement {req_external_id} [smart search d={distance}]": req_id for + (req_id, req_external_id, _, distance) in data.fetchall()} + else: + requirements_dict = {f"#{req_id} Requirement {req_external_id}": req_id for (req_id, req_external_id, _) in data.fetchall()} - requirements_dict = {f"#{req_id} Requirement {req_external_id}": req_id for (req_id, req_external_id, _) in data.fetchall()} st.subheader("Choose 1 of filtered requirements") option = st.selectbox( "Choose a requirement to work with", @@ -102,7 +119,7 @@ def write_annotations(current_annotations: set[tuple]): radius, limit = st.columns(2) with radius: filter_radius = st.number_input("Insert a radius", - value=0.00, + value=1.00, step=0.01, key="filter_radius") st.info("Max distance to annotation") @@ -118,7 +135,7 @@ def write_annotations(current_annotations: set[tuple]): st.info("Limit of selected test cases") if filter_radius: - where_clauses.append("distance >= ?") + where_clauses.append("distance <= ?") params.append(f"{filter_radius}") if filter_limit: From b2faa3bd1d59c4adf96d556872610aedd3617c87 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Wed, 13 Aug 2025 13:01:03 +0100 Subject: [PATCH 13/44] added embeddings to test cases and filtering by them in report --- test2text/pages/reports/report_by_tc.py | 23 ++++++++--- test2text/services/db/client.py | 2 +- test2text/services/db/tables/test_case.py | 41 +++++++++++++------ .../services/loaders/index_annotations.py | 39 +++++++++++------- 4 files changed, 72 insertions(+), 33 deletions(-) diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 86bdd9a..5575a35 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -1,13 +1,16 @@ from itertools import groupby import numpy as np import streamlit as st +from sqlite_vec import serialize_float32 from test2text.services.db import DbClient +from test2text.services.embeddings.embed import embed_requirement from test2text.services.utils import unpack_float32 from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_2_sets_in_one_2d, \ minifold_vectors_3d, plot_2_sets_in_one_3d + def make_a_tc_report(): db = DbClient("./private/requirements.db") st.header("Test2Text Report") @@ -55,9 +58,17 @@ def write_requirements(current_requirements: set[tuple]): where_clauses.append("Testcases.test_case LIKE ?") params.append(f"%{filter_summary.strip()}%") - # TODO embeddings фильтр не реализован + distance_sql = "" + distance_order_sql = "" + query_embedding_bytes = None if filter_embedding.strip(): - st.info("Фильтрация по embeddings не реализована в демо. Используйте другие фильтры.") + query_embedding = embed_requirement(filter_embedding.strip()) + query_embedding_bytes = serialize_float32(query_embedding) + distance_sql = (", " + "vec_distance_L2(embedding, ?) AS distance") + distance_order_sql = "distance ASC, " + + where_sql = "" if where_clauses: @@ -71,13 +82,14 @@ def write_requirements(current_requirements: set[tuple]): TestCases.id as case_id, TestCases.test_script as test_script, TestCases.test_case as test_case + {distance_sql} FROM TestCases {where_sql} ORDER BY - TestCases.id + {distance_order_sql}TestCases.id """ - data = db.conn.execute(sql, params) + data = db.conn.execute(sql, params + [query_embedding_bytes] if distance_sql else params) tc_dict = {f"#{tc_id} Testcase {test_case}": tc_id for (tc_id, _, test_case) in data.fetchall()} st.subheader("Choose ONE of filtered test casees") @@ -129,7 +141,6 @@ def write_requirements(current_requirements: set[tuple]): if where_clauses: where_sql = f"WHERE {' AND '.join(where_clauses)}" - sql = f""" SELECT TestCases.id as case_id, @@ -182,7 +193,7 @@ def write_requirements(current_requirements: set[tuple]): with st.container(border=True): st.write("Annotations") st.info("Annotations linked to chosen Test case") - reqs_by_anno = {f"#{anno_id} Annotation '{anno_summary}'": (anno_id, anno_summary, anno_embedding) for (anno_id, anno_summary, anno_embedding) in current_annotations.keys()} + reqs_by_anno = {f"#{anno_id} {anno_summary}": (anno_id, anno_summary, anno_embedding) for (anno_id, anno_summary, anno_embedding) in current_annotations.keys()} radio_choice = st.radio("Annotation's id + summary", reqs_by_anno.keys(), key="radio_choice") st.markdown(""" - """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) if st.session_state["radio_choice"]: with anno: with st.container(border=True): st.write("Annotations") st.info("List of Annotations for chosen Test case") - write_annotations(current_annotations=current_test_cases[st.session_state["radio_choice"]]) + write_annotations( + current_annotations=current_test_cases[ + st.session_state["radio_choice"] + ] + ) with viz: with st.container(border=True): st.write("Visualization") - select = st.selectbox("Choose type of visualization", ["2D", "3D"]) + select = st.selectbox( + "Choose type of visualization", ["2D", "3D"] + ) anno_embeddings = [ unpack_float32(anno_emb) - for _, _, anno_emb, _ in current_test_cases[st.session_state["radio_choice"]] + for _, _, anno_emb, _ in current_test_cases[ + st.session_state["radio_choice"] + ] ] - requirement_vectors = np.array([np.array(unpack_float32(req_embedding))]) + requirement_vectors = np.array( + [np.array(unpack_float32(req_embedding))] + ) annotation_vectors = np.array(anno_embeddings) if select == "2D": - - plot_2_sets_in_one_2d(minifold_vectors_2d(requirement_vectors), - minifold_vectors_2d(annotation_vectors), - "Requirement", "Annotations", first_color="red", second_color="green") + plot_2_sets_in_one_2d( + minifold_vectors_2d(requirement_vectors), + minifold_vectors_2d(annotation_vectors), + "Requirement", + "Annotations", + first_color="red", + second_color="green", + ) else: - reqs_vectors_3d = minifold_vectors_3d(requirement_vectors) - anno_vectors_3d = minifold_vectors_3d(annotation_vectors) - plot_2_sets_in_one_3d(reqs_vectors_3d, anno_vectors_3d, "Requerement", "Annotations") + reqs_vectors_3d = minifold_vectors_3d( + requirement_vectors + ) + anno_vectors_3d = minifold_vectors_3d( + annotation_vectors + ) + plot_2_sets_in_one_3d( + reqs_vectors_3d, + anno_vectors_3d, + "Requerement", + "Annotations", + ) db.conn.close() - - + + if __name__ == "__main__": - make_a_report() \ No newline at end of file + make_a_report() diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 566eee9..73fb49d 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -6,9 +6,12 @@ from test2text.services.db import get_db_client from test2text.services.embeddings.embed import embed_requirement from test2text.services.utils import unpack_float32 -from test2text.services.visualisation.visualize_vectors import minifold_vectors_2d, plot_2_sets_in_one_2d, \ - minifold_vectors_3d, plot_2_sets_in_one_3d - +from test2text.services.visualisation.visualize_vectors import ( + minifold_vectors_2d, + plot_2_sets_in_one_2d, + minifold_vectors_3d, + plot_2_sets_in_one_3d, +) def make_a_tc_report(): @@ -41,10 +44,14 @@ def write_requirements(current_requirements: set[tuple]): filter_id = st.text_input("ID", value="", key="filter_id") st.info("Filter by external ID") with summary: - filter_summary = st.text_input("Text content", value="", key="filter_summary") + filter_summary = st.text_input( + "Text content", value="", key="filter_summary" + ) st.info("Search concrete phrases using SQL like expressions") with embed: - filter_embedding = st.text_input("Smart rearch", value="", key="filter_embedding") + filter_embedding = st.text_input( + "Smart rearch", value="", key="filter_embedding" + ) st.info("Search using embeddings") where_clauses = [] @@ -64,17 +71,13 @@ def write_requirements(current_requirements: set[tuple]): if filter_embedding.strip(): query_embedding = embed_requirement(filter_embedding.strip()) query_embedding_bytes = serialize_float32(query_embedding) - distance_sql = (", " - "vec_distance_L2(embedding, ?) AS distance") + distance_sql = ", vec_distance_L2(embedding, ?) AS distance" distance_order_sql = "distance ASC, " - - where_sql = "" if where_clauses: where_sql = f"WHERE {' AND '.join(where_clauses)}" - with st.container(border=True): st.session_state.update({"tc_form_submitting": True}) sql = f""" @@ -89,18 +92,23 @@ def write_requirements(current_requirements: set[tuple]): ORDER BY {distance_order_sql}TestCases.id """ - data = db.conn.execute(sql, params + [query_embedding_bytes] if distance_sql else params) + data = db.conn.execute( + sql, params + [query_embedding_bytes] if distance_sql else params + ) if distance_sql: - tc_dict = {f"#{tc_id} Testcase {test_case} [smart search d={distance}]": tc_id for - (tc_id, _, test_case, distance) in data.fetchall()} + tc_dict = { + f"#{tc_id} Testcase {test_case} [smart search d={distance}]": tc_id + for (tc_id, _, test_case, distance) in data.fetchall() + } else: - tc_dict = {f"#{tc_id} Testcase {test_case}": tc_id for (tc_id, _, test_case) in data.fetchall()} + tc_dict = { + f"#{tc_id} Testcase {test_case}": tc_id + for (tc_id, _, test_case) in data.fetchall() + } st.subheader("Choose ONE of filtered test cases") option = st.selectbox( - "Choose a requirement to work with", - tc_dict.keys(), - key="filter_tc_id" + "Choose a requirement to work with", tc_dict.keys(), key="filter_tc_id" ) if option: @@ -117,10 +125,9 @@ def write_requirements(current_requirements: set[tuple]): with st.expander("🔍 Filters"): radius, limit = st.columns(2) with radius: - filter_radius = st.number_input("Insert a radius", - value=0.00, - step=0.01, - key="filter_radius") + filter_radius = st.number_input( + "Insert a radius", value=0.00, step=0.01, key="filter_radius" + ) st.info("Max distance to annotation") with limit: filter_limit = st.number_input( @@ -129,7 +136,7 @@ def write_requirements(current_requirements: set[tuple]): max_value=15, value=15, step=1, - key="filter_limit" + key="filter_limit", ) st.info("Limit of selected requirements") @@ -174,31 +181,72 @@ def write_requirements(current_requirements: set[tuple]): data = db.conn.execute(sql, params) rows = data.fetchall() if not rows: - st.error("There is no requested data to inspect.\n" - "Please check filters, completeness of the data or upload new annotations and requirements.") + st.error( + "There is no requested data to inspect.\n" + "Please check filters, completeness of the data or upload new annotations and requirements." + ) return None - - for (tc_id, test_script, test_case), group in groupby(rows, lambda x: x[0:3]): + for (tc_id, test_script, test_case), group in groupby( + rows, lambda x: x[0:3] + ): st.divider() with st.container(): st.subheader(f"Inspect #{tc_id} Test case '{test_case}'") st.write(f"From test script {test_script}") current_annotations = dict() - for _, _, _, anno_id, anno_summary, anno_embedding, distance, req_id, req_external_id, req_summary, req_embedding in group: + for ( + _, + _, + _, + anno_id, + anno_summary, + anno_embedding, + distance, + req_id, + req_external_id, + req_summary, + req_embedding, + ) in group: current_annotation = (anno_id, anno_summary, anno_embedding) - current_reqs = current_annotations.get(current_annotation, set()) + current_reqs = current_annotations.get( + current_annotation, set() + ) current_annotations.update({current_annotation: current_reqs}) - current_annotations[current_annotation].add((req_id, req_external_id, req_summary, req_embedding, distance)) + current_annotations[current_annotation].add( + ( + req_id, + req_external_id, + req_summary, + req_embedding, + distance, + ) + ) t_cs, anno, viz = st.columns(3) with t_cs: with st.container(border=True): st.write("Annotations") st.info("Annotations linked to chosen Test case") - reqs_by_anno = {f"#{anno_id} {anno_summary}": (anno_id, anno_summary, anno_embedding) for (anno_id, anno_summary, anno_embedding) in current_annotations.keys()} - radio_choice = st.radio("Annotation's id + summary", reqs_by_anno.keys(), key="radio_choice") - st.markdown(""" + reqs_by_anno = { + f"#{anno_id} {anno_summary}": ( + anno_id, + anno_summary, + anno_embedding, + ) + for ( + anno_id, + anno_summary, + anno_embedding, + ) in current_annotations.keys() + } + radio_choice = st.radio( + "Annotation's id + summary", + reqs_by_anno.keys(), + key="radio_choice", + ) + st.markdown( + """ - """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) if radio_choice: with anno: with st.container(border=True): st.write("Requirements") st.info("Found Requirements for chosen annotation") - write_requirements(current_annotations[reqs_by_anno[radio_choice]]) + write_requirements( + current_annotations[reqs_by_anno[radio_choice]] + ) with viz: with st.container(border=True): st.write("Visualization") - select = st.selectbox("Choose type of visualization", ["2D", "3D"]) + select = st.selectbox( + "Choose type of visualization", ["2D", "3D"] + ) req_embeddings = [ unpack_float32(req_emb) - for _, _, _, req_emb, _ in current_annotations[reqs_by_anno[radio_choice]] + for _, _, _, req_emb, _ in current_annotations[ + reqs_by_anno[radio_choice] + ] ] - annotation_vectors = np.array([np.array(unpack_float32(anno_embedding))]) + annotation_vectors = np.array( + [np.array(unpack_float32(anno_embedding))] + ) requirement_vectors = np.array(req_embeddings) if select == "2D": - plot_2_sets_in_one_2d(minifold_vectors_2d(annotation_vectors), - minifold_vectors_2d(requirement_vectors), - "Annotation", "Requirements",first_color="red", second_color="green") + plot_2_sets_in_one_2d( + minifold_vectors_2d(annotation_vectors), + minifold_vectors_2d(requirement_vectors), + "Annotation", + "Requirements", + first_color="red", + second_color="green", + ) else: - reqs_vectors_3d = minifold_vectors_3d(requirement_vectors) - anno_vectors_3d = minifold_vectors_3d(annotation_vectors) - plot_2_sets_in_one_3d(anno_vectors_3d, reqs_vectors_3d,"Annotation", - "Requirements") + reqs_vectors_3d = minifold_vectors_3d( + requirement_vectors + ) + anno_vectors_3d = minifold_vectors_3d( + annotation_vectors + ) + plot_2_sets_in_one_3d( + anno_vectors_3d, + reqs_vectors_3d, + "Annotation", + "Requirements", + ) db.conn.close() - - + + if __name__ == "__main__": - make_a_tc_report() \ No newline at end of file + make_a_tc_report() diff --git a/test2text/services/db/tables/test_case.py b/test2text/services/db/tables/test_case.py index 9b5096d..7db4ab4 100644 --- a/test2text/services/db/tables/test_case.py +++ b/test2text/services/db/tables/test_case.py @@ -28,17 +28,23 @@ def init_table(self): and vec_length(embedding) == $embedding_size) ) ) - """).substitute(embedding_size=self.embedding_size)) - + """).substitute(embedding_size=self.embedding_size) + ) - def insert(self, test_script: str, test_case: str, embedding:list[float] = None) -> Optional[int]: + def insert( + self, test_script: str, test_case: str, embedding: list[float] = None + ) -> Optional[int]: cursor = self.connection.execute( """ INSERT OR IGNORE INTO TestCases (test_script, test_case, embedding) VALUES (?, ?, ?) RETURNING id """, - (test_script, test_case, serialize_float32(embedding) if embedding is not None else None), + ( + test_script, + test_case, + serialize_float32(embedding) if embedding is not None else None, + ), ) result = cursor.fetchone() cursor.close() diff --git a/test2text/services/visualisation/visualize_vectors.py b/test2text/services/visualisation/visualize_vectors.py index d873af6..5865a53 100644 --- a/test2text/services/visualisation/visualize_vectors.py +++ b/test2text/services/visualisation/visualize_vectors.py @@ -96,35 +96,34 @@ def plot_vectors_3d(vectors_3d: np.array, title): def plot_2_sets_in_one_2d( - first_set_of_vec, second_set_of_vec, first_title, second_title, first_color="blue", second_color="green" + first_set_of_vec, + second_set_of_vec, + first_title, + second_title, + first_color="blue", + second_color="green", ): fig = go.Figure() fig.add_trace( go.Scatter( x=first_set_of_vec[:, 0], y=first_set_of_vec[:, 1], - mode='markers', + mode="markers", name=first_title, - marker=dict( - color=f"{first_color}" - ) + marker=dict(color=f"{first_color}"), ) ) fig.add_trace( go.Scatter( x=second_set_of_vec[:, 0], y=second_set_of_vec[:, 1], - mode='markers', + mode="markers", name=second_title, - marker=dict( - color=f"{second_color}" - ) + marker=dict(color=f"{second_color}"), ) ) fig.update_layout( - title=f"{first_title} vs {second_title}", - xaxis_title='X', - yaxis_title='Y' + title=f"{first_title} vs {second_title}", xaxis_title="X", yaxis_title="Y" ) st.plotly_chart(fig) diff --git a/tests/test_db/test_tables/test_requirements.py b/tests/test_db/test_tables/test_requirements.py index bade188..adb372d 100644 --- a/tests/test_db/test_tables/test_requirements.py +++ b/tests/test_db/test_tables/test_requirements.py @@ -59,4 +59,4 @@ def test_insert_short_embedding(self): def test_insert_long_embedding(self): long_embedding = [0.1] * (self.db.requirements.embedding_size + 1) id1 = self.db.requirements.insert("Test Requirement 7", long_embedding) - self.assertIsNone(id1) \ No newline at end of file + self.assertIsNone(id1) diff --git a/tests/test_db/test_tables/test_test_cases.py b/tests/test_db/test_tables/test_test_cases.py index dbb92ef..a1bd922 100644 --- a/tests/test_db/test_tables/test_test_cases.py +++ b/tests/test_db/test_tables/test_test_cases.py @@ -50,10 +50,14 @@ def test_insert_embedding(self): def test_insert_short_embedding(self): short_embedding = [0.1] * (self.db.test_cases.embedding_size - 1) - id1 = self.db.test_cases.insert("Test Script 13", "Test Case 13", short_embedding) + id1 = self.db.test_cases.insert( + "Test Script 13", "Test Case 13", short_embedding + ) self.assertIsNone(id1) def test_insert_long_embedding(self): long_embedding = [0.1] * (self.db.test_cases.embedding_size + 1) - id1 = self.db.test_cases.insert("Test Script 14", "Test Case 14",long_embedding) - self.assertIsNone(id1) \ No newline at end of file + id1 = self.db.test_cases.insert( + "Test Script 14", "Test Case 14", long_embedding + ) + self.assertIsNone(id1) From a513d6f96257403e37c93ce4c9a47ed5d3b648b5 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Tue, 19 Aug 2025 13:56:07 +0100 Subject: [PATCH 19/44] improved documentation --- test2text/pages/documentation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test2text/pages/documentation.py b/test2text/pages/documentation.py index cf1b334..b18171d 100644 --- a/test2text/pages/documentation.py +++ b/test2text/pages/documentation.py @@ -14,37 +14,37 @@ def show_documentation(): ## Application Pages Overview - ### 1. **About application** + ### :gray-badge[:material/info: About application] - **Description:** This page contains the user guide, a description of all pages, and instructions for working with the application. - **How to use:** Simply read the description to understand the purpose of the application. - ### 2. **Annotations** + ### :gray-badge[:material/database_upload: Annotations] - **Description:** Work with annotations that link requirements and test cases. - **How to use:** - View existing annotations. - Add new annotations to link requirements and test cases. - ### 3. **Requirements** + ### :gray-badge[:material/database_upload: Requirements] - **Description:** View selected requirements. - **How to use:** - Browse the list of requirements. - Add new requirements. - Link requirements with annotations and test cases. - ### 4. **Reports** + ### :gray-badge[:material/publish: Reports] - **Description:** Generate reports on test cases, requirements, and their relationships. - **How to use:** - Select the desired report type (e.g., by test case or by requirement). - Use filters to refine the report. - Analyze selected requirements or test cases by showed and plotted distances. - ### 5. **Cache distances** + ### :gray-badge[:material/cached: Controls] - **Description:** Update distances by embeddings (vector representations) for intelligent matching of requirements and annotations. - **How to use:** - Enter a search query or embedding. - Get relevant results based on vector search. - ### 6. **Visualize vectors** + ### :gray-badge[:material/dataset: Visualize vectors] - **Description:** Visualise distances by embeddings (vector representations) of requirements and annotations. - **How to use:** - Run script that will get all the data from database and will plot it to 2d and 3d graphics. From 8c9032ee32a9c2588a53ecbaa25b14bc6ad9da6c Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Wed, 20 Aug 2025 11:22:43 +0100 Subject: [PATCH 20/44] updated documentation and README.md --- README.md | 6 ++ test2text/pages/documentation.py | 109 ++++++++++++++++++------------- 2 files changed, 69 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 86edc7c..8bdda3a 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,12 @@ To install the dependencies, run the following command: uv sync ``` +To bring a code to a single format: + +```bash +uvx ruff format +``` + ### PyTorch version PyTorch is default set to CPU distributive: diff --git a/test2text/pages/documentation.py b/test2text/pages/documentation.py index b18171d..a7c6ba6 100644 --- a/test2text/pages/documentation.py +++ b/test2text/pages/documentation.py @@ -7,55 +7,72 @@ def show_documentation(): ## About the Application - **Test2Text** is a tool for showing saved test cases, requirements, and annotations, as well as for generating reports and analyzing requirements coverage by tests. - The application helps automate working with test requirements and provides a convenient interface for analyzing the relationships between test cases and requirements. - - --- - - ## Application Pages Overview - - ### :gray-badge[:material/info: About application] - - **Description:** This page contains the user guide, a description of all pages, and instructions for working with the application. - - **How to use:** Simply read the description to understand the purpose of the application. - - ### :gray-badge[:material/database_upload: Annotations] - - **Description:** Work with annotations that link requirements and test cases. - - **How to use:** - - View existing annotations. - - Add new annotations to link requirements and test cases. + **Test2Text** is a tool for computing requirement's coverage by tests and generating relevant reports. + The application provides a convenient interface for analysis the relationships between test cases and requirements. + """) + st.divider() + st.markdown(""" + ## HOW TO USE - ### :gray-badge[:material/database_upload: Requirements] - - **Description:** View selected requirements. - - **How to use:** - - Browse the list of requirements. - - Add new requirements. - - Link requirements with annotations and test cases. + ### Upload data + Click :gray-badge[:material/database_upload: Annotations] or :gray-badge[:material/database_upload: Requirements] to upload annotations and requirements from CSV files to the app's database. - ### :gray-badge[:material/publish: Reports] - - **Description:** Generate reports on test cases, requirements, and their relationships. - - **How to use:** - - Select the desired report type (e.g., by test case or by requirement). - - Use filters to refine the report. - - Analyze selected requirements or test cases by showed and plotted distances. - - ### :gray-badge[:material/cached: Controls] - - **Description:** Update distances by embeddings (vector representations) for intelligent matching of requirements and annotations. - - **How to use:** - - Enter a search query or embedding. - - Get relevant results based on vector search. + ### Renew data + Click :gray-badge[:material/cached: Controls] to transform missed and new texts into numeral vectors (embeddings). + Update distances by embeddings for intelligent matching of requirements and annotations. - ### :gray-badge[:material/dataset: Visualize vectors] - - **Description:** Visualise distances by embeddings (vector representations) of requirements and annotations. - - **How to use:** - - Run script that will get all the data from database and will plot it to 2d and 3d graphics. - --- - - ## Usage Tips + ### Generate reports + Click :gray-badge[:material/publish: Requirement's Report] or :gray-badge[:material/publish: Test cases Report] to make a report. + Use filters to select desired information. Analyze selected requirements or test cases by showed and plotted distances - - Upload annotations and requirements to the app's database. - - Link test cases with requirements via annotations for better coverage analysis. - - Use filters and search for quick access to the information you need. - - Regularly review reports to monitor the quality of your tests. - - Refer to the "Documentation" page for help on using the application. + ### Visualize saved data + Click :gray-badge[:material/dataset: Visualize vectors] to plot distances between vector representations of all requirements and annotations. """) + st.divider() + st.markdown(""" + ### Methodology + The application use a pre-trained transformer model from the [sentence-transformers library](https://huggingface.co/sentence-transformers), specifically [nomic-ai/nomic-embed-text-v1](https://huggingface.co/nomic-ai/nomic-embed-text-v1), a model trained to produce high-quality vector embeddings for text. + The model returns, for each input text, a high-dimensional NumPy array (vector) of floating point numbers (the embedding). + This arrays give us a possibility to calculate Euclidian distances between test cases annotations and requirements to view how similar or dissimilar the two texts. + """) + + st.markdown(""" + #### Euclidean (L2) Distance Formula + The Euclidean (L2) distance is a measure of the straight-line distance between two points (or vectors) in a multidimensional space. + It is widely used to compute the similarity or dissimilarity between two vector representations, such as text embeddings. + """) + st.markdown(""" + Suppose we have two vectors: + """) + st.latex(r""" + [ \mathbf{a} = [a_1, a_2, ..., a_n] ], + """) + st.latex(r""" + [ \mathbf{b} = [b_1, b_2, ..., b_n] ] + """) + + st.markdown(""" + The L2 distance between **a** and **b** is calculated as: + """) + + st.latex(r""" + [ L_2(\mathbf{a}, \mathbf{b}) = \sqrt{(a_1 - b_1)^2 + (a_2 - b_2)^2 + \cdots + (a_n - b_n)^2} ] + """) + + st.markdown(""" + Or, more compactly: + """) + + st.latex(r""" + [ L_2(\mathbf{a}, \mathbf{b}) = \sqrt{\sum_{i=1}^n (a_i - b_i)^2} ] + """) + + st.markdown(""" + - A **smaller L2 distance** means the vectors are more similar. + - A **larger L2 distance** indicates greater dissimilarity. + """) + + st.markdown(""" + This formula is commonly used for comparing the semantic similarity of embeddings generated from text using models like Sentence Transformers. + """) From 4c99fac928dd4c1846389a67628d98737aa595dc Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Wed, 20 Aug 2025 12:56:22 +0100 Subject: [PATCH 21/44] added with operator for DBClient --- convert_trace_annos.py | 81 ++- test2text/pages/reports/report_by_req.py | 493 ++++++++--------- test2text/pages/reports/report_by_tc.py | 523 +++++++++--------- test2text/services/db/client.py | 6 +- .../annotation_embeddings_controls.py | 81 ++- .../services/embeddings/cache_distances.py | 55 +- .../services/loaders/convert_trace_annos.py | 100 ++-- .../services/loaders/index_annotations.py | 47 +- .../services/loaders/index_requirements.py | 84 +-- .../visualisation/visualize_vectors.py | 107 ++-- 10 files changed, 784 insertions(+), 793 deletions(-) diff --git a/convert_trace_annos.py b/convert_trace_annos.py index ebe3f44..0a89837 100644 --- a/convert_trace_annos.py +++ b/convert_trace_annos.py @@ -13,50 +13,49 @@ def is_empty(value): return True if value == EMPTY else False -def trace_test_cases_to_annos(db_path: Path, trace_file_path: Path): - db = get_db_client() +def trace_test_cases_to_annos(trace_file_path: Path): + with get_db_client() as db: - insertions = list() - logger.info("Reading trace file and inserting annotations into table...") - with open(trace_file_path, mode="r", newline="", encoding="utf-8") as trace_file: - reader = csv.reader(trace_file) - current_tc = EMPTY - concat_summary = EMPTY - test_script = EMPTY - global_columns = next(reader) - for row in reader: - if row[0] == "TestCaseStart": - current_tc = row[1] - test_script = EMPTY - concat_summary = EMPTY - next(reader) - elif row[0] == "Summary": - continue - elif row[0] == "TestCaseEnd": - if not is_empty(current_tc) and not is_empty(concat_summary): - case_id = db.test_cases.get_or_insert( - test_script=test_script, test_case=current_tc - ) - annotation_id = db.annotations.get_or_insert(summary=concat_summary) - insertions.append( - db.cases_to_annos.insert( - case_id=case_id, annotation_id=annotation_id + insertions = list() + logger.info("Reading trace file and inserting annotations into table...") + with open(trace_file_path, mode="r", newline="", encoding="utf-8") as trace_file: + reader = csv.reader(trace_file) + current_tc = EMPTY + concat_summary = EMPTY + test_script = EMPTY + global_columns = next(reader) + for row in reader: + if row[0] == "TestCaseStart": + current_tc = row[1] + test_script = EMPTY + concat_summary = EMPTY + next(reader) + elif row[0] == "Summary": + continue + elif row[0] == "TestCaseEnd": + if not is_empty(current_tc) and not is_empty(concat_summary): + case_id = db.test_cases.get_or_insert( + test_script=test_script, test_case=current_tc ) - ) - else: - if not is_empty(row[global_columns.index("TestCase")]): - if current_tc != row[global_columns.index("TestCase")]: - current_tc = row[global_columns.index("TestCase")] - if is_empty(test_script) and not is_empty( - row[global_columns.index("TestScript")] - ): - test_script = row[global_columns.index("TestScript")] - concat_summary += row[0] + annotation_id = db.annotations.get_or_insert(summary=concat_summary) + insertions.append( + db.cases_to_annos.insert( + case_id=case_id, annotation_id=annotation_id + ) + ) + else: + if not is_empty(row[global_columns.index("TestCase")]): + if current_tc != row[global_columns.index("TestCase")]: + current_tc = row[global_columns.index("TestCase")] + if is_empty(test_script) and not is_empty( + row[global_columns.index("TestScript")] + ): + test_script = row[global_columns.index("TestScript")] + concat_summary += row[0] - db.conn.commit() - logger.info( - f"Inserted {len(insertions)} testcase-annotations pairs to database. Successful: {sum(insertions)}" - ) + logger.info( + f"Inserted {len(insertions)} testcase-annotations pairs to database. Successful: {sum(insertions)}" + ) if __name__ == "__main__": diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 50869db..c89e4b5 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -4,7 +4,6 @@ from sqlite_vec import serialize_float32 from test2text.services.db import get_db_client -from test2text.services.embeddings.embed import embed_requirement from test2text.services.utils import unpack_float32 from test2text.services.visualisation.visualize_vectors import ( minifold_vectors_2d, @@ -15,278 +14,278 @@ def make_a_report(): - db = get_db_client() - st.header("Test2Text Report") + with get_db_client() as db: + from test2text.services.embeddings.embed import embed_requirement + st.header("Test2Text Report") - def write_annotations(current_annotations: set[tuple]): - anno, summary, dist = st.columns(3) - with anno: - st.write("Annonation's id") - with summary: - st.write("Summary") - with dist: - st.write("Distance") - for anno_id, anno_summary, _, distance in current_annotations: + def write_annotations(current_annotations: set[tuple]): anno, summary, dist = st.columns(3) with anno: - st.write(f"{anno_id}") + st.write("Annonation's id") with summary: - st.write(anno_summary) + st.write("Summary") with dist: - st.write(round(distance, 2)) + st.write("Distance") + for anno_id, anno_summary, _, distance in current_annotations: + anno, summary, dist = st.columns(3) + with anno: + st.write(f"{anno_id}") + with summary: + st.write(anno_summary) + with dist: + st.write(round(distance, 2)) - with st.container(border=True): - st.subheader("Filter requirements") - with st.expander("🔍 Filters"): - r_id, summary, embed = st.columns(3) - with r_id: - filter_id = st.text_input("ID", value="", key="filter_id") - st.info("Filter by external ID") - with summary: - filter_summary = st.text_input( - "Text content", value="", key="filter_summary" - ) - st.info("Search concrete phrases using SQL like expressions") - with embed: - filter_embedding = st.text_input( - "Smart rearch", value="", key="filter_embedding" - ) - st.info("Search using embeddings") - - where_clauses = [] - params = [] - - if filter_id.strip(): - where_clauses.append("Requirements.id = ?") - params.append(filter_id.strip()) + with st.container(border=True): + st.subheader("Filter requirements") + with st.expander("🔍 Filters"): + r_id, summary, embed = st.columns(3) + with r_id: + filter_id = st.text_input("ID", value="", key="filter_id") + st.info("Filter by external ID") + with summary: + filter_summary = st.text_input( + "Text content", value="", key="filter_summary" + ) + st.info("Search concrete phrases using SQL like expressions") + with embed: + filter_embedding = st.text_input( + "Smart rearch", value="", key="filter_embedding" + ) + st.info("Search using embeddings") - if filter_summary.strip(): - where_clauses.append("Requirements.summary LIKE ?") - params.append(f"%{filter_summary.strip()}%") + where_clauses = [] + params = [] - distance_sql = "" - distance_order_sql = "" - query_embedding_bytes = None - if filter_embedding.strip(): - query_embedding = embed_requirement(filter_embedding.strip()) - query_embedding_bytes = serialize_float32(query_embedding) - distance_sql = ", vec_distance_L2(embedding, ?) AS distance" - distance_order_sql = "distance ASC, " + if filter_id.strip(): + where_clauses.append("Requirements.id = ?") + params.append(filter_id.strip()) - where_sql = "" - if where_clauses: - where_sql = f"WHERE {' AND '.join(where_clauses)}" + if filter_summary.strip(): + where_clauses.append("Requirements.summary LIKE ?") + params.append(f"%{filter_summary.strip()}%") - with st.container(border=True): - st.session_state.update({"req_form_submitting": True}) - sql = f""" - SELECT - Requirements.id as req_id, - Requirements.external_id as req_external_id, - Requirements.summary as req_summary - {distance_sql} - FROM - Requirements - {where_sql} - ORDER BY - {distance_order_sql}Requirements.id - """ - data = db.conn.execute( - sql, params + [query_embedding_bytes] if distance_sql else params - ) - if distance_sql: - requirements_dict = { - f"#{req_id} Requirement {req_external_id} [smart search d={distance}]": req_id - for (req_id, req_external_id, _, distance) in data.fetchall() - } - else: - requirements_dict = { - f"#{req_id} Requirement {req_external_id}": req_id - for (req_id, req_external_id, _) in data.fetchall() - } + distance_sql = "" + distance_order_sql = "" + query_embedding_bytes = None + if filter_embedding.strip(): + query_embedding = embed_requirement(filter_embedding.strip()) + query_embedding_bytes = serialize_float32(query_embedding) + distance_sql = ", vec_distance_L2(embedding, ?) AS distance" + distance_order_sql = "distance ASC, " - st.subheader("Choose 1 of filtered requirements") - option = st.selectbox( - "Choose a requirement to work with", - requirements_dict.keys(), - key="filter_req_id", - ) + where_sql = "" + if where_clauses: + where_sql = f"WHERE {' AND '.join(where_clauses)}" - if option: - clause = "Requirements.id = ?" - if clause in where_clauses: - idx = where_clauses.index(clause) - params.insert(idx, requirements_dict[option]) + with st.container(border=True): + st.session_state.update({"req_form_submitting": True}) + sql = f""" + SELECT + Requirements.id as req_id, + Requirements.external_id as req_external_id, + Requirements.summary as req_summary + {distance_sql} + FROM + Requirements + {where_sql} + ORDER BY + {distance_order_sql}Requirements.id + """ + data = db.conn.execute( + sql, params + [query_embedding_bytes] if distance_sql else params + ) + if distance_sql: + requirements_dict = { + f"#{req_id} Requirement {req_external_id} [smart search d={distance}]": req_id + for (req_id, req_external_id, _, distance) in data.fetchall() + } else: - where_clauses.append(clause) - params.append(requirements_dict[option]) + requirements_dict = { + f"#{req_id} Requirement {req_external_id}": req_id + for (req_id, req_external_id, _) in data.fetchall() + } - st.subheader("Filter Test cases") + st.subheader("Choose 1 of filtered requirements") + option = st.selectbox( + "Choose a requirement to work with", + requirements_dict.keys(), + key="filter_req_id", + ) - with st.expander("🔍 Filters"): - radius, limit = st.columns(2) - with radius: - filter_radius = st.number_input( - "Insert a radius", value=1.00, step=0.01, key="filter_radius" - ) - st.info("Max distance to annotation") - with limit: - filter_limit = st.number_input( - "Test case limit to show", - min_value=1, - max_value=15, - value=15, - step=1, - key="filter_limit", - ) - st.info("Limit of selected test cases") + if option: + clause = "Requirements.id = ?" + if clause in where_clauses: + idx = where_clauses.index(clause) + params.insert(idx, requirements_dict[option]) + else: + where_clauses.append(clause) + params.append(requirements_dict[option]) + + st.subheader("Filter Test cases") - if filter_radius: - where_clauses.append("distance >= ?") - params.append(f"{filter_radius}") + with st.expander("🔍 Filters"): + radius, limit = st.columns(2) + with radius: + filter_radius = st.number_input( + "Insert a radius", value=1.00, step=0.01, key="filter_radius" + ) + st.info("Max distance to annotation") + with limit: + filter_limit = st.number_input( + "Test case limit to show", + min_value=1, + max_value=15, + value=15, + step=1, + key="filter_limit", + ) + st.info("Limit of selected test cases") - if filter_limit: - params.append(f"{filter_limit}") + if filter_radius: + where_clauses.append("distance >= ?") + params.append(f"{filter_radius}") - where_sql = "" - if where_clauses: - where_sql = f"WHERE {' AND '.join(where_clauses)}" + if filter_limit: + params.append(f"{filter_limit}") - sql = f""" - SELECT - Requirements.id as req_id, - Requirements.external_id as req_external_id, - Requirements.summary as req_summary, - Requirements.embedding as req_embedding, - - Annotations.id as anno_id, - Annotations.summary as anno_summary, - Annotations.embedding as anno_embedding, - - AnnotationsToRequirements.cached_distance as distance, - - TestCases.id as case_id, - TestCases.test_script as test_script, - TestCases.test_case as test_case - FROM - Requirements - JOIN AnnotationsToRequirements ON Requirements.id = AnnotationsToRequirements.requirement_id - JOIN Annotations ON Annotations.id = AnnotationsToRequirements.annotation_id - JOIN CasesToAnnos ON Annotations.id = CasesToAnnos.annotation_id - JOIN TestCases ON TestCases.id = CasesToAnnos.case_id - {where_sql} - ORDER BY - Requirements.id, AnnotationsToRequirements.cached_distance, TestCases.id - LIMIT ? - """ - data = db.conn.execute(sql, params) - rows = data.fetchall() - if not rows: - st.error( - "There is no requested data to inspect.\n" - "Please check filters, completeness of the data or upload new annotations and requirements." - ) - return None + where_sql = "" + if where_clauses: + where_sql = f"WHERE {' AND '.join(where_clauses)}" - for (req_id, req_external_id, req_summary, req_embedding), group in groupby( - rows, lambda x: x[0:4] - ): - st.divider() - with st.container(): - st.subheader(f" Inspect Requirement {req_external_id}") - st.write(req_summary) - current_test_cases = dict() - for ( - _, - _, - _, - _, - anno_id, - anno_summary, - anno_embedding, - distance, - case_id, - test_script, - test_case, - ) in group: - current_annotation = current_test_cases.get(test_case, set()) - current_test_cases.update({test_case: current_annotation}) - current_test_cases[test_case].add( - (anno_id, anno_summary, anno_embedding, distance) - ) + sql = f""" + SELECT + Requirements.id as req_id, + Requirements.external_id as req_external_id, + Requirements.summary as req_summary, + Requirements.embedding as req_embedding, + + Annotations.id as anno_id, + Annotations.summary as anno_summary, + Annotations.embedding as anno_embedding, + + AnnotationsToRequirements.cached_distance as distance, + + TestCases.id as case_id, + TestCases.test_script as test_script, + TestCases.test_case as test_case + FROM + Requirements + JOIN AnnotationsToRequirements ON Requirements.id = AnnotationsToRequirements.requirement_id + JOIN Annotations ON Annotations.id = AnnotationsToRequirements.annotation_id + JOIN CasesToAnnos ON Annotations.id = CasesToAnnos.annotation_id + JOIN TestCases ON TestCases.id = CasesToAnnos.case_id + {where_sql} + ORDER BY + Requirements.id, AnnotationsToRequirements.cached_distance, TestCases.id + LIMIT ? + """ + data = db.conn.execute(sql, params) + rows = data.fetchall() + if not rows: + st.error( + "There is no requested data to inspect.\n" + "Please check filters, completeness of the data or upload new annotations and requirements." + ) + return None - t_cs, anno, viz = st.columns(3) - with t_cs: - with st.container(border=True): - st.write("Test Cases") - st.info("Test cases of chosen Requirement") - st.radio( - "Test cases name", - current_test_cases.keys(), - key="radio_choice", - ) - st.markdown( - """ - - """, - unsafe_allow_html=True, + for (req_id, req_external_id, req_summary, req_embedding), group in groupby( + rows, lambda x: x[0:4] + ): + st.divider() + with st.container(): + st.subheader(f" Inspect Requirement {req_external_id}") + st.write(req_summary) + current_test_cases = dict() + for ( + _, + _, + _, + _, + anno_id, + anno_summary, + anno_embedding, + distance, + case_id, + test_script, + test_case, + ) in group: + current_annotation = current_test_cases.get(test_case, set()) + current_test_cases.update({test_case: current_annotation}) + current_test_cases[test_case].add( + (anno_id, anno_summary, anno_embedding, distance) ) - if st.session_state["radio_choice"]: - with anno: - with st.container(border=True): - st.write("Annotations") - st.info("List of Annotations for chosen Test case") - write_annotations( - current_annotations=current_test_cases[ - st.session_state["radio_choice"] - ] - ) - with viz: - with st.container(border=True): - st.write("Visualization") - select = st.selectbox( - "Choose type of visualization", ["2D", "3D"] - ) - anno_embeddings = [ - unpack_float32(anno_emb) - for _, _, anno_emb, _ in current_test_cases[ - st.session_state["radio_choice"] - ] - ] - requirement_vectors = np.array( - [np.array(unpack_float32(req_embedding))] - ) - annotation_vectors = np.array(anno_embeddings) - if select == "2D": - plot_2_sets_in_one_2d( - minifold_vectors_2d(requirement_vectors), - minifold_vectors_2d(annotation_vectors), - "Requirement", - "Annotations", - first_color="red", - second_color="green", - ) - else: - reqs_vectors_3d = minifold_vectors_3d( - requirement_vectors + t_cs, anno, viz = st.columns(3) + with t_cs: + with st.container(border=True): + st.write("Test Cases") + st.info("Test cases of chosen Requirement") + st.radio( + "Test cases name", + current_test_cases.keys(), + key="radio_choice", + ) + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + + if st.session_state["radio_choice"]: + with anno: + with st.container(border=True): + st.write("Annotations") + st.info("List of Annotations for chosen Test case") + write_annotations( + current_annotations=current_test_cases[ + st.session_state["radio_choice"] + ] ) - anno_vectors_3d = minifold_vectors_3d( - annotation_vectors + with viz: + with st.container(border=True): + st.write("Visualization") + select = st.selectbox( + "Choose type of visualization", ["2D", "3D"] ) - plot_2_sets_in_one_3d( - reqs_vectors_3d, - anno_vectors_3d, - "Requerement", - "Annotations", + anno_embeddings = [ + unpack_float32(anno_emb) + for _, _, anno_emb, _ in current_test_cases[ + st.session_state["radio_choice"] + ] + ] + requirement_vectors = np.array( + [np.array(unpack_float32(req_embedding))] ) - db.conn.close() + annotation_vectors = np.array(anno_embeddings) + if select == "2D": + plot_2_sets_in_one_2d( + minifold_vectors_2d(requirement_vectors), + minifold_vectors_2d(annotation_vectors), + "Requirement", + "Annotations", + first_color="red", + second_color="green", + ) + else: + reqs_vectors_3d = minifold_vectors_3d( + requirement_vectors + ) + anno_vectors_3d = minifold_vectors_3d( + annotation_vectors + ) + plot_2_sets_in_one_3d( + reqs_vectors_3d, + anno_vectors_3d, + "Requirement", + "Annotations", + ) if __name__ == "__main__": diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 73fb49d..f5eb7ac 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -4,7 +4,6 @@ from sqlite_vec import serialize_float32 from test2text.services.db import get_db_client -from test2text.services.embeddings.embed import embed_requirement from test2text.services.utils import unpack_float32 from test2text.services.visualisation.visualize_vectors import ( minifold_vectors_2d, @@ -15,296 +14,296 @@ def make_a_tc_report(): - db = get_db_client() - st.header("Test2Text Report") + with get_db_client() as db: + from test2text.services.embeddings.embed import embed_requirement + st.header("Test2Text Report") - def write_requirements(current_requirements: set[tuple]): - req, summary, dist = st.columns(3) - with req: - st.write("Requirement") - with summary: - st.write("Summary") - with dist: - st.write("Distance") - - for req_id, req_external_id, req_summary, _, distance in current_requirements: + def write_requirements(current_requirements: set[tuple]): req, summary, dist = st.columns(3) with req: - st.write(f"#{req_id} Requirement {req_external_id}") + st.write("Requirement") with summary: - st.write(req_summary) + st.write("Summary") with dist: - st.write(distance) - - with st.container(border=True): - st.subheader("Filter test cases") - with st.expander("🔍 Filters"): - r_id, summary, embed = st.columns(3) - with r_id: - filter_id = st.text_input("ID", value="", key="filter_id") - st.info("Filter by external ID") - with summary: - filter_summary = st.text_input( - "Text content", value="", key="filter_summary" - ) - st.info("Search concrete phrases using SQL like expressions") - with embed: - filter_embedding = st.text_input( - "Smart rearch", value="", key="filter_embedding" - ) - st.info("Search using embeddings") - - where_clauses = [] - params = [] - - if filter_id.strip(): - where_clauses.append("Testcases.id = ?") - params.append(filter_id.strip()) - - if filter_summary.strip(): - where_clauses.append("Testcases.test_case LIKE ?") - params.append(f"%{filter_summary.strip()}%") - - distance_sql = "" - distance_order_sql = "" - query_embedding_bytes = None - if filter_embedding.strip(): - query_embedding = embed_requirement(filter_embedding.strip()) - query_embedding_bytes = serialize_float32(query_embedding) - distance_sql = ", vec_distance_L2(embedding, ?) AS distance" - distance_order_sql = "distance ASC, " - - where_sql = "" - if where_clauses: - where_sql = f"WHERE {' AND '.join(where_clauses)}" - - with st.container(border=True): - st.session_state.update({"tc_form_submitting": True}) - sql = f""" - SELECT - TestCases.id as case_id, - TestCases.test_script as test_script, - TestCases.test_case as test_case - {distance_sql} - FROM - TestCases - {where_sql} - ORDER BY - {distance_order_sql}TestCases.id - """ - data = db.conn.execute( - sql, params + [query_embedding_bytes] if distance_sql else params - ) - if distance_sql: - tc_dict = { - f"#{tc_id} Testcase {test_case} [smart search d={distance}]": tc_id - for (tc_id, _, test_case, distance) in data.fetchall() - } - else: - tc_dict = { - f"#{tc_id} Testcase {test_case}": tc_id - for (tc_id, _, test_case) in data.fetchall() - } + st.write("Distance") - st.subheader("Choose ONE of filtered test cases") - option = st.selectbox( - "Choose a requirement to work with", tc_dict.keys(), key="filter_tc_id" - ) - - if option: - clause = "Testcases.id = ?" - if clause in where_clauses: - idx = where_clauses.index(clause) - params.insert(idx, tc_dict[option]) - else: - where_clauses.append(clause) - params.append(tc_dict[option]) - - st.subheader("Filter Requirements") + for req_id, req_external_id, req_summary, _, distance in current_requirements: + req, summary, dist = st.columns(3) + with req: + st.write(f"#{req_id} Requirement {req_external_id}") + with summary: + st.write(req_summary) + with dist: + st.write(distance) + with st.container(border=True): + st.subheader("Filter test cases") with st.expander("🔍 Filters"): - radius, limit = st.columns(2) - with radius: - filter_radius = st.number_input( - "Insert a radius", value=0.00, step=0.01, key="filter_radius" + r_id, summary, embed = st.columns(3) + with r_id: + filter_id = st.text_input("ID", value="", key="filter_id") + st.info("Filter by external ID") + with summary: + filter_summary = st.text_input( + "Text content", value="", key="filter_summary" ) - st.info("Max distance to annotation") - with limit: - filter_limit = st.number_input( - "Requirement's limit to show", - min_value=1, - max_value=15, - value=15, - step=1, - key="filter_limit", + st.info("Search concrete phrases using SQL like expressions") + with embed: + filter_embedding = st.text_input( + "Smart rearch", value="", key="filter_embedding" ) - st.info("Limit of selected requirements") + st.info("Search using embeddings") + + where_clauses = [] + params = [] - if filter_radius: - where_clauses.append("distance <= ?") - params.append(f"{filter_radius}") + if filter_id.strip(): + where_clauses.append("Testcases.id = ?") + params.append(filter_id.strip()) - if filter_limit: - params.append(f"{filter_limit}") + if filter_summary.strip(): + where_clauses.append("Testcases.test_case LIKE ?") + params.append(f"%{filter_summary.strip()}%") + + distance_sql = "" + distance_order_sql = "" + query_embedding_bytes = None + if filter_embedding.strip(): + query_embedding = embed_requirement(filter_embedding.strip()) + query_embedding_bytes = serialize_float32(query_embedding) + distance_sql = ", vec_distance_L2(embedding, ?) AS distance" + distance_order_sql = "distance ASC, " where_sql = "" if where_clauses: where_sql = f"WHERE {' AND '.join(where_clauses)}" + with st.container(border=True): + st.session_state.update({"tc_form_submitting": True}) sql = f""" - SELECT - TestCases.id as case_id, - TestCases.test_script as test_script, - TestCases.test_case as test_case, - - Annotations.id as anno_id, - Annotations.summary as anno_summary, - Annotations.embedding as anno_embedding, - - AnnotationsToRequirements.cached_distance as distance, - - Requirements.id as req_id, - Requirements.external_id as req_external_id, - Requirements.summary as req_summary, - Requirements.embedding as req_embedding - FROM - TestCases - JOIN CasesToAnnos ON TestCases.id = CasesToAnnos.case_id - JOIN Annotations ON Annotations.id = CasesToAnnos.annotation_id - JOIN AnnotationsToRequirements ON Annotations.id = AnnotationsToRequirements.annotation_id - JOIN Requirements ON Requirements.id = AnnotationsToRequirements.requirement_id - {where_sql} - ORDER BY - case_id, distance, req_id - LIMIT ? - """ - data = db.conn.execute(sql, params) - rows = data.fetchall() - if not rows: - st.error( - "There is no requested data to inspect.\n" - "Please check filters, completeness of the data or upload new annotations and requirements." - ) - return None + SELECT + TestCases.id as case_id, + TestCases.test_script as test_script, + TestCases.test_case as test_case + {distance_sql} + FROM + TestCases + {where_sql} + ORDER BY + {distance_order_sql}TestCases.id + """ + data = db.conn.execute( + sql, params + [query_embedding_bytes] if distance_sql else params + ) + if distance_sql: + tc_dict = { + f"#{tc_id} Testcase {test_case} [smart search d={distance}]": tc_id + for (tc_id, _, test_case, distance) in data.fetchall() + } + else: + tc_dict = { + f"#{tc_id} Testcase {test_case}": tc_id + for (tc_id, _, test_case) in data.fetchall() + } + + st.subheader("Choose ONE of filtered test cases") + option = st.selectbox( + "Choose a requirement to work with", tc_dict.keys(), key="filter_tc_id" + ) + + if option: + clause = "Testcases.id = ?" + if clause in where_clauses: + idx = where_clauses.index(clause) + params.insert(idx, tc_dict[option]) + else: + where_clauses.append(clause) + params.append(tc_dict[option]) - for (tc_id, test_script, test_case), group in groupby( - rows, lambda x: x[0:3] - ): - st.divider() - with st.container(): - st.subheader(f"Inspect #{tc_id} Test case '{test_case}'") - st.write(f"From test script {test_script}") - current_annotations = dict() - for ( - _, - _, - _, - anno_id, - anno_summary, - anno_embedding, - distance, - req_id, - req_external_id, - req_summary, - req_embedding, - ) in group: - current_annotation = (anno_id, anno_summary, anno_embedding) - current_reqs = current_annotations.get( - current_annotation, set() + st.subheader("Filter Requirements") + + with st.expander("🔍 Filters"): + radius, limit = st.columns(2) + with radius: + filter_radius = st.number_input( + "Insert a radius", value=0.00, step=0.01, key="filter_radius" ) - current_annotations.update({current_annotation: current_reqs}) - current_annotations[current_annotation].add( - ( - req_id, - req_external_id, - req_summary, - req_embedding, - distance, - ) + st.info("Max distance to annotation") + with limit: + filter_limit = st.number_input( + "Requirement's limit to show", + min_value=1, + max_value=15, + value=15, + step=1, + key="filter_limit", ) + st.info("Limit of selected requirements") - t_cs, anno, viz = st.columns(3) - with t_cs: - with st.container(border=True): - st.write("Annotations") - st.info("Annotations linked to chosen Test case") - reqs_by_anno = { - f"#{anno_id} {anno_summary}": ( - anno_id, - anno_summary, - anno_embedding, - ) - for ( - anno_id, - anno_summary, - anno_embedding, - ) in current_annotations.keys() - } - radio_choice = st.radio( - "Annotation's id + summary", - reqs_by_anno.keys(), - key="radio_choice", + if filter_radius: + where_clauses.append("distance <= ?") + params.append(f"{filter_radius}") + + if filter_limit: + params.append(f"{filter_limit}") + + where_sql = "" + if where_clauses: + where_sql = f"WHERE {' AND '.join(where_clauses)}" + + sql = f""" + SELECT + TestCases.id as case_id, + TestCases.test_script as test_script, + TestCases.test_case as test_case, + + Annotations.id as anno_id, + Annotations.summary as anno_summary, + Annotations.embedding as anno_embedding, + + AnnotationsToRequirements.cached_distance as distance, + + Requirements.id as req_id, + Requirements.external_id as req_external_id, + Requirements.summary as req_summary, + Requirements.embedding as req_embedding + FROM + TestCases + JOIN CasesToAnnos ON TestCases.id = CasesToAnnos.case_id + JOIN Annotations ON Annotations.id = CasesToAnnos.annotation_id + JOIN AnnotationsToRequirements ON Annotations.id = AnnotationsToRequirements.annotation_id + JOIN Requirements ON Requirements.id = AnnotationsToRequirements.requirement_id + {where_sql} + ORDER BY + case_id, distance, req_id + LIMIT ? + """ + data = db.conn.execute(sql, params) + rows = data.fetchall() + if not rows: + st.error( + "There is no requested data to inspect.\n" + "Please check filters, completeness of the data or upload new annotations and requirements." + ) + return None + + for (tc_id, test_script, test_case), group in groupby( + rows, lambda x: x[0:3] + ): + st.divider() + with st.container(): + st.subheader(f"Inspect #{tc_id} Test case '{test_case}'") + st.write(f"From test script {test_script}") + current_annotations = dict() + for ( + _, + _, + _, + anno_id, + anno_summary, + anno_embedding, + distance, + req_id, + req_external_id, + req_summary, + req_embedding, + ) in group: + current_annotation = (anno_id, anno_summary, anno_embedding) + current_reqs = current_annotations.get( + current_annotation, set() ) - st.markdown( - """ - - """, - unsafe_allow_html=True, + current_annotations.update({current_annotation: current_reqs}) + current_annotations[current_annotation].add( + ( + req_id, + req_external_id, + req_summary, + req_embedding, + distance, + ) ) - if radio_choice: - with anno: - with st.container(border=True): - st.write("Requirements") - st.info("Found Requirements for chosen annotation") - write_requirements( - current_annotations[reqs_by_anno[radio_choice]] + t_cs, anno, viz = st.columns(3) + with t_cs: + with st.container(border=True): + st.write("Annotations") + st.info("Annotations linked to chosen Test case") + reqs_by_anno = { + f"#{anno_id} {anno_summary}": ( + anno_id, + anno_summary, + anno_embedding, ) - with viz: - with st.container(border=True): - st.write("Visualization") - select = st.selectbox( - "Choose type of visualization", ["2D", "3D"] - ) - req_embeddings = [ - unpack_float32(req_emb) - for _, _, _, req_emb, _ in current_annotations[ - reqs_by_anno[radio_choice] - ] - ] - annotation_vectors = np.array( - [np.array(unpack_float32(anno_embedding))] - ) - requirement_vectors = np.array(req_embeddings) - if select == "2D": - plot_2_sets_in_one_2d( - minifold_vectors_2d(annotation_vectors), - minifold_vectors_2d(requirement_vectors), - "Annotation", - "Requirements", - first_color="red", - second_color="green", - ) - else: - reqs_vectors_3d = minifold_vectors_3d( - requirement_vectors + for ( + anno_id, + anno_summary, + anno_embedding, + ) in current_annotations.keys() + } + radio_choice = st.radio( + "Annotation's id + summary", + reqs_by_anno.keys(), + key="radio_choice", + ) + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + + if radio_choice: + with anno: + with st.container(border=True): + st.write("Requirements") + st.info("Found Requirements for chosen annotation") + write_requirements( + current_annotations[reqs_by_anno[radio_choice]] ) - anno_vectors_3d = minifold_vectors_3d( - annotation_vectors + with viz: + with st.container(border=True): + st.write("Visualization") + select = st.selectbox( + "Choose type of visualization", ["2D", "3D"] ) - plot_2_sets_in_one_3d( - anno_vectors_3d, - reqs_vectors_3d, - "Annotation", - "Requirements", + req_embeddings = [ + unpack_float32(req_emb) + for _, _, _, req_emb, _ in current_annotations[ + reqs_by_anno[radio_choice] + ] + ] + annotation_vectors = np.array( + [np.array(unpack_float32(anno_embedding))] ) - db.conn.close() + requirement_vectors = np.array(req_embeddings) + if select == "2D": + plot_2_sets_in_one_2d( + minifold_vectors_2d(annotation_vectors), + minifold_vectors_2d(requirement_vectors), + "Annotation", + "Requirements", + first_color="red", + second_color="green", + ) + else: + reqs_vectors_3d = minifold_vectors_3d( + requirement_vectors + ) + anno_vectors_3d = minifold_vectors_3d( + annotation_vectors + ) + plot_2_sets_in_one_3d( + anno_vectors_3d, + reqs_vectors_3d, + "Annotation", + "Requirements", + ) if __name__ == "__main__": diff --git a/test2text/services/db/client.py b/test2text/services/db/client.py index 6d11046..d55bdc7 100644 --- a/test2text/services/db/client.py +++ b/test2text/services/db/client.py @@ -67,8 +67,8 @@ def close(self): self.conn.commit() self.conn.close() - def __del__(self): - self.close() - def __exit__(self, exc_type, exc_val, exc_tb): self.close() + + def __enter__(self): + return self \ No newline at end of file diff --git a/test2text/services/embeddings/annotation_embeddings_controls.py b/test2text/services/embeddings/annotation_embeddings_controls.py index 7bf1650..1c74134 100644 --- a/test2text/services/embeddings/annotation_embeddings_controls.py +++ b/test2text/services/embeddings/annotation_embeddings_controls.py @@ -6,53 +6,52 @@ def count_all_annotations() -> int: - db = get_db_client() - count = db.conn.execute("SELECT COUNT(*) FROM Annotations").fetchone()[0] - return count + with get_db_client() as db: + count = db.conn.execute("SELECT COUNT(*) FROM Annotations").fetchone()[0] + return count def count_embedded_annotations() -> int: - db = get_db_client() - count = db.conn.execute( - "SELECT COUNT(*) FROM Annotations WHERE embedding IS NOT NULL" - ).fetchone()[0] - return count + with get_db_client() as db: + count = db.conn.execute( + "SELECT COUNT(*) FROM Annotations WHERE embedding IS NOT NULL" + ).fetchone()[0] + return count OnProgress = Callable[[float], None] def embed_annotations(*_, embed_all=False, on_progress: OnProgress = None): - from .embed import embed_annotations_batch - - db = get_db_client() - annotations_count = count_all_annotations() - embedded_annotations_count = count_embedded_annotations() - if embed_all: - annotations_to_embed = annotations_count - else: - annotations_to_embed = annotations_count - embedded_annotations_count - - batch = [] - - def write_batch(batch: list[tuple[int, str]]): - embeddings = embed_annotations_batch([annotation for _, annotation in batch]) - for i, (anno_id, annotation) in enumerate(batch): - embedding = embeddings[i] - db.annotations.set_embedding(anno_id, embedding) - db.conn.commit() - - annotations = db.conn.execute(f""" - SELECT id, summary FROM Annotations - {"WHERE embedding IS NULL" if not embed_all else ""} - """) - - for i, (anno_id, summary) in enumerate(annotations.fetchall()): - if on_progress: - on_progress((i + 1) / annotations_to_embed) - batch.append((anno_id, summary)) - if len(batch) == BATCH_SIZE: - write_batch(batch) - batch = [] - - write_batch(batch) + with get_db_client() as db: + from .embed import embed_annotations_batch + annotations_count = count_all_annotations() + embedded_annotations_count = count_embedded_annotations() + if embed_all: + annotations_to_embed = annotations_count + else: + annotations_to_embed = annotations_count - embedded_annotations_count + + batch = [] + + def write_batch(batch: list[tuple[int, str]]): + embeddings = embed_annotations_batch([annotation for _, annotation in batch]) + for i, (anno_id, annotation) in enumerate(batch): + embedding = embeddings[i] + db.annotations.set_embedding(anno_id, embedding) + db.conn.commit() + + annotations = db.conn.execute(f""" + SELECT id, summary FROM Annotations + {"WHERE embedding IS NULL" if not embed_all else ""} + """) + + for i, (anno_id, summary) in enumerate(annotations.fetchall()): + if on_progress: + on_progress((i + 1) / annotations_to_embed) + batch.append((anno_id, summary)) + if len(batch) == BATCH_SIZE: + write_batch(batch) + batch = [] + + write_batch(batch) diff --git a/test2text/services/embeddings/cache_distances.py b/test2text/services/embeddings/cache_distances.py index 545a955..4633918 100644 --- a/test2text/services/embeddings/cache_distances.py +++ b/test2text/services/embeddings/cache_distances.py @@ -2,34 +2,33 @@ def refresh_and_get_distances() -> list[float]: - db = get_db_client() - db.annos_to_reqs.recreate_table() - # Link requirements to annotations - annotations = db.conn.execute(""" - SELECT - Annotations.id AS anno_id, - Requirements.id AS req_id, - vec_distance_L2(Annotations.embedding, Requirements.embedding) AS distance - FROM Annotations, Requirements - WHERE Annotations.embedding IS NOT NULL AND Requirements.embedding IS NOT NULL - ORDER BY req_id, distance - """) - # Visualize distances - distances = [] - current_req_id = None - current_req_annos = 0 - for i, (anno_id, req_id, distance) in enumerate(annotations.fetchall()): - distances.append(distance) - if req_id != current_req_id: - current_req_id = req_id - current_req_annos = 0 - if current_req_annos < 5 or distance < 0.7: - db.annos_to_reqs.insert( - annotation_id=anno_id, requirement_id=req_id, cached_distance=distance - ) - current_req_annos += 1 - db.conn.commit() - return distances + with get_db_client() as db: + db.annos_to_reqs.recreate_table() + # Link requirements to annotations + annotations = db.conn.execute(""" + SELECT + Annotations.id AS anno_id, + Requirements.id AS req_id, + vec_distance_L2(Annotations.embedding, Requirements.embedding) AS distance + FROM Annotations, Requirements + WHERE Annotations.embedding IS NOT NULL AND Requirements.embedding IS NOT NULL + ORDER BY req_id, distance + """) + # Visualize distances + distances = [] + current_req_id = None + current_req_annos = 0 + for i, (anno_id, req_id, distance) in enumerate(annotations.fetchall()): + distances.append(distance) + if req_id != current_req_id: + current_req_id = req_id + current_req_annos = 0 + if current_req_annos < 5 or distance < 0.7: + db.annos_to_reqs.insert( + annotation_id=anno_id, requirement_id=req_id, cached_distance=distance + ) + current_req_annos += 1 + return distances if __name__ == "__main__": diff --git a/test2text/services/loaders/convert_trace_annos.py b/test2text/services/loaders/convert_trace_annos.py index 3764c8a..e01ee0f 100644 --- a/test2text/services/loaders/convert_trace_annos.py +++ b/test2text/services/loaders/convert_trace_annos.py @@ -26,58 +26,56 @@ def write_table_row(*args, **kwargs): def trace_test_cases_to_annos(trace_files: list): - db = get_db_client() + with get_db_client() as db: - st.info( - "Reading trace files and inserting test case + annotations pairs into database..." - ) - write_table_row( - "File name", - "Extracted pairs test cases + annotations", - "Inserted to data base", - "Ignored (dublicates or wrong id)", - ) - for i, file in enumerate(trace_files): - stringio = io.StringIO(file.getvalue().decode("utf-8")) - reader = csv.reader(stringio) - current_tc = EMPTY - concat_summary = EMPTY - test_script = EMPTY - global_columns = next(reader) - insertions = list() - for row in reader: - if row[0] == "TestCaseStart": - current_tc = row[1] - test_script = EMPTY - concat_summary = EMPTY - elif row[0] == "Summary": - continue - elif row[0] == "TestCaseEnd": - if not is_empty(current_tc) and not is_empty(concat_summary): - case_id = db.test_cases.get_or_insert( - test_script=test_script, test_case=current_tc - ) - annotation_id = db.annotations.get_or_insert(summary=concat_summary) - insertions.append( - db.cases_to_annos.insert( - case_id=case_id, annotation_id=annotation_id - ) - ) - else: - if not is_empty(row[global_columns.index("TestCase")]): - if current_tc != row[global_columns.index("TestCase")]: - current_tc = row[global_columns.index("TestCase")] - if is_empty(test_script) and not is_empty( - row[global_columns.index("TestScript")] - ): - test_script = row[global_columns.index("TestScript")] - concat_summary += row[0] + st.info( + "Reading trace files and inserting test case + annotations pairs into database..." + ) write_table_row( - file.name, - len(insertions), - sum(insertions), - len(insertions) - sum(insertions), + "File name", + "Extracted pairs test cases + annotations", + "Inserted to data base", + "Ignored (dublicates or wrong id)", ) + for i, file in enumerate(trace_files): + stringio = io.StringIO(file.getvalue().decode("utf-8")) + reader = csv.reader(stringio) + current_tc = EMPTY + concat_summary = EMPTY + test_script = EMPTY + global_columns = next(reader) + insertions = list() + for row in reader: + if row[0] == "TestCaseStart": + current_tc = row[1] + test_script = EMPTY + concat_summary = EMPTY + elif row[0] == "Summary": + continue + elif row[0] == "TestCaseEnd": + if not is_empty(current_tc) and not is_empty(concat_summary): + case_id = db.test_cases.get_or_insert( + test_script=test_script, test_case=current_tc + ) + annotation_id = db.annotations.get_or_insert(summary=concat_summary) + insertions.append( + db.cases_to_annos.insert( + case_id=case_id, annotation_id=annotation_id + ) + ) + else: + if not is_empty(row[global_columns.index("TestCase")]): + if current_tc != row[global_columns.index("TestCase")]: + current_tc = row[global_columns.index("TestCase")] + if is_empty(test_script) and not is_empty( + row[global_columns.index("TestScript")] + ): + test_script = row[global_columns.index("TestScript")] + concat_summary += row[0] + write_table_row( + file.name, + len(insertions), + sum(insertions), + len(insertions) - sum(insertions), + ) - db.conn.commit() - db.conn.close() diff --git a/test2text/services/loaders/index_annotations.py b/test2text/services/loaders/index_annotations.py index e9de241..52236b9 100644 --- a/test2text/services/loaders/index_annotations.py +++ b/test2text/services/loaders/index_annotations.py @@ -12,27 +12,26 @@ def index_annotations_from_files(files: list, *_, on_file_start: OnFileStart = None): - db = get_db_client() - - for i, file in enumerate(files): - file_counter = None - if on_file_start: - file_counter = on_file_start(f"{i + 1}/{len(files)}", file.name) - stringio = io.StringIO(file.getvalue().decode("utf-8")) - reader = csv.reader(stringio) - insertions = [] - - for i, row in enumerate(reader): - if file_counter: - file_counter.write(i) - [summary, _, test_script, test_case, *_] = row - anno_id = db.annotations.get_or_insert(summary=summary) - tc_id = db.test_cases.get_or_insert( - test_script=test_script, test_case=test_case - ) - insertions.append( - db.cases_to_annos.insert(case_id=tc_id, annotation_id=anno_id) - ) - - db.conn.commit() - return None + with get_db_client() as db: + + for i, file in enumerate(files): + file_counter = None + if on_file_start: + file_counter = on_file_start(f"{i + 1}/{len(files)}", file.name) + stringio = io.StringIO(file.getvalue().decode("utf-8")) + reader = csv.reader(stringio) + insertions = [] + + for i, row in enumerate(reader): + if file_counter: + file_counter.write(i) + [summary, _, test_script, test_case, *_] = row + anno_id = db.annotations.get_or_insert(summary=summary) + tc_id = db.test_cases.get_or_insert( + test_script=test_script, test_case=test_case + ) + insertions.append( + db.cases_to_annos.insert(case_id=tc_id, annotation_id=anno_id) + ) + + return None diff --git a/test2text/services/loaders/index_requirements.py b/test2text/services/loaders/index_requirements.py index 713eb75..bfe71eb 100644 --- a/test2text/services/loaders/index_requirements.py +++ b/test2text/services/loaders/index_requirements.py @@ -19,49 +19,49 @@ def index_requirements_from_files( on_start_file: OnStartFile = None, on_requirement_written: OnRequirementWritten = None, ) -> tuple[int]: - db = get_db_client() - for i, file in enumerate(files): - if on_start_file: - on_start_file(i + 1, file.name) - stringio = io.StringIO(file.getvalue().decode("utf-8")) - reader = csv.reader(stringio) + with get_db_client() as db: + for i, file in enumerate(files): + if on_start_file: + on_start_file(i + 1, file.name) + stringio = io.StringIO(file.getvalue().decode("utf-8")) + reader = csv.reader(stringio) - try: - for _ in range(3): - next(reader) - except StopIteration: - raise ValueError( - f"The uploaded CSV file {file.name} does not have enough lines. " - "Please ensure it has at least 3 lines of data." - ) + try: + for _ in range(3): + next(reader) + except StopIteration: + raise ValueError( + f"The uploaded CSV file {file.name} does not have enough lines. " + "Please ensure it has at least 3 lines of data." + ) - batch = [] - last_requirement = "" - - def write_batch(): - nonlocal batch - embeddings = embed_requirements_batch( - [requirement for _, requirement in batch] - ) - for i, (external_id, requirement) in enumerate(batch): - embedding = embeddings[i] - db.requirements.insert(requirement, embedding, external_id) - if on_requirement_written: - on_requirement_written(external_id) - db.conn.commit() batch = [] + last_requirement = "" + + def write_batch(): + nonlocal batch + embeddings = embed_requirements_batch( + [requirement for _, requirement in batch] + ) + for i, (external_id, requirement) in enumerate(batch): + embedding = embeddings[i] + db.requirements.insert(requirement, embedding, external_id) + if on_requirement_written: + on_requirement_written(external_id) + db.conn.commit() + batch = [] - for row in reader: - [external_id, requirement, *_] = row - if requirement.startswith("..."): - requirement = last_requirement + requirement[3:] - last_requirement = requirement - batch.append((external_id, requirement)) - if len(batch) == BATCH_SIZE: - write_batch() - write_batch() - # Check requirements - cursor = db.conn.execute(""" - SELECT COUNT(*) FROM Requirements - """) - return cursor.fetchone()[0] + for row in reader: + [external_id, requirement, *_] = row + if requirement.startswith("..."): + requirement = last_requirement + requirement[3:] + last_requirement = requirement + batch.append((external_id, requirement)) + if len(batch) == BATCH_SIZE: + write_batch() + write_batch() + # Check requirements + cursor = db.conn.execute(""" + SELECT COUNT(*) FROM Requirements + """) + return cursor.fetchone()[0] diff --git a/test2text/services/visualisation/visualize_vectors.py b/test2text/services/visualisation/visualize_vectors.py index 5865a53..6480dd8 100644 --- a/test2text/services/visualisation/visualize_vectors.py +++ b/test2text/services/visualisation/visualize_vectors.py @@ -165,61 +165,60 @@ def plot_2_sets_in_one_3d( def visualize_vectors(): st.header("Visualizing vectors") - db = get_db_client() - Req_tab, Anno_tab, Req_Anno_tab = st.tabs( - ["Requirements", "Annotations", "Requirements vs Annotations"] - ) - with Req_tab: - st.subheader("Requirements vectors") - progress_bar = st.progress(0) - - requirement_vectors = extract_requirement_vectors(db) - progress_bar.progress(20, "Extracted") - reqs_vectors_2d = minifold_vectors_2d(requirement_vectors) - progress_bar.progress(40, "Minifolded for 2D") - plot_vectors_2d(reqs_vectors_2d, "Requirements") - progress_bar.progress(60, "Plotted in 2D") - reqs_vectors_3d = minifold_vectors_3d(requirement_vectors) - progress_bar.progress(80, "Minifolded for 3D") - plot_vectors_3d(reqs_vectors_3d, "Requirements") - progress_bar.progress(100, "Plotted in 3D") - - with Anno_tab: - st.subheader("Annotations vectors") - progress_bar = st.progress(0) - - annotation_vectors = extract_annotation_vectors(db) - progress_bar.progress(20, "Extracted") - anno_vectors_2d = minifold_vectors_2d(annotation_vectors) - progress_bar.progress(40, "Minifolded for 2D") - plot_vectors_2d(anno_vectors_2d, "Annotations") - progress_bar.progress(60, "Plotted in 2D") - anno_vectors_3d = minifold_vectors_3d(annotation_vectors) - progress_bar.progress(80, "Minifolded for 3D") - plot_vectors_3d(anno_vectors_3d, "Annotations") - progress_bar.progress(100, "Plotted in 3D") - - with Req_Anno_tab: - # Show how these 2 groups of vectors are different - st.subheader("Requirements vs Annotations") - progress_bar = st.progress(40, "Extracted") - plot_2_sets_in_one_2d( - reqs_vectors_2d, anno_vectors_2d, "Requerements", "Annotations" - ) - progress_bar.progress(60, "Plotted in 2D") - - plot_2_sets_in_one_3d( - reqs_vectors_3d, anno_vectors_3d, "Requerements", "Annotations" - ) - progress_bar.progress(80, "Plotted in 3D") - - anno_vectors_2d = minifold_vectors_2d(extract_closest_annotation_vectors(db)) - - plot_2_sets_in_one_2d( - reqs_vectors_2d, anno_vectors_2d, "Requerements", "Annotations" + with get_db_client() as db: + Req_tab, Anno_tab, Req_Anno_tab = st.tabs( + ["Requirements", "Annotations", "Requirements vs Annotations"] ) - progress_bar.progress(100, "Minifolded and Plotted in 2D") - db.conn.close() + with Req_tab: + st.subheader("Requirements vectors") + progress_bar = st.progress(0) + + requirement_vectors = extract_requirement_vectors(db) + progress_bar.progress(20, "Extracted") + reqs_vectors_2d = minifold_vectors_2d(requirement_vectors) + progress_bar.progress(40, "Minifolded for 2D") + plot_vectors_2d(reqs_vectors_2d, "Requirements") + progress_bar.progress(60, "Plotted in 2D") + reqs_vectors_3d = minifold_vectors_3d(requirement_vectors) + progress_bar.progress(80, "Minifolded for 3D") + plot_vectors_3d(reqs_vectors_3d, "Requirements") + progress_bar.progress(100, "Plotted in 3D") + + with Anno_tab: + st.subheader("Annotations vectors") + progress_bar = st.progress(0) + + annotation_vectors = extract_annotation_vectors(db) + progress_bar.progress(20, "Extracted") + anno_vectors_2d = minifold_vectors_2d(annotation_vectors) + progress_bar.progress(40, "Minifolded for 2D") + plot_vectors_2d(anno_vectors_2d, "Annotations") + progress_bar.progress(60, "Plotted in 2D") + anno_vectors_3d = minifold_vectors_3d(annotation_vectors) + progress_bar.progress(80, "Minifolded for 3D") + plot_vectors_3d(anno_vectors_3d, "Annotations") + progress_bar.progress(100, "Plotted in 3D") + + with Req_Anno_tab: + # Show how these 2 groups of vectors are different + st.subheader("Requirements vs Annotations") + progress_bar = st.progress(40, "Extracted") + plot_2_sets_in_one_2d( + reqs_vectors_2d, anno_vectors_2d, "Requerements", "Annotations" + ) + progress_bar.progress(60, "Plotted in 2D") + + plot_2_sets_in_one_3d( + reqs_vectors_3d, anno_vectors_3d, "Requerements", "Annotations" + ) + progress_bar.progress(80, "Plotted in 3D") + + anno_vectors_2d = minifold_vectors_2d(extract_closest_annotation_vectors(db)) + + plot_2_sets_in_one_2d( + reqs_vectors_2d, anno_vectors_2d, "Requerements", "Annotations" + ) + progress_bar.progress(100, "Minifolded and Plotted in 2D") if __name__ == "__main__": From a307b363f844e654dc81b3eb4457e8dc28d89b8a Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Wed, 20 Aug 2025 12:56:53 +0100 Subject: [PATCH 22/44] fixed documentation formatting --- test2text/pages/documentation.py | 54 ++++++++++++++++---------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/test2text/pages/documentation.py b/test2text/pages/documentation.py index a7c6ba6..4bb248c 100644 --- a/test2text/pages/documentation.py +++ b/test2text/pages/documentation.py @@ -7,72 +7,72 @@ def show_documentation(): ## About the Application - **Test2Text** is a tool for computing requirement's coverage by tests and generating relevant reports. - The application provides a convenient interface for analysis the relationships between test cases and requirements. + **Test2Text** is a tool for computing requirement's coverage by tests and generating relevant reports. + The application provides a convenient interface for analysis the relationships between test cases and requirements. """) st.divider() st.markdown(""" ## HOW TO USE ### Upload data - Click :gray-badge[:material/database_upload: Annotations] or :gray-badge[:material/database_upload: Requirements] to upload annotations and requirements from CSV files to the app's database. + Click :gray-badge[:material/database_upload: Annotations] or :gray-badge[:material/database_upload: Requirements] to upload annotations and requirements from CSV files to the app's database. ### Renew data - Click :gray-badge[:material/cached: Controls] to transform missed and new texts into numeral vectors (embeddings). - Update distances by embeddings for intelligent matching of requirements and annotations. + Click :gray-badge[:material/cached: Controls] to transform missed and new texts into numeral vectors (embeddings). + Update distances by embeddings for intelligent matching of requirements and annotations. ### Generate reports - Click :gray-badge[:material/publish: Requirement's Report] or :gray-badge[:material/publish: Test cases Report] to make a report. - Use filters to select desired information. Analyze selected requirements or test cases by showed and plotted distances + Click :gray-badge[:material/publish: Requirement's Report] or :gray-badge[:material/publish: Test cases Report] to make a report. + Use filters to select desired information. Analyze selected requirements or test cases by showed and plotted distances ### Visualize saved data - Click :gray-badge[:material/dataset: Visualize vectors] to plot distances between vector representations of all requirements and annotations. + Click :gray-badge[:material/dataset: Visualize vectors] to plot distances between vector representations of all requirements and annotations. """) st.divider() st.markdown(""" ### Methodology - The application use a pre-trained transformer model from the [sentence-transformers library](https://huggingface.co/sentence-transformers), specifically [nomic-ai/nomic-embed-text-v1](https://huggingface.co/nomic-ai/nomic-embed-text-v1), a model trained to produce high-quality vector embeddings for text. - The model returns, for each input text, a high-dimensional NumPy array (vector) of floating point numbers (the embedding). - This arrays give us a possibility to calculate Euclidian distances between test cases annotations and requirements to view how similar or dissimilar the two texts. + The application use a pre-trained transformer model from the [sentence-transformers library](https://huggingface.co/sentence-transformers), specifically [nomic-ai/nomic-embed-text-v1](https://huggingface.co/nomic-ai/nomic-embed-text-v1), a model trained to produce high-quality vector embeddings for text. + The model returns, for each input text, a high-dimensional NumPy array (vector) of floating point numbers (the embedding). + This arrays give us a possibility to calculate Euclidian distances between test cases annotations and requirements to view how similar or dissimilar the two texts. """) st.markdown(""" #### Euclidean (L2) Distance Formula - The Euclidean (L2) distance is a measure of the straight-line distance between two points (or vectors) in a multidimensional space. - It is widely used to compute the similarity or dissimilarity between two vector representations, such as text embeddings. + The Euclidean (L2) distance is a measure of the straight-line distance between two points (or vectors) in a multidimensional space. + It is widely used to compute the similarity or dissimilarity between two vector representations, such as text embeddings. """) st.markdown(""" - Suppose we have two vectors: - """) - st.latex(r""" - [ \mathbf{a} = [a_1, a_2, ..., a_n] ], + Suppose we have two vectors: """) st.latex(r""" - [ \mathbf{b} = [b_1, b_2, ..., b_n] ] - """) + [ \mathbf{a} = [a_1, a_2, ..., a_n] ], + """) + st.latex(r""" + [ \mathbf{b} = [b_1, b_2, ..., b_n] ] + """) st.markdown(""" - The L2 distance between **a** and **b** is calculated as: - """) + The L2 distance between **a** and **b** is calculated as: + """) st.latex(r""" - [ L_2(\mathbf{a}, \mathbf{b}) = \sqrt{(a_1 - b_1)^2 + (a_2 - b_2)^2 + \cdots + (a_n - b_n)^2} ] + [ L_2(\mathbf{a}, \mathbf{b}) = \sqrt{(a_1 - b_1)^2 + (a_2 - b_2)^2 + \cdots + (a_n - b_n)^2} ] """) st.markdown(""" - Or, more compactly: + Or, more compactly: """) st.latex(r""" - [ L_2(\mathbf{a}, \mathbf{b}) = \sqrt{\sum_{i=1}^n (a_i - b_i)^2} ] + [ L_2(\mathbf{a}, \mathbf{b}) = \sqrt{\sum_{i=1}^n (a_i - b_i)^2} ] """) st.markdown(""" - - A **smaller L2 distance** means the vectors are more similar. - - A **larger L2 distance** indicates greater dissimilarity. + - A **smaller L2 distance** means the vectors are more similar. + - A **larger L2 distance** indicates greater dissimilarity. """) st.markdown(""" - This formula is commonly used for comparing the semantic similarity of embeddings generated from text using models like Sentence Transformers. + This formula is commonly used for comparing the semantic similarity of embeddings generated from text using models like Sentence Transformers. """) From efd6bdb1e683d2809b9981dc1707c44e8e627ec2 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Wed, 20 Aug 2025 19:16:09 +0100 Subject: [PATCH 23/44] added functions for counting entries --- test2text/pages/documentation.py | 38 +++++++++--- test2text/services/db/client.py | 61 ++++++++++++++++++- .../annotation_embeddings_controls.py | 15 +---- 3 files changed, 91 insertions(+), 23 deletions(-) diff --git a/test2text/pages/documentation.py b/test2text/pages/documentation.py index 4bb248c..87be4e8 100644 --- a/test2text/pages/documentation.py +++ b/test2text/pages/documentation.py @@ -1,5 +1,7 @@ import streamlit as st +from test2text.services.db import get_db_client + def show_documentation(): st.markdown(""" @@ -9,32 +11,50 @@ def show_documentation(): **Test2Text** is a tool for computing requirement's coverage by tests and generating relevant reports. The application provides a convenient interface for analysis the relationships between test cases and requirements. + """) st.divider() st.markdown(""" ## HOW TO USE ### Upload data - Click :gray-badge[:material/database_upload: Annotations] or :gray-badge[:material/database_upload: Requirements] to upload annotations and requirements from CSV files to the app's database. + Click :gray-badge[:material/database_upload: Annotations] or :gray-badge[:material/database_upload: Requirements] to upload annotations and requirements from CSV files to the app's database. + Then Annotations and Requirements are loaded and Test cases are linked to Annotations go to the next chapter. ### Renew data Click :gray-badge[:material/cached: Controls] to transform missed and new texts into numeral vectors (embeddings). - Update distances by embeddings for intelligent matching of requirements and annotations. + Update distances by embeddings for intelligent matching of Requirements and Annotations. + After distances are refreshed (all Annotations linked with Requirement by distances) go to the next chapter. ### Generate reports Click :gray-badge[:material/publish: Requirement's Report] or :gray-badge[:material/publish: Test cases Report] to make a report. - Use filters to select desired information. Analyze selected requirements or test cases by showed and plotted distances + Use filters and Smart search based on embeddings to select desired information. + Analyze selected requirements or test cases by plotted distances. + List of all requirements/test cases and their annotations are shown here. ### Visualize saved data - Click :gray-badge[:material/dataset: Visualize vectors] to plot distances between vector representations of all requirements and annotations. + Click :gray-badge[:material/dataset: Visualize vectors] to plot distances between vector representations of all requirements and annotations in multidimensional spaces. """) st.divider() + with get_db_client() as db: + st.markdown("""## Database overview""") + table, row_count = st.columns(2) + with table: + st.write("Table name") + with row_count: + st.write("Number of entries") + for table_name, count in db.get_db_full_info.items(): + with table: + st.write(table_name) + with row_count: + st.write(count) + st.divider() st.markdown(""" ### Methodology The application use a pre-trained transformer model from the [sentence-transformers library](https://huggingface.co/sentence-transformers), specifically [nomic-ai/nomic-embed-text-v1](https://huggingface.co/nomic-ai/nomic-embed-text-v1), a model trained to produce high-quality vector embeddings for text. The model returns, for each input text, a high-dimensional NumPy array (vector) of floating point numbers (the embedding). - This arrays give us a possibility to calculate Euclidian distances between test cases annotations and requirements to view how similar or dissimilar the two texts. + This arrays give a possibility to calculate Euclidian distances between test cases annotations and requirements to show how similar or dissimilar the two texts. """) st.markdown(""" @@ -46,10 +66,10 @@ def show_documentation(): Suppose we have two vectors: """) st.latex(r""" - [ \mathbf{a} = [a_1, a_2, ..., a_n] ], + \mathbf{a} = [a_1, a_2, ..., a_n] , """) st.latex(r""" - [ \mathbf{b} = [b_1, b_2, ..., b_n] ] + \mathbf{b} = [b_1, b_2, ..., b_n] """) st.markdown(""" @@ -57,7 +77,7 @@ def show_documentation(): """) st.latex(r""" - [ L_2(\mathbf{a}, \mathbf{b}) = \sqrt{(a_1 - b_1)^2 + (a_2 - b_2)^2 + \cdots + (a_n - b_n)^2} ] + L_2(\mathbf{a}, \mathbf{b}) = \sqrt{(a_1 - b_1)^2 + (a_2 - b_2)^2 + \cdots + (a_n - b_n)^2} """) st.markdown(""" @@ -65,7 +85,7 @@ def show_documentation(): """) st.latex(r""" - [ L_2(\mathbf{a}, \mathbf{b}) = \sqrt{\sum_{i=1}^n (a_i - b_i)^2} ] + L_2(\mathbf{a}, \mathbf{b}) = \sqrt{\sum_{i=1}^n (a_i - b_i)^2} """) st.markdown(""" diff --git a/test2text/services/db/client.py b/test2text/services/db/client.py index d55bdc7..283731b 100644 --- a/test2text/services/db/client.py +++ b/test2text/services/db/client.py @@ -1,4 +1,6 @@ import sqlite3 +from typing import Union + import sqlite_vec import logging @@ -71,4 +73,61 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.close() def __enter__(self): - return self \ No newline at end of file + return self + + def get_table_names(self): + """ + Returns a list of all user-defined tables in the database. + + :return: List[str] - table names + """ + cursor = self.conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';" + ) + tables = [row[0] for row in cursor.fetchall()] + cursor.close() + return tables + + + @property + def get_db_full_info(self): + """ + Returns table information: + - row_count: number of records in the table + - columns: list of dicts as in get_extended_table_info (name, type, non-NULL count, typeof distribution) + + :return: dict + """ + db_tables_info = {} + table_names = self.get_table_names() + for table_name in table_names: + row_count = self.count_all_entries_in_table(table_name) + db_tables_info.update({ + table_name: row_count, + }) + return db_tables_info + + def count_all_entries_in_table(self, table: str) -> int: + count = self.conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0] + return count + + def count_notnull_entries_in_table(self,column: str, table: str) -> Union[int, None]: + if self.has_column(column, table): + count = self.conn.execute( + f"SELECT COUNT(*) FROM {table} WHERE {column} IS NOT NULL" + ).fetchone()[0] + return count + return None + + def has_column(self, column_name: str, table_name: str) -> bool: + """ + Returns True if the table has a column, otherwise False. + + :param column_name: name of the column + :param table_name: name of the table + :return: bool + """ + cursor = self.conn.execute(f'PRAGMA table_info("{table_name}")') + columns = [row[1] for row in cursor.fetchall()] # row[1] is the column name + cursor.close() + return column_name in columns \ No newline at end of file diff --git a/test2text/services/embeddings/annotation_embeddings_controls.py b/test2text/services/embeddings/annotation_embeddings_controls.py index 1c74134..60c3431 100644 --- a/test2text/services/embeddings/annotation_embeddings_controls.py +++ b/test2text/services/embeddings/annotation_embeddings_controls.py @@ -5,18 +5,7 @@ BATCH_SIZE = 30 -def count_all_annotations() -> int: - with get_db_client() as db: - count = db.conn.execute("SELECT COUNT(*) FROM Annotations").fetchone()[0] - return count - -def count_embedded_annotations() -> int: - with get_db_client() as db: - count = db.conn.execute( - "SELECT COUNT(*) FROM Annotations WHERE embedding IS NOT NULL" - ).fetchone()[0] - return count OnProgress = Callable[[float], None] @@ -25,8 +14,8 @@ def count_embedded_annotations() -> int: def embed_annotations(*_, embed_all=False, on_progress: OnProgress = None): with get_db_client() as db: from .embed import embed_annotations_batch - annotations_count = count_all_annotations() - embedded_annotations_count = count_embedded_annotations() + annotations_count = db.count_all_entries_in_table("Annotations") + embedded_annotations_count = db.count_embedded_entries_in_table("Annotations") if embed_all: annotations_to_embed = annotations_count else: From f4edf025f5f278473145bde9f5fa36f5e9732f50 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Wed, 20 Aug 2025 19:16:29 +0100 Subject: [PATCH 24/44] updated pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 73c607c..6bf19e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "test2text" version = "0.1.0" description = "" authors = [ - {name = "Nikolai Dorofeev - d0rich",email = "dorich2000@gmail.com"} + {name = "Nikolai Dorofeev - d0rich", email = "dorich2000@gmail.com", name = "Anna Yamkovaya - anngoroshi", email = "avyamkovaya@gmail.com"} ] readme = "README.md" requires-python = ">=3.9" From 413e04c35df946839cf181127846f70d6048ee7d Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 10:33:49 +0100 Subject: [PATCH 25/44] fix some small mistakes --- test2text/pages/reports/report_by_req.py | 2 +- test2text/pages/reports/report_by_tc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index c89e4b5..8a65d4d 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -142,7 +142,7 @@ def write_annotations(current_annotations: set[tuple]): st.info("Limit of selected test cases") if filter_radius: - where_clauses.append("distance >= ?") + where_clauses.append("distance <= ?") params.append(f"{filter_radius}") if filter_limit: diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index f5eb7ac..7c5c20c 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -126,7 +126,7 @@ def write_requirements(current_requirements: set[tuple]): radius, limit = st.columns(2) with radius: filter_radius = st.number_input( - "Insert a radius", value=0.00, step=0.01, key="filter_radius" + "Insert a radius", value=1.00, step=0.01, key="filter_radius" ) st.info("Max distance to annotation") with limit: From e9d00b2dabe04f7506ccd48f58e554f341357557 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:53:10 +0100 Subject: [PATCH 26/44] fixed a text wrapping in radio boxes --- test2text/pages/reports/report_by_req.py | 4 ++-- test2text/pages/reports/report_by_tc.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 8a65d4d..8003a11 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -229,9 +229,9 @@ def write_annotations(current_annotations: set[tuple]): """ """, diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 7c5c20c..3e15d5f 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -249,9 +249,9 @@ def write_requirements(current_requirements: set[tuple]): """ """, From a1ebd2eccc05988ac2fbbea6ea219333a7b2f0b5 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 13:32:47 +0100 Subject: [PATCH 27/44] fixed pyproject.toml --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6bf19e0..4d82387 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,8 @@ name = "test2text" version = "0.1.0" description = "" authors = [ - {name = "Nikolai Dorofeev - d0rich", email = "dorich2000@gmail.com", name = "Anna Yamkovaya - anngoroshi", email = "avyamkovaya@gmail.com"} + {name = "Nikolai Dorofeev - d0rich", email = "dorich2000@gmail.com"}, + {name = "Anna Yamkovaya - anngoroshi", email = "avyamkovaya@gmail.com"} ] readme = "README.md" requires-python = ">=3.9" From 8f4cb3c10019dbb5f132f57a1f436307e55c0d34 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 13:34:09 +0100 Subject: [PATCH 28/44] fixed formatting --- convert_trace_annos.py | 9 ++++-- test2text/pages/reports/report_by_req.py | 27 ++++++++++++----- test2text/pages/reports/report_by_tc.py | 30 +++++++++++++++---- test2text/services/db/client.py | 15 ++++++---- .../annotation_embeddings_controls.py | 8 ++--- .../services/embeddings/cache_distances.py | 4 ++- .../services/loaders/convert_trace_annos.py | 6 ++-- .../services/loaders/index_annotations.py | 1 - .../visualisation/visualize_vectors.py | 4 ++- 9 files changed, 72 insertions(+), 32 deletions(-) diff --git a/convert_trace_annos.py b/convert_trace_annos.py index 0a89837..2ba729d 100644 --- a/convert_trace_annos.py +++ b/convert_trace_annos.py @@ -15,10 +15,11 @@ def is_empty(value): def trace_test_cases_to_annos(trace_file_path: Path): with get_db_client() as db: - insertions = list() logger.info("Reading trace file and inserting annotations into table...") - with open(trace_file_path, mode="r", newline="", encoding="utf-8") as trace_file: + with open( + trace_file_path, mode="r", newline="", encoding="utf-8" + ) as trace_file: reader = csv.reader(trace_file) current_tc = EMPTY concat_summary = EMPTY @@ -37,7 +38,9 @@ def trace_test_cases_to_annos(trace_file_path: Path): case_id = db.test_cases.get_or_insert( test_script=test_script, test_case=current_tc ) - annotation_id = db.annotations.get_or_insert(summary=concat_summary) + annotation_id = db.annotations.get_or_insert( + summary=concat_summary + ) insertions.append( db.cases_to_annos.insert( case_id=case_id, annotation_id=annotation_id diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 8003a11..6f94c63 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -16,6 +16,7 @@ def make_a_report(): with get_db_client() as db: from test2text.services.embeddings.embed import embed_requirement + st.header("Test2Text Report") def write_annotations(current_annotations: set[tuple]): @@ -127,7 +128,10 @@ def write_annotations(current_annotations: set[tuple]): radius, limit = st.columns(2) with radius: filter_radius = st.number_input( - "Insert a radius", value=1.00, step=0.01, key="filter_radius" + "Insert a radius", + value=1.00, + step=0.01, + key="filter_radius", ) st.info("Max distance to annotation") with limit: @@ -188,9 +192,12 @@ def write_annotations(current_annotations: set[tuple]): ) return None - for (req_id, req_external_id, req_summary, req_embedding), group in groupby( - rows, lambda x: x[0:4] - ): + for ( + req_id, + req_external_id, + req_summary, + req_embedding, + ), group in groupby(rows, lambda x: x[0:4]): st.divider() with st.container(): st.subheader(f" Inspect Requirement {req_external_id}") @@ -209,7 +216,9 @@ def write_annotations(current_annotations: set[tuple]): test_script, test_case, ) in group: - current_annotation = current_test_cases.get(test_case, set()) + current_annotation = current_test_cases.get( + test_case, set() + ) current_test_cases.update({test_case: current_annotation}) current_test_cases[test_case].add( (anno_id, anno_summary, anno_embedding, distance) @@ -242,7 +251,9 @@ def write_annotations(current_annotations: set[tuple]): with anno: with st.container(border=True): st.write("Annotations") - st.info("List of Annotations for chosen Test case") + st.info( + "List of Annotations for chosen Test case" + ) write_annotations( current_annotations=current_test_cases[ st.session_state["radio_choice"] @@ -266,7 +277,9 @@ def write_annotations(current_annotations: set[tuple]): annotation_vectors = np.array(anno_embeddings) if select == "2D": plot_2_sets_in_one_2d( - minifold_vectors_2d(requirement_vectors), + minifold_vectors_2d( + requirement_vectors + ), minifold_vectors_2d(annotation_vectors), "Requirement", "Annotations", diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 3e15d5f..8b9df66 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -16,6 +16,7 @@ def make_a_tc_report(): with get_db_client() as db: from test2text.services.embeddings.embed import embed_requirement + st.header("Test2Text Report") def write_requirements(current_requirements: set[tuple]): @@ -27,7 +28,13 @@ def write_requirements(current_requirements: set[tuple]): with dist: st.write("Distance") - for req_id, req_external_id, req_summary, _, distance in current_requirements: + for ( + req_id, + req_external_id, + req_summary, + _, + distance, + ) in current_requirements: req, summary, dist = st.columns(3) with req: st.write(f"#{req_id} Requirement {req_external_id}") @@ -126,7 +133,10 @@ def write_requirements(current_requirements: set[tuple]): radius, limit = st.columns(2) with radius: filter_radius = st.number_input( - "Insert a radius", value=1.00, step=0.01, key="filter_radius" + "Insert a radius", + value=1.00, + step=0.01, + key="filter_radius", ) st.info("Max distance to annotation") with limit: @@ -212,7 +222,9 @@ def write_requirements(current_requirements: set[tuple]): current_reqs = current_annotations.get( current_annotation, set() ) - current_annotations.update({current_annotation: current_reqs}) + current_annotations.update( + {current_annotation: current_reqs} + ) current_annotations[current_annotation].add( ( req_id, @@ -262,9 +274,13 @@ def write_requirements(current_requirements: set[tuple]): with anno: with st.container(border=True): st.write("Requirements") - st.info("Found Requirements for chosen annotation") + st.info( + "Found Requirements for chosen annotation" + ) write_requirements( - current_annotations[reqs_by_anno[radio_choice]] + current_annotations[ + reqs_by_anno[radio_choice] + ] ) with viz: with st.container(border=True): @@ -285,7 +301,9 @@ def write_requirements(current_requirements: set[tuple]): if select == "2D": plot_2_sets_in_one_2d( minifold_vectors_2d(annotation_vectors), - minifold_vectors_2d(requirement_vectors), + minifold_vectors_2d( + requirement_vectors + ), "Annotation", "Requirements", first_color="red", diff --git a/test2text/services/db/client.py b/test2text/services/db/client.py index 283731b..6051ce2 100644 --- a/test2text/services/db/client.py +++ b/test2text/services/db/client.py @@ -88,7 +88,6 @@ def get_table_names(self): cursor.close() return tables - @property def get_db_full_info(self): """ @@ -102,16 +101,20 @@ def get_db_full_info(self): table_names = self.get_table_names() for table_name in table_names: row_count = self.count_all_entries_in_table(table_name) - db_tables_info.update({ - table_name: row_count, - }) + db_tables_info.update( + { + table_name: row_count, + } + ) return db_tables_info def count_all_entries_in_table(self, table: str) -> int: count = self.conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0] return count - def count_notnull_entries_in_table(self,column: str, table: str) -> Union[int, None]: + def count_notnull_entries_in_table( + self, column: str, table: str + ) -> Union[int, None]: if self.has_column(column, table): count = self.conn.execute( f"SELECT COUNT(*) FROM {table} WHERE {column} IS NOT NULL" @@ -130,4 +133,4 @@ def has_column(self, column_name: str, table_name: str) -> bool: cursor = self.conn.execute(f'PRAGMA table_info("{table_name}")') columns = [row[1] for row in cursor.fetchall()] # row[1] is the column name cursor.close() - return column_name in columns \ No newline at end of file + return column_name in columns diff --git a/test2text/services/embeddings/annotation_embeddings_controls.py b/test2text/services/embeddings/annotation_embeddings_controls.py index 60c3431..5f228af 100644 --- a/test2text/services/embeddings/annotation_embeddings_controls.py +++ b/test2text/services/embeddings/annotation_embeddings_controls.py @@ -5,15 +5,13 @@ BATCH_SIZE = 30 - - - OnProgress = Callable[[float], None] def embed_annotations(*_, embed_all=False, on_progress: OnProgress = None): with get_db_client() as db: from .embed import embed_annotations_batch + annotations_count = db.count_all_entries_in_table("Annotations") embedded_annotations_count = db.count_embedded_entries_in_table("Annotations") if embed_all: @@ -24,7 +22,9 @@ def embed_annotations(*_, embed_all=False, on_progress: OnProgress = None): batch = [] def write_batch(batch: list[tuple[int, str]]): - embeddings = embed_annotations_batch([annotation for _, annotation in batch]) + embeddings = embed_annotations_batch( + [annotation for _, annotation in batch] + ) for i, (anno_id, annotation) in enumerate(batch): embedding = embeddings[i] db.annotations.set_embedding(anno_id, embedding) diff --git a/test2text/services/embeddings/cache_distances.py b/test2text/services/embeddings/cache_distances.py index 4633918..ffa373a 100644 --- a/test2text/services/embeddings/cache_distances.py +++ b/test2text/services/embeddings/cache_distances.py @@ -25,7 +25,9 @@ def refresh_and_get_distances() -> list[float]: current_req_annos = 0 if current_req_annos < 5 or distance < 0.7: db.annos_to_reqs.insert( - annotation_id=anno_id, requirement_id=req_id, cached_distance=distance + annotation_id=anno_id, + requirement_id=req_id, + cached_distance=distance, ) current_req_annos += 1 return distances diff --git a/test2text/services/loaders/convert_trace_annos.py b/test2text/services/loaders/convert_trace_annos.py index e01ee0f..aad1ce9 100644 --- a/test2text/services/loaders/convert_trace_annos.py +++ b/test2text/services/loaders/convert_trace_annos.py @@ -27,7 +27,6 @@ def write_table_row(*args, **kwargs): def trace_test_cases_to_annos(trace_files: list): with get_db_client() as db: - st.info( "Reading trace files and inserting test case + annotations pairs into database..." ) @@ -57,7 +56,9 @@ def trace_test_cases_to_annos(trace_files: list): case_id = db.test_cases.get_or_insert( test_script=test_script, test_case=current_tc ) - annotation_id = db.annotations.get_or_insert(summary=concat_summary) + annotation_id = db.annotations.get_or_insert( + summary=concat_summary + ) insertions.append( db.cases_to_annos.insert( case_id=case_id, annotation_id=annotation_id @@ -78,4 +79,3 @@ def trace_test_cases_to_annos(trace_files: list): sum(insertions), len(insertions) - sum(insertions), ) - diff --git a/test2text/services/loaders/index_annotations.py b/test2text/services/loaders/index_annotations.py index 52236b9..781229b 100644 --- a/test2text/services/loaders/index_annotations.py +++ b/test2text/services/loaders/index_annotations.py @@ -13,7 +13,6 @@ def index_annotations_from_files(files: list, *_, on_file_start: OnFileStart = None): with get_db_client() as db: - for i, file in enumerate(files): file_counter = None if on_file_start: diff --git a/test2text/services/visualisation/visualize_vectors.py b/test2text/services/visualisation/visualize_vectors.py index 6480dd8..f6d5f2c 100644 --- a/test2text/services/visualisation/visualize_vectors.py +++ b/test2text/services/visualisation/visualize_vectors.py @@ -213,7 +213,9 @@ def visualize_vectors(): ) progress_bar.progress(80, "Plotted in 3D") - anno_vectors_2d = minifold_vectors_2d(extract_closest_annotation_vectors(db)) + anno_vectors_2d = minifold_vectors_2d( + extract_closest_annotation_vectors(db) + ) plot_2_sets_in_one_2d( reqs_vectors_2d, anno_vectors_2d, "Requerements", "Annotations" From 9b7575b5a78c80e23fa3c492bdd4721c6c8a295b Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 13:51:19 +0100 Subject: [PATCH 29/44] fixed round distance and removed extra columns --- test2text/pages/reports/report_by_req.py | 34 +++++++-------------- test2text/pages/reports/report_by_tc.py | 38 ++++++++---------------- test2text/services/utils/math_utils.py | 2 ++ 3 files changed, 26 insertions(+), 48 deletions(-) create mode 100644 test2text/services/utils/math_utils.py diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 6f94c63..f22edce 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -3,38 +3,26 @@ import streamlit as st from sqlite_vec import serialize_float32 -from test2text.services.db import get_db_client -from test2text.services.utils import unpack_float32 -from test2text.services.visualisation.visualize_vectors import ( - minifold_vectors_2d, - plot_2_sets_in_one_2d, - minifold_vectors_3d, - plot_2_sets_in_one_3d, -) +from test2text.services.utils.math_utils import round_distance def make_a_report(): + from test2text.services.db import get_db_client with get_db_client() as db: from test2text.services.embeddings.embed import embed_requirement + from test2text.services.utils import unpack_float32 + from test2text.services.visualisation.visualize_vectors import ( + minifold_vectors_2d, + plot_2_sets_in_one_2d, + minifold_vectors_3d, + plot_2_sets_in_one_3d, + ) st.header("Test2Text Report") def write_annotations(current_annotations: set[tuple]): - anno, summary, dist = st.columns(3) - with anno: - st.write("Annonation's id") - with summary: - st.write("Summary") - with dist: - st.write("Distance") for anno_id, anno_summary, _, distance in current_annotations: - anno, summary, dist = st.columns(3) - with anno: - st.write(f"{anno_id}") - with summary: - st.write(anno_summary) - with dist: - st.write(round(distance, 2)) + st.write(f"{anno_id} {anno_summary} {round_distance(distance)}") with st.container(border=True): st.subheader("Filter requirements") @@ -97,7 +85,7 @@ def write_annotations(current_annotations: set[tuple]): ) if distance_sql: requirements_dict = { - f"#{req_id} Requirement {req_external_id} [smart search d={distance}]": req_id + f"#{req_id} Requirement {req_external_id} [smart search d={round_distance(distance)}]": req_id for (req_id, req_external_id, _, distance) in data.fetchall() } else: diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 8b9df66..5f899c8 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -3,31 +3,25 @@ import streamlit as st from sqlite_vec import serialize_float32 -from test2text.services.db import get_db_client -from test2text.services.utils import unpack_float32 -from test2text.services.visualisation.visualize_vectors import ( - minifold_vectors_2d, - plot_2_sets_in_one_2d, - minifold_vectors_3d, - plot_2_sets_in_one_3d, -) +from test2text.services.utils.math_utils import round_distance def make_a_tc_report(): + from test2text.services.db import get_db_client with get_db_client() as db: from test2text.services.embeddings.embed import embed_requirement + from test2text.services.utils import unpack_float32 + from test2text.services.visualisation.visualize_vectors import ( + minifold_vectors_2d, + plot_2_sets_in_one_2d, + minifold_vectors_3d, + plot_2_sets_in_one_3d, + ) + st.header("Test2Text Report") def write_requirements(current_requirements: set[tuple]): - req, summary, dist = st.columns(3) - with req: - st.write("Requirement") - with summary: - st.write("Summary") - with dist: - st.write("Distance") - for ( req_id, req_external_id, @@ -35,13 +29,7 @@ def write_requirements(current_requirements: set[tuple]): _, distance, ) in current_requirements: - req, summary, dist = st.columns(3) - with req: - st.write(f"#{req_id} Requirement {req_external_id}") - with summary: - st.write(req_summary) - with dist: - st.write(distance) + st.write(f"#{req_id} Requirement {req_external_id} {req_summary} {round_distance(distance)}") with st.container(border=True): st.subheader("Filter test cases") @@ -104,12 +92,12 @@ def write_requirements(current_requirements: set[tuple]): ) if distance_sql: tc_dict = { - f"#{tc_id} Testcase {test_case} [smart search d={distance}]": tc_id + f"{test_case} [smart search d={round_distance(distance)}]": tc_id for (tc_id, _, test_case, distance) in data.fetchall() } else: tc_dict = { - f"#{tc_id} Testcase {test_case}": tc_id + test_case: tc_id for (tc_id, _, test_case) in data.fetchall() } diff --git a/test2text/services/utils/math_utils.py b/test2text/services/utils/math_utils.py new file mode 100644 index 0000000..51d8cdd --- /dev/null +++ b/test2text/services/utils/math_utils.py @@ -0,0 +1,2 @@ +def round_distance(distance: float) -> float: + return round(distance, 2) \ No newline at end of file From 38a02ec5ecf4923c706f2762c2cfd41e1df17e2e Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 13:54:20 +0100 Subject: [PATCH 30/44] fixed formatting --- test2text/pages/reports/report_by_req.py | 1 + test2text/pages/reports/report_by_tc.py | 9 +++++---- test2text/services/utils/math_utils.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index f22edce..7dedc97 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -8,6 +8,7 @@ def make_a_report(): from test2text.services.db import get_db_client + with get_db_client() as db: from test2text.services.embeddings.embed import embed_requirement from test2text.services.utils import unpack_float32 diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 5f899c8..708bba9 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -8,6 +8,7 @@ def make_a_tc_report(): from test2text.services.db import get_db_client + with get_db_client() as db: from test2text.services.embeddings.embed import embed_requirement from test2text.services.utils import unpack_float32 @@ -18,7 +19,6 @@ def make_a_tc_report(): plot_2_sets_in_one_3d, ) - st.header("Test2Text Report") def write_requirements(current_requirements: set[tuple]): @@ -29,7 +29,9 @@ def write_requirements(current_requirements: set[tuple]): _, distance, ) in current_requirements: - st.write(f"#{req_id} Requirement {req_external_id} {req_summary} {round_distance(distance)}") + st.write( + f"#{req_id} Requirement {req_external_id} {req_summary} {round_distance(distance)}" + ) with st.container(border=True): st.subheader("Filter test cases") @@ -97,8 +99,7 @@ def write_requirements(current_requirements: set[tuple]): } else: tc_dict = { - test_case: tc_id - for (tc_id, _, test_case) in data.fetchall() + test_case: tc_id for (tc_id, _, test_case) in data.fetchall() } st.subheader("Choose ONE of filtered test cases") diff --git a/test2text/services/utils/math_utils.py b/test2text/services/utils/math_utils.py index 51d8cdd..f43ba16 100644 --- a/test2text/services/utils/math_utils.py +++ b/test2text/services/utils/math_utils.py @@ -1,2 +1,2 @@ def round_distance(distance: float) -> float: - return round(distance, 2) \ No newline at end of file + return round(distance, 2) From 83a1c190543218129d29fd213f72439a726aca50 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 14:25:41 +0100 Subject: [PATCH 31/44] fixed controls page --- test2text/pages/controls/controls_page.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test2text/pages/controls/controls_page.py b/test2text/pages/controls/controls_page.py index cbfc218..431bf0c 100644 --- a/test2text/pages/controls/controls_page.py +++ b/test2text/pages/controls/controls_page.py @@ -1,11 +1,10 @@ +from test2text.services.db import get_db_client + + def controls_page(): import streamlit as st import plotly.express as px - from test2text.services.embeddings.annotation_embeddings_controls import ( - count_all_annotations, - count_embedded_annotations, - ) st.header("Controls page") embedding_col, distances_col = st.columns(2) @@ -13,10 +12,9 @@ def controls_page(): st.subheader("Embedding") def refresh_counts(): - st.session_state["all_annotations_count"] = count_all_annotations() - st.session_state["embedded_annotations_count"] = ( - count_embedded_annotations() - ) + with get_db_client() as db: + st.session_state["all_annotations_count"] = db.count_all_entries_in_table("Annotations") + st.session_state["embedded_annotations_count"] = db.count_embedded_entries_in_table("Annotations") refresh_counts() From 14334627b93075541158af0cad29ed98efc670a0 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 14:29:54 +0100 Subject: [PATCH 32/44] removed tc id, added req summary to selectbox --- test2text/pages/reports/report_by_req.py | 12 +++++++----- test2text/pages/reports/report_by_tc.py | 23 ++++++----------------- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 7dedc97..5c5c116 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -5,6 +5,7 @@ from test2text.services.utils.math_utils import round_distance +SUMMARY_LENGHT = 100 def make_a_report(): from test2text.services.db import get_db_client @@ -22,8 +23,9 @@ def make_a_report(): st.header("Test2Text Report") def write_annotations(current_annotations: set[tuple]): + st.write("id,", "Summary,", "Distance") for anno_id, anno_summary, _, distance in current_annotations: - st.write(f"{anno_id} {anno_summary} {round_distance(distance)}") + st.write(anno_id, anno_summary, round_distance(distance)) with st.container(border=True): st.subheader("Filter requirements") @@ -86,13 +88,13 @@ def write_annotations(current_annotations: set[tuple]): ) if distance_sql: requirements_dict = { - f"#{req_id} Requirement {req_external_id} [smart search d={round_distance(distance)}]": req_id - for (req_id, req_external_id, _, distance) in data.fetchall() + f"{req_external_id} {summary[:SUMMARY_LENGHT]}... [smart search d={round_distance(distance)}]": req_id + for (req_id, req_external_id, summary, distance) in data.fetchall() } else: requirements_dict = { - f"#{req_id} Requirement {req_external_id}": req_id - for (req_id, req_external_id, _) in data.fetchall() + f"{req_external_id} {summary[:SUMMARY_LENGHT]}...": req_id + for (req_id, req_external_id, summary) in data.fetchall() } st.subheader("Choose 1 of filtered requirements") diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 708bba9..c47a54d 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -22,24 +22,21 @@ def make_a_tc_report(): st.header("Test2Text Report") def write_requirements(current_requirements: set[tuple]): + st.write("External id,", "Summary,", "Distance") for ( - req_id, + _, req_external_id, req_summary, _, distance, ) in current_requirements: - st.write( - f"#{req_id} Requirement {req_external_id} {req_summary} {round_distance(distance)}" + st.write(req_external_id, req_summary, round_distance(distance) ) with st.container(border=True): st.subheader("Filter test cases") with st.expander("🔍 Filters"): - r_id, summary, embed = st.columns(3) - with r_id: - filter_id = st.text_input("ID", value="", key="filter_id") - st.info("Filter by external ID") + summary, embed = st.columns(2) with summary: filter_summary = st.text_input( "Text content", value="", key="filter_summary" @@ -54,9 +51,6 @@ def write_requirements(current_requirements: set[tuple]): where_clauses = [] params = [] - if filter_id.strip(): - where_clauses.append("Testcases.id = ?") - params.append(filter_id.strip()) if filter_summary.strip(): where_clauses.append("Testcases.test_case LIKE ?") @@ -108,13 +102,8 @@ def write_requirements(current_requirements: set[tuple]): ) if option: - clause = "Testcases.id = ?" - if clause in where_clauses: - idx = where_clauses.index(clause) - params.insert(idx, tc_dict[option]) - else: - where_clauses.append(clause) - params.append(tc_dict[option]) + where_clauses.append("Testcases.id = ?") + params.append(tc_dict[option]) st.subheader("Filter Requirements") From de7362328b7b6e981b8019faff4b8ccc0b3a36ee Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 15:02:57 +0100 Subject: [PATCH 33/44] added labels to plots --- test2text/pages/reports/report_by_req.py | 19 +++++++++--- test2text/pages/reports/report_by_tc.py | 22 +++++++++---- .../visualisation/visualize_vectors.py | 31 +++++++++++++++---- 3 files changed, 55 insertions(+), 17 deletions(-) diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 5c5c116..5b7fcb3 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -5,7 +5,7 @@ from test2text.services.utils.math_utils import round_distance -SUMMARY_LENGHT = 100 +SUMMARY_LENGTH = 100 def make_a_report(): from test2text.services.db import get_db_client @@ -88,12 +88,12 @@ def write_annotations(current_annotations: set[tuple]): ) if distance_sql: requirements_dict = { - f"{req_external_id} {summary[:SUMMARY_LENGHT]}... [smart search d={round_distance(distance)}]": req_id + f"{req_external_id} {summary[:SUMMARY_LENGTH]}... [smart search d={round_distance(distance)}]": req_id for (req_id, req_external_id, summary, distance) in data.fetchall() } else: requirements_dict = { - f"{req_external_id} {summary[:SUMMARY_LENGHT]}...": req_id + f"{req_external_id} {summary[:SUMMARY_LENGTH]}...": req_id for (req_id, req_external_id, summary) in data.fetchall() } @@ -262,6 +262,12 @@ def write_annotations(current_annotations: set[tuple]): st.session_state["radio_choice"] ] ] + anno_labels = [ + f"{anno_id} {anno_sum[:SUMMARY_LENGTH]}" + for anno_id, anno_sum, _, _ in current_test_cases[ + st.session_state["radio_choice"] + ] + ] requirement_vectors = np.array( [np.array(unpack_float32(req_embedding))] ) @@ -274,8 +280,9 @@ def write_annotations(current_annotations: set[tuple]): minifold_vectors_2d(annotation_vectors), "Requirement", "Annotations", - first_color="red", - second_color="green", + first_labels=[f"{req_external_id}"], + second_labels=anno_labels, + ) else: reqs_vectors_3d = minifold_vectors_3d( @@ -289,6 +296,8 @@ def write_annotations(current_annotations: set[tuple]): anno_vectors_3d, "Requirement", "Annotations", + first_labels=[f"{req_external_id}"], + second_labels=anno_labels, ) diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index c47a54d..4c64c21 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -6,6 +6,8 @@ from test2text.services.utils.math_utils import round_distance +SUMMARY_LENGTH = 100 + def make_a_tc_report(): from test2text.services.db import get_db_client @@ -272,6 +274,12 @@ def write_requirements(current_requirements: set[tuple]): reqs_by_anno[radio_choice] ] ] + req_labels = [ + f"{ext_id}" + for _, ext_id, req_sum, _, _ in current_annotations[ + reqs_by_anno[radio_choice] + ] + ] annotation_vectors = np.array( [np.array(unpack_float32(anno_embedding))] ) @@ -282,10 +290,10 @@ def write_requirements(current_requirements: set[tuple]): minifold_vectors_2d( requirement_vectors ), - "Annotation", - "Requirements", - first_color="red", - second_color="green", + first_title="Annotation", + second_title="Requirements", + first_labels=radio_choice, + second_labels=req_labels, ) else: reqs_vectors_3d = minifold_vectors_3d( @@ -297,8 +305,10 @@ def write_requirements(current_requirements: set[tuple]): plot_2_sets_in_one_3d( anno_vectors_3d, reqs_vectors_3d, - "Annotation", - "Requirements", + first_title="Annotation", + second_title="Requirements", + first_labels=radio_choice, + second_labels=req_labels, ) diff --git a/test2text/services/visualisation/visualize_vectors.py b/test2text/services/visualisation/visualize_vectors.py index f6d5f2c..d165b38 100644 --- a/test2text/services/visualisation/visualize_vectors.py +++ b/test2text/services/visualisation/visualize_vectors.py @@ -100,16 +100,20 @@ def plot_2_sets_in_one_2d( second_set_of_vec, first_title, second_title, - first_color="blue", + first_color="red", second_color="green", + first_labels=None, + second_labels=None ): fig = go.Figure() fig.add_trace( go.Scatter( x=first_set_of_vec[:, 0], y=first_set_of_vec[:, 1], - mode="markers", + mode="markers+text", name=first_title, + text=first_labels, + textposition="top center", marker=dict(color=f"{first_color}"), ) ) @@ -117,8 +121,10 @@ def plot_2_sets_in_one_2d( go.Scatter( x=second_set_of_vec[:, 0], y=second_set_of_vec[:, 1], - mode="markers", + mode="markers+text", name=second_title, + text=second_labels, + textposition="top center", marker=dict(color=f"{second_color}"), ) ) @@ -129,7 +135,14 @@ def plot_2_sets_in_one_2d( def plot_2_sets_in_one_3d( - first_set_of_vec, second_set_of_vec, first_title, second_title + first_set_of_vec, + second_set_of_vec, + first_title, + second_title, + first_color="red", + second_color="green", + first_labels=None, + second_labels=None ): fig = go.Figure() fig.add_trace( @@ -137,8 +150,11 @@ def plot_2_sets_in_one_3d( x=first_set_of_vec[:, 0], y=first_set_of_vec[:, 1], z=first_set_of_vec[:, 2], - mode="markers", + mode="markers+text", name=first_title, + text=first_labels, + textposition="top left", + marker=dict(color=f"{first_color}") ) ) @@ -147,8 +163,11 @@ def plot_2_sets_in_one_3d( x=second_set_of_vec[:, 0], y=second_set_of_vec[:, 1], z=second_set_of_vec[:, 2], - mode="markers", + mode="markers+text", name=second_title, + text=second_labels, + textposition="top center", + marker=dict(color=f"{second_color}") ) ) From 15e4a286674928979d8b7aa6853e773e53db3045 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 15:06:21 +0100 Subject: [PATCH 34/44] fixed formatting --- test2text/pages/controls/controls_page.py | 9 ++++++--- test2text/pages/reports/report_by_req.py | 2 +- test2text/pages/reports/report_by_tc.py | 5 ++--- test2text/services/visualisation/visualize_vectors.py | 8 ++++---- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/test2text/pages/controls/controls_page.py b/test2text/pages/controls/controls_page.py index 431bf0c..e54cb10 100644 --- a/test2text/pages/controls/controls_page.py +++ b/test2text/pages/controls/controls_page.py @@ -5,7 +5,6 @@ def controls_page(): import streamlit as st import plotly.express as px - st.header("Controls page") embedding_col, distances_col = st.columns(2) with embedding_col: @@ -13,8 +12,12 @@ def controls_page(): def refresh_counts(): with get_db_client() as db: - st.session_state["all_annotations_count"] = db.count_all_entries_in_table("Annotations") - st.session_state["embedded_annotations_count"] = db.count_embedded_entries_in_table("Annotations") + st.session_state["all_annotations_count"] = ( + db.count_all_entries_in_table("Annotations") + ) + st.session_state["embedded_annotations_count"] = ( + db.count_embedded_entries_in_table("Annotations") + ) refresh_counts() diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 5b7fcb3..86c3f59 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -7,6 +7,7 @@ SUMMARY_LENGTH = 100 + def make_a_report(): from test2text.services.db import get_db_client @@ -282,7 +283,6 @@ def write_annotations(current_annotations: set[tuple]): "Annotations", first_labels=[f"{req_external_id}"], second_labels=anno_labels, - ) else: reqs_vectors_3d = minifold_vectors_3d( diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 4c64c21..20ef894 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -8,6 +8,7 @@ SUMMARY_LENGTH = 100 + def make_a_tc_report(): from test2text.services.db import get_db_client @@ -32,8 +33,7 @@ def write_requirements(current_requirements: set[tuple]): _, distance, ) in current_requirements: - st.write(req_external_id, req_summary, round_distance(distance) - ) + st.write(req_external_id, req_summary, round_distance(distance)) with st.container(border=True): st.subheader("Filter test cases") @@ -53,7 +53,6 @@ def write_requirements(current_requirements: set[tuple]): where_clauses = [] params = [] - if filter_summary.strip(): where_clauses.append("Testcases.test_case LIKE ?") params.append(f"%{filter_summary.strip()}%") diff --git a/test2text/services/visualisation/visualize_vectors.py b/test2text/services/visualisation/visualize_vectors.py index d165b38..a0eef7f 100644 --- a/test2text/services/visualisation/visualize_vectors.py +++ b/test2text/services/visualisation/visualize_vectors.py @@ -103,7 +103,7 @@ def plot_2_sets_in_one_2d( first_color="red", second_color="green", first_labels=None, - second_labels=None + second_labels=None, ): fig = go.Figure() fig.add_trace( @@ -142,7 +142,7 @@ def plot_2_sets_in_one_3d( first_color="red", second_color="green", first_labels=None, - second_labels=None + second_labels=None, ): fig = go.Figure() fig.add_trace( @@ -154,7 +154,7 @@ def plot_2_sets_in_one_3d( name=first_title, text=first_labels, textposition="top left", - marker=dict(color=f"{first_color}") + marker=dict(color=f"{first_color}"), ) ) @@ -167,7 +167,7 @@ def plot_2_sets_in_one_3d( name=second_title, text=second_labels, textposition="top center", - marker=dict(color=f"{second_color}") + marker=dict(color=f"{second_color}"), ) ) From 3846c00692134557add08415f64a882482099e7d Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 21:37:35 +0100 Subject: [PATCH 35/44] removed all sql text to services/db/client.py --- test2text/pages/reports/report_by_req.py | 61 +----- test2text/pages/reports/report_by_tc.py | 56 +----- test2text/services/db/client.py | 184 ++++++++++++++++-- .../annotation_embeddings_controls.py | 7 +- .../services/embeddings/cache_distances.py | 12 +- .../services/loaders/index_requirements.py | 7 +- .../visualisation/visualize_vectors.py | 15 +- 7 files changed, 199 insertions(+), 143 deletions(-) diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 86c3f59..2021e3f 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -66,36 +66,24 @@ def write_annotations(current_annotations: set[tuple]): distance_sql = ", vec_distance_L2(embedding, ?) AS distance" distance_order_sql = "distance ASC, " - where_sql = "" - if where_clauses: - where_sql = f"WHERE {' AND '.join(where_clauses)}" - with st.container(border=True): st.session_state.update({"req_form_submitting": True}) - sql = f""" - SELECT - Requirements.id as req_id, - Requirements.external_id as req_external_id, - Requirements.summary as req_summary - {distance_sql} - FROM - Requirements - {where_sql} - ORDER BY - {distance_order_sql}Requirements.id - """ - data = db.conn.execute( - sql, params + [query_embedding_bytes] if distance_sql else params + data = db.get_ordered_values_from_requirements( + distance_sql, + where_clauses, + distance_order_sql, + params + [query_embedding_bytes] if distance_sql else params, ) + if distance_sql: requirements_dict = { f"{req_external_id} {summary[:SUMMARY_LENGTH]}... [smart search d={round_distance(distance)}]": req_id - for (req_id, req_external_id, summary, distance) in data.fetchall() + for (req_id, req_external_id, summary, distance) in data } else: requirements_dict = { f"{req_external_id} {summary[:SUMMARY_LENGTH]}...": req_id - for (req_id, req_external_id, summary) in data.fetchall() + for (req_id, req_external_id, summary) in data } st.subheader("Choose 1 of filtered requirements") @@ -144,39 +132,8 @@ def write_annotations(current_annotations: set[tuple]): if filter_limit: params.append(f"{filter_limit}") - where_sql = "" - if where_clauses: - where_sql = f"WHERE {' AND '.join(where_clauses)}" + rows = db.join_all_tables_by_requirements(where_clauses, params) - sql = f""" - SELECT - Requirements.id as req_id, - Requirements.external_id as req_external_id, - Requirements.summary as req_summary, - Requirements.embedding as req_embedding, - - Annotations.id as anno_id, - Annotations.summary as anno_summary, - Annotations.embedding as anno_embedding, - - AnnotationsToRequirements.cached_distance as distance, - - TestCases.id as case_id, - TestCases.test_script as test_script, - TestCases.test_case as test_case - FROM - Requirements - JOIN AnnotationsToRequirements ON Requirements.id = AnnotationsToRequirements.requirement_id - JOIN Annotations ON Annotations.id = AnnotationsToRequirements.annotation_id - JOIN CasesToAnnos ON Annotations.id = CasesToAnnos.annotation_id - JOIN TestCases ON TestCases.id = CasesToAnnos.case_id - {where_sql} - ORDER BY - Requirements.id, AnnotationsToRequirements.cached_distance, TestCases.id - LIMIT ? - """ - data = db.conn.execute(sql, params) - rows = data.fetchall() if not rows: st.error( "There is no requested data to inspect.\n" diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 20ef894..6cd4619 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -66,26 +66,13 @@ def write_requirements(current_requirements: set[tuple]): distance_sql = ", vec_distance_L2(embedding, ?) AS distance" distance_order_sql = "distance ASC, " - where_sql = "" - if where_clauses: - where_sql = f"WHERE {' AND '.join(where_clauses)}" - with st.container(border=True): st.session_state.update({"tc_form_submitting": True}) - sql = f""" - SELECT - TestCases.id as case_id, - TestCases.test_script as test_script, - TestCases.test_case as test_case - {distance_sql} - FROM - TestCases - {where_sql} - ORDER BY - {distance_order_sql}TestCases.id - """ - data = db.conn.execute( - sql, params + [query_embedding_bytes] if distance_sql else params + data = db.get_ordered_values_from_test_cases( + distance_sql, + where_clauses, + distance_order_sql, + params + [query_embedding_bytes] if distance_sql else params, ) if distance_sql: tc_dict = { @@ -136,39 +123,8 @@ def write_requirements(current_requirements: set[tuple]): if filter_limit: params.append(f"{filter_limit}") - where_sql = "" - if where_clauses: - where_sql = f"WHERE {' AND '.join(where_clauses)}" + rows = db.join_all_tables_by_test_cases(where_clauses, params) - sql = f""" - SELECT - TestCases.id as case_id, - TestCases.test_script as test_script, - TestCases.test_case as test_case, - - Annotations.id as anno_id, - Annotations.summary as anno_summary, - Annotations.embedding as anno_embedding, - - AnnotationsToRequirements.cached_distance as distance, - - Requirements.id as req_id, - Requirements.external_id as req_external_id, - Requirements.summary as req_summary, - Requirements.embedding as req_embedding - FROM - TestCases - JOIN CasesToAnnos ON TestCases.id = CasesToAnnos.case_id - JOIN Annotations ON Annotations.id = CasesToAnnos.annotation_id - JOIN AnnotationsToRequirements ON Annotations.id = AnnotationsToRequirements.annotation_id - JOIN Requirements ON Requirements.id = AnnotationsToRequirements.requirement_id - {where_sql} - ORDER BY - case_id, distance, req_id - LIMIT ? - """ - data = db.conn.execute(sql, params) - rows = data.fetchall() if not rows: st.error( "There is no requested data to inspect.\n" diff --git a/test2text/services/db/client.py b/test2text/services/db/client.py index 6051ce2..be4dc4d 100644 --- a/test2text/services/db/client.py +++ b/test2text/services/db/client.py @@ -88,6 +88,10 @@ def get_table_names(self): cursor.close() return tables + def get_column_values(self, columns: list[str], from_table: str): + cursor = self.conn.execute(f"SELECT {', '.join(columns)} FROM {from_table}") + return cursor.fetchall() + @property def get_db_full_info(self): """ @@ -100,7 +104,7 @@ def get_db_full_info(self): db_tables_info = {} table_names = self.get_table_names() for table_name in table_names: - row_count = self.count_all_entries_in_table(table_name) + row_count = self.count_all_entries(table_name) db_tables_info.update( { table_name: row_count, @@ -108,19 +112,17 @@ def get_db_full_info(self): ) return db_tables_info - def count_all_entries_in_table(self, table: str) -> int: - count = self.conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0] + def count_all_entries(self, from_table: str) -> int: + count = self.conn.execute(f"SELECT COUNT(*) FROM {from_table}").fetchone()[0] return count - def count_notnull_entries_in_table( - self, column: str, table: str + def count_notnull_entries( + self, columns: list[str], from_table: str ) -> Union[int, None]: - if self.has_column(column, table): - count = self.conn.execute( - f"SELECT COUNT(*) FROM {table} WHERE {column} IS NOT NULL" - ).fetchone()[0] - return count - return None + count = self.conn.execute( + f"SELECT COUNT(*) FROM {from_table} WHERE {', '.join(columns)} IS NOT NULL" + ).fetchone()[0] + return count def has_column(self, column_name: str, table_name: str) -> bool: """ @@ -134,3 +136,163 @@ def has_column(self, column_name: str, table_name: str) -> bool: columns = [row[1] for row in cursor.fetchall()] # row[1] is the column name cursor.close() return column_name in columns + + def get_null_entries(self, from_table: str) -> list: + cursor = self.conn.execute( + f"SELECT id, summary FROM {from_table} WHERE embedding IS NULL" + ) + return cursor.fetchall() + + def get_distances(self) -> list[tuple[int, int, float]]: + """ + Returns a list of tuples containing the id of the annotation and the id of the requirement, + and the distance between their embeddings (anno_id, req_id, distance). + The distance is calculated using the L2 norm. The results are ordered by requirement ID and distance. + """ + cursor = self.conn.execute(""" + SELECT + Annotations.id AS anno_id, + Requirements.id AS req_id, + vec_distance_L2(Annotations.embedding, Requirements.embedding) AS distance + FROM Annotations, Requirements + WHERE Annotations.embedding IS NOT NULL AND Requirements.embedding IS NOT NULL + ORDER BY req_id, distance + """) + return cursor.fetchall() + + def get_embeddings_from_annotations_to_requirements_table(self): + """ + Returns a list of annotation's embeddings that are stored in the AnnotationsToRequirements table. + The embeddings are ordered by annotation ID. + """ + cursor = self.conn.execute(""" + SELECT embedding FROM Annotations + WHERE id IN ( + SELECT DISTINCT annotation_id FROM AnnotationsToRequirements + ) + """) + return cursor.fetchall() + + def join_all_tables_by_requirements( + self, where_clauses="", params=None + ) -> list[tuple]: + """ + Join all tables related to requirements based on the provided where clauses and parameters. + return a list of tuples containing : + req_id, + req_external_id, + req_summary, + req_embedding, + anno_id, + anno_summary, + anno_embedding, + distance, + case_id, + test_script, + test_case + """ + where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else "" + sql = f""" + SELECT + Requirements.id as req_id, + Requirements.external_id as req_external_id, + Requirements.summary as req_summary, + Requirements.embedding as req_embedding, + + Annotations.id as anno_id, + Annotations.summary as anno_summary, + Annotations.embedding as anno_embedding, + + AnnotationsToRequirements.cached_distance as distance, + + TestCases.id as case_id, + TestCases.test_script as test_script, + TestCases.test_case as test_case + FROM + Requirements + JOIN AnnotationsToRequirements ON Requirements.id = AnnotationsToRequirements.requirement_id + JOIN Annotations ON Annotations.id = AnnotationsToRequirements.annotation_id + JOIN CasesToAnnos ON Annotations.id = CasesToAnnos.annotation_id + JOIN TestCases ON TestCases.id = CasesToAnnos.case_id + {where_sql} + ORDER BY + Requirements.id, AnnotationsToRequirements.cached_distance, TestCases.id + LIMIT ? + """ + data = self.conn.execute(sql, params) + return data.fetchall() + + def get_ordered_values_from_requirements( + self, distance_sql="", where_clauses="", distance_order_sql="", params=None + ) -> list[tuple]: + where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else "" + sql = f""" + SELECT + Requirements.id as req_id, + Requirements.external_id as req_external_id, + Requirements.summary as req_summary + {distance_sql} + FROM + Requirements + {where_sql} + ORDER BY + {distance_order_sql}Requirements.id + """ + data = self.conn.execute(sql, params) + return data.fetchall() + + def get_ordered_values_from_test_cases( + self, distance_sql="", where_clauses="", distance_order_sql="", params=None + ) -> list[tuple]: + where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else "" + sql = f""" + SELECT + TestCases.id as case_id, + TestCases.test_script as test_script, + TestCases.test_case as test_case + {distance_sql} + FROM + TestCases + {where_sql} + ORDER BY + {distance_order_sql}TestCases.id + """ + data = self.conn.execute(sql, params) + return data.fetchall() + + def join_all_tables_by_test_cases( + self, where_clauses="", params=None + ) -> list[tuple]: + where_sql = "" + if where_clauses: + where_sql = f"WHERE {' AND '.join(where_clauses)}" + + sql = f""" + SELECT + TestCases.id as case_id, + TestCases.test_script as test_script, + TestCases.test_case as test_case, + + Annotations.id as anno_id, + Annotations.summary as anno_summary, + Annotations.embedding as anno_embedding, + + AnnotationsToRequirements.cached_distance as distance, + + Requirements.id as req_id, + Requirements.external_id as req_external_id, + Requirements.summary as req_summary, + Requirements.embedding as req_embedding + FROM + TestCases + JOIN CasesToAnnos ON TestCases.id = CasesToAnnos.case_id + JOIN Annotations ON Annotations.id = CasesToAnnos.annotation_id + JOIN AnnotationsToRequirements ON Annotations.id = AnnotationsToRequirements.annotation_id + JOIN Requirements ON Requirements.id = AnnotationsToRequirements.requirement_id + {where_sql} + ORDER BY + case_id, distance, req_id + LIMIT ? + """ + data = self.conn.execute(sql, params) + return data.fetchall() diff --git a/test2text/services/embeddings/annotation_embeddings_controls.py b/test2text/services/embeddings/annotation_embeddings_controls.py index 5f228af..6297dc0 100644 --- a/test2text/services/embeddings/annotation_embeddings_controls.py +++ b/test2text/services/embeddings/annotation_embeddings_controls.py @@ -30,12 +30,9 @@ def write_batch(batch: list[tuple[int, str]]): db.annotations.set_embedding(anno_id, embedding) db.conn.commit() - annotations = db.conn.execute(f""" - SELECT id, summary FROM Annotations - {"WHERE embedding IS NULL" if not embed_all else ""} - """) + annotations = db.get_null_entries(from_table="Annotations") - for i, (anno_id, summary) in enumerate(annotations.fetchall()): + for i, (anno_id, summary) in enumerate(annotations): if on_progress: on_progress((i + 1) / annotations_to_embed) batch.append((anno_id, summary)) diff --git a/test2text/services/embeddings/cache_distances.py b/test2text/services/embeddings/cache_distances.py index ffa373a..ee9c1da 100644 --- a/test2text/services/embeddings/cache_distances.py +++ b/test2text/services/embeddings/cache_distances.py @@ -5,20 +5,12 @@ def refresh_and_get_distances() -> list[float]: with get_db_client() as db: db.annos_to_reqs.recreate_table() # Link requirements to annotations - annotations = db.conn.execute(""" - SELECT - Annotations.id AS anno_id, - Requirements.id AS req_id, - vec_distance_L2(Annotations.embedding, Requirements.embedding) AS distance - FROM Annotations, Requirements - WHERE Annotations.embedding IS NOT NULL AND Requirements.embedding IS NOT NULL - ORDER BY req_id, distance - """) + annotations = db.get_distances() # Visualize distances distances = [] current_req_id = None current_req_annos = 0 - for i, (anno_id, req_id, distance) in enumerate(annotations.fetchall()): + for i, (anno_id, req_id, distance) in enumerate(annotations): distances.append(distance) if req_id != current_req_id: current_req_id = req_id diff --git a/test2text/services/loaders/index_requirements.py b/test2text/services/loaders/index_requirements.py index bfe71eb..c112985 100644 --- a/test2text/services/loaders/index_requirements.py +++ b/test2text/services/loaders/index_requirements.py @@ -18,7 +18,7 @@ def index_requirements_from_files( *args, on_start_file: OnStartFile = None, on_requirement_written: OnRequirementWritten = None, -) -> tuple[int]: +) -> int: with get_db_client() as db: for i, file in enumerate(files): if on_start_file: @@ -61,7 +61,4 @@ def write_batch(): write_batch() write_batch() # Check requirements - cursor = db.conn.execute(""" - SELECT COUNT(*) FROM Requirements - """) - return cursor.fetchone()[0] + return db.count_all_entries(from_table="Requirements") diff --git a/test2text/services/visualisation/visualize_vectors.py b/test2text/services/visualisation/visualize_vectors.py index a0eef7f..da09b46 100644 --- a/test2text/services/visualisation/visualize_vectors.py +++ b/test2text/services/visualisation/visualize_vectors.py @@ -15,7 +15,7 @@ def extract_annotation_vectors(db: DbClient): vectors = [] - embeddings = db.conn.execute("SELECT embedding FROM Annotations") + embeddings = db.get_column_values("embedding", "Annotations") if embeddings.fetchone() is None: st.error("Embeddings is empty. Please fill embeddings in annotations.") return None @@ -27,23 +27,18 @@ def extract_annotation_vectors(db: DbClient): def extract_closest_annotation_vectors(db: DbClient): vectors = [] - embeddings = db.conn.execute(""" - SELECT embedding FROM Annotations - WHERE id IN ( - SELECT DISTINCT annotation_id FROM AnnotationsToRequirements - ) - """) - if embeddings.fetchone() is None: + embeddings = db.get_embeddings_from_annotations_to_requirements_table() + if not embeddings: st.error("Embeddings is empty. Please calculate and cache distances.") return None - for row in embeddings.fetchall(): + for row in embeddings: vectors.append(np.array(unpack_float32(row[0]))) return np.array(vectors) def extract_requirement_vectors(db: DbClient): vectors = [] - embeddings = db.conn.execute("SELECT embedding FROM Requirements") + embeddings = db.get_column_values("embedding", "Requirements") if embeddings.fetchone() is None: st.error("Embeddings is empty. Please fill embeddings in requirements.") return None From b12c2287f3d4377d9c3eee0038beb736624447c0 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 21:43:32 +0100 Subject: [PATCH 36/44] removed all sql text to services/db/client.py from tests --- test2text/services/db/client.py | 7 +++++++ tests/test_db/test_tables/test_annotations.py | 12 ++---------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/test2text/services/db/client.py b/test2text/services/db/client.py index be4dc4d..1d1f3c6 100644 --- a/test2text/services/db/client.py +++ b/test2text/services/db/client.py @@ -296,3 +296,10 @@ def join_all_tables_by_test_cases( """ data = self.conn.execute(sql, params) return data.fetchall() + + def get_embeddings_by_id(self, id1: int, from_table: str): + cursor = self.conn.execute( + f"SELECT embedding FROM {from_table} WHERE id = ?", + (id1,) + ) + return cursor.fetchone() \ No newline at end of file diff --git a/tests/test_db/test_tables/test_annotations.py b/tests/test_db/test_tables/test_annotations.py index a2d2feb..7257172 100644 --- a/tests/test_db/test_tables/test_annotations.py +++ b/tests/test_db/test_tables/test_annotations.py @@ -67,11 +67,7 @@ def test_set_embedding(self): orig_embedding = [0.1] * self.db.annotations.embedding_size self.db.annotations.set_embedding(id1, orig_embedding) self.db.conn.commit() - cursor = self.db.conn.execute( - "SELECT embedding FROM Annotations WHERE id = ?", (id1,) - ) - result = cursor.fetchone() - cursor.close() + result = self.db.get_embeddings_by_id(id1, "Annotations") self.assertIsNotNone(result) read_embedding = unpack_float32(result[0]) self.assertEqual(len(read_embedding), self.db.annotations.embedding_size) @@ -80,11 +76,7 @@ def test_set_embedding(self): new_embedding = [0.9] * self.db.annotations.embedding_size self.db.annotations.set_embedding(id1, new_embedding) self.db.conn.commit() - cursor = self.db.conn.execute( - "SELECT embedding FROM Annotations WHERE id = ?", (id1,) - ) - result = cursor.fetchone() - cursor.close() + result = self.db.get_embeddings_by_id(id1, "Annotations") self.assertIsNotNone(result) read_embedding = unpack_float32(result[0]) self.assertEqual(len(read_embedding), self.db.annotations.embedding_size) From bc466b8051835ecd7e118b6e37c72704261008c4 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Thu, 21 Aug 2025 21:44:10 +0100 Subject: [PATCH 37/44] fixed formatting --- test2text/services/db/client.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test2text/services/db/client.py b/test2text/services/db/client.py index 1d1f3c6..e396198 100644 --- a/test2text/services/db/client.py +++ b/test2text/services/db/client.py @@ -299,7 +299,6 @@ def join_all_tables_by_test_cases( def get_embeddings_by_id(self, id1: int, from_table: str): cursor = self.conn.execute( - f"SELECT embedding FROM {from_table} WHERE id = ?", - (id1,) + f"SELECT embedding FROM {from_table} WHERE id = ?", (id1,) ) - return cursor.fetchone() \ No newline at end of file + return cursor.fetchone() From b9338c6431542c54e5cc0045f2aaabfc6881bd4c Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 22 Aug 2025 10:20:13 +0100 Subject: [PATCH 38/44] fixed new methods in client.py --- test2text/pages/controls/controls_page.py | 4 ++-- test2text/services/db/client.py | 8 ++++---- .../services/embeddings/annotation_embeddings_controls.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/test2text/pages/controls/controls_page.py b/test2text/pages/controls/controls_page.py index e54cb10..99c0115 100644 --- a/test2text/pages/controls/controls_page.py +++ b/test2text/pages/controls/controls_page.py @@ -13,10 +13,10 @@ def controls_page(): def refresh_counts(): with get_db_client() as db: st.session_state["all_annotations_count"] = ( - db.count_all_entries_in_table("Annotations") + db.count_all_entries("Annotations") ) st.session_state["embedded_annotations_count"] = ( - db.count_embedded_entries_in_table("Annotations") + db.count_notnull_entries("embedding",from_table="Annotations") ) refresh_counts() diff --git a/test2text/services/db/client.py b/test2text/services/db/client.py index e396198..b4ebc76 100644 --- a/test2text/services/db/client.py +++ b/test2text/services/db/client.py @@ -88,7 +88,7 @@ def get_table_names(self): cursor.close() return tables - def get_column_values(self, columns: list[str], from_table: str): + def get_column_values(self, *columns: str, from_table: str): cursor = self.conn.execute(f"SELECT {', '.join(columns)} FROM {from_table}") return cursor.fetchall() @@ -117,10 +117,10 @@ def count_all_entries(self, from_table: str) -> int: return count def count_notnull_entries( - self, columns: list[str], from_table: str - ) -> Union[int, None]: + self, *columns: str, from_table: str + ) -> int: count = self.conn.execute( - f"SELECT COUNT(*) FROM {from_table} WHERE {', '.join(columns)} IS NOT NULL" + f"SELECT COUNT(*) FROM {from_table} WHERE {' AND '.join([column + ' IS NOT NULL' for column in columns])}" ).fetchone()[0] return count diff --git a/test2text/services/embeddings/annotation_embeddings_controls.py b/test2text/services/embeddings/annotation_embeddings_controls.py index 6297dc0..acc932f 100644 --- a/test2text/services/embeddings/annotation_embeddings_controls.py +++ b/test2text/services/embeddings/annotation_embeddings_controls.py @@ -12,8 +12,8 @@ def embed_annotations(*_, embed_all=False, on_progress: OnProgress = None): with get_db_client() as db: from .embed import embed_annotations_batch - annotations_count = db.count_all_entries_in_table("Annotations") - embedded_annotations_count = db.count_embedded_entries_in_table("Annotations") + annotations_count = db.count_all_entries("Annotations") + embedded_annotations_count = db.count_notnull_entries("embedding",from_table="Annotations") if embed_all: annotations_to_embed = annotations_count else: From 58b82c066dc8a95d9984e5f6a6ac23aa345b6c67 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 22 Aug 2025 10:20:58 +0100 Subject: [PATCH 39/44] fixed extra fetches --- test2text/pages/reports/report_by_tc.py | 4 ++-- test2text/services/visualisation/visualize_vectors.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 6cd4619..2df5b68 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -77,11 +77,11 @@ def write_requirements(current_requirements: set[tuple]): if distance_sql: tc_dict = { f"{test_case} [smart search d={round_distance(distance)}]": tc_id - for (tc_id, _, test_case, distance) in data.fetchall() + for (tc_id, _, test_case, distance) in data } else: tc_dict = { - test_case: tc_id for (tc_id, _, test_case) in data.fetchall() + test_case: tc_id for (tc_id, _, test_case) in data } st.subheader("Choose ONE of filtered test cases") diff --git a/test2text/services/visualisation/visualize_vectors.py b/test2text/services/visualisation/visualize_vectors.py index da09b46..c63ee81 100644 --- a/test2text/services/visualisation/visualize_vectors.py +++ b/test2text/services/visualisation/visualize_vectors.py @@ -15,11 +15,11 @@ def extract_annotation_vectors(db: DbClient): vectors = [] - embeddings = db.get_column_values("embedding", "Annotations") - if embeddings.fetchone() is None: + embeddings = db.get_column_values("embedding", from_table="Annotations") + if not embeddings: st.error("Embeddings is empty. Please fill embeddings in annotations.") return None - for row in embeddings.fetchall(): + for row in embeddings: if row[0] is not None: vectors.append(np.array(unpack_float32(row[0]))) return np.array(vectors) @@ -38,11 +38,11 @@ def extract_closest_annotation_vectors(db: DbClient): def extract_requirement_vectors(db: DbClient): vectors = [] - embeddings = db.get_column_values("embedding", "Requirements") + embeddings = db.get_column_values("embedding", from_table="Requirements") if embeddings.fetchone() is None: st.error("Embeddings is empty. Please fill embeddings in requirements.") return None - for row in embeddings.fetchall(): + for row in embeddings: vectors.append(np.array(unpack_float32(row[0]))) return np.array(vectors) From 4db9b3ebe68fc3378ac4d1a33280d718f6d16699 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 22 Aug 2025 10:22:04 +0100 Subject: [PATCH 40/44] fixed formatting --- test2text/pages/controls/controls_page.py | 6 +++--- test2text/pages/reports/report_by_tc.py | 4 +--- test2text/services/db/client.py | 4 +--- .../services/embeddings/annotation_embeddings_controls.py | 4 +++- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/test2text/pages/controls/controls_page.py b/test2text/pages/controls/controls_page.py index 99c0115..166c333 100644 --- a/test2text/pages/controls/controls_page.py +++ b/test2text/pages/controls/controls_page.py @@ -12,11 +12,11 @@ def controls_page(): def refresh_counts(): with get_db_client() as db: - st.session_state["all_annotations_count"] = ( - db.count_all_entries("Annotations") + st.session_state["all_annotations_count"] = db.count_all_entries( + "Annotations" ) st.session_state["embedded_annotations_count"] = ( - db.count_notnull_entries("embedding",from_table="Annotations") + db.count_notnull_entries("embedding", from_table="Annotations") ) refresh_counts() diff --git a/test2text/pages/reports/report_by_tc.py b/test2text/pages/reports/report_by_tc.py index 2df5b68..94c5cb9 100644 --- a/test2text/pages/reports/report_by_tc.py +++ b/test2text/pages/reports/report_by_tc.py @@ -80,9 +80,7 @@ def write_requirements(current_requirements: set[tuple]): for (tc_id, _, test_case, distance) in data } else: - tc_dict = { - test_case: tc_id for (tc_id, _, test_case) in data - } + tc_dict = {test_case: tc_id for (tc_id, _, test_case) in data} st.subheader("Choose ONE of filtered test cases") option = st.selectbox( diff --git a/test2text/services/db/client.py b/test2text/services/db/client.py index b4ebc76..26facba 100644 --- a/test2text/services/db/client.py +++ b/test2text/services/db/client.py @@ -116,9 +116,7 @@ def count_all_entries(self, from_table: str) -> int: count = self.conn.execute(f"SELECT COUNT(*) FROM {from_table}").fetchone()[0] return count - def count_notnull_entries( - self, *columns: str, from_table: str - ) -> int: + def count_notnull_entries(self, *columns: str, from_table: str) -> int: count = self.conn.execute( f"SELECT COUNT(*) FROM {from_table} WHERE {' AND '.join([column + ' IS NOT NULL' for column in columns])}" ).fetchone()[0] diff --git a/test2text/services/embeddings/annotation_embeddings_controls.py b/test2text/services/embeddings/annotation_embeddings_controls.py index acc932f..8815506 100644 --- a/test2text/services/embeddings/annotation_embeddings_controls.py +++ b/test2text/services/embeddings/annotation_embeddings_controls.py @@ -13,7 +13,9 @@ def embed_annotations(*_, embed_all=False, on_progress: OnProgress = None): from .embed import embed_annotations_batch annotations_count = db.count_all_entries("Annotations") - embedded_annotations_count = db.count_notnull_entries("embedding",from_table="Annotations") + embedded_annotations_count = db.count_notnull_entries( + "embedding", from_table="Annotations" + ) if embed_all: annotations_to_embed = annotations_count else: From fb83cd7a05b028c1ca760e71c58203140d10726b Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 22 Aug 2025 10:28:08 +0100 Subject: [PATCH 41/44] fixed extra fetches --- test2text/services/visualisation/visualize_vectors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test2text/services/visualisation/visualize_vectors.py b/test2text/services/visualisation/visualize_vectors.py index c63ee81..67676b6 100644 --- a/test2text/services/visualisation/visualize_vectors.py +++ b/test2text/services/visualisation/visualize_vectors.py @@ -39,7 +39,7 @@ def extract_closest_annotation_vectors(db: DbClient): def extract_requirement_vectors(db: DbClient): vectors = [] embeddings = db.get_column_values("embedding", from_table="Requirements") - if embeddings.fetchone() is None: + if not embeddings: st.error("Embeddings is empty. Please fill embeddings in requirements.") return None for row in embeddings: From 73b513f7788c26fb63577168ec35ffecffdbdcb0 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 22 Aug 2025 10:29:59 +0100 Subject: [PATCH 42/44] fixed extra import --- test2text/services/db/client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test2text/services/db/client.py b/test2text/services/db/client.py index 26facba..1c84951 100644 --- a/test2text/services/db/client.py +++ b/test2text/services/db/client.py @@ -1,5 +1,4 @@ import sqlite3 -from typing import Union import sqlite_vec import logging From 53436df9e1f18912c6abb306119cfea5138ab62b Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 22 Aug 2025 15:12:33 +0100 Subject: [PATCH 43/44] fixed annotation's labels --- test2text/pages/reports/report_by_req.py | 5 +- .../visualisation/visualize_vectors.py | 48 ++++++++++++++----- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/test2text/pages/reports/report_by_req.py b/test2text/pages/reports/report_by_req.py index 2021e3f..4880a12 100644 --- a/test2text/pages/reports/report_by_req.py +++ b/test2text/pages/reports/report_by_req.py @@ -6,6 +6,7 @@ from test2text.services.utils.math_utils import round_distance SUMMARY_LENGTH = 100 +LABELS_SUMMARY_LENGTH = 15 def make_a_report(): @@ -221,8 +222,8 @@ def write_annotations(current_annotations: set[tuple]): ] ] anno_labels = [ - f"{anno_id} {anno_sum[:SUMMARY_LENGTH]}" - for anno_id, anno_sum, _, _ in current_test_cases[ + f"{anno_id}" + for anno_id, _, _, _ in current_test_cases[ st.session_state["radio_choice"] ] ] diff --git a/test2text/services/visualisation/visualize_vectors.py b/test2text/services/visualisation/visualize_vectors.py index 67676b6..bf40aeb 100644 --- a/test2text/services/visualisation/visualize_vectors.py +++ b/test2text/services/visualisation/visualize_vectors.py @@ -11,7 +11,7 @@ FONT_SIZE = 18 DOT_SIZE_2D = 20 DOT_SIZE_3D = 10 - +LABELS_SUMMARY_LENGTH = 15 def extract_annotation_vectors(db: DbClient): vectors = [] @@ -73,19 +73,30 @@ def minifold_vectors_3d(vectors: np.array): return vectors_3d -def plot_vectors_2d(vectors_2d: np.array, title): - fig = px.scatter(x=vectors_2d[:, 0], y=vectors_2d[:, 1]) - fig.update_layout(title=title, xaxis_title="X", yaxis_title="Y") +def plot_vectors_2d(vectors_2d: np.array, title: str, labels: list=None): + fig = px.scatter( + x=vectors_2d[:, 0], + y=vectors_2d[:, 1], + text=labels, + ) + fig.update_traces(textposition='top center') + fig.update_layout( + title=title, + xaxis_title="X", + yaxis_title="Y", + ) st.plotly_chart(fig, use_container_width=True) -def plot_vectors_3d(vectors_3d: np.array, title): +def plot_vectors_3d(vectors_3d: np.array, title: str, labels: list=None): fig = px.scatter_3d( x=vectors_3d[:, 0], y=vectors_3d[:, 1], z=vectors_3d[:, 2], color=vectors_3d[:, 2], + text=labels, ) + fig.update_traces(textposition='top center') fig.update_layout(title=title, xaxis_title="X", yaxis_title="Y") st.plotly_chart(fig, use_container_width=True) @@ -180,10 +191,10 @@ def plot_2_sets_in_one_3d( def visualize_vectors(): st.header("Visualizing vectors") with get_db_client() as db: - Req_tab, Anno_tab, Req_Anno_tab = st.tabs( + req_tab, anno_tab, req_anno_tab = st.tabs( ["Requirements", "Annotations", "Requirements vs Annotations"] ) - with Req_tab: + with req_tab: st.subheader("Requirements vectors") progress_bar = st.progress(0) @@ -191,18 +202,20 @@ def visualize_vectors(): progress_bar.progress(20, "Extracted") reqs_vectors_2d = minifold_vectors_2d(requirement_vectors) progress_bar.progress(40, "Minifolded for 2D") - plot_vectors_2d(reqs_vectors_2d, "Requirements") + req_labels = db.get_column_values("external_id", from_table="Requirements") + plot_vectors_2d(reqs_vectors_2d, "Requirements", labels=req_labels) progress_bar.progress(60, "Plotted in 2D") reqs_vectors_3d = minifold_vectors_3d(requirement_vectors) progress_bar.progress(80, "Minifolded for 3D") plot_vectors_3d(reqs_vectors_3d, "Requirements") progress_bar.progress(100, "Plotted in 3D") - with Anno_tab: + with anno_tab: st.subheader("Annotations vectors") progress_bar = st.progress(0) annotation_vectors = extract_annotation_vectors(db) + anno_labels = db.get_column_values("id", from_table="Annotations") progress_bar.progress(20, "Extracted") anno_vectors_2d = minifold_vectors_2d(annotation_vectors) progress_bar.progress(40, "Minifolded for 2D") @@ -213,17 +226,23 @@ def visualize_vectors(): plot_vectors_3d(anno_vectors_3d, "Annotations") progress_bar.progress(100, "Plotted in 3D") - with Req_Anno_tab: + with req_anno_tab: # Show how these 2 groups of vectors are different st.subheader("Requirements vs Annotations") progress_bar = st.progress(40, "Extracted") plot_2_sets_in_one_2d( - reqs_vectors_2d, anno_vectors_2d, "Requerements", "Annotations" + reqs_vectors_2d, + anno_vectors_2d, + first_title="Requirements", + second_title="Annotations", ) progress_bar.progress(60, "Plotted in 2D") plot_2_sets_in_one_3d( - reqs_vectors_3d, anno_vectors_3d, "Requerements", "Annotations" + reqs_vectors_3d, + anno_vectors_3d, + first_title="Requirements", + second_title="Annotations", ) progress_bar.progress(80, "Plotted in 3D") @@ -232,7 +251,10 @@ def visualize_vectors(): ) plot_2_sets_in_one_2d( - reqs_vectors_2d, anno_vectors_2d, "Requerements", "Annotations" + reqs_vectors_2d, + anno_vectors_2d, + first_title="Requirements", + second_title="Annotations", ) progress_bar.progress(100, "Minifolded and Plotted in 2D") From 298fa94d6e17fc2bad71d4317ba0c434bfc60616 Mon Sep 17 00:00:00 2001 From: anngoroshi <49633463+anngoroshi@users.noreply.github.com> Date: Fri, 22 Aug 2025 15:15:23 +0100 Subject: [PATCH 44/44] fixed annotation's labels --- test2text/services/visualisation/visualize_vectors.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test2text/services/visualisation/visualize_vectors.py b/test2text/services/visualisation/visualize_vectors.py index bf40aeb..fe59298 100644 --- a/test2text/services/visualisation/visualize_vectors.py +++ b/test2text/services/visualisation/visualize_vectors.py @@ -13,6 +13,7 @@ DOT_SIZE_3D = 10 LABELS_SUMMARY_LENGTH = 15 + def extract_annotation_vectors(db: DbClient): vectors = [] embeddings = db.get_column_values("embedding", from_table="Annotations") @@ -73,13 +74,13 @@ def minifold_vectors_3d(vectors: np.array): return vectors_3d -def plot_vectors_2d(vectors_2d: np.array, title: str, labels: list=None): +def plot_vectors_2d(vectors_2d: np.array, title: str, labels: list = None): fig = px.scatter( x=vectors_2d[:, 0], y=vectors_2d[:, 1], text=labels, ) - fig.update_traces(textposition='top center') + fig.update_traces(textposition="top center") fig.update_layout( title=title, xaxis_title="X", @@ -88,7 +89,7 @@ def plot_vectors_2d(vectors_2d: np.array, title: str, labels: list=None): st.plotly_chart(fig, use_container_width=True) -def plot_vectors_3d(vectors_3d: np.array, title: str, labels: list=None): +def plot_vectors_3d(vectors_3d: np.array, title: str, labels: list = None): fig = px.scatter_3d( x=vectors_3d[:, 0], y=vectors_3d[:, 1], @@ -96,7 +97,7 @@ def plot_vectors_3d(vectors_3d: np.array, title: str, labels: list=None): color=vectors_3d[:, 2], text=labels, ) - fig.update_traces(textposition='top center') + fig.update_traces(textposition="top center") fig.update_layout(title=title, xaxis_title="X", yaxis_title="Y") st.plotly_chart(fig, use_container_width=True) @@ -215,7 +216,6 @@ def visualize_vectors(): progress_bar = st.progress(0) annotation_vectors = extract_annotation_vectors(db) - anno_labels = db.get_column_values("id", from_table="Annotations") progress_bar.progress(20, "Extracted") anno_vectors_2d = minifold_vectors_2d(annotation_vectors) progress_bar.progress(40, "Minifolded for 2D")