Compare commits

...

2 Commits

3 changed files with 60 additions and 13 deletions

3
.gitignore vendored
View File

@@ -195,9 +195,10 @@ Endobest Reporting/
jsons history/ jsons history/
nul nul
# Ignore all json, exe, log and xlsx files # Ignore all json, exe, log, txt and xlsx files
*.json *.json
*.exe *.exe
*.log *.log
*.txt
/*.xlsx /*.xlsx
!eb_org_center_mapping.xlsx !eb_org_center_mapping.xlsx

View File

@@ -119,6 +119,7 @@ refresh_token = ""
threads_list = [] threads_list = []
_token_refresh_lock = threading.Lock() _token_refresh_lock = threading.Lock()
on_retry_exhausted = "ask" # "ask" | "ignore" | "abort" — set at startup on_retry_exhausted = "ask" # "ask" | "ignore" | "abort" — set at startup
fetch_six_month_visit = False # Whether to fetch 6-month visit data (slow, ~5s per patient)
_stored_username = "" # Credentials stored at login for automatic re-login _stored_username = "" # Credentials stored at login for automatic re-login
_stored_password = "" _stored_password = ""
_threads_list_lock = threading.Lock() _threads_list_lock = threading.Lock()
@@ -354,6 +355,19 @@ def ask_on_retry_exhausted():
on_retry_exhausted = "abort" on_retry_exhausted = "abort"
def ask_fetch_six_month_visit():
    """Prompt the user to decide whether 6-month visit data should be fetched.

    The fetch is a slow API call (~5s per patient), so the default-first
    option is to skip it. Sets the module-level ``fetch_six_month_visit``
    flag according to the user's choice.
    """
    global fetch_six_month_visit
    # Runtime strings kept byte-identical; answer is compared against the
    # "Yes" option to derive the boolean flag.
    options = [
        "No (skip, faster execution)",
        "Yes (fetch 6-month visit data)"
    ]
    answer = questionary.select(
        "Fetch 6-month visit progress data? (slow, ~5s per patient) :",
        choices=options
    ).ask()
    fetch_six_month_visit = answer == options[1]
def wait_for_scheduled_launch(): def wait_for_scheduled_launch():
"""Asks the user when to start the processing and waits if needed. """Asks the user when to start the processing and waits if needed.
Options: Immediately / In X minutes / At HH:MM Options: Immediately / In X minutes / At HH:MM
@@ -603,6 +617,12 @@ def _find_questionnaire_by_id(qcm_dict, qcm_id):
if not isinstance(qcm_dict, dict): if not isinstance(qcm_dict, dict):
return None return None
qcm_data = qcm_dict.get(qcm_id) qcm_data = qcm_dict.get(qcm_id)
if qcm_data and qcm_data.get("_count", 1) > 1:
ctx = getattr(thread_local_storage, "current_patient_context", {"id": "Unknown", "pseudo": "Unknown"})
logging.error(
f"[DUPLICATE QCM] Patient {ctx['id']} ({ctx['pseudo']}): "
f"Questionnaire id='{qcm_id}' appeared {qcm_data['_count']} times in API response — using last received copy"
)
return qcm_data.get("answers") if qcm_data else None return qcm_data.get("answers") if qcm_data else None
@@ -610,20 +630,32 @@ def _find_questionnaire_by_name(qcm_dict, name):
"""Finds a questionnaire by name (sequential search, returns first match).""" """Finds a questionnaire by name (sequential search, returns first match)."""
if not isinstance(qcm_dict, dict): if not isinstance(qcm_dict, dict):
return None return None
for qcm in qcm_dict.values(): matches = [qcm for qcm in qcm_dict.values()
if get_nested_value(qcm, ["questionnaire", "name"]) == name: if get_nested_value(qcm, ["questionnaire", "name"]) == name]
return qcm.get("answers") if len(matches) > 1:
return None ctx = getattr(thread_local_storage, "current_patient_context", {"id": "Unknown", "pseudo": "Unknown"})
ids = [get_nested_value(q, ["questionnaire", "id"]) for q in matches]
logging.error(
f"[DUPLICATE QCM] Patient {ctx['id']} ({ctx['pseudo']}): "
f"Questionnaire name='{name}' matches {len(matches)} entries (ids: {ids}) — returning first match"
)
return matches[0].get("answers") if matches else None
def _find_questionnaire_by_category(qcm_dict, category): def _find_questionnaire_by_category(qcm_dict, category):
"""Finds a questionnaire by category (sequential search, returns first match).""" """Finds a questionnaire by category (sequential search, returns first match)."""
if not isinstance(qcm_dict, dict): if not isinstance(qcm_dict, dict):
return None return None
for qcm in qcm_dict.values(): matches = [qcm for qcm in qcm_dict.values()
if get_nested_value(qcm, ["questionnaire", "category"]) == category: if get_nested_value(qcm, ["questionnaire", "category"]) == category]
return qcm.get("answers") if len(matches) > 1:
return None ctx = getattr(thread_local_storage, "current_patient_context", {"id": "Unknown", "pseudo": "Unknown"})
ids = [get_nested_value(q, ["questionnaire", "id"]) for q in matches]
logging.error(
f"[DUPLICATE QCM] Patient {ctx['id']} ({ctx['pseudo']}): "
f"Questionnaire category='{category}' matches {len(matches)} entries (ids: {ids}) — returning first match"
)
return matches[0].get("answers") if matches else None
def _get_field_value_from_questionnaire(all_questionnaires, field_config): def _get_field_value_from_questionnaire(all_questionnaires, field_config):
@@ -1065,6 +1097,13 @@ def get_all_questionnaires_by_patient(patient_id, record_data):
response.raise_for_status() response.raise_for_status()
response_data = response.json() response_data = response.json()
# First pass: count occurrences of each q_id to detect duplicates at lookup time
q_id_counts = {}
for item in response_data:
q_id = get_nested_value(item, path=["questionnaire", "id"])
if q_id:
q_id_counts[q_id] = q_id_counts.get(q_id, 0) + 1
# Build dictionary with questionnaire metadata for searching # Build dictionary with questionnaire metadata for searching
results = {} results = {}
for item in response_data: for item in response_data:
@@ -1079,7 +1118,8 @@ def get_all_questionnaires_by_patient(patient_id, record_data):
"name": q_name, "name": q_name,
"category": q_category "category": q_category
}, },
"answers": answers "answers": answers,
"_count": q_id_counts[q_id]
} }
return results return results
@@ -1238,9 +1278,12 @@ def _process_inclusion_data(inclusion, organization):
output_inclusion = {} output_inclusion = {}
# --- Prepare all data sources --- # --- Prepare all data sources ---
# 1. Launch Visit Search asynchronously (it's slow, ~5s) # 1. Launch Visit Search asynchronously (it's slow, ~5s) — only if enabled by user
# We use run_with_context to pass the patient identity to the new thread # We use run_with_context to pass the patient identity to the new thread
visit_future = subtasks_thread_pool.submit(run_with_context, search_visit_by_pseudo_and_order, ctx, pseudo, 2) if fetch_six_month_visit:
visit_future = subtasks_thread_pool.submit(run_with_context, search_visit_by_pseudo_and_order, ctx, pseudo, 2)
else:
visit_future = None
# 2. Prepare inclusion_data: enrich inclusion with organization info # 2. Prepare inclusion_data: enrich inclusion with organization info
inclusion_data = dict(inclusion) inclusion_data = dict(inclusion)
@@ -1265,7 +1308,7 @@ def _process_inclusion_data(inclusion, organization):
request_data = None request_data = None
try: try:
six_month_visit_data = visit_future.result() six_month_visit_data = visit_future.result() if visit_future is not None else {}
except Exception as e: except Exception as e:
logging.error(f"Error searching 6-month visit for patient {pseudo}: {e}") logging.error(f"Error searching 6-month visit for patient {pseudo}: {e}")
six_month_visit_data = None six_month_visit_data = None
@@ -1335,6 +1378,9 @@ def main():
if login_status == "Exit": if login_status == "Exit":
return return
print()
ask_fetch_six_month_visit()
print() print()
number_of_threads = int((questionary.text("Number of threads :", default="12", number_of_threads = int((questionary.text("Number of threads :", default="12",
validate=lambda x: x.isdigit() and 0 < int(x) <= MAX_THREADS).ask())) validate=lambda x: x.isdigit() and 0 < int(x) <= MAX_THREADS).ask()))

Binary file not shown.