diff --git a/Endolife - Demandes 20261603.xlsx b/Endolife - Demandes 20261603.xlsx new file mode 100644 index 0000000..312bacb Binary files /dev/null and b/Endolife - Demandes 20261603.xlsx differ diff --git a/Endolife_Reporting - 261303.xlsx b/Endolife_Reporting - 261303.xlsx new file mode 100644 index 0000000..ee6a199 Binary files /dev/null and b/Endolife_Reporting - 261303.xlsx differ diff --git a/config/DO_Dashboard_Config.xlsx b/config/DO_Dashboard_Config.xlsx new file mode 100644 index 0000000..6f2632d Binary files /dev/null and b/config/DO_Dashboard_Config.xlsx differ diff --git a/config/Endobest_Dashboard_Config.xlsx b/config/Endobest_Dashboard_Config.xlsx deleted file mode 100644 index c7ec03a..0000000 Binary files a/config/Endobest_Dashboard_Config.xlsx and /dev/null differ diff --git a/config/do_dashboard_extended_template.xlsx b/config/do_dashboard_extended_template.xlsx new file mode 100644 index 0000000..e5451ec Binary files /dev/null and b/config/do_dashboard_extended_template.xlsx differ diff --git a/config/eb_dashboard_monitoring_template.xlsx b/config/do_dashboard_monitoring_template.xlsx similarity index 100% rename from config/eb_dashboard_monitoring_template.xlsx rename to config/do_dashboard_monitoring_template.xlsx diff --git a/config/eb_dashboard_extended_template.xlsx b/config/eb_dashboard_extended_template.xlsx deleted file mode 100644 index 8274c39..0000000 Binary files a/config/eb_dashboard_extended_template.xlsx and /dev/null differ diff --git a/do_dashboard.bat b/do_dashboard.bat new file mode 100644 index 0000000..79e59ec --- /dev/null +++ b/do_dashboard.bat @@ -0,0 +1,4 @@ +@echo off +call C:\PythonProjects\.rcvenv\Scripts\activate.bat +python do_dashboard.py %* + diff --git a/do_dashboard.py b/do_dashboard.py new file mode 100644 index 0000000..f39542d --- /dev/null +++ b/do_dashboard.py @@ -0,0 +1,1475 @@ + +# DO (Diagnostic Order) Dashboard Generator +# This script automates the collection and processing of diagnostic order 
requests from the Ziwig GDD platform. +# It authenticates with Ziwig's IAM and GDD APIs to gather all worklist requests, their details, and +# associated professional (prescriber/requester) information. The script generates a comprehensive JSON +# report containing all request data, with 100% configurable fields defined in an Excel configuration file. +# All fields are externalized and can be configured without any code modification. The configuration +# supports multiple data sources, custom functions for business logic, field dependencies and conditions, +# value transformations, and multi-line field definitions for complex calculations. Organization counters +# are computed directly from request details (no separate statistics API). It employs multithreading with +# configurable worker pools to parallelize request processing across the thread pool, significantly reducing +# execution time. Results are exported as structured JSON files for easy integration with downstream analytics +# tools. Built-in quality assurance includes comprehensive non-regression testing with configurable +# Warning/Critical thresholds, and user confirmation prompts when critical issues are detected. Excel export +# functionality enables generation of configurable Excel workbooks with data filtering, sorting, value +# replacement, and formula recalculation. Key features include automatic token refresh handling, retry +# mechanisms for transient API failures, progress tracking with real-time visual feedback, and support for +# complex data extraction using JSON path expressions. 
+import json +import logging +import msvcrt +import os +import re +import sys +import threading +import traceback +from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import timedelta, datetime +from time import perf_counter, sleep +import functools + +import httpx +import openpyxl +import questionary +from tqdm import tqdm +from rich.console import Console + +# Import centralized constants (SINGLE SOURCE OF TRUTH) +from do_dashboard_constants import ( + REQUESTS_FILE_NAME, + ORGANIZATIONS_FILE_NAME, + OLD_FILE_SUFFIX, + DASHBOARD_CONFIG_FILE_NAME, + REQUESTS_MAPPING_TABLE_NAME, + ORGANIZATIONS_MAPPING_TABLE_NAME, + ORG_CENTER_MAPPING_FILE_NAME, + ORG_CENTER_MAPPING_TABLE_NAME, + DEFAULT_USER_NAME, + DEFAULT_PASSWORD, + IAM_URL, + GDD_URL, + GDD_APP_ID, + ERROR_MAX_RETRY, + WAIT_BEFORE_RETRY, + WAIT_BEFORE_NEW_BATCH_OF_RETRIES, + MAX_BATCHS_OF_RETRIES, + MAX_THREADS, + DO_FILTERS, + DO_WORKLIST_PAGE_SIZE, + BAR_N_FMT_WIDTH, + BAR_TOTAL_FMT_WIDTH, + BAR_TIME_WIDTH, + BAR_RATE_WIDTH, + LOG_FILE_NAME, + API_TIMEOUT, + API_AUTH_LOGIN_ENDPOINT, + API_AUTH_CONFIG_TOKEN_ENDPOINT, + API_AUTH_REFRESH_TOKEN_ENDPOINT, + API_DO_WORKLIST_ENDPOINT, + API_DO_REQUEST_DETAIL_ENDPOINT, + API_DO_PROFESSIONALS_ENDPOINT +) + +# Import refactored modules +from do_dashboard_utils import ( + get_nested_value, + get_httpx_client, + clear_httpx_client, + get_thread_position, + get_config_path, + thread_local_storage, + run_with_context +) +from do_dashboard_quality_checks import ( + backup_output_files, + run_quality_checks, + run_check_only_mode, + set_dependencies as quality_set_dependencies, + enable_debug_mode +) +from do_dashboard_excel_export import ( + prepare_excel_export, + export_excel_only, + run_normal_mode_export, + set_dependencies as excel_set_dependencies +) + +logging.basicConfig(level=logging.WARNING, format='%(asctime)s - %(levelname)s - %(message)s', filename=LOG_FILE_NAME, + filemode='w') + + +# 
# ============================================================================
# BLOCK 1: CONFIGURATION & BASE INFRASTRUCTURE
# ============================================================================

# NOTE: All constants are imported from do_dashboard_constants.py (SINGLE SOURCE OF TRUTH)

# --- Global Variables ---
# Bearer token / refresh token pair; rewritten by _do_login() and new_token().
access_token = ""
refresh_token = ""
threads_list = []
# Serializes token refresh / re-login (held by new_token()).
_token_refresh_lock = threading.Lock()
on_retry_exhausted = "ask"  # "ask" | "ignore" | "abort" — set at startup
_stored_username = ""       # Credentials stored at login for automatic re-login
_stored_password = ""
_threads_list_lock = threading.Lock()
global_pbar = None
_global_pbar_lock = threading.Lock()
# Held while api_call_with_retry decides what to do after retries are exhausted,
# so that at most one thread shows the interactive prompt at a time.
_user_interaction_lock = threading.Lock()

# Global variables (mutable, set at runtime - not constants)
requests_mapping_config = []
organizations_mapping_config = []
excel_export_config = None
excel_export_enabled = False

# NOTE(review): pool size is hard-coded to 40 although MAX_THREADS is imported — confirm this is intended.
subtasks_thread_pool = ThreadPoolExecutor(40)
httpx_clients = {}
console = Console()

# Share global variables with utility modules (required for thread-safe operations)
# The utility module receives the very same list/dict/lock objects (not copies).
import do_dashboard_utils
do_dashboard_utils.httpx_clients = httpx_clients
do_dashboard_utils.threads_list = threads_list
do_dashboard_utils._threads_list_lock = _threads_list_lock

# Inject console instance to modules
quality_set_dependencies(console)
excel_set_dependencies(console)

# Detect and enable debug mode if --debug flag is present (and remove it from argv)
if "--debug" in sys.argv:
    sys.argv.remove("--debug")
    enable_debug_mode()

# --- Progress Bar Configuration ---
# Doubled braces are literal braces left for tqdm to fill in; single braces are
# resolved here, once, with the width constants.
custom_bar_format = ("{l_bar}{bar}"
                     f" {{n_fmt:>{BAR_N_FMT_WIDTH}}}/{{total_fmt:<{BAR_TOTAL_FMT_WIDTH}}} "
                     f"[{{elapsed:<{BAR_TIME_WIDTH}}}<{{remaining:>{BAR_TIME_WIDTH}}}, "
                     f"{{rate_fmt:>{BAR_RATE_WIDTH}}}]{{postfix}}")


# ============================================================================
# BLOCK 2: DECORATORS & RESILIENCE
#
# ============================================================================

def new_token():
    """Refresh the access token, falling back to a full re-login.

    Up to ERROR_MAX_RETRY refresh attempts are made while holding
    _token_refresh_lock. A pause of WAIT_BEFORE_RETRY seconds is inserted only
    between *failed* attempts. If every attempt fails, a full login is performed
    with the credentials stored by login().

    Raises:
        httpx.RequestError / httpx.HTTPStatusError: if the fallback re-login fails.
    """
    global access_token, refresh_token
    with _token_refresh_lock:
        for attempt in range(ERROR_MAX_RETRY):
            try:
                client = get_httpx_client()
                client.base_url = GDD_URL
                response = client.post(API_AUTH_REFRESH_TOKEN_ENDPOINT,
                                       headers={"Authorization": f"Bearer {access_token}"},
                                       json={"refresh_token": refresh_token}, timeout=20)
                response.raise_for_status()
                payload = response.json()
                # Both keys are read before either global is rebound, so a malformed
                # answer cannot leave a new access token paired with a stale refresh token.
                access_token, refresh_token = payload["access_token"], payload["refresh_token"]
                return
            except httpx.RequestError as exc:
                logging.warning(f"Refresh Token Error (Attempt {attempt + 1}) : {exc}")
                clear_httpx_client()
            except httpx.HTTPStatusError as exc:
                logging.warning(
                    f"Refresh Token Error (Attempt {attempt + 1}) : {exc.response.status_code} for Url {exc.request.url}")
                clear_httpx_client()
            # Deliberately NOT in a `finally`: a `finally` also runs on the successful
            # `return` above and would delay every successful refresh while the lock is held.
            if attempt < ERROR_MAX_RETRY - 1:
                sleep(WAIT_BEFORE_RETRY)
        # Refresh token exhausted — attempt full re-login with stored credentials
        logging.warning("Refresh token exhausted. Attempting re-login with stored credentials.")
        _do_login(_stored_username, _stored_password)
        logging.info("Re-login successful. New tokens acquired.")


def api_call_with_retry(func):
    """Decorator for API calls with automatic retry and token refresh on 401 errors.

    The wrapped call is attempted in batches of ERROR_MAX_RETRY tries, with
    WAIT_BEFORE_RETRY seconds between tries and WAIT_BEFORE_NEW_BATCH_OF_RETRIES
    seconds between batches. Once MAX_BATCHS_OF_RETRIES batches have failed, the
    global `on_retry_exhausted` policy decides:

    * "ignore": the wrapper returns None;
    * "abort" : httpx.RequestError is raised;
    * "ask"   : the user chooses between a new batch sequence, ignoring, or stopping.

    Only httpx.RequestError and httpx.HTTPStatusError are retried; any other
    exception propagates immediately.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        func_name = func.__name__
        total_attempts = 0
        batch_count = 1

        while True:
            for attempt in range(ERROR_MAX_RETRY):
                total_attempts += 1
                try:
                    return func(*args, **kwargs)
                except (httpx.RequestError, httpx.HTTPStatusError) as exc:
                    logging.warning(f"Error in {func_name} (Attempt {total_attempts}): {exc}")

                    clear_httpx_client()

                    if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code == 401:
                        logging.info(f"Token expired for {func_name}. Refreshing token.")
                        try:
                            new_token()
                        except (httpx.RequestError, httpx.HTTPStatusError) as token_exc:
                            # A failed refresh is not fatal here: the normal retry loop goes on.
                            logging.warning(f"Token refresh/re-login failed for {func_name}: {token_exc}")

                    if attempt < ERROR_MAX_RETRY - 1:
                        sleep(WAIT_BEFORE_RETRY)
                        continue

                    # Last attempt of the current batch has failed.
                    if batch_count < MAX_BATCHS_OF_RETRIES:
                        logging.warning(f"Batch {batch_count}/{MAX_BATCHS_OF_RETRIES} failed for {func_name}. "
                                        f"Waiting {WAIT_BEFORE_NEW_BATCH_OF_RETRIES}s before automatic retry batch.")
                        batch_count += 1
                        sleep(WAIT_BEFORE_NEW_BATCH_OF_RETRIES)
                        break

                    # All batches exhausted: apply the configured policy, one thread at a time.
                    with _user_interaction_lock:
                        if on_retry_exhausted == "ignore":
                            ctx = getattr(thread_local_storage, "current_request_context", {"id": "Unknown"})
                            logging.warning(f"[AUTO-IGNORE] Skipping {func_name} for Request {ctx['id']}. Error: {exc}")
                            return None

                        elif on_retry_exhausted == "abort":
                            logging.critical(f"[AUTO-ABORT] Stopping script after persistent error in {func_name}. Error: {exc}")
                            raise httpx.RequestError(message=f"Persistent error in {func_name} (auto-aborted)")

                        else:  # "ask" — display error then interactive prompt
                            console.print(f"\n[bold red]Persistent error in {func_name} after {batch_count} batches ({total_attempts} attempts).[/bold red]")
                            console.print(f"[red]Exception: {exc}[/red]")

                            choice = questionary.select(
                                f"What would you like to do for {func_name}?",
                                choices=[
                                    "Retry (try another batch of retries)",
                                    "Ignore (return None and continue)",
                                    "Stop script (critical error)"
                                ]
                            ).ask()

                            if choice == "Retry (try another batch of retries)":
                                logging.info(f"User chose to retry {func_name}. Restarting batch sequence.")
                                batch_count = 1
                                break
                            elif choice == "Ignore (return None and continue)":
                                ctx = getattr(thread_local_storage, "current_request_context", {"id": "Unknown"})
                                logging.warning(f"[IGNORE] User opted to skip {func_name} for Request {ctx['id']}. Error: {exc}")
                                return None
                            else:
                                # Also reached when the prompt is cancelled (choice is None).
                                logging.critical(f"User chose to stop script after persistent error in {func_name}.")
                                raise httpx.RequestError(message=f"Persistent error in {func_name} (stopped by user)")

    return wrapper


# ============================================================================
# BLOCK 3: AUTHENTICATION
# ============================================================================

def _do_login(username, password):
    """Performs the two-step authentication (IAM → GDD) with the given credentials.
    Updates global access_token and refresh_token on success.
    Raises httpx.RequestError or httpx.HTTPStatusError on failure.
    Must NOT acquire _token_refresh_lock (caller's responsibility).
    """
    global access_token, refresh_token

    # Step 1: IAM login
    client = get_httpx_client()
    client.base_url = IAM_URL
    response = client.post(API_AUTH_LOGIN_ENDPOINT,
                           json={"username": username, "password": password},
                           timeout=20)
    response.raise_for_status()
    iam_payload = response.json()
    master_token = iam_payload["access_token"]
    user_id = iam_payload["userId"]

    # Step 2: GDD config-token
    client = get_httpx_client()
    client.base_url = GDD_URL
    response = client.post(API_AUTH_CONFIG_TOKEN_ENDPOINT,
                           headers={"Authorization": f"Bearer {master_token}"},
                           json={"userId": user_id, "clientId": GDD_APP_ID,
                                 "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"},
                           timeout=20)
    response.raise_for_status()
    gdd_payload = response.json()
    # Read both keys before rebinding the globals so they are always updated together.
    access_token, refresh_token = gdd_payload["access_token"], gdd_payload["refresh_token"]


def login():
    """Prompt for credentials and authenticate.

    Returns:
        str: "Exit" when either prompt is left empty or cancelled, "Error" when
        authentication fails with an httpx error, "Success" otherwise. On success
        the credentials are kept in memory for automatic re-login.
    """
    global _stored_username, _stored_password

    user_name = (questionary.text("login :", default=DEFAULT_USER_NAME).ask())
    password = (questionary.password("password :", default=DEFAULT_PASSWORD).ask())
    if not (user_name and password):
        return "Exit"

    try:
        _do_login(user_name, password)
    except httpx.RequestError as exc:
        print(f"Login Error : {exc}")
        logging.warning(f"Login Error : {exc}")
        return "Error"
    except httpx.HTTPStatusError as exc:
        print(f"Login Error : {exc.response.status_code} for Url {exc.request.url}")
        logging.warning(f"Login Error : {exc.response.status_code} for Url {exc.request.url}")
        return "Error"

    _stored_username = user_name
    _stored_password = password
    print()
    print("Login Success")
    return "Success"


# ============================================================================
# BLOCK 3B: STARTUP PARAMETERS & FILE UTILITIES
# ============================================================================

def ask_on_retry_exhausted():
    """Asks the user what to do when all API retry batches are exhausted.

    Stores "ask", "ignore" or "abort" in the global `on_retry_exhausted`.
    A cancelled prompt is treated as "ask".
    """
    global on_retry_exhausted
    choice = questionary.select(
        "On retry exhausted :",
        choices=[
            "Ask (interactive prompt)",
            "Ignore (return None and continue)",
            "Abort (stop script)"
        ]
    ).ask()

    if choice is None or choice == "Ask (interactive prompt)":
        on_retry_exhausted = "ask"
    elif choice == "Ignore (return None and continue)":
        on_retry_exhausted = "ignore"
    else:
        on_retry_exhausted = "abort"


def wait_for_scheduled_launch():
    """Asks the user when to start the processing and waits if needed.
    Options: Immediately / In X minutes / At HH:MM

    Returns immediately when the prompt is cancelled, when no delay is entered,
    or when the requested HH:MM is not later than the current time today.
    Ctrl+C during the countdown exits the script (SystemExit(0)).
    """
    choice = questionary.select(
        "When to start processing ?",
        choices=["Immediately", "In X minutes", "At HH:MM"]
    ).ask()

    if choice is None or choice == "Immediately":
        return

    if choice == "In X minutes":
        minutes_str = questionary.text(
            "Number of minutes :",
            validate=lambda x: x.isdigit() and int(x) > 0
        ).ask()
        if not minutes_str:
            return
        target_time = datetime.now() + timedelta(minutes=int(minutes_str))

    else:  # "At HH:MM"
        time_str = questionary.text(
            "Start time (HH:MM) :",
            validate=lambda x: bool(re.match(r'^\d{2}:\d{2}$', x)) and
                               0 <= int(x.split(':')[0]) <= 23 and
                               0 <= int(x.split(':')[1]) <= 59
        ).ask()
        if not time_str:
            return
        now = datetime.now()
        h, m = int(time_str.split(':')[0]), int(time_str.split(':')[1])
        target_time = now.replace(hour=h, minute=m, second=0, microsecond=0)
        if target_time <= now:
            console.print("[yellow]⚠ Specified time is already past. Starting immediately.[/yellow]")
            return

    print()
    try:
        while True:
            remaining = target_time - datetime.now()
            if remaining.total_seconds() <= 0:
                break
            total_secs = int(remaining.total_seconds())
            h = total_secs // 3600
            m = (total_secs % 3600) // 60
            s = total_secs % 60
            target_str = target_time.strftime('%H:%M:%S')
            print(f"\r Starting in {h:02d}:{m:02d}:{s:02d}... (at {target_str}) — Ctrl+C to cancel ",
                  end="", flush=True)
            sleep(1)
        # Discard keys typed during the countdown so they do not leak into the next prompt.
        while msvcrt.kbhit():
            msvcrt.getwch()
        print()
        console.print("[green]✓ Starting processing.[/green]")
    except KeyboardInterrupt:
        print()
        console.print("[bold red]Launch cancelled by user.[/bold red]")
        raise SystemExit(0)


def load_json_file(filename):
    """Load a JSON file from disk. Returns parsed data or None on error.

    A missing file returns None silently; a file that exists but cannot be read
    or parsed is reported (log + console warning) and also yields None.
    """
    if os.path.exists(filename):
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                return json.load(f)
        except Exception as e:
            logging.warning(f"Could not load JSON file '{filename}': {e}")
            console.print(f"[yellow]⚠ Warning: Could not load JSON file '{filename}': {e}[/yellow]")
    return None


# ============================================================================
# BLOCK 4: CONFIGURATION LOADING
# ============================================================================

def _raise_config_error(error_msg, cause=None):
    """Log error_msg as critical, echo it in red on the console, then raise Exception(error_msg)."""
    logging.critical(error_msg)
    console.print(f"[bold red]{error_msg}[/bold red]")
    raise Exception(error_msg) from cause


def _open_config_workbook():
    """Open the dashboard configuration workbook (data_only=True); the caller must close it."""
    config_path = os.path.join(get_config_path(), DASHBOARD_CONFIG_FILE_NAME)
    try:
        return openpyxl.load_workbook(config_path, data_only=True)
    except FileNotFoundError as exc:
        _raise_config_error(f"Error: Configuration file not found at: {config_path}", exc)


def _parse_source_id(source_id_raw):
    """Split a raw 'source_id' cell into a (source_type, source_value) pair."""
    if not source_id_raw or not isinstance(source_id_raw, str):
        return None, None
    for prefix in ("q_id=", "q_name=", "q_category="):
        if source_id_raw.startswith(prefix):
            # The type is the prefix without its trailing '='.
            return prefix[:-1], source_id_raw[len(prefix):]
    if source_id_raw in ("record", "request"):
        return source_id_raw, None
    return None, source_id_raw


def load_requests_mapping_config():
    """Loads and validates the requests mapping configuration from the Excel file.

    Each row after the header becomes a dict keyed by the header cells, enriched
    with 'source_type' / 'source_value' (parsed from 'source_id') and with the
    JSON columns decoded. Rows whose 'source_name' is "Not Specified" are skipped.
    The result replaces the global `requests_mapping_config`.

    Raises:
        Exception: if the file or sheet is missing, or a row is invalid
            (missing field_name / field_path, non-string or malformed JSON cell).
    """
    global requests_mapping_config
    workbook = _open_config_workbook()

    try:
        if REQUESTS_MAPPING_TABLE_NAME not in workbook.sheetnames:
            _raise_config_error(f"Error: Sheet '{REQUESTS_MAPPING_TABLE_NAME}' not found in the configuration file.")

        sheet = workbook[REQUESTS_MAPPING_TABLE_NAME]
        headers = [cell.value for cell in sheet[1]]

        temp_config = []

        for row_index, row in enumerate(sheet.iter_rows(min_row=2, values_only=True), start=2):
            field_config = dict(zip(headers, row))

            if field_config.get("source_name") == "Not Specified":
                continue

            field_name = field_config.get("field_name")
            if not field_name or not isinstance(field_name, str):
                _raise_config_error(f"Error in config file, row {row_index}: 'field_name' is mandatory.")
            # Drop a trailing "(...)" annotation from the field name.
            field_config["field_name"] = re.sub(r'\s*\([^)]*\)$', '', field_name).strip()

            # Parse source_id prefix
            source_type, source_value = _parse_source_id(field_config.get("source_id", ""))
            field_config["source_type"] = source_type
            field_config["source_value"] = source_value

            for json_field in ["field_path", "field_condition", "true_if_any", "value_labels"]:
                value = field_config.get(json_field)
                if not value:
                    field_config[json_field] = None
                    continue
                if not isinstance(value, str):
                    _raise_config_error(
                        f"Error in config file, row {row_index}, field '{json_field}': Invalid value, must be a JSON string.")
                try:
                    field_config[json_field] = json.loads(value)
                except json.JSONDecodeError as exc:
                    _raise_config_error(
                        f"Error in config file, row {row_index}, field '{json_field}': Invalid JSON format.", exc)

            if not field_config.get("field_path"):
                _raise_config_error(
                    f"Error in config file, row {row_index}: 'field_path' is mandatory when a field is specified.")

            temp_config.append(field_config)
    finally:
        # Same cleanup as the other loaders, on success and on every error path.
        workbook.close()

    requests_mapping_config = temp_config
    console.print(f"Loaded {len(requests_mapping_config)} fields from requests mapping configuration.", style="green")


def load_organizations_mapping_config():
    """Loads and validates the organizations mapping configuration from the Excel file.

    The sheet is optional: when it is absent the global
    `organizations_mapping_config` is set to an empty list. Otherwise rows are
    read until the first fully empty row and stored as dicts keyed by the
    non-empty header cells.

    Raises:
        Exception: if the file is missing or a row cannot be parsed.
    """
    global organizations_mapping_config
    workbook = _open_config_workbook()

    try:
        if ORGANIZATIONS_MAPPING_TABLE_NAME not in workbook.sheetnames:
            logging.info(f"Sheet '{ORGANIZATIONS_MAPPING_TABLE_NAME}' not found. Organizations mapping is optional.")
            organizations_mapping_config = []
            return

        sheet = workbook[ORGANIZATIONS_MAPPING_TABLE_NAME]
        headers = [cell.value for cell in sheet[1]]
        headers_filtered = [h for h in headers if h is not None]

        mapping_config = []
        try:
            for row in sheet.iter_rows(min_row=2, values_only=True):
                if all(cell is None for cell in row):
                    break
                row_filtered = row[:len(headers_filtered)]
                mapping_config.append(dict(zip(headers_filtered, row_filtered)))
        except Exception as e:
            _raise_config_error(f"Error parsing organizations mapping: {e}", e)
    finally:
        workbook.close()

    organizations_mapping_config = mapping_config
    if mapping_config:
        console.print(f"Loaded {len(organizations_mapping_config)} organizations from organizations mapping configuration.", style="green")
    else:
        console.print("No organizations mapping found (this is optional).", style="yellow")


def load_do_filters_config():
    """
    Loads the DO filters from the Named Range 'DO_Filters' in the config Excel file.
    The Named Range contains a JSON string representing the filters object for the worklist API.

    Returns:
        dict: Filters object (e.g. {"status": "all-admin", "study": "ENDOLIFE"})

    Raises:
        Exception: If the Named Range is not found or the value is not valid JSON.
    """
    workbook = _open_config_workbook()

    try:
        if DO_FILTERS not in workbook.defined_names:
            _raise_config_error(f"Error: Named range '{DO_FILTERS}' not found in configuration file.")

        try:
            named_range = workbook.defined_names[DO_FILTERS]
            destinations = list(named_range.destinations)
            if not destinations:
                raise ValueError("Named range has no destinations")
            sheet_name, cell_ref = destinations[0]
            # Remove absolute reference markers ($) for cell access
            cell_ref_clean = cell_ref.replace('$', '')
            sheet = workbook[sheet_name]
            cell_value = sheet[cell_ref_clean].value
        except Exception as e:
            _raise_config_error(f"Error reading Named Range '{DO_FILTERS}': {e}", e)
    finally:
        workbook.close()

    if not cell_value or not isinstance(cell_value, str):
        _raise_config_error(f"Error: Named range '{DO_FILTERS}' is empty or not a string.")

    try:
        filters = json.loads(cell_value)
    except json.JSONDecodeError as e:
        _raise_config_error(f"Error: Named range '{DO_FILTERS}' does not contain valid JSON: {e}", e)

    console.print(f"Loaded DO filters: {filters}", style="green")
    return filters


# ============================================================================
# BLOCK 5: DATA SEARCH & EXTRACTION
# ============================================================================

def get_value_from_request(output_request, key):
    """Helper to find a key in the nested output_request structure (groups → fields).

    Returns the value of `key` in the first dict-valued group that contains it,
    or None when no group does. Non-dict groups are ignored.
    """
    for group in output_request.values():
        if isinstance(group, dict) and key in group:
            return group[key]
    return None


# ============================================================================
# BLOCK 6: CUSTOM FUNCTIONS & FIELD PROCESSING
#
============================================================================ + +def _execute_custom_function(function_name, args, output_request): + """Executes a custom function for a calculated field.""" + if function_name == "search_in_fields_using_regex": + if not args or len(args) < 2: + return "$$$$ Argument Error: search_in_fields_using_regex requires at least 2 arguments" + + regex_pattern = args[0] + field_names = args[1:] + + field_values = [] + all_undefined = True + + for field_name in field_names: + value = get_value_from_request(output_request, field_name) + field_values.append(value) + if value is not None and value != "undefined": + all_undefined = False + + if all_undefined: + return "undefined" + + try: + for value in field_values: + if isinstance(value, str) and re.search(regex_pattern, value, re.IGNORECASE): + return True + except re.error as e: + return f"$$$$ Regex Error: {e}" + + return False + + elif function_name == "extract_parentheses_content": + if not args or len(args) != 1: + return "$$$$ Argument Error: extract_parentheses_content requires 1 argument" + + field_name = args[0] + value = get_value_from_request(output_request, field_name) + + if value is None or value == "undefined": + return "undefined" + + match = re.search(r'\((.*?)\)', str(value)) + return match.group(1) if match else "undefined" + + elif function_name == "append_terminated_suffix": + if not args or len(args) != 2: + return "$$$$ Argument Error: append_terminated_suffix requires 2 arguments" + + status = get_value_from_request(output_request, args[0]) + is_terminated = get_value_from_request(output_request, args[1]) + + if status is None or status == "undefined": + return "undefined" + + if not isinstance(is_terminated, bool) or not is_terminated: + return status + + return f"{status} - AP" + + elif function_name == "if_then_else": + if not args or len(args) < 4: + return "$$$$ Argument Error: if_then_else requires at least 4 arguments" + + operator = args[0] + + def 
resolve_value(arg): + if isinstance(arg, bool): + return arg + if isinstance(arg, (int, float)): + return arg + if isinstance(arg, str) and arg.startswith("$"): + return arg[1:] + return get_value_from_request(output_request, arg) + + if operator == "is_true": + if len(args) != 4: + return "$$$$ Argument Error: is_true requires 4 arguments" + value = resolve_value(args[1]) + if value is None or value == "undefined": + return "undefined" + condition = (value is True) + result_if_true = resolve_value(args[2]) + result_if_false = resolve_value(args[3]) + + elif operator == "is_false": + if len(args) != 4: + return "$$$$ Argument Error: is_false requires 4 arguments" + value = resolve_value(args[1]) + if value is None or value == "undefined": + return "undefined" + condition = (value is False) + result_if_true = resolve_value(args[2]) + result_if_false = resolve_value(args[3]) + + elif operator == "all_true": + if len(args) != 4: + return "$$$$ Argument Error: all_true requires 4 arguments" + fields_arg = args[1] + if not isinstance(fields_arg, list): + return "$$$$ Argument Error: all_true requires arg1 to be a list of field names" + + conditions = [] + for field_name in fields_arg: + field_value = get_value_from_request(output_request, field_name) + if field_value is None or field_value == "undefined": + return "undefined" + conditions.append(field_value) + + condition = all(conditions) + result_if_true = resolve_value(args[2]) + result_if_false = resolve_value(args[3]) + + elif operator == "is_defined": + if len(args) != 4: + return "$$$$ Argument Error: is_defined requires 4 arguments" + value = resolve_value(args[1]) + condition = (value is not None and value != "undefined") + result_if_true = resolve_value(args[2]) + result_if_false = resolve_value(args[3]) + + elif operator == "is_undefined": + if len(args) != 4: + return "$$$$ Argument Error: is_undefined requires 4 arguments" + value = resolve_value(args[1]) + condition = (value is None or value == 
"undefined") + result_if_true = resolve_value(args[2]) + result_if_false = resolve_value(args[3]) + + elif operator == "all_defined": + if len(args) != 4: + return "$$$$ Argument Error: all_defined requires 4 arguments" + fields_arg = args[1] + if not isinstance(fields_arg, list): + return "$$$$ Argument Error: all_defined requires arg1 to be a list of field names" + + for field_name in fields_arg: + field_value = get_value_from_request(output_request, field_name) + if field_value is None or field_value == "undefined": + condition = False + break + else: + condition = True + + result_if_true = resolve_value(args[2]) + result_if_false = resolve_value(args[3]) + + elif operator == "==": + if len(args) != 5: + return "$$$$ Argument Error: == requires 5 arguments" + value1 = resolve_value(args[1]) + value2 = resolve_value(args[2]) + + if value1 is None or value1 == "undefined" or value2 is None or value2 == "undefined": + return "undefined" + + condition = (value1 == value2) + result_if_true = resolve_value(args[3]) + result_if_false = resolve_value(args[4]) + + elif operator == "!=": + if len(args) != 5: + return "$$$$ Argument Error: != requires 5 arguments" + value1 = resolve_value(args[1]) + value2 = resolve_value(args[2]) + + if value1 is None or value1 == "undefined" or value2 is None or value2 == "undefined": + return "undefined" + + condition = (value1 != value2) + result_if_true = resolve_value(args[3]) + result_if_false = resolve_value(args[4]) + + else: + return f"$$$$ Unknown Operator: {operator}" + + return result_if_true if condition else result_if_false + + elif function_name == "extract_value_from_array": + # Args: [array_field_name, key_path, search_value, value_path] + # array_field_name : name of an already-computed field in output_request containing the array + # key_path : relative path (list) to the key attribute within each array item + # search_value : JSON value to match against (string, number, bool, object...) 
+ # value_path : relative path (list) to the value to extract from the matched item + if not args or len(args) != 4: + return "$$$$ Argument Error: extract_value_from_array requires 4 arguments" + + array_field_name, key_path, search_value, value_path = args + + if not isinstance(key_path, list): + return "$$$$ Argument Error: extract_value_from_array key_path (arg2) must be a list" + if not isinstance(value_path, list): + return "$$$$ Argument Error: extract_value_from_array value_path (arg4) must be a list" + + array = get_value_from_request(output_request, array_field_name) + + if array is None or array == "undefined": + return "undefined" + if not isinstance(array, list): + return "$$$$ Format Error : Array expected" + + for item in array: + if get_nested_value(item, key_path) == search_value: + return get_nested_value(item, value_path, default="undefined") + + return "undefined" + + return f"$$$$ Unknown Custom Function: {function_name}" + + +def process_requests_mapping(output_request, request_data): + """Processes and adds the requests mapping fields to the output request dictionary.""" + for field in requests_mapping_config: + field_name = field["field_name"] + field_group = field.get("field_group", "Extended_Fields") + final_value = "undefined" + + # Check condition + condition_field_name = field.get("field_condition") + if condition_field_name: + condition_value = get_value_from_request(output_request, condition_field_name) + + if condition_value is None or condition_value == "undefined": + final_value = "undefined" + elif not isinstance(condition_value, bool): + final_value = "$$$$ Condition Field Error" + elif not condition_value: + final_value = "N/A" + + # If condition allows, process the field + if final_value == "undefined": + source_name = field.get("source_name") + source_type = field.get("source_type") + field_path = field.get("field_path") + + # Get raw value from appropriate source + if source_name == "Calculated": + function_name = 
field.get("source_id") + args = field_path + final_value = _execute_custom_function(function_name, args, output_request) + elif source_type == "request": + final_value = get_nested_value(request_data, field_path, default="undefined") + else: + # source types not used in DO (q_id, q_name, q_category, record, etc.) + # return undefined to allow future extensibility + final_value = "undefined" + + # If the source data itself is missing, log a warning but continue + if final_value == "$$$$ No Data": + request_id = request_data.get("id", "Unknown") if isinstance(request_data, dict) else "Unknown" + logging.warning(f"No '{source_type}' data source found for Request {request_id} (Field: {field_name})") + final_value = "undefined" + + # Post-processing: Apply true_if_any and value_labels transformations + if final_value not in ["undefined", "$$$$ No Data"]: + check_values = field.get("true_if_any") + if check_values: + raw_value_set = set(final_value if isinstance(final_value, list) else [final_value]) + check_values_set = set(check_values if isinstance(check_values, list) else [check_values]) + final_value = not raw_value_set.isdisjoint(check_values_set) + + value_labels = field.get("value_labels") + if value_labels and final_value not in ["$$$$ Format Error : Array expected"]: + found = False + for label_map in value_labels: + if label_map.get("value") == final_value: + final_value = get_nested_value(label_map, ["text", "fr"], default=f"$$$$ Value Error : {final_value}") + found = True + break + if not found: + final_value = f"$$$$ Value Error : {final_value}" + + # Post-processing: If the value is a list, join it with a pipe + if isinstance(final_value, list): + final_value = "|".join(map(str, final_value)) + + # Post-processing: Format score dictionaries + if isinstance(final_value, dict) and 'total' in final_value and 'max' in final_value: + final_value = f"{final_value['total']}/{final_value['max']}" + + # Post-processing: Apply field template + field_template = 
field.get("field_template") + if field_template and final_value not in ["undefined", "N/A"] and isinstance(final_value, (str, int, float, bool)): + final_value = field_template.replace("$value", str(final_value)) + + if field_group not in output_request: + output_request[field_group] = {} + output_request[field_group][field_name] = final_value + + +# ============================================================================ +# BLOCK 7: BUSINESS API CALLS +# ============================================================================ + +@api_call_with_retry +def get_worklist_page(filters, page, page_size): + """Fetches one page of the diagnostic order worklist.""" + client = get_httpx_client() + client.base_url = GDD_URL + response = client.post( + API_DO_WORKLIST_ENDPOINT, + headers={"Authorization": f"Bearer {access_token}"}, + json={ + "lang": "fr-FR", + "filters": filters, + "limit": page_size, + "page": page, + "sort": [] + }, + timeout=API_TIMEOUT + ) + response.raise_for_status() + return response.json() + + +@api_call_with_retry +def get_request_detail_by_id(request_id): + """Fetches the full validation detail for a single request.""" + client = get_httpx_client() + client.base_url = GDD_URL + response = client.get( + f"{API_DO_REQUEST_DETAIL_ENDPOINT}/{request_id}/validation", + headers={"Authorization": f"Bearer {access_token}"}, + timeout=API_TIMEOUT + ) + response.raise_for_status() + return response.json() + + +@api_call_with_retry +def get_professionals(ids): + """ + Fetches professional display names for a list of IDs (prescriber / requester). + IDs are deduplicated before the call. Results are matched by ID for robustness. 
def _normalize_mapping_name(raw_name):
    """Return the trimmed, lower-cased text form of a mapping cell value."""
    # str() is a no-op for strings and covers numeric cells read from Excel.
    return str(raw_name).strip().lower()


def _skip_center_mapping(reason):
    """Report why the center mapping is unusable and return an empty mapping.

    The reason is printed on the console and also written to the log, so that
    an unattended (scheduled) run keeps a trace of the degraded mode.
    """
    console.print(f"[yellow]⚠ {reason}. Skipping center mapping.[/yellow]")
    logging.warning(reason)
    return {}


def load_organization_center_mapping():
    """
    Loads the organization ↔ center mapping from the Excel mapping file.

    The file is looked up at ORG_CENTER_MAPPING_FILE_NAME as given (a relative
    name is therefore resolved against the current working directory) and the
    sheet ORG_CENTER_MAPPING_TABLE_NAME must expose the columns
    'Organization_Name' and 'Center_Name'.

    The mapping is best-effort: any problem (missing file, unreadable workbook,
    missing sheet or columns, duplicated organization or center name after
    normalization) is reported and an empty mapping is returned instead of
    raising.

    Returns:
        dict: {organization_name_normalized: center_name} or {} if error/skip.
              Keys are stripped and lower-cased, values are stripped only.
    """
    mapping_file = ORG_CENTER_MAPPING_FILE_NAME

    if not os.path.exists(mapping_file):
        return _skip_center_mapping(f"Mapping file not found at: {mapping_file}")

    try:
        workbook = openpyxl.load_workbook(mapping_file)
    except Exception as e:
        # Deliberately broad: a broken mapping file must never stop the run.
        return _skip_center_mapping(f"Error loading mapping file: {e}")

    if ORG_CENTER_MAPPING_TABLE_NAME not in workbook.sheetnames:
        return _skip_center_mapping(f"Sheet '{ORG_CENTER_MAPPING_TABLE_NAME}' not found in mapping file")

    sheet = workbook[ORG_CENTER_MAPPING_TABLE_NAME]
    headers = [cell.value for cell in sheet[1]]

    if "Organization_Name" not in headers or "Center_Name" not in headers:
        return _skip_center_mapping(
            "Required columns 'Organization_Name' or 'Center_Name' not found in mapping file"
        )

    # Collect the (organization, center) pairs; incomplete rows are ignored.
    mapping_rows = []
    try:
        for row in sheet.iter_rows(min_row=2, values_only=True):
            if all(cell is None for cell in row):
                continue
            row_dict = dict(zip(headers, row))
            org_name = row_dict.get("Organization_Name")
            center_name = row_dict.get("Center_Name")
            if org_name and center_name:
                mapping_rows.append((org_name, center_name))
    except Exception as e:
        return _skip_center_mapping(f"Error reading mapping file rows: {e}")

    # Single pass: validate uniqueness on the normalized names while building
    # the mapping. A duplicate on either side invalidates the whole mapping.
    mapping_dict = {}
    seen_centers = set()
    for org_name_raw, center_name_raw in mapping_rows:
        org_normalized = _normalize_mapping_name(org_name_raw)
        center_normalized = _normalize_mapping_name(center_name_raw)

        if org_normalized in mapping_dict:
            return _skip_center_mapping(f"Duplicate found in Organization_Name: '{org_name_raw}'")
        if center_normalized in seen_centers:
            return _skip_center_mapping(f"Duplicate found in Center_Name: '{center_name_raw}'")

        seen_centers.add(center_normalized)
        # The center keeps its original casing; only surrounding blanks go.
        mapping_dict[org_normalized] = str(center_name_raw).strip()

    return mapping_dict
Fetch professional names (prescriber + requester, deduplicated) --- + prescriber_id = worklist_request.get("prescriber") + requester_id = worklist_request.get("requester") + + # Deduplicate IDs before API call + unique_ids = list({pid for pid in [prescriber_id, requester_id] if pid}) + professionals = get_professionals(unique_ids) if unique_ids else {} + + # Inject professional names (None if not found) + request_detail["prescriberName"] = professionals.get(prescriber_id) if prescriber_id else None + request_detail["requesterName"] = professionals.get(requester_id) if requester_id else None + + # --- 3. Inject patient identity fields from worklist --- + identity = worklist_request.get("identity") or {} + request_detail["lastname"] = identity.get("lastname") + request_detail["firstname"] = identity.get("firstname") + request_detail["birthday"] = identity.get("birthday") + + # --- 4. Status override: diagnostic_status takes precedence if defined --- + diagnostic_status = request_detail.get("diagnostic_status") + if diagnostic_status is not None and diagnostic_status != "": + request_detail["status"] = diagnostic_status + + # --- 5. Center mapping: inject Center_Name from labeledOrganization --- + labeled_org = worklist_request.get("labeledOrganization") + if labeled_org: + org_normalized = labeled_org.strip().lower() + request_detail["Center_Name"] = mapping_dict.get(org_normalized, labeled_org) + else: + request_detail["Center_Name"] = None + + # Also inject organization and labeledOrganization for mapping access + request_detail["organization"] = worklist_request.get("organization") + request_detail["labeledOrganization"] = labeled_org + + # --- 6. Apply requests mapping to produce output object --- + output_request = {} + process_requests_mapping(output_request, request_detail) + + # --- 7. 
# Request status -> organization counter fed by that status.
# Insertion order is also the order of the counters in the output objects.
_STATUS_COUNTER_FIELDS = {
    "active": "sent_count",
    "accepted": "accepted_count",
    "rejected": "rejected_count",
    "waiting": "sequencing_count",
    "in progress": "ai_count",
    "finished": "result_available_count",
    "signed": "report_available_count",
}

# Diagnostic result -> organization counter fed by that result.
_RESULT_COUNTER_FIELDS = {
    "POSITIVE": "positive_count",
    "NEGATIVE": "negative_count",
    "UNINTERPRETABLE": "uninterpretable_count",
}


def build_organizations(request_metas):
    """
    Builds the organizations summary list from collected request metadata.

    Each organization entry contains the organization identity (id, name,
    center_name, taken from the first request seen for that organization),
    a total_count, and counters derived from the status and diagnostic_result
    of its requests. Values that match no known status / result only
    contribute to total_count.

    Args:
        request_metas: Iterable of request_meta dicts exposing the keys
            "org_id", "org_name", "center_name", "status" and
            "diagnostic_result". Entries without a truthy "org_id" are ignored.

    Returns:
        List of organization dicts, sorted by center_name then id
        (a missing center_name sorts first).
    """
    org_map = {}

    for meta in request_metas:
        org_id = meta.get("org_id")
        if not org_id:
            continue

        org = org_map.get(org_id)
        if org is None:
            org = {
                "id": org_id,
                "name": meta.get("org_name"),
                "center_name": meta.get("center_name"),
                "total_count": 0,
            }
            # Seed every counter at zero, in table order, so the serialized
            # objects always expose the same keys in the same order.
            org.update(dict.fromkeys(_STATUS_COUNTER_FIELDS.values(), 0))
            org.update(dict.fromkeys(_RESULT_COUNTER_FIELDS.values(), 0))
            org_map[org_id] = org

        org["total_count"] += 1

        for counter_fields, value in (
            (_STATUS_COUNTER_FIELDS, meta.get("status")),
            (_RESULT_COUNTER_FIELDS, meta.get("diagnostic_result")),
        ):
            # Only strings can match; the guard also keeps unhashable
            # values (e.g. a list) from raising in the table lookup.
            counter = counter_fields.get(value) if isinstance(value, str) else None
            if counter is not None:
                org[counter] += 1

    return sorted(
        org_map.values(),
        key=lambda o: (o.get("center_name") or "", o.get("id") or ""),
    )
wait_for_scheduled_launch() + + print() + load_requests_mapping_config() + load_organizations_mapping_config() + + # Load DO filters from config + print() + do_filters = load_do_filters_config() + + # Load and validate Excel export configuration + print() + console.print("[bold cyan]Loading Excel export configuration...[/bold cyan]") + + excel_export_config, has_config_critical, _ = \ + prepare_excel_export(requests_mapping_config, organizations_mapping_config) + + if has_config_critical: + print() + answer = questionary.confirm( + "⚠ Critical configuration errors detected. Continue anyway?", + default=False + ).ask() + if not answer: + console.print("[bold red]Aborted by user[/bold red]") + return + else: + excel_export_enabled = False + else: + excel_export_enabled = True if excel_export_config else False + + # Load center mapping + print() + print("Loading organization center mapping...") + mapping_dict = load_organization_center_mapping() + + # === FETCH WORKLIST (paginated) === + print() + start_time = perf_counter() + + with console.status("[bold green]Fetching worklist (page 1)...", spinner="dots"): + first_page = get_worklist_page(do_filters, 1, DO_WORKLIST_PAGE_SIZE) + + metadata = first_page.get("metadata", {}) + total_requests = metadata.get("total", 0) + total_pages = metadata.get("pages", 1) + + print(f"{total_requests} requests across {total_pages} pages...") + print() + + # === SUBMIT ALL REQUESTS TO THREAD POOL AS PAGES ARRIVE === + all_futures = [] + + with ThreadPoolExecutor(max_workers=number_of_threads) as thread_pool: + + # Progress bar 1: page fetching + with tqdm(total=total_pages, unit="page", + desc=f"{'Fetching pages':<52}", + position=0, leave=True, + bar_format=custom_bar_format) as pages_pbar: + + # Submit first page requests + for worklist_request in first_page.get("data", []): + f = thread_pool.submit(run_with_context, _process_single_request, + {"id": worklist_request.get("id")}, + worklist_request, mapping_dict) + 
all_futures.append(f) + pages_pbar.update(1) + + # Fetch and submit remaining pages + for page_num in range(2, total_pages + 1): + page_data = get_worklist_page(do_filters, page_num, DO_WORKLIST_PAGE_SIZE) + for worklist_request in page_data.get("data", []): + f = thread_pool.submit(run_with_context, _process_single_request, + {"id": worklist_request.get("id")}, + worklist_request, mapping_dict) + all_futures.append(f) + pages_pbar.update(1) + + print() + + # Progress bar 2: request processing + all_results = [] # list of (output_request, request_meta) + + with tqdm(total=total_requests, unit="req.", + desc=f"{'Processing requests':<52}", + position=0, leave=True, + bar_format=custom_bar_format) as processing_pbar: + + global_pbar = processing_pbar + + for future in as_completed(all_futures): + try: + result = future.result() + all_results.append(result) + except Exception as exc: + logging.critical(f"Critical exception in request worker: {exc}", exc_info=True) + print(f"\nCRITICAL ERROR in request processing thread:") + print(f"Exception: {exc}") + traceback.print_exc() + thread_pool.shutdown(wait=False, cancel_futures=True) + raise + finally: + with _global_pbar_lock: + if global_pbar: + global_pbar.update(1) + + # === SORT RESULTS === + print() + print() + print("Sorting results...") + + all_results.sort(key=lambda x: ( + x[1].get("lastname") or "", + x[1].get("firstname") or "", + x[1].get("id") or "" + )) + + output_requests = [r[0] for r in all_results] + request_metas = [r[1] for r in all_results] + + # === BUILD ORGANIZATIONS === + print("Building organizations summary...") + organizations_list = build_organizations(request_metas) + + try: + # === QUALITY CHECKS === + print() + has_regression_critical = run_quality_checks( + current_requests=output_requests, + old_requests_filename=REQUESTS_FILE_NAME + ) + + # === CHECK FOR CRITICAL ISSUES AND ASK USER CONFIRMATION === + if has_regression_critical: + print() + console.print("[bold red]⚠ CRITICAL issues 
detected in quality checks![/bold red]") + confirm_write = questionary.confirm( + "Do you want to write the results anyway?", + default=True + ).ask() + + if not confirm_write: + console.print("[yellow]✗ Output writing cancelled by user. Files were not modified.[/yellow]") + console.print("[yellow] You can re-run the script to try again.[/yellow]") + print() + print(f"Elapsed time : {str(timedelta(seconds=perf_counter() - start_time))}") + return + + # === BACKUP OLD FILES === + backup_output_files() + + # === WRITE NEW FILES === + print("Writing files...") + + with open(REQUESTS_FILE_NAME, 'w', encoding='utf-8') as f_json: + json.dump(output_requests, f_json, indent=4, ensure_ascii=False) + with open(ORGANIZATIONS_FILE_NAME, 'w', encoding='utf-8') as f_json: + json.dump(organizations_list, f_json, indent=4, ensure_ascii=False) + + console.print("[green]✓ Data saved to JSON files[/green]") + print() + + # === EXCEL EXPORT === (temporarily disabled for JSON generation testing) + # run_normal_mode_export(excel_export_enabled, excel_export_config, + # requests_mapping_config, organizations_mapping_config) + + except IOError as io_err: + logging.critical(f"Error while writing JSON file : {io_err}") + print(f"Error while writing JSON file : {io_err}") + except Exception as exc: + logging.critical(f"Error during final processing : {exc}") + print(f"Error during final processing : {exc}") + + print() + print(f"Elapsed time : {str(timedelta(seconds=perf_counter() - start_time))}") + + +if __name__ == '__main__': + + try: + main() + except Exception as e: + logging.critical(f"Script terminated prematurely due to an exception: {e}", exc_info=True) + print(f"Script stopped due to an error : {e}") + finally: + if 'subtasks_thread_pool' in globals() and subtasks_thread_pool: + subtasks_thread_pool.shutdown(wait=False, cancel_futures=True) + print('\n') + input("Press Enter to exit...") diff --git a/do_dashboard_check_only-exe.bat b/do_dashboard_check_only-exe.bat new file mode 
"""
DO Dashboard - Centralized Constants Module

This module defines ALL constants used across the DO (Diagnostic Order) Dashboard application.
It serves as the single source of truth for all configuration values.

All other modules MUST import constants from this module, NOT define them locally.

Structure:
- File names & paths
- Excel configuration files
- Table names (Excel sheets)
- DO filters config
- API hosts & authentication credentials
- API endpoints
- Threading & retry parameters
- Logging configuration
- API configuration
- Excel export configuration
- UI formatting constants
"""

import os

# ============================================================================
# FILE NAMES & PATHS
# ============================================================================

REQUESTS_FILE_NAME = "do_requests.json"
ORGANIZATIONS_FILE_NAME = "do_organizations.json"
OLD_FILE_SUFFIX = "_old"
CONFIG_FOLDER_NAME = "config"

# ============================================================================
# EXCEL CONFIGURATION FILES
# ============================================================================

DASHBOARD_CONFIG_FILE_NAME = "DO_Dashboard_Config.xlsx"
ORG_CENTER_MAPPING_FILE_NAME = "do_org_center_mapping.xlsx"

# ============================================================================
# TABLE NAMES (Excel sheets in DASHBOARD_CONFIG_FILE_NAME)
# ============================================================================

REQUESTS_MAPPING_TABLE_NAME = "Requests_Mapping"
ORGANIZATIONS_MAPPING_TABLE_NAME = "Organizations_Mapping"
EXCEL_WORKBOOKS_TABLE_NAME = "Excel_Workbooks"
EXCEL_SHEETS_TABLE_NAME = "Excel_Sheets"
REGRESSION_CHECK_TABLE_NAME = "Regression_Check"
ORG_CENTER_MAPPING_TABLE_NAME = "Org_Center_Mapping"

# ============================================================================
# DO FILTERS CONFIGURATION
# ============================================================================

# Named range in DASHBOARD_CONFIG_FILE_NAME containing the JSON filters object
# for the worklist API call (e.g. {"status": "all-admin", "study": "ENDOLIFE"})
DO_FILTERS = "DO_Filters"

# Number of requests per page for worklist pagination
DO_WORKLIST_PAGE_SIZE = 50

# ============================================================================
# API HOSTS & AUTHENTICATION
# ============================================================================

IAM_URL = "https://api-auth.ziwig-connect.com"
GDD_URL = "https://api-lab.ziwig-connect.com"
GDD_APP_ID = "4f5ac063-6a22-4e2c-bda5-b50c0dddab79"

# SECURITY: credentials should not live in version-controlled source.
# They can be supplied through the DO_DASHBOARD_USER_NAME and
# DO_DASHBOARD_PASSWORD environment variables; the literals below are kept
# only as a backward-compatible fallback and should be rotated and removed.
DEFAULT_USER_NAME = os.environ.get("DO_DASHBOARD_USER_NAME", "ziwig-invest2@yopmail.com")
DEFAULT_PASSWORD = os.environ.get("DO_DASHBOARD_PASSWORD", "pbrrA765$bP3beiuyuiyhiuy!agxagx")

# ============================================================================
# API ENDPOINTS
# ============================================================================

# Authentication endpoints
API_AUTH_LOGIN_ENDPOINT = "/api/auth/ziwig-pro/login"
API_AUTH_CONFIG_TOKEN_ENDPOINT = "/api/auth/config-token"
API_AUTH_REFRESH_TOKEN_ENDPOINT = "/api/auth/refreshToken"

# GDD (Diagnostic Order) endpoints
API_DO_WORKLIST_ENDPOINT = "/api/requests/worklist-filter"
API_DO_REQUEST_DETAIL_ENDPOINT = "/api/requests"  # + /{id}/validation
API_DO_PROFESSIONALS_ENDPOINT = "/api/entity-manager/meta/modele_fr/data/nodes/pro/nodes"

# ============================================================================
# THREADING & RETRY PARAMETERS
# ============================================================================

ERROR_MAX_RETRY = 10
WAIT_BEFORE_RETRY = 1
WAIT_BEFORE_NEW_BATCH_OF_RETRIES = 20
MAX_BATCHS_OF_RETRIES = 3
MAX_THREADS = 40

# Excel operation retry parameters (for handling transient xlwings/Excel failures)
EXCEL_COM_MAX_RETRIES = 3
EXCEL_COM_RETRY_DELAY = 0.5

# ============================================================================
# LOGGING CONFIGURATION
# ============================================================================

LOG_FILE_NAME = "dashboard.log"

# ============================================================================
# API CONFIGURATION
# ============================================================================

API_TIMEOUT = 60  # seconds - timeout for all API calls

# ============================================================================
# EXCEL EXPORT CONFIGURATION
# ============================================================================

# Output file conflict handling actions
OUTPUT_ACTION_OVERWRITE = "Overwrite"
OUTPUT_ACTION_INCREMENT = "Increment"
OUTPUT_ACTION_BACKUP = "Backup"
OUTPUT_ACTIONS = [OUTPUT_ACTION_OVERWRITE, OUTPUT_ACTION_INCREMENT, OUTPUT_ACTION_BACKUP]

# Excel export data source types
SOURCE_TYPE_REQUESTS = "Requests"
SOURCE_TYPE_ORGANIZATIONS = "Organizations"
SOURCE_TYPE_VARIABLE = "Variable"
SOURCE_TYPES = [SOURCE_TYPE_REQUESTS, SOURCE_TYPE_ORGANIZATIONS, SOURCE_TYPE_VARIABLE]

# Excel export target types (for data filling)
TARGET_TYPE_TABLE = "Table"  # Excel structured table (ListObject) - has headers, supports Resize()
TARGET_TYPE_NAMED_RANGE = "NamedRange"  # Simple named range - no headers, resize via Name.RefersTo

# ============================================================================
# UI FORMATTING (Progress bars)
# ============================================================================

BAR_N_FMT_WIDTH = 4
BAR_TOTAL_FMT_WIDTH = 4
BAR_TIME_WIDTH = 8
BAR_RATE_WIDTH = 10
b/do_dashboard_excel_export.py new file mode 100644 index 0000000..3e76578 --- /dev/null +++ b/do_dashboard_excel_export.py @@ -0,0 +1,2094 @@ +""" +DO Dashboard - Excel Export Module + +This module handles generation of Excel workbooks from Requests and Organizations data. +Fully configurable via external Excel configuration file (DO_Dashboard_Config.xlsx). + +Features: +- Config-driven workbook generation (no code changes needed) +- Support for Variable templates and Table data fills +- Configurable filtering, sorting, and value replacement +- xlwings-based data processing with automatic formula recalculation +- Robust error handling and logging +""" + +import functools +import json +import logging +import os +import re +import shutil +import tempfile +import traceback +import zipfile +from datetime import datetime, timedelta, timezone +from time import perf_counter +from zoneinfo import ZoneInfo + +import openpyxl +from openpyxl.utils import get_column_letter +from rich.console import Console + +try: + import xlwings as xw +except ImportError: + xw = None + +from do_dashboard_utils import get_nested_value, get_config_path +from do_dashboard_constants import ( + REQUESTS_FILE_NAME, + ORGANIZATIONS_FILE_NAME, + DASHBOARD_CONFIG_FILE_NAME, + EXCEL_WORKBOOKS_TABLE_NAME, + EXCEL_SHEETS_TABLE_NAME, + OUTPUT_ACTION_OVERWRITE, + OUTPUT_ACTION_INCREMENT, + OUTPUT_ACTION_BACKUP, + OUTPUT_ACTIONS, + SOURCE_TYPE_REQUESTS, + SOURCE_TYPE_ORGANIZATIONS, + SOURCE_TYPE_VARIABLE, + SOURCE_TYPES, + TARGET_TYPE_TABLE, + TARGET_TYPE_NAMED_RANGE, + EXCEL_COM_MAX_RETRIES, + EXCEL_COM_RETRY_DELAY +) + +# ============================================================================ +# CONSTANTS +# ============================================================================ + +EXCEL_OUTPUT_FOLDER = os.getcwd() # Current working directory + +# ============================================================================ +# MODULE DEPENDENCIES (injected from main module) +# 
============================================================================ + +console = None + +# NOTE: Constants imported from do_dashboard_constants.py (SINGLE SOURCE OF TRUTH): +# Configuration Files: +# - REQUESTS_FILE_NAME, ORGANIZATIONS_FILE_NAME, DASHBOARD_CONFIG_FILE_NAME +# - EXCEL_WORKBOOKS_TABLE_NAME, EXCEL_SHEETS_TABLE_NAME +# Output Handling: +# - OUTPUT_ACTION_OVERWRITE, OUTPUT_ACTION_INCREMENT, OUTPUT_ACTION_BACKUP, OUTPUT_ACTIONS +# Data Sources: +# - SOURCE_TYPE_REQUESTS, SOURCE_TYPE_ORGANIZATIONS, SOURCE_TYPE_VARIABLE, SOURCE_TYPES +# +# NOTE: Mapping table names (REQUESTS_MAPPING_TABLE_NAME, ORGANIZATIONS_MAPPING_TABLE_NAME) +# are defined in constants but loaded/used in main script (do_dashboard.py) + + +def set_dependencies(console_instance): + """ + Inject console instance from main module. + + Args: + console_instance: Rich Console instance for formatted output + + Note: + File and table names are imported directly from do_dashboard_constants.py + (SINGLE SOURCE OF TRUTH) + """ + global console + console = console_instance + + +# ============================================================================ +# PUBLIC FUNCTIONS +# ============================================================================ + +def load_excel_export_config(console_instance=None): + """ + Load and validate Excel export configuration from config file.
+ + Args: + console_instance: Optional Rich Console instance + + Returns: + Tuple of (excel_workbooks_config, excel_sheets_config, has_error, error_messages) + - excel_workbooks_config: List of workbook definitions + - excel_sheets_config: List of sheet fill definitions + - has_error: Boolean flag if critical errors found + - error_messages: List of error message strings + """ + global console + if console_instance: + console = console_instance + + config_path = os.path.join(get_config_path(), DASHBOARD_CONFIG_FILE_NAME) + error_messages = [] + + try: + workbook = openpyxl.load_workbook(config_path) + except FileNotFoundError: + error_msg = f"Error: Configuration file not found at: {config_path}" + logging.critical(error_msg) + console.print(f"[bold red]{error_msg}[/bold red]") + return None, None, True, [error_msg] + + # Load Excel_Workbooks sheet + if EXCEL_WORKBOOKS_TABLE_NAME not in workbook.sheetnames: + error_msg = f"Error: Sheet '{EXCEL_WORKBOOKS_TABLE_NAME}' not found in configuration file." 
+ error_messages.append(error_msg) + return None, None, True, error_messages + + excel_workbooks_sheet = workbook[EXCEL_WORKBOOKS_TABLE_NAME] + excel_workbooks_config = [] + + try: + headers = [cell.value for cell in excel_workbooks_sheet[1]] + for row_index, row in enumerate(excel_workbooks_sheet.iter_rows(min_row=2, values_only=True), start=2): + if all(cell is None for cell in row): + continue # Skip empty rows + + workbook_config = dict(zip(headers, row)) + + # Validate required fields + if not workbook_config.get("workbook_id"): + error_msg = f"Row {row_index}: 'workbook_id' is mandatory" + error_messages.append(error_msg) + continue + + if not workbook_config.get("workbook_template_name"): + error_msg = f"Row {row_index}: 'workbook_template_name' is mandatory" + error_messages.append(error_msg) + continue + + if not workbook_config.get("output_file_name_template"): + error_msg = f"Row {row_index}: 'output_file_name_template' is mandatory" + error_messages.append(error_msg) + continue + + if_output_exists = workbook_config.get("if_output_exists", OUTPUT_ACTION_OVERWRITE) + if if_output_exists not in OUTPUT_ACTIONS: + error_msg = f"Row {row_index}: 'if_output_exists' must be one of {OUTPUT_ACTIONS}" + error_messages.append(error_msg) + continue + + excel_workbooks_config.append(workbook_config) + except Exception as e: + error_msg = f"Error loading Excel_Workbooks sheet: {e}" + error_messages.append(error_msg) + return None, None, True, error_messages + + # Load Excel_Sheets sheet + if EXCEL_SHEETS_TABLE_NAME not in workbook.sheetnames: + error_msg = f"Error: Sheet '{EXCEL_SHEETS_TABLE_NAME}' not found in configuration file." 
+ error_messages.append(error_msg) + return excel_workbooks_config, None, True, error_messages + + excel_sheets_sheet = workbook[EXCEL_SHEETS_TABLE_NAME] + excel_sheets_config = [] + + try: + headers = [cell.value for cell in excel_sheets_sheet[1]] + for row_index, row in enumerate(excel_sheets_sheet.iter_rows(min_row=2, values_only=True), start=2): + if all(cell is None for cell in row): + continue + + sheet_config = dict(zip(headers, row)) + + # Validate required fields + if not sheet_config.get("workbook_id"): + continue # Skip rows without workbook_id + + if not sheet_config.get("source_type"): + error_msg = f"Row {row_index}: 'source_type' is mandatory" + error_messages.append(error_msg) + continue + + source_type = sheet_config["source_type"] + if source_type not in SOURCE_TYPES: + error_msg = f"Row {row_index}: 'source_type' must be one of {SOURCE_TYPES}" + error_messages.append(error_msg) + continue + + if not sheet_config.get("source"): + error_msg = f"Row {row_index}: 'source' is mandatory" + error_messages.append(error_msg) + continue + + if not sheet_config.get("target_name"): + error_msg = f"Row {row_index}: 'target_name' is mandatory" + error_messages.append(error_msg) + continue + + # Parse JSON fields + has_json_error = False + for json_field in ["filter_condition", "sort_keys", "value_replacement"]: + value = sheet_config.get(json_field) + if value: + if isinstance(value, str): + try: + sheet_config[json_field] = json.loads(value) + except json.JSONDecodeError: + error_msg = f"Row {row_index}, field '{json_field}': Invalid JSON format" + error_messages.append(error_msg) + has_json_error = True + break # ← Skip this row entirely + # else: value is already parsed (dict/list), keep as-is + else: + # Empty/None value - leave as None or empty + sheet_config[json_field] = None + + if not has_json_error: + excel_sheets_config.append(sheet_config) + except Exception as e: + error_msg = f"Error loading Excel_Sheets sheet: {e}" + 
def _collect_sheet_target_errors(template_wb, sheet_config, template_name, workbook_id,
                                 requests_mapping_config, organizations_mapping_config):
    """Return the validation error messages for one sheet definition against an open template."""
    target_name = sheet_config.get("target_name")
    source_type = sheet_config.get("source_type")

    # The target may be a defined name OR a table on any sheet of the template
    target_found = target_name in template_wb.defined_names
    if not target_found:
        for sheet in template_wb.sheetnames:
            sheet_obj = template_wb[sheet]
            if hasattr(sheet_obj, 'tables') and target_name in sheet_obj.tables:
                target_found = True
                break

    if not target_found:
        return [f"Named range '{target_name}' (template: {template_name}, workbook_id: {workbook_id}) not found in template"]

    is_variable = source_type == SOURCE_TYPE_VARIABLE
    is_table = not is_variable and source_type in [SOURCE_TYPE_REQUESTS, SOURCE_TYPE_ORGANIZATIONS]
    if not (is_variable or is_table):
        return []

    # NOTE: openpyxl is used here because the template workbook is already open for validation
    table_dims = _get_named_range_dimensions(template_wb, target_name)
    if not table_dims:
        return []  # Dimensions could not be determined: nothing more to check

    _, _, height, width = table_dims
    errors = []

    if is_variable:
        # Variable sources must point at exactly one cell
        if height != 1 or width != 1:
            errors.append(
                f"Target '{target_name}' (template: {template_name}) for Variable source must reference a single cell (found {height}x{width})"
            )
        return errors

    # CRITICAL: Table height MUST be exactly 1 (template row only)
    if height != 1:
        errors.append(
            f"Target '{target_name}' (template: {template_name}, source_type: {source_type}) must have height=1 (found height={height}). "
            f"Template row must be a single row."
        )

    # CRITICAL: Table width must be > the highest (0-based) column index used by the mapping
    source = sheet_config.get("source")
    if not source:
        return errors

    mapping_config = requests_mapping_config if source_type == SOURCE_TYPE_REQUESTS else organizations_mapping_config
    if not mapping_config:
        return errors

    column_mapping = _get_column_mapping(mapping_config, source, source_type)
    if column_mapping:
        max_col_index = max(column_mapping)  # 0-based index
        if max_col_index >= width:
            errors.append(
                f"Target '{target_name}' (template: {template_name}) width={width} is insufficient. "
                f"Maximum column index from mapping is {max_col_index} (0-based). "
                f"Width must be > {max_col_index}."
            )

    return errors


def validate_excel_config(excel_config, console_instance, requests_mapping_config=None, organizations_mapping_config=None):
    """
    Validate Excel export configuration against the template workbooks.

    For every configured workbook the template file must exist and be loadable,
    and every sheet definition attached to it must point at a defined name or
    table present in the template. Variable targets must be a single cell;
    Requests/Organizations targets must be one row high and wide enough for the
    highest column index found in the corresponding mapping.

    Args:
        excel_config: Tuple of (workbooks_config, sheets_config) from load_excel_export_config()
        console_instance: Console instance; when truthy it replaces the module-level console
        requests_mapping_config: Requests mapping config (optional); when provided it is used
            to check the width of Requests targets
        organizations_mapping_config: Organizations mapping config (optional); when provided
            it is used to check the width of Organizations targets

    Returns:
        Tuple of (has_critical_error, error_messages). Returns (False, []) when either
        part of excel_config is missing or empty.
    """
    global console
    if console_instance:
        console = console_instance

    if not excel_config or not excel_config[0] or not excel_config[1]:
        return False, []  # No config to validate

    excel_workbooks_config, excel_sheets_config = excel_config[0], excel_config[1]
    error_messages = []

    for workbook_config in excel_workbooks_config:
        workbook_id = workbook_config.get("workbook_id")
        template_name = workbook_config.get("workbook_template_name")

        # Check template exists
        template_path = os.path.join(get_config_path(), template_name)
        if not os.path.exists(template_path):
            error_messages.append(f"Template '{template_name}' (workbook_id: {workbook_id}) not found in config/")
            continue

        # Check template is valid Excel
        try:
            template_wb = openpyxl.load_workbook(template_path)
        except Exception as e:
            error_messages.append(f"Template '{template_name}' (workbook_id: {workbook_id}) is not a valid Excel file: {e}")
            continue

        # Close the template even if a sheet check raises, so the file handle is never leaked
        try:
            workbook_sheets = [s for s in excel_sheets_config if s.get("workbook_id") == workbook_id]
            for sheet_config in workbook_sheets:
                error_messages.extend(
                    _collect_sheet_target_errors(
                        template_wb,
                        sheet_config,
                        template_name,
                        workbook_id,
                        requests_mapping_config,
                        organizations_mapping_config,
                    )
                )
        finally:
            template_wb.close()

    return len(error_messages) > 0, error_messages
+ + Args: + requests_data: List of request dictionaries + organizations_data: List of organization dictionaries + excel_config: Tuple of (workbooks_config, sheets_config) + requests_mapping_config: Requests field mapping configuration + organizations_mapping_config: Organizations field mapping configuration + + Returns: + Tuple of (success, error_count) + + Note: + Uses global console instance (injected from main script) + """ + if not excel_config or not excel_config[0] or not excel_config[1]: + console.print("[yellow]⚠ No Excel export configuration found, skipping[/yellow]") + return True, 0 + + excel_workbooks_config, excel_sheets_config = excel_config[0], excel_config[1] + + # Prepare template variables + template_vars = _prepare_template_variables() + + error_count = 0 + success_count = 0 + + # Track overall export duration + export_start_time = perf_counter() + + # Process each workbook + for workbook_config in excel_workbooks_config: + try: + workbook_id = workbook_config.get("workbook_id") + template_name = workbook_config.get("workbook_template_name") + output_template = workbook_config.get("output_file_name_template") + if_output_exists = workbook_config.get("if_output_exists", OUTPUT_ACTION_OVERWRITE) + + # Resolve output filename + try: + output_filename = output_template.format(**template_vars) + except KeyError as e: + console.print(f"[bold red]✗ Unknown variable in template: {e}[/bold red]") + error_count += 1 + continue + + output_path = os.path.join(EXCEL_OUTPUT_FOLDER, output_filename) + + # Log workbook processing start + logging.info(f"Processing workbook: {workbook_id} (template: {template_name}, output: {output_filename})") + + # PHASE PRÉPARATION: Handle existing file according to action + output_path = _handle_output_exists(output_path, if_output_exists) + + # XLWINGS PHASE: Open template, fill, save as output + template_path = os.path.join(get_config_path(), template_name) + + # Track workbook processing duration with spinning status + 
workbook_start_time = perf_counter() + + try: + if xw is None: + raise ImportError("xlwings is not installed. Install with: pip install xlwings") + + # Use status with spinner while processing the workbook + with console.status(f"[bold cyan]Exporting {output_filename}...", spinner="dots"): + # PERFORMANCE: Make Excel invisible BEFORE opening the workbook + app_xw = None + screen_updating_original = None + visible_original = None + try: + # Get or create Excel app in invisible mode + if xw.apps: + app_xw = xw.apps.active + visible_original = app_xw.visible + screen_updating_original = app_xw.screen_updating + else: + # Create new app in invisible mode + app_xw = xw.App(visible=False) + visible_original = False + screen_updating_original = True + + app_xw.visible = False # Make Excel invisible + app_xw.screen_updating = False # Disable screen updates + + except Exception as e: + logging.warning(f"Failed to manage Excel visibility: {e}") + app_xw = None + + # Open TEMPLATE directly (not a copy) + wb_xw = xw.Book(template_path, update_links=False) + + try: + # CAPTURE TEMPLATE STATE: Save initial state for restoration before save + template_state = _capture_workbook_state(wb_xw, workbook_context=f"{workbook_id} ({output_filename})") + logging.info(f"Captured template state: active_sheet='{template_state['active_sheet']}', {len(template_state['sheets'])} sheet(s)") + + # Get sheets for this workbook + workbook_sheets = [s for s in excel_sheets_config if s.get("workbook_id") == workbook_id] + + # Process each sheet with xlwings + for sheet_config in workbook_sheets: + _process_sheet_xlwings( + wb_xw, + sheet_config, + requests_data, + organizations_data, + requests_mapping_config=requests_mapping_config, + organizations_mapping_config=organizations_mapping_config, + template_vars=template_vars, + workbook_context=f"{workbook_id} ({output_filename})" + ) + + # RESTORE TEMPLATE STATE: Restore initial state before saving + _restore_workbook_state(wb_xw, template_state, 
workbook_context=f"{workbook_id} ({output_filename})") + logging.info(f"Restored template state before save") + + # Save as output file with forced overwrite (with retry mechanism) + # This preserves filesystem versioning for cloud storage + # Disable alerts to force silent overwrite + abs_output_path = os.path.abspath(output_path) + if app_xw: + display_alerts_original = app_xw.api.DisplayAlerts + app_xw.api.DisplayAlerts = False + try: + _save_workbook_with_retry(wb_xw, abs_output_path) + logging.info(f"Saved workbook to: {abs_output_path}") + finally: + if app_xw: + app_xw.api.DisplayAlerts = display_alerts_original + # Excel automatically recalculates formulas on save + # No need for separate recalculation step + + finally: + # Always close the workbook and restore visibility/screen updates + wb_xw.close() + if app_xw is not None: + try: + if screen_updating_original is not None: + app_xw.screen_updating = screen_updating_original + if visible_original is not None: + app_xw.visible = visible_original + except: + pass + + # Calculate duration and display success message + workbook_duration = perf_counter() - workbook_start_time + console.print(f"[green]✓ Created: {output_filename} ({workbook_duration:.2f}s)[/green]") + success_count += 1 + + except Exception as e: + console.print(f"[bold red]✗ Error processing {output_filename}: {e}[/bold red]") + logging.error(f"Excel export error for {output_filename}: {e}", exc_info=True) + error_count += 1 + continue + + except Exception as e: + console.print(f"[bold red]✗ Error processing workbook {workbook_id}: {e}[/bold red]") + logging.error(f"Excel workbook processing error: {e}", exc_info=True) + error_count += 1 + + # Summary with total duration + total_workbooks = success_count + error_count + export_duration = perf_counter() - export_start_time + + if error_count == 0: + # Success: all workbooks processed + console.print(f"\n[green]✓ Excel export completed successfully: {success_count}/{total_workbooks} workbooks 
generated ({export_duration:.2f}s)[/green]") + else: + # Failure: some or all workbooks failed + if success_count > 0: + # Partial success + console.print(f"\n[yellow]⚠ Excel export completed with errors ({export_duration:.2f}s)[/yellow]") + console.print(f"[green] {success_count} workbook(s) generated successfully[/green]") + console.print(f"[bold red] {error_count} workbook(s) failed[/bold red]") + else: + # Complete failure + console.print(f"\n[bold red]✗ Excel export failed: all {error_count} workbook(s) failed ({export_duration:.2f}s)[/bold red]") + + return error_count == 0, error_count + + +# ============================================================================ +# INTERNAL FUNCTIONS +# ============================================================================ + +def _prepare_template_variables(): + """ + Prepare variables available for Template String evaluation. + + Returns: + Dictionary of variables available to .format(**locals()) + """ + # Get UTC timestamp from requests file + # Use constant from do_dashboard_constants (SINGLE SOURCE OF TRUTH) + requests_file = REQUESTS_FILE_NAME + if os.path.exists(requests_file): + file_mtime = os.path.getmtime(requests_file) + extract_date_time_utc = datetime.fromtimestamp(file_mtime, tz=timezone.utc) + else: + extract_date_time_utc = datetime.now(tz=timezone.utc) + + # Convert to Paris timezone + extract_date_time_french = extract_date_time_utc.astimezone( + ZoneInfo('Europe/Paris') + ) + + return { + 'extract_date_time_utc': extract_date_time_utc, + 'extract_date_time_french': extract_date_time_french, + } + + +def _apply_filter(item, filter_condition): + """ + Apply filter condition to item (AND logic for all conditions). 
+ + Args: + item: Dictionary to filter + filter_condition: List of [field_name, operator, value] conditions + + Returns: + Boolean True if item passes all filters + """ + if not filter_condition: + return True # Empty filter = accept all + + for field_path, operator, expected_value in filter_condition: + actual_value = get_nested_value(item, field_path.split(".")) + + if actual_value is None: + return False # Missing field = filter out + + # Apply operator + if operator == "==": + if actual_value != expected_value: + return False + elif operator == "<>": + if actual_value == expected_value: + return False + elif operator == ">": + if not (actual_value > expected_value): + return False + elif operator == ">=": + if not (actual_value >= expected_value): + return False + elif operator == "<": + if not (actual_value < expected_value): + return False + elif operator == "<=": + if not (actual_value <= expected_value): + return False + + return True # All conditions passed + + +def _apply_sort(items, sort_keys): + """ + Apply multi-key sort to items with support for mixed asc/desc ordering. + + Args: + items: List of dictionaries to sort + sort_keys: List of [field_name, order] or [field_name, order, option] + where: + - order is "asc" or "desc" + - option (optional) can be: + * datetime format string (e.g., "%Y-%m-%d") for date parsing + * "*natsort" for natural alphanumeric sorting + Supports MIXED asc/desc on different columns! + + Returns: + Sorted list + """ + if not sort_keys: + return items + + def natural_sort_key(text): + """ + Helper for natural alphanumeric sorting. + Converts "ENDOBEST-003-920-BA" to ["endobest", "-", 3, "-", 920, "-", "ba"] + Python's native list comparison handles the rest element by element. 
+ """ + def convert(segment): + return int(segment) if segment.isdigit() else segment.lower() + return [convert(s) for s in re.split(r'(\d+)', str(text)) if s] + + def compare_items(item1, item2): + """ + Comparator function for multi-key sorting with mixed asc/desc support. + Returns: -1 if item1 < item2, 0 if equal, 1 if item1 > item2 + """ + for sort_spec in sort_keys: + field_name = sort_spec[0] + order = sort_spec[1] if len(sort_spec) > 1 else "asc" + sort_option = sort_spec[2] if len(sort_spec) > 2 else None + + # Get values from both items + val1 = get_nested_value(item1, field_name.split(".")) + val2 = get_nested_value(item2, field_name.split(".")) + + # Handle undefined/None - place at end + is_undef1 = val1 in [None, "", "undefined"] + is_undef2 = val2 in [None, "", "undefined"] + + # Both undefined: equal + if is_undef1 and is_undef2: + continue + + # Only one undefined: undefined goes last + if is_undef1: + return 1 # item1 > item2 (undefined last) + if is_undef2: + return -1 # item1 < item2 (item2 is undefined) + + # Check if natural sort requested + is_natural_sort = (sort_option == "*natsort") + + # Parse datetime if option is a datetime format (not *natsort) + if sort_option and not is_natural_sort: + datetime_format = sort_option + try: + val1 = datetime.strptime(str(val1), datetime_format).timestamp() + except (ValueError, TypeError): + val1 = None + return 1 # Invalid datetime goes last + + try: + val2 = datetime.strptime(str(val2), datetime_format).timestamp() + except (ValueError, TypeError): + val2 = None + return -1 # Invalid datetime goes last + + # Apply natural sort transformation if requested + if is_natural_sort: + val1 = natural_sort_key(val1) + val2 = natural_sort_key(val2) + + # Compare values + # For strings (non-natsort), use case-insensitive comparison for natural alphabetical ordering + if isinstance(val1, str) and isinstance(val2, str): + val1_lower = val1.lower() + val2_lower = val2.lower() + if val1_lower < val2_lower: + 
cmp_result = -1 + elif val1_lower > val2_lower: + cmp_result = 1 + else: + # Case-insensitive equal, use case-sensitive as tiebreaker + if val1 < val2: + cmp_result = -1 + elif val1 > val2: + cmp_result = 1 + else: + cmp_result = 0 + else: + # Non-string comparison (numbers, dates, natsort lists, etc.) + if val1 < val2: + cmp_result = -1 + elif val1 > val2: + cmp_result = 1 + else: + cmp_result = 0 # Equal, continue to next sort key + + # Apply asc/desc ordering + if cmp_result != 0: + is_desc = isinstance(order, str) and order.lower() == "desc" + return cmp_result if not is_desc else -cmp_result + + # All keys are equal + return 0 + + # Use functools.cmp_to_key to convert comparator to key function + return sorted(items, key=functools.cmp_to_key(compare_items)) + + +def _apply_value_replacement(value, replacements): + """ + Apply value replacement rules (first-match-wins, strict type matching). + + Args: + value: Value to potentially replace + replacements: List of [value_before, value_after] pairs + + Returns: + Replaced value or original + + Note: + This function is currently prepared for future use in table data filling. + """ + if not replacements: + return value + + for value_before, value_after in replacements: + if value == value_before: # Strict equality + return value_after + + return value # No match, return original + + +# OBSOLETE: _preserve_media_in_workbook() removed - xlwings handles media preservation automatically +# When using xlwings, Excel natively preserves all media, images, and relationships + + +def _save_workbook_with_retry(wb_xw, output_path): + """ + Save workbook with retry mechanism for transient xlwings/Excel failures. + + Excel's SaveAs can fail randomly on some environments (especially Excel 2013). + This function retries the save operation with configurable retry count and delay. 
+ + Args: + wb_xw: xlwings Book object + output_path: Absolute path where workbook should be saved + + Raises: + Exception: If SaveAs fails after all retry attempts + """ + from time import sleep + + for attempt in range(1, EXCEL_COM_MAX_RETRIES + 1): + try: + logging.info(f"SaveAs attempt {attempt}/{EXCEL_COM_MAX_RETRIES}: {output_path}") + wb_xw.api.SaveAs(output_path) + logging.info(f"SaveAs succeeded on attempt {attempt}") + return # Success + + except Exception as e: + error_msg = f"SaveAs failed on attempt {attempt}: {type(e).__name__}: {str(e)}" + + if attempt < EXCEL_COM_MAX_RETRIES: + # Intermediate retry - log as warning and sleep before retry + logging.warning(f"{error_msg} - Retrying in {EXCEL_COM_RETRY_DELAY}s...") + sleep(EXCEL_COM_RETRY_DELAY) + else: + # Final attempt failed - log as critical error and raise + logging.error(f"{error_msg} - All {EXCEL_COM_MAX_RETRIES} retry attempts exhausted") + raise + + +def _capture_workbook_state(wb_xw, workbook_context=""): + """ + Capture the visual state of the workbook (active sheet, selections, scroll positions). + + This allows restoration of the template's visual state after data processing, + ensuring recipients see the workbook exactly as designed in the template. 
+ + Args: + wb_xw: xlwings Book object + workbook_context: String identifier for logging (workbook_id and filename) + + Returns: + dict: State dictionary with 'active_sheet' and 'sheets' state per sheet + """ + ctx = f"[{workbook_context}]" if workbook_context else "" + logging.info(f"{ctx} [CAPTURE_STATE] Starting workbook state capture") + logging.info(f"{ctx} [CAPTURE_STATE] Total sheets: {len(wb_xw.sheets)}") + + state = { + 'active_sheet': None, + 'sheets': {} + } + + try: + # Capture active sheet name + state['active_sheet'] = wb_xw.api.ActiveSheet.Name + logging.info(f"{ctx} [CAPTURE_STATE] Active sheet captured: '{state['active_sheet']}'") + except Exception as e: + logging.warning(f"{ctx} [CAPTURE_STATE] Could not capture active sheet: {type(e).__name__}: {str(e)}") + + # Capture state for each sheet + for idx, sheet in enumerate(wb_xw.sheets, 1): + logging.info(f"{ctx} [CAPTURE_STATE] Processing sheet {idx}/{len(wb_xw.sheets)}: '{sheet.name}'") + try: + # Activate sheet to get its state + sheet.activate() + logging.info(f"{ctx} [CAPTURE_STATE] Sheet '{sheet.name}' activated successfully") + sheet_api = sheet.api + + sheet_state = { + 'selection': None, + 'scroll_row': 1, + 'scroll_col': 1 + } + + # Capture selection address + try: + selection_address = sheet_api.Application.Selection.Address + sheet_state['selection'] = selection_address + logging.info(f"{ctx} [CAPTURE_STATE] Sheet '{sheet.name}' selection captured: {selection_address}") + except Exception as e: + sheet_state['selection'] = "A1" # Default + logging.warning(f"{ctx} [CAPTURE_STATE] Could not capture selection for sheet '{sheet.name}': {type(e).__name__}, defaulting to A1") + + # Capture scroll position + try: + scroll_row = sheet_api.Application.ActiveWindow.ScrollRow + scroll_col = sheet_api.Application.ActiveWindow.ScrollColumn + sheet_state['scroll_row'] = scroll_row + sheet_state['scroll_col'] = scroll_col + logging.info(f"{ctx} [CAPTURE_STATE] Sheet '{sheet.name}' scroll position 
captured: Row={scroll_row}, Col={scroll_col}") + except Exception as e: + logging.warning(f"{ctx} [CAPTURE_STATE] Could not capture scroll position for sheet '{sheet.name}': {type(e).__name__}, keeping defaults") + + state['sheets'][sheet.name] = sheet_state + logging.info(f"{ctx} [CAPTURE_STATE] Sheet '{sheet.name}' state complete: {sheet_state}") + + except Exception as e: + logging.error(f"{ctx} [CAPTURE_STATE] ERROR capturing state for sheet '{sheet.name}': {type(e).__name__}: {str(e)}") + + logging.info(f"{ctx} [CAPTURE_STATE] Workbook state capture complete. Captured {len(state['sheets'])} sheet(s)") + return state + + +def _restore_workbook_state(wb_xw, state, workbook_context=""): + """ + Restore the visual state of the workbook (active sheet, selections, scroll positions). + + Args: + wb_xw: xlwings Book object + state: State dictionary from _capture_workbook_state() + workbook_context: String identifier for logging (workbook_id and filename) + """ + if not state: + logging.warning("[RESTORE_STATE] Empty state provided, skipping restoration") + return + + from time import sleep + + ctx = f"[{workbook_context}]" if workbook_context else "" + logging.info(f"{ctx} [RESTORE_STATE] Starting workbook state restoration") + logging.info(f"{ctx} [RESTORE_STATE] Restoring {len(state.get('sheets', {}))} sheet(s)") + + # NOTE: Screen updating is already disabled at the global level (in export_to_excel) + # for the entire workbook processing cycle (from open to save). + # We do NOT re-disable it here to avoid state conflicts. + # The global setting ensures all operations (capture, process, restore, save) run efficiently. + + # CRITICAL: Excel 2013 COM layer lock recovery + # After bulk paste operations, Excel's COM layer can enter a "locked" state where Range.Select() + # fails persistently. This appears to be a fundamental limitation/bug in Excel 2013. + # To work around this, we need to: + # 1. Give Excel time to recover with a large delay + # 2. 
Then make a "dummy" Range.Select() to wake up the COM layer + # 3. Then proceed with real restorations + logging.info(f"{ctx} [RESTORE_STATE] Waiting for Excel COM layer to stabilize after bulk operations (2 seconds)...") + sleep(2.0) # Large delay to allow COM layer to recover + + # Track original visibility state (used for temporary visibility during retries) + original_app_visible = None + try: + if wb_xw.app: + original_app_visible = wb_xw.app.visible + + if not original_app_visible: + # Make Excel visible during restoration so user sees what's happening + # (important for selection restore retries which may take 2+ seconds) + wb_xw.app.visible = True + logging.info(f"{ctx} [RESTORE_STATE] Excel app temporarily made visible for restoration operations") + except Exception as e: + logging.warning(f"{ctx} [RESTORE_STATE] Could not manage Excel visibility during restoration: {type(e).__name__}: {str(e)}") + + # Wake up the COM layer with a dummy selection attempt on the first sheet + # This "primes" the COM layer so subsequent Range.Select() calls work reliably + try: + if len(wb_xw.sheets) > 0: + first_sheet = wb_xw.sheets[0] + first_sheet.activate() + logging.info(f"{ctx} [RESTORE_STATE] Priming COM layer by activating first sheet...") + first_sheet.api.Range("$A$1").Select() + logging.info(f"{ctx} [RESTORE_STATE] COM layer priming successful") + except Exception as e: + # This is not critical - if it fails, retries will handle it + logging.info(f"{ctx} [RESTORE_STATE] COM layer priming attempt completed (may have failed, retries will handle it)") + + # Restore state for each sheet + for idx, (sheet_name, sheet_state) in enumerate(state.get('sheets', {}).items(), 1): + logging.info(f"{ctx} [RESTORE_STATE] Processing sheet {idx}: '{sheet_name}'") + try: + sheet = wb_xw.sheets[sheet_name] + sheet.activate() + logging.info(f"{ctx} [RESTORE_STATE] Sheet '{sheet_name}' activated successfully") + + # Small delay after activation to ensure Excel has completed the sheet 
switch + sleep(0.3) + + sheet_api = sheet.api + + # Restore selection with retry mechanism for transient Excel COM failures + if sheet_state.get('selection'): + selection = sheet_state['selection'] + selection_restored = False + + # Try to restore original selection with retry + for attempt in range(1, EXCEL_COM_MAX_RETRIES + 1): + try: + logging.info(f"{ctx} [RESTORE_STATE] Selection restore attempt {attempt}/{EXCEL_COM_MAX_RETRIES} for '{selection}' on sheet '{sheet_name}'") + sheet_api.Range(selection).Select() + logging.info(f"{ctx} [RESTORE_STATE] Sheet '{sheet_name}' selection restored to: {selection}") + selection_restored = True + break # Success + except Exception as e: + error_msg = f"Selection restore failed on attempt {attempt}: {type(e).__name__}: {str(e)}" + + if attempt < EXCEL_COM_MAX_RETRIES: + # Intermediate retry - log as warning and sleep before retry + logging.warning(f"{ctx} [RESTORE_STATE] {error_msg} - Retrying in {EXCEL_COM_RETRY_DELAY}s...") + sleep(EXCEL_COM_RETRY_DELAY) + else: + # Final attempt failed - log as error, will default to A1 + logging.error(f"{ctx} [RESTORE_STATE] {error_msg} - All {EXCEL_COM_MAX_RETRIES} retry attempts exhausted") + + # If selection restore failed after all retries, default to A1 + if not selection_restored: + logging.warning(f"{ctx} [RESTORE_STATE] Could not restore selection '{selection}' for sheet '{sheet_name}' after {EXCEL_COM_MAX_RETRIES} attempts, defaulting to A1") + + # Try to set default A1 selection (using absolute reference: $A$1) + for attempt in range(1, EXCEL_COM_MAX_RETRIES + 1): + try: + logging.info(f"{ctx} [RESTORE_STATE] A1 default attempt {attempt}/{EXCEL_COM_MAX_RETRIES} for sheet '{sheet_name}'") + sheet_api.Range("$A$1").Select() + logging.info(f"{ctx} [RESTORE_STATE] Sheet '{sheet_name}' selection defaulted to A1") + break # Success + except Exception as e2: + error_msg = f"A1 default failed on attempt {attempt}: {type(e2).__name__}: {str(e2)}" + + if attempt < EXCEL_COM_MAX_RETRIES: 
+ logging.warning(f"{ctx} [RESTORE_STATE] {error_msg} - Retrying in {EXCEL_COM_RETRY_DELAY}s...") + sleep(EXCEL_COM_RETRY_DELAY) + else: + logging.error(f"{ctx} [RESTORE_STATE] {error_msg} - All {EXCEL_COM_MAX_RETRIES} retry attempts exhausted") + + # Restore scroll position + try: + scroll_row = sheet_state.get('scroll_row', 1) + scroll_col = sheet_state.get('scroll_col', 1) + sheet_api.Application.ActiveWindow.ScrollRow = scroll_row + sheet_api.Application.ActiveWindow.ScrollColumn = scroll_col + logging.info(f"{ctx} [RESTORE_STATE] Sheet '{sheet_name}' scroll position restored: Row={scroll_row}, Col={scroll_col}") + except Exception as e: + logging.warning(f"{ctx} [RESTORE_STATE] Could not restore scroll position for sheet '{sheet_name}': {type(e).__name__}") + + except Exception as e: + logging.error(f"{ctx} [RESTORE_STATE] ERROR restoring state for sheet '{sheet_name}': {type(e).__name__}: {str(e)}") + + # Restore active sheet + if state.get('active_sheet'): + try: + from time import sleep + + active_sheet_name = state['active_sheet'] + wb_xw.sheets[active_sheet_name].activate() + + # Wait for sheet activation to complete on Excel 2013's COM layer + sleep(0.3) + + logging.info(f"{ctx} [RESTORE_STATE] Active sheet restored to: '{active_sheet_name}'") + except Exception as e: + logging.error(f"{ctx} [RESTORE_STATE] Could not restore active sheet '{state.get('active_sheet')}': {type(e).__name__}: {str(e)}") + + # Force sheet tabs to scroll to show the first sheet + # This ensures the tab bar starts from the first sheet, regardless of which sheet is active + # NOTE: ScrollWorkbookTabs only works when Excel is visible + try: + if len(wb_xw.sheets) > 0 and wb_xw.app: + logging.info(f"{ctx} [RESTORE_STATE] Attempting to scroll sheet tabs to first sheet") + + try: + # ScrollWorkbookTabs with negative number scrolls tabs LEFT (toward first sheet) + # Use large negative number (-100) to guarantee we reach the beginning + # Excel visibility is already managed at the 
beginning of this function + wb_xw.api.Application.ActiveWindow.ScrollWorkbookTabs(-100) + logging.info(f"{ctx} [RESTORE_STATE] Sheet tabs scrolled to beginning") + except Exception as e: + logging.warning(f"{ctx} [RESTORE_STATE] Could not scroll sheet tabs to beginning: {type(e).__name__}: {str(e)}") + except Exception as e: + logging.error(f"{ctx} [RESTORE_STATE] ERROR during sheet tabs scroll operation: {type(e).__name__}: {str(e)}") + + # Restore original visibility state (if we temporarily made it visible) + # NOTE: Screen updating restoration is handled at the global level (in export_to_excel) + # after the workbook is saved and closed + try: + if original_app_visible is not None and wb_xw.app: + if not original_app_visible and wb_xw.app.visible: + # Restore to hidden state if it was originally hidden + wb_xw.app.visible = False + logging.info(f"{ctx} [RESTORE_STATE] Excel app visibility restored to original state: False") + except Exception as e: + logging.warning(f"{ctx} [RESTORE_STATE] Could not restore Excel app visibility: {type(e).__name__}: {str(e)}") + + logging.info(f"{ctx} [RESTORE_STATE] Workbook state restoration complete") + + +def _handle_output_exists(output_path, action): + """ + Handle existing output file (Overwrite/Increment/Backup). 
+ + Args: + output_path: Full path to output file + action: "Overwrite", "Increment", or "Backup" + + Returns: + Actual path to use (may be different if Increment/Backup) + """ + if not os.path.exists(output_path): + logging.info(f"Output file doesn't exist yet: {output_path}") + return output_path + + logging.info(f"Output file exists, applying '{action}' rule: {output_path}") + + if action == OUTPUT_ACTION_OVERWRITE: + logging.info(f"Overwriting existing file: {output_path}") + return output_path + + elif action == OUTPUT_ACTION_INCREMENT: + base, ext = os.path.splitext(output_path) + counter = 1 + while os.path.exists(f"{base}_{counter}{ext}"): + counter += 1 + new_path = f"{base}_{counter}{ext}" + logging.info(f"Using incremented filename: {new_path}") + return new_path + + elif action == OUTPUT_ACTION_BACKUP: + base, ext = os.path.splitext(output_path) + counter = 1 + backup_path = f"{base}_backup_{counter}{ext}" + while os.path.exists(backup_path): + counter += 1 + backup_path = f"{base}_backup_{counter}{ext}" + + try: + logging.info(f"Backing up existing file to: {backup_path}") + shutil.copy2(output_path, backup_path) + logging.info(f"Backup successful: {output_path} -> {backup_path}") + except Exception as e: + logging.error(f"Backup failed: {e}") + raise + + # Return original path - the existing file will be overwritten by SaveAs + return output_path + + return output_path + + +def _get_column_mapping(mapping_config, mapping_column_name, source_type): + """ + Extract column mapping from Requests_Mapping or Organizations_Mapping. + + The mapping column contains user-friendly 1-based indices (1, 2, 3, ...) + indicating which column in the Excel table each field should be placed. + These are converted to 0-based indices for internal use. + + Args: + mapping_config: List of mapping config rows (dicts with field_name, etc.) 
+ mapping_column_name: Name of the mapping column to extract (e.g., "MainReport_PatientsList") + source_type: "Requests" or "Organizations" + + Returns: + Dictionary: {excel_column_index: source_field_name} + Example: {0: "Patient_Identification.Patient_Id", 1: "Request.Status", ...} + Indices are 0-based (converted from 1-based user input) + or None if mapping_column not found + """ + if not mapping_config: + return None + + column_mapping = {} + + for row in mapping_config: + # Get the field name (source field in the JSON) + field_name = row.get("field_name") + if not field_name: + continue + + # Get the mapping value from the specified column + mapping_value = row.get(mapping_column_name) + if mapping_value is None or mapping_value == "": + continue # Skip empty mappings + + # Convert mapping_value to integer (1-based user-friendly index) + try: + user_col_index = int(mapping_value) + except (ValueError, TypeError): + logging.warning(f"Invalid column index '{mapping_value}' for field '{field_name}'") + continue + + # Convert 1-based to 0-based index + excel_col_index = user_col_index - 1 + + if excel_col_index < 0: + logging.warning(f"Column index '{user_col_index}' for field '{field_name}' must be >= 1") + continue + + # Store: excel_column_index -> field_name + # Field name needs to be qualified with group for Requests + # (extracted from the row's field_group if available) + if source_type == "Requests": + field_group = row.get("field_group", "") + if field_group: + full_field_name = f"{field_group}.{field_name}" + else: + full_field_name = field_name + else: + # For Organizations, field_name might already be qualified or standalone + full_field_name = field_name + + column_mapping[excel_col_index] = full_field_name + + return column_mapping if column_mapping else None + + +def _parse_range_dimensions(start_row, start_col, end_row, end_col, header_row_count=0): + """ + Shared utility: Calculate dimensions from cell coordinates. 
+ + Args: + start_row, start_col: Starting cell (1-based, after headers) + end_row, end_col: Ending cell (1-based) + header_row_count: Number of header rows (0 if none) + + Returns: + Tuple of (width, total_height, data_height) + """ + width = end_col - start_col + 1 + total_height = end_row - start_row + 1 + data_height = total_height - header_row_count + return width, total_height, data_height + + +def _get_named_range_dimensions(workbook, range_name): + """ + Get dimensions of named range or table in workbook. + + Args: + workbook: openpyxl Workbook object + range_name: Name of the named range or table + + Returns: + Tuple of (sheet_name, start_cell, height, width) or None if not found + """ + # First check for defined named ranges (in openpyxl 3.x) + if range_name in workbook.defined_names: + defined_name = workbook.defined_names[range_name] + # Get the range reference from attr_text (e.g., "Sheet!$A$1:$B$10") + range_ref = defined_name.attr_text + + # Parse: "SheetName!$A$1:$B$10" + if '!' 
in range_ref: + sheet_name, cell_range = range_ref.split('!') + # Remove quotes if present + sheet_name = sheet_name.strip("'\"") + # Remove $ signs for parsing + cell_range = cell_range.replace('$', '') + + if sheet_name in workbook.sheetnames: + sheet = workbook[sheet_name] + + # Parse cell range (e.g., "A1:B10" or single "A1") + if ':' in cell_range: + start_cell_str, end_cell_str = cell_range.split(':') + start_cell = sheet[start_cell_str] + end_cell = sheet[end_cell_str] + width = end_cell.column - start_cell.column + 1 + height = end_cell.row - start_cell.row + 1 + else: + start_cell = sheet[cell_range] + width = 1 + height = 1 + + return sheet_name, start_cell, height, width + + # Check if it's a Table (Excel table object, not just a named range) + for sheet_name in workbook.sheetnames: + sheet = workbook[sheet_name] + if hasattr(sheet, 'tables') and range_name in sheet.tables: + table = sheet.tables[range_name] + # Table has a 'ref' property with the range (e.g., "A4:F5") + # Excel tables can have header rows (default 1, but can be 0) + table_ref = table.ref + header_row_count = getattr(table, 'headerRowCount', 1) or 0 # 0 if None or False + + # Parse cell range (e.g., "A4:F5") + if ':' in table_ref: + start_cell_str, end_cell_str = table_ref.split(':') + start_cell_temp = sheet[start_cell_str] + end_cell = sheet[end_cell_str] + width = end_cell.column - start_cell_temp.column + 1 + total_height = end_cell.row - start_cell_temp.row + 1 + + # Skip header rows: point to first DATA row + if header_row_count > 0: + data_start_row = start_cell_temp.row + header_row_count + start_cell = sheet.cell(row=data_start_row, column=start_cell_temp.column) + else: + start_cell = start_cell_temp + + # Calculate data row count (total - headers) + height = total_height - header_row_count + else: + start_cell = sheet[table_ref] + width = 1 + height = 1 + + return sheet_name, start_cell, height, width + + return None + +# OBSOLETE: _update_named_range_height() removed +# This 
function was only called by the old openpyxl-based _process_sheet() implementation +# xlwings uses table.Resize() via COM API instead, which is more reliable +# See PHASE 2 migration notes for details + + +# OBSOLETE: _recalculate_workbook() removed - xlwings handles formula recalculation automatically +# When using xlwings with wb.save(), Excel automatically recalculates all formulas + +# OBSOLETE: _process_sheet() removed - openpyxl implementation migrated to xlwings +# All sheet processing is now handled by _process_sheet_xlwings() using xlwings library +# This eliminates code duplication and provides better preservation of workbook structure + + +def _get_table_dimensions_xlwings(workbook_xw, range_name): + """ + Get dimensions of an Excel table OR named range using xlwings COM API. + + First searches for ListObjects (structured tables), then falls back to + simple named ranges if no table is found. + + Args: + workbook_xw: xlwings Book object (already open) + range_name: Name of the Excel table (ListObject) or named range + + Returns: + Tuple (sheet_name, start_cell, height, width, header_row_count, target_type) or None if not found + - start_cell: Points to FIRST DATA ROW (after headers for tables, first row for named ranges) + - height: Number of DATA ROWS (excluding headers for tables) + - header_row_count: Number of header rows (0 for named ranges, 0 or 1 for tables) + - target_type: TARGET_TYPE_TABLE or TARGET_TYPE_NAMED_RANGE + + Note: + - For tables with headers: start_cell points to first data row (after header) + - For tables without headers: start_cell points to first row of table + - For named ranges: start_cell points to first row (no headers assumed) + """ + # Helper class to mimic openpyxl Cell behavior + class CellRef: + def __init__(self, row, column): + self.row = row + self.column = column + @property + def coordinate(self): + col_letter = get_column_letter(self.column) + return f"{col_letter}{self.row}" + + # === PRIORITY 1: Check if it's a 
table (ListObject) === + # Excel tables are more reliable than plain named ranges with xlwings + for sheet in workbook_xw.sheets: + sheet_api = sheet.api + + # Try to get the table count - if this fails, the sheet has no ListObjects property + try: + table_count = sheet_api.ListObjects.Count + except: + # Sheet doesn't support ListObjects or has none + continue + + # If no tables in this sheet, skip + if table_count == 0: + continue + + # Iterate through tables by index + for i in range(1, table_count + 1): # COM indexing starts at 1 + try: + xl_table = sheet_api.ListObjects.Item(i) + table_name = xl_table.Name + if table_name == range_name: + # Found a table - get its range + xl_range = xl_table.Range + sheet_name = sheet.name + total_rows = xl_range.Rows.Count + total_cols = xl_range.Columns.Count + start_row = xl_range.Row + start_col = xl_range.Column + + # Get header row count from the table + # In COM API, ListObject has ShowHeaders property (boolean) and HeaderRowRange + # ShowHeaders: True if table has header row, False if not + try: + has_headers = xl_table.ShowHeaders + header_row_count = 1 if has_headers else 0 + except: + # If ShowHeaders not accessible, try HeaderRowRange + try: + header_range = xl_table.HeaderRowRange + header_row_count = 1 if header_range is not None else 0 + except: + # Fallback: assume headers exist (most common case) + header_row_count = 1 + + # Data height = total height - header rows + data_height = total_rows - header_row_count + + # start_cell points to the FIRST DATA ROW (after headers) + # If table has headers: skip them. 
If no headers: start at table start + if header_row_count > 0: + data_start_row = start_row + header_row_count + else: + data_start_row = start_row + start_cell = CellRef(data_start_row, start_col) + + logging.info(f"[TABLE FOUND] Located table '{range_name}' at {sheet_name}!{start_cell.coordinate} " + f"(data rows: {data_height}, headers: {header_row_count}, total width: {total_cols})") + return sheet_name, start_cell, data_height, total_cols, header_row_count, TARGET_TYPE_TABLE + except Exception as e: + # Error accessing this specific table, skip it + logging.warning(f"Error accessing table {i} in '{sheet.name}': {type(e).__name__}") + + # === PRIORITY 2: Check if it's a named range === + # Named ranges don't have headers - data starts at first row + try: + if range_name in workbook_xw.names: + named_range = workbook_xw.names[range_name] + target_range = named_range.refers_to_range + + sheet_name = target_range.sheet.name + start_row = target_range.row + start_col = target_range.column + total_rows = target_range.rows.count + total_cols = target_range.columns.count + + # Named ranges have no headers - all rows are data rows + header_row_count = 0 + data_height = total_rows + + start_cell = CellRef(start_row, start_col) + + logging.info(f"[NAMED RANGE FOUND] Located named range '{range_name}' at {sheet_name}!{start_cell.coordinate} " + f"(data rows: {data_height}, no headers, total width: {total_cols})") + return sheet_name, start_cell, data_height, total_cols, header_row_count, TARGET_TYPE_NAMED_RANGE + except Exception as e: + logging.warning(f"Error accessing named range '{range_name}': {type(e).__name__}: {str(e)}") + + # Range/table not found + logging.warning(f"Named range or table '{range_name}' not found in workbook") + return None + + +# ============================================================================ +# HELPER FUNCTIONS FOR SHEET PROCESSING (extracted from _process_sheet_xlwings) +# 
============================================================================ + +def _fill_variable_in_sheet(workbook_xw, target_name, source_template, template_vars, workbook_context=""): + """ + Fill a single variable cell with evaluated template value. + + Args: + workbook_xw: xlwings Book object + target_name: Name of the target named range (single cell) + source_template: Template string with {variables} + template_vars: Dictionary of variable values + workbook_context: Context string for logging + + Returns: + Boolean True if successful + """ + try: + # Evaluate template string + cell_value = source_template.format(**template_vars) + except KeyError as e: + logging.warning(f"Unknown variable in template: {e}") + return False + + # Write to named cell using xlwings + try: + named_range = workbook_xw.names[target_name] + target_range = named_range.refers_to_range + target_range.value = cell_value + logging.info(f"Set variable '{target_name}' to '{cell_value}'") + return True + except KeyError: + logging.warning(f"Named range '{target_name}' not found in {workbook_context}") + return False + except Exception as e: + logging.warning(f"Error setting variable '{target_name}' in {workbook_context}: {e}") + return False + + +def _prepare_table_data(source_type, source, sheet_config, requests_data, organizations_data, + requests_mapping_config, organizations_mapping_config, target_name): + """ + Prepare table data: select source, apply filter/sort, get column mapping. 
def _resize_table_range(workbook_xw, sheet_xw, target_name, start_cell, max_col, start_row, num_data_rows, header_row_count=0, target_type=TARGET_TYPE_TABLE):
    """
    Resize Excel table (ListObject) or named range to match data dimensions.

    For Tables (ListObjects): Uses ListObject.Resize() COM API
    For Named Ranges: Redefines the named range via Name.RefersTo property

    Nothing is done when there is at most one data row: the existing template
    row of the target is then left as it is.

    Args:
        workbook_xw: xlwings Book object (needed for named range resize)
        sheet_xw: xlwings Sheet object
        target_name: Name of the table/named range
        start_cell: Starting cell (CellRef) - points to FIRST DATA ROW (after headers for tables)
        max_col: Maximum column (1-based)
        start_row: Starting row (1-based, first data row)
        num_data_rows: Number of data rows
        header_row_count: Number of header rows in the table (0 for named ranges)
        target_type: TARGET_TYPE_TABLE or TARGET_TYPE_NAMED_RANGE

    Returns:
        None (failures are logged as warnings, never raised)
    """
    if num_data_rows <= 1:
        return

    try:
        # Calculate the last data row
        last_data_row = start_row + num_data_rows - 1

        if target_type == TARGET_TYPE_TABLE:
            # === TABLE (ListObject) RESIZE ===
            excel_sheet = sheet_xw.api

            # Find the ListObject (Table) by name
            for list_obj in excel_sheet.ListObjects:
                if list_obj.Name != target_name:
                    continue

                # If header_row_count not provided (legacy fallback), get it from the table
                if header_row_count == 0:
                    try:
                        header_row_count = 1 if list_obj.ShowHeaders else 0
                    except Exception:
                        # ShowHeaders not readable: assume a header row exists
                        header_row_count = 1

                # ListObject.Resize() expects the FULL table range, so the header
                # rows located above the first data row must be included
                first_row = start_row - header_row_count if header_row_count > 0 else start_row

                resize_range_str = f"{get_column_letter(start_cell.column)}{first_row}:{get_column_letter(max_col)}{last_data_row}"

                # Perform resize via ListObject.Resize()
                new_range = excel_sheet.Range(resize_range_str)
                list_obj.Resize(new_range)
                logging.info(f"Resized table '{target_name}' to {resize_range_str} (header_rows={header_row_count})")
                break
            else:
                # No table with that name on this sheet: make the no-op visible in the log
                logging.warning(f"Resize skipped for {target_name} ({target_type}): table not found on sheet '{sheet_xw.name}'")

        elif target_type == TARGET_TYPE_NAMED_RANGE:
            # === NAMED RANGE RESIZE ===
            # Redefine the named range to cover all data rows
            # Named ranges have no headers, so start_row is the first row
            first_col_letter = get_column_letter(start_cell.column)
            last_col_letter = get_column_letter(max_col)

            # Build the range address in A1 style
            range_address = f"${first_col_letter}${start_row}:${last_col_letter}${last_data_row}"

            # Get the actual Range object from the sheet and assign it to the Name
            # This avoids R1C1/A1 format issues by using the Range object directly
            new_range = sheet_xw.range(range_address)
            workbook_xw.api.Names(target_name).RefersTo = new_range.api
            logging.info(f"Resized named range '{target_name}' to {sheet_xw.name}!{range_address}")

    except Exception as e:
        logging.warning(f"Resize skipped for {target_name} ({target_type}): {e}")
This prevents subsequent Range.Select() calls. + # Solution: Reset Excel's selection state by switching sheets and back, then select A1. + try: + from time import sleep + logging.info(f"Deselecting range after bulk paste for {target_name}...") + + # Switch to another sheet to force Excel to reset selection state + other_sheets = [s for s in sheet_xw.book.sheets if s.name != sheet_xw.name] + if other_sheets: + other_sheets[0].activate() + sleep(0.1) + + # Reactivate our sheet - Excel resets selection management when returning + sheet_xw.activate() + + # Select A1 - COM should manage this easily now + sheet_xw.api.Range("$A$1").Select() + logging.info(f"Successfully deselected after bulk paste for {target_name} (sheet reactivation)") + + except Exception as e: + # Deselection is non-critical, log and continue if it fails + logging.warning(f"Deselection after paste failed for {target_name}: {type(e).__name__}: {str(e)}") + + except Exception as e: + logging.warning(f"Template duplication failed for {target_name} in {workbook_context}: {e}") + + +def _fill_table_with_data(sheet_xw, start_cell, start_row, start_col, sorted_data, column_mapping, + value_replacement, target_name, sheet_name): + """ + Fill table with data: group contiguous columns and transfer via bulk 2D arrays. 
+ + Args: + sheet_xw: xlwings Sheet object + start_cell: Starting cell (CellRef) + start_row: Starting row (1-based) + start_col: Starting column (1-based) + sorted_data: Sorted list of data items + column_mapping: Dict mapping Excel column indices to source field paths + value_replacement: Value replacement configuration (or None) + target_name: Target range name (for logging) + sheet_name: Sheet name (for logging) + + Returns: + None (logging handles errors and success) + """ + try: + # === Prepare column mapping and group contiguous columns === + col_order = sorted(column_mapping.keys()) + + # Group contiguous columns for optimal bulk update + contiguous_groups = [] + if col_order: + current_group = [col_order[0]] + for i in range(1, len(col_order)): + if col_order[i] == col_order[i-1] + 1: + current_group.append(col_order[i]) + else: + contiguous_groups.append(current_group) + current_group = [col_order[i]] + contiguous_groups.append(current_group) + + # === Update contiguous column groups (bulk 2D transfer) === + for col_group in contiguous_groups: + # Build 2D array for this group: rows × columns + data_2d = [] + for item in sorted_data: + row_values = [] + for excel_col_index in col_group: + source_field_path = column_mapping[excel_col_index] + # Get value from source item + value = get_nested_value(item, source_field_path.split(".")) + + # Apply value replacement + if value_replacement: + value = _apply_value_replacement(value, value_replacement) + + row_values.append(value) + data_2d.append(row_values) + + # Transfer entire group to Excel in ONE operation + first_col_in_group = start_col + col_group[0] + first_col_letter = get_column_letter(first_col_in_group) + target_range_start = f"{first_col_letter}{start_row}" + + # Write 2D array at once (xlwings automatically maps rows × columns) + sheet_xw.range(target_range_start).value = data_2d + + # Logging + num_data_rows = len(sorted_data) + logging.info(f"Filled table {target_name} with {num_data_rows} rows 
" + f"at {sheet_name}!{start_cell.coordinate} " + f"(bulk duplication + {len(contiguous_groups)} contiguous group(s))") + except Exception as e: + logging.error(f"Error filling table data for {target_name}: {e}") + logging.error(f"Traceback: {traceback.format_exc()}") + + +def _process_sheet_xlwings(workbook_xw, sheet_config, requests_data, organizations_data, + requests_mapping_config, organizations_mapping_config, template_vars, + workbook_context=""): + """ + Process a single sheet using xlwings (hybrid approach). + + Delegates to specialized helpers to maintain clarity and testability. + + Args: + workbook_xw: xlwings Book object + sheet_config: Sheet configuration dict + requests_data: List of request dictionaries + organizations_data: List of organization dictionaries + requests_mapping_config: Requests mapping config (for column mapping) + organizations_mapping_config: Organizations mapping config + template_vars: Dictionary of variables for template evaluation + workbook_context: Context string identifying the workbook (for logging) + + Returns: + Boolean True if successful + """ + source_type = sheet_config.get("source_type") + source = sheet_config.get("source") + target_name = sheet_config.get("target_name") + value_replacement = sheet_config.get("value_replacement") + + # === Variable sources: single cell fill === + if source_type == SOURCE_TYPE_VARIABLE: + return _fill_variable_in_sheet(workbook_xw, target_name, source, template_vars, workbook_context) + + # === Table sources: bulk data filling === + if source_type not in [SOURCE_TYPE_REQUESTS, SOURCE_TYPE_ORGANIZATIONS]: + return False + + # Prepare data: filter, sort, get column mapping + sorted_data, column_mapping = _prepare_table_data( + source_type, source, sheet_config, requests_data, organizations_data, + requests_mapping_config, organizations_mapping_config, target_name + ) + if sorted_data is None or column_mapping is None: + return False + + # Get table/named range dimensions from xlwings + 
def prepare_excel_export(requests_mapping_config, organizations_mapping_config):
    """
    Validate the Excel export configuration without loading any production data.

    Responsibility is limited to CONFIG validation. The JSON data files are
    loaded by the execution functions (run_normal_mode_export, export_excel_only),
    not here.

    The mapping configurations are injected by the caller, which must have
    loaded them beforehand (e.g. via load_requests_mapping_config()). This keeps
    validation and data loading separate and lets the configuration be checked
    early, before data collection starts in NORMAL MODE.

    Steps:
        1. Both mapping configurations must be provided and non-empty.
        2. The Excel export configuration is loaded via load_excel_export_config().
        3. The loaded configuration is checked via validate_excel_config().

    Args:
        requests_mapping_config: Loaded requests mapping (required, non-empty list/dict)
        organizations_mapping_config: Loaded organizations mapping (required, non-empty list/dict)

    Returns:
        Tuple of (excel_config, has_critical_errors, error_messages)
        - excel_config: Tuple of (workbooks_config, sheets_config), or None when a
          mapping configuration is missing
        - has_critical_errors: Boolean True if validation found critical errors
        - error_messages: List of error message strings
    """
    collected_errors = []

    def _is_missing(mapping):
        # Absent (falsy) or an explicitly empty list/dict
        return not mapping or (isinstance(mapping, (list, dict)) and len(mapping) == 0)

    def _print_failure(text):
        # Console output is optional: only print when a console object is set
        if console:
            console.print(f"[bold red]✗ {text}[/bold red]")

    # === STEP 1: both mapping configurations must be supplied by the caller ===
    if _is_missing(requests_mapping_config):
        text = "Requests mapping configuration must be provided and non-empty"
        collected_errors.append(text)
        logging.error(text)
        _print_failure(text)
        return None, True, collected_errors

    if _is_missing(organizations_mapping_config):
        text = "Organizations mapping configuration must be provided and non-empty"
        collected_errors.append(text)
        logging.error(text)
        _print_failure(text)
        return None, True, collected_errors

    # === STEP 2: load the Excel export configuration ===
    logging.info("Loading Excel export configuration...")
    workbooks_cfg, sheets_cfg, load_failed, load_errors = load_excel_export_config(console)

    if load_failed:
        text = "Critical errors in Excel Export Config"
        collected_errors.append(text)
        collected_errors.extend(load_errors)
        logging.warning(text)
        _print_failure(text)
        return (workbooks_cfg, sheets_cfg), True, collected_errors

    if not (workbooks_cfg and sheets_cfg):
        # NOTE(review): this path reports an error message but leaves the critical
        # flag False - confirm with the callers that this is intended.
        text = "Excel export configuration is empty"
        collected_errors.append(text)
        logging.warning(text)
        _print_failure(text)
        return (workbooks_cfg, sheets_cfg), False, collected_errors

    # Package config into a tuple for downstream functions
    packaged_config = (workbooks_cfg, sheets_cfg)

    # === STEP 3: validate the loaded configuration against the mappings ===
    logging.info("Validating Excel export configuration...")
    critical_found, validation_errors = validate_excel_config(
        packaged_config,
        console,
        requests_mapping_config or [],
        organizations_mapping_config or {}
    )

    if not validation_errors:
        logging.info("✓ Excel export configuration validated successfully")
    else:
        collected_errors.extend(validation_errors)
        if critical_found and console:
            console.print("[bold red]✗ Critical validation errors found[/bold red]")

    return packaged_config, critical_found, collected_errors
+ + Args: + sys_argv: sys.argv from main script (for potential future CLI arg parsing) + requests_filename: Name of requests JSON file (e.g., "do_requests.json") + organizations_filename: Name of organizations JSON file (e.g., "do_organizations.json") + requests_mapping_config: Loaded requests mapping configuration (REQUIRED - caller must load) + organizations_mapping_config: Loaded organizations mapping configuration (REQUIRED - caller must load) + """ + global console + + if not requests_filename: + requests_filename = REQUESTS_FILE_NAME + if not organizations_filename: + organizations_filename = ORGANIZATIONS_FILE_NAME + + print() + console.print("[bold cyan]═══ EXCEL ONLY MODE ═══[/bold cyan]\n") + + # Step 1: Validate Excel configuration (no data loading) + logging.info("EXCEL ONLY MODE: Validating Excel configuration") + excel_config, has_config_critical, error_messages = \ + prepare_excel_export(requests_mapping_config, organizations_mapping_config) + + # Step 2: Handle critical configuration errors + if has_config_critical: + print() + console.print("[bold red]⚠ CRITICAL CONFIGURATION ERROR(S) DETECTED[/bold red]") + console.print("[bold red]────────────────────────────────────[/bold red]") + for idx, error_msg in enumerate(error_messages, 1): + console.print(f"[bold red]Error {idx}: {error_msg}[/bold red]") + console.print("[bold red]────────────────────────────────────[/bold red]") + print() + try: + import questionary + answer = questionary.confirm( + "⚠ Continue anyway?", + default=False + ).ask() + if not answer: + console.print("[bold red]Aborted by user[/bold red]") + logging.warning("EXCEL ONLY MODE: Aborted by user due to critical errors") + return + except ImportError: + console.print("[bold yellow]⚠ questionary not available for confirmation[/bold yellow]") + console.print("[bold yellow]Proceeding with export despite critical errors[/bold yellow]") + + # Step 3: Load JSON data files (must exist in --excel-only mode) + logging.info("EXCEL ONLY 
MODE: Loading data files") + requests_data = _load_json_file_internal(requests_filename) + organizations_data = _load_json_file_internal(organizations_filename) + + if requests_data is None or organizations_data is None: + console.print("[bold red]✗ Error: Could not load data files for Excel export[/bold red]") + logging.error("EXCEL ONLY MODE: Data file loading failed") + return + + # Step 4: Execute Excel export (direct call to export_to_excel, console is global) + print() + console.print("[bold cyan]═══ Excel Export ═══[/bold cyan]\n") + logging.info("EXCEL ONLY MODE: Executing export") + + if excel_config: + try: + logging.info(f"Starting Excel export: {len(requests_data)} requests, {len(organizations_data)} organizations") + + success, error_count = export_to_excel( + requests_data, + organizations_data, + excel_config, + requests_mapping_config=requests_mapping_config, + organizations_mapping_config=organizations_mapping_config + ) + + if success: + logging.info("EXCEL ONLY MODE: Export completed successfully") + else: + logging.warning(f"EXCEL ONLY MODE: Export completed with {error_count} error(s)") + except Exception as e: + error_msg = f"Excel export failed: {str(e)}" + logging.error(f"EXCEL ONLY MODE: {error_msg}\n{traceback.format_exc()}") + console.print(f"[bold red]✗ {error_msg}[/bold red]\n") + else: + console.print("[bold red]✗ Could not load Excel configuration[/bold red]\n") + logging.error("EXCEL ONLY MODE: Excel config missing") + + +def run_normal_mode_export(excel_enabled, excel_config, + requests_mapping_config=None, organizations_mapping_config=None): + """ + Orchestrates Excel export during normal mode execution. + + This function encapsulates the Excel export step that runs after requests and organizations + have been collected and written to JSON files. 
It handles: + - Loading JSONs from filesystem (ensures fresh data consistency) + - Executing Excel export with comprehensive error handling + - Displaying results to user + + This is called from the normal workflow after data collection completes. + + Args: + excel_enabled: Boolean indicating if Excel export is enabled + excel_config: Tuple of (workbooks_config, sheets_config) or None + requests_mapping_config: Loaded requests mapping configuration (optional) + organizations_mapping_config: Loaded organizations mapping configuration (optional) + + Note: + This function loads JSON files from the filesystem (which were written + during the data collection phase) to ensure consistency. + + Returns: + Tuple of (export_succeeded, error_message) + - export_succeeded: Boolean True if export completed successfully (or skipped) + - error_message: String with error details (empty if success=True or skipped) + """ + global console + + # Only proceed if export is enabled and config is available + if not excel_enabled or not excel_config: + logging.info("Excel export not enabled or config missing, skipping") + return True, "" # FIX BUG #3: Return True when export is intentionally skipped (not an error) + + print() + console.print("[bold cyan]═══ Excel Export ═══[/bold cyan]\n") + logging.info("NORMAL MODE: Starting Excel export") + + try: + # Load JSONs from filesystem to ensure data consistency with what was written + # Use constants imported from do_dashboard_constants.py (SINGLE SOURCE OF TRUTH) + requests_from_fs = _load_json_file_internal(REQUESTS_FILE_NAME) + organizations_from_fs = _load_json_file_internal(ORGANIZATIONS_FILE_NAME) + + if requests_from_fs is None or organizations_from_fs is None: + error_msg = "Could not load data files for Excel export" + logging.error(f"NORMAL MODE: {error_msg}") + console.print(f"[bold red]✗ {error_msg}[/bold red]\n") + return False, error_msg + + # Execute the export (direct call to export_to_excel, console is global) + 
logging.info(f"Starting Excel export: {len(requests_from_fs)} requests, {len(organizations_from_fs)} organizations") + + success, error_count = export_to_excel( + requests_from_fs, + organizations_from_fs, + excel_config, + requests_mapping_config=requests_mapping_config, + organizations_mapping_config=organizations_mapping_config + ) + + if success: + logging.info("NORMAL MODE: Excel export completed successfully") + return True, "" + else: + error_msg = f"Excel export completed with {error_count} error(s)" + logging.warning(f"NORMAL MODE: {error_msg}") + return False, error_msg + + except Exception as e: + error_msg = f"Unexpected error during Excel export: {str(e)}" + logging.error(f"NORMAL MODE: {error_msg}\n{traceback.format_exc()}") + console.print(f"[bold red]✗ {error_msg}[/bold red]\n") + return False, error_msg + + +def _load_json_file_internal(filename): + """ + Internal helper to load JSON file. + + Args: + filename: Path to JSON file + + Returns: + Parsed JSON data or None if file doesn't exist or can't be parsed + """ + try: + if not os.path.exists(filename): + logging.warning(f"JSON file not found: {filename}") + return None + + with open(filename, 'r', encoding='utf-8') as f: + data = json.load(f) + logging.info(f"Loaded {filename}: {len(data) if isinstance(data, list) else 'data'}") + return data + + except Exception as e: + logging.error(f"Error loading {filename}: {str(e)}") + return None diff --git a/do_dashboard_excel_only-exe.bat b/do_dashboard_excel_only-exe.bat new file mode 100644 index 0000000..7fc15ae --- /dev/null +++ b/do_dashboard_excel_only-exe.bat @@ -0,0 +1,3 @@ +@echo off +do_dashboard.exe --excel-only %* + diff --git a/do_dashboard_excel_only.bat b/do_dashboard_excel_only.bat new file mode 100644 index 0000000..92b927a --- /dev/null +++ b/do_dashboard_excel_only.bat @@ -0,0 +1,4 @@ +@echo off +call C:\PythonProjects\.rcvenv\Scripts\activate.bat +python do_dashboard.py --excel-only %* + diff --git a/do_dashboard_quality_checks.py 
def enable_debug_mode():
    """
    Switch the module-level debug flag on.

    Sets the global ``debug_mode`` to True and, when a console is available
    (truthy), prints a dim confirmation line on it.
    """
    global debug_mode
    debug_mode = True

    # The console may not be set yet; in that case stay silent.
    if not console:
        return
    console.print("[dim]DEBUG MODE enabled - detailed changes will be displayed[/dim]")
def set_dependencies(console_instance):
    """
    Store the console object this module prints to.

    Args:
        console_instance: Object assigned to the module-level ``console``
            (the main module passes its Rich Console).
    """
    global console
    console = console_instance


# ============================================================================
# CONFIGURATION LOADING
# ============================================================================

def load_regression_check_config(console_instance=None):
    """
    Read and validate the regression-check rules from the dashboard config workbook.

    The workbook is DASHBOARD_CONFIG_FILE_NAME inside get_config_path(); the
    rules live in the sheet named REGRESSION_CHECK_TABLE_NAME, whose first row
    holds the column headers. On success the module-level
    ``regression_check_config`` is replaced by the list of rule dictionaries.

    Rows are skipped when the "ignore" column contains "ignore" (any case),
    when every cell is empty, or when "bloc_title" is not a non-empty string.

    Two severities exist:
      - fatal problems (missing file or sheet, missing line_label, invalid
        thresholds, invalid JSON, missing field_selection, invalid bloc_scope)
        are logged, printed and raised as Exception;
      - format problems in "field_selection" or "transitions" print a yellow
        warning and mark the rule with ``"_config_error": True`` while still
        keeping it in the list.

    Args:
        console_instance: Optional console. When truthy it replaces the
            module-level ``console`` before anything is printed.

    Raises:
        Exception: On any fatal configuration problem listed above.
    """
    global regression_check_config, console

    if console_instance:
        console = console_instance

    def _abort(message):
        # Fatal problem: log it, show it, then stop loading.
        logging.critical(message)
        console.print(f"[bold red]{message}[/bold red]")
        raise Exception(message)

    def _flag(rule, message):
        # Recoverable problem: show it and mark the rule as misconfigured.
        console.print(message)
        rule["_config_error"] = True

    def _threshold_is_invalid(value):
        # Valid thresholds are int/float values that are not negative.
        return value is None or not isinstance(value, (int, float)) or value < 0

    def _field_selection_problem(row_number, steps):
        # Return the warning for the first malformed [action, selector] step, else None.
        for step_idx, step in enumerate(steps):
            if not isinstance(step, list) or len(step) != 2:
                return f"[yellow]⚠ Row {row_number}: field_selection[{step_idx}] must be array of 2 elements [action, selector], skipping rule[/yellow]"

            action, field_selector = step

            if action not in ["include", "exclude"]:
                return f"[yellow]⚠ Row {row_number}: field_selection[{step_idx}] action must be 'include' or 'exclude', got '{action}', skipping rule[/yellow]"

            if not isinstance(field_selector, str) or "." not in field_selector:
                return f"[yellow]⚠ Row {row_number}: field_selection[{step_idx}] selector must be string with dot notation (e.g., '*.*', 'group.*', 'group.field'), got '{field_selector}', skipping rule[/yellow]"
        return None

    def _transitions_problem(row_number, steps):
        # Return the warning for the first malformed 4-element transition step, else None.
        for step_idx, transition_step in enumerate(steps):
            if not isinstance(transition_step, list) or len(transition_step) != 4:
                return f"[yellow]⚠ Row {row_number}: transitions[{step_idx}] must be array of 4 elements [action, field_selector, from, to], skipping[/yellow]"

            action, field_selector, from_val, to_val = transition_step

            if action not in ["include", "exclude"]:
                return f"[yellow]⚠ Row {row_number}: transitions[{step_idx}] action must be 'include' or 'exclude', got '{action}', skipping[/yellow]"

            if not isinstance(field_selector, str) or "." not in field_selector:
                return f"[yellow]⚠ Row {row_number}: transitions[{step_idx}] field_selector must be string with dot notation, got '{field_selector}', skipping[/yellow]"
        return None

    config_path = os.path.join(get_config_path(), DASHBOARD_CONFIG_FILE_NAME)

    try:
        workbook = openpyxl.load_workbook(config_path)
    except FileNotFoundError:
        _abort(f"Error: Configuration file not found at: {config_path}")

    if REGRESSION_CHECK_TABLE_NAME not in workbook.sheetnames:
        _abort(f"Error: Sheet '{REGRESSION_CHECK_TABLE_NAME}' not found in the configuration file.")

    sheet = workbook[REGRESSION_CHECK_TABLE_NAME]
    headers = [cell.value for cell in sheet[1]]

    # Rules that work on whole requests/fields and therefore take no field_selection.
    special_rules_no_selection = ["New Fields", "Deleted Fields", "Deleted Requests"]

    loaded_rules = []

    # Data starts on sheet row 2; row_index is the real sheet row for messages.
    for row_index, row in enumerate(sheet.iter_rows(min_row=2, values_only=True), start=2):
        rule_config = dict(zip(headers, row))

        # Explicitly ignored rows.
        ignore_value = rule_config.get("ignore")
        if isinstance(ignore_value, str) and ignore_value and "ignore" in ignore_value.lower():
            continue

        # Completely empty rows.
        if all(cell is None for cell in row):
            continue

        bloc_title = rule_config.get("bloc_title")
        line_label = rule_config.get("line_label")

        # Rows without a textual bloc_title (separators, etc.) are not rules.
        if not bloc_title or not isinstance(bloc_title, str):
            continue

        if not line_label or not isinstance(line_label, str):
            _abort(f"Error in Regression_Check config, row {row_index}: 'line_label' is mandatory when 'bloc_title' is specified.")

        # --- Thresholds ---
        if _threshold_is_invalid(rule_config.get("warning_threshold")):
            _abort(f"Error in Regression_Check config, row {row_index}: 'warning_threshold' must be a number >= 0.")

        if _threshold_is_invalid(rule_config.get("critical_threshold")):
            _abort(f"Error in Regression_Check config, row {row_index}: 'critical_threshold' must be a number >= 0.")

        # --- JSON columns: decode non-empty strings, normalise absent values to None ---
        for json_field in ("field_selection", "transitions"):
            raw_value = rule_config.get(json_field)
            if raw_value is None:
                rule_config[json_field] = None
            elif isinstance(raw_value, str) and raw_value:
                try:
                    rule_config[json_field] = json.loads(raw_value)
                except json.JSONDecodeError:
                    _abort(f"Error in Regression_Check config, row {row_index}, field '{json_field}': Invalid JSON format.")

        # --- field_selection ---
        field_selection = rule_config.get("field_selection")

        if line_label in special_rules_no_selection:
            if field_selection is not None and field_selection != [] and field_selection != "":
                _flag(rule_config, f"[yellow]⚠ Row {row_index}: Special rule '{line_label}' should have empty field_selection, got {field_selection}[/yellow]")
        else:
            # Standard rules and "New Requests" must declare a field_selection.
            if field_selection is None:
                _abort(f"Error in Regression_Check config, row {row_index}: 'field_selection' is mandatory for rule '{line_label}'.")

            if isinstance(field_selection, list):
                problem = _field_selection_problem(row_index, field_selection)
                if problem is not None:
                    _flag(rule_config, problem)
            else:
                _flag(rule_config, f"[yellow]⚠ Row {row_index}: 'field_selection' must be a JSON array of [action, selector] pairs, skipping rule[/yellow]")

        # --- bloc_scope ---
        bloc_scope = rule_config.get("bloc_scope")
        if bloc_scope is not None and bloc_scope not in ["all", "any"]:
            _abort(f"Error in Regression_Check config, row {row_index}: 'bloc_scope' must be 'all' or 'any'.")

        # --- transitions ---
        transitions = rule_config.get("transitions")
        if transitions is not None:
            if isinstance(transitions, list):
                problem = _transitions_problem(row_index, transitions)
                if problem is not None:
                    _flag(rule_config, problem)
            else:
                _flag(rule_config, f"[yellow]⚠ Row {row_index}: 'transitions' must be a JSON array, skipping this rule[/yellow]")

        loaded_rules.append(rule_config)

    regression_check_config = loaded_rules
    console.print(f"Loaded {len(regression_check_config)} regression check rules.", style="green")
def load_json_file(filename):
    """
    Load a JSON file (requests, organizations, or any JSON data).

    Args:
        filename: Path to the JSON file.

    Returns:
        The parsed JSON value, or None when the file does not exist or could
        not be read/parsed. A read/parse failure is logged as a warning and,
        when a console has been injected, also printed on it.
    """
    if not os.path.exists(filename):
        return None

    try:
        with open(filename, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        logging.warning(f"Could not load JSON file '{filename}': {e}")
        # The module-level console is None until set_dependencies() (or a caller)
        # provides one; without this guard a corrupt file would raise
        # AttributeError here instead of returning None as documented.
        if console:
            console.print(f"[yellow]⚠ Warning: Could not load JSON file '{filename}': {e}[/yellow]")
    return None


def backup_output_files():
    """
    Silently copy the current output files to their "old" counterparts.

    For REQUESTS_FILE_NAME and ORGANIZATIONS_FILE_NAME, the existing file (if
    any) is copied with shutil.copy2 to the name returned by
    _get_old_filename(name, OLD_FILE_SUFFIX). A failed copy is logged as a
    warning and never raised.
    """
    for source in (REQUESTS_FILE_NAME, ORGANIZATIONS_FILE_NAME):
        destination = _get_old_filename(source, OLD_FILE_SUFFIX)
        if not os.path.exists(source):
            continue
        try:
            shutil.copy2(source, destination)
        except Exception as e:
            # Best effort by design: a failed backup must not stop the run.
            logging.warning(f"Could not backup {source}: {e}")
def non_regression_check(output_requests, old_requests_filename):
    """
    Compare current requests against a previous snapshot using the configured rules.

    The rules come from the module-level ``regression_check_config``. Requests
    of both snapshots are indexed by a key field that is derived from the
    "New Requests" rule; each rule then yields a count that is compared with
    its warning/critical thresholds and printed, grouped by ``bloc_title``.

    Args:
        output_requests: Current requests data (list of request dictionaries).
        old_requests_filename: Filename of the old requests JSON file to load.

    Returns:
        True if any rule count exceeded its critical threshold, or if the key
        field could not be determined; False otherwise, including when the old
        file could not be loaded (the check is then skipped).
    """
    console.print("\n[bold]═══ Non Regression Check ═══[/bold]\n")

    console.print(f"[dim]Loading old requests from: {old_requests_filename}[/dim]")
    old_requests = load_json_file(old_requests_filename)

    # Nothing to compare against: treated as "no regression", not as an error.
    if old_requests is None:
        console.print(f"[yellow]⚠ No old requests file found at '{old_requests_filename}', skipping non-regression check[/yellow]")
        return False

    # Set to True by _get_status_and_style() as soon as one line is CRITICAL.
    has_critical = False

    # ========== INTERNAL UTILITY FUNCTIONS ==========

    def _is_undefined(value):
        """Return True for the values treated as "no value": None, "" and "undefined"."""
        return value in [None, "", "undefined"]

    def _values_are_equal(val1, val2):
        """Compare two field values, treating all "undefined" forms as equal to each other."""
        if _is_undefined(val1) and _is_undefined(val2):
            return True
        return val1 == val2

    def _apply_pipeline_step(checked_fields, action, field_selector, from_pattern, to_pattern):
        """
        Apply one transition step in place to a list of field records.

        Each record is [group, field, old_val, new_val, is_checked]. Records
        matching both the selector and the from/to patterns get is_checked set
        to True ("include") or False ("exclude"); later steps override earlier
        ones.
        """
        for i, field_record in enumerate(checked_fields):
            group_name, field_name, old_val, new_val, is_checked = field_record
            if not _field_selector_matches_pattern(field_selector, group_name, field_name):
                continue
            if _transition_matches(old_val, new_val, from_pattern, to_pattern):
                if action == "include":
                    checked_fields[i][4] = True
                elif action == "exclude":
                    checked_fields[i][4] = False

    def _transition_matches(old_val, new_val, expected_old, expected_new):
        """
        Tell whether an old -> new value pair matches the expected patterns.

        Each pattern is either a literal value or one of the keywords
        "*undefined" (None, "" or "undefined"), "*defined" (anything else) or
        "*" (any value).
        """
        if expected_old == "*undefined":
            old_matches = old_val in [None, "", "undefined"]
        elif expected_old == "*defined":
            old_matches = old_val not in [None, "", "undefined"]
        elif expected_old == "*":
            old_matches = True
        else:
            old_matches = (old_val == expected_old)

        if expected_new == "*undefined":
            new_matches = new_val in [None, "", "undefined"]
        elif expected_new == "*defined":
            new_matches = new_val not in [None, "", "undefined"]
        elif expected_new == "*":
            new_matches = True
        else:
            new_matches = (new_val == expected_new)

        return old_matches and new_matches

    def _get_status_and_style(count, warning_threshold, critical_threshold):
        """
        Map a count to a (status, color, symbol) tuple.

        Thresholds are exclusive: a count equal to a threshold stays in the
        lower severity. Side effect: flips the enclosing ``has_critical`` flag
        when the status is CRITICAL.
        """
        nonlocal has_critical
        if count > critical_threshold:
            has_critical = True
            return "CRITICAL", "red", "✗"
        elif count > warning_threshold:
            return "WARNING", "yellow", "⚠"
        else:
            return "OK", "green", "✓"

    def _print_block_header(title, status_tuple, indent=0):
        """Print a bloc title, colored and prefixed according to its status tuple."""
        indent_str = " " * indent
        status, color, emoji = status_tuple
        console.print(f"{indent_str}{emoji} [{color}][bold]{title}[/bold][/{color}]")

    def _print_check_line(message, count, status_tuple, indent=1):
        """Print one "label: count" line, colored and prefixed according to its status tuple."""
        indent_str = " " * indent
        status, color, emoji = status_tuple
        console.print(f"{indent_str}{emoji} [{color}]{message}: {count}[/{color}]")

    def _calculate_block_status(line_statuses):
        """Return the worst status among a bloc's lines (CRITICAL > WARNING > OK)."""
        if any(s[0] == "CRITICAL" for s in line_statuses):
            return ("CRITICAL", "red", "✗")
        elif any(s[0] == "WARNING" for s in line_statuses):
            return ("WARNING", "yellow", "⚠")
        else:
            return ("OK", "green", "✓")

    def _field_selector_matches_pattern(selector, group_name, field_name):
        """
        Match a "group.field" selector against a concrete (group, field) pair.

        "*" is accepted as a wildcard on either side of the first dot.
        """
        if selector == "*.*":
            return True
        # Split on the first dot only, so the field part may itself contain dots.
        sel_group, sel_field = selector.split(".", 1)
        if sel_group != "*" and sel_group != group_name:
            return False
        if sel_field == "*":
            return True
        return sel_field == field_name

    def _apply_field_selection_pipeline(all_fields, field_selection_config):
        """
        Run the ordered include/exclude steps over the available fields.

        Starts from an empty set; each step adds or removes the fields its
        selector matches. Returns the resulting set of (group, field) pairs.
        """
        candidate_fields = set()
        if not field_selection_config:
            return candidate_fields
        for action, field_selector in field_selection_config:
            for group_name, field_name in all_fields:
                if _field_selector_matches_pattern(field_selector, group_name, field_name):
                    if action == "include":
                        candidate_fields.add((group_name, field_name))
                    elif action == "exclude":
                        candidate_fields.discard((group_name, field_name))
        return candidate_fields

    def _get_key_field_from_new_requests_rule(rule, new_requests_list, old_requests_list):
        """
        Derive the request key field from the "New Requests" rule.

        Only the FIRST request of each list is inspected: the first candidate
        field (in sorted order) that is not None in both is chosen.

        Returns:
            Tuple (field_name, group_name) - note the order.

        Raises:
            ValueError: If either list is empty, the selection yields no
                candidate, or no candidate has a value in both requests.
        """
        if not new_requests_list or not old_requests_list:
            raise ValueError("Cannot determine key field: empty request lists")

        new_req = new_requests_list[0]
        old_req = old_requests_list[0]

        candidate_fields = _build_candidate_fields(new_req, old_req, rule.get("field_selection"))

        if not candidate_fields:
            raise ValueError(
                f"field_selection produced no candidate fields. "
                f"Config: {rule.get('field_selection')}"
            )

        for group_name, field_name in sorted(candidate_fields):
            new_val = get_nested_value(new_req, [group_name, field_name])
            old_val = get_nested_value(old_req, [group_name, field_name])
            if new_val is not None and old_val is not None:
                return field_name, group_name

        raise ValueError(
            f"No field in field_selection has values in both first new and old request. "
            f"Candidates from pipeline: {candidate_fields}. "
            f"Verify field_selection config or data has proper values."
        )

    def _build_requests_dict(requests_list, key_field, field_group):
        """
        Index requests by their key value.

        Requests whose key is falsy are left out; when two requests share a
        key, the later one replaces the earlier one.
        """
        result = {}
        for request in requests_list:
            key = get_nested_value(request, [field_group, key_field])
            if key:
                result[key] = request
        return result

    # NOTE(review): this helper is not called anywhere in this function and it
    # unpacks 2-element transitions, while _process_rule consumes 4-element
    # ones - it looks like a leftover; confirm before relying on it.
    def _matches_transition(old_val, new_val, transitions_config):
        """Return True if any (expected_old, expected_new) pair matches the value change."""
        if transitions_config is None:
            return False
        for transition in transitions_config:
            expected_old, expected_new = transition
            if _transition_matches(old_val, new_val, expected_old, expected_new):
                return True
        return False

    def _process_special_rule(rule, line_label, new_dict, old_dict):
        """Count request keys present only in the new ("New Requests") or only in the old ("Deleted Requests") snapshot."""
        if line_label == "New Requests":
            return len(set(new_dict.keys()) - set(old_dict.keys()))
        elif line_label == "Deleted Requests":
            return len(set(old_dict.keys()) - set(new_dict.keys()))
        else:
            return 0

    def _process_new_deleted_fields(line_label, new_dict, old_dict):
        """
        List fields that appeared ("New Fields") or disappeared ("Deleted Fields").

        Only requests present in both snapshots are compared. Returns a list of
        ("group.field", number_of_requests) sorted by descending count, then name.
        """
        field_counts = {}
        common_keys = sorted(set(new_dict.keys()) & set(old_dict.keys()))

        for key in common_keys:
            new_req = new_dict[key]
            old_req = old_dict[key]

            all_groups = sorted(set(new_req.keys()) | set(old_req.keys()))

            for group_name in all_groups:
                new_group = new_req.get(group_name, {})
                old_group = old_req.get(group_name, {})

                # A group that is not a dict is handled as having no fields.
                if not isinstance(new_group, dict):
                    new_group = {}
                if not isinstance(old_group, dict):
                    old_group = {}

                new_fields = set(new_group.keys())
                old_fields = set(old_group.keys())

                if line_label == "New Fields":
                    changed_fields = sorted(new_fields - old_fields)
                elif line_label == "Deleted Fields":
                    changed_fields = sorted(old_fields - new_fields)
                else:
                    changed_fields = []

                for field_name in changed_fields:
                    qualified_name = f"{group_name}.{field_name}"
                    field_counts[qualified_name] = field_counts.get(qualified_name, 0) + 1

        return sorted(field_counts.items(), key=lambda x: (-x[1], x[0]))

    def _build_candidate_fields(new_req, old_req, field_selection_config):
        """
        Return the sorted (group, field) pairs selected for one request pair.

        Only fields present in BOTH versions of the request are eligible; the
        field_selection pipeline then filters them. An empty/missing selection
        yields an empty list.
        """
        common_groups = sorted(set(new_req.keys()) & set(old_req.keys()))
        all_available_fields = []

        for group_name in common_groups:
            new_group = new_req.get(group_name, {})
            old_group = old_req.get(group_name, {})

            if not isinstance(new_group, dict):
                new_group = {}
            if not isinstance(old_group, dict):
                old_group = {}

            common_field_names = sorted(set(new_group.keys()) & set(old_group.keys()))
            for field_name in common_field_names:
                all_available_fields.append((group_name, field_name))

        if not field_selection_config:
            return []

        candidate_fields = _apply_field_selection_pipeline(all_available_fields, field_selection_config)
        return sorted(candidate_fields, key=lambda x: (x[0], x[1]))

    def _process_rule(rule, new_dict, old_dict):
        """
        Evaluate one standard rule over all requests present in both snapshots.

        For each request: select candidate fields, record which ones changed,
        run the transition steps to decide which fields are "checked", then
        apply bloc_scope ("any" by default) to decide whether the request counts.

        Returns:
            Tuple (matching_requests_count, details_list). details_list is only
            filled when debug_mode is on; rules flagged with "_config_error"
            return (0, []).
        """
        if rule.get("_config_error"):
            return 0, []

        field_selection_config = rule.get("field_selection")
        bloc_scope = rule.get("bloc_scope") or "any"

        common_keys = sorted(set(new_dict.keys()) & set(old_dict.keys()))
        matching_requests_count = 0
        details_list = []

        for key in common_keys:
            new_req = new_dict[key]
            old_req = old_dict[key]

            candidate_fields = _build_candidate_fields(new_req, old_req, field_selection_config)

            if not candidate_fields:
                continue

            all_fields_list = []
            changed_fields = []

            for group_name, field_name in candidate_fields:
                new_val = get_nested_value(new_req, [group_name, field_name])
                old_val = get_nested_value(old_req, [group_name, field_name])

                field_has_changed = not _values_are_equal(old_val, new_val)
                if field_has_changed:
                    changed_fields.append((group_name, field_name))
                # Mutable record; the last slot is the "checked" flag set by the transitions.
                all_fields_list.append([group_name, field_name, old_val, new_val, False])

            transitions_config = rule.get("transitions", [])
            if transitions_config and isinstance(transitions_config, list):
                for action, field_selector, from_val, to_val in transitions_config:
                    _apply_pipeline_step(all_fields_list, action, field_selector, from_val, to_val)

            checked_fields = [(f[0], f[1], f[2], f[3]) for f in all_fields_list if f[4]]

            inclusion_matches = False
            if bloc_scope == "all":
                # "all": there is at least one change and as many checked fields as changed ones.
                if len(changed_fields) > 0 and len(checked_fields) == len(changed_fields):
                    inclusion_matches = True
            else:  # bloc_scope == "any"
                if len(checked_fields) > 0:
                    inclusion_matches = True

            if inclusion_matches:
                matching_requests_count += 1
                if debug_mode and checked_fields:
                    field_changes = [(f"{gn}.{fn}", ov, nv) for gn, fn, ov, nv in checked_fields]
                    details_list.append((key, field_changes))

        return matching_requests_count, details_list

    # ========== MAIN LOGIC ==========

    key_field = None
    field_group = None

    # The first "New Requests" rule decides which field identifies a request.
    for rule in regression_check_config:
        if rule.get("line_label") == "New Requests":
            try:
                key_field, field_group = _get_key_field_from_new_requests_rule(
                    rule,
                    output_requests,
                    old_requests
                )
                break
            except ValueError as e:
                console.print(f"[bold red]Error determining key field: {e}[/bold red]")
                return True

    # Without a key field no comparison is possible: reported as critical.
    if not key_field:
        console.print("[bold red]Error: 'New Requests' rule not found or has no valid field_selection[/bold red]")
        return True

    console.print(f"[dim]Using key field: {field_group}.{key_field}[/dim]\n")

    new_dict = _build_requests_dict(output_requests, key_field, field_group)
    old_dict = _build_requests_dict(old_requests, key_field, field_group)

    # Group rules by bloc_title, preserving order of first appearance
    blocs = {}
    bloc_order = []
    for rule in regression_check_config:
        bloc_title = rule["bloc_title"]
        if bloc_title not in blocs:
            blocs[bloc_title] = []
            bloc_order.append(bloc_title)
        blocs[bloc_title].append(rule)

    for bloc_title in bloc_order:
        rules = blocs[bloc_title]
        line_results = []

        # Each result is (label, count, payload, result_type); the payload shape depends on the type.
        for rule in rules:
            line_label = rule["line_label"]

            if line_label in ["New Requests", "Deleted Requests"]:
                count = _process_special_rule(rule, line_label, new_dict, old_dict)
                line_results.append((line_label, count, None, "simple"))

            elif line_label in ["New Fields", "Deleted Fields"]:
                field_list = _process_new_deleted_fields(line_label, new_dict, old_dict)
                count = len(field_list)
                line_results.append((line_label, count, field_list, "fields"))

            else:
                count, details = _process_rule(rule, new_dict, old_dict)
                line_results.append((line_label, count, details, "details"))

        # Calculate status for each line
        line_results_with_status = []
        for line_label, count, data, result_type in line_results:
            # NOTE(review): the rule is looked up again by label, so if two rules of
            # the same bloc share a line_label, both lines use the first rule's
            # thresholds - confirm labels are unique per bloc.
            rule = next(r for r in rules if r["line_label"] == line_label)
            warning_threshold = rule["warning_threshold"]
            critical_threshold = rule["critical_threshold"]
            status_tuple = _get_status_and_style(count, warning_threshold, critical_threshold)
            line_results_with_status.append((line_label, count, data, result_type, status_tuple))

        bloc_status = _calculate_block_status([result[4] for result in line_results_with_status])
        _print_block_header(bloc_title, bloc_status, indent=0)

        for line_label, count, data, result_type, status_tuple in line_results_with_status:
            # The "Structure" bloc is always shown in full; other blocs only show non-OK lines.
            should_display = (bloc_title == "Structure") or (status_tuple[0] != "OK")

            if should_display:
                if result_type == "fields":
                    _print_check_line(line_label, count, status_tuple, indent=1)
                    for field_name, request_count in data:
                        console.print(f" {field_name} ({request_count} requests)")

                elif result_type == "details":
                    _print_check_line(line_label, count, status_tuple, indent=1)
                    if debug_mode and data and len(data) > 0:
                        for request_key, field_changes in data:
                            console.print(f" [dim]{key_field}: {request_key}[/dim]")
                            for qualified_field, old_val, new_val in field_changes:
                                # Quote strings so that '' and None can be told apart.
                                old_display = f"'{old_val}'" if isinstance(old_val, str) else str(old_val)
                                new_display = f"'{new_val}'" if isinstance(new_val, str) else str(new_val)
                                console.print(f" - {qualified_field}: {old_display} → {new_display}")

                else:
                    _print_check_line(line_label, count, status_tuple, indent=1)

        console.print()

    return has_critical
+ """ + thread_local_storage.current_request_context = context + return func(*args, **kwargs) + + +# These will be set/accessed from the main module +httpx_clients = {} +_clients_lock = threading.Lock() +threads_list = [] +_threads_list_lock = threading.Lock() + + +# ============================================================================ +# HTTP CLIENT MANAGEMENT +# ============================================================================ + +def get_httpx_client() -> httpx.Client: + """ + Get or create thread-local HTTP client. + Keep-alive is disabled to avoid stale connections with load balancers. + """ + global httpx_clients + thread_id = threading.get_ident() + + with _clients_lock: + if thread_id not in httpx_clients: + # Create client with keep-alive disabled + httpx_clients[thread_id] = httpx.Client( + headers={"Connection": "close"}, # Explicitly request closing + limits=httpx.Limits(max_keepalive_connections=0, max_connections=100) + ) + return httpx_clients[thread_id] + + +def clear_httpx_client(): + """ + Removes the current thread's client from the cache. + Ensures a fresh client (and socket pool) will be created on the next call. + """ + global httpx_clients + thread_id = threading.get_ident() + with _clients_lock: + if thread_id in httpx_clients: + try: + httpx_clients[thread_id].close() + except: + pass + del httpx_clients[thread_id] + + +def get_thread_position(): + """ + Get the position of the current thread in the threads list. + Used for managing progress bar positions in multithreaded environment. 
+ """ + global threads_list + thread_id = threading.get_ident() + with _threads_list_lock: + if thread_id not in threads_list: + threads_list.append(thread_id) + return len(threads_list) - 1 + else: + return threads_list.index(thread_id) + + +# ============================================================================ +# NESTED DATA NAVIGATION +# ============================================================================ + +def get_nested_value(data_structure, path, default=None): + """ + Extracts a value from a nested structure of dictionaries and lists. + Supports a wildcard '*' in the path to retrieve all elements from a list. + + Args: + data_structure: The nested dict/list structure to navigate + path: List of keys/indices to follow. Use '*' for list wildcard. + default: Value to return if path not found + + Returns: + The value at the end of the path, or default if not found + + Examples: + get_nested_value({"a": {"b": 1}}, ["a", "b"]) -> 1 + get_nested_value({"items": [{"x": 1}, {"x": 2}]}, ["items", "*", "x"]) -> [1, 2] + """ + if data_structure is None: + return "$$$$ No Data" + if not path: + return default + + if "*" in path: + wildcard_index = path.index("*") + path_before = path[:wildcard_index] + path_after = path[wildcard_index + 1:] + + def _get_simple_nested_value(ds, p, d): + cl = ds + for k in p: + if isinstance(cl, dict): + cl = cl.get(k) + elif isinstance(cl, list): + try: + if isinstance(k, int) and -len(cl) <= k < len(cl): + cl = cl[k] + else: + return d + except (IndexError, TypeError): + return d + else: + return d + if cl is None: + return d + return cl + + base_level = _get_simple_nested_value(data_structure, path_before, default) + + if not isinstance(base_level, list): + return default + + results = [] + for item in base_level: + value = get_nested_value(item, path_after, default) + if value is not default and value != "$$$$ No Data": + results.append(value) + + # Flatten the results by one level to handle multiple wildcards + 
final_results = [] + for res in results: + if isinstance(res, list): + final_results.extend(res) + else: + final_results.append(res) + + return final_results + + # No wildcard, original logic (iterative) + current_level = data_structure + for key_or_index in path: + if isinstance(current_level, dict): + current_level = current_level.get(key_or_index) + if current_level is None: + return default + elif isinstance(current_level, list): + try: + if isinstance(key_or_index, int) and -len(current_level) <= key_or_index < len(current_level): + current_level = current_level[key_or_index] + else: + return default + except (IndexError, TypeError): + return default + else: + return default + return current_level + + +# ============================================================================ +# CONFIGURATION UTILITIES +# ============================================================================ + +def get_config_path(): + """ + Gets the correct path to the config folder. + Works for both script execution and PyInstaller executable. + + Returns: + Path to config folder + """ + if getattr(sys, 'frozen', False): + # Running as a PyInstaller bundle + config_folder = CONFIG_FOLDER_NAME + return os.path.join(sys._MEIPASS, config_folder) + else: + # Running as a script + return CONFIG_FOLDER_NAME + + +def get_old_filename(current_filename, old_suffix="_old"): + """Generate old backup filename from current filename. + + Example: "do_requests.json" -> "do_requests_old.json" + + Args: + current_filename: Current file name (e.g., "do_requests.json") + old_suffix: Suffix to append before file extension (default: "_old") + + Returns: + Old backup filename with suffix before extension + """ + name, ext = os.path.splitext(current_filename) + return f"{name}{old_suffix}{ext}"