Advanced Retry Management

2026-02-12 18:28:20 +01:00
parent 599360ba34
commit e9dc8015de


@@ -121,6 +121,8 @@ SUBTASKS_POOL_SIZE = 40 # Fixed size for subtasks pool
 ERROR_MAX_RETRY = 10 # Max retry attempts for API calls
 WAIT_BEFORE_RETRY = 0.5 # Delay in seconds between retries (fixed)
 API_TIMEOUT = 60 # Default timeout for API calls (seconds)
+MAX_BATCHS_OF_RETRIES = 3 # Max batches of retries for API calls
+WAIT_BEFORE_NEW_BATCH_OF_RETRIES = 5 # Delay in seconds between retry batches
 # ============================================================================
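
With these settings, the decorator below makes at most ERROR_MAX_RETRY x MAX_BATCHS_OF_RETRIES = 30 automatic attempts before asking the user what to do. A rough sketch of the worst-case back-off time implied by the constants (time spent inside the calls themselves, up to API_TIMEOUT each, is excluded):

# Rough worst-case sleep time before the user prompt appears (call durations excluded).
# The arithmetic mirrors the retry loop below: no sleep after the last attempt of a batch.
ERROR_MAX_RETRY = 10
WAIT_BEFORE_RETRY = 0.5
MAX_BATCHS_OF_RETRIES = 3
WAIT_BEFORE_NEW_BATCH_OF_RETRIES = 5

sleep_per_batch = (ERROR_MAX_RETRY - 1) * WAIT_BEFORE_RETRY                        # 4.5 s
between_batches = (MAX_BATCHS_OF_RETRIES - 1) * WAIT_BEFORE_NEW_BATCH_OF_RETRIES   # 10 s
total_backoff = MAX_BATCHS_OF_RETRIES * sleep_per_batch + between_batches
print(total_backoff)  # 23.5 seconds of pure back-off across 30 attempts
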
@@ -166,6 +168,12 @@ _token_refresh_lock = threading.Lock()
 main_thread_pool = None
 subtasks_thread_pool = None
+# User interaction lock
+_user_interaction_lock = threading.Lock()
+
+# Thread-local storage for context
+thread_local_storage = threading.local()
+
 # Rich console for formatted output
 console = Console()
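
The thread-local storage is what lets the retry decorator report which patient a worker thread was handling when a call is skipped. A minimal standalone round trip (the patient dict is illustrative, not from the repository):

import threading

thread_local_storage = threading.local()

def worker(ctx):
    # Each worker stores its own context; other threads never see it.
    thread_local_storage.current_patient_context = ctx
    # Readers use getattr with a default, exactly as the decorator does below.
    seen = getattr(thread_local_storage, "current_patient_context",
                   {"id": "Unknown", "pseudo": "Unknown"})
    print(seen["id"], seen["pseudo"])

t = threading.Thread(target=worker, args=({"id": 42, "pseudo": "P-042"},))
t.start()
t.join()
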
@@ -298,6 +306,30 @@ def get_thread_position():
     return threads_list.index(thread_id)
+
+def clear_httpx_client():
+    """
+    Clear the thread-local HTTP client to force creation of a new one.
+    Useful for resetting connections after errors.
+    """
+    global httpx_clients
+    thread_id = threading.get_ident()
+    if thread_id in httpx_clients:
+        try:
+            httpx_clients[thread_id].close()
+        except Exception:
+            pass
+        del httpx_clients[thread_id]
+
+
+def run_with_context(func, context, *args, **kwargs):
+    """
+    Wrapper to set thread-local context before running a function in a new thread.
+    Useful for ThreadPoolExecutor where context is lost.
+    """
+    thread_local_storage.current_patient_context = context
+    return func(*args, **kwargs)
+
 # ============================================================================
 # AUTHENTICATION
 # ============================================================================
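
Only the reset side of the per-thread HTTP client pool appears in this diff. For orientation, a companion accessor could look like the sketch below; get_httpx_client and its lazy-initialisation pattern are assumptions, not code from this repository:

# Hypothetical sketch, not part of this commit: lazily create one httpx.Client
# per thread so that clear_httpx_client() has something to close and discard.
import threading
import httpx

httpx_clients = {}   # thread id -> httpx.Client, as referenced by clear_httpx_client()
API_TIMEOUT = 60

def get_httpx_client():
    """Return this thread's client, creating it on first use."""
    thread_id = threading.get_ident()
    if thread_id not in httpx_clients:
        httpx_clients[thread_id] = httpx.Client(timeout=API_TIMEOUT)
    return httpx_clients[thread_id]
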
@@ -422,51 +454,69 @@ def new_token(app):
 # ============================================================================
 # DECORATORS
 # ============================================================================
-def api_call_with_retry(app):
-    """
-    Decorator for API calls with automatic retry and token refresh on 401.
-    Features:
-    - Retries on network errors (httpx.RequestError)
-    - Retries on HTTP errors (httpx.HTTPStatusError)
-    - Automatically refreshes token on 401 Unauthorized
-    - Configurable retry count and delay
-    Args:
-        app: Microservice name for token refresh (e.g., "RC", "GDD")
-    Usage:
-        @api_call_with_retry("RC")
-        def get_organizations():
-            # API call implementation
-            ...
-    Raises:
-        httpx.RequestError: If all retries exhausted
-    """
-    def decorator(func):
+def api_call_with_retry(func):
+    """Decorator for API calls with automatic retry and token refresh on 401 errors"""
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
         func_name = func.__name__
+        total_attempts = 0
+        batch_count = 1
+        while True:
             for attempt in range(ERROR_MAX_RETRY):
+                total_attempts += 1
                 try:
                     return func(*args, **kwargs)
                 except (httpx.RequestError, httpx.HTTPStatusError) as exc:
-                    logging.warning(f"Error in {func_name} (Attempt {attempt + 1}/{ERROR_MAX_RETRY}): {exc}")
+                    logging.warning(f"Error in {func_name} (Attempt {total_attempts}): {exc}")
+                    # Refresh the thread-local client if an error occurs
+                    # to avoid potential pool corruption or stale connections
+                    clear_httpx_client()
+                    # Auto-refresh token on 401
                     if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code == 401:
-                        logging.info(f"Token expired for {func_name}. Refreshing token for {app}.")
-                        new_token(app)
+                        logging.info(f"Token expired for {func_name}. Refreshing token.")
+                        new_token()
                     if attempt < ERROR_MAX_RETRY - 1:
                         sleep(WAIT_BEFORE_RETRY)
-            logging.critical(f"Persistent error in {func_name} after {ERROR_MAX_RETRY} attempts.")
-            raise httpx.RequestError(message=f"Persistent error in {func_name}")
+                    else:
+                        # Max retries reached for this batch
+                        if batch_count < MAX_BATCHS_OF_RETRIES:
+                            logging.warning(f"Batch {batch_count}/{MAX_BATCHS_OF_RETRIES} failed for {func_name}. "
+                                            f"Waiting {WAIT_BEFORE_NEW_BATCH_OF_RETRIES}s before automatic retry batch.")
+                            batch_count += 1
+                            sleep(WAIT_BEFORE_NEW_BATCH_OF_RETRIES)
+                            break  # Exit for loop to restart batch in while True
+                        else:
+                            # All automatic batches exhausted, ask the user
+                            with _user_interaction_lock:
+                                console.print(f"\n[bold red]Persistent error in {func_name} after {batch_count} batches ({total_attempts} attempts).[/bold red]")
+                                console.print(f"[red]Exception: {exc}[/red]")
+                                choice = questionary.select(
+                                    f"What would you like to do for {func_name}?",
+                                    choices=[
+                                        "Retry (try another batch of retries)",
+                                        "Ignore (return None and continue)",
+                                        "Stop script (critical error)"
+                                    ]
+                                ).ask()
+                            if choice == "Retry (try another batch of retries)":
+                                logging.info(f"User chose to retry {func_name}. Restarting batch sequence.")
+                                batch_count = 1  # Reset batch counter for the next interactive round
+                                break  # Exit for loop to restart batch in while True
+                            elif choice == "Ignore (return None and continue)":
+                                # Retrieve context if available
+                                ctx = getattr(thread_local_storage, "current_patient_context", {"id": "Unknown", "pseudo": "Unknown"})
+                                logging.warning(f"[IGNORE] User opted to skip {func_name} for Patient {ctx['id']} ({ctx['pseudo']}). Error: {exc}")
+                                return None
+                            else:
+                                logging.critical(f"User chose to stop script after persistent error in {func_name}.")
+                                raise httpx.RequestError(message=f"Persistent error in {func_name} (stopped by user)")
     return wrapper
-    return decorator
 # ============================================================================
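
Since the decorator no longer takes a microservice name, call sites change from @api_call_with_retry("RC") to a bare @api_call_with_retry, and the decorator now calls new_token() with no argument. A minimal usage sketch within this module; the endpoint URL and the get_httpx_client() helper are illustrative assumptions, not code from the repository:

# Usage sketch only: the URL and the per-thread client accessor are placeholders.
@api_call_with_retry
def get_organizations():
    client = get_httpx_client()  # hypothetical per-thread client accessor (see sketch above)
    response = client.get("https://api.example.org/organizations")
    response.raise_for_status()  # a 401 here triggers the token-refresh path in the decorator
    return response.json()

orgs = get_organizations()       # returns None if the user picked "Ignore" after all batches
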
@@ -542,7 +592,7 @@ def main():
     4. Main processing block (TODO: implement your logic here)
     5. Finalization (elapsed time)
     """
-    global main_thread_pool, subtasks_thread_pool
+    global main_thread_pool, subtasks_thread_pool, thread_local_storage
 
     # ========== AUTHENTICATION ==========
     print()
@@ -578,11 +628,19 @@ def main():
     # Example pattern with progress bar and multithreading:
     #
     # items = [...]  # Your data to process
+    # futures = []
     #
     # with tqdm(total=len(items), desc="Processing items",
     #           bar_format=custom_bar_format) as pbar:
     #     with main_thread_pool as executor:
-    #         futures = [executor.submit(process_item, item) for item in items]
+    #
+    #         for item in items:
+    #
+    #             # Set thread-local context for detailed error logging in decorators
+    #             ctx = {"id": patient_id, "pseudo": pseudo}
+    #             thread_local_storage.current_patient_context = ctx
+    #
+    #             futures.append(executor.submit(run_with_context, process_item, ctx, item))
     #
     #         for future in as_completed(futures):
     #             try: