Advanced Retry Management
This commit is contained in:
@@ -121,6 +121,8 @@ SUBTASKS_POOL_SIZE = 40 # Fixed size for subtasks pool
|
|||||||
ERROR_MAX_RETRY = 10 # Max retry attempts for API calls
|
ERROR_MAX_RETRY = 10 # Max retry attempts for API calls
|
||||||
WAIT_BEFORE_RETRY = 0.5 # Delay in seconds between retries (fixed)
|
WAIT_BEFORE_RETRY = 0.5 # Delay in seconds between retries (fixed)
|
||||||
API_TIMEOUT = 60 # Default timeout for API calls (seconds)
|
API_TIMEOUT = 60 # Default timeout for API calls (seconds)
|
||||||
|
MAX_BATCHS_OF_RETRIES = 3 # Max batches of retries for API calls
|
||||||
|
WAIT_BEFORE_NEW_BATCH_OF_RETRIES = 5 # Delay in seconds between retry batches
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@@ -166,6 +168,12 @@ _token_refresh_lock = threading.Lock()
|
|||||||
main_thread_pool = None
|
main_thread_pool = None
|
||||||
subtasks_thread_pool = None
|
subtasks_thread_pool = None
|
||||||
|
|
||||||
|
# User interaction lock
|
||||||
|
_user_interaction_lock = threading.Lock()
|
||||||
|
|
||||||
|
# Thread-local storage for context
|
||||||
|
thread_local_storage = threading.local()
|
||||||
|
|
||||||
# Rich console for formatted output
|
# Rich console for formatted output
|
||||||
console = Console()
|
console = Console()
|
||||||
|
|
||||||
@@ -298,6 +306,30 @@ def get_thread_position():
|
|||||||
return threads_list.index(thread_id)
|
return threads_list.index(thread_id)
|
||||||
|
|
||||||
|
|
||||||
|
def clear_httpx_client():
|
||||||
|
"""
|
||||||
|
Clear the thread-local HTTP client to force creation of a new one.
|
||||||
|
Useful for resetting connections after errors.
|
||||||
|
"""
|
||||||
|
global httpx_clients
|
||||||
|
thread_id = threading.get_ident()
|
||||||
|
if thread_id in httpx_clients:
|
||||||
|
try:
|
||||||
|
httpx_clients[thread_id].close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
del httpx_clients[thread_id]
|
||||||
|
|
||||||
|
|
||||||
|
def run_with_context(func, context, *args, **kwargs):
|
||||||
|
"""
|
||||||
|
Wrapper to set thread-local context before running a function in a new thread.
|
||||||
|
Useful for ThreadPoolExecutor where context is lost.
|
||||||
|
"""
|
||||||
|
thread_local_storage.current_patient_context = context
|
||||||
|
return func(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# AUTHENTICATION
|
# AUTHENTICATION
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@@ -422,51 +454,69 @@ def new_token(app):
|
|||||||
# DECORATORS
|
# DECORATORS
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
def api_call_with_retry(app):
|
def api_call_with_retry(func):
|
||||||
"""
|
"""Decorator for API calls with automatic retry and token refresh on 401 errors"""
|
||||||
Decorator for API calls with automatic retry and token refresh on 401.
|
|
||||||
|
|
||||||
Features:
|
|
||||||
- Retries on network errors (httpx.RequestError)
|
|
||||||
- Retries on HTTP errors (httpx.HTTPStatusError)
|
|
||||||
- Automatically refreshes token on 401 Unauthorized
|
|
||||||
- Configurable retry count and delay
|
|
||||||
|
|
||||||
Args:
|
|
||||||
app: Microservice name for token refresh (e.g., "RC", "GDD")
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
@api_call_with_retry("RC")
|
|
||||||
def get_organizations():
|
|
||||||
# API call implementation
|
|
||||||
...
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
httpx.RequestError: If all retries exhausted
|
|
||||||
"""
|
|
||||||
def decorator(func):
|
|
||||||
@functools.wraps(func)
|
@functools.wraps(func)
|
||||||
def wrapper(*args, **kwargs):
|
def wrapper(*args, **kwargs):
|
||||||
func_name = func.__name__
|
func_name = func.__name__
|
||||||
|
total_attempts = 0
|
||||||
|
batch_count = 1
|
||||||
|
|
||||||
|
while True:
|
||||||
for attempt in range(ERROR_MAX_RETRY):
|
for attempt in range(ERROR_MAX_RETRY):
|
||||||
|
total_attempts += 1
|
||||||
try:
|
try:
|
||||||
return func(*args, **kwargs)
|
return func(*args, **kwargs)
|
||||||
except (httpx.RequestError, httpx.HTTPStatusError) as exc:
|
except (httpx.RequestError, httpx.HTTPStatusError) as exc:
|
||||||
logging.warning(f"Error in {func_name} (Attempt {attempt + 1}/{ERROR_MAX_RETRY}): {exc}")
|
logging.warning(f"Error in {func_name} (Attempt {total_attempts}): {exc}")
|
||||||
|
|
||||||
|
# Refresh the thread-local client if an error occurs
|
||||||
|
# to avoid potential pool corruption or stale connections
|
||||||
|
clear_httpx_client()
|
||||||
|
|
||||||
# Auto-refresh token on 401
|
|
||||||
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code == 401:
|
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code == 401:
|
||||||
logging.info(f"Token expired for {func_name}. Refreshing token for {app}.")
|
logging.info(f"Token expired for {func_name}. Refreshing token.")
|
||||||
new_token(app)
|
new_token()
|
||||||
|
|
||||||
if attempt < ERROR_MAX_RETRY - 1:
|
if attempt < ERROR_MAX_RETRY - 1:
|
||||||
sleep(WAIT_BEFORE_RETRY)
|
sleep(WAIT_BEFORE_RETRY)
|
||||||
|
else:
|
||||||
|
# Max retries reached for this batch
|
||||||
|
if batch_count < MAX_BATCHS_OF_RETRIES:
|
||||||
|
logging.warning(f"Batch {batch_count}/{MAX_BATCHS_OF_RETRIES} failed for {func_name}. "
|
||||||
|
f"Waiting {WAIT_BEFORE_NEW_BATCH_OF_RETRIES}s before automatic retry batch.")
|
||||||
|
batch_count += 1
|
||||||
|
sleep(WAIT_BEFORE_NEW_BATCH_OF_RETRIES)
|
||||||
|
break # Exit for loop to restart batch in while True
|
||||||
|
else:
|
||||||
|
# All automatic batches exhausted, ask the user
|
||||||
|
with _user_interaction_lock:
|
||||||
|
console.print(f"\n[bold red]Persistent error in {func_name} after {batch_count} batches ({total_attempts} attempts).[/bold red]")
|
||||||
|
console.print(f"[red]Exception: {exc}[/red]")
|
||||||
|
|
||||||
logging.critical(f"Persistent error in {func_name} after {ERROR_MAX_RETRY} attempts.")
|
choice = questionary.select(
|
||||||
raise httpx.RequestError(message=f"Persistent error in {func_name}")
|
f"What would you like to do for {func_name}?",
|
||||||
|
choices=[
|
||||||
|
"Retry (try another batch of retries)",
|
||||||
|
"Ignore (return None and continue)",
|
||||||
|
"Stop script (critical error)"
|
||||||
|
]
|
||||||
|
).ask()
|
||||||
|
|
||||||
|
if choice == "Retry (try another batch of retries)":
|
||||||
|
logging.info(f"User chose to retry {func_name}. Restarting batch sequence.")
|
||||||
|
batch_count = 1 # Reset batch counter for the next interactive round
|
||||||
|
break # Exit for loop to restart batch in while True
|
||||||
|
elif choice == "Ignore (return None and continue)":
|
||||||
|
# Retrieve context if available
|
||||||
|
ctx = getattr(thread_local_storage, "current_patient_context", {"id": "Unknown", "pseudo": "Unknown"})
|
||||||
|
logging.warning(f"[IGNORE] User opted to skip {func_name} for Patient {ctx['id']} ({ctx['pseudo']}). Error: {exc}")
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
logging.critical(f"User chose to stop script after persistent error in {func_name}.")
|
||||||
|
raise httpx.RequestError(message=f"Persistent error in {func_name} (stopped by user)")
|
||||||
|
|
||||||
return wrapper
|
return wrapper
|
||||||
return decorator
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@@ -542,7 +592,7 @@ def main():
|
|||||||
4. Main processing block (TODO: implement your logic here)
|
4. Main processing block (TODO: implement your logic here)
|
||||||
5. Finalization (elapsed time)
|
5. Finalization (elapsed time)
|
||||||
"""
|
"""
|
||||||
global main_thread_pool, subtasks_thread_pool
|
global main_thread_pool, subtasks_thread_pool, thread_local_storage
|
||||||
|
|
||||||
# ========== AUTHENTICATION ==========
|
# ========== AUTHENTICATION ==========
|
||||||
print()
|
print()
|
||||||
@@ -578,11 +628,19 @@ def main():
|
|||||||
# Example pattern with progress bar and multithreading:
|
# Example pattern with progress bar and multithreading:
|
||||||
#
|
#
|
||||||
# items = [...] # Your data to process
|
# items = [...] # Your data to process
|
||||||
|
# futures = []
|
||||||
#
|
#
|
||||||
# with tqdm(total=len(items), desc="Processing items",
|
# with tqdm(total=len(items), desc="Processing items",
|
||||||
# bar_format=custom_bar_format) as pbar:
|
# bar_format=custom_bar_format) as pbar:
|
||||||
# with main_thread_pool as executor:
|
# with main_thread_pool as executor:
|
||||||
# futures = [executor.submit(process_item, item) for item in items]
|
#
|
||||||
|
# for item in items:
|
||||||
|
#
|
||||||
|
# # Set thread-local context for detailed error logging in decorators
|
||||||
|
# ctx = {"id": patient_id, "pseudo": pseudo}
|
||||||
|
# thread_local_storage.current_patient_context = ctx
|
||||||
|
#
|
||||||
|
# futures.append(executor.submit(run_with_context, process_item, ctx, item))
|
||||||
#
|
#
|
||||||
# for future in as_completed(futures):
|
# for future in as_completed(futures):
|
||||||
# try:
|
# try:
|
||||||
|
|||||||
Reference in New Issue
Block a user