Version Fonctionnelle

This commit is contained in:
2025-12-12 23:45:15 +01:00
commit 67a99a490b
3 changed files with 711 additions and 0 deletions

447
.gitignore vendored Normal file
View File

@@ -0,0 +1,447 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
# Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
# poetry.lock
# poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
# pdm.lock
# pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
# pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# Redis
*.rdb
*.aof
*.pid
# RabbitMQ
mnesia/
rabbitmq/
rabbitmq-data/
# ActiveMQ
activemq-data/
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
# .idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
# Streamlit
.streamlit/secrets.toml
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
# Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
# poetry.lock
# poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
# pdm.lock
# pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
# pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# Redis
*.rdb
*.aof
*.pid
# RabbitMQ
mnesia/
rabbitmq/
rabbitmq-data/
# ActiveMQ
activemq-data/
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
# .idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
# Streamlit
.streamlit/secrets.toml
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
!*.code-workspace
# Built Visual Studio Code Extensions
*.vsix
# PDF Files
*.pdf

259
extract_endoconnect_pdf.py Normal file
View File

@@ -0,0 +1,259 @@
import os
import sys
import time
import requests
import pandas as pd
import openpyxl
import questionary
from rich.console import Console
from rich.logging import RichHandler
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich import print as rprint
from rich.panel import Panel
from datetime import datetime
# -----------------------------------------------------------------------------
# CONSTANTS & CONFIGURATION
# -----------------------------------------------------------------------------
# API Configuration
API_URL = "https://api-endo.ziwig.com"
LOGIN_ENDPOINT = "/api/auth/login"
PDF_ENDPOINT_TEMPLATE = "/api/records/pdf/{}"  # "{}" is filled with the patient ID

# Defaults (User Configurable via Prompt)
# SECURITY: credentials must never be hard-coded in version-controlled source.
# The prompt defaults are read from the environment instead. A password that
# was ever committed to the repository should be treated as compromised and
# rotated.
DEFAULT_USER_EMAIL = os.environ.get("ENDOCONNECT_EMAIL", "abdel.lhachimi@gmail.com")
DEFAULT_USER_PASSWORD = os.environ.get("ENDOCONNECT_PASSWORD", "")
DEFAULT_EXCEL_PATH = r"E:\Ziwig Drive\Ziwig Health\Data\Patients\Records_Status.xlsm"
DEFAULT_OUTPUT_ROOT = r"Temp PDF"

# Constants (Code Configurable Only)
SHEET_TABLE_NAME = "Records_Status"  # Name of the Excel Table (ListObject)
COL_PATIENT_ID = "id"  # Column name for Patient ID
COL_PATIENT_NAME = "fullName"  # Column name for Patient Name
COL_RECORD_FINISHED = "isFinished"  # Column name for boolean flag
# -----------------------------------------------------------------------------
# SETUP CONSOLE
# -----------------------------------------------------------------------------
# Shared rich Console instance used for all terminal output in this script.
console = Console()
# -----------------------------------------------------------------------------
# FUNCTIONS
# -----------------------------------------------------------------------------
def get_credentials():
    """Prompt for Endoconnect credentials until a login succeeds.

    The email and password are requested interactively, pre-filled with the
    module-level defaults, and verified through ``login``. When the login
    fails the prompts are shown again.

    Returns:
        tuple: ``(token, email)`` for the first successful login.

    Raises:
        SystemExit: with status 1 when the email is empty or when a prompt
            is cancelled by the user.
    """
    while True:
        email = questionary.text("Enter Endoconnect Email:", default=DEFAULT_USER_EMAIL).ask()
        if not email:
            console.print("[red]User email cannot be empty. Exiting.[/red]")
            sys.exit(1)

        password = questionary.password("Enter Endoconnect Password:", default=DEFAULT_USER_PASSWORD).ask()
        # ask() yields None when the prompt is cancelled; without this guard a
        # null password would be sent to the API and the loop would restart.
        if password is None:
            console.print("[red]Password prompt cancelled. Exiting.[/red]")
            sys.exit(1)

        # Verify credentials while showing a spinner.
        with console.status("[bold green]Verifying credentials..."):
            token = login(email, password)

        if token:
            console.print("[bold green]Login successful![/bold green]")
            return token, email

        console.print("[bold red]Login failed. Please try again.[/bold red]")
def login(email, password):
    """Authenticate against the API and return the session token.

    Args:
        email: Account email sent in the login payload.
        password: Account password sent in the login payload.

    Returns:
        The value of the ``"token"`` field of the JSON response, or ``None``
        when the request fails, the server answers with an HTTP error status,
        or the body is not a JSON object.
    """
    url = f"{API_URL}{LOGIN_ENDPOINT}"
    payload = {
        "email": email,
        "password": password,
        "rememberMe": None,
    }
    try:
        response = requests.post(url, json=payload, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decoding error is not a RequestException subclass.
        return None

    # A JSON array or scalar has no .get(); honour the "token or None" contract.
    if not isinstance(data, dict):
        return None
    return data.get("token")
def get_excel_table_data(file_path, table_name):
    """Locate an Excel Table by name in any worksheet and return it as a DataFrame.

    Args:
        file_path: Path of the workbook to open.
        table_name: Name of the Excel Table (ListObject) to look for.

    Returns:
        pandas.DataFrame: one column per table header (taken from the first
        row of the table range) and one row per remaining table row. An empty
        DataFrame is returned when the table range yields no rows.

    Raises:
        SystemExit: with status 1 when the workbook cannot be loaded or no
            worksheet contains a table called ``table_name``.
    """
    try:
        # data_only=True: read the stored cell values rather than formulas.
        wb = openpyxl.load_workbook(file_path, data_only=True)
    except Exception as e:
        console.print(f"[bold red]Error loading Excel file: {e}[/bold red]")
        sys.exit(1)

    # Search every worksheet for the table. ``wb.worksheets`` is used instead
    # of ``wb.sheetnames`` because the latter also lists chart sheets, which
    # have no ``tables`` attribute and would make the lookup crash.
    for ws in wb.worksheets:
        if table_name in ws.tables:
            target_sheet = ws
            target_range = ws.tables[table_name].ref
            break
    else:
        console.print(f"[bold red]Table '{table_name}' not found in workbook.[/bold red]")
        sys.exit(1)

    # Indexing a worksheet with a range reference returns a tuple of rows.
    rows = list(target_sheet[target_range])
    if not rows:
        return pd.DataFrame()

    # First row is the header; every following row is data.
    headers = [cell.value for cell in rows[0]]
    data_rows = [[cell.value for cell in row] for row in rows[1:]]
    return pd.DataFrame(data_rows, columns=headers)
def download_pdf(token, patient_id, output_path, patient_name):
    """Download the PDF record of one patient to ``output_path``.

    The body is streamed into a temporary ``<output_path>.part`` file that is
    renamed onto ``output_path`` only once the download has completed, so a
    failed or interrupted transfer never leaves a truncated PDF behind and
    never clobbers a previously downloaded file.

    Args:
        token: Bearer token sent in the ``Authorization`` header.
        patient_id: Identifier substituted into the PDF endpoint URL.
        output_path: Destination path of the PDF file.
        patient_name: Unused; kept for backward compatibility with callers.

    Returns:
        tuple: ``(True, duration_seconds, None)`` on success, or
        ``(False, 0, error_message)`` on any failure.
    """
    url = f"{API_URL}{PDF_ENDPOINT_TEMPLATE.format(patient_id)}"
    headers = {"Authorization": f"Bearer {token}"}
    partial_path = f"{output_path}.part"
    start_time = time.time()
    try:
        # The context manager releases the streamed connection even on error.
        with requests.get(url, headers=headers, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, output_path)
    except Exception as e:
        # Best-effort cleanup of the incomplete file; it may not exist yet if
        # the request itself failed, hence the ignored OSError.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        return False, 0, str(e)

    duration = time.time() - start_time
    return True, duration, None
def sanitize_filename(name):
    r"""Return ``name`` made safe for use as a file name.

    Every occurrence of one of the characters ``< > : " / \ | ? *`` is
    replaced by an underscore, then leading and trailing whitespace is
    stripped from the result.
    """
    forbidden = '<>:"/\\|?*'
    # One translation pass instead of one str.replace() call per character.
    replacements = str.maketrans(forbidden, '_' * len(forbidden))
    return name.translate(replacements).strip()
# -----------------------------------------------------------------------------
# MAIN
# -----------------------------------------------------------------------------
def main():
    """Run the interactive PDF extraction workflow.

    Steps:
        1. Ask for credentials and log in.
        2. Ask for the Excel workbook path and the output directory, creating
           the directory when it does not exist.
        3. Read the ``SHEET_TABLE_NAME`` table and keep the rows whose
           ``COL_RECORD_FINISHED`` column equals True.
        4. Download one PDF per remaining row, named after the normalized
           patient name, and report the outcome of each download.

    Raises:
        SystemExit: with status 1 when a prompt is cancelled, the workbook is
            missing, the output directory cannot be created, or required
            columns are absent from the table.
    """
    console.print(Panel.fit("[bold blue]Endoconnect Patient PDF Extractor[/bold blue]"))

    # 1. Credentials (the verified email is not needed afterwards)
    token, _ = get_credentials()

    # 2. Configuration (Excel & Output)
    excel_path = questionary.path("Path to Excel file:", default=DEFAULT_EXCEL_PATH).ask()
    # ask() yields None when the prompt is cancelled; os.path.exists(None)
    # would raise TypeError, so treat a missing answer as "not found".
    if not excel_path or not os.path.exists(excel_path):
        console.print(f"[bold red]File not found: {excel_path}[/bold red]")
        sys.exit(1)

    # Output Directory
    today_str = datetime.now().strftime("PDFs-%Y%m%d")
    default_output_dir = os.path.join(DEFAULT_OUTPUT_ROOT, today_str)
    output_dir = questionary.path("Output Directory:", default=default_output_dir).ask()
    if not output_dir:
        console.print("[bold red]No output directory given. Exiting.[/bold red]")
        sys.exit(1)
    if not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
            console.print(f"[green]Created output directory: {output_dir}[/green]")
        except OSError as e:
            console.print(f"[bold red]Could not create directory: {e}[/bold red]")
            sys.exit(1)
    console.print()  # Spacing

    # 3. Read Data
    console.print(f"Reading table '{SHEET_TABLE_NAME}' from Excel...")
    df = get_excel_table_data(excel_path, SHEET_TABLE_NAME)
    console.print()  # Spacing

    # Validation
    required_cols = [COL_PATIENT_ID, COL_PATIENT_NAME, COL_RECORD_FINISHED]
    missing_cols = [c for c in required_cols if c not in df.columns]
    if missing_cols:
        console.print(f"[bold red]Missing columns in table: {', '.join(missing_cols)}[/bold red]")
        sys.exit(1)

    # Filter: element-wise comparison keeps rows holding True (or 1) and
    # drops empty cells, which a blanket astype(bool) could mishandle.
    patients_to_process = df[df[COL_RECORD_FINISHED] == True]  # noqa: E712
    total_patients = len(patients_to_process)
    console.print(f"[bold]Found {total_patients} patients to process.[/bold]")
    console.print()  # Add spacing
    if total_patients == 0:
        console.print("[yellow]No patients found with record_finished=True. Exiting.[/yellow]")
        return

    # 4. Processing Loop
    records = patients_to_process.to_dict('records')
    used_names = set()  # case-folded file stems already written in this run
    for i, record in enumerate(records, start=1):
        p_id = record[COL_PATIENT_ID]
        p_name_raw = record[COL_PATIENT_NAME]

        # Normalize Name: Remove extra spaces and Title Case
        if p_name_raw:
            p_name = " ".join(str(p_name_raw).split()).title()
        else:
            p_name = "Unknown_Patient"
        safe_name = sanitize_filename(p_name)

        # Two patients with the same (or a missing) name would otherwise
        # overwrite each other's PDF; disambiguate later ones with their ID.
        if safe_name.casefold() in used_names:
            safe_name = f"{safe_name}_{sanitize_filename(str(p_id))}"
        used_names.add(safe_name.casefold())

        filename = f"{safe_name}.pdf"
        file_path = os.path.join(output_dir, filename)

        # Progress message with colored filename (rich status supports markup)
        msg_colored = f"{i}/{total_patients} - Downloading [bold cyan]{filename}[/bold cyan]"
        with console.status(msg_colored, spinner="dots"):
            success, duration, error = download_pdf(token, p_id, file_path, p_name)

        if success:
            console.print(f"[bold green]✓[/bold green] {msg_colored} ({duration:.2f}s)")
        else:
            console.print(f"[bold red]✗[/bold red] {msg_colored} [red]ERROR: {error}[/red]")

    console.print(Panel("[bold green]Extraction Finished![/bold green]"))
if __name__ == "__main__":
    # Script entry point: run main(), report interruptions and unexpected
    # errors, and always wait for Enter before the process ends.
    exit_status = 0
    try:
        main()
    except KeyboardInterrupt:
        console.print("\n[yellow]Script interrupted by user.[/yellow]")
    except Exception as exc:
        console.print(f"\n[bold red]An unexpected error occurred: {exc}[/bold red]")
        exit_status = 1
    finally:
        # Runs on every path, including a sys.exit() raised inside main().
        console.print()
        input("Press Enter to close...")
    sys.exit(exit_status)

5
requirements.txt Normal file
View File

@@ -0,0 +1,5 @@
requests
rich
questionary
pandas
openpyxl