diff --git a/.gitignore b/.gitignore index b7e4351..9aa9d66 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,6 @@ wheels/ venv/ ENV/ NUL +extract_endoconnect_medical_records.log + +*.json \ No newline at end of file diff --git a/config/config.xlsx b/config/config.xlsx index 823b556..226f282 100644 Binary files a/config/config.xlsx and b/config/config.xlsx differ diff --git a/extract_endoconnect_medical_records.py b/extract_endoconnect_medical_records.py index 8a5ca68..bba8ed3 100644 --- a/extract_endoconnect_medical_records.py +++ b/extract_endoconnect_medical_records.py @@ -91,8 +91,8 @@ CONFIG_VALUES_SHEET_NAME = "Criteria_values" COL_CRITERIA_ID = "criteria_id" COL_CRITERIA_LABEL = "criteria_name" COL_CRITERIA_TYPE = "criteria_type" -COL_CRITERIA_LEVEL1_LABEL = "domaine_name" -COL_CRITERIA_LEVEL2_LABEL = "subdomaine_name" +COL_CRITERIA_LEVEL1_LABEL = "domain_name" +COL_CRITERIA_LEVEL2_LABEL = "subdomain_name" COL_CRITERIA_ORDER = "criteria_order" # Column names - values sheet @@ -123,7 +123,7 @@ WAIT_BEFORE_NEW_BATCH_OF_RETRIES = 20 # CONFIGURATION - LOGGING # ============================================================================ -LOG_LEVEL = logging.INFO +LOG_LEVEL = logging.WARNING LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s' @@ -567,7 +567,8 @@ def get_patients(professional_id): timeout=API_TIMEOUT ) response.raise_for_status() - return response.json() + data = response.json() + return data.get("patients", []) @api_call_with_retry @@ -616,18 +617,22 @@ def resolve_criteria_value(criteria_id, raw_value, patient_id="Unknown"): For TEXT/NUMERIC/DATE: use raw value directly (join with " | " if array). For MULTIBOOLEAN/CHECKLIST: lookup value labels from values_config. """ + def _filter_and_join(values): + """Join non-empty string representations, filtering out None and empty strings.""" + return " | ".join(s for v in values if v is not None and (s := str(v).strip())) + config = criteria_config.get(criteria_id) if not config: logging.warning(f"[Patient {patient_id}] Unknown criteria_id: {criteria_id}, raw_value: {raw_value}") if isinstance(raw_value, list): - return " | ".join(str(v) for v in raw_value) + return _filter_and_join(raw_value) return raw_value crit_type = config["type"] if crit_type in ("TEXT", "NUMERIC", "DATE"): if isinstance(raw_value, list): - return " | ".join(str(v) for v in raw_value) + return _filter_and_join(raw_value) return raw_value elif crit_type in ("MULTIBOOLEAN", "CHECKLIST"): @@ -639,7 +644,11 @@ def resolve_criteria_value(criteria_id, raw_value, patient_id="Unknown"): if isinstance(raw_value, list): labels = [] for v in raw_value: + if v is None: + continue v_str = str(v).strip() + if not v_str: + continue label = val_lookup.get(v_str) if label is None: logging.warning(f"[Patient {patient_id}] Unknown value_id '{v_str}' for criteria '{criteria_id}' (label: {config['label']})") @@ -658,7 +667,7 @@ def resolve_criteria_value(criteria_id, raw_value, patient_id="Unknown"): else: logging.warning(f"[Patient {patient_id}] Unknown criteria type '{crit_type}' for criteria '{criteria_id}' (label: {config['label']})") if isinstance(raw_value, list): - return " | ".join(str(v) for v in raw_value) + return _filter_and_join(raw_value) return raw_value @@ -751,7 +760,7 @@ def process_patient(patient): "record_metadata": { "createdAt": patient.get("createdAt", ""), "isFinishMedicalRecord": patient.get("isFinishMedicalRecord", False), - "lastUpdate": patient.get("lasUpdate", ""), + "lastUpdate": patient.get("lastUpdate", ""), "finishOn": patient.get("finishOn", ""), "confirmedEndo": patient.get("confirmedEndo", False), }, @@ -878,7 +887,7 @@ def main(): # ========== EXPORT JSON ========== timestamp = datetime.now().strftime("%Y%m%d-%H%M") - output_filename = f"{OUTPUT_FILE_NAME}-{timestamp}.json" + output_filename = f"{OUTPUT_FILE_NAME}-{professional_id}-{timestamp}.json" with open(output_filename, "w", encoding="utf-8") as f: json.dump(output, f, indent=2, ensure_ascii=False)