{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Professionals reference table: identifiers carrying several names\n",
    "\n",
    "This notebook reads the professionals reference extract and lists every `Identifiant PP` whose rows carry more than one distinct (`Nom d'exercice`, `Prénom d'exercice`) pair.\n",
    "\n",
    "Two variants are computed and exported as pipe-separated files next to the extract:\n",
    "\n",
    "1. exact comparison of the names (`..._Multiple_Names_2-1.csv`);\n",
    "2. comparison that ignores letter case (`..._Multiple_Names_2-2.csv`).\n",
    "\n",
    "Cell outputs are deliberately not stored in the notebook: the result tables show personal names. Use *Restart Kernel -> Run All* to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder that holds the extract; the exports are written next to it.\n",
    "# Set the PROFESSIONALS_REF_DIR environment variable to run the notebook on another machine.\n",
    "DATA_DIR = Path(os.environ.get(\n",
    "    \"PROFESSIONALS_REF_DIR\",\n",
    "    r\"E:\\Ziwig Drive\\Ziwig Health\\Tables de Réf\\Professionnels\",\n",
    "))\n",
    "EXTRACT_NAME = \"Table_Réf_Professionnels_250804\"\n",
    "\n",
    "SOURCE_FILE = DATA_DIR / f\"{EXTRACT_NAME}.txt\"\n",
    "EXPORT_EXACT = DATA_DIR / f\"{EXTRACT_NAME}_Multiple_Names_2-1.csv\"\n",
    "EXPORT_IGNORE_CASE = DATA_DIR / f\"{EXTRACT_NAME}_Multiple_Names_2-2.csv\"\n",
    "\n",
    "ID_COL = \"Identifiant PP\"\n",
    "LAST_NAME_COL = \"Nom d'exercice\"\n",
    "FIRST_NAME_COL = \"Prénom d'exercice\"\n",
    "\n",
    "# Pipe-separated text without any quoting: used to read the extract and to write the exports.\n",
    "PIPE_FORMAT = {\"sep\": \"|\", \"doublequote\": False, \"quoting\": csv.QUOTE_NONE}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the extract\n",
    "\n",
    "Every column is read as text and only empty fields are treated as missing. The row number is kept as an index named `index`; it is exported together with the results.\n",
    "\n",
    "Note: pandas reports the last column as `Unnamed: 56`, presumably because each line of the extract ends with a trailing `|` (to be confirmed). The column is left untouched."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "professionals = pd.read_csv(\n",
    "    SOURCE_FILE,\n",
    "    dtype=str,\n",
    "    na_values=\"\",\n",
    "    keep_default_na=False,\n",
    "    **PIPE_FORMAT,\n",
    ").rename_axis(\"index\")\n",
    "professionals.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "required_columns = {ID_COL, LAST_NAME_COL, FIRST_NAME_COL}\n",
    "missing_columns = required_columns - set(professionals.columns)\n",
    "assert not missing_columns, f\"columns missing from the extract: {sorted(missing_columns)}\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Identifiers with more than one name\n",
    "\n",
    "A *name* is the (last name, first name) pair of a row. Missing values take part in the comparison: a pair with a missing part counts as a name of its own. Concatenating the two columns would instead yield a missing key for such rows, and they would silently drop out of the distinct count."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rows_with_multiple_names(table: pd.DataFrame, ignore_case: bool = False) -> pd.DataFrame:\n",
    "    \"\"\"Return the rows of every identifier that carries several distinct names.\n",
    "\n",
    "    A name is the (last name, first name) pair of a row. Two missing values\n",
    "    compare equal, and a pair with a missing part is a name of its own, so an\n",
    "    identifier is still reported when its rows differ only by a missing part.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    table : DataFrame\n",
    "        Must contain the columns ID_COL, LAST_NAME_COL and FIRST_NAME_COL.\n",
    "    ignore_case : bool, default False\n",
    "        Compare the names after lower-casing them.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    DataFrame\n",
    "        Columns ID_COL, \"Count\" (number of distinct names of the identifier),\n",
    "        LAST_NAME_COL and FIRST_NAME_COL. Names are shown as they appear in\n",
    "        `table`; rows are ordered by identifier, then by the index of `table`.\n",
    "    \"\"\"\n",
    "    names = table[[ID_COL, LAST_NAME_COL, FIRST_NAME_COL]]\n",
    "    compared = names\n",
    "    if ignore_case:\n",
    "        compared = names.assign(**{\n",
    "            LAST_NAME_COL: names[LAST_NAME_COL].str.lower(),\n",
    "            FIRST_NAME_COL: names[FIRST_NAME_COL].str.lower(),\n",
    "        })\n",
    "\n",
    "    # drop_duplicates treats two missing values as equal, so the number of\n",
    "    # de-duplicated rows per identifier is its number of distinct names.\n",
    "    names_per_id = compared.drop_duplicates().groupby(ID_COL).size()\n",
    "    name_count = table[ID_COL].map(names_per_id)\n",
    "    has_several = name_count > 1\n",
    "\n",
    "    result = names.loc[has_several].assign(Count=name_count[has_several])\n",
    "    return (\n",
    "        result[[ID_COL, \"Count\", LAST_NAME_COL, FIRST_NAME_COL]]\n",
    "        .sort_index()\n",
    "        # Stable sort: rows of one identifier stay in index order.\n",
    "        .sort_values(ID_COL, kind=\"stable\")\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exact comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "multiple_names_exact = rows_with_multiple_names(professionals)\n",
    "multiple_names_exact"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "multiple_names_exact.to_csv(EXPORT_EXACT, index=True, lineterminator=\"\\n\", **PIPE_FORMAT)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Comparison ignoring letter case\n",
    "\n",
    "Only letter case is ignored; accents, hyphens and spacing still make two names distinct."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "multiple_names_ignore_case = rows_with_multiple_names(professionals, ignore_case=True)\n",
    "multiple_names_ignore_case"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "multiple_names_ignore_case.to_csv(EXPORT_IGNORE_CASE, index=True, lineterminator=\"\\n\", **PIPE_FORMAT)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}