{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Professionals reference table: identifiers linked to several names\n",
    "\n",
    "This notebook loads the pipe-delimited professionals reference extract and lists every `Identifiant PP` that appears with more than one distinct combination of `Nom d'exercice` and `Prénom d'exercice`.\n",
    "\n",
    "Two result sets are written next to the source file:\n",
    "\n",
    "- `*_Multiple_Names_3-1.csv`: names compared exactly as written (case-sensitive);\n",
    "- `*_Multiple_Names_3-2.csv`: names compared after lower-casing.\n",
    "\n",
    "Reference run on extract `250804` (2025-08-11): 9,059 rows for the exact comparison, 5,395 rows for the case-insensitive one.\n",
    "\n",
    "**Caveat:** a row whose last name or first name is missing produces a missing combined name, and missing values are ignored when distinct names are counted.\n",
    "\n",
    "**Privacy:** the result tables contain personal names; clear cell outputs before sharing the notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "Everything that changes from one extract to the next lives in the cell below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder holding the source extract; the exports are written to the same folder.\n",
    "DATA_DIR = Path(r\"E:\\Ziwig Drive\\Ziwig Health\\Tables de Réf\\Professionnels\")\n",
    "SOURCE_STEM = \"Table_Réf_Professionnels_250804\"\n",
    "\n",
    "SOURCE_FILE = DATA_DIR / f\"{SOURCE_STEM}.txt\"\n",
    "EXACT_OUTPUT = DATA_DIR / f\"{SOURCE_STEM}_Multiple_Names_3-1.csv\"\n",
    "CASE_INSENSITIVE_OUTPUT = DATA_DIR / f\"{SOURCE_STEM}_Multiple_Names_3-2.csv\"\n",
    "\n",
    "ID_COL = \"Identifiant PP\"\n",
    "LAST_NAME_COL = \"Nom d'exercice\"\n",
    "FIRST_NAME_COL = \"Prénom d'exercice\"\n",
    "# Name given to the row index: it is the header of the first exported column\n",
    "# and the tie-breaker when sorting rows that share an identifier.\n",
    "INDEX_NAME = \"index\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_inconsistent_names(professionals, ignore_case=False):\n",
    "    \"\"\"Return the rows whose identifier is linked to more than one distinct name.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    professionals : pandas.DataFrame\n",
    "        Must contain the columns ID_COL, LAST_NAME_COL and FIRST_NAME_COL\n",
    "        (string values) and have its index named INDEX_NAME.\n",
    "    ignore_case : bool, default False\n",
    "        When True, names are lower-cased before being compared, so that\n",
    "        spellings differing only by case count as the same name.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The ID_COL, LAST_NAME_COL and FIRST_NAME_COL columns (original\n",
    "        spelling) of every row belonging to a flagged identifier, sorted by\n",
    "        identifier and then by original row index.\n",
    "    \"\"\"\n",
    "    names = professionals[[ID_COL, LAST_NAME_COL, FIRST_NAME_COL]]\n",
    "    last_name = names[LAST_NAME_COL]\n",
    "    first_name = names[FIRST_NAME_COL]\n",
    "    if ignore_case:\n",
    "        last_name = last_name.str.lower()\n",
    "        first_name = first_name.str.lower()\n",
    "\n",
    "    # A missing part makes the combined name missing; nunique() skips missing values.\n",
    "    full_name = last_name + \" \" + first_name\n",
    "\n",
    "    # Vectorised distinct count per identifier (no Python callback per group).\n",
    "    # Rows without an identifier belong to no group and are never flagged.\n",
    "    distinct_names = full_name.groupby(names[ID_COL]).nunique()\n",
    "    flagged_ids = distinct_names.index[distinct_names > 1]\n",
    "\n",
    "    flagged = names[names[ID_COL].isin(flagged_ids)]\n",
    "    return flagged.sort_values([ID_COL, INDEX_NAME])\n",
    "\n",
    "\n",
    "def export_pipe_delimited(frame, path):\n",
    "    \"\"\"Write ``frame``, index included, as an unquoted pipe-delimited file with LF line endings.\"\"\"\n",
    "    frame.to_csv(\n",
    "        path,\n",
    "        sep=\"|\",\n",
    "        index=True,\n",
    "        doublequote=False,\n",
    "        quoting=csv.QUOTE_NONE,\n",
    "        lineterminator=\"\\n\",\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the reference extract"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# dtype=str keeps every field as text. Only empty fields are read as missing:\n",
    "# keep_default_na=False switches off pandas' built-in markers such as \"NA\".\n",
    "professionals = pd.read_csv(\n",
    "    SOURCE_FILE,\n",
    "    sep=\"|\",\n",
    "    doublequote=False,\n",
    "    quoting=csv.QUOTE_NONE,\n",
    "    dtype=str,\n",
    "    na_values=\"\",\n",
    "    keep_default_na=False,\n",
    ")\n",
    "professionals.index.name = INDEX_NAME\n",
    "\n",
    "missing_columns = {ID_COL, LAST_NAME_COL, FIRST_NAME_COL} - set(professionals.columns)\n",
    "assert not missing_columns, f\"missing expected columns: {missing_columns}\"\n",
    "\n",
    "professionals.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Identifiers with several names (exact comparison)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "names_exact = find_inconsistent_names(professionals)\n",
    "names_exact"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "export_pipe_delimited(names_exact, EXACT_OUTPUT)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Identifiers with several names (case-insensitive comparison)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "names_case_insensitive = find_inconsistent_names(professionals, ignore_case=True)\n",
    "\n",
    "# Lower-casing can only merge spellings, so every row flagged here\n",
    "# must already be part of the exact-comparison result.\n",
    "assert names_case_insensitive.index.isin(names_exact.index).all()\n",
    "\n",
    "names_case_insensitive"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "export_pipe_delimited(names_case_insensitive, CASE_INSENSITIVE_OUTPUT)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}