{ "cells": [ { "cell_type": "code", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2025-08-12T19:54:05.652708Z", "start_time": "2025-08-12T19:53:30.037989Z" } }, "source": [ "%%time\n", "import csv\n", "import os\n", "from pathlib import Path\n", "\n", "import pandas as pd\n", "\n", "# Folder holding the reference extracts. Defaults to the original workstation\n", "# location; set the ZIWIG_REF_DIR environment variable to run elsewhere.\n", "REF_DIR = Path(os.environ.get(\"ZIWIG_REF_DIR\", \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\"))\n", "REF_FILE = REF_DIR / \"Table_Réf_Professionnels_250804.txt\"\n", "\n", "# Pipe-separated file read without any quote handling; every column is kept as\n", "# text and only the empty string is treated as missing.\n", "df = pd.read_csv(REF_FILE, sep='|',\n", "                 doublequote=False, quoting=csv.QUOTE_NONE, dtype=str, na_values='', keep_default_na=False)\n", "df.index.name = 'index'  # named so the row index can be referenced as 'index' (e.g. in sort_values)\n", "df.columns" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: total: 35 s\n", "Wall time: 35.6 s\n" ] }, { "data": { "text/plain": [ "Index(['Type d'identifiant PP', 'Identifiant PP',\n", " 'Identification nationale PP', 'Code civilité d'exercice',\n", " 'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n", " 'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n", " 'Libellé profession', 'Code catégorie professionnelle',\n", " 'Libellé catégorie professionnelle', 'Code type savoir-faire',\n", " 'Libellé type savoir-faire', 'Code savoir-faire',\n", " 'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n", " 'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n", " 'Numéro FINESS établissement juridique',\n", " 'Identifiant technique de la structure', 'Raison sociale site',\n", " 'Enseigne commerciale site',\n", " 'Complément destinataire (coord. structure)',\n", " 'Complément point géographique (coord. structure)',\n", " 'Numéro Voie (coord. structure)',\n", " 'Indice répétition voie (coord. structure)',\n", " 'Code type de voie (coord. structure)',\n", " 'Libellé type de voie (coord. structure)',\n", " 'Libellé Voie (coord. structure)',\n", " 'Mention distribution (coord. structure)',\n", " 'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n", " 'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n", " 'Code pays (coord.
structure)', 'Libellé pays (coord. structure)',\n", " 'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n", " 'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n", " 'Code Département (structure)', 'Libellé Département (structure)',\n", " 'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n", " 'Code secteur d'activité', 'Libellé secteur d'activité',\n", " 'Code section tableau pharmaciens',\n", " 'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n", " 'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n", " dtype='object')" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 19 }, { "cell_type": "code", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2025-08-12T19:54:12.829107Z", "start_time": "2025-08-12T19:54:05.751406Z" } }, "source": [ "%%time\n", "# Flag every 'Identifiant PP' that is listed under more than one distinct\n", "# \"surname first-name\" string (exact, case-sensitive match) and list its rows.\n", "df2 = (\n", "    df.assign(Nom_Prénom=df[\"Nom d'exercice\"] + \" \" + df[\"Prénom d'exercice\"])\n", "    # per row: number of distinct full names within that row's identifier group\n", "    .groupby('Identifiant PP')[['Nom_Prénom']]\n", "    .transform('nunique')\n", "    .rename(columns={'Nom_Prénom': 'Count'})\n", "    # keep only the rows whose identifier carries several names\n", "    .loc[lambda counts: counts['Count'] > 1]\n", "    # counts and df share the same row index, so an index join restores the name columns\n", "    .join(df)[['Identifiant PP', 'Count', \"Nom d'exercice\", \"Prénom d'exercice\"]]\n", "    .sort_values(['Identifiant PP', 'index'])\n", ")\n", "\n", "df2" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: total: 6.81 s\n", "Wall time: 7.06 s\n" ] }, { "data": { "text/plain": [ " Identifiant PP Count Nom d'exercice Prénom d'exercice\n", "index \n", "1350393 10000034180 2 DUWAT-GEORGES GHISLAINE\n", "1350394 10000034180 2 GEORGES GHISLAINE\n", "259 10000040062 2 MEYER Nicolas\n", "260 10000040062 2 MEYER Nicolas\n", "809702 10000040062 2 MEYER NICOLAS\n", "... ... ... ...
...\n", "2158383 10111077417 2 D'ELLOY FRANCETTE\n", "537896 10111105358 2 HOMO Maddy\n", "1889090 10111105358 2 Homo Maddy\n", "537977 10111110721 2 ROCHEPEAU Nadège\n", "2158797 10111110721 2 BARREAU Nadège\n", "\n", "[9059 rows x 4 columns]" ], "text/html": [ "
| \n", " | Identifiant PP | \n", "Count | \n", "Nom d'exercice | \n", "Prénom d'exercice | \n", "
|---|---|---|---|---|
| index | \n", "\n", " | \n", " | \n", " | \n", " |
| 1350393 | \n", "10000034180 | \n", "2 | \n", "DUWAT-GEORGES | \n", "GHISLAINE | \n", "
| 1350394 | \n", "10000034180 | \n", "2 | \n", "GEORGES | \n", "GHISLAINE | \n", "
| 259 | \n", "10000040062 | \n", "2 | \n", "MEYER | \n", "Nicolas | \n", "
| 260 | \n", "10000040062 | \n", "2 | \n", "MEYER | \n", "Nicolas | \n", "
| 809702 | \n", "10000040062 | \n", "2 | \n", "MEYER | \n", "NICOLAS | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 2158383 | \n", "10111077417 | \n", "2 | \n", "D'ELLOY | \n", "FRANCETTE | \n", "
| 537896 | \n", "10111105358 | \n", "2 | \n", "HOMO | \n", "Maddy | \n", "
| 1889090 | \n", "10111105358 | \n", "2 | \n", "Homo | \n", "Maddy | \n", "
| 537977 | \n", "10111110721 | \n", "2 | \n", "ROCHEPEAU | \n", "Nadège | \n", "
| 2158797 | \n", "10111110721 | \n", "2 | \n", "BARREAU | \n", "Nadège | \n", "
9059 rows × 4 columns
\n", "| \n", " | Identifiant PP | \n", "Count | \n", "Nom d'exercice | \n", "Prénom d'exercice | \n", "
|---|---|---|---|---|
| index | \n", "\n", " | \n", " | \n", " | \n", " |
| 1350393 | \n", "10000034180 | \n", "2 | \n", "DUWAT-GEORGES | \n", "GHISLAINE | \n", "
| 1350394 | \n", "10000034180 | \n", "2 | \n", "GEORGES | \n", "GHISLAINE | \n", "
| 1350470 | \n", "10000046051 | \n", "2 | \n", "STUDER | \n", "AGNES | \n", "
| 1620048 | \n", "10000046051 | \n", "2 | \n", "JURION | \n", "AGNES | \n", "
| 269964 | \n", "10000101518 | \n", "2 | \n", "BARREYRE | \n", "SANDRINE | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1617156 | \n", "10110987236 | \n", "2 | \n", "ROGIER | \n", "MATHILDE | \n", "
| 807882 | \n", "10111077417 | \n", "2 | \n", "DOUVIER | \n", "FRANCETTE | \n", "
| 2158383 | \n", "10111077417 | \n", "2 | \n", "D'ELLOY | \n", "FRANCETTE | \n", "
| 537977 | \n", "10111110721 | \n", "2 | \n", "ROCHEPEAU | \n", "Nadège | \n", "
| 2158797 | \n", "10111110721 | \n", "2 | \n", "BARREAU | \n", "Nadège | \n", "
5395 rows × 4 columns
\n", "