professionals_from_sante_fr/Professionals_Multiple_Names2.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "metadata": {
    "tags": [],
    "ExecuteTime": {
     "end_time": "2025-08-12T19:54:05.652708Z",
     "start_time": "2025-08-12T19:53:30.037989Z"
    }
   },
   "source": [
    "%%time\n",
    "import csv\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# Load the pipe-delimited professionals reference table.\n",
    "df = pd.read_csv(\n",
    "    \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804.txt\",\n",
    "    sep='|',\n",
    "    quoting=csv.QUOTE_NONE,   # quote characters are ordinary data, never field delimiters\n",
    "    doublequote=False,\n",
    "    dtype=str,                # every column is read as text, no numeric conversion\n",
    "    na_values='',             # together with keep_default_na=False: only empty fields\n",
    "    keep_default_na=False,    # become NaN, literal strings such as 'NA' stay as text\n",
    ")\n",
    "\n",
    "# Name the row index so that later cells can refer to it as 'index' (e.g. when sorting).\n",
    "df.index.name = 'index'\n",
    "df.columns"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 35 s\n",
      "Wall time: 35.6 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['Type d'identifiant PP', 'Identifiant PP',\n",
       "       'Identification nationale PP', 'Code civilité d'exercice',\n",
       "       'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n",
       "       'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n",
       "       'Libellé profession', 'Code catégorie professionnelle',\n",
       "       'Libellé catégorie professionnelle', 'Code type savoir-faire',\n",
       "       'Libellé type savoir-faire', 'Code savoir-faire',\n",
       "       'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n",
       "       'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n",
       "       'Numéro FINESS établissement juridique',\n",
       "       'Identifiant technique de la structure', 'Raison sociale site',\n",
       "       'Enseigne commerciale site',\n",
       "       'Complément destinataire (coord. structure)',\n",
       "       'Complément point géographique (coord. structure)',\n",
       "       'Numéro Voie (coord. structure)',\n",
       "       'Indice répétition voie (coord. structure)',\n",
       "       'Code type de voie (coord. structure)',\n",
       "       'Libellé type de voie (coord. structure)',\n",
       "       'Libellé Voie (coord. structure)',\n",
       "       'Mention distribution (coord. structure)',\n",
       "       'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n",
       "       'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n",
       "       'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n",
       "       'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n",
       "       'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n",
       "       'Code Département (structure)', 'Libellé Département (structure)',\n",
       "       'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n",
       "       'Code secteur d'activité', 'Libellé secteur d'activité',\n",
       "       'Code section tableau pharmaciens',\n",
       "       'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n",
       "       'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 19
  },
  {
   "cell_type": "code",
   "metadata": {
    "tags": [],
    "ExecuteTime": {
     "end_time": "2025-08-12T19:54:12.829107Z",
     "start_time": "2025-08-12T19:54:05.751406Z"
    }
   },
   "source": [
    "%%time\n",
    "%%time\n",
    "# Full exercise name per row (case-sensitive). Missing name parts are replaced by ''\n",
    "# before concatenating: str + NaN yields NaN, and 'nunique' skips NaN, so a row with a\n",
    "# missing name or first name would otherwise never count as a distinct name.\n",
    "full_name = df[\"Nom d'exercice\"].fillna('') + \" \" + df[\"Prénom d'exercice\"].fillna('')\n",
    "\n",
    "# Keep every row of each 'Identifiant PP' that carries more than one distinct full name.\n",
    "# 'Count' is that number of distinct names, repeated on each row of the professional.\n",
    "df2 = (\n",
    "    df.assign(Nom_Prénom=full_name)\n",
    "      .groupby('Identifiant PP')[['Nom_Prénom']]\n",
    "      .transform('nunique')                      # per-row result, aligned on df's index\n",
    "      .rename(columns={'Nom_Prénom': 'Count'})\n",
    "      .query('Count > 1')\n",
    "      .join(df)[['Identifiant PP', 'Count', \"Nom d'exercice\", \"Prénom d'exercice\"]]\n",
    "      .sort_values(['Identifiant PP', 'index'])  # 'index' is the named row index\n",
    ")\n",
    "\n",
    "df2"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 6.81 s\n",
      "Wall time: 7.06 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "        Identifiant PP  Count Nom d'exercice Prénom d'exercice\n",
       "index                                                         \n",
       "1350393    10000034180      2  DUWAT-GEORGES         GHISLAINE\n",
       "1350394    10000034180      2        GEORGES         GHISLAINE\n",
       "259        10000040062      2          MEYER           Nicolas\n",
       "260        10000040062      2          MEYER           Nicolas\n",
       "809702     10000040062      2          MEYER           NICOLAS\n",
       "...                ...    ...            ...               ...\n",
       "2158383    10111077417      2        D'ELLOY         FRANCETTE\n",
       "537896     10111105358      2           HOMO             Maddy\n",
       "1889090    10111105358      2           Homo             Maddy\n",
       "537977     10111110721      2      ROCHEPEAU            Nadège\n",
       "2158797    10111110721      2        BARREAU            Nadège\n",
       "\n",
       "[9059 rows x 4 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Identifiant PP</th>\n",
       "      <th>Count</th>\n",
       "      <th>Nom d'exercice</th>\n",
       "      <th>Prénom d'exercice</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1350393</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>2</td>\n",
       "      <td>DUWAT-GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1350394</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>2</td>\n",
       "      <td>GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>259</th>\n",
       "      <td>10000040062</td>\n",
       "      <td>2</td>\n",
       "      <td>MEYER</td>\n",
       "      <td>Nicolas</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>260</th>\n",
       "      <td>10000040062</td>\n",
       "      <td>2</td>\n",
       "      <td>MEYER</td>\n",
       "      <td>Nicolas</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>809702</th>\n",
       "      <td>10000040062</td>\n",
       "      <td>2</td>\n",
       "      <td>MEYER</td>\n",
       "      <td>NICOLAS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158383</th>\n",
       "      <td>10111077417</td>\n",
       "      <td>2</td>\n",
       "      <td>D'ELLOY</td>\n",
       "      <td>FRANCETTE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537896</th>\n",
       "      <td>10111105358</td>\n",
       "      <td>2</td>\n",
       "      <td>HOMO</td>\n",
       "      <td>Maddy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1889090</th>\n",
       "      <td>10111105358</td>\n",
       "      <td>2</td>\n",
       "      <td>Homo</td>\n",
       "      <td>Maddy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537977</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>2</td>\n",
       "      <td>ROCHEPEAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158797</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>2</td>\n",
       "      <td>BARREAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9059 rows × 4 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 20
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-12T19:54:13.114103Z",
     "start_time": "2025-08-12T19:54:13.063080Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Export the case-sensitive result in the same pipe-delimited, unquoted layout as the input.\n",
    "df2.to_csv(\n",
    "    \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_2-1.csv\",\n",
    "    sep='|',\n",
    "    index=True,               # keep the row index so rows can be traced back to df\n",
    "    quoting=csv.QUOTE_NONE,\n",
    "    doublequote=False,\n",
    "    lineterminator='\\n',\n",
    ")"
   ],
   "outputs": [],
   "execution_count": 21
  },
  {
   "cell_type": "code",
   "metadata": {
    "tags": [],
    "ExecuteTime": {
     "end_time": "2025-08-12T19:54:20.671679Z",
     "start_time": "2025-08-12T19:54:13.377047Z"
    }
   },
   "source": [
    "%%time\n",
    "%%time\n",
    "# Lower-cased full exercise name per row, so spellings that differ only by letter case\n",
    "# count as the same name. Missing name parts are replaced by '' first: str + NaN yields\n",
    "# NaN, and 'nunique' skips NaN, so a row with a missing name or first name would\n",
    "# otherwise never count as a distinct name.\n",
    "full_name_lower = (\n",
    "    df[\"Nom d'exercice\"].fillna('').str.lower()\n",
    "    + \" \"\n",
    "    + df[\"Prénom d'exercice\"].fillna('').str.lower()\n",
    ")\n",
    "\n",
    "# Keep every row of each 'Identifiant PP' that carries more than one distinct lower-cased\n",
    "# full name. 'Count' is that number, repeated on each row of the professional.\n",
    "df3 = (\n",
    "    df.assign(Nom_Prénom=full_name_lower)\n",
    "      .groupby('Identifiant PP')[['Nom_Prénom']]\n",
    "      .transform('nunique')                      # per-row result, aligned on df's index\n",
    "      .rename(columns={'Nom_Prénom': 'Count'})\n",
    "      .query('Count > 1')\n",
    "      .join(df)[['Identifiant PP', 'Count', \"Nom d'exercice\", \"Prénom d'exercice\"]]\n",
    "      .sort_values(['Identifiant PP', 'index'])  # 'index' is the named row index\n",
    ")\n",
    "df3"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 6.97 s\n",
      "Wall time: 7.28 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "        Identifiant PP  Count Nom d'exercice Prénom d'exercice\n",
       "index                                                         \n",
       "1350393    10000034180      2  DUWAT-GEORGES         GHISLAINE\n",
       "1350394    10000034180      2        GEORGES         GHISLAINE\n",
       "1350470    10000046051      2         STUDER             AGNES\n",
       "1620048    10000046051      2         JURION             AGNES\n",
       "269964     10000101518      2       BARREYRE          SANDRINE\n",
       "...                ...    ...            ...               ...\n",
       "1617156    10110987236      2         ROGIER          MATHILDE\n",
       "807882     10111077417      2        DOUVIER         FRANCETTE\n",
       "2158383    10111077417      2        D'ELLOY         FRANCETTE\n",
       "537977     10111110721      2      ROCHEPEAU            Nadège\n",
       "2158797    10111110721      2        BARREAU            Nadège\n",
       "\n",
       "[5395 rows x 4 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Identifiant PP</th>\n",
       "      <th>Count</th>\n",
       "      <th>Nom d'exercice</th>\n",
       "      <th>Prénom d'exercice</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1350393</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>2</td>\n",
       "      <td>DUWAT-GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1350394</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>2</td>\n",
       "      <td>GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1350470</th>\n",
       "      <td>10000046051</td>\n",
       "      <td>2</td>\n",
       "      <td>STUDER</td>\n",
       "      <td>AGNES</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1620048</th>\n",
       "      <td>10000046051</td>\n",
       "      <td>2</td>\n",
       "      <td>JURION</td>\n",
       "      <td>AGNES</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>269964</th>\n",
       "      <td>10000101518</td>\n",
       "      <td>2</td>\n",
       "      <td>BARREYRE</td>\n",
       "      <td>SANDRINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1617156</th>\n",
       "      <td>10110987236</td>\n",
       "      <td>2</td>\n",
       "      <td>ROGIER</td>\n",
       "      <td>MATHILDE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>807882</th>\n",
       "      <td>10111077417</td>\n",
       "      <td>2</td>\n",
       "      <td>DOUVIER</td>\n",
       "      <td>FRANCETTE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158383</th>\n",
       "      <td>10111077417</td>\n",
       "      <td>2</td>\n",
       "      <td>D'ELLOY</td>\n",
       "      <td>FRANCETTE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537977</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>2</td>\n",
       "      <td>ROCHEPEAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158797</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>2</td>\n",
       "      <td>BARREAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5395 rows × 4 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 22
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-12T19:54:21.102182Z",
     "start_time": "2025-08-12T19:54:21.072806Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Export the case-insensitive result in the same pipe-delimited, unquoted layout as the input.\n",
    "df3.to_csv(\n",
    "    \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_2-2.csv\",\n",
    "    sep='|',\n",
    "    index=True,               # keep the row index so rows can be traced back to df\n",
    "    quoting=csv.QUOTE_NONE,\n",
    "    doublequote=False,\n",
    "    lineterminator='\\n',\n",
    ")"
   ],
   "outputs": [],
   "execution_count": 23
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}