Files
professionals_from_sante_fr/Professionals_Multiple_Names2.ipynb
2026-03-05 11:11:10 +00:00

466 lines
16 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"metadata": {
"tags": [],
"ExecuteTime": {
"end_time": "2025-08-12T19:54:05.652708Z",
"start_time": "2025-08-12T19:53:30.037989Z"
}
},
"source": [
"%%time\n",
"import pandas as pd\n",
"import csv\n",
"df = pd.read_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804.txt\", sep='|',\n",
" doublequote=False, quoting=csv.QUOTE_NONE, dtype=str, na_values='', keep_default_na=False)\n",
"df.index.name = 'index'\n",
"df.columns"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 35 s\n",
"Wall time: 35.6 s\n"
]
},
{
"data": {
"text/plain": [
"Index(['Type d'identifiant PP', 'Identifiant PP',\n",
" 'Identification nationale PP', 'Code civilité d'exercice',\n",
" 'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n",
" 'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n",
" 'Libellé profession', 'Code catégorie professionnelle',\n",
" 'Libellé catégorie professionnelle', 'Code type savoir-faire',\n",
" 'Libellé type savoir-faire', 'Code savoir-faire',\n",
" 'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n",
" 'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n",
" 'Numéro FINESS établissement juridique',\n",
" 'Identifiant technique de la structure', 'Raison sociale site',\n",
" 'Enseigne commerciale site',\n",
" 'Complément destinataire (coord. structure)',\n",
" 'Complément point géographique (coord. structure)',\n",
" 'Numéro Voie (coord. structure)',\n",
" 'Indice répétition voie (coord. structure)',\n",
" 'Code type de voie (coord. structure)',\n",
" 'Libellé type de voie (coord. structure)',\n",
" 'Libellé Voie (coord. structure)',\n",
" 'Mention distribution (coord. structure)',\n",
" 'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n",
" 'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n",
" 'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n",
" 'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n",
" 'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n",
" 'Code Département (structure)', 'Libellé Département (structure)',\n",
" 'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n",
" 'Code secteur d'activité', 'Libellé secteur d'activité',\n",
" 'Code section tableau pharmaciens',\n",
" 'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n",
" 'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n",
" dtype='object')"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 19
},
{
"cell_type": "code",
"metadata": {
"tags": [],
"ExecuteTime": {
"end_time": "2025-08-12T19:54:12.829107Z",
"start_time": "2025-08-12T19:54:05.751406Z"
}
},
"source": [
"%%time\n",
"df2 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"]+\" \"+df[\"Prénom d'exercice\"]) \\\n",
" .groupby('Identifiant PP')[['Nom_Prénom']] \\\n",
" .transform('nunique').rename(columns={'Nom_Prénom' : 'Count'}).query('Count > 1') \\\n",
" .join(df)[['Identifiant PP','Count',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
" .sort_values(['Identifiant PP', 'index'])\n",
"\n",
"df2"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 6.81 s\n",
"Wall time: 7.06 s\n"
]
},
{
"data": {
"text/plain": [
" Identifiant PP Count Nom d'exercice Prénom d'exercice\n",
"index \n",
"1350393 10000034180 2 DUWAT-GEORGES GHISLAINE\n",
"1350394 10000034180 2 GEORGES GHISLAINE\n",
"259 10000040062 2 MEYER Nicolas\n",
"260 10000040062 2 MEYER Nicolas\n",
"809702 10000040062 2 MEYER NICOLAS\n",
"... ... ... ... ...\n",
"2158383 10111077417 2 D'ELLOY FRANCETTE\n",
"537896 10111105358 2 HOMO Maddy\n",
"1889090 10111105358 2 Homo Maddy\n",
"537977 10111110721 2 ROCHEPEAU Nadège\n",
"2158797 10111110721 2 BARREAU Nadège\n",
"\n",
"[9059 rows x 4 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Identifiant PP</th>\n",
" <th>Count</th>\n",
" <th>Nom d'exercice</th>\n",
" <th>Prénom d'exercice</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1350393</th>\n",
" <td>10000034180</td>\n",
" <td>2</td>\n",
" <td>DUWAT-GEORGES</td>\n",
" <td>GHISLAINE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1350394</th>\n",
" <td>10000034180</td>\n",
" <td>2</td>\n",
" <td>GEORGES</td>\n",
" <td>GHISLAINE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>259</th>\n",
" <td>10000040062</td>\n",
" <td>2</td>\n",
" <td>MEYER</td>\n",
" <td>Nicolas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>260</th>\n",
" <td>10000040062</td>\n",
" <td>2</td>\n",
" <td>MEYER</td>\n",
" <td>Nicolas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>809702</th>\n",
" <td>10000040062</td>\n",
" <td>2</td>\n",
" <td>MEYER</td>\n",
" <td>NICOLAS</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2158383</th>\n",
" <td>10111077417</td>\n",
" <td>2</td>\n",
" <td>D'ELLOY</td>\n",
" <td>FRANCETTE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>537896</th>\n",
" <td>10111105358</td>\n",
" <td>2</td>\n",
" <td>HOMO</td>\n",
" <td>Maddy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1889090</th>\n",
" <td>10111105358</td>\n",
" <td>2</td>\n",
" <td>Homo</td>\n",
" <td>Maddy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>537977</th>\n",
" <td>10111110721</td>\n",
" <td>2</td>\n",
" <td>ROCHEPEAU</td>\n",
" <td>Nadège</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2158797</th>\n",
" <td>10111110721</td>\n",
" <td>2</td>\n",
" <td>BARREAU</td>\n",
" <td>Nadège</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>9059 rows × 4 columns</p>\n",
"</div>"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 20
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-12T19:54:13.114103Z",
"start_time": "2025-08-12T19:54:13.063080Z"
}
},
"cell_type": "code",
"source": [
"df2.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_2-1.csv\",\n",
" sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')"
],
"outputs": [],
"execution_count": 21
},
{
"cell_type": "code",
"metadata": {
"tags": [],
"ExecuteTime": {
"end_time": "2025-08-12T19:54:20.671679Z",
"start_time": "2025-08-12T19:54:13.377047Z"
}
},
"source": [
"%%time\n",
"df3 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"].str.lower()+\" \"+df[\"Prénom d'exercice\"].str.lower()) \\\n",
" .groupby('Identifiant PP')[['Nom_Prénom']] \\\n",
" .transform('nunique').rename(columns={'Nom_Prénom' : 'Count'}).query('Count > 1') \\\n",
" .join(df)[['Identifiant PP','Count',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
" .sort_values(['Identifiant PP', 'index'])\n",
"df3"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 6.97 s\n",
"Wall time: 7.28 s\n"
]
},
{
"data": {
"text/plain": [
" Identifiant PP Count Nom d'exercice Prénom d'exercice\n",
"index \n",
"1350393 10000034180 2 DUWAT-GEORGES GHISLAINE\n",
"1350394 10000034180 2 GEORGES GHISLAINE\n",
"1350470 10000046051 2 STUDER AGNES\n",
"1620048 10000046051 2 JURION AGNES\n",
"269964 10000101518 2 BARREYRE SANDRINE\n",
"... ... ... ... ...\n",
"1617156 10110987236 2 ROGIER MATHILDE\n",
"807882 10111077417 2 DOUVIER FRANCETTE\n",
"2158383 10111077417 2 D'ELLOY FRANCETTE\n",
"537977 10111110721 2 ROCHEPEAU Nadège\n",
"2158797 10111110721 2 BARREAU Nadège\n",
"\n",
"[5395 rows x 4 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Identifiant PP</th>\n",
" <th>Count</th>\n",
" <th>Nom d'exercice</th>\n",
" <th>Prénom d'exercice</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1350393</th>\n",
" <td>10000034180</td>\n",
" <td>2</td>\n",
" <td>DUWAT-GEORGES</td>\n",
" <td>GHISLAINE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1350394</th>\n",
" <td>10000034180</td>\n",
" <td>2</td>\n",
" <td>GEORGES</td>\n",
" <td>GHISLAINE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1350470</th>\n",
" <td>10000046051</td>\n",
" <td>2</td>\n",
" <td>STUDER</td>\n",
" <td>AGNES</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1620048</th>\n",
" <td>10000046051</td>\n",
" <td>2</td>\n",
" <td>JURION</td>\n",
" <td>AGNES</td>\n",
" </tr>\n",
" <tr>\n",
" <th>269964</th>\n",
" <td>10000101518</td>\n",
" <td>2</td>\n",
" <td>BARREYRE</td>\n",
" <td>SANDRINE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1617156</th>\n",
" <td>10110987236</td>\n",
" <td>2</td>\n",
" <td>ROGIER</td>\n",
" <td>MATHILDE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>807882</th>\n",
" <td>10111077417</td>\n",
" <td>2</td>\n",
" <td>DOUVIER</td>\n",
" <td>FRANCETTE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2158383</th>\n",
" <td>10111077417</td>\n",
" <td>2</td>\n",
" <td>D'ELLOY</td>\n",
" <td>FRANCETTE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>537977</th>\n",
" <td>10111110721</td>\n",
" <td>2</td>\n",
" <td>ROCHEPEAU</td>\n",
" <td>Nadège</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2158797</th>\n",
" <td>10111110721</td>\n",
" <td>2</td>\n",
" <td>BARREAU</td>\n",
" <td>Nadège</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5395 rows × 4 columns</p>\n",
"</div>"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 22
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-12T19:54:21.102182Z",
"start_time": "2025-08-12T19:54:21.072806Z"
}
},
"cell_type": "code",
"source": [
"df3.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_2-2.csv\",\n",
" sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')"
],
"outputs": [],
"execution_count": 23
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}