466 lines
16 KiB
Plaintext
466 lines
16 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"metadata": {
|
||
"tags": [],
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-12T19:54:05.652708Z",
|
||
"start_time": "2025-08-12T19:53:30.037989Z"
|
||
}
|
||
},
|
||
"source": [
"%%time\n",
"import csv\n",
"\n",
"import pandas as pd\n",
"\n",
"# Load the pipe-delimited reference extract. Every column is read as text (dtype=str).\n",
"# QUOTE_NONE makes the parser treat quote characters as ordinary data, and with\n",
"# keep_default_na=False only the empty string listed in na_values becomes NaN.\n",
"df = pd.read_csv(\n",
"    \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804.txt\",\n",
"    sep='|',\n",
"    doublequote=False,\n",
"    quoting=csv.QUOTE_NONE,\n",
"    dtype=str,\n",
"    na_values='',\n",
"    keep_default_na=False,\n",
")\n",
"# Name the index so that later cells can sort on it by name.\n",
"df.index.name = 'index'\n",
"df.columns"
],
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"CPU times: total: 35 s\n",
|
||
"Wall time: 35.6 s\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['Type d'identifiant PP', 'Identifiant PP',\n",
|
||
" 'Identification nationale PP', 'Code civilité d'exercice',\n",
|
||
" 'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n",
|
||
" 'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n",
|
||
" 'Libellé profession', 'Code catégorie professionnelle',\n",
|
||
" 'Libellé catégorie professionnelle', 'Code type savoir-faire',\n",
|
||
" 'Libellé type savoir-faire', 'Code savoir-faire',\n",
|
||
" 'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n",
|
||
" 'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n",
|
||
" 'Numéro FINESS établissement juridique',\n",
|
||
" 'Identifiant technique de la structure', 'Raison sociale site',\n",
|
||
" 'Enseigne commerciale site',\n",
|
||
" 'Complément destinataire (coord. structure)',\n",
|
||
" 'Complément point géographique (coord. structure)',\n",
|
||
" 'Numéro Voie (coord. structure)',\n",
|
||
" 'Indice répétition voie (coord. structure)',\n",
|
||
" 'Code type de voie (coord. structure)',\n",
|
||
" 'Libellé type de voie (coord. structure)',\n",
|
||
" 'Libellé Voie (coord. structure)',\n",
|
||
" 'Mention distribution (coord. structure)',\n",
|
||
" 'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n",
|
||
" 'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n",
|
||
" 'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n",
|
||
" 'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n",
|
||
" 'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n",
|
||
" 'Code Département (structure)', 'Libellé Département (structure)',\n",
|
||
" 'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n",
|
||
" 'Code secteur d'activité', 'Libellé secteur d'activité',\n",
|
||
" 'Code section tableau pharmaciens',\n",
|
||
" 'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n",
|
||
" 'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 19
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"metadata": {
|
||
"tags": [],
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-12T19:54:12.829107Z",
|
||
"start_time": "2025-08-12T19:54:05.751406Z"
|
||
}
|
||
},
|
||
"source": [
"%%time\n",
"# Keep the rows of every 'Identifiant PP' that carries more than one distinct\n",
"# \"Nom d'exercice\" + \"Prénom d'exercice\" combination (exact, case-sensitive match).\n",
"# A missing name part makes the combined value NaN, which nunique does not count.\n",
"df2 = (\n",
"    df.assign(Nom_Prénom=df[\"Nom d'exercice\"] + \" \" + df[\"Prénom d'exercice\"])\n",
"    .groupby('Identifiant PP')[['Nom_Prénom']]\n",
"    .transform('nunique')  # per-row count of distinct names within its group\n",
"    .rename(columns={'Nom_Prénom': 'Count'})\n",
"    .query('Count > 1')\n",
"    .join(df)[['Identifiant PP', 'Count', \"Nom d'exercice\", \"Prénom d'exercice\"]]\n",
"    .sort_values(['Identifiant PP', 'index'])\n",
")\n",
"\n",
"df2"
],
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"CPU times: total: 6.81 s\n",
|
||
"Wall time: 7.06 s\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
" Identifiant PP Count Nom d'exercice Prénom d'exercice\n",
|
||
"index \n",
|
||
"1350393 10000034180 2 DUWAT-GEORGES GHISLAINE\n",
|
||
"1350394 10000034180 2 GEORGES GHISLAINE\n",
|
||
"259 10000040062 2 MEYER Nicolas\n",
|
||
"260 10000040062 2 MEYER Nicolas\n",
|
||
"809702 10000040062 2 MEYER NICOLAS\n",
|
||
"... ... ... ... ...\n",
|
||
"2158383 10111077417 2 D'ELLOY FRANCETTE\n",
|
||
"537896 10111105358 2 HOMO Maddy\n",
|
||
"1889090 10111105358 2 Homo Maddy\n",
|
||
"537977 10111110721 2 ROCHEPEAU Nadège\n",
|
||
"2158797 10111110721 2 BARREAU Nadège\n",
|
||
"\n",
|
||
"[9059 rows x 4 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Identifiant PP</th>\n",
|
||
" <th>Count</th>\n",
|
||
" <th>Nom d'exercice</th>\n",
|
||
" <th>Prénom d'exercice</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>index</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1350393</th>\n",
|
||
" <td>10000034180</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>DUWAT-GEORGES</td>\n",
|
||
" <td>GHISLAINE</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1350394</th>\n",
|
||
" <td>10000034180</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>GEORGES</td>\n",
|
||
" <td>GHISLAINE</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>259</th>\n",
|
||
" <td>10000040062</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>MEYER</td>\n",
|
||
" <td>Nicolas</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>260</th>\n",
|
||
" <td>10000040062</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>MEYER</td>\n",
|
||
" <td>Nicolas</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>809702</th>\n",
|
||
" <td>10000040062</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>MEYER</td>\n",
|
||
" <td>NICOLAS</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2158383</th>\n",
|
||
" <td>10111077417</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>D'ELLOY</td>\n",
|
||
" <td>FRANCETTE</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>537896</th>\n",
|
||
" <td>10111105358</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>HOMO</td>\n",
|
||
" <td>Maddy</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1889090</th>\n",
|
||
" <td>10111105358</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>Homo</td>\n",
|
||
" <td>Maddy</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>537977</th>\n",
|
||
" <td>10111110721</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>ROCHEPEAU</td>\n",
|
||
" <td>Nadège</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2158797</th>\n",
|
||
" <td>10111110721</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>BARREAU</td>\n",
|
||
" <td>Nadège</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>9059 rows × 4 columns</p>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 20
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-12T19:54:13.114103Z",
|
||
"start_time": "2025-08-12T19:54:13.063080Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
"# Write df2 as a pipe-delimited file, index included (index=True), with no quoting.\n",
"df2.to_csv(\n",
"    \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_2-1.csv\",\n",
"    sep='|',\n",
"    index=True,\n",
"    doublequote=False,\n",
"    quoting=csv.QUOTE_NONE,\n",
"    lineterminator='\\n',\n",
")"
],
|
||
"outputs": [],
|
||
"execution_count": 21
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"metadata": {
|
||
"tags": [],
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-12T19:54:20.671679Z",
|
||
"start_time": "2025-08-12T19:54:13.377047Z"
|
||
}
|
||
},
|
||
"source": [
"%%time\n",
"# Same check as for df2, but both name parts are lower-cased before being combined,\n",
"# so spellings that differ only by letter case count as a single name.\n",
"# A missing name part makes the combined value NaN, which nunique does not count.\n",
"df3 = (\n",
"    df.assign(\n",
"        Nom_Prénom=df[\"Nom d'exercice\"].str.lower() + \" \" + df[\"Prénom d'exercice\"].str.lower()\n",
"    )\n",
"    .groupby('Identifiant PP')[['Nom_Prénom']]\n",
"    .transform('nunique')  # per-row count of distinct names within its group\n",
"    .rename(columns={'Nom_Prénom': 'Count'})\n",
"    .query('Count > 1')\n",
"    .join(df)[['Identifiant PP', 'Count', \"Nom d'exercice\", \"Prénom d'exercice\"]]\n",
"    .sort_values(['Identifiant PP', 'index'])\n",
")\n",
"df3"
],
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"CPU times: total: 6.97 s\n",
|
||
"Wall time: 7.28 s\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
" Identifiant PP Count Nom d'exercice Prénom d'exercice\n",
|
||
"index \n",
|
||
"1350393 10000034180 2 DUWAT-GEORGES GHISLAINE\n",
|
||
"1350394 10000034180 2 GEORGES GHISLAINE\n",
|
||
"1350470 10000046051 2 STUDER AGNES\n",
|
||
"1620048 10000046051 2 JURION AGNES\n",
|
||
"269964 10000101518 2 BARREYRE SANDRINE\n",
|
||
"... ... ... ... ...\n",
|
||
"1617156 10110987236 2 ROGIER MATHILDE\n",
|
||
"807882 10111077417 2 DOUVIER FRANCETTE\n",
|
||
"2158383 10111077417 2 D'ELLOY FRANCETTE\n",
|
||
"537977 10111110721 2 ROCHEPEAU Nadège\n",
|
||
"2158797 10111110721 2 BARREAU Nadège\n",
|
||
"\n",
|
||
"[5395 rows x 4 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Identifiant PP</th>\n",
|
||
" <th>Count</th>\n",
|
||
" <th>Nom d'exercice</th>\n",
|
||
" <th>Prénom d'exercice</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>index</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1350393</th>\n",
|
||
" <td>10000034180</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>DUWAT-GEORGES</td>\n",
|
||
" <td>GHISLAINE</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1350394</th>\n",
|
||
" <td>10000034180</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>GEORGES</td>\n",
|
||
" <td>GHISLAINE</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1350470</th>\n",
|
||
" <td>10000046051</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>STUDER</td>\n",
|
||
" <td>AGNES</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1620048</th>\n",
|
||
" <td>10000046051</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>JURION</td>\n",
|
||
" <td>AGNES</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>269964</th>\n",
|
||
" <td>10000101518</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>BARREYRE</td>\n",
|
||
" <td>SANDRINE</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1617156</th>\n",
|
||
" <td>10110987236</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>ROGIER</td>\n",
|
||
" <td>MATHILDE</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>807882</th>\n",
|
||
" <td>10111077417</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>DOUVIER</td>\n",
|
||
" <td>FRANCETTE</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2158383</th>\n",
|
||
" <td>10111077417</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>D'ELLOY</td>\n",
|
||
" <td>FRANCETTE</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>537977</th>\n",
|
||
" <td>10111110721</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>ROCHEPEAU</td>\n",
|
||
" <td>Nadège</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2158797</th>\n",
|
||
" <td>10111110721</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>BARREAU</td>\n",
|
||
" <td>Nadège</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5395 rows × 4 columns</p>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 22
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-12T19:54:21.102182Z",
|
||
"start_time": "2025-08-12T19:54:21.072806Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
"# Write df3 as a pipe-delimited file, index included (index=True), with no quoting.\n",
"df3.to_csv(\n",
"    \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_2-2.csv\",\n",
"    sep='|',\n",
"    index=True,\n",
"    doublequote=False,\n",
"    quoting=csv.QUOTE_NONE,\n",
"    lineterminator='\\n',\n",
")"
],
|
||
"outputs": [],
|
||
"execution_count": 23
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.12"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|