Initial Commit
This commit is contained in:
439
Professionals_Multiple_Names3.ipynb
Normal file
439
Professionals_Multiple_Names3.ipynb
Normal file
@@ -0,0 +1,439 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"jupyter": {
|
||||
"outputs_hidden": true
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-11T22:50:03.135959Z",
|
||||
"start_time": "2025-08-11T22:49:26.824618Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"import csv\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"# Load the pipe-delimited reference table as-is: every column is read as text,\n",
|
||||
"# quote characters are treated as ordinary data, and only empty fields are\n",
|
||||
"# read as missing values.\n",
|
||||
"df = pd.read_csv(\n",
|
||||
"    \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804.txt\",\n",
|
||||
"    sep='|',\n",
|
||||
"    dtype=str,\n",
|
||||
"    quoting=csv.QUOTE_NONE,\n",
|
||||
"    doublequote=False,\n",
|
||||
"    na_values='',\n",
|
||||
"    keep_default_na=False,\n",
|
||||
")\n",
|
||||
"# Name the row index so that later cells can sort on it as 'index'.\n",
|
||||
"df.index = df.index.rename('index')\n",
|
||||
"df.columns"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: total: 35.9 s\n",
|
||||
"Wall time: 36.3 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Index(['Type d'identifiant PP', 'Identifiant PP',\n",
|
||||
" 'Identification nationale PP', 'Code civilité d'exercice',\n",
|
||||
" 'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n",
|
||||
" 'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n",
|
||||
" 'Libellé profession', 'Code catégorie professionnelle',\n",
|
||||
" 'Libellé catégorie professionnelle', 'Code type savoir-faire',\n",
|
||||
" 'Libellé type savoir-faire', 'Code savoir-faire',\n",
|
||||
" 'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n",
|
||||
" 'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n",
|
||||
" 'Numéro FINESS établissement juridique',\n",
|
||||
" 'Identifiant technique de la structure', 'Raison sociale site',\n",
|
||||
" 'Enseigne commerciale site',\n",
|
||||
" 'Complément destinataire (coord. structure)',\n",
|
||||
" 'Complément point géographique (coord. structure)',\n",
|
||||
" 'Numéro Voie (coord. structure)',\n",
|
||||
" 'Indice répétition voie (coord. structure)',\n",
|
||||
" 'Code type de voie (coord. structure)',\n",
|
||||
" 'Libellé type de voie (coord. structure)',\n",
|
||||
" 'Libellé Voie (coord. structure)',\n",
|
||||
" 'Mention distribution (coord. structure)',\n",
|
||||
" 'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n",
|
||||
" 'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n",
|
||||
" 'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n",
|
||||
" 'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n",
|
||||
" 'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n",
|
||||
" 'Code Département (structure)', 'Libellé Département (structure)',\n",
|
||||
" 'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n",
|
||||
" 'Code secteur d'activité', 'Libellé secteur d'activité',\n",
|
||||
" 'Code section tableau pharmaciens',\n",
|
||||
" 'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n",
|
||||
" 'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n",
|
||||
" dtype='object')"
|
||||
]
|
||||
},
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 38
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-11T22:53:55.986443Z",
|
||||
"start_time": "2025-08-11T22:50:03.157898Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# Professionals ('Identifiant PP') that appear with more than one distinct,\n",
|
||||
"# case-sensitive \"surname firstname\" combination.\n",
|
||||
"# Keep only the identifier and the two name columns needed for the comparison.\n",
|
||||
"names = df[['Identifiant PP', \"Nom d'exercice\", \"Prénom d'exercice\"]]\n",
|
||||
"# Full name is NaN when either part is missing; nunique() ignores NaN.\n",
|
||||
"full_name = names[\"Nom d'exercice\"] + \" \" + names[\"Prénom d'exercice\"]\n",
|
||||
"# Count distinct full names per identifier in one vectorised pass instead of\n",
|
||||
"# calling a Python lambda for every group through groupby().filter().\n",
|
||||
"name_counts = full_name.groupby(names['Identifiant PP']).nunique()\n",
|
||||
"# Rows whose identifier is missing map to NaN and are therefore excluded.\n",
|
||||
"has_multiple_names = names['Identifiant PP'].map(name_counts).gt(1)\n",
|
||||
"df2 = names[has_multiple_names].sort_values(['Identifiant PP', 'index'])\n",
|
||||
"df2"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: total: 3min 44s\n",
|
||||
"Wall time: 3min 52s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
" Identifiant PP Nom d'exercice Prénom d'exercice\n",
|
||||
"index \n",
|
||||
"1350393 10000034180 DUWAT-GEORGES GHISLAINE\n",
|
||||
"1350394 10000034180 GEORGES GHISLAINE\n",
|
||||
"259 10000040062 MEYER Nicolas\n",
|
||||
"260 10000040062 MEYER Nicolas\n",
|
||||
"809702 10000040062 MEYER NICOLAS\n",
|
||||
"... ... ... ...\n",
|
||||
"2158383 10111077417 D'ELLOY FRANCETTE\n",
|
||||
"537896 10111105358 HOMO Maddy\n",
|
||||
"1889090 10111105358 Homo Maddy\n",
|
||||
"537977 10111110721 ROCHEPEAU Nadège\n",
|
||||
"2158797 10111110721 BARREAU Nadège\n",
|
||||
"\n",
|
||||
"[9059 rows x 3 columns]"
|
||||
],
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Identifiant PP</th>\n",
|
||||
" <th>Nom d'exercice</th>\n",
|
||||
" <th>Prénom d'exercice</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>index</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>1350393</th>\n",
|
||||
" <td>10000034180</td>\n",
|
||||
" <td>DUWAT-GEORGES</td>\n",
|
||||
" <td>GHISLAINE</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1350394</th>\n",
|
||||
" <td>10000034180</td>\n",
|
||||
" <td>GEORGES</td>\n",
|
||||
" <td>GHISLAINE</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>259</th>\n",
|
||||
" <td>10000040062</td>\n",
|
||||
" <td>MEYER</td>\n",
|
||||
" <td>Nicolas</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>260</th>\n",
|
||||
" <td>10000040062</td>\n",
|
||||
" <td>MEYER</td>\n",
|
||||
" <td>Nicolas</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>809702</th>\n",
|
||||
" <td>10000040062</td>\n",
|
||||
" <td>MEYER</td>\n",
|
||||
" <td>NICOLAS</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2158383</th>\n",
|
||||
" <td>10111077417</td>\n",
|
||||
" <td>D'ELLOY</td>\n",
|
||||
" <td>FRANCETTE</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>537896</th>\n",
|
||||
" <td>10111105358</td>\n",
|
||||
" <td>HOMO</td>\n",
|
||||
" <td>Maddy</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1889090</th>\n",
|
||||
" <td>10111105358</td>\n",
|
||||
" <td>Homo</td>\n",
|
||||
" <td>Maddy</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>537977</th>\n",
|
||||
" <td>10111110721</td>\n",
|
||||
" <td>ROCHEPEAU</td>\n",
|
||||
" <td>Nadège</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2158797</th>\n",
|
||||
" <td>10111110721</td>\n",
|
||||
" <td>BARREAU</td>\n",
|
||||
" <td>Nadège</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>9059 rows × 3 columns</p>\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 39
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-11T22:53:56.801020Z",
|
||||
"start_time": "2025-08-11T22:53:56.699295Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Export the result as a pipe-delimited file without quoting; the original row\n",
|
||||
"# index is written as the first column.\n",
|
||||
"df2.to_csv(\n",
|
||||
"    \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_3-1.csv\",\n",
|
||||
"    sep='|',\n",
|
||||
"    index=True,\n",
|
||||
"    quoting=csv.QUOTE_NONE,\n",
|
||||
"    doublequote=False,\n",
|
||||
"    lineterminator='\\n',\n",
|
||||
")"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 40
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-11T22:57:28.643070Z",
|
||||
"start_time": "2025-08-11T22:53:56.870889Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# Professionals ('Identifiant PP') that appear with more than one distinct\n",
|
||||
"# \"surname firstname\" combination once letter case is ignored.\n",
|
||||
"# Keep only the identifier and the two name columns needed for the comparison.\n",
|
||||
"names_ci = df[['Identifiant PP', \"Nom d'exercice\", \"Prénom d'exercice\"]]\n",
|
||||
"# Lower-case both parts so that differences in case alone are not counted.\n",
|
||||
"# The full name is NaN when either part is missing; nunique() ignores NaN.\n",
|
||||
"full_name_ci = names_ci[\"Nom d'exercice\"].str.lower() + \" \" + names_ci[\"Prénom d'exercice\"].str.lower()\n",
|
||||
"# Count distinct full names per identifier in one vectorised pass instead of\n",
|
||||
"# calling a Python lambda for every group through groupby().filter().\n",
|
||||
"name_counts_ci = full_name_ci.groupby(names_ci['Identifiant PP']).nunique()\n",
|
||||
"# Rows whose identifier is missing map to NaN and are therefore excluded.\n",
|
||||
"has_multiple_names_ci = names_ci['Identifiant PP'].map(name_counts_ci).gt(1)\n",
|
||||
"df3 = names_ci[has_multiple_names_ci].sort_values(['Identifiant PP', 'index'])\n",
|
||||
"df3"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: total: 3min 26s\n",
|
||||
"Wall time: 3min 31s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
" Identifiant PP Nom d'exercice Prénom d'exercice\n",
|
||||
"index \n",
|
||||
"1350393 10000034180 DUWAT-GEORGES GHISLAINE\n",
|
||||
"1350394 10000034180 GEORGES GHISLAINE\n",
|
||||
"1350470 10000046051 STUDER AGNES\n",
|
||||
"1620048 10000046051 JURION AGNES\n",
|
||||
"269964 10000101518 BARREYRE SANDRINE\n",
|
||||
"... ... ... ...\n",
|
||||
"1617156 10110987236 ROGIER MATHILDE\n",
|
||||
"807882 10111077417 DOUVIER FRANCETTE\n",
|
||||
"2158383 10111077417 D'ELLOY FRANCETTE\n",
|
||||
"537977 10111110721 ROCHEPEAU Nadège\n",
|
||||
"2158797 10111110721 BARREAU Nadège\n",
|
||||
"\n",
|
||||
"[5395 rows x 3 columns]"
|
||||
],
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Identifiant PP</th>\n",
|
||||
" <th>Nom d'exercice</th>\n",
|
||||
" <th>Prénom d'exercice</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>index</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>1350393</th>\n",
|
||||
" <td>10000034180</td>\n",
|
||||
" <td>DUWAT-GEORGES</td>\n",
|
||||
" <td>GHISLAINE</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1350394</th>\n",
|
||||
" <td>10000034180</td>\n",
|
||||
" <td>GEORGES</td>\n",
|
||||
" <td>GHISLAINE</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1350470</th>\n",
|
||||
" <td>10000046051</td>\n",
|
||||
" <td>STUDER</td>\n",
|
||||
" <td>AGNES</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1620048</th>\n",
|
||||
" <td>10000046051</td>\n",
|
||||
" <td>JURION</td>\n",
|
||||
" <td>AGNES</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>269964</th>\n",
|
||||
" <td>10000101518</td>\n",
|
||||
" <td>BARREYRE</td>\n",
|
||||
" <td>SANDRINE</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1617156</th>\n",
|
||||
" <td>10110987236</td>\n",
|
||||
" <td>ROGIER</td>\n",
|
||||
" <td>MATHILDE</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>807882</th>\n",
|
||||
" <td>10111077417</td>\n",
|
||||
" <td>DOUVIER</td>\n",
|
||||
" <td>FRANCETTE</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2158383</th>\n",
|
||||
" <td>10111077417</td>\n",
|
||||
" <td>D'ELLOY</td>\n",
|
||||
" <td>FRANCETTE</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>537977</th>\n",
|
||||
" <td>10111110721</td>\n",
|
||||
" <td>ROCHEPEAU</td>\n",
|
||||
" <td>Nadège</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2158797</th>\n",
|
||||
" <td>10111110721</td>\n",
|
||||
" <td>BARREAU</td>\n",
|
||||
" <td>Nadège</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>5395 rows × 3 columns</p>\n",
|
||||
"</div>"
|
||||
]
|
||||
},
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 41
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-11T22:57:29.038232Z",
|
||||
"start_time": "2025-08-11T22:57:29.014447Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Export the result as a pipe-delimited file without quoting; the original row\n",
|
||||
"# index is written as the first column.\n",
|
||||
"df3.to_csv(\n",
|
||||
"    \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_3-2.csv\",\n",
|
||||
"    sep='|',\n",
|
||||
"    index=True,\n",
|
||||
"    quoting=csv.QUOTE_NONE,\n",
|
||||
"    doublequote=False,\n",
|
||||
"    lineterminator='\\n',\n",
|
||||
")"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 42
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
Reference in New Issue
Block a user