Initial Commit

This commit is contained in:
2026-03-05 11:11:10 +00:00
commit 0ae48d63f2
16 changed files with 3247 additions and 0 deletions

View File

@@ -0,0 +1,439 @@
{
"cells": [
{
"cell_type": "code",
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
},
"ExecuteTime": {
"end_time": "2025-08-11T22:50:03.135959Z",
"start_time": "2025-08-11T22:49:26.824618Z"
}
},
"source": [
"%%time\n",
"import pandas as pd\n",
"import csv\n",
"df = pd.read_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804.txt\",\n",
" sep='|', doublequote=False, quoting=csv.QUOTE_NONE, dtype=str, na_values='', keep_default_na=False)\n",
"df.index.name = 'index'\n",
"df.columns"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 35.9 s\n",
"Wall time: 36.3 s\n"
]
},
{
"data": {
"text/plain": [
"Index(['Type d'identifiant PP', 'Identifiant PP',\n",
" 'Identification nationale PP', 'Code civilité d'exercice',\n",
" 'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n",
" 'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n",
" 'Libellé profession', 'Code catégorie professionnelle',\n",
" 'Libellé catégorie professionnelle', 'Code type savoir-faire',\n",
" 'Libellé type savoir-faire', 'Code savoir-faire',\n",
" 'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n",
" 'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n",
" 'Numéro FINESS établissement juridique',\n",
" 'Identifiant technique de la structure', 'Raison sociale site',\n",
" 'Enseigne commerciale site',\n",
" 'Complément destinataire (coord. structure)',\n",
" 'Complément point géographique (coord. structure)',\n",
" 'Numéro Voie (coord. structure)',\n",
" 'Indice répétition voie (coord. structure)',\n",
" 'Code type de voie (coord. structure)',\n",
" 'Libellé type de voie (coord. structure)',\n",
" 'Libellé Voie (coord. structure)',\n",
" 'Mention distribution (coord. structure)',\n",
" 'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n",
" 'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n",
" 'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n",
" 'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n",
" 'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n",
" 'Code Département (structure)', 'Libellé Département (structure)',\n",
" 'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n",
" 'Code secteur d'activité', 'Libellé secteur d'activité',\n",
" 'Code section tableau pharmaciens',\n",
" 'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n",
" 'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n",
" dtype='object')"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 38
},
{
"cell_type": "code",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"ExecuteTime": {
"end_time": "2025-08-11T22:53:55.986443Z",
"start_time": "2025-08-11T22:50:03.157898Z"
}
},
"source": [
"%%time\n",
"df2 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"]+\" \"+df[\"Prénom d'exercice\"]) \\\n",
" [['Identifiant PP','Nom_Prénom',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
" .groupby('Identifiant PP') \\\n",
" .filter(lambda f: f['Nom_Prénom'].nunique()>1) \\\n",
" .sort_values(['Identifiant PP','index']) \\\n",
" [['Identifiant PP',\"Nom d'exercice\",\"Prénom d'exercice\"]]\n",
"df2"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 3min 44s\n",
"Wall time: 3min 52s\n"
]
},
{
"data": {
"text/plain": [
" Identifiant PP Nom d'exercice Prénom d'exercice\n",
"index \n",
"1350393 10000034180 DUWAT-GEORGES GHISLAINE\n",
"1350394 10000034180 GEORGES GHISLAINE\n",
"259 10000040062 MEYER Nicolas\n",
"260 10000040062 MEYER Nicolas\n",
"809702 10000040062 MEYER NICOLAS\n",
"... ... ... ...\n",
"2158383 10111077417 D'ELLOY FRANCETTE\n",
"537896 10111105358 HOMO Maddy\n",
"1889090 10111105358 Homo Maddy\n",
"537977 10111110721 ROCHEPEAU Nadège\n",
"2158797 10111110721 BARREAU Nadège\n",
"\n",
"[9059 rows x 3 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Identifiant PP</th>\n",
" <th>Nom d'exercice</th>\n",
" <th>Prénom d'exercice</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1350393</th>\n",
" <td>10000034180</td>\n",
" <td>DUWAT-GEORGES</td>\n",
" <td>GHISLAINE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1350394</th>\n",
" <td>10000034180</td>\n",
" <td>GEORGES</td>\n",
" <td>GHISLAINE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>259</th>\n",
" <td>10000040062</td>\n",
" <td>MEYER</td>\n",
" <td>Nicolas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>260</th>\n",
" <td>10000040062</td>\n",
" <td>MEYER</td>\n",
" <td>Nicolas</td>\n",
" </tr>\n",
" <tr>\n",
" <th>809702</th>\n",
" <td>10000040062</td>\n",
" <td>MEYER</td>\n",
" <td>NICOLAS</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2158383</th>\n",
" <td>10111077417</td>\n",
" <td>D'ELLOY</td>\n",
" <td>FRANCETTE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>537896</th>\n",
" <td>10111105358</td>\n",
" <td>HOMO</td>\n",
" <td>Maddy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1889090</th>\n",
" <td>10111105358</td>\n",
" <td>Homo</td>\n",
" <td>Maddy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>537977</th>\n",
" <td>10111110721</td>\n",
" <td>ROCHEPEAU</td>\n",
" <td>Nadège</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2158797</th>\n",
" <td>10111110721</td>\n",
" <td>BARREAU</td>\n",
" <td>Nadège</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>9059 rows × 3 columns</p>\n",
"</div>"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 39
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-11T22:53:56.801020Z",
"start_time": "2025-08-11T22:53:56.699295Z"
}
},
"cell_type": "code",
"source": "df2.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_3-1.csv\", sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')",
"outputs": [],
"execution_count": 40
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-11T22:57:28.643070Z",
"start_time": "2025-08-11T22:53:56.870889Z"
}
},
"cell_type": "code",
"source": [
"%%time\n",
"df3 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"].str.lower()+\" \"+df[\"Prénom d'exercice\"].str.lower()) \\\n",
" [['Identifiant PP','Nom_Prénom',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
" .groupby('Identifiant PP') \\\n",
" .filter(lambda f: f['Nom_Prénom'].nunique()>1) \\\n",
" .sort_values(['Identifiant PP','index']) \\\n",
" [['Identifiant PP',\"Nom d'exercice\",\"Prénom d'exercice\"]]\n",
"df3"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 3min 26s\n",
"Wall time: 3min 31s\n"
]
},
{
"data": {
"text/plain": [
" Identifiant PP Nom d'exercice Prénom d'exercice\n",
"index \n",
"1350393 10000034180 DUWAT-GEORGES GHISLAINE\n",
"1350394 10000034180 GEORGES GHISLAINE\n",
"1350470 10000046051 STUDER AGNES\n",
"1620048 10000046051 JURION AGNES\n",
"269964 10000101518 BARREYRE SANDRINE\n",
"... ... ... ...\n",
"1617156 10110987236 ROGIER MATHILDE\n",
"807882 10111077417 DOUVIER FRANCETTE\n",
"2158383 10111077417 D'ELLOY FRANCETTE\n",
"537977 10111110721 ROCHEPEAU Nadège\n",
"2158797 10111110721 BARREAU Nadège\n",
"\n",
"[5395 rows x 3 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Identifiant PP</th>\n",
" <th>Nom d'exercice</th>\n",
" <th>Prénom d'exercice</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1350393</th>\n",
" <td>10000034180</td>\n",
" <td>DUWAT-GEORGES</td>\n",
" <td>GHISLAINE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1350394</th>\n",
" <td>10000034180</td>\n",
" <td>GEORGES</td>\n",
" <td>GHISLAINE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1350470</th>\n",
" <td>10000046051</td>\n",
" <td>STUDER</td>\n",
" <td>AGNES</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1620048</th>\n",
" <td>10000046051</td>\n",
" <td>JURION</td>\n",
" <td>AGNES</td>\n",
" </tr>\n",
" <tr>\n",
" <th>269964</th>\n",
" <td>10000101518</td>\n",
" <td>BARREYRE</td>\n",
" <td>SANDRINE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1617156</th>\n",
" <td>10110987236</td>\n",
" <td>ROGIER</td>\n",
" <td>MATHILDE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>807882</th>\n",
" <td>10111077417</td>\n",
" <td>DOUVIER</td>\n",
" <td>FRANCETTE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2158383</th>\n",
" <td>10111077417</td>\n",
" <td>D'ELLOY</td>\n",
" <td>FRANCETTE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>537977</th>\n",
" <td>10111110721</td>\n",
" <td>ROCHEPEAU</td>\n",
" <td>Nadège</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2158797</th>\n",
" <td>10111110721</td>\n",
" <td>BARREAU</td>\n",
" <td>Nadège</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5395 rows × 3 columns</p>\n",
"</div>"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 41
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-11T22:57:29.038232Z",
"start_time": "2025-08-11T22:57:29.014447Z"
}
},
"cell_type": "code",
"source": "df3.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_3-2.csv\", sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')",
"outputs": [],
"execution_count": 42
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}