Initial Commit
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
*.rar
|
||||||
|
*.zip
|
||||||
|
*.txt
|
||||||
|
*.csv
|
||||||
76
.ipynb_checkpoints/Professionals_Sort-checkpoint.ipynb
Normal file
76
.ipynb_checkpoints/Professionals_Sort-checkpoint.ipynb
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import csv\n",
|
||||||
|
"filename = 'Table_Réf_Professionnels_250430'\n",
|
||||||
|
"df = pd.read_csv(\"H:\\\\Mon Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"+filename+\".csv\", sep='|', doublequote=False, quoting=csv.QUOTE_NONE,dtype=str, na_values='', keep_default_na=False)\n",
|
||||||
|
"df.columns"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"is_executing": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"execution_count": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"df.shape"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"execution_count": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"df_sorted = df.sort_values([\"Identifiant PP\", \"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)', 'Numéro Voie (coord. structure)', 'Indice répétition voie (coord. structure)', 'Libellé type de voie (coord. structure)', 'Libellé Voie (coord. structure)' , 'Mention distribution (coord. structure)', 'Téléphone (coord. structure)'])"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"execution_count": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"df_sorted.to_csv(\"H:\\\\Mon Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"+filename+\"-sorted.csv\", sep='|', index=False, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"execution_count": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
6
.ipynb_checkpoints/Untitled-checkpoint.ipynb
Normal file
6
.ipynb_checkpoints/Untitled-checkpoint.ipynb
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"cells": [],
|
||||||
|
"metadata": {},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
176
Professionals.ipynb
Normal file
176
Professionals.ipynb
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [],
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "Index(['Type d'identifiant PP', 'Identifiant PP',\n 'Identification nationale PP', 'Code civilité d'exercice',\n 'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n 'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n 'Libellé profession', 'Code catégorie professionnelle',\n 'Libellé catégorie professionnelle', 'Code type savoir-faire',\n 'Libellé type savoir-faire', 'Code savoir-faire',\n 'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n 'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n 'Numéro FINESS établissement juridique',\n 'Identifiant technique de la structure', 'Raison sociale site',\n 'Enseigne commerciale site',\n 'Complément destinataire (coord. structure)',\n 'Complément point géographique (coord. structure)',\n 'Numéro Voie (coord. structure)',\n 'Indice répétition voie (coord. structure)',\n 'Code type de voie (coord. structure)',\n 'Libellé type de voie (coord. structure)',\n 'Libellé Voie (coord. structure)',\n 'Mention distribution (coord. structure)',\n 'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n 'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n 'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n 'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n 'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n 'Code Département (structure)', 'Libellé Département (structure)',\n 'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n 'Code secteur d'activité', 'Libellé secteur d'activité',\n 'Code section tableau pharmaciens',\n 'Libellé section tableau pharmaciens', 'Unnamed: 52'],\n dtype='object')"
|
||||||
|
},
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import csv\n",
|
||||||
|
"df = pd.read_csv(\"C:\\\\_temp\\\\Professionnels\\\\Table_Réf_Professionnels_220615.csv\", sep='|', doublequote=False, quoting=csv.QUOTE_NONE,\n",
|
||||||
|
" dtype=str, na_values='', keep_default_na=False)\n",
|
||||||
|
"df.columns"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "(864328, 53)"
|
||||||
|
},
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df.shape"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "(843643, 53)"
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df.drop_duplicates().shape"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "(812168, 53)"
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df.drop_duplicates([\"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)']).shape"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "(814972, 53)"
|
||||||
|
},
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df.drop_duplicates([\"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)', \"Identifiant PP\"]).shape"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "(839643, 53)"
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df.drop_duplicates([\"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)', \"Identifiant PP\", 'Numéro Voie (coord. structure)', 'Indice répétition voie (coord. structure)', 'Libellé type de voie (coord. structure)', 'Libellé Voie (coord. structure)' , 'Mention distribution (coord. structure)']).shape"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
924
Professionals_Activities_Inconsistencies.ipynb
Normal file
924
Professionals_Activities_Inconsistencies.ipynb
Normal file
File diff suppressed because one or more lines are too long
465
Professionals_Multiple_Names2.ipynb
Normal file
465
Professionals_Multiple_Names2.ipynb
Normal file
@@ -0,0 +1,465 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"tags": [],
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-12T19:54:05.652708Z",
|
||||||
|
"start_time": "2025-08-12T19:53:30.037989Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import csv\n",
|
||||||
|
"df = pd.read_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804.txt\", sep='|',\n",
|
||||||
|
" doublequote=False, quoting=csv.QUOTE_NONE, dtype=str, na_values='', keep_default_na=False)\n",
|
||||||
|
"df.index.name = 'index'\n",
|
||||||
|
"df.columns"
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: total: 35 s\n",
|
||||||
|
"Wall time: 35.6 s\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"Index(['Type d'identifiant PP', 'Identifiant PP',\n",
|
||||||
|
" 'Identification nationale PP', 'Code civilité d'exercice',\n",
|
||||||
|
" 'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n",
|
||||||
|
" 'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n",
|
||||||
|
" 'Libellé profession', 'Code catégorie professionnelle',\n",
|
||||||
|
" 'Libellé catégorie professionnelle', 'Code type savoir-faire',\n",
|
||||||
|
" 'Libellé type savoir-faire', 'Code savoir-faire',\n",
|
||||||
|
" 'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n",
|
||||||
|
" 'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n",
|
||||||
|
" 'Numéro FINESS établissement juridique',\n",
|
||||||
|
" 'Identifiant technique de la structure', 'Raison sociale site',\n",
|
||||||
|
" 'Enseigne commerciale site',\n",
|
||||||
|
" 'Complément destinataire (coord. structure)',\n",
|
||||||
|
" 'Complément point géographique (coord. structure)',\n",
|
||||||
|
" 'Numéro Voie (coord. structure)',\n",
|
||||||
|
" 'Indice répétition voie (coord. structure)',\n",
|
||||||
|
" 'Code type de voie (coord. structure)',\n",
|
||||||
|
" 'Libellé type de voie (coord. structure)',\n",
|
||||||
|
" 'Libellé Voie (coord. structure)',\n",
|
||||||
|
" 'Mention distribution (coord. structure)',\n",
|
||||||
|
" 'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n",
|
||||||
|
" 'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n",
|
||||||
|
" 'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n",
|
||||||
|
" 'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n",
|
||||||
|
" 'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n",
|
||||||
|
" 'Code Département (structure)', 'Libellé Département (structure)',\n",
|
||||||
|
" 'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n",
|
||||||
|
" 'Code secteur d'activité', 'Libellé secteur d'activité',\n",
|
||||||
|
" 'Code section tableau pharmaciens',\n",
|
||||||
|
" 'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n",
|
||||||
|
" 'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n",
|
||||||
|
" dtype='object')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"execution_count": 19
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"tags": [],
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-12T19:54:12.829107Z",
|
||||||
|
"start_time": "2025-08-12T19:54:05.751406Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"df2 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"]+\" \"+df[\"Prénom d'exercice\"]) \\\n",
|
||||||
|
" .groupby('Identifiant PP')[['Nom_Prénom']] \\\n",
|
||||||
|
" .transform('nunique').rename(columns={'Nom_Prénom' : 'Count'}).query('Count > 1') \\\n",
|
||||||
|
" .join(df)[['Identifiant PP','Count',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
|
||||||
|
" .sort_values(['Identifiant PP', 'index'])\n",
|
||||||
|
"\n",
|
||||||
|
"df2"
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: total: 6.81 s\n",
|
||||||
|
"Wall time: 7.06 s\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
" Identifiant PP Count Nom d'exercice Prénom d'exercice\n",
|
||||||
|
"index \n",
|
||||||
|
"1350393 10000034180 2 DUWAT-GEORGES GHISLAINE\n",
|
||||||
|
"1350394 10000034180 2 GEORGES GHISLAINE\n",
|
||||||
|
"259 10000040062 2 MEYER Nicolas\n",
|
||||||
|
"260 10000040062 2 MEYER Nicolas\n",
|
||||||
|
"809702 10000040062 2 MEYER NICOLAS\n",
|
||||||
|
"... ... ... ... ...\n",
|
||||||
|
"2158383 10111077417 2 D'ELLOY FRANCETTE\n",
|
||||||
|
"537896 10111105358 2 HOMO Maddy\n",
|
||||||
|
"1889090 10111105358 2 Homo Maddy\n",
|
||||||
|
"537977 10111110721 2 ROCHEPEAU Nadège\n",
|
||||||
|
"2158797 10111110721 2 BARREAU Nadège\n",
|
||||||
|
"\n",
|
||||||
|
"[9059 rows x 4 columns]"
|
||||||
|
],
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Identifiant PP</th>\n",
|
||||||
|
" <th>Count</th>\n",
|
||||||
|
" <th>Nom d'exercice</th>\n",
|
||||||
|
" <th>Prénom d'exercice</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>index</th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1350393</th>\n",
|
||||||
|
" <td>10000034180</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>DUWAT-GEORGES</td>\n",
|
||||||
|
" <td>GHISLAINE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1350394</th>\n",
|
||||||
|
" <td>10000034180</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>GEORGES</td>\n",
|
||||||
|
" <td>GHISLAINE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>259</th>\n",
|
||||||
|
" <td>10000040062</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>MEYER</td>\n",
|
||||||
|
" <td>Nicolas</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>260</th>\n",
|
||||||
|
" <td>10000040062</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>MEYER</td>\n",
|
||||||
|
" <td>Nicolas</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>809702</th>\n",
|
||||||
|
" <td>10000040062</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>MEYER</td>\n",
|
||||||
|
" <td>NICOLAS</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2158383</th>\n",
|
||||||
|
" <td>10111077417</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>D'ELLOY</td>\n",
|
||||||
|
" <td>FRANCETTE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>537896</th>\n",
|
||||||
|
" <td>10111105358</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>HOMO</td>\n",
|
||||||
|
" <td>Maddy</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1889090</th>\n",
|
||||||
|
" <td>10111105358</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>Homo</td>\n",
|
||||||
|
" <td>Maddy</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>537977</th>\n",
|
||||||
|
" <td>10111110721</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>ROCHEPEAU</td>\n",
|
||||||
|
" <td>Nadège</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2158797</th>\n",
|
||||||
|
" <td>10111110721</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>BARREAU</td>\n",
|
||||||
|
" <td>Nadège</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>9059 rows × 4 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 20,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"execution_count": 20
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-12T19:54:13.114103Z",
|
||||||
|
"start_time": "2025-08-12T19:54:13.063080Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"df2.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_2-1.csv\",\n",
|
||||||
|
" sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')"
|
||||||
|
],
|
||||||
|
"outputs": [],
|
||||||
|
"execution_count": 21
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"tags": [],
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-12T19:54:20.671679Z",
|
||||||
|
"start_time": "2025-08-12T19:54:13.377047Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"df3 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"].str.lower()+\" \"+df[\"Prénom d'exercice\"].str.lower()) \\\n",
|
||||||
|
" .groupby('Identifiant PP')[['Nom_Prénom']] \\\n",
|
||||||
|
" .transform('nunique').rename(columns={'Nom_Prénom' : 'Count'}).query('Count > 1') \\\n",
|
||||||
|
" .join(df)[['Identifiant PP','Count',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
|
||||||
|
" .sort_values(['Identifiant PP', 'index'])\n",
|
||||||
|
"df3"
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: total: 6.97 s\n",
|
||||||
|
"Wall time: 7.28 s\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
" Identifiant PP Count Nom d'exercice Prénom d'exercice\n",
|
||||||
|
"index \n",
|
||||||
|
"1350393 10000034180 2 DUWAT-GEORGES GHISLAINE\n",
|
||||||
|
"1350394 10000034180 2 GEORGES GHISLAINE\n",
|
||||||
|
"1350470 10000046051 2 STUDER AGNES\n",
|
||||||
|
"1620048 10000046051 2 JURION AGNES\n",
|
||||||
|
"269964 10000101518 2 BARREYRE SANDRINE\n",
|
||||||
|
"... ... ... ... ...\n",
|
||||||
|
"1617156 10110987236 2 ROGIER MATHILDE\n",
|
||||||
|
"807882 10111077417 2 DOUVIER FRANCETTE\n",
|
||||||
|
"2158383 10111077417 2 D'ELLOY FRANCETTE\n",
|
||||||
|
"537977 10111110721 2 ROCHEPEAU Nadège\n",
|
||||||
|
"2158797 10111110721 2 BARREAU Nadège\n",
|
||||||
|
"\n",
|
||||||
|
"[5395 rows x 4 columns]"
|
||||||
|
],
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Identifiant PP</th>\n",
|
||||||
|
" <th>Count</th>\n",
|
||||||
|
" <th>Nom d'exercice</th>\n",
|
||||||
|
" <th>Prénom d'exercice</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>index</th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1350393</th>\n",
|
||||||
|
" <td>10000034180</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>DUWAT-GEORGES</td>\n",
|
||||||
|
" <td>GHISLAINE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1350394</th>\n",
|
||||||
|
" <td>10000034180</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>GEORGES</td>\n",
|
||||||
|
" <td>GHISLAINE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1350470</th>\n",
|
||||||
|
" <td>10000046051</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>STUDER</td>\n",
|
||||||
|
" <td>AGNES</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1620048</th>\n",
|
||||||
|
" <td>10000046051</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>JURION</td>\n",
|
||||||
|
" <td>AGNES</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>269964</th>\n",
|
||||||
|
" <td>10000101518</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>BARREYRE</td>\n",
|
||||||
|
" <td>SANDRINE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1617156</th>\n",
|
||||||
|
" <td>10110987236</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>ROGIER</td>\n",
|
||||||
|
" <td>MATHILDE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>807882</th>\n",
|
||||||
|
" <td>10111077417</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>DOUVIER</td>\n",
|
||||||
|
" <td>FRANCETTE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2158383</th>\n",
|
||||||
|
" <td>10111077417</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>D'ELLOY</td>\n",
|
||||||
|
" <td>FRANCETTE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>537977</th>\n",
|
||||||
|
" <td>10111110721</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>ROCHEPEAU</td>\n",
|
||||||
|
" <td>Nadège</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2158797</th>\n",
|
||||||
|
" <td>10111110721</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>BARREAU</td>\n",
|
||||||
|
" <td>Nadège</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>5395 rows × 4 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 22,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"execution_count": 22
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-12T19:54:21.102182Z",
|
||||||
|
"start_time": "2025-08-12T19:54:21.072806Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"df3.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_2-2.csv\",\n",
|
||||||
|
" sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')"
|
||||||
|
],
|
||||||
|
"outputs": [],
|
||||||
|
"execution_count": 23
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
439
Professionals_Multiple_Names3.ipynb
Normal file
439
Professionals_Multiple_Names3.ipynb
Normal file
@@ -0,0 +1,439 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": true,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": true
|
||||||
|
},
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-11T22:50:03.135959Z",
|
||||||
|
"start_time": "2025-08-11T22:49:26.824618Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import csv\n",
|
||||||
|
"df = pd.read_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804.txt\",\n",
|
||||||
|
" sep='|', doublequote=False, quoting=csv.QUOTE_NONE, dtype=str, na_values='', keep_default_na=False)\n",
|
||||||
|
"df.index.name = 'index'\n",
|
||||||
|
"df.columns"
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: total: 35.9 s\n",
|
||||||
|
"Wall time: 36.3 s\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"Index(['Type d'identifiant PP', 'Identifiant PP',\n",
|
||||||
|
" 'Identification nationale PP', 'Code civilité d'exercice',\n",
|
||||||
|
" 'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n",
|
||||||
|
" 'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n",
|
||||||
|
" 'Libellé profession', 'Code catégorie professionnelle',\n",
|
||||||
|
" 'Libellé catégorie professionnelle', 'Code type savoir-faire',\n",
|
||||||
|
" 'Libellé type savoir-faire', 'Code savoir-faire',\n",
|
||||||
|
" 'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n",
|
||||||
|
" 'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n",
|
||||||
|
" 'Numéro FINESS établissement juridique',\n",
|
||||||
|
" 'Identifiant technique de la structure', 'Raison sociale site',\n",
|
||||||
|
" 'Enseigne commerciale site',\n",
|
||||||
|
" 'Complément destinataire (coord. structure)',\n",
|
||||||
|
" 'Complément point géographique (coord. structure)',\n",
|
||||||
|
" 'Numéro Voie (coord. structure)',\n",
|
||||||
|
" 'Indice répétition voie (coord. structure)',\n",
|
||||||
|
" 'Code type de voie (coord. structure)',\n",
|
||||||
|
" 'Libellé type de voie (coord. structure)',\n",
|
||||||
|
" 'Libellé Voie (coord. structure)',\n",
|
||||||
|
" 'Mention distribution (coord. structure)',\n",
|
||||||
|
" 'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n",
|
||||||
|
" 'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n",
|
||||||
|
" 'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n",
|
||||||
|
" 'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n",
|
||||||
|
" 'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n",
|
||||||
|
" 'Code Département (structure)', 'Libellé Département (structure)',\n",
|
||||||
|
" 'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n",
|
||||||
|
" 'Code secteur d'activité', 'Libellé secteur d'activité',\n",
|
||||||
|
" 'Code section tableau pharmaciens',\n",
|
||||||
|
" 'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n",
|
||||||
|
" 'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n",
|
||||||
|
" dtype='object')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 38,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"execution_count": 38
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-11T22:53:55.986443Z",
|
||||||
|
"start_time": "2025-08-11T22:50:03.157898Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"df2 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"]+\" \"+df[\"Prénom d'exercice\"]) \\\n",
|
||||||
|
" [['Identifiant PP','Nom_Prénom',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
|
||||||
|
" .groupby('Identifiant PP') \\\n",
|
||||||
|
" .filter(lambda f: f['Nom_Prénom'].nunique()>1) \\\n",
|
||||||
|
" .sort_values(['Identifiant PP','index']) \\\n",
|
||||||
|
" [['Identifiant PP',\"Nom d'exercice\",\"Prénom d'exercice\"]]\n",
|
||||||
|
"df2"
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: total: 3min 44s\n",
|
||||||
|
"Wall time: 3min 52s\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
" Identifiant PP Nom d'exercice Prénom d'exercice\n",
|
||||||
|
"index \n",
|
||||||
|
"1350393 10000034180 DUWAT-GEORGES GHISLAINE\n",
|
||||||
|
"1350394 10000034180 GEORGES GHISLAINE\n",
|
||||||
|
"259 10000040062 MEYER Nicolas\n",
|
||||||
|
"260 10000040062 MEYER Nicolas\n",
|
||||||
|
"809702 10000040062 MEYER NICOLAS\n",
|
||||||
|
"... ... ... ...\n",
|
||||||
|
"2158383 10111077417 D'ELLOY FRANCETTE\n",
|
||||||
|
"537896 10111105358 HOMO Maddy\n",
|
||||||
|
"1889090 10111105358 Homo Maddy\n",
|
||||||
|
"537977 10111110721 ROCHEPEAU Nadège\n",
|
||||||
|
"2158797 10111110721 BARREAU Nadège\n",
|
||||||
|
"\n",
|
||||||
|
"[9059 rows x 3 columns]"
|
||||||
|
],
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Identifiant PP</th>\n",
|
||||||
|
" <th>Nom d'exercice</th>\n",
|
||||||
|
" <th>Prénom d'exercice</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>index</th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1350393</th>\n",
|
||||||
|
" <td>10000034180</td>\n",
|
||||||
|
" <td>DUWAT-GEORGES</td>\n",
|
||||||
|
" <td>GHISLAINE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1350394</th>\n",
|
||||||
|
" <td>10000034180</td>\n",
|
||||||
|
" <td>GEORGES</td>\n",
|
||||||
|
" <td>GHISLAINE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>259</th>\n",
|
||||||
|
" <td>10000040062</td>\n",
|
||||||
|
" <td>MEYER</td>\n",
|
||||||
|
" <td>Nicolas</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>260</th>\n",
|
||||||
|
" <td>10000040062</td>\n",
|
||||||
|
" <td>MEYER</td>\n",
|
||||||
|
" <td>Nicolas</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>809702</th>\n",
|
||||||
|
" <td>10000040062</td>\n",
|
||||||
|
" <td>MEYER</td>\n",
|
||||||
|
" <td>NICOLAS</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2158383</th>\n",
|
||||||
|
" <td>10111077417</td>\n",
|
||||||
|
" <td>D'ELLOY</td>\n",
|
||||||
|
" <td>FRANCETTE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>537896</th>\n",
|
||||||
|
" <td>10111105358</td>\n",
|
||||||
|
" <td>HOMO</td>\n",
|
||||||
|
" <td>Maddy</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1889090</th>\n",
|
||||||
|
" <td>10111105358</td>\n",
|
||||||
|
" <td>Homo</td>\n",
|
||||||
|
" <td>Maddy</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>537977</th>\n",
|
||||||
|
" <td>10111110721</td>\n",
|
||||||
|
" <td>ROCHEPEAU</td>\n",
|
||||||
|
" <td>Nadège</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2158797</th>\n",
|
||||||
|
" <td>10111110721</td>\n",
|
||||||
|
" <td>BARREAU</td>\n",
|
||||||
|
" <td>Nadège</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>9059 rows × 3 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 39,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"execution_count": 39
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-11T22:53:56.801020Z",
|
||||||
|
"start_time": "2025-08-11T22:53:56.699295Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": "df2.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_3-1.csv\", sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')",
|
||||||
|
"outputs": [],
|
||||||
|
"execution_count": 40
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-11T22:57:28.643070Z",
|
||||||
|
"start_time": "2025-08-11T22:53:56.870889Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"df3 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"].str.lower()+\" \"+df[\"Prénom d'exercice\"].str.lower()) \\\n",
|
||||||
|
" [['Identifiant PP','Nom_Prénom',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
|
||||||
|
" .groupby('Identifiant PP') \\\n",
|
||||||
|
" .filter(lambda f: f['Nom_Prénom'].nunique()>1) \\\n",
|
||||||
|
" .sort_values(['Identifiant PP','index']) \\\n",
|
||||||
|
" [['Identifiant PP',\"Nom d'exercice\",\"Prénom d'exercice\"]]\n",
|
||||||
|
"df3"
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: total: 3min 26s\n",
|
||||||
|
"Wall time: 3min 31s\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
" Identifiant PP Nom d'exercice Prénom d'exercice\n",
|
||||||
|
"index \n",
|
||||||
|
"1350393 10000034180 DUWAT-GEORGES GHISLAINE\n",
|
||||||
|
"1350394 10000034180 GEORGES GHISLAINE\n",
|
||||||
|
"1350470 10000046051 STUDER AGNES\n",
|
||||||
|
"1620048 10000046051 JURION AGNES\n",
|
||||||
|
"269964 10000101518 BARREYRE SANDRINE\n",
|
||||||
|
"... ... ... ...\n",
|
||||||
|
"1617156 10110987236 ROGIER MATHILDE\n",
|
||||||
|
"807882 10111077417 DOUVIER FRANCETTE\n",
|
||||||
|
"2158383 10111077417 D'ELLOY FRANCETTE\n",
|
||||||
|
"537977 10111110721 ROCHEPEAU Nadège\n",
|
||||||
|
"2158797 10111110721 BARREAU Nadège\n",
|
||||||
|
"\n",
|
||||||
|
"[5395 rows x 3 columns]"
|
||||||
|
],
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Identifiant PP</th>\n",
|
||||||
|
" <th>Nom d'exercice</th>\n",
|
||||||
|
" <th>Prénom d'exercice</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>index</th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1350393</th>\n",
|
||||||
|
" <td>10000034180</td>\n",
|
||||||
|
" <td>DUWAT-GEORGES</td>\n",
|
||||||
|
" <td>GHISLAINE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1350394</th>\n",
|
||||||
|
" <td>10000034180</td>\n",
|
||||||
|
" <td>GEORGES</td>\n",
|
||||||
|
" <td>GHISLAINE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1350470</th>\n",
|
||||||
|
" <td>10000046051</td>\n",
|
||||||
|
" <td>STUDER</td>\n",
|
||||||
|
" <td>AGNES</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1620048</th>\n",
|
||||||
|
" <td>10000046051</td>\n",
|
||||||
|
" <td>JURION</td>\n",
|
||||||
|
" <td>AGNES</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>269964</th>\n",
|
||||||
|
" <td>10000101518</td>\n",
|
||||||
|
" <td>BARREYRE</td>\n",
|
||||||
|
" <td>SANDRINE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1617156</th>\n",
|
||||||
|
" <td>10110987236</td>\n",
|
||||||
|
" <td>ROGIER</td>\n",
|
||||||
|
" <td>MATHILDE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>807882</th>\n",
|
||||||
|
" <td>10111077417</td>\n",
|
||||||
|
" <td>DOUVIER</td>\n",
|
||||||
|
" <td>FRANCETTE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2158383</th>\n",
|
||||||
|
" <td>10111077417</td>\n",
|
||||||
|
" <td>D'ELLOY</td>\n",
|
||||||
|
" <td>FRANCETTE</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>537977</th>\n",
|
||||||
|
" <td>10111110721</td>\n",
|
||||||
|
" <td>ROCHEPEAU</td>\n",
|
||||||
|
" <td>Nadège</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2158797</th>\n",
|
||||||
|
" <td>10111110721</td>\n",
|
||||||
|
" <td>BARREAU</td>\n",
|
||||||
|
" <td>Nadège</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>5395 rows × 3 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 41,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"execution_count": 41
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-11T22:57:29.038232Z",
|
||||||
|
"start_time": "2025-08-11T22:57:29.014447Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": "df3.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_3-2.csv\", sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')",
|
||||||
|
"outputs": [],
|
||||||
|
"execution_count": 42
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
140
Professionals_Sort.ipynb
Normal file
140
Professionals_Sort.ipynb
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import csv\n",
|
||||||
|
"filename = 'Table_Réf_Professionnels_250815'\n",
|
||||||
|
"df = pd.read_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"+filename+\".csv\", sep='|', doublequote=False, quoting=csv.QUOTE_NONE,dtype=str, na_values='', keep_default_na=False)\n",
|
||||||
|
"df.columns"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-16T01:35:13.352289Z",
|
||||||
|
"start_time": "2025-08-16T01:34:58.550068Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"Index(['Type d'identifiant PP', 'Identifiant PP',\n",
|
||||||
|
" 'Identification nationale PP', 'Code civilité d'exercice',\n",
|
||||||
|
" 'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n",
|
||||||
|
" 'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n",
|
||||||
|
" 'Libellé profession', 'Code catégorie professionnelle',\n",
|
||||||
|
" 'Libellé catégorie professionnelle', 'Code type savoir-faire',\n",
|
||||||
|
" 'Libellé type savoir-faire', 'Code savoir-faire',\n",
|
||||||
|
" 'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n",
|
||||||
|
" 'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n",
|
||||||
|
" 'Numéro FINESS établissement juridique',\n",
|
||||||
|
" 'Identifiant technique de la structure', 'Raison sociale site',\n",
|
||||||
|
" 'Enseigne commerciale site',\n",
|
||||||
|
" 'Complément destinataire (coord. structure)',\n",
|
||||||
|
" 'Complément point géographique (coord. structure)',\n",
|
||||||
|
" 'Numéro Voie (coord. structure)',\n",
|
||||||
|
" 'Indice répétition voie (coord. structure)',\n",
|
||||||
|
" 'Code type de voie (coord. structure)',\n",
|
||||||
|
" 'Libellé type de voie (coord. structure)',\n",
|
||||||
|
" 'Libellé Voie (coord. structure)',\n",
|
||||||
|
" 'Mention distribution (coord. structure)',\n",
|
||||||
|
" 'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n",
|
||||||
|
" 'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n",
|
||||||
|
" 'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n",
|
||||||
|
" 'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n",
|
||||||
|
" 'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n",
|
||||||
|
" 'Code Département (structure)', 'Libellé Département (structure)',\n",
|
||||||
|
" 'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n",
|
||||||
|
" 'Code secteur d'activité', 'Libellé secteur d'activité',\n",
|
||||||
|
" 'Code section tableau pharmaciens',\n",
|
||||||
|
" 'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n",
|
||||||
|
" 'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n",
|
||||||
|
" dtype='object')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"execution_count": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"df.shape"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-16T01:35:13.450995Z",
|
||||||
|
"start_time": "2025-08-16T01:35:13.442103Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"(994582, 57)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"execution_count": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"df_sorted = df.sort_values([\"Identifiant PP\", \"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)', 'Numéro Voie (coord. structure)', 'Indice répétition voie (coord. structure)', 'Libellé type de voie (coord. structure)', 'Libellé Voie (coord. structure)' , 'Mention distribution (coord. structure)', 'Téléphone (coord. structure)'])"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-16T01:35:20.312959Z",
|
||||||
|
"start_time": "2025-08-16T01:35:13.656674Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"execution_count": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": "df_sorted.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"+filename+\"-sorted.csv\", sep='|', index=False, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')",
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2025-08-16T01:35:34.463854Z",
|
||||||
|
"start_time": "2025-08-16T01:35:20.454076Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"execution_count": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
210
Resendo.ipynb
Normal file
210
Resendo.ipynb
Normal file
@@ -0,0 +1,210 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df = pd.read_excel(\"G:\\\\Mon Drive\\\\Ziwig-Health\\\\Data\\\\Extract_Prof_Patient_List.xlsx\", header=3)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "(7728, 9)"
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df.shape"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"timedDf = df.set_index('createdAt')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"timedDf['count']=True"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"timedDf['NonStarted']=1-timedDf['isStartMedicalRecord'].astype(int)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"timedDf['NonFinished']=1-timedDf['isFinishMedicalRecord'].astype(int)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"timedDf=timedDf.loc[:, ['isStartMedicalRecord','isFinishMedicalRecord','count','NonStarted','NonFinished']]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"timedDfMonthly = timedDf.resample('M')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": " isStartMedicalRecord isFinishMedicalRecord count NonStarted \\\ncreatedAt \n2020-11-30 2 2 2 0 \n2020-12-31 3 3 3 0 \n2021-01-31 21 21 21 0 \n2021-02-28 10 10 10 0 \n2021-03-31 348 323 404 56 \n2021-04-30 602 559 712 110 \n2021-05-31 511 465 622 111 \n2021-06-30 406 372 503 97 \n2021-07-31 426 398 498 72 \n2021-08-31 429 393 528 99 \n2021-09-30 561 517 677 116 \n2021-10-31 580 539 696 116 \n2021-11-30 453 416 557 104 \n2021-12-31 480 447 608 128 \n2022-01-31 608 562 786 178 \n2022-02-28 544 502 704 160 \n2022-03-31 286 255 397 111 \n\n NonFinished \ncreatedAt \n2020-11-30 0 \n2020-12-31 0 \n2021-01-31 0 \n2021-02-28 0 \n2021-03-31 81 \n2021-04-30 153 \n2021-05-31 157 \n2021-06-30 131 \n2021-07-31 100 \n2021-08-31 135 \n2021-09-30 160 \n2021-10-31 157 \n2021-11-30 141 \n2021-12-31 161 \n2022-01-31 224 \n2022-02-28 202 \n2022-03-31 142 ",
|
||||||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>isStartMedicalRecord</th>\n <th>isFinishMedicalRecord</th>\n <th>count</th>\n <th>NonStarted</th>\n <th>NonFinished</th>\n </tr>\n <tr>\n <th>createdAt</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2020-11-30</th>\n <td>2</td>\n <td>2</td>\n <td>2</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2020-12-31</th>\n <td>3</td>\n <td>3</td>\n <td>3</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2021-01-31</th>\n <td>21</td>\n <td>21</td>\n <td>21</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2021-02-28</th>\n <td>10</td>\n <td>10</td>\n <td>10</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2021-03-31</th>\n <td>348</td>\n <td>323</td>\n <td>404</td>\n <td>56</td>\n <td>81</td>\n </tr>\n <tr>\n <th>2021-04-30</th>\n <td>602</td>\n <td>559</td>\n <td>712</td>\n <td>110</td>\n <td>153</td>\n </tr>\n <tr>\n <th>2021-05-31</th>\n <td>511</td>\n <td>465</td>\n <td>622</td>\n <td>111</td>\n <td>157</td>\n </tr>\n <tr>\n <th>2021-06-30</th>\n <td>406</td>\n <td>372</td>\n <td>503</td>\n <td>97</td>\n <td>131</td>\n </tr>\n <tr>\n <th>2021-07-31</th>\n <td>426</td>\n <td>398</td>\n <td>498</td>\n <td>72</td>\n <td>100</td>\n </tr>\n <tr>\n <th>2021-08-31</th>\n <td>429</td>\n <td>393</td>\n <td>528</td>\n <td>99</td>\n <td>135</td>\n </tr>\n <tr>\n <th>2021-09-30</th>\n <td>561</td>\n <td>517</td>\n <td>677</td>\n <td>116</td>\n <td>160</td>\n </tr>\n <tr>\n <th>2021-10-31</th>\n <td>580</td>\n <td>539</td>\n <td>696</td>\n <td>116</td>\n <td>157</td>\n </tr>\n <tr>\n <th>2021-11-30</th>\n <td>453</td>\n <td>416</td>\n <td>557</td>\n <td>104</td>\n <td>141</td>\n </tr>\n 
<tr>\n <th>2021-12-31</th>\n <td>480</td>\n <td>447</td>\n <td>608</td>\n <td>128</td>\n <td>161</td>\n </tr>\n <tr>\n <th>2022-01-31</th>\n <td>608</td>\n <td>562</td>\n <td>786</td>\n <td>178</td>\n <td>224</td>\n </tr>\n <tr>\n <th>2022-02-28</th>\n <td>544</td>\n <td>502</td>\n <td>704</td>\n <td>160</td>\n <td>202</td>\n </tr>\n <tr>\n <th>2022-03-31</th>\n <td>286</td>\n <td>255</td>\n <td>397</td>\n <td>111</td>\n <td>142</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||||
|
},
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"timedDfMonthly.sum()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
163
Resendo2.ipynb
Normal file
163
Resendo2.ipynb
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: total: 5.89 s\n",
|
||||||
|
"Wall time: 6.14 s\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "(31371, 9)"
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"df = pd.read_excel(\"G:\\Mon Drive\\Ziwig-Health\\Data\\Extract_Prof_Patient_List_new.xlsx\", header=2)\n",
|
||||||
|
"df.shape"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: total: 46.9 ms\n",
|
||||||
|
"Wall time: 49.9 ms\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "isStartMedicalRecord False True \nisFinishMedicalRecord False False True\ncreatedAt \n2020-11-30 169 9 78\n2020-12-31 226 16 147\n2021-01-31 149 388 842\n2021-02-28 238 164 606\n2021-03-31 652 453 2262\n2021-04-30 250 118 1141\n2021-05-31 269 144 1106\n2021-06-30 283 150 1012\n2021-07-31 227 127 883\n2021-08-31 196 111 912\n2021-09-30 223 142 1254\n2021-10-31 224 112 1176\n2021-11-30 229 110 988\n2021-12-31 466 111 925\n2022-01-31 753 287 1766\n2022-02-28 1095 549 2362\n2022-03-31 520 176 1242\n2022-04-30 395 125 849\n2022-05-31 363 99 771\n2022-06-30 233 65 433",
|
||||||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead tr th {\n text-align: left;\n }\n\n .dataframe thead tr:last-of-type th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr>\n <th>isStartMedicalRecord</th>\n <th>False</th>\n <th colspan=\"2\" halign=\"left\">True</th>\n </tr>\n <tr>\n <th>isFinishMedicalRecord</th>\n <th>False</th>\n <th>False</th>\n <th>True</th>\n </tr>\n <tr>\n <th>createdAt</th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2020-11-30</th>\n <td>169</td>\n <td>9</td>\n <td>78</td>\n </tr>\n <tr>\n <th>2020-12-31</th>\n <td>226</td>\n <td>16</td>\n <td>147</td>\n </tr>\n <tr>\n <th>2021-01-31</th>\n <td>149</td>\n <td>388</td>\n <td>842</td>\n </tr>\n <tr>\n <th>2021-02-28</th>\n <td>238</td>\n <td>164</td>\n <td>606</td>\n </tr>\n <tr>\n <th>2021-03-31</th>\n <td>652</td>\n <td>453</td>\n <td>2262</td>\n </tr>\n <tr>\n <th>2021-04-30</th>\n <td>250</td>\n <td>118</td>\n <td>1141</td>\n </tr>\n <tr>\n <th>2021-05-31</th>\n <td>269</td>\n <td>144</td>\n <td>1106</td>\n </tr>\n <tr>\n <th>2021-06-30</th>\n <td>283</td>\n <td>150</td>\n <td>1012</td>\n </tr>\n <tr>\n <th>2021-07-31</th>\n <td>227</td>\n <td>127</td>\n <td>883</td>\n </tr>\n <tr>\n <th>2021-08-31</th>\n <td>196</td>\n <td>111</td>\n <td>912</td>\n </tr>\n <tr>\n <th>2021-09-30</th>\n <td>223</td>\n <td>142</td>\n <td>1254</td>\n </tr>\n <tr>\n <th>2021-10-31</th>\n <td>224</td>\n <td>112</td>\n <td>1176</td>\n </tr>\n <tr>\n <th>2021-11-30</th>\n <td>229</td>\n <td>110</td>\n <td>988</td>\n </tr>\n <tr>\n <th>2021-12-31</th>\n <td>466</td>\n <td>111</td>\n <td>925</td>\n </tr>\n <tr>\n <th>2022-01-31</th>\n <td>753</td>\n <td>287</td>\n <td>1766</td>\n </tr>\n <tr>\n <th>2022-02-28</th>\n <td>1095</td>\n <td>549</td>\n <td>2362</td>\n </tr>\n <tr>\n <th>2022-03-31</th>\n 
<td>520</td>\n <td>176</td>\n <td>1242</td>\n </tr>\n <tr>\n <th>2022-04-30</th>\n <td>395</td>\n <td>125</td>\n <td>849</td>\n </tr>\n <tr>\n <th>2022-05-31</th>\n <td>363</td>\n <td>99</td>\n <td>771</td>\n </tr>\n <tr>\n <th>2022-06-30</th>\n <td>233</td>\n <td>65</td>\n <td>433</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||||
|
},
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"pd.pivot_table(data = df, index=pd.Grouper(key=\"createdAt\", freq=\"M\"), columns=[\"isStartMedicalRecord\",\"isFinishMedicalRecord\"], values=\"fullName\", aggfunc=\"count\", fill_value= 0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: total: 46.9 ms\n",
|
||||||
|
"Wall time: 46.9 ms\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "isStartMedicalRecord False True\ncreatedAt \n2020-11-30 169 87\n2020-12-31 226 163\n2021-01-31 149 1230\n2021-02-28 238 770\n2021-03-31 652 2715\n2021-04-30 250 1259\n2021-05-31 269 1250\n2021-06-30 283 1162\n2021-07-31 227 1010\n2021-08-31 196 1023\n2021-09-30 223 1396\n2021-10-31 224 1288\n2021-11-30 229 1098\n2021-12-31 466 1036\n2022-01-31 753 2053\n2022-02-28 1095 2911\n2022-03-31 520 1418\n2022-04-30 395 974\n2022-05-31 363 870\n2022-06-30 233 498",
|
||||||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>isStartMedicalRecord</th>\n <th>False</th>\n <th>True</th>\n </tr>\n <tr>\n <th>createdAt</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2020-11-30</th>\n <td>169</td>\n <td>87</td>\n </tr>\n <tr>\n <th>2020-12-31</th>\n <td>226</td>\n <td>163</td>\n </tr>\n <tr>\n <th>2021-01-31</th>\n <td>149</td>\n <td>1230</td>\n </tr>\n <tr>\n <th>2021-02-28</th>\n <td>238</td>\n <td>770</td>\n </tr>\n <tr>\n <th>2021-03-31</th>\n <td>652</td>\n <td>2715</td>\n </tr>\n <tr>\n <th>2021-04-30</th>\n <td>250</td>\n <td>1259</td>\n </tr>\n <tr>\n <th>2021-05-31</th>\n <td>269</td>\n <td>1250</td>\n </tr>\n <tr>\n <th>2021-06-30</th>\n <td>283</td>\n <td>1162</td>\n </tr>\n <tr>\n <th>2021-07-31</th>\n <td>227</td>\n <td>1010</td>\n </tr>\n <tr>\n <th>2021-08-31</th>\n <td>196</td>\n <td>1023</td>\n </tr>\n <tr>\n <th>2021-09-30</th>\n <td>223</td>\n <td>1396</td>\n </tr>\n <tr>\n <th>2021-10-31</th>\n <td>224</td>\n <td>1288</td>\n </tr>\n <tr>\n <th>2021-11-30</th>\n <td>229</td>\n <td>1098</td>\n </tr>\n <tr>\n <th>2021-12-31</th>\n <td>466</td>\n <td>1036</td>\n </tr>\n <tr>\n <th>2022-01-31</th>\n <td>753</td>\n <td>2053</td>\n </tr>\n <tr>\n <th>2022-02-28</th>\n <td>1095</td>\n <td>2911</td>\n </tr>\n <tr>\n <th>2022-03-31</th>\n <td>520</td>\n <td>1418</td>\n </tr>\n <tr>\n <th>2022-04-30</th>\n <td>395</td>\n <td>974</td>\n </tr>\n <tr>\n <th>2022-05-31</th>\n <td>363</td>\n <td>870</td>\n </tr>\n <tr>\n <th>2022-06-30</th>\n <td>233</td>\n <td>498</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||||
|
},
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"pd.pivot_table(data = df, index=pd.Grouper(key=\"createdAt\", freq=\"M\"), columns=[\"isStartMedicalRecord\"], values=\"fullName\", aggfunc=\"count\", fill_value=0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"pycharm": {
|
||||||
|
"name": "#%%\n"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"CPU times: total: 62.5 ms\n",
|
||||||
|
"Wall time: 58.8 ms\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "isFinishMedicalRecord False True\ncreatedAt \n2020-11-30 178 78\n2020-12-31 242 147\n2021-01-31 537 842\n2021-02-28 402 606\n2021-03-31 1105 2262\n2021-04-30 368 1141\n2021-05-31 413 1106\n2021-06-30 433 1012\n2021-07-31 354 883\n2021-08-31 307 912\n2021-09-30 365 1254\n2021-10-31 336 1176\n2021-11-30 339 988\n2021-12-31 577 925\n2022-01-31 1040 1766\n2022-02-28 1644 2362\n2022-03-31 696 1242\n2022-04-30 520 849\n2022-05-31 462 771\n2022-06-30 298 433",
|
||||||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>isFinishMedicalRecord</th>\n <th>False</th>\n <th>True</th>\n </tr>\n <tr>\n <th>createdAt</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2020-11-30</th>\n <td>178</td>\n <td>78</td>\n </tr>\n <tr>\n <th>2020-12-31</th>\n <td>242</td>\n <td>147</td>\n </tr>\n <tr>\n <th>2021-01-31</th>\n <td>537</td>\n <td>842</td>\n </tr>\n <tr>\n <th>2021-02-28</th>\n <td>402</td>\n <td>606</td>\n </tr>\n <tr>\n <th>2021-03-31</th>\n <td>1105</td>\n <td>2262</td>\n </tr>\n <tr>\n <th>2021-04-30</th>\n <td>368</td>\n <td>1141</td>\n </tr>\n <tr>\n <th>2021-05-31</th>\n <td>413</td>\n <td>1106</td>\n </tr>\n <tr>\n <th>2021-06-30</th>\n <td>433</td>\n <td>1012</td>\n </tr>\n <tr>\n <th>2021-07-31</th>\n <td>354</td>\n <td>883</td>\n </tr>\n <tr>\n <th>2021-08-31</th>\n <td>307</td>\n <td>912</td>\n </tr>\n <tr>\n <th>2021-09-30</th>\n <td>365</td>\n <td>1254</td>\n </tr>\n <tr>\n <th>2021-10-31</th>\n <td>336</td>\n <td>1176</td>\n </tr>\n <tr>\n <th>2021-11-30</th>\n <td>339</td>\n <td>988</td>\n </tr>\n <tr>\n <th>2021-12-31</th>\n <td>577</td>\n <td>925</td>\n </tr>\n <tr>\n <th>2022-01-31</th>\n <td>1040</td>\n <td>1766</td>\n </tr>\n <tr>\n <th>2022-02-28</th>\n <td>1644</td>\n <td>2362</td>\n </tr>\n <tr>\n <th>2022-03-31</th>\n <td>696</td>\n <td>1242</td>\n </tr>\n <tr>\n <th>2022-04-30</th>\n <td>520</td>\n <td>849</td>\n </tr>\n <tr>\n <th>2022-05-31</th>\n <td>462</td>\n <td>771</td>\n </tr>\n <tr>\n <th>2022-06-30</th>\n <td>298</td>\n <td>433</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||||
|
},
|
||||||
|
"execution_count": 12,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"pd.pivot_table(data = df, index=pd.Grouper(key=\"createdAt\", freq=\"M\"), columns=[\"isFinishMedicalRecord\"], values=\"fullName\", aggfunc=\"count\", fill_value=0)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
BIN
Table_Réf_Professionnels - Copie.xlsx
Normal file
BIN
Table_Réf_Professionnels - Copie.xlsx
Normal file
Binary file not shown.
BIN
Table_Réf_Professionnels.xlsx
Normal file
BIN
Table_Réf_Professionnels.xlsx
Normal file
Binary file not shown.
BIN
Table_Réf_Professionnels_inconsistencies.xlsx
Normal file
BIN
Table_Réf_Professionnels_inconsistencies.xlsx
Normal file
Binary file not shown.
276
datacompyProfessionals.ipynb
Normal file
276
datacompyProfessionals.ipynb
Normal file
File diff suppressed because one or more lines are too long
204
datacompyTest.ipynb
Normal file
204
datacompyTest.ipynb
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 23,
|
||||||
|
"id": "initial_id",
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": true,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-01-23T18:53:49.676160800Z",
|
||||||
|
"start_time": "2024-01-23T18:53:49.620035200Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"from io import StringIO\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import datacompy\n",
|
||||||
|
"\n",
|
||||||
|
"data1 = \"\"\"acct_id,dollar_amt,name,float_fld,date_fld\n",
|
||||||
|
"10000001234,123.45,George Maharis,14530.1555,2017-01-01\n",
|
||||||
|
"10000001235,0.45,Michael Bluth,1,2017-01-01\n",
|
||||||
|
"10000001236,1345,George Bluth,,2017-01-01\n",
|
||||||
|
"10000001237,123456,Bob Loblaw,345.12,2017-01-01\n",
|
||||||
|
"10000001237,123457,Bob Loblaw,345.12,2017-01-01\n",
|
||||||
|
"10000001239,1.05,Lucille Bluth,,2017-01-01\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"data2 = \"\"\"acct_id,dollar_amt,name,float_fld\n",
|
||||||
|
"10000001234,123.4,George Michael Bluth,14530.155\n",
|
||||||
|
"10000001235,0.45,Michael Bluth,\n",
|
||||||
|
"10000001236,1345,George Bluth,1\n",
|
||||||
|
"10000001237,123456,Robert Loblaw,345.12\n",
|
||||||
|
"10000001238,1.05,Loose Seal Bluth,111\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df1 = pd.read_csv(StringIO(data1))\n",
|
||||||
|
"df2 = pd.read_csv(StringIO(data2))\n",
|
||||||
|
"\n",
|
||||||
|
"compare = datacompy.Compare(\n",
|
||||||
|
" df1,\n",
|
||||||
|
" df2,\n",
|
||||||
|
" join_columns='acct_id', #You can also specify a list of columns\n",
|
||||||
|
" abs_tol=0, #Optional, defaults to 0\n",
|
||||||
|
" rel_tol=0, #Optional, defaults to 0\n",
|
||||||
|
" df1_name='Original', #Optional, defaults to 'df1'\n",
|
||||||
|
" df2_name='New' #Optional, defaults to 'df2'\n",
|
||||||
|
" )\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": " acct_id dollar_amt_df1 dollar_amt_df2 name_df1 \\\n0 10000001234 123.45 123.40 George Maharis \n1 10000001235 0.45 0.45 Michael Bluth \n2 10000001236 1345.00 1345.00 George Bluth \n3 10000001237 123456.00 123456.00 Bob Loblaw \n\n name_df2 float_fld_df1 float_fld_df2 \n0 George Michael Bluth 14530.1555 14530.155 \n1 Michael Bluth 1.0000 NaN \n2 George Bluth NaN 1.000 \n3 Robert Loblaw 345.1200 345.120 ",
|
||||||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt_df1</th>\n <th>dollar_amt_df2</th>\n <th>name_df1</th>\n <th>name_df2</th>\n <th>float_fld_df1</th>\n <th>float_fld_df2</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10000001234</td>\n <td>123.45</td>\n <td>123.40</td>\n <td>George Maharis</td>\n <td>George Michael Bluth</td>\n <td>14530.1555</td>\n <td>14530.155</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10000001235</td>\n <td>0.45</td>\n <td>0.45</td>\n <td>Michael Bluth</td>\n <td>Michael Bluth</td>\n <td>1.0000</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10000001236</td>\n <td>1345.00</td>\n <td>1345.00</td>\n <td>George Bluth</td>\n <td>George Bluth</td>\n <td>NaN</td>\n <td>1.000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10000001237</td>\n <td>123456.00</td>\n <td>123456.00</td>\n <td>Bob Loblaw</td>\n <td>Robert Loblaw</td>\n <td>345.1200</td>\n <td>345.120</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||||
|
},
|
||||||
|
"execution_count": 24,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"compare.all_mismatch(ignore_matching_cols=True)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-01-23T18:53:56.135115400Z",
|
||||||
|
"start_time": "2024-01-23T18:53:56.086349900Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "2f16ab257397f6c9",
|
||||||
|
"execution_count": 24
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": " acct_id dollar_amt name float_fld date_fld\n4 10000001237 123457.00 Bob Loblaw 345.12 2017-01-01\n5 10000001239 1.05 Lucille Bluth NaN 2017-01-01",
|
||||||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n <th>date_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>4</th>\n <td>10000001237</td>\n <td>123457.00</td>\n <td>Bob Loblaw</td>\n <td>345.12</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>5</th>\n <td>10000001239</td>\n <td>1.05</td>\n <td>Lucille Bluth</td>\n <td>NaN</td>\n <td>2017-01-01</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||||
|
},
|
||||||
|
"execution_count": 25,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"compare.df1_unq_rows"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-01-23T18:53:59.793951800Z",
|
||||||
|
"start_time": "2024-01-23T18:53:59.751624300Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "f38ecf439538fc9b",
|
||||||
|
"execution_count": 25
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": " acct_id dollar_amt name float_fld\n6 10000001238 1.05 Loose Seal Bluth 111.0",
|
||||||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>6</th>\n <td>10000001238</td>\n <td>1.05</td>\n <td>Loose Seal Bluth</td>\n <td>111.0</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||||
|
},
|
||||||
|
"execution_count": 26,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"compare.df2_unq_rows"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-01-23T18:54:20.805047600Z",
|
||||||
|
"start_time": "2024-01-23T18:54:20.777818600Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "b0a4c80da0847ac0",
|
||||||
|
"execution_count": 26
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": " acct_id dollar_amt name float_fld date_fld\n0 10000001234 123.45 George Maharis 14530.1555 2017-01-01\n1 10000001235 0.45 Michael Bluth 1.0000 2017-01-01\n2 10000001236 1345.00 George Bluth NaN 2017-01-01\n3 10000001237 123456.00 Bob Loblaw 345.1200 2017-01-01\n4 10000001237 123457.00 Bob Loblaw 345.1200 2017-01-01\n5 10000001239 1.05 Lucille Bluth NaN 2017-01-01",
|
||||||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n <th>date_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10000001234</td>\n <td>123.45</td>\n <td>George Maharis</td>\n <td>14530.1555</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10000001235</td>\n <td>0.45</td>\n <td>Michael Bluth</td>\n <td>1.0000</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10000001236</td>\n <td>1345.00</td>\n <td>George Bluth</td>\n <td>NaN</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10000001237</td>\n <td>123456.00</td>\n <td>Bob Loblaw</td>\n <td>345.1200</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>4</th>\n <td>10000001237</td>\n <td>123457.00</td>\n <td>Bob Loblaw</td>\n <td>345.1200</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>5</th>\n <td>10000001239</td>\n <td>1.05</td>\n <td>Lucille Bluth</td>\n <td>NaN</td>\n <td>2017-01-01</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||||
|
},
|
||||||
|
"execution_count": 27,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df1"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-01-23T18:54:25.595365100Z",
|
||||||
|
"start_time": "2024-01-23T18:54:25.533925200Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "b9aa33151fa6f235",
|
||||||
|
"execution_count": 27
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": " acct_id dollar_amt name float_fld\n0 10000001234 123.40 George Michael Bluth 14530.155\n1 10000001235 0.45 Michael Bluth NaN\n2 10000001236 1345.00 George Bluth 1.000\n3 10000001237 123456.00 Robert Loblaw 345.120\n4 10000001238 1.05 Loose Seal Bluth 111.000",
|
||||||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10000001234</td>\n <td>123.40</td>\n <td>George Michael Bluth</td>\n <td>14530.155</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10000001235</td>\n <td>0.45</td>\n <td>Michael Bluth</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10000001236</td>\n <td>1345.00</td>\n <td>George Bluth</td>\n <td>1.000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10000001237</td>\n <td>123456.00</td>\n <td>Robert Loblaw</td>\n <td>345.120</td>\n </tr>\n <tr>\n <th>4</th>\n <td>10000001238</td>\n <td>1.05</td>\n <td>Loose Seal Bluth</td>\n <td>111.000</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||||
|
},
|
||||||
|
"execution_count": 28,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df2"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-01-23T18:54:28.672000100Z",
|
||||||
|
"start_time": "2024-01-23T18:54:28.631719300Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "aaa69421db146ed7",
|
||||||
|
"execution_count": 28
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 2
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython2",
|
||||||
|
"version": "2.7.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
164
prepareProfessionalsTable.py
Normal file
164
prepareProfessionalsTable.py
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
import argparse
|
||||||
|
import csv
|
||||||
|
import math
|
||||||
|
import sys
|
||||||
|
from os import path
|
||||||
|
from shutil import copyfileobj
|
||||||
|
from zipfile import ZipFile, is_zipfile
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
from tqdm import tqdm # could use from tqdm.gui import tqdm
|
||||||
|
from tqdm.utils import CallbackIOWrapper
|
||||||
|
from urllib3.exceptions import InsecureRequestWarning
|
||||||
|
from urllib3 import disable_warnings
|
||||||
|
import questionary
|
||||||
|
|
||||||
|
|
||||||
|
def process_professionals_table(xls_file, txt_file, output_file):
    """Filter the raw professionals extract and write it as a pipe-separated CSV.

    The raw extract is read in chunks; in every chunk a fixed set of columns
    is blanked and only rows whose 'Libellé profession' value appears in the
    'Professions' column of the workbook's 'F_Professions' sheet are kept.
    The rows of the 'F_Append_Update', 'F_Etrangers', 'F_Fake' and
    'F_Sophrologues' sheets then go through the same treatment and are
    appended to the output.

    Args:
        xls_file: Path of the Excel workbook holding the sheets named above.
        txt_file: Path of the pipe-separated input file (first line = header).
        output_file: Path of the CSV to write; an existing file is overwritten
            by the first chunk.

    Returns:
        None. The result is the file written at ``output_file``.
    """
    # Load every sheet as strings; only the empty string is treated as missing.
    xls = pd.read_excel(xls_file, sheet_name=None, dtype=str,
                        na_values='', keep_default_na=False)
    professions = xls['F_Professions']['Professions'].tolist()

    # Progress-bar total: physical line count minus the header line. The file
    # is opened in a `with` block so the handle is released immediately
    # instead of being left for the garbage collector.
    with open(txt_file, 'rb') as raw_lines:
        estimated_total_rows = sum(1 for _ in raw_lines) - 1
    chunk_size = 20000

    # Positional indices (0..55) of the columns that are emptied in the output.
    columns_to_clean = np.r_[0, 2, 4:7, 9, 11:16, 17:28, 30, 35:40, 41:56]

    # Options shared by every write so header, chunks and appended sheets
    # are all emitted in exactly the same format.
    csv_options = dict(sep='|', index=False, doublequote=False,
                       quoting=csv.QUOTE_NONE, lineterminator='\n')

    with tqdm(total=estimated_total_rows, desc=f'Writing to {path.basename(output_file)}',
              leave=True, unit="Ln") as bar, \
            pd.read_csv(txt_file, sep='|', doublequote=False, quoting=csv.QUOTE_NONE,
                        dtype=str, na_values='', keep_default_na=False,
                        chunksize=chunk_size) as chunks:
        for i, df in enumerate(chunks):
            n_rows = df.shape[0]  # counted before filtering so the bar tracks input lines
            df.iloc[:, columns_to_clean] = ''
            df = df[df['Libellé profession'].isin(professions)]

            if i == 0:
                # First chunk creates/overwrites the file and writes the header.
                df.to_csv(output_file, **csv_options)
            else:
                df.to_csv(output_file, header=False, mode='a', **csv_options)

            bar.update(n_rows)

    # Append the extra workbook sheets. The empty slice of the last chunk only
    # contributes its column layout to the concatenation
    # (presumably so the sheets are aligned on the CSV's columns — confirm).
    df = pd.concat([df[:0], xls['F_Append_Update'], xls['F_Etrangers'],
                    xls['F_Fake'], xls['F_Sophrologues']], ignore_index=True)
    df.iloc[:, columns_to_clean] = ''
    df = df[df['Libellé profession'].isin(professions)]
    df.to_csv(output_file, header=False, mode='a', **csv_options)
|
||||||
|
|
||||||
|
|
||||||
|
def download_file(url: str, filename: str = False) -> object:
    """Stream ``url`` to a local file while showing a tqdm progress bar.

    Args:
        url: Address of the resource to download.
        filename: Destination path. When falsy, the last ``/``-separated
            segment of ``url`` is used, placed in the current directory.

    Returns:
        None.

    Exceptions raised by ``requests.get`` or by opening/writing the local
    file are not caught here and propagate to the caller.
    """
    local_filename = filename if filename else path.join(".", url.split('/')[-1])

    # NOTE(review): TLS certificate verification is disabled (verify=False)
    # and the matching urllib3 warning is silenced — confirm this is intended.
    disable_warnings(InsecureRequestWarning)

    unit_scale = 64  # size of one downloaded chunk, in KiB

    # The response is used as a context manager so the streamed connection is
    # released even if writing the file fails part-way through.
    with requests.get(url, stream=True, verify=False) as r:
        # Content-Length may be absent (e.g. chunked transfer encoding); in
        # that case the bar runs without a total instead of raising KeyError.
        content_length = r.headers.get('Content-Length')
        if content_length is None:
            total_chunks = None
        else:
            total_chunks = math.ceil(int(content_length) / 1024 / unit_scale)

        with open(local_filename, 'wb') as fp:
            for chunk in tqdm(r.iter_content(chunk_size=unit_scale * 1024),
                              total=total_chunks,
                              unit_scale=unit_scale,
                              unit='KB',
                              desc=f"Downloading to {path.basename(local_filename)}",
                              leave=True):
                fp.write(chunk)
    return
|
||||||
|
|
||||||
|
|
||||||
|
def extract_one_file_from_zip(zipfile, fromfile, tofile, desc=False):
    """Extract the first archive member whose name starts with ``fromfile``.

    Args:
        zipfile: Path of the Zip archive to read.
        fromfile: Prefix of the member name to look for; the first member, in
            archive order, whose name starts with it is extracted.
        tofile: Path of the file the member's content is written to.
        desc: Progress-bar label. When falsy, a default label is built from
            the base name of ``tofile``.

    Returns:
        None on success. Otherwise a str describing the problem: the archive
        is missing/invalid, or no member name starts with ``fromfile``.
    """
    if not desc:
        desc = f"Extracting to {path.basename(tofile)}"
    if not is_zipfile(zipfile):
        return f"Can't open Zipfile (non existent or bad): {zipfile}"

    # The archive is opened inside `with` from the start so that it is closed
    # on every exit path, including the "no matching member" early return.
    with ZipFile(zipfile) as zipf:
        member = next((info for info in zipf.infolist()
                       if info.filename.startswith(fromfile)), None)
        if member is None:
            return f"No such file name in the Zip ({fromfile}*)..."

        with tqdm(desc=desc, unit="B", unit_scale=True, unit_divisor=1024,
                  total=member.file_size, leave=True) as pbar, \
                zipf.open(member) as fi, open(tofile, "wb") as fo:
            # The wrapper passes pbar.update as a callback around the
            # member's reads, so the bar advances while data is copied.
            copyfileobj(CallbackIOWrapper(pbar.update, fi), fo)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: download, unzip and process the table.

    The three stages run in order and each one can be disabled with its own
    flag (``--noDownload``, ``--noUnzip``, ``--noProcess``). All files are
    located next to this script and derive their names from the ``fileName``
    and ``--excelFileName`` arguments.

    When the file name is left at its default, the user is asked to confirm
    it; an empty or cancelled answer ends the program with exit status 0.
    """
    defaultFileName = 'Table_Réf_Professionnels'
    defaultExcelFileName = 'Table_Réf_Professionnels'
    internalFileName = 'PS_LibreAcces_Personne_activite'

    parser = argparse.ArgumentParser(description='Prepare Professionals Table for Import to Endoziwig.')
    parser.add_argument('fileName', type=str, nargs='?', default=defaultFileName,
                        help=f'File name to use : default="{defaultFileName}"')
    parser.add_argument('--excelFileName', '-x', type=str, nargs='?', default=defaultExcelFileName,
                        help=f'Excel File Containing Append Data: default="{defaultExcelFileName}" (without extension)')
    parser.add_argument('--noDownload', '-ndw', action='store_true',
                        help='Do not Download the file (Default = Download).')
    parser.add_argument('--noUnzip', '-nuz', action='store_true',
                        help='Do not Unzip the file (Default = Unzip).')
    parser.add_argument('--noProcess', '-npr', action='store_true',
                        help='Do not Process the file (Default = Process).')

    args = parser.parse_args()

    if len(sys.argv) == 1:
        print("You're about to download and prepare Professionals Table for import to Endoziwig")

    # Files Settings
    if args.fileName == defaultFileName:
        print("\n")
        args.fileName = questionary.text("Please confirm file name (or empty to cancel):",
                                         default=defaultFileName).ask()

    # ``ask()`` yields None when the prompt is cancelled (e.g. Ctrl-C) and ''
    # when the answer is empty; both mean "cancel". Testing only for '' would
    # let None through and produce files literally named "None.zip".
    if not args.fileName:
        sys.exit(0)

    BASE_DIR = path.dirname(path.abspath(__file__))
    zipFileName = path.join(BASE_DIR, f'{args.fileName}.zip')
    xlsFileName = path.join(BASE_DIR, f'{args.excelFileName}.xlsx')
    txtFileName = path.join(BASE_DIR, f'{args.fileName}.txt')
    outputFileName = path.join(BASE_DIR, f'{args.fileName}.csv')

    print("\n")

    if not args.noDownload:
        download_file(
            'https://service.annuaire.sante.fr/annuaire-sante-webservices/V300/services/extraction/PS_LibreAcces',
            filename=zipFileName)
        print("\n")

    if not args.noUnzip:
        # The extractor reports problems as a message instead of raising.
        unzipResult = extract_one_file_from_zip(zipFileName, internalFileName, txtFileName)
        if unzipResult is not None:
            print(unzipResult)
        print("\n")

    if not args.noProcess:
        process_professionals_table(xlsFileName, txtFileName, outputFileName)
        print("\n")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script boundary: any ordinary failure is reported as a plain message
    # rather than a traceback.
    try:
        main()
    except Exception as e:
        print(e)
    finally:
        # Always wait for the user before ending, whatever happened above
        # (presumably so a console window opened for the script stays readable).
        input('Finished... Press Enter to continue')
        print('\n')
|
||||||
|
|
||||||
Reference in New Issue
Block a user