Initial Commit

2026-03-05 11:11:10 +00:00
commit 0ae48d63f2
16 changed files with 3247 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,4 @@
 *.rar
 *.zip
 *.txt
 *.csv
 # Jupyter autosave copies; they duplicate the notebooks and should not be versioned
 .ipynb_checkpoints/
--- a/.ipynb_checkpoints/Professionals_Sort-checkpoint.ipynb
+++ b/.ipynb_checkpoints/Professionals_Sort-checkpoint.ipynb
@@ -0,0 +1,76 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import csv\n",
    "filename = 'Table_Réf_Professionnels_250430'\n",
    "df = pd.read_csv(\"H:\\\\Mon Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"+filename+\".csv\", sep='|', doublequote=False, quoting=csv.QUOTE_NONE,dtype=str, na_values='', keep_default_na=False)\n",
    "df.columns"
   ],
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "is_executing": true
    }
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "df.shape"
   ],
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "df_sorted = df.sort_values([\"Identifiant PP\", \"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)', 'Numéro Voie (coord. structure)', 'Indice répétition voie (coord. structure)', 'Libellé type de voie (coord. structure)', 'Libellé Voie (coord. structure)' , 'Mention distribution (coord. structure)', 'Téléphone (coord. structure)'])"
   ],
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "df_sorted.to_csv(\"H:\\\\Mon Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"+filename+\"-sorted.csv\", sep='|', index=False, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')"
   ],
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
--- a/.ipynb_checkpoints/Untitled-checkpoint.ipynb
+++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,6 @@
 {
 "cells": [],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/Professionals.ipynb
+++ b/Professionals.ipynb
@@ -0,0 +1,176 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": [],
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "Index(['Type d'identifiant PP', 'Identifiant PP',\n       'Identification nationale PP', 'Code civilité d'exercice',\n       'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n       'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n       'Libellé profession', 'Code catégorie professionnelle',\n       'Libellé catégorie professionnelle', 'Code type savoir-faire',\n       'Libellé type savoir-faire', 'Code savoir-faire',\n       'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n       'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n       'Numéro FINESS établissement juridique',\n       'Identifiant technique de la structure', 'Raison sociale site',\n       'Enseigne commerciale site',\n       'Complément destinataire (coord. structure)',\n       'Complément point géographique (coord. structure)',\n       'Numéro Voie (coord. structure)',\n       'Indice répétition voie (coord. structure)',\n       'Code type de voie (coord. structure)',\n       'Libellé type de voie (coord. structure)',\n       'Libellé Voie (coord. structure)',\n       'Mention distribution (coord. structure)',\n       'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n       'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n       'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n       'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n       'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n       'Code Département (structure)', 'Libellé Département (structure)',\n       'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n       'Code secteur d'activité', 'Libellé secteur d'activité',\n       'Code section tableau pharmaciens',\n       'Libellé section tableau pharmaciens', 'Unnamed: 52'],\n      dtype='object')"
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import csv\n",
    "df = pd.read_csv(\"C:\\_temp\\Professionnels\\Table_Réf_Professionnels_220615.csv\", sep='|', doublequote=False, quoting=csv.QUOTE_NONE,\n",
    "                                           dtype=str, na_values='', keep_default_na=False)\n",
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": "(864328, 53)"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "(843643, 53)"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates().shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": "(812168, 53)"
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates([\"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)']).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "(814972, 53)"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates([\"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)', \"Identifiant PP\"]).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "(839643, 53)"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates([\"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)', \"Identifiant PP\", 'Numéro Voie (coord. structure)', 'Indice répétition voie (coord. structure)', 'Libellé type de voie (coord. structure)', 'Libellé Voie (coord. structure)' , 'Mention distribution (coord. structure)']).shape"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
--- a/Professionals_Activities_Inconsistencies.ipynb
+++ b/Professionals_Activities_Inconsistencies.ipynb
--- a/Professionals_Multiple_Names2.ipynb
+++ b/Professionals_Multiple_Names2.ipynb
@@ -0,0 +1,465 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "metadata": {
    "tags": [],
    "ExecuteTime": {
     "end_time": "2025-08-12T19:54:05.652708Z",
     "start_time": "2025-08-12T19:53:30.037989Z"
    }
   },
   "source": [
    "%%time\n",
    "import pandas as pd\n",
    "import csv\n",
    "df = pd.read_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804.txt\", sep='|',\n",
    "                 doublequote=False, quoting=csv.QUOTE_NONE, dtype=str, na_values='', keep_default_na=False)\n",
    "df.index.name = 'index'\n",
    "df.columns"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 35 s\n",
      "Wall time: 35.6 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['Type d'identifiant PP', 'Identifiant PP',\n",
       "       'Identification nationale PP', 'Code civilité d'exercice',\n",
       "       'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n",
       "       'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n",
       "       'Libellé profession', 'Code catégorie professionnelle',\n",
       "       'Libellé catégorie professionnelle', 'Code type savoir-faire',\n",
       "       'Libellé type savoir-faire', 'Code savoir-faire',\n",
       "       'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n",
       "       'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n",
       "       'Numéro FINESS établissement juridique',\n",
       "       'Identifiant technique de la structure', 'Raison sociale site',\n",
       "       'Enseigne commerciale site',\n",
       "       'Complément destinataire (coord. structure)',\n",
       "       'Complément point géographique (coord. structure)',\n",
       "       'Numéro Voie (coord. structure)',\n",
       "       'Indice répétition voie (coord. structure)',\n",
       "       'Code type de voie (coord. structure)',\n",
       "       'Libellé type de voie (coord. structure)',\n",
       "       'Libellé Voie (coord. structure)',\n",
       "       'Mention distribution (coord. structure)',\n",
       "       'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n",
       "       'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n",
       "       'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n",
       "       'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n",
       "       'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n",
       "       'Code Département (structure)', 'Libellé Département (structure)',\n",
       "       'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n",
       "       'Code secteur d'activité', 'Libellé secteur d'activité',\n",
       "       'Code section tableau pharmaciens',\n",
       "       'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n",
       "       'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 19
  },
  {
   "cell_type": "code",
   "metadata": {
    "tags": [],
    "ExecuteTime": {
     "end_time": "2025-08-12T19:54:12.829107Z",
     "start_time": "2025-08-12T19:54:05.751406Z"
    }
   },
   "source": [
    "%%time\n",
    "df2 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"]+\" \"+df[\"Prénom d'exercice\"]) \\\n",
    "        .groupby('Identifiant PP')[['Nom_Prénom']] \\\n",
    "        .transform('nunique').rename(columns={'Nom_Prénom' : 'Count'}).query('Count > 1') \\\n",
    "        .join(df)[['Identifiant PP','Count',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
    "        .sort_values(['Identifiant PP', 'index'])\n",
    "\n",
    "df2"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 6.81 s\n",
      "Wall time: 7.06 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "        Identifiant PP  Count Nom d'exercice Prénom d'exercice\n",
       "index                                                         \n",
       "1350393    10000034180      2  DUWAT-GEORGES         GHISLAINE\n",
       "1350394    10000034180      2        GEORGES         GHISLAINE\n",
       "259        10000040062      2          MEYER           Nicolas\n",
       "260        10000040062      2          MEYER           Nicolas\n",
       "809702     10000040062      2          MEYER           NICOLAS\n",
       "...                ...    ...            ...               ...\n",
       "2158383    10111077417      2        D'ELLOY         FRANCETTE\n",
       "537896     10111105358      2           HOMO             Maddy\n",
       "1889090    10111105358      2           Homo             Maddy\n",
       "537977     10111110721      2      ROCHEPEAU            Nadège\n",
       "2158797    10111110721      2        BARREAU            Nadège\n",
       "\n",
       "[9059 rows x 4 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Identifiant PP</th>\n",
       "      <th>Count</th>\n",
       "      <th>Nom d'exercice</th>\n",
       "      <th>Prénom d'exercice</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1350393</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>2</td>\n",
       "      <td>DUWAT-GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1350394</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>2</td>\n",
       "      <td>GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>259</th>\n",
       "      <td>10000040062</td>\n",
       "      <td>2</td>\n",
       "      <td>MEYER</td>\n",
       "      <td>Nicolas</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>260</th>\n",
       "      <td>10000040062</td>\n",
       "      <td>2</td>\n",
       "      <td>MEYER</td>\n",
       "      <td>Nicolas</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>809702</th>\n",
       "      <td>10000040062</td>\n",
       "      <td>2</td>\n",
       "      <td>MEYER</td>\n",
       "      <td>NICOLAS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158383</th>\n",
       "      <td>10111077417</td>\n",
       "      <td>2</td>\n",
       "      <td>D'ELLOY</td>\n",
       "      <td>FRANCETTE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537896</th>\n",
       "      <td>10111105358</td>\n",
       "      <td>2</td>\n",
       "      <td>HOMO</td>\n",
       "      <td>Maddy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1889090</th>\n",
       "      <td>10111105358</td>\n",
       "      <td>2</td>\n",
       "      <td>Homo</td>\n",
       "      <td>Maddy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537977</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>2</td>\n",
       "      <td>ROCHEPEAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158797</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>2</td>\n",
       "      <td>BARREAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9059 rows × 4 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 20
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-12T19:54:13.114103Z",
     "start_time": "2025-08-12T19:54:13.063080Z"
    }
   },
   "cell_type": "code",
   "source": [
    "df2.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_2-1.csv\",\n",
    "           sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')"
   ],
   "outputs": [],
   "execution_count": 21
  },
  {
   "cell_type": "code",
   "metadata": {
    "tags": [],
    "ExecuteTime": {
     "end_time": "2025-08-12T19:54:20.671679Z",
     "start_time": "2025-08-12T19:54:13.377047Z"
    }
   },
   "source": [
    "%%time\n",
    "df3 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"].str.lower()+\" \"+df[\"Prénom d'exercice\"].str.lower()) \\\n",
    "        .groupby('Identifiant PP')[['Nom_Prénom']] \\\n",
    "        .transform('nunique').rename(columns={'Nom_Prénom' : 'Count'}).query('Count > 1') \\\n",
    "        .join(df)[['Identifiant PP','Count',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
    "        .sort_values(['Identifiant PP', 'index'])\n",
    "df3"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 6.97 s\n",
      "Wall time: 7.28 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "        Identifiant PP  Count Nom d'exercice Prénom d'exercice\n",
       "index                                                         \n",
       "1350393    10000034180      2  DUWAT-GEORGES         GHISLAINE\n",
       "1350394    10000034180      2        GEORGES         GHISLAINE\n",
       "1350470    10000046051      2         STUDER             AGNES\n",
       "1620048    10000046051      2         JURION             AGNES\n",
       "269964     10000101518      2       BARREYRE          SANDRINE\n",
       "...                ...    ...            ...               ...\n",
       "1617156    10110987236      2         ROGIER          MATHILDE\n",
       "807882     10111077417      2        DOUVIER         FRANCETTE\n",
       "2158383    10111077417      2        D'ELLOY         FRANCETTE\n",
       "537977     10111110721      2      ROCHEPEAU            Nadège\n",
       "2158797    10111110721      2        BARREAU            Nadège\n",
       "\n",
       "[5395 rows x 4 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Identifiant PP</th>\n",
       "      <th>Count</th>\n",
       "      <th>Nom d'exercice</th>\n",
       "      <th>Prénom d'exercice</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1350393</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>2</td>\n",
       "      <td>DUWAT-GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1350394</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>2</td>\n",
       "      <td>GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1350470</th>\n",
       "      <td>10000046051</td>\n",
       "      <td>2</td>\n",
       "      <td>STUDER</td>\n",
       "      <td>AGNES</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1620048</th>\n",
       "      <td>10000046051</td>\n",
       "      <td>2</td>\n",
       "      <td>JURION</td>\n",
       "      <td>AGNES</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>269964</th>\n",
       "      <td>10000101518</td>\n",
       "      <td>2</td>\n",
       "      <td>BARREYRE</td>\n",
       "      <td>SANDRINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1617156</th>\n",
       "      <td>10110987236</td>\n",
       "      <td>2</td>\n",
       "      <td>ROGIER</td>\n",
       "      <td>MATHILDE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>807882</th>\n",
       "      <td>10111077417</td>\n",
       "      <td>2</td>\n",
       "      <td>DOUVIER</td>\n",
       "      <td>FRANCETTE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158383</th>\n",
       "      <td>10111077417</td>\n",
       "      <td>2</td>\n",
       "      <td>D'ELLOY</td>\n",
       "      <td>FRANCETTE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537977</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>2</td>\n",
       "      <td>ROCHEPEAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158797</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>2</td>\n",
       "      <td>BARREAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5395 rows × 4 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 22
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-12T19:54:21.102182Z",
     "start_time": "2025-08-12T19:54:21.072806Z"
    }
   },
   "cell_type": "code",
   "source": [
    "df3.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_2-2.csv\",\n",
    "           sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')"
   ],
   "outputs": [],
   "execution_count": 23
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
--- a/Professionals_Multiple_Names3.ipynb
+++ b/Professionals_Multiple_Names3.ipynb
@@ -0,0 +1,439 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "ExecuteTime": {
     "end_time": "2025-08-11T22:50:03.135959Z",
     "start_time": "2025-08-11T22:49:26.824618Z"
    }
   },
   "source": [
    "%%time\n",
    "import pandas as pd\n",
    "import csv\n",
    "df = pd.read_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804.txt\",\n",
    "                 sep='|', doublequote=False, quoting=csv.QUOTE_NONE, dtype=str, na_values='', keep_default_na=False)\n",
    "df.index.name = 'index'\n",
    "df.columns"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 35.9 s\n",
      "Wall time: 36.3 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['Type d'identifiant PP', 'Identifiant PP',\n",
       "       'Identification nationale PP', 'Code civilité d'exercice',\n",
       "       'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n",
       "       'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n",
       "       'Libellé profession', 'Code catégorie professionnelle',\n",
       "       'Libellé catégorie professionnelle', 'Code type savoir-faire',\n",
       "       'Libellé type savoir-faire', 'Code savoir-faire',\n",
       "       'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n",
       "       'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n",
       "       'Numéro FINESS établissement juridique',\n",
       "       'Identifiant technique de la structure', 'Raison sociale site',\n",
       "       'Enseigne commerciale site',\n",
       "       'Complément destinataire (coord. structure)',\n",
       "       'Complément point géographique (coord. structure)',\n",
       "       'Numéro Voie (coord. structure)',\n",
       "       'Indice répétition voie (coord. structure)',\n",
       "       'Code type de voie (coord. structure)',\n",
       "       'Libellé type de voie (coord. structure)',\n",
       "       'Libellé Voie (coord. structure)',\n",
       "       'Mention distribution (coord. structure)',\n",
       "       'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n",
       "       'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n",
       "       'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n",
       "       'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n",
       "       'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n",
       "       'Code Département (structure)', 'Libellé Département (structure)',\n",
       "       'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n",
       "       'Code secteur d'activité', 'Libellé secteur d'activité',\n",
       "       'Code section tableau pharmaciens',\n",
       "       'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n",
       "       'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 38
  },
  {
   "cell_type": "code",
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "ExecuteTime": {
     "end_time": "2025-08-11T22:53:55.986443Z",
     "start_time": "2025-08-11T22:50:03.157898Z"
    }
   },
   "source": [
    "%%time\n",
    "df2 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"]+\" \"+df[\"Prénom d'exercice\"]) \\\n",
    "        [['Identifiant PP','Nom_Prénom',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
    "        .groupby('Identifiant PP') \\\n",
    "        .filter(lambda f: f['Nom_Prénom'].nunique()>1) \\\n",
    "        .sort_values(['Identifiant PP','index']) \\\n",
    "        [['Identifiant PP',\"Nom d'exercice\",\"Prénom d'exercice\"]]\n",
    "df2"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 3min 44s\n",
      "Wall time: 3min 52s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "        Identifiant PP Nom d'exercice Prénom d'exercice\n",
       "index                                                  \n",
       "1350393    10000034180  DUWAT-GEORGES         GHISLAINE\n",
       "1350394    10000034180        GEORGES         GHISLAINE\n",
       "259        10000040062          MEYER           Nicolas\n",
       "260        10000040062          MEYER           Nicolas\n",
       "809702     10000040062          MEYER           NICOLAS\n",
       "...                ...            ...               ...\n",
       "2158383    10111077417        D'ELLOY         FRANCETTE\n",
       "537896     10111105358           HOMO             Maddy\n",
       "1889090    10111105358           Homo             Maddy\n",
       "537977     10111110721      ROCHEPEAU            Nadège\n",
       "2158797    10111110721        BARREAU            Nadège\n",
       "\n",
       "[9059 rows x 3 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Identifiant PP</th>\n",
       "      <th>Nom d'exercice</th>\n",
       "      <th>Prénom d'exercice</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1350393</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>DUWAT-GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1350394</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>259</th>\n",
       "      <td>10000040062</td>\n",
       "      <td>MEYER</td>\n",
       "      <td>Nicolas</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>260</th>\n",
       "      <td>10000040062</td>\n",
       "      <td>MEYER</td>\n",
       "      <td>Nicolas</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>809702</th>\n",
       "      <td>10000040062</td>\n",
       "      <td>MEYER</td>\n",
       "      <td>NICOLAS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158383</th>\n",
       "      <td>10111077417</td>\n",
       "      <td>D'ELLOY</td>\n",
       "      <td>FRANCETTE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537896</th>\n",
       "      <td>10111105358</td>\n",
       "      <td>HOMO</td>\n",
       "      <td>Maddy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1889090</th>\n",
       "      <td>10111105358</td>\n",
       "      <td>Homo</td>\n",
       "      <td>Maddy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537977</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>ROCHEPEAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158797</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>BARREAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9059 rows × 3 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 39
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-11T22:53:56.801020Z",
     "start_time": "2025-08-11T22:53:56.699295Z"
    }
   },
   "cell_type": "code",
   "source": "df2.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_3-1.csv\", sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')",
   "outputs": [],
   "execution_count": 40
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-11T22:57:28.643070Z",
     "start_time": "2025-08-11T22:53:56.870889Z"
    }
   },
   "cell_type": "code",
   "source": [
    "%%time\n",
    "df3 = df.assign(Nom_Prénom = df[\"Nom d'exercice\"].str.lower()+\" \"+df[\"Prénom d'exercice\"].str.lower()) \\\n",
    "        [['Identifiant PP','Nom_Prénom',\"Nom d'exercice\",\"Prénom d'exercice\"]] \\\n",
    "        .groupby('Identifiant PP') \\\n",
    "        .filter(lambda f: f['Nom_Prénom'].nunique()>1) \\\n",
    "        .sort_values(['Identifiant PP','index']) \\\n",
    "        [['Identifiant PP',\"Nom d'exercice\",\"Prénom d'exercice\"]]\n",
    "df3"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 3min 26s\n",
      "Wall time: 3min 31s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "        Identifiant PP Nom d'exercice Prénom d'exercice\n",
       "index                                                  \n",
       "1350393    10000034180  DUWAT-GEORGES         GHISLAINE\n",
       "1350394    10000034180        GEORGES         GHISLAINE\n",
       "1350470    10000046051         STUDER             AGNES\n",
       "1620048    10000046051         JURION             AGNES\n",
       "269964     10000101518       BARREYRE          SANDRINE\n",
       "...                ...            ...               ...\n",
       "1617156    10110987236         ROGIER          MATHILDE\n",
       "807882     10111077417        DOUVIER         FRANCETTE\n",
       "2158383    10111077417        D'ELLOY         FRANCETTE\n",
       "537977     10111110721      ROCHEPEAU            Nadège\n",
       "2158797    10111110721        BARREAU            Nadège\n",
       "\n",
       "[5395 rows x 3 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Identifiant PP</th>\n",
       "      <th>Nom d'exercice</th>\n",
       "      <th>Prénom d'exercice</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1350393</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>DUWAT-GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1350394</th>\n",
       "      <td>10000034180</td>\n",
       "      <td>GEORGES</td>\n",
       "      <td>GHISLAINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1350470</th>\n",
       "      <td>10000046051</td>\n",
       "      <td>STUDER</td>\n",
       "      <td>AGNES</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1620048</th>\n",
       "      <td>10000046051</td>\n",
       "      <td>JURION</td>\n",
       "      <td>AGNES</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>269964</th>\n",
       "      <td>10000101518</td>\n",
       "      <td>BARREYRE</td>\n",
       "      <td>SANDRINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1617156</th>\n",
       "      <td>10110987236</td>\n",
       "      <td>ROGIER</td>\n",
       "      <td>MATHILDE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>807882</th>\n",
       "      <td>10111077417</td>\n",
       "      <td>DOUVIER</td>\n",
       "      <td>FRANCETTE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158383</th>\n",
       "      <td>10111077417</td>\n",
       "      <td>D'ELLOY</td>\n",
       "      <td>FRANCETTE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>537977</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>ROCHEPEAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2158797</th>\n",
       "      <td>10111110721</td>\n",
       "      <td>BARREAU</td>\n",
       "      <td>Nadège</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5395 rows × 3 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 41
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-11T22:57:29.038232Z",
     "start_time": "2025-08-11T22:57:29.014447Z"
    }
   },
   "cell_type": "code",
   "source": "df3.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_250804_Multiple_Names_3-2.csv\", sep='|', index=True, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')",
   "outputs": [],
   "execution_count": 42
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
--- a/Professionals_Sort.ipynb
+++ b/Professionals_Sort.ipynb
@@ -0,0 +1,140 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import csv\n",
    "filename = 'Table_Réf_Professionnels_250815'\n",
    "df = pd.read_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"+filename+\".csv\", sep='|', doublequote=False, quoting=csv.QUOTE_NONE,dtype=str, na_values='', keep_default_na=False)\n",
    "df.columns"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-08-16T01:35:13.352289Z",
     "start_time": "2025-08-16T01:34:58.550068Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Type d'identifiant PP', 'Identifiant PP',\n",
       "       'Identification nationale PP', 'Code civilité d'exercice',\n",
       "       'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n",
       "       'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n",
       "       'Libellé profession', 'Code catégorie professionnelle',\n",
       "       'Libellé catégorie professionnelle', 'Code type savoir-faire',\n",
       "       'Libellé type savoir-faire', 'Code savoir-faire',\n",
       "       'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n",
       "       'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n",
       "       'Numéro FINESS établissement juridique',\n",
       "       'Identifiant technique de la structure', 'Raison sociale site',\n",
       "       'Enseigne commerciale site',\n",
       "       'Complément destinataire (coord. structure)',\n",
       "       'Complément point géographique (coord. structure)',\n",
       "       'Numéro Voie (coord. structure)',\n",
       "       'Indice répétition voie (coord. structure)',\n",
       "       'Code type de voie (coord. structure)',\n",
       "       'Libellé type de voie (coord. structure)',\n",
       "       'Libellé Voie (coord. structure)',\n",
       "       'Mention distribution (coord. structure)',\n",
       "       'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n",
       "       'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n",
       "       'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n",
       "       'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n",
       "       'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n",
       "       'Code Département (structure)', 'Libellé Département (structure)',\n",
       "       'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n",
       "       'Code secteur d'activité', 'Libellé secteur d'activité',\n",
       "       'Code section tableau pharmaciens',\n",
       "       'Libellé section tableau pharmaciens', 'Code rôle', 'Libellé rôle',\n",
       "       'Code genre activité', 'Libellé genre activité', 'Unnamed: 56'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "source": [
    "df.shape"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-08-16T01:35:13.450995Z",
     "start_time": "2025-08-16T01:35:13.442103Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(994582, 57)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "source": [
    "df_sorted = df.sort_values([\"Identifiant PP\", \"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)', 'Numéro Voie (coord. structure)', 'Indice répétition voie (coord. structure)', 'Libellé type de voie (coord. structure)', 'Libellé Voie (coord. structure)' , 'Mention distribution (coord. structure)', 'Téléphone (coord. structure)'])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-08-16T01:35:20.312959Z",
     "start_time": "2025-08-16T01:35:13.656674Z"
    }
   },
   "outputs": [],
   "execution_count": 3
  },
  {
   "cell_type": "code",
   "source": "df_sorted.to_csv(\"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"+filename+\"-sorted.csv\", sep='|', index=False, doublequote=False, quoting=csv.QUOTE_NONE, lineterminator='\\n')",
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-08-16T01:35:34.463854Z",
     "start_time": "2025-08-16T01:35:20.454076Z"
    }
   },
   "outputs": [],
   "execution_count": 4
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
--- a/Resendo.ipynb
+++ b/Resendo.ipynb
@@ -0,0 +1,210 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "df = pd.read_excel(\"G:\\Mon Drive\\Ziwig-Health\\Data\\Extract_Prof_Patient_List.xlsx\", header=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "(7728, 9)"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "timedDf = df.set_index('createdAt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "timedDf['count']=True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "timedDf['NonStarted']=1-timedDf['isStartMedicalRecord'].astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "timedDf['NonFinished']=1-timedDf['isFinishMedicalRecord'].astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "timedDf=timedDf.loc[:, ['isStartMedicalRecord','isFinishMedicalRecord','count','NonStarted','NonFinished']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "timedDfMonthly = timedDf.resample('M')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "            isStartMedicalRecord  isFinishMedicalRecord  count  NonStarted  \\\ncreatedAt                                                                    \n2020-11-30                     2                      2      2           0   \n2020-12-31                     3                      3      3           0   \n2021-01-31                    21                     21     21           0   \n2021-02-28                    10                     10     10           0   \n2021-03-31                   348                    323    404          56   \n2021-04-30                   602                    559    712         110   \n2021-05-31                   511                    465    622         111   \n2021-06-30                   406                    372    503          97   \n2021-07-31                   426                    398    498          72   \n2021-08-31                   429                    393    528          99   \n2021-09-30                   561                    517    677         116   \n2021-10-31                   580                    539    696         116   \n2021-11-30                   453                    416    557         104   \n2021-12-31                   480                    447    608         128   \n2022-01-31                   608                    562    786         178   \n2022-02-28                   544                    502    704         160   \n2022-03-31                   286                    255    397         111   \n\n            NonFinished  \ncreatedAt                \n2020-11-30            0  \n2020-12-31            0  \n2021-01-31            0  \n2021-02-28            0  \n2021-03-31           81  \n2021-04-30          153  \n2021-05-31          157  \n2021-06-30          131  \n2021-07-31          100  \n2021-08-31          135  \n2021-09-30          160  \n2021-10-31          157  \n2021-11-30          141  \n2021-12-31          161  \n2022-01-31          224  \n2022-02-28      
    202  \n2022-03-31          142  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>isStartMedicalRecord</th>\n      <th>isFinishMedicalRecord</th>\n      <th>count</th>\n      <th>NonStarted</th>\n      <th>NonFinished</th>\n    </tr>\n    <tr>\n      <th>createdAt</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2020-11-30</th>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2020-12-31</th>\n      <td>3</td>\n      <td>3</td>\n      <td>3</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2021-01-31</th>\n      <td>21</td>\n      <td>21</td>\n      <td>21</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2021-02-28</th>\n      <td>10</td>\n      <td>10</td>\n      <td>10</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2021-03-31</th>\n      <td>348</td>\n      <td>323</td>\n      <td>404</td>\n      <td>56</td>\n      <td>81</td>\n    </tr>\n    <tr>\n      <th>2021-04-30</th>\n      <td>602</td>\n      <td>559</td>\n      <td>712</td>\n      <td>110</td>\n      <td>153</td>\n    </tr>\n    <tr>\n      <th>2021-05-31</th>\n      <td>511</td>\n      <td>465</td>\n      <td>622</td>\n      <td>111</td>\n      <td>157</td>\n    </tr>\n    <tr>\n      <th>2021-06-30</th>\n      <td>406</td>\n      <td>372</td>\n      <td>503</td>\n      <td>97</td>\n      <td>131</td>\n    </tr>\n    <tr>\n      <th>2021-07-31</th>\n      <td>426</td>\n      <td>398</td>\n      <td>498</td>\n      <td>72</td>\n      <td>100</td>\n    </tr>\n    
<tr>\n      <th>2021-08-31</th>\n      <td>429</td>\n      <td>393</td>\n      <td>528</td>\n      <td>99</td>\n      <td>135</td>\n    </tr>\n    <tr>\n      <th>2021-09-30</th>\n      <td>561</td>\n      <td>517</td>\n      <td>677</td>\n      <td>116</td>\n      <td>160</td>\n    </tr>\n    <tr>\n      <th>2021-10-31</th>\n      <td>580</td>\n      <td>539</td>\n      <td>696</td>\n      <td>116</td>\n      <td>157</td>\n    </tr>\n    <tr>\n      <th>2021-11-30</th>\n      <td>453</td>\n      <td>416</td>\n      <td>557</td>\n      <td>104</td>\n      <td>141</td>\n    </tr>\n    <tr>\n      <th>2021-12-31</th>\n      <td>480</td>\n      <td>447</td>\n      <td>608</td>\n      <td>128</td>\n      <td>161</td>\n    </tr>\n    <tr>\n      <th>2022-01-31</th>\n      <td>608</td>\n      <td>562</td>\n      <td>786</td>\n      <td>178</td>\n      <td>224</td>\n    </tr>\n    <tr>\n      <th>2022-02-28</th>\n      <td>544</td>\n      <td>502</td>\n      <td>704</td>\n      <td>160</td>\n      <td>202</td>\n    </tr>\n    <tr>\n      <th>2022-03-31</th>\n      <td>286</td>\n      <td>255</td>\n      <td>397</td>\n      <td>111</td>\n      <td>142</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "timedDfMonthly.sum()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
--- a/Resendo2.ipynb
+++ b/Resendo2.ipynb
@@ -0,0 +1,163 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 5.89 s\n",
      "Wall time: 6.14 s\n"
     ]
    },
    {
     "data": {
      "text/plain": "(31371, 9)"
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "import pandas as pd\n",
    "df = pd.read_excel(\"G:\\Mon Drive\\Ziwig-Health\\Data\\Extract_Prof_Patient_List_new.xlsx\", header=2)\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 46.9 ms\n",
      "Wall time: 49.9 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": "isStartMedicalRecord  False  True      \nisFinishMedicalRecord False False  True\ncreatedAt                              \n2020-11-30              169     9    78\n2020-12-31              226    16   147\n2021-01-31              149   388   842\n2021-02-28              238   164   606\n2021-03-31              652   453  2262\n2021-04-30              250   118  1141\n2021-05-31              269   144  1106\n2021-06-30              283   150  1012\n2021-07-31              227   127   883\n2021-08-31              196   111   912\n2021-09-30              223   142  1254\n2021-10-31              224   112  1176\n2021-11-30              229   110   988\n2021-12-31              466   111   925\n2022-01-31              753   287  1766\n2022-02-28             1095   549  2362\n2022-03-31              520   176  1242\n2022-04-30              395   125   849\n2022-05-31              363    99   771\n2022-06-30              233    65   433",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead tr th {\n        text-align: left;\n    }\n\n    .dataframe thead tr:last-of-type th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr>\n      <th>isStartMedicalRecord</th>\n      <th>False</th>\n      <th colspan=\"2\" halign=\"left\">True</th>\n    </tr>\n    <tr>\n      <th>isFinishMedicalRecord</th>\n      <th>False</th>\n      <th>False</th>\n      <th>True</th>\n    </tr>\n    <tr>\n      <th>createdAt</th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2020-11-30</th>\n      <td>169</td>\n      <td>9</td>\n      <td>78</td>\n    </tr>\n    <tr>\n      <th>2020-12-31</th>\n      <td>226</td>\n      <td>16</td>\n      <td>147</td>\n    </tr>\n    <tr>\n      <th>2021-01-31</th>\n      <td>149</td>\n      <td>388</td>\n      <td>842</td>\n    </tr>\n    <tr>\n      <th>2021-02-28</th>\n      <td>238</td>\n      <td>164</td>\n      <td>606</td>\n    </tr>\n    <tr>\n      <th>2021-03-31</th>\n      <td>652</td>\n      <td>453</td>\n      <td>2262</td>\n    </tr>\n    <tr>\n      <th>2021-04-30</th>\n      <td>250</td>\n      <td>118</td>\n      <td>1141</td>\n    </tr>\n    <tr>\n      <th>2021-05-31</th>\n      <td>269</td>\n      <td>144</td>\n      <td>1106</td>\n    </tr>\n    <tr>\n      <th>2021-06-30</th>\n      <td>283</td>\n      <td>150</td>\n      <td>1012</td>\n    </tr>\n    <tr>\n      <th>2021-07-31</th>\n      <td>227</td>\n      <td>127</td>\n      <td>883</td>\n    </tr>\n    <tr>\n      <th>2021-08-31</th>\n      <td>196</td>\n      <td>111</td>\n      <td>912</td>\n    </tr>\n    <tr>\n      <th>2021-09-30</th>\n      <td>223</td>\n      <td>142</td>\n      <td>1254</td>\n    </tr>\n    <tr>\n      
<th>2021-10-31</th>\n      <td>224</td>\n      <td>112</td>\n      <td>1176</td>\n    </tr>\n    <tr>\n      <th>2021-11-30</th>\n      <td>229</td>\n      <td>110</td>\n      <td>988</td>\n    </tr>\n    <tr>\n      <th>2021-12-31</th>\n      <td>466</td>\n      <td>111</td>\n      <td>925</td>\n    </tr>\n    <tr>\n      <th>2022-01-31</th>\n      <td>753</td>\n      <td>287</td>\n      <td>1766</td>\n    </tr>\n    <tr>\n      <th>2022-02-28</th>\n      <td>1095</td>\n      <td>549</td>\n      <td>2362</td>\n    </tr>\n    <tr>\n      <th>2022-03-31</th>\n      <td>520</td>\n      <td>176</td>\n      <td>1242</td>\n    </tr>\n    <tr>\n      <th>2022-04-30</th>\n      <td>395</td>\n      <td>125</td>\n      <td>849</td>\n    </tr>\n    <tr>\n      <th>2022-05-31</th>\n      <td>363</td>\n      <td>99</td>\n      <td>771</td>\n    </tr>\n    <tr>\n      <th>2022-06-30</th>\n      <td>233</td>\n      <td>65</td>\n      <td>433</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "pd.pivot_table(data = df, index=pd.Grouper(key=\"createdAt\", freq=\"M\"), columns=[\"isStartMedicalRecord\",\"isFinishMedicalRecord\"], values=\"fullName\", aggfunc=\"count\", fill_value= 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 46.9 ms\n",
      "Wall time: 46.9 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": "isStartMedicalRecord  False  True\ncreatedAt                        \n2020-11-30              169    87\n2020-12-31              226   163\n2021-01-31              149  1230\n2021-02-28              238   770\n2021-03-31              652  2715\n2021-04-30              250  1259\n2021-05-31              269  1250\n2021-06-30              283  1162\n2021-07-31              227  1010\n2021-08-31              196  1023\n2021-09-30              223  1396\n2021-10-31              224  1288\n2021-11-30              229  1098\n2021-12-31              466  1036\n2022-01-31              753  2053\n2022-02-28             1095  2911\n2022-03-31              520  1418\n2022-04-30              395   974\n2022-05-31              363   870\n2022-06-30              233   498",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th>isStartMedicalRecord</th>\n      <th>False</th>\n      <th>True</th>\n    </tr>\n    <tr>\n      <th>createdAt</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2020-11-30</th>\n      <td>169</td>\n      <td>87</td>\n    </tr>\n    <tr>\n      <th>2020-12-31</th>\n      <td>226</td>\n      <td>163</td>\n    </tr>\n    <tr>\n      <th>2021-01-31</th>\n      <td>149</td>\n      <td>1230</td>\n    </tr>\n    <tr>\n      <th>2021-02-28</th>\n      <td>238</td>\n      <td>770</td>\n    </tr>\n    <tr>\n      <th>2021-03-31</th>\n      <td>652</td>\n      <td>2715</td>\n    </tr>\n    <tr>\n      <th>2021-04-30</th>\n      <td>250</td>\n      <td>1259</td>\n    </tr>\n    <tr>\n      <th>2021-05-31</th>\n      <td>269</td>\n      <td>1250</td>\n    </tr>\n    <tr>\n      <th>2021-06-30</th>\n      <td>283</td>\n      <td>1162</td>\n    </tr>\n    <tr>\n      <th>2021-07-31</th>\n      <td>227</td>\n      <td>1010</td>\n    </tr>\n    <tr>\n      <th>2021-08-31</th>\n      <td>196</td>\n      <td>1023</td>\n    </tr>\n    <tr>\n      <th>2021-09-30</th>\n      <td>223</td>\n      <td>1396</td>\n    </tr>\n    <tr>\n      <th>2021-10-31</th>\n      <td>224</td>\n      <td>1288</td>\n    </tr>\n    <tr>\n      <th>2021-11-30</th>\n      <td>229</td>\n      <td>1098</td>\n    </tr>\n    <tr>\n      <th>2021-12-31</th>\n      <td>466</td>\n      <td>1036</td>\n    </tr>\n    <tr>\n      <th>2022-01-31</th>\n      <td>753</td>\n      <td>2053</td>\n    </tr>\n    <tr>\n      <th>2022-02-28</th>\n      <td>1095</td>\n      <td>2911</td>\n    </tr>\n    <tr>\n      
<th>2022-03-31</th>\n      <td>520</td>\n      <td>1418</td>\n    </tr>\n    <tr>\n      <th>2022-04-30</th>\n      <td>395</td>\n      <td>974</td>\n    </tr>\n    <tr>\n      <th>2022-05-31</th>\n      <td>363</td>\n      <td>870</td>\n    </tr>\n    <tr>\n      <th>2022-06-30</th>\n      <td>233</td>\n      <td>498</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "pd.pivot_table(data = df, index=pd.Grouper(key=\"createdAt\", freq=\"M\"), columns=[\"isStartMedicalRecord\"], values=\"fullName\", aggfunc=\"count\", fill_value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 62.5 ms\n",
      "Wall time: 58.8 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": "isFinishMedicalRecord  False  True\ncreatedAt                         \n2020-11-30               178    78\n2020-12-31               242   147\n2021-01-31               537   842\n2021-02-28               402   606\n2021-03-31              1105  2262\n2021-04-30               368  1141\n2021-05-31               413  1106\n2021-06-30               433  1012\n2021-07-31               354   883\n2021-08-31               307   912\n2021-09-30               365  1254\n2021-10-31               336  1176\n2021-11-30               339   988\n2021-12-31               577   925\n2022-01-31              1040  1766\n2022-02-28              1644  2362\n2022-03-31               696  1242\n2022-04-30               520   849\n2022-05-31               462   771\n2022-06-30               298   433",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th>isFinishMedicalRecord</th>\n      <th>False</th>\n      <th>True</th>\n    </tr>\n    <tr>\n      <th>createdAt</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2020-11-30</th>\n      <td>178</td>\n      <td>78</td>\n    </tr>\n    <tr>\n      <th>2020-12-31</th>\n      <td>242</td>\n      <td>147</td>\n    </tr>\n    <tr>\n      <th>2021-01-31</th>\n      <td>537</td>\n      <td>842</td>\n    </tr>\n    <tr>\n      <th>2021-02-28</th>\n      <td>402</td>\n      <td>606</td>\n    </tr>\n    <tr>\n      <th>2021-03-31</th>\n      <td>1105</td>\n      <td>2262</td>\n    </tr>\n    <tr>\n      <th>2021-04-30</th>\n      <td>368</td>\n      <td>1141</td>\n    </tr>\n    <tr>\n      <th>2021-05-31</th>\n      <td>413</td>\n      <td>1106</td>\n    </tr>\n    <tr>\n      <th>2021-06-30</th>\n      <td>433</td>\n      <td>1012</td>\n    </tr>\n    <tr>\n      <th>2021-07-31</th>\n      <td>354</td>\n      <td>883</td>\n    </tr>\n    <tr>\n      <th>2021-08-31</th>\n      <td>307</td>\n      <td>912</td>\n    </tr>\n    <tr>\n      <th>2021-09-30</th>\n      <td>365</td>\n      <td>1254</td>\n    </tr>\n    <tr>\n      <th>2021-10-31</th>\n      <td>336</td>\n      <td>1176</td>\n    </tr>\n    <tr>\n      <th>2021-11-30</th>\n      <td>339</td>\n      <td>988</td>\n    </tr>\n    <tr>\n      <th>2021-12-31</th>\n      <td>577</td>\n      <td>925</td>\n    </tr>\n    <tr>\n      <th>2022-01-31</th>\n      <td>1040</td>\n      <td>1766</td>\n    </tr>\n    <tr>\n      <th>2022-02-28</th>\n      <td>1644</td>\n      <td>2362</td>\n    </tr>\n    <tr>\n      
<th>2022-03-31</th>\n      <td>696</td>\n      <td>1242</td>\n    </tr>\n    <tr>\n      <th>2022-04-30</th>\n      <td>520</td>\n      <td>849</td>\n    </tr>\n    <tr>\n      <th>2022-05-31</th>\n      <td>462</td>\n      <td>771</td>\n    </tr>\n    <tr>\n      <th>2022-06-30</th>\n      <td>298</td>\n      <td>433</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "pd.pivot_table(data = df, index=pd.Grouper(key=\"createdAt\", freq=\"M\"), columns=[\"isFinishMedicalRecord\"], values=\"fullName\", aggfunc=\"count\", fill_value=0)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
--- a/Copie.xlsx
+++ b/Copie.xlsx
--- a/Table_Réf_Professionnels.xlsx
+++ b/Table_Réf_Professionnels.xlsx
--- a/Table_Réf_Professionnels_inconsistencies.xlsx
+++ b/Table_Réf_Professionnels_inconsistencies.xlsx
--- a/datacompyProfessionals.ipynb
+++ b/datacompyProfessionals.ipynb
--- a/datacompyTest.ipynb
+++ b/datacompyTest.ipynb
@@ -0,0 +1,204 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-01-23T18:53:49.676160800Z",
     "start_time": "2024-01-23T18:53:49.620035200Z"
    }
   },
   "outputs": [],
   "source": [
    "\n",
    "from io import StringIO\n",
    "import pandas as pd\n",
    "import datacompy\n",
    "\n",
    "data1 = \"\"\"acct_id,dollar_amt,name,float_fld,date_fld\n",
    "10000001234,123.45,George Maharis,14530.1555,2017-01-01\n",
    "10000001235,0.45,Michael Bluth,1,2017-01-01\n",
    "10000001236,1345,George Bluth,,2017-01-01\n",
    "10000001237,123456,Bob Loblaw,345.12,2017-01-01\n",
    "10000001237,123457,Bob Loblaw,345.12,2017-01-01\n",
    "10000001239,1.05,Lucille Bluth,,2017-01-01\n",
    "\"\"\"\n",
    "\n",
    "data2 = \"\"\"acct_id,dollar_amt,name,float_fld\n",
    "10000001234,123.4,George Michael Bluth,14530.155\n",
    "10000001235,0.45,Michael Bluth,\n",
    "10000001236,1345,George Bluth,1\n",
    "10000001237,123456,Robert Loblaw,345.12\n",
    "10000001238,1.05,Loose Seal Bluth,111\n",
    "\"\"\"\n",
    "\n",
    "df1 = pd.read_csv(StringIO(data1))\n",
    "df2 = pd.read_csv(StringIO(data2))\n",
    "\n",
    "compare = datacompy.Compare(\n",
    "    df1,\n",
    "    df2,\n",
    "    join_columns='acct_id',  #You can also specify a list of columns\n",
    "    abs_tol=0, #Optional, defaults to 0\n",
    "    rel_tol=0, #Optional, defaults to 0\n",
    "    df1_name='Original', #Optional, defaults to 'df1'\n",
    "    df2_name='New' #Optional, defaults to 'df2'\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "       acct_id  dollar_amt_df1  dollar_amt_df2        name_df1  \\\n0  10000001234          123.45          123.40  George Maharis   \n1  10000001235            0.45            0.45   Michael Bluth   \n2  10000001236         1345.00         1345.00    George Bluth   \n3  10000001237       123456.00       123456.00      Bob Loblaw   \n\n               name_df2  float_fld_df1  float_fld_df2  \n0  George Michael Bluth     14530.1555      14530.155  \n1         Michael Bluth         1.0000            NaN  \n2          George Bluth            NaN          1.000  \n3         Robert Loblaw       345.1200        345.120  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>acct_id</th>\n      <th>dollar_amt_df1</th>\n      <th>dollar_amt_df2</th>\n      <th>name_df1</th>\n      <th>name_df2</th>\n      <th>float_fld_df1</th>\n      <th>float_fld_df2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>10000001234</td>\n      <td>123.45</td>\n      <td>123.40</td>\n      <td>George Maharis</td>\n      <td>George Michael Bluth</td>\n      <td>14530.1555</td>\n      <td>14530.155</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>10000001235</td>\n      <td>0.45</td>\n      <td>0.45</td>\n      <td>Michael Bluth</td>\n      <td>Michael Bluth</td>\n      <td>1.0000</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>10000001236</td>\n      <td>1345.00</td>\n      <td>1345.00</td>\n      <td>George Bluth</td>\n      <td>George Bluth</td>\n      <td>NaN</td>\n      <td>1.000</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>10000001237</td>\n      <td>123456.00</td>\n      <td>123456.00</td>\n      <td>Bob Loblaw</td>\n      <td>Robert Loblaw</td>\n      <td>345.1200</td>\n      <td>345.120</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "compare.all_mismatch(ignore_matching_cols=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-23T18:53:56.135115400Z",
     "start_time": "2024-01-23T18:53:56.086349900Z"
    }
   },
   "id": "2f16ab257397f6c9",
   "execution_count": 24
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "       acct_id  dollar_amt           name  float_fld    date_fld\n4  10000001237   123457.00     Bob Loblaw     345.12  2017-01-01\n5  10000001239        1.05  Lucille Bluth        NaN  2017-01-01",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>acct_id</th>\n      <th>dollar_amt</th>\n      <th>name</th>\n      <th>float_fld</th>\n      <th>date_fld</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>4</th>\n      <td>10000001237</td>\n      <td>123457.00</td>\n      <td>Bob Loblaw</td>\n      <td>345.12</td>\n      <td>2017-01-01</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>10000001239</td>\n      <td>1.05</td>\n      <td>Lucille Bluth</td>\n      <td>NaN</td>\n      <td>2017-01-01</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "compare.df1_unq_rows"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-23T18:53:59.793951800Z",
     "start_time": "2024-01-23T18:53:59.751624300Z"
    }
   },
   "id": "f38ecf439538fc9b",
   "execution_count": 25
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "       acct_id  dollar_amt              name  float_fld\n6  10000001238        1.05  Loose Seal Bluth      111.0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>acct_id</th>\n      <th>dollar_amt</th>\n      <th>name</th>\n      <th>float_fld</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>6</th>\n      <td>10000001238</td>\n      <td>1.05</td>\n      <td>Loose Seal Bluth</td>\n      <td>111.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "compare.df2_unq_rows"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-23T18:54:20.805047600Z",
     "start_time": "2024-01-23T18:54:20.777818600Z"
    }
   },
   "id": "b0a4c80da0847ac0",
   "execution_count": 26
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "       acct_id  dollar_amt            name   float_fld    date_fld\n0  10000001234      123.45  George Maharis  14530.1555  2017-01-01\n1  10000001235        0.45   Michael Bluth      1.0000  2017-01-01\n2  10000001236     1345.00    George Bluth         NaN  2017-01-01\n3  10000001237   123456.00      Bob Loblaw    345.1200  2017-01-01\n4  10000001237   123457.00      Bob Loblaw    345.1200  2017-01-01\n5  10000001239        1.05   Lucille Bluth         NaN  2017-01-01",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>acct_id</th>\n      <th>dollar_amt</th>\n      <th>name</th>\n      <th>float_fld</th>\n      <th>date_fld</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>10000001234</td>\n      <td>123.45</td>\n      <td>George Maharis</td>\n      <td>14530.1555</td>\n      <td>2017-01-01</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>10000001235</td>\n      <td>0.45</td>\n      <td>Michael Bluth</td>\n      <td>1.0000</td>\n      <td>2017-01-01</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>10000001236</td>\n      <td>1345.00</td>\n      <td>George Bluth</td>\n      <td>NaN</td>\n      <td>2017-01-01</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>10000001237</td>\n      <td>123456.00</td>\n      <td>Bob Loblaw</td>\n      <td>345.1200</td>\n      <td>2017-01-01</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>10000001237</td>\n      <td>123457.00</td>\n      <td>Bob Loblaw</td>\n      <td>345.1200</td>\n      <td>2017-01-01</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>10000001239</td>\n      <td>1.05</td>\n      <td>Lucille Bluth</td>\n      <td>NaN</td>\n      <td>2017-01-01</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-23T18:54:25.595365100Z",
     "start_time": "2024-01-23T18:54:25.533925200Z"
    }
   },
   "id": "b9aa33151fa6f235",
   "execution_count": 27
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "       acct_id  dollar_amt                  name  float_fld\n0  10000001234      123.40  George Michael Bluth  14530.155\n1  10000001235        0.45         Michael Bluth        NaN\n2  10000001236     1345.00          George Bluth      1.000\n3  10000001237   123456.00         Robert Loblaw    345.120\n4  10000001238        1.05      Loose Seal Bluth    111.000",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>acct_id</th>\n      <th>dollar_amt</th>\n      <th>name</th>\n      <th>float_fld</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>10000001234</td>\n      <td>123.40</td>\n      <td>George Michael Bluth</td>\n      <td>14530.155</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>10000001235</td>\n      <td>0.45</td>\n      <td>Michael Bluth</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>10000001236</td>\n      <td>1345.00</td>\n      <td>George Bluth</td>\n      <td>1.000</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>10000001237</td>\n      <td>123456.00</td>\n      <td>Robert Loblaw</td>\n      <td>345.120</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>10000001238</td>\n      <td>1.05</td>\n      <td>Loose Seal Bluth</td>\n      <td>111.000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-01-23T18:54:28.672000100Z",
     "start_time": "2024-01-23T18:54:28.631719300Z"
    }
   },
   "id": "aaa69421db146ed7",
   "execution_count": 28
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/prepareProfessionalsTable.py
+++ b/prepareProfessionalsTable.py
@@ -0,0 +1,164 @@
 import argparse
 import csv
 import math
 import sys
 from os import path
 from shutil import copyfileobj
 from zipfile import ZipFile, is_zipfile
 import numpy as np
 import pandas as pd
 import requests
 from tqdm import tqdm  # could use from tqdm.gui import tqdm
 from tqdm.utils import CallbackIOWrapper
 from urllib3.exceptions import InsecureRequestWarning
 from urllib3 import disable_warnings
 import questionary
 def _blank_and_filter(frame, columns_to_clean, professions):
    """Blank the selected column positions of ``frame`` in place, then keep the wanted rows.

    Returns the rows whose 'Libellé profession' value is listed in ``professions``.
    """
    frame.iloc[:, columns_to_clean] = ''
    return frame[frame['Libellé profession'].isin(professions)]
 def process_professionals_table(xls_file, txt_file, output_file, chunk_size=20000):
    """Build the pipe-separated output table from the raw extract and the Excel workbook.

    The raw text file is streamed in chunks; for every chunk a fixed set of column
    positions is emptied and only the rows whose 'Libellé profession' appears in the
    'Professions' column of the 'F_Professions' sheet are written to ``output_file``.
    The rows of the 'F_Append_Update', 'F_Etrangers', 'F_Fake' and 'F_Sophrologues'
    sheets then receive the same treatment and are appended to the same file.

    Args:
        xls_file: Path of the Excel workbook (all sheets are loaded as strings).
        txt_file: Path of the pipe-separated source file (with a header line).
        output_file: Path of the CSV file to create; it is overwritten.
        chunk_size: Number of source rows read per chunk (default 20000).
    """
    # Options shared by the CSV reader and writer: '|' separator, no quoting at all.
    csv_options = dict(sep='|', doublequote=False, quoting=csv.QUOTE_NONE)
    # Load Excel Dataframes (sheet_name=None returns a dict of every sheet)
    xls = pd.read_excel(xls_file, sheet_name=None, dtype=str,
                        na_values='', keep_default_na=False)
    professions = xls['F_Professions']['Professions'].tolist()
    # CSV Progressbar initialisation: count the lines (minus the header) and
    # close the handle straight away instead of leaking it.
    with open(txt_file, 'rb') as raw:
        estimated_total_rows = sum(1 for _ in raw) - 1
    # Positions of the columns whose content is emptied before writing.
    columns_to_clean = np.r_[0, 2, 4:7, 9, 11:16, 17:28, 30, 35:40, 41:56]
    template = None
    # Iterating over CSV file
    with tqdm(total=estimated_total_rows, desc=f'Writing to {path.basename(output_file)}',
              leave=True, unit="Ln") as bar:
        reader = pd.read_csv(txt_file, dtype=str, na_values='', keep_default_na=False,
                             chunksize=chunk_size, **csv_options)
        for i, chunk in enumerate(reader):
            kept = _blank_and_filter(chunk, columns_to_clean, professions)
            first = i == 0
            if first:
                # Empty frame carrying the source column layout, reused below.
                template = kept.iloc[:0]
            # The first chunk creates the file with its header, the others append.
            kept.to_csv(output_file, index=False, lineterminator='\n',
                        header=first, mode='w' if first else 'a', **csv_options)
            # Progress is counted on the rows read, not on the rows kept.
            bar.update(len(chunk))
    # Appending Other xls tabs, aligned on the source column layout
    extra = pd.concat([template, xls['F_Append_Update'], xls['F_Etrangers'],
                       xls['F_Fake'], xls['F_Sophrologues']], ignore_index=True)
    extra = _blank_and_filter(extra, columns_to_clean, professions)
    extra.to_csv(output_file, index=False, lineterminator='\n',
                 header=False, mode='a', **csv_options)
 def download_file(url: str, filename: str = False) -> object:
    """Stream ``url`` to a local file while displaying a tqdm progress bar.

    Args:
        url: Address of the resource to download.
        filename: Destination path. When falsy, the last '/'-separated segment of
            ``url`` is used, in the current directory.

    Returns:
        None.

    Raises:
        requests.HTTPError: if the server answers with an error status; the
            destination file is not touched in that case.
    """
    local_filename = filename if filename else path.join(".", url.split('/')[-1])
    # NOTE(review): TLS certificate verification is disabled (verify=False) and the
    # matching warning is silenced, so the server identity is not checked. Kept as in
    # the original; consider passing a CA bundle instead.
    disable_warnings(InsecureRequestWarning)
    unit_scale = 64  # size of one streamed chunk, in KB
    # The context manager releases the connection even if the transfer fails.
    with requests.get(url, stream=True, verify=False) as r:
        # Fail before opening the destination so an error page is never saved as the archive.
        r.raise_for_status()
        # Content-Length may be absent (e.g. chunked transfer): fall back to an
        # open-ended progress bar instead of raising KeyError.
        content_length = r.headers.get('Content-Length')
        total = math.ceil(int(content_length) / 1024 / unit_scale) if content_length else None
        with open(local_filename, 'wb') as fp:
            for chunk in tqdm(r.iter_content(chunk_size=unit_scale * 1024),
                              total=total,
                              unit_scale=unit_scale,
                              unit='KB',
                              desc=f"Downloading to {path.basename(local_filename)}",
                              leave=True):
                fp.write(chunk)
    return
 def extract_one_file_from_zip(zipfile, fromfile, tofile, desc=False):
    """Extract the first archive member whose name starts with ``fromfile``.

    Args:
        zipfile: Path of the zip archive.
        fromfile: Prefix of the member name to look for.
        tofile: Path of the file to write the member content to.
        desc: Progress bar label; when falsy a default one is built from ``tofile``.

    Returns:
        None on success, otherwise a message describing why nothing was extracted
        (archive missing/invalid, or no member matching the prefix).
    """
    if not desc:
        desc = f"Extracting to {path.basename(tofile)}"
    if not is_zipfile(zipfile):
        return f"Can't open Zipfile (non existent or bad): {zipfile}"
    # Opening the archive in a with-block closes it on every exit path,
    # including the early return below.
    with ZipFile(zipfile) as zipf:
        member = next((info for info in zipf.infolist()
                       if info.filename.startswith(fromfile)), None)
        if member is None:
            return f"No such file name in the Zip ({fromfile}*)..."
        with tqdm(
                desc=desc, unit="B", unit_scale=True, unit_divisor=1024,
                total=member.file_size, leave=True,
        ) as pbar:
            with zipf.open(member) as fi, open(tofile, "wb") as fo:
                # Every read on the wrapped stream advances the progress bar.
                copyfileobj(CallbackIOWrapper(pbar.update, fi), fo)
 def main():
    """Command-line entry point: download, unzip and process the professionals table.

    Each of the three stages can be skipped with --noDownload, --noUnzip and
    --noProcess. All files are resolved next to this script and share the chosen
    base file name (.zip, .txt, .csv); the Excel workbook name is given by
    --excelFileName. When the base name is the default one, it is confirmed
    interactively; an empty or cancelled answer exits.
    """
    defaultFileName = 'Table_Réf_Professionnels'
    defaultExcelFileName = 'Table_Réf_Professionnels'
    internalFileName = 'PS_LibreAcces_Personne_activite'
    parser = argparse.ArgumentParser(description='Prepare Professionals Table for Import to Endoziwig.')
    parser.add_argument('fileName', type=str, nargs='?', default=defaultFileName,
                        help=f'File name to use : default="{defaultFileName}"')
    parser.add_argument('--excelFileName', '-x', type=str, nargs='?', default=defaultExcelFileName,
                        help=f'Excel File Containing Append Data: default="{defaultExcelFileName}" (without extension)')
    parser.add_argument('--noDownload', '-ndw', action='store_true',
                        help='Do not Download the file (Default = Download).')
    parser.add_argument('--noUnzip', '-nuz', action='store_true',
                        help='Do not Unzip the file (Default = Unzip).')
    parser.add_argument('--noProcess', '-npr', action='store_true',
                        help='Do not Process the file (Default = Process).')
    args = parser.parse_args()
    if len(sys.argv) == 1:
        print("You're about to download and prepare Professionals Table for import to Endoziwig")
    # Files Settings
    if args.fileName == defaultFileName:
        print("\n")
        args.fileName = questionary.text("Please confirm file name (or empty to cancel):",
                                         default=defaultFileName).ask()
    # ask() yields '' for an emptied answer and None when the prompt is aborted;
    # both mean "cancel" (None would otherwise produce files named "None.*").
    if not args.fileName:
        sys.exit(0)
    # A bare "-x" (no value) leaves None in the namespace: fall back to the default.
    excelFileName = args.excelFileName or defaultExcelFileName
    BASE_DIR = path.dirname(path.abspath(__file__))
    zipFileName = path.join(BASE_DIR, f'{args.fileName}.zip')
    xlsFileName = path.join(BASE_DIR, f'{excelFileName}.xlsx')
    txtFileName = path.join(BASE_DIR, f'{args.fileName}.txt')
    outputFileName = path.join(BASE_DIR, f'{args.fileName}.csv')
    print("\n")
    if not args.noDownload:
        download_file(
            'https://service.annuaire.sante.fr/annuaire-sante-webservices/V300/services/extraction/PS_LibreAcces',
            filename=zipFileName)
        print("\n")
    if not args.noUnzip:
        unzipResult = extract_one_file_from_zip(zipFileName, internalFileName, txtFileName)
        if unzipResult is not None:
            print(unzipResult)
            print("\n")
            # Extraction failed: do not process a missing or stale text file.
            return
        print("\n")
    if not args.noProcess:
        process_professionals_table(xlsFileName, txtFileName, outputFileName)
        print("\n")
 if __name__ == '__main__':
    # Script entry point: run main(), report any error as a plain message, and
    # always wait for Enter so the console stays open until the user has read it.
    try:
        main()
    except Exception as error:
        print(error)
    finally:
        input('Finished... Press Enter to continue')
        print('\n')