{ "cells": [
 { "cell_type": "code", "execution_count": 48, "id": "66b27b71bfe4a1e6", "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:35:09.000042Z", "start_time": "2024-03-07T16:35:08.917093Z" } }, "outputs": [], "source": [
  "import csv\n",
  "from pathlib import Path\n",
  "\n",
  "import datacompy\n",
  "import pandas as pd"
 ] },
 { "cell_type": "code", "execution_count": null, "id": "9c4e1a7d3b5f2e80", "metadata": { "collapsed": false }, "outputs": [], "source": [
  "# Folder that holds the pipe-delimited reference extracts.\n",
  "# Raw string: in a regular literal the Windows separators would form invalid\n",
  "# escape sequences, which Python 3.12+ reports as a SyntaxWarning.\n",
  "REFERENCE_DIR = Path(r'H:\\Mon Drive\\Ziwig Health\\Tables de Réf\\Professionnels')\n",
  "\n",
  "\n",
  "def load_reference_table(path):\n",
  "    \"\"\"Read one reference extract, keeping every column as raw text.\n",
  "\n",
  "    Parameters\n",
  "    ----------\n",
  "    path : str or pathlib.Path\n",
  "        Location of the pipe-delimited file to read.\n",
  "\n",
  "    Returns\n",
  "    -------\n",
  "    pandas.DataFrame\n",
  "        One row per line of the file, all columns typed as ``str``.\n",
  "        Only empty fields are turned into NaN.\n",
  "    \"\"\"\n",
  "    return pd.read_csv(\n",
  "        path,\n",
  "        sep='|',\n",
  "        doublequote=False,\n",
  "        quoting=csv.QUOTE_NONE,  # quote characters are ordinary data, not delimiters\n",
  "        dtype=str,  # identifiers keep their leading zeros\n",
  "        na_values='',\n",
  "        keep_default_na=False,  # pandas' default NA markers are not applied\n",
  "    )"
 ] },
 { "cell_type": "code", "outputs": [], "source": [
  "# Older extract: the baseline side of the comparison.\n",
  "df1 = load_reference_table(REFERENCE_DIR / 'Table_Réf_Professionnels_240103.txt')"
 ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:35:57.199406Z", "start_time": "2024-03-07T16:35:09.010084Z" } }, "id": "2bf7e140e6e3a0cf", "execution_count": 49 },
 { "cell_type": "code", "outputs": [], "source": [
  "# Newer extract: the side checked against the baseline.\n",
  "df2 = load_reference_table(REFERENCE_DIR / 'Table_Réf_Professionnels_240307.txt')"
 ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:36:52.304783Z", "start_time": "2024-03-07T16:35:57.203517Z" } }, "id": "f55e34a990ae89a9", "execution_count": 50 },
 { "cell_type": "code", "outputs": [], "source": [
  "# Composite key used to pair rows of the two extracts.\n",
  "JOIN_COLUMNS = [\n",
  "    'Identification nationale PP',\n",
  "    'Code profession',\n",
  "    'Code catégorie professionnelle',\n",
  "    'Code type savoir-faire',\n",
  "    'Code savoir-faire',\n",
  "    'Code mode exercice',\n",
  "    'Numéro SIRET site',\n",
  "    'Numéro FINESS site',\n",
  "    'Identifiant technique de la structure',\n",
  "    'Code rôle',\n",
  "]\n",
  "\n",
  "compare = datacompy.Compare(\n",
  "    df1,\n",
  "    df2,\n",
  "    join_columns=JOIN_COLUMNS,\n",
  "    abs_tol=0,  # optional, defaults to 0\n",
  "    rel_tol=0,  # optional, defaults to 0\n",
  "    df1_name='Original',  # optional, defaults to 'df1'\n",
  "    df2_name='New',  # optional, defaults to 'df2'\n",
  ")"
 ], "metadata": { "collapsed": true, "ExecuteTime": 
{ "end_time": "2024-03-07T16:49:52.268885Z", "start_time": "2024-03-07T16:45:38.231325Z" } }, "id": "initial_id", "execution_count": 59 }, { "cell_type": "code", "outputs": [], "source": [ "mismatch = compare.all_mismatch(ignore_matching_cols=True)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:50:11.544899Z", "start_time": "2024-03-07T16:49:52.305809Z" } }, "id": "2f16ab257397f6c9", "execution_count": 60 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": "(117966, 82)" }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mismatch.shape" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:50:11.571499Z", "start_time": "2024-03-07T16:50:11.547923Z" } }, "id": "7b85bbc3f923fe64", "execution_count": 61 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": " identification nationale pp code profession \\\n247 0010002616 72 \n1604 0019101898 91 \n1628 0019102417 91 \n2010 0019300995 93 \n2270 0019303460 93 \n... ... ... \n2217837 810109452887 99 \n2217838 810109452895 99 \n2217850 810109453018 99 \n2217851 810109453026 99 \n2217856 810109453075 99 \n\n code catégorie professionnelle code type savoir-faire \\\n247 C NaN \n1604 C NaN \n1628 C NaN \n2010 C NaN \n2270 C NaN \n... ... ... \n2217837 C NaN \n2217838 C NaN \n2217850 C NaN \n2217851 C NaN \n2217856 C NaN \n\n code savoir-faire code mode exercice numéro siret site \\\n247 NaN S 81157144700010 \n1604 NaN S 77567246200345 \n1628 NaN L NaN \n2010 NaN S 77554456200611 \n2270 NaN S 77554456200116 \n... ... ... ... \n2217837 NaN S NaN \n2217838 NaN S NaN \n2217850 NaN S 81899316400016 \n2217851 NaN S 82459575500011 \n2217856 NaN S NaN \n\n numéro finess site identifiant technique de la structure code rôle \\\n247 010780203 F010780203 NaN \n1604 010780609 F010780609 NaN \n1628 NaN C01910241700 FON-01 \n2010 010784262 F010784262 NaN \n2270 010780591 F010780591 NaN \n... ... ... ... 
\n2217837 970404372 F970404372 312 \n2217838 440015857 F44001585701042021 317 \n2217850 770024271 F770024271 312 \n2217851 690043179 F690043179 312 \n2217856 190007500 F19000750001102007 317 \n\n ... code section tableau pharmaciens_df1 \\\n247 ... NaN \n1604 ... NaN \n1628 ... NaN \n2010 ... NaN \n2270 ... NaN \n... ... ... \n2217837 ... NaN \n2217838 ... NaN \n2217850 ... NaN \n2217851 ... NaN \n2217856 ... NaN \n\n code section tableau pharmaciens_df2 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 NaN \n2217838 NaN \n2217850 NaN \n2217851 NaN \n2217856 NaN \n\n libellé section tableau pharmaciens_df1 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 NaN \n2217838 NaN \n2217850 NaN \n2217851 NaN \n2217856 NaN \n\n libellé section tableau pharmaciens_df2 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 NaN \n2217838 NaN \n2217850 NaN \n2217851 NaN \n2217856 NaN \n\n libellé rôle_df1 \\\n247 NaN \n1604 NaN \n1628 Titulaire de cabinet \n2010 NaN \n2270 NaN \n... ... \n2217837 Autre professionnel \n2217838 Préparateur en pharmacie (officine) \n2217850 Autre professionnel \n2217851 Autre professionnel \n2217856 Préparateur en pharmacie (officine) \n\n libellé rôle_df2 code genre activité_df1 \\\n247 NaN NaN \n1604 NaN NaN \n1628 Titulaire de cabinet NaN \n2010 NaN NaN \n2270 NaN NaN \n... ... ... \n2217837 Autre professionnel GENR12 \n2217838 Préparateur en pharmacie (officine) GENR01 \n2217850 Autre professionnel GENR08 \n2217851 Autre professionnel GENR12 \n2217856 Préparateur en pharmacie (officine) GENR01 \n\n code genre activité_df2 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 GENR12 \n2217838 GENR01 \n2217850 GENR08 \n2217851 GENR12 \n2217856 GENR01 \n\n libellé genre activité_df1 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... 
\n2217837 Encadrement et orga accompagnt social/médico-soc \n2217838 Activité standard de soin ou de pharmacien \n2217850 Activité de coordination et d'orientation \n2217851 Encadrement et orga accompagnt social/médico-soc \n2217856 Activité standard de soin ou de pharmacien \n\n libellé genre activité_df2 \n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 Encadrement et orga accompagnt social/médico-soc \n2217838 Activité standard de soin ou de pharmacien \n2217850 Activité de coordination et d'orientation \n2217851 Encadrement et orga accompagnt social/médico-soc \n2217856 Activité standard de soin ou de pharmacien \n\n[117966 rows x 82 columns]", "text/html": "
| \n | identification nationale pp | \ncode profession | \ncode catégorie professionnelle | \ncode type savoir-faire | \ncode savoir-faire | \ncode mode exercice | \nnuméro siret site | \nnuméro finess site | \nidentifiant technique de la structure | \ncode rôle | \n... | \ncode section tableau pharmaciens_df1 | \ncode section tableau pharmaciens_df2 | \nlibellé section tableau pharmaciens_df1 | \nlibellé section tableau pharmaciens_df2 | \nlibellé rôle_df1 | \nlibellé rôle_df2 | \ncode genre activité_df1 | \ncode genre activité_df2 | \nlibellé genre activité_df1 | \nlibellé genre activité_df2 | \n
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 247 | \n0010002616 | \n72 | \nC | \nNaN | \nNaN | \nS | \n81157144700010 | \n010780203 | \nF010780203 | \nNaN | \n... | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \n
| 1604 | \n0019101898 | \n91 | \nC | \nNaN | \nNaN | \nS | \n77567246200345 | \n010780609 | \nF010780609 | \nNaN | \n... | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \n
| 1628 | \n0019102417 | \n91 | \nC | \nNaN | \nNaN | \nL | \nNaN | \nNaN | \nC01910241700 | \nFON-01 | \n... | \nNaN | \nNaN | \nNaN | \nNaN | \nTitulaire de cabinet | \nTitulaire de cabinet | \nNaN | \nNaN | \nNaN | \nNaN | \n
| 2010 | \n0019300995 | \n93 | \nC | \nNaN | \nNaN | \nS | \n77554456200611 | \n010784262 | \nF010784262 | \nNaN | \n... | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \n
| 2270 | \n0019303460 | \n93 | \nC | \nNaN | \nNaN | \nS | \n77554456200116 | \n010780591 | \nF010780591 | \nNaN | \n... | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \n
| ... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
| 2217837 | \n810109452887 | \n99 | \nC | \nNaN | \nNaN | \nS | \nNaN | \n970404372 | \nF970404372 | \n312 | \n... | \nNaN | \nNaN | \nNaN | \nNaN | \nAutre professionnel | \nAutre professionnel | \nGENR12 | \nGENR12 | \nEncadrement et orga accompagnt social/médico-soc | \nEncadrement et orga accompagnt social/médico-soc | \n
| 2217838 | \n810109452895 | \n99 | \nC | \nNaN | \nNaN | \nS | \nNaN | \n440015857 | \nF44001585701042021 | \n317 | \n... | \nNaN | \nNaN | \nNaN | \nNaN | \nPréparateur en pharmacie (officine) | \nPréparateur en pharmacie (officine) | \nGENR01 | \nGENR01 | \nActivité standard de soin ou de pharmacien | \nActivité standard de soin ou de pharmacien | \n
| 2217850 | \n810109453018 | \n99 | \nC | \nNaN | \nNaN | \nS | \n81899316400016 | \n770024271 | \nF770024271 | \n312 | \n... | \nNaN | \nNaN | \nNaN | \nNaN | \nAutre professionnel | \nAutre professionnel | \nGENR08 | \nGENR08 | \nActivité de coordination et d'orientation | \nActivité de coordination et d'orientation | \n
| 2217851 | \n810109453026 | \n99 | \nC | \nNaN | \nNaN | \nS | \n82459575500011 | \n690043179 | \nF690043179 | \n312 | \n... | \nNaN | \nNaN | \nNaN | \nNaN | \nAutre professionnel | \nAutre professionnel | \nGENR12 | \nGENR12 | \nEncadrement et orga accompagnt social/médico-soc | \nEncadrement et orga accompagnt social/médico-soc | \n
| 2217856 | \n810109453075 | \n99 | \nC | \nNaN | \nNaN | \nS | \nNaN | \n190007500 | \nF19000750001102007 | \n317 | \n... | \nNaN | \nNaN | \nNaN | \nNaN | \nPréparateur en pharmacie (officine) | \nPréparateur en pharmacie (officine) | \nGENR01 | \nGENR01 | \nActivité standard de soin ou de pharmacien | \nActivité standard de soin ou de pharmacien | \n
117966 rows × 82 columns
\n