{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "66b27b71bfe4a1e6",
   "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:35:09.000042Z", "start_time": "2024-03-07T16:35:08.917093Z" } },
   "outputs": [],
   "source": [
    "import csv\n",
    "from pathlib import Path\n",
    "\n",
    "import datacompy\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Folder holding the pipe-delimited reference-table extracts.\n",
    "# Raw string: the backslashes are Windows path separators, not escape sequences.\n",
    "DATA_DIR = Path(r\"H:\\Mon Drive\\Ziwig Health\\Tables de Réf\\Professionnels\")\n",
    "\n",
    "\n",
    "def load_reference_table(filename):\n",
    "    \"\"\"Read one extract from DATA_DIR, keeping every column as text.\n",
    "\n",
    "    Quote characters are kept as ordinary data (csv.QUOTE_NONE) and only the\n",
    "    empty string is parsed as a missing value (keep_default_na=False turns off\n",
    "    the built-in pandas NA markers).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    filename : str\n",
    "        Name of the extract file inside DATA_DIR.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The parsed table with dtype=str for every column.\n",
    "    \"\"\"\n",
    "    return pd.read_csv(\n",
    "        DATA_DIR / filename,\n",
    "        sep='|',\n",
    "        doublequote=False,\n",
    "        quoting=csv.QUOTE_NONE,\n",
    "        dtype=str,\n",
    "        na_values='',\n",
    "        keep_default_na=False,\n",
    "    )\n",
    "\n",
    "\n",
    "# Older extract: the baseline of the comparison.\n",
    "df1 = load_reference_table('Table_Réf_Professionnels_240103.txt')"
   ],
   "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:35:57.199406Z", "start_time": "2024-03-07T16:35:09.010084Z" } },
   "id": "2bf7e140e6e3a0cf",
   "execution_count": 49
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Newer extract, read with exactly the same parsing options as df1.\n",
    "df2 = load_reference_table('Table_Réf_Professionnels_240307.txt')"
   ],
   "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:36:52.304783Z", "start_time": "2024-03-07T16:35:57.203517Z" } },
   "id": "f55e34a990ae89a9",
   "execution_count": 50
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Key columns on which rows of the two extracts are paired.\n",
    "JOIN_COLUMNS = [\n",
    "    'Identification nationale PP',\n",
    "    'Code profession',\n",
    "    'Code catégorie professionnelle',\n",
    "    'Code type savoir-faire',\n",
    "    'Code savoir-faire',\n",
    "    'Code mode exercice',\n",
    "    'Numéro SIRET site',\n",
    "    'Numéro FINESS site',\n",
    "    'Identifiant technique de la structure',\n",
    "    'Code rôle',\n",
    "]\n",
    "\n",
    "compare = datacompy.Compare(\n",
    "    df1,\n",
    "    df2,\n",
    "    join_columns=JOIN_COLUMNS,\n",
    "    abs_tol=0,  # optional, defaults to 0; spelled out to show no tolerance is applied\n",
    "    rel_tol=0,  # optional, defaults to 0\n",
    "    df1_name='Original',  # label used for df1 (defaults to 'df1')\n",
    "    df2_name='New',  # label used for df2 (defaults to 'df2')\n",
    ")"
   ],
   "metadata": { "collapsed": true, "ExecuteTime":
{ "end_time": "2024-03-07T16:49:52.268885Z", "start_time": "2024-03-07T16:45:38.231325Z" } }, "id": "initial_id", "execution_count": 59 }, { "cell_type": "code", "outputs": [], "source": [ "mismatch = compare.all_mismatch(ignore_matching_cols=True)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:50:11.544899Z", "start_time": "2024-03-07T16:49:52.305809Z" } }, "id": "2f16ab257397f6c9", "execution_count": 60 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": "(117966, 82)" }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mismatch.shape" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:50:11.571499Z", "start_time": "2024-03-07T16:50:11.547923Z" } }, "id": "7b85bbc3f923fe64", "execution_count": 61 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": " identification nationale pp code profession \\\n247 0010002616 72 \n1604 0019101898 91 \n1628 0019102417 91 \n2010 0019300995 93 \n2270 0019303460 93 \n... ... ... \n2217837 810109452887 99 \n2217838 810109452895 99 \n2217850 810109453018 99 \n2217851 810109453026 99 \n2217856 810109453075 99 \n\n code catégorie professionnelle code type savoir-faire \\\n247 C NaN \n1604 C NaN \n1628 C NaN \n2010 C NaN \n2270 C NaN \n... ... ... \n2217837 C NaN \n2217838 C NaN \n2217850 C NaN \n2217851 C NaN \n2217856 C NaN \n\n code savoir-faire code mode exercice numéro siret site \\\n247 NaN S 81157144700010 \n1604 NaN S 77567246200345 \n1628 NaN L NaN \n2010 NaN S 77554456200611 \n2270 NaN S 77554456200116 \n... ... ... ... \n2217837 NaN S NaN \n2217838 NaN S NaN \n2217850 NaN S 81899316400016 \n2217851 NaN S 82459575500011 \n2217856 NaN S NaN \n\n numéro finess site identifiant technique de la structure code rôle \\\n247 010780203 F010780203 NaN \n1604 010780609 F010780609 NaN \n1628 NaN C01910241700 FON-01 \n2010 010784262 F010784262 NaN \n2270 010780591 F010780591 NaN \n... ... ... ... 
\n2217837 970404372 F970404372 312 \n2217838 440015857 F44001585701042021 317 \n2217850 770024271 F770024271 312 \n2217851 690043179 F690043179 312 \n2217856 190007500 F19000750001102007 317 \n\n ... code section tableau pharmaciens_df1 \\\n247 ... NaN \n1604 ... NaN \n1628 ... NaN \n2010 ... NaN \n2270 ... NaN \n... ... ... \n2217837 ... NaN \n2217838 ... NaN \n2217850 ... NaN \n2217851 ... NaN \n2217856 ... NaN \n\n code section tableau pharmaciens_df2 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 NaN \n2217838 NaN \n2217850 NaN \n2217851 NaN \n2217856 NaN \n\n libellé section tableau pharmaciens_df1 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 NaN \n2217838 NaN \n2217850 NaN \n2217851 NaN \n2217856 NaN \n\n libellé section tableau pharmaciens_df2 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 NaN \n2217838 NaN \n2217850 NaN \n2217851 NaN \n2217856 NaN \n\n libellé rôle_df1 \\\n247 NaN \n1604 NaN \n1628 Titulaire de cabinet \n2010 NaN \n2270 NaN \n... ... \n2217837 Autre professionnel \n2217838 Préparateur en pharmacie (officine) \n2217850 Autre professionnel \n2217851 Autre professionnel \n2217856 Préparateur en pharmacie (officine) \n\n libellé rôle_df2 code genre activité_df1 \\\n247 NaN NaN \n1604 NaN NaN \n1628 Titulaire de cabinet NaN \n2010 NaN NaN \n2270 NaN NaN \n... ... ... \n2217837 Autre professionnel GENR12 \n2217838 Préparateur en pharmacie (officine) GENR01 \n2217850 Autre professionnel GENR08 \n2217851 Autre professionnel GENR12 \n2217856 Préparateur en pharmacie (officine) GENR01 \n\n code genre activité_df2 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 GENR12 \n2217838 GENR01 \n2217850 GENR08 \n2217851 GENR12 \n2217856 GENR01 \n\n libellé genre activité_df1 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... 
\n2217837 Encadrement et orga accompagnt social/médico-soc \n2217838 Activité standard de soin ou de pharmacien \n2217850 Activité de coordination et d'orientation \n2217851 Encadrement et orga accompagnt social/médico-soc \n2217856 Activité standard de soin ou de pharmacien \n\n libellé genre activité_df2 \n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 Encadrement et orga accompagnt social/médico-soc \n2217838 Activité standard de soin ou de pharmacien \n2217850 Activité de coordination et d'orientation \n2217851 Encadrement et orga accompagnt social/médico-soc \n2217856 Activité standard de soin ou de pharmacien \n\n[117966 rows x 82 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
identification nationale ppcode professioncode catégorie professionnellecode type savoir-fairecode savoir-fairecode mode exercicenuméro siret sitenuméro finess siteidentifiant technique de la structurecode rôle...code section tableau pharmaciens_df1code section tableau pharmaciens_df2libellé section tableau pharmaciens_df1libellé section tableau pharmaciens_df2libellé rôle_df1libellé rôle_df2code genre activité_df1code genre activité_df2libellé genre activité_df1libellé genre activité_df2
247001000261672CNaNNaNS81157144700010010780203F010780203NaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1604001910189891CNaNNaNS77567246200345010780609F010780609NaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1628001910241791CNaNNaNLNaNNaNC01910241700FON-01...NaNNaNNaNNaNTitulaire de cabinetTitulaire de cabinetNaNNaNNaNNaN
2010001930099593CNaNNaNS77554456200611010784262F010784262NaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2270001930346093CNaNNaNS77554456200116010780591F010780591NaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
221783781010945288799CNaNNaNSNaN970404372F970404372312...NaNNaNNaNNaNAutre professionnelAutre professionnelGENR12GENR12Encadrement et orga accompagnt social/médico-socEncadrement et orga accompagnt social/médico-soc
221783881010945289599CNaNNaNSNaN440015857F44001585701042021317...NaNNaNNaNNaNPréparateur en pharmacie (officine)Préparateur en pharmacie (officine)GENR01GENR01Activité standard de soin ou de pharmacienActivité standard de soin ou de pharmacien
221785081010945301899CNaNNaNS81899316400016770024271F770024271312...NaNNaNNaNNaNAutre professionnelAutre professionnelGENR08GENR08Activité de coordination et d'orientationActivité de coordination et d'orientation
221785181010945302699CNaNNaNS82459575500011690043179F690043179312...NaNNaNNaNNaNAutre professionnelAutre professionnelGENR12GENR12Encadrement et orga accompagnt social/médico-socEncadrement et orga accompagnt social/médico-soc
221785681010945307599CNaNNaNSNaN190007500F19000750001102007317...NaNNaNNaNNaNPréparateur en pharmacie (officine)Préparateur en pharmacie (officine)GENR01GENR01Activité standard de soin ou de pharmacienActivité standard de soin ou de pharmacien
\n

117966 rows × 82 columns

\n
" }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mismatch" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:50:11.736073Z", "start_time": "2024-03-07T16:50:11.584819Z" } }, "id": "9331dbcc33b567fd", "execution_count": 62 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": "(234567, 57)" }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "compare.df1_unq_rows.shape" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:50:11.747805Z", "start_time": "2024-03-07T16:50:11.739092Z" } }, "id": "f38ecf439538fc9b", "execution_count": 63 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": "(255343, 57)" }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "compare.df2_unq_rows.shape" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:50:11.765984Z", "start_time": "2024-03-07T16:50:11.752578Z" } }, "id": "b0a4c80da0847ac0", "execution_count": 64 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": "(1976330, 57)" }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.shape" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:50:11.783138Z", "start_time": "2024-03-07T16:50:11.769029Z" } }, "id": "b9aa33151fa6f235", "execution_count": 65 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": "(1997106, 57)" }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.shape" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-07T16:50:11.795092Z", "start_time": "2024-03-07T16:50:11.785223Z" } }, "id": "aaa69421db146ed7", "execution_count": 66 } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": 
"ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }