Files
professionals_from_sante_fr/datacompyProfessionals.ipynb
2026-03-05 11:11:10 +00:00

277 lines
22 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 48,
"id": "66b27b71bfe4a1e6",
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-07T16:35:09.000042Z",
"start_time": "2024-03-07T16:35:08.917093Z"
}
},
"outputs": [],
"source": [
"\n",
"import pandas as pd\n",
"import datacompy\n",
"import csv"
]
},
{
"cell_type": "code",
"outputs": [],
"source": [
"# Baseline extract (file suffix 240103): pipe-delimited text, loaded as-is.\n",
"df1 = pd.read_csv(\n",
"    \"H:\\\\Mon Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_240103.txt\",\n",
"    sep='|',\n",
"    quoting=csv.QUOTE_NONE,  # quote characters are treated as plain data\n",
"    doublequote=False,\n",
"    dtype=str,  # every column stays text, so identifiers keep their leading zeros\n",
"    keep_default_na=False,  # together with na_values below: only an empty field becomes NaN\n",
"    na_values='',\n",
")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-07T16:35:57.199406Z",
"start_time": "2024-03-07T16:35:09.010084Z"
}
},
"id": "2bf7e140e6e3a0cf",
"execution_count": 49
},
{
"cell_type": "code",
"outputs": [],
"source": [
"# Newer extract (file suffix 240307), read with the same parser options as df1.\n",
"df2 = pd.read_csv(\n",
"    \"H:\\\\Mon Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\Table_Réf_Professionnels_240307.txt\",\n",
"    sep='|',\n",
"    quoting=csv.QUOTE_NONE,  # quote characters are treated as plain data\n",
"    doublequote=False,\n",
"    dtype=str,  # every column stays text, so identifiers keep their leading zeros\n",
"    keep_default_na=False,  # together with na_values below: only an empty field becomes NaN\n",
"    na_values='',\n",
")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-07T16:36:52.304783Z",
"start_time": "2024-03-07T16:35:57.203517Z"
}
},
"id": "f55e34a990ae89a9",
"execution_count": 50
},
{
"cell_type": "code",
"outputs": [],
"source": [
"# Columns used together as the join key when pairing rows of df1 with rows of df2.\n",
"join_keys = [\n",
"    'Identification nationale PP',\n",
"    'Code profession',\n",
"    'Code catégorie professionnelle',\n",
"    'Code type savoir-faire',\n",
"    'Code savoir-faire',\n",
"    'Code mode exercice',\n",
"    'Numéro SIRET site',\n",
"    'Numéro FINESS site',\n",
"    'Identifiant technique de la structure',\n",
"    'Code rôle',\n",
"]\n",
"\n",
"# Compare the two extracts row by row on that key.\n",
"compare = datacompy.Compare(\n",
"    df1,\n",
"    df2,\n",
"    join_columns=join_keys,\n",
"    abs_tol=0,  # zero absolute tolerance, stated explicitly\n",
"    rel_tol=0,  # zero relative tolerance, stated explicitly\n",
"    df1_name='Original',  # label used for df1\n",
"    df2_name='New',  # label used for df2\n",
")"
],
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-03-07T16:49:52.268885Z",
"start_time": "2024-03-07T16:45:38.231325Z"
}
},
"id": "initial_id",
"execution_count": 59
},
{
"cell_type": "code",
"outputs": [],
"source": [
"# Key-matched rows where at least one compared column differs between the two extracts;\n",
"# columns that match on every row are left out of the result.\n",
"mismatch = compare.all_mismatch(\n",
"    ignore_matching_cols=True,\n",
")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-07T16:50:11.544899Z",
"start_time": "2024-03-07T16:49:52.305809Z"
}
},
"id": "2f16ab257397f6c9",
"execution_count": 60
},
{
"cell_type": "code",
"outputs": [
{
"data": {
"text/plain": "(117966, 82)"
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the mismatch table.\n",
"mismatch.shape"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-07T16:50:11.571499Z",
"start_time": "2024-03-07T16:50:11.547923Z"
}
},
"id": "7b85bbc3f923fe64",
"execution_count": 61
},
{
"cell_type": "code",
"outputs": [
{
"data": {
"text/plain": " identification nationale pp code profession \\\n247 0010002616 72 \n1604 0019101898 91 \n1628 0019102417 91 \n2010 0019300995 93 \n2270 0019303460 93 \n... ... ... \n2217837 810109452887 99 \n2217838 810109452895 99 \n2217850 810109453018 99 \n2217851 810109453026 99 \n2217856 810109453075 99 \n\n code catégorie professionnelle code type savoir-faire \\\n247 C NaN \n1604 C NaN \n1628 C NaN \n2010 C NaN \n2270 C NaN \n... ... ... \n2217837 C NaN \n2217838 C NaN \n2217850 C NaN \n2217851 C NaN \n2217856 C NaN \n\n code savoir-faire code mode exercice numéro siret site \\\n247 NaN S 81157144700010 \n1604 NaN S 77567246200345 \n1628 NaN L NaN \n2010 NaN S 77554456200611 \n2270 NaN S 77554456200116 \n... ... ... ... \n2217837 NaN S NaN \n2217838 NaN S NaN \n2217850 NaN S 81899316400016 \n2217851 NaN S 82459575500011 \n2217856 NaN S NaN \n\n numéro finess site identifiant technique de la structure code rôle \\\n247 010780203 F010780203 NaN \n1604 010780609 F010780609 NaN \n1628 NaN C01910241700 FON-01 \n2010 010784262 F010784262 NaN \n2270 010780591 F010780591 NaN \n... ... ... ... \n2217837 970404372 F970404372 312 \n2217838 440015857 F44001585701042021 317 \n2217850 770024271 F770024271 312 \n2217851 690043179 F690043179 312 \n2217856 190007500 F19000750001102007 317 \n\n ... code section tableau pharmaciens_df1 \\\n247 ... NaN \n1604 ... NaN \n1628 ... NaN \n2010 ... NaN \n2270 ... NaN \n... ... ... \n2217837 ... NaN \n2217838 ... NaN \n2217850 ... NaN \n2217851 ... NaN \n2217856 ... NaN \n\n code section tableau pharmaciens_df2 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 NaN \n2217838 NaN \n2217850 NaN \n2217851 NaN \n2217856 NaN \n\n libellé section tableau pharmaciens_df1 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 NaN \n2217838 NaN \n2217850 NaN \n2217851 NaN \n2217856 NaN \n\n libellé section tableau pharmaciens_df2 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... 
\n2217837 NaN \n2217838 NaN \n2217850 NaN \n2217851 NaN \n2217856 NaN \n\n libellé rôle_df1 \\\n247 NaN \n1604 NaN \n1628 Titulaire de cabinet \n2010 NaN \n2270 NaN \n... ... \n2217837 Autre professionnel \n2217838 Préparateur en pharmacie (officine) \n2217850 Autre professionnel \n2217851 Autre professionnel \n2217856 Préparateur en pharmacie (officine) \n\n libellé rôle_df2 code genre activité_df1 \\\n247 NaN NaN \n1604 NaN NaN \n1628 Titulaire de cabinet NaN \n2010 NaN NaN \n2270 NaN NaN \n... ... ... \n2217837 Autre professionnel GENR12 \n2217838 Préparateur en pharmacie (officine) GENR01 \n2217850 Autre professionnel GENR08 \n2217851 Autre professionnel GENR12 \n2217856 Préparateur en pharmacie (officine) GENR01 \n\n code genre activité_df2 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 GENR12 \n2217838 GENR01 \n2217850 GENR08 \n2217851 GENR12 \n2217856 GENR01 \n\n libellé genre activité_df1 \\\n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 Encadrement et orga accompagnt social/médico-soc \n2217838 Activité standard de soin ou de pharmacien \n2217850 Activité de coordination et d'orientation \n2217851 Encadrement et orga accompagnt social/médico-soc \n2217856 Activité standard de soin ou de pharmacien \n\n libellé genre activité_df2 \n247 NaN \n1604 NaN \n1628 NaN \n2010 NaN \n2270 NaN \n... ... \n2217837 Encadrement et orga accompagnt social/médico-soc \n2217838 Activité standard de soin ou de pharmacien \n2217850 Activité de coordination et d'orientation \n2217851 Encadrement et orga accompagnt social/médico-soc \n2217856 Activité standard de soin ou de pharmacien \n\n[117966 rows x 82 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>identification nationale pp</th>\n <th>code profession</th>\n <th>code catégorie professionnelle</th>\n <th>code type savoir-faire</th>\n <th>code savoir-faire</th>\n <th>code mode exercice</th>\n <th>numéro siret site</th>\n <th>numéro finess site</th>\n <th>identifiant technique de la structure</th>\n <th>code rôle</th>\n <th>...</th>\n <th>code section tableau pharmaciens_df1</th>\n <th>code section tableau pharmaciens_df2</th>\n <th>libellé section tableau pharmaciens_df1</th>\n <th>libellé section tableau pharmaciens_df2</th>\n <th>libellé rôle_df1</th>\n <th>libellé rôle_df2</th>\n <th>code genre activité_df1</th>\n <th>code genre activité_df2</th>\n <th>libellé genre activité_df1</th>\n <th>libellé genre activité_df2</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>247</th>\n <td>0010002616</td>\n <td>72</td>\n <td>C</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>S</td>\n <td>81157144700010</td>\n <td>010780203</td>\n <td>F010780203</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1604</th>\n <td>0019101898</td>\n <td>91</td>\n <td>C</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>S</td>\n <td>77567246200345</td>\n <td>010780609</td>\n <td>F010780609</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1628</th>\n <td>0019102417</td>\n <td>91</td>\n <td>C</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>L</td>\n <td>NaN</td>\n <td>NaN</td>\n 
<td>C01910241700</td>\n <td>FON-01</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>Titulaire de cabinet</td>\n <td>Titulaire de cabinet</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2010</th>\n <td>0019300995</td>\n <td>93</td>\n <td>C</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>S</td>\n <td>77554456200611</td>\n <td>010784262</td>\n <td>F010784262</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2270</th>\n <td>0019303460</td>\n <td>93</td>\n <td>C</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>S</td>\n <td>77554456200116</td>\n <td>010780591</td>\n <td>F010780591</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>2217837</th>\n <td>810109452887</td>\n <td>99</td>\n <td>C</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>S</td>\n <td>NaN</td>\n <td>970404372</td>\n <td>F970404372</td>\n <td>312</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>Autre professionnel</td>\n <td>Autre professionnel</td>\n <td>GENR12</td>\n <td>GENR12</td>\n <td>Encadrement et orga accompagnt social/médico-soc</td>\n <td>Encadrement et orga accompagnt social/médico-soc</td>\n </tr>\n <tr>\n <th>2217838</th>\n <td>810109452895</td>\n <td>99</td>\n <td>C</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>S</td>\n <td>NaN</td>\n <td>440015857</td>\n 
<td>F44001585701042021</td>\n <td>317</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>Préparateur en pharmacie (officine)</td>\n <td>Préparateur en pharmacie (officine)</td>\n <td>GENR01</td>\n <td>GENR01</td>\n <td>Activité standard de soin ou de pharmacien</td>\n <td>Activité standard de soin ou de pharmacien</td>\n </tr>\n <tr>\n <th>2217850</th>\n <td>810109453018</td>\n <td>99</td>\n <td>C</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>S</td>\n <td>81899316400016</td>\n <td>770024271</td>\n <td>F770024271</td>\n <td>312</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>Autre professionnel</td>\n <td>Autre professionnel</td>\n <td>GENR08</td>\n <td>GENR08</td>\n <td>Activité de coordination et d'orientation</td>\n <td>Activité de coordination et d'orientation</td>\n </tr>\n <tr>\n <th>2217851</th>\n <td>810109453026</td>\n <td>99</td>\n <td>C</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>S</td>\n <td>82459575500011</td>\n <td>690043179</td>\n <td>F690043179</td>\n <td>312</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>Autre professionnel</td>\n <td>Autre professionnel</td>\n <td>GENR12</td>\n <td>GENR12</td>\n <td>Encadrement et orga accompagnt social/médico-soc</td>\n <td>Encadrement et orga accompagnt social/médico-soc</td>\n </tr>\n <tr>\n <th>2217856</th>\n <td>810109453075</td>\n <td>99</td>\n <td>C</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>S</td>\n <td>NaN</td>\n <td>190007500</td>\n <td>F19000750001102007</td>\n <td>317</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>Préparateur en pharmacie (officine)</td>\n <td>Préparateur en pharmacie (officine)</td>\n <td>GENR01</td>\n <td>GENR01</td>\n <td>Activité standard de soin ou de pharmacien</td>\n <td>Activité standard de soin ou de pharmacien</td>\n </tr>\n </tbody>\n</table>\n<p>117966 rows × 82 columns</p>\n</div>"
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mismatch"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-07T16:50:11.736073Z",
"start_time": "2024-03-07T16:50:11.584819Z"
}
},
"id": "9331dbcc33b567fd",
"execution_count": 62
},
{
"cell_type": "code",
"outputs": [
{
"data": {
"text/plain": "(234567, 57)"
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the rows whose join key exists only in df1 ('Original').\n",
"compare.df1_unq_rows.shape"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-07T16:50:11.747805Z",
"start_time": "2024-03-07T16:50:11.739092Z"
}
},
"id": "f38ecf439538fc9b",
"execution_count": 63
},
{
"cell_type": "code",
"outputs": [
{
"data": {
"text/plain": "(255343, 57)"
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the rows whose join key exists only in df2 ('New').\n",
"compare.df2_unq_rows.shape"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-07T16:50:11.765984Z",
"start_time": "2024-03-07T16:50:11.752578Z"
}
},
"id": "b0a4c80da0847ac0",
"execution_count": 64
},
{
"cell_type": "code",
"outputs": [
{
"data": {
"text/plain": "(1976330, 57)"
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the full baseline extract, for scale against the counts above.\n",
"df1.shape"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-07T16:50:11.783138Z",
"start_time": "2024-03-07T16:50:11.769029Z"
}
},
"id": "b9aa33151fa6f235",
"execution_count": 65
},
{
"cell_type": "code",
"outputs": [
{
"data": {
"text/plain": "(1997106, 57)"
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the full newer extract, for scale against the counts above.\n",
"df2.shape"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-03-07T16:50:11.795092Z",
"start_time": "2024-03-07T16:50:11.785223Z"
}
},
"id": "aaa69421db146ed7",
"execution_count": 66
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}