Files
professionals_from_sante_fr/Professionals.ipynb
2026-03-05 11:11:10 +00:00

176 lines
5.5 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"tags": [],
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": "Index(['Type d'identifiant PP', 'Identifiant PP',\n 'Identification nationale PP', 'Code civilité d'exercice',\n 'Libellé civilité d'exercice', 'Code civilité', 'Libellé civilité',\n 'Nom d'exercice', 'Prénom d'exercice', 'Code profession',\n 'Libellé profession', 'Code catégorie professionnelle',\n 'Libellé catégorie professionnelle', 'Code type savoir-faire',\n 'Libellé type savoir-faire', 'Code savoir-faire',\n 'Libellé savoir-faire', 'Code mode exercice', 'Libellé mode exercice',\n 'Numéro SIRET site', 'Numéro SIREN site', 'Numéro FINESS site',\n 'Numéro FINESS établissement juridique',\n 'Identifiant technique de la structure', 'Raison sociale site',\n 'Enseigne commerciale site',\n 'Complément destinataire (coord. structure)',\n 'Complément point géographique (coord. structure)',\n 'Numéro Voie (coord. structure)',\n 'Indice répétition voie (coord. structure)',\n 'Code type de voie (coord. structure)',\n 'Libellé type de voie (coord. structure)',\n 'Libellé Voie (coord. structure)',\n 'Mention distribution (coord. structure)',\n 'Bureau cedex (coord. structure)', 'Code postal (coord. structure)',\n 'Code commune (coord. structure)', 'Libellé commune (coord. structure)',\n 'Code pays (coord. structure)', 'Libellé pays (coord. structure)',\n 'Téléphone (coord. structure)', 'Téléphone 2 (coord. structure)',\n 'Télécopie (coord. structure)', 'Adresse e-mail (coord. structure)',\n 'Code Département (structure)', 'Libellé Département (structure)',\n 'Ancien identifiant de la structure', 'Autorité d'enregistrement',\n 'Code secteur d'activité', 'Libellé secteur d'activité',\n 'Code section tableau pharmaciens',\n 'Libellé section tableau pharmaciens', 'Unnamed: 52'],\n dtype='object')"
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import csv\n",
"df = pd.read_csv(\"C:\\_temp\\Professionnels\\Table_Réf_Professionnels_220615.csv\", sep='|', doublequote=False, quoting=csv.QUOTE_NONE,\n",
" dtype=str, na_values='', keep_default_na=False)\n",
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"pycharm": {
"name": "#%%\n"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": "(864328, 53)"
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": "(843643, 53)"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop_duplicates().shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"pycharm": {
"name": "#%%\n"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": "(812168, 53)"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop_duplicates([\"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)']).shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": "(814972, 53)"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop_duplicates([\"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)', \"Identifiant PP\"]).shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": "(839643, 53)"
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop_duplicates([\"Nom d'exercice\", \"Prénom d'exercice\", 'Libellé profession', 'Libellé savoir-faire', 'Bureau cedex (coord. structure)', \"Identifiant PP\", 'Numéro Voie (coord. structure)', 'Indice répétition voie (coord. structure)', 'Libellé type de voie (coord. structure)', 'Libellé Voie (coord. structure)' , 'Mention distribution (coord. structure)']).shape"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}