diff --git a/.gitignore b/.gitignore index 0869605..f533e4f 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ ENV/ *.zip *.txt *.csv +*.xlsx diff --git a/Professionals_Activities_Inconsistencies.ipynb b/Professionals_Activities_Inconsistencies.ipynb index 39dde05..81de874 100644 --- a/Professionals_Activities_Inconsistencies.ipynb +++ b/Professionals_Activities_Inconsistencies.ipynb @@ -1,30 +1,15 @@ { "cells": [ { + "cell_type": "code", + "execution_count": 57, + "id": "58db5082e27759f7", "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:45:53.794236Z", "start_time": "2025-08-19T22:45:51.445477Z" } }, - "cell_type": "code", - "source": [ - "%%time\n", - "import polars as pd\n", - "import csv\n", - "folder = \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"\n", - "input_file = \"Table_Réf_Professionnels_250815.txt\"\n", - "output_file = \"Table_Réf_Professionnels_inconsistencies\"\n", - "output_extension = \".csv\"\n", - "df = pd.read_csv(f\"{folder}{input_file}\",\n", - " separator='|',\n", - " quote_char=None,\n", - " null_values='',\n", - " infer_schema_length=0) # Read all columns as strings\n", - "df = df.with_row_index('index')\n", - "df.columns\n" - ], - "id": "58db5082e27759f7", "outputs": [ { "name": "stdout", @@ -102,32 +87,33 @@ "output_type": "execute_result" } ], - "execution_count": 57 + "source": [ + "%%time\n", + "import polars as pd\n", + "import csv\n", + "folder = \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"\n", + "input_file = \"Table_Réf_Professionnels_260319.txt\"\n", + "output_file = \"Table_Réf_Professionnels_inconsistencies_new\"\n", + "output_extension = \".csv\"\n", + "df = pd.read_csv(f\"{folder}{input_file}\",\n", + " separator='|',\n", + " quote_char=None,\n", + " null_values='',\n", + " infer_schema_length=0) # Read all columns as strings\n", + "df = df.with_row_index('index')\n", + "df.columns\n" + ] }, { + "cell_type": "code", + "execution_count": 58, + "id": "7d9b7562c09955", "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:45:54.952210Z", "start_time": "2025-08-19T22:45:53.873718Z" } }, - "cell_type": "code", - "source": [ - "%%time\n", - "df1 = (\n", - " df.with_columns(\n", - " (pd.col(\"Nom d'exercice\") + \" \" + pd.col(\"Prénom d'exercice\")).alias(\"Nom_Prénom\")\n", - " )\n", - " .with_columns(\n", - " pd.col(\"Nom_Prénom\").n_unique().over(\"Identifiant PP\").alias(\"Count\")\n", - " )\n", - " .filter(pd.col(\"Count\") > 1)\n", - " .sort([\"Identifiant PP\", \"index\"])\n", - " .select(\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\")\n", - ")\n", - "df1\n" - ], - "id": "7d9b7562c09955", "outputs": [ { "name": "stdout", @@ -139,6 +125,16 @@ }, { "data": { + "text/html": [ + "
\n", + "shape: (9_108, 5)
indexIdentifiant PPCountNom d'exercicePrénom d'exercice
u32stru32strstr
1352933"10000034180"2"DUWAT-GEORGES""GHISLAINE"
1352934"10000034180"2"GEORGES""GHISLAINE"
261"10000040062"2"MEYER""Nicolas"
262"10000040062"2"MEYER""Nicolas"
811196"10000040062"2"MEYER""NICOLAS"
2162425"10111110721"2"BARREAU""Nadège"
268237"10111112636"2"GIRAUDET""MEGGIE"
1892318"10111112636"2"GIRAUDET""Meggie"
269544"10111320304"2"Sengel""Coralie"
1352396"10111320304"2"SENGEL""Coralie"
" + ], "text/plain": [ "shape: (9_108, 5)\n", "┌─────────┬────────────────┬───────┬────────────────┬───────────────────┐\n", @@ -158,16 +154,6 @@ "│ 269544 ┆ 10111320304 ┆ 2 ┆ Sengel ┆ Coralie │\n", "│ 1352396 ┆ 10111320304 ┆ 2 ┆ SENGEL ┆ Coralie │\n", "└─────────┴────────────────┴───────┴────────────────┴───────────────────┘" - ], - "text/html": [ - "
\n", - "shape: (9_108, 5)
indexIdentifiant PPCountNom d'exercicePrénom d'exercice
u32stru32strstr
1352933"10000034180"2"DUWAT-GEORGES""GHISLAINE"
1352934"10000034180"2"GEORGES""GHISLAINE"
261"10000040062"2"MEYER""Nicolas"
262"10000040062"2"MEYER""Nicolas"
811196"10000040062"2"MEYER""NICOLAS"
2162425"10111110721"2"BARREAU""Nadège"
268237"10111112636"2"GIRAUDET""MEGGIE"
1892318"10111112636"2"GIRAUDET""Meggie"
269544"10111320304"2"Sengel""Coralie"
1352396"10111320304"2"SENGEL""Coralie"
" ] }, "execution_count": 58, @@ -175,39 +161,11 @@ "output_type": "execute_result" } ], - "execution_count": 58 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-19T22:45:55.090712Z", - "start_time": "2025-08-19T22:45:55.072647Z" - } - }, - "cell_type": "code", - "source": [ - "df1.write_csv(f\"{folder}{output_file}-Names_Variations_Strict{output_extension}\",\n", - " separator='|',\n", - " quote_style=\"never\",\n", - " line_terminator='\\n')\n" - ], - "id": "c418a6ea7abd77b", - "outputs": [], - "execution_count": 59 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-19T22:45:56.962873Z", - "start_time": "2025-08-19T22:45:55.259223Z" - } - }, - "cell_type": "code", "source": [ "%%time\n", - "df2 = (\n", + "df1 = (\n", " df.with_columns(\n", - " (pd.col(\"Nom d'exercice\").str.to_lowercase() + \" \" + pd.col(\"Prénom d'exercice\").str.to_lowercase()).alias(\"Nom_Prénom\")\n", + " (pd.col(\"Nom d'exercice\") + \" \" + pd.col(\"Prénom d'exercice\")).alias(\"Nom_Prénom\")\n", " )\n", " .with_columns(\n", " pd.col(\"Nom_Prénom\").n_unique().over(\"Identifiant PP\").alias(\"Count\")\n", @@ -216,9 +174,37 @@ " .sort([\"Identifiant PP\", \"index\"])\n", " .select(\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\")\n", ")\n", - "df2\n" - ], + "df1\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "c418a6ea7abd77b", + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-19T22:45:55.090712Z", + "start_time": "2025-08-19T22:45:55.072647Z" + } + }, + "outputs": [], + "source": [ + "df1.write_csv(f\"{folder}{output_file}-Names_Variations_Strict{output_extension}\",\n", + " separator='|',\n", + " quote_style=\"never\",\n", + " line_terminator='\\n')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, "id": "9d94b716364356c7", + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-19T22:45:56.962873Z", + "start_time": "2025-08-19T22:45:55.259223Z" + } + }, "outputs": [ { "name": "stdout", @@ -230,6 +216,16 @@ }, { "data": { + "text/html": [ + "
\n", + "shape: (5_426, 5)
indexIdentifiant PPCountNom d'exercicePrénom d'exercice
u32stru32strstr
1352933"10000034180"2"DUWAT-GEORGES""GHISLAINE"
1352934"10000034180"2"GEORGES""GHISLAINE"
1353009"10000046051"2"STUDER""AGNES"
1623173"10000046051"2"JURION""AGNES"
270462"10000101518"2"BARREYRE""SANDRINE"
1619731"10110987236"2"ROGIER""MATHILDE"
808810"10111077417"2"DOUVIER""FRANCETTE"
2161999"10111077417"2"D'ELLOY""FRANCETTE"
538415"10111110721"2"ROCHEPEAU""Nadège"
2162425"10111110721"2"BARREAU""Nadège"
" + ], "text/plain": [ "shape: (5_426, 5)\n", "┌─────────┬────────────────┬───────┬────────────────┬───────────────────┐\n", @@ -249,7 +245,68 @@ "│ 538415 ┆ 10111110721 ┆ 2 ┆ ROCHEPEAU ┆ Nadège │\n", "│ 2162425 ┆ 10111110721 ┆ 2 ┆ BARREAU ┆ Nadège │\n", "└─────────┴────────────────┴───────┴────────────────┴───────────────────┘" - ], + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "df2 = (\n", + " df.with_columns(\n", + " (pd.col(\"Nom d'exercice\").str.to_lowercase() + \" \" + pd.col(\"Prénom d'exercice\").str.to_lowercase()).alias(\"Nom_Prénom\")\n", + " )\n", + " .with_columns(\n", + " pd.col(\"Nom_Prénom\").n_unique().over(\"Identifiant PP\").alias(\"Count\")\n", + " )\n", + " .filter(pd.col(\"Count\") > 1)\n", + " .sort([\"Identifiant PP\", \"index\"])\n", + " .select(\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\")\n", + ")\n", + "df2\n" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "18aab4499103491a", + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-19T22:45:57.433036Z", + "start_time": "2025-08-19T22:45:57.417970Z" + } + }, + "outputs": [], + "source": [ + "df2.write_csv(f\"{folder}{output_file}-Names_Variations_Insensitive{output_extension}\",\n", + " separator='|',\n", + " quote_style=\"never\",\n", + " line_terminator='\\n')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "8e4e3e22f16fea1c", + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-19T22:46:02.915526Z", + "start_time": "2025-08-19T22:45:57.710258Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: total: 8.59 s\n", + "Wall time: 5.19 s\n" + ] + }, + { + "data": { "text/html": [ "
\n", - "shape: (5_426, 5)
indexIdentifiant PPCountNom d'exercicePrénom d'exercice
u32stru32strstr
1352933"10000034180"2"DUWAT-GEORGES""GHISLAINE"
1352934"10000034180"2"GEORGES""GHISLAINE"
1353009"10000046051"2"STUDER""AGNES"
1623173"10000046051"2"JURION""AGNES"
270462"10000101518"2"BARREYRE""SANDRINE"
1619731"10110987236"2"ROGIER""MATHILDE"
808810"10111077417"2"DOUVIER""FRANCETTE"
2161999"10111077417"2"D'ELLOY""FRANCETTE"
538415"10111110721"2"ROCHEPEAU""Nadège"
2162425"10111110721"2"BARREAU""Nadège"
" + "shape: (3_584, 6)
indexIdentifiant PPCountNom d'exercicePrénom d'exerciceNom_Prénom_Nettoyé
u32stru32strstrstr
1352933"10000034180"2"DUWAT-GEORGES""GHISLAINE""duwat georges ghislaine"
1352934"10000034180"2"GEORGES""GHISLAINE""georges ghislaine"
1353009"10000046051"2"STUDER""AGNES""studer agnes"
1623173"10000046051"2"JURION""AGNES""jurion agnes"
270462"10000101518"2"BARREYRE""SANDRINE""barreyre sandrine"
1619731"10110987236"2"ROGIER""MATHILDE""rogier mathilde"
808810"10111077417"2"DOUVIER""FRANCETTE""douvier francette"
2161999"10111077417"2"D'ELLOY""FRANCETTE""d elloy francette"
538415"10111110721"2"ROCHEPEAU""Nadège""rochepeau nadege"
2162425"10111110721"2"BARREAU""Nadège""barreau nadege"
" + ], + "text/plain": [ + "shape: (3_584, 6)\n", + "┌─────────┬────────────────┬───────┬────────────────┬───────────────────┬─────────────────────────┐\n", + "│ index ┆ Identifiant PP ┆ Count ┆ Nom d'exercice ┆ Prénom d'exercice ┆ Nom_Prénom_Nettoyé │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u32 ┆ str ┆ u32 ┆ str ┆ str ┆ str │\n", + "╞═════════╪════════════════╪═══════╪════════════════╪═══════════════════╪═════════════════════════╡\n", + "│ 1352933 ┆ 10000034180 ┆ 2 ┆ DUWAT-GEORGES ┆ GHISLAINE ┆ duwat georges ghislaine │\n", + "│ 1352934 ┆ 10000034180 ┆ 2 ┆ GEORGES ┆ GHISLAINE ┆ georges ghislaine │\n", + "│ 1353009 ┆ 10000046051 ┆ 2 ┆ STUDER ┆ AGNES ┆ studer agnes │\n", + "│ 1623173 ┆ 10000046051 ┆ 2 ┆ JURION ┆ AGNES ┆ jurion agnes │\n", + "│ 270462 ┆ 10000101518 ┆ 2 ┆ BARREYRE ┆ SANDRINE ┆ barreyre sandrine │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 1619731 ┆ 10110987236 ┆ 2 ┆ ROGIER ┆ MATHILDE ┆ rogier mathilde │\n", + "│ 808810 ┆ 10111077417 ┆ 2 ┆ DOUVIER ┆ FRANCETTE ┆ douvier francette │\n", + "│ 2161999 ┆ 10111077417 ┆ 2 ┆ D'ELLOY ┆ FRANCETTE ┆ d elloy francette │\n", + "│ 538415 ┆ 10111110721 ┆ 2 ┆ ROCHEPEAU ┆ Nadège ┆ rochepeau nadege │\n", + "│ 2162425 ┆ 10111110721 ┆ 2 ┆ BARREAU ┆ Nadège ┆ barreau nadege │\n", + "└─────────┴────────────────┴───────┴────────────────┴───────────────────┴─────────────────────────┘" ] }, - "execution_count": 60, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], - "execution_count": 60 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-19T22:45:57.433036Z", - "start_time": "2025-08-19T22:45:57.417970Z" - } - }, - "cell_type": "code", - "source": [ - "df2.write_csv(f\"{folder}{output_file}-Names_Variations_Insensitive{output_extension}\",\n", - " separator='|',\n", - " quote_style=\"never\",\n", - " line_terminator='\\n')\n" - ], - "id": "18aab4499103491a", - "outputs": [], - "execution_count": 61 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-19T22:46:02.915526Z", - "start_time": "2025-08-19T22:45:57.710258Z" - } - }, - "cell_type": "code", "source": [ "%%time\n", "df3 = (\n", @@ -325,121 +374,36 @@ " .select(\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\", \"Nom_Prénom_Nettoyé\")\n", ")\n", "df3\n" - ], - "id": "8e4e3e22f16fea1c", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: total: 8.59 s\n", - "Wall time: 5.19 s\n" - ] - }, - { - "data": { - "text/plain": [ - "shape: (3_584, 6)\n", - "┌─────────┬────────────────┬───────┬────────────────┬───────────────────┬─────────────────────────┐\n", - "│ index ┆ Identifiant PP ┆ Count ┆ Nom d'exercice ┆ Prénom d'exercice ┆ Nom_Prénom_Nettoyé │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ u32 ┆ str ┆ u32 ┆ str ┆ str ┆ str │\n", - "╞═════════╪════════════════╪═══════╪════════════════╪═══════════════════╪═════════════════════════╡\n", - "│ 1352933 ┆ 10000034180 ┆ 2 ┆ DUWAT-GEORGES ┆ GHISLAINE ┆ duwat georges ghislaine │\n", - "│ 1352934 ┆ 10000034180 ┆ 2 ┆ GEORGES ┆ GHISLAINE ┆ georges ghislaine │\n", - "│ 1353009 ┆ 10000046051 ┆ 2 ┆ STUDER ┆ AGNES ┆ studer agnes │\n", - "│ 1623173 ┆ 10000046051 ┆ 2 ┆ JURION ┆ AGNES ┆ jurion agnes │\n", - "│ 270462 ┆ 10000101518 ┆ 2 ┆ BARREYRE ┆ SANDRINE ┆ barreyre sandrine │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 1619731 ┆ 10110987236 ┆ 2 ┆ ROGIER ┆ MATHILDE ┆ rogier mathilde │\n", - "│ 808810 ┆ 10111077417 ┆ 2 ┆ DOUVIER ┆ FRANCETTE ┆ douvier francette │\n", - "│ 2161999 ┆ 10111077417 ┆ 2 ┆ D'ELLOY ┆ FRANCETTE ┆ d elloy francette │\n", - "│ 538415 ┆ 10111110721 ┆ 2 ┆ ROCHEPEAU ┆ Nadège ┆ rochepeau nadege │\n", - "│ 2162425 ┆ 10111110721 ┆ 2 ┆ BARREAU ┆ Nadège ┆ barreau nadege │\n", - "└─────────┴────────────────┴───────┴────────────────┴───────────────────┴─────────────────────────┘" - ], - "text/html": [ - "
\n", - "shape: (3_584, 6)
indexIdentifiant PPCountNom d'exercicePrénom d'exerciceNom_Prénom_Nettoyé
u32stru32strstrstr
1352933"10000034180"2"DUWAT-GEORGES""GHISLAINE""duwat georges ghislaine"
1352934"10000034180"2"GEORGES""GHISLAINE""georges ghislaine"
1353009"10000046051"2"STUDER""AGNES""studer agnes"
1623173"10000046051"2"JURION""AGNES""jurion agnes"
270462"10000101518"2"BARREYRE""SANDRINE""barreyre sandrine"
1619731"10110987236"2"ROGIER""MATHILDE""rogier mathilde"
808810"10111077417"2"DOUVIER""FRANCETTE""douvier francette"
2161999"10111077417"2"D'ELLOY""FRANCETTE""d elloy francette"
538415"10111110721"2"ROCHEPEAU""Nadège""rochepeau nadege"
2162425"10111110721"2"BARREAU""Nadège""barreau nadege"
" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 62 + ] }, { + "cell_type": "code", + "execution_count": 63, + "id": "aab2ae2e91a7190c", "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:03.290835Z", "start_time": "2025-08-19T22:46:03.280259Z" } }, - "cell_type": "code", + "outputs": [], "source": [ "df3.write_csv(f\"{folder}{output_file}-Names_Variations_Normalized{output_extension}\",\n", " separator='|',\n", " quote_style=\"never\",\n", " line_terminator='\\n')\n" - ], - "id": "aab2ae2e91a7190c", - "outputs": [], - "execution_count": 63 + ] }, { + "cell_type": "code", + "execution_count": 64, + "id": "3c2f2bb5fc3c2a5e", "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:07.814563Z", "start_time": "2025-08-19T22:46:03.493442Z" } }, - "cell_type": "code", - "source": [ - "%%time\n", - "all_columns = df.columns\n", - "start_col = 'Raison sociale site'\n", - "end_col = \"Libellé secteur d'activité\"\n", - "start_col_index = all_columns.index(start_col)\n", - "end_col_index = all_columns.index(end_col)\n", - "site_info_cols = all_columns[start_col_index : end_col_index + 1]\n", - "if \"Autorité d'enregistrement\" in site_info_cols:\n", - " site_info_cols.remove(\"Autorité d'enregistrement\")\n", - "\n", - "df4 = (\n", - " df\n", - " .filter(pd.col('Numéro FINESS site').is_null())\n", - " .filter(\n", - " pd.col('Numéro SIRET site').is_not_null() | pd.col('Identifiant technique de la structure').is_not_null()\n", - " )\n", - " .with_columns(\n", - " pd.coalesce(\n", - " pd.col('Numéro SIRET site'),\n", - " pd.col('Identifiant technique de la structure')\n", - " ).alias('Site_Identifier')\n", - " )\n", - " .with_columns(\n", - " pd.struct(site_info_cols).n_unique().over(['Identifiant PP', 'Site_Identifier']).alias('Site_Info_Variations_Count')\n", - " )\n", - " .filter(pd.col('Site_Info_Variations_Count') > 1)\n", - " .sort(['Identifiant PP', 'Site_Identifier', 'index'])\n", - " .select(['index', \n", - " 'Identifiant PP', \n", - " 'Site_Identifier', \n", - " 'Numéro SIRET site', \n", - " 'Identifiant technique de la structure', \n", - " 'Site_Info_Variations_Count'] + site_info_cols)\n", - ")\n", - "df4" - ], - "id": "3c2f2bb5fc3c2a5e", "outputs": [ { "name": "stdout", @@ -451,6 +415,16 @@ }, { "data": { + "text/html": [ + "
\n", + "shape: (98, 31)
indexIdentifiant PPSite_IdentifierNuméro SIRET siteIdentifiant technique de la structureSite_Info_Variations_CountRaison sociale siteEnseigne commerciale siteComplément destinataire (coord. structure)Complément point géographique (coord. structure)Numéro Voie (coord. structure)Indice répétition voie (coord. structure)Code type de voie (coord. structure)Libellé type de voie (coord. structure)Libellé Voie (coord. structure)Mention distribution (coord. structure)Bureau cedex (coord. structure)Code postal (coord. structure)Code commune (coord. structure)Libellé commune (coord. structure)Code pays (coord. structure)Libellé pays (coord. structure)Téléphone (coord. structure)Téléphone 2 (coord. structure)Télécopie (coord. structure)Adresse e-mail (coord. structure)Code Département (structure)Libellé Département (structure)Ancien identifiant de la structureCode secteur d'activitéLibellé secteur d'activité
u32strstrstrstru32strstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstr
270597"10000116383""30980220500505""30980220500505""R10100000198782"2"OEUVRES HOSPITALIERES FRANCAIS…"ORDRE DE MALTE FRANCE""ORDRE DE MALTE FRANCE"null"42"nullnullnull"RUE DES VOLONTAIRES"null"75015 PARIS""75015""75056""Paris""99000""France"nullnullnullnullnullnull"330980220500505""SA28""Asso et orga humanitaire"
1353470"10000116383""30980220500505""30980220500505""R10100000779807"2"OEUVRE HOSP FRANC DE L'ORDRE D…nullnullnull"49"null"R""Rue""DE LA CHAPELLE"null"75018 PARIS 18E  ARRONDISSEMEN…"75018""75118""Paris 18e  Arrondissement"nullnullnullnullnullnullnullnull"330980220500505""SA28""Asso et orga humanitaire"
4214"10000536309""18003502402369""18003502402369""R10100000050224"2"DRSM PAYS DE LA LOIREELSM 44""SITE NANTES"nullnull"9"null"R""Rue""GAETAN RONDEAU""BP""44203 NANTES""44203""44109""Nantes"nullnullnullnullnullnullnullnull"318003502402369""SA24""Organisme de Sécurité Sociale"
1627221"10000536309""18003502402369""18003502402369""R10100000049799"2"DRSM PAYS DE LOIREELSM 49""SITE CHOLET"nullnull"2"null"R""Rue""SAINT ELOI""BP""49321 CHOLET""49321""49099""Cholet"nullnullnullnullnullnullnullnull"318003502402369""SA24""Organisme de Sécurité Sociale"
816501"10000667187""18003502402369""18003502402369""R10100000049794"3"DRSM NORD PICARDIEELSM 59""SITE MAUBEUGE"nullnullnullnull"PL""Place""DE WATTIGNIES""BP""59603 MAUBEUGE""59603""59392""Maubeuge"nullnullnullnullnullnullnullnull"318003502402369""SA24""Organisme de Sécurité Sociale"
532248"10110592168""88085935000014""88085935000014""R10100000325887"2"FAREVA PAU"nullnullnullnullnull"AV""Avenue""DU BEARN"null"64320 IDRON""64320""64269""Idron"nullnull"0559402100"null"0559402119"nullnullnull"388085935000014""SA32""Fab. Exploit. Import. Méd. DM"
1344743"10110597498""13000800600038""13000800600038""R10100000097229"2"AGENCE REGIONALE SANTE PAYS LO…nullnullnull"17"null"BD""Boulevard""GASTON DOUMERGUE"null"44262 NANTES""44262""44109""Nantes"nullnullnullnullnullnullnullnull"313000800600038""SA24""Organisme de Sécurité Sociale"
2156205"10110597498""13000800600038""13000800600038""R10100000097229"2"AGENCE REGIONALE SANTE PAYS LO…null"DELEGATION TERRITORIALE"null"2"null"BD""Boulevard""MURAT"null"53000 LAVAL""53000""53130""Laval"nullnullnullnullnullnullnullnull"313000800600038""SA24""Organisme de Sécurité Sociale"
1618789"10110910345""18003502401098""18003502401098""R10100000398898"2"CAISSE NATIONALE DE L'ASSURANC…"DRSM DIRECTION REG. DU SERVICE…"QUARTIER DU LAC"null"80"nullnullnull"AVENUE DE LA JALLERE""BP 260""33300 BORDEAUX""33300""33063""Bordeaux""99000""France"nullnullnullnullnullnull"318003502401098""SA24""Organisme de Sécurité Sociale"
1889827"10110910345""18003502401098""18003502401098""R10100000398898"2"CAISSE NATIONALE DE L'ASSURANC…"DRSM DIRECTION REG. DU SERVICE…nullnull"207"null"R""Rue""FONTAINEBLEAU""BP""40011 MONT-DE-MARSAN""40011""40192""Mont-de-Marsan"nullnullnullnullnullnullnullnull"318003502401098""SA24""Organisme de Sécurité Sociale"
" + ], "text/plain": [ "shape: (98, 31)\n", "┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n", @@ -501,16 +475,6 @@ "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n", "└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘" - ], - "text/html": [ - "
\n", - "shape: (98, 31)
indexIdentifiant PPSite_IdentifierNuméro SIRET siteIdentifiant technique de la structureSite_Info_Variations_CountRaison sociale siteEnseigne commerciale siteComplément destinataire (coord. structure)Complément point géographique (coord. structure)Numéro Voie (coord. structure)Indice répétition voie (coord. structure)Code type de voie (coord. structure)Libellé type de voie (coord. structure)Libellé Voie (coord. structure)Mention distribution (coord. structure)Bureau cedex (coord. structure)Code postal (coord. structure)Code commune (coord. structure)Libellé commune (coord. structure)Code pays (coord. structure)Libellé pays (coord. structure)Téléphone (coord. structure)Téléphone 2 (coord. structure)Télécopie (coord. structure)Adresse e-mail (coord. structure)Code Département (structure)Libellé Département (structure)Ancien identifiant de la structureCode secteur d'activitéLibellé secteur d'activité
u32strstrstrstru32strstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstr
270597"10000116383""30980220500505""30980220500505""R10100000198782"2"OEUVRES HOSPITALIERES FRANCAIS…"ORDRE DE MALTE FRANCE""ORDRE DE MALTE FRANCE"null"42"nullnullnull"RUE DES VOLONTAIRES"null"75015 PARIS""75015""75056""Paris""99000""France"nullnullnullnullnullnull"330980220500505""SA28""Asso et orga humanitaire"
1353470"10000116383""30980220500505""30980220500505""R10100000779807"2"OEUVRE HOSP FRANC DE L'ORDRE D…nullnullnull"49"null"R""Rue""DE LA CHAPELLE"null"75018 PARIS 18E  ARRONDISSEMEN…"75018""75118""Paris 18e  Arrondissement"nullnullnullnullnullnullnullnull"330980220500505""SA28""Asso et orga humanitaire"
4214"10000536309""18003502402369""18003502402369""R10100000050224"2"DRSM PAYS DE LA LOIREELSM 44""SITE NANTES"nullnull"9"null"R""Rue""GAETAN RONDEAU""BP""44203 NANTES""44203""44109""Nantes"nullnullnullnullnullnullnullnull"318003502402369""SA24""Organisme de Sécurité Sociale"
1627221"10000536309""18003502402369""18003502402369""R10100000049799"2"DRSM PAYS DE LOIREELSM 49""SITE CHOLET"nullnull"2"null"R""Rue""SAINT ELOI""BP""49321 CHOLET""49321""49099""Cholet"nullnullnullnullnullnullnullnull"318003502402369""SA24""Organisme de Sécurité Sociale"
816501"10000667187""18003502402369""18003502402369""R10100000049794"3"DRSM NORD PICARDIEELSM 59""SITE MAUBEUGE"nullnullnullnull"PL""Place""DE WATTIGNIES""BP""59603 MAUBEUGE""59603""59392""Maubeuge"nullnullnullnullnullnullnullnull"318003502402369""SA24""Organisme de Sécurité Sociale"
532248"10110592168""88085935000014""88085935000014""R10100000325887"2"FAREVA PAU"nullnullnullnullnull"AV""Avenue""DU BEARN"null"64320 IDRON""64320""64269""Idron"nullnull"0559402100"null"0559402119"nullnullnull"388085935000014""SA32""Fab. Exploit. Import. Méd. DM"
1344743"10110597498""13000800600038""13000800600038""R10100000097229"2"AGENCE REGIONALE SANTE PAYS LO…nullnullnull"17"null"BD""Boulevard""GASTON DOUMERGUE"null"44262 NANTES""44262""44109""Nantes"nullnullnullnullnullnullnullnull"313000800600038""SA24""Organisme de Sécurité Sociale"
2156205"10110597498""13000800600038""13000800600038""R10100000097229"2"AGENCE REGIONALE SANTE PAYS LO…null"DELEGATION TERRITORIALE"null"2"null"BD""Boulevard""MURAT"null"53000 LAVAL""53000""53130""Laval"nullnullnullnullnullnullnullnull"313000800600038""SA24""Organisme de Sécurité Sociale"
1618789"10110910345""18003502401098""18003502401098""R10100000398898"2"CAISSE NATIONALE DE L'ASSURANC…"DRSM DIRECTION REG. DU SERVICE…"QUARTIER DU LAC"null"80"nullnullnull"AVENUE DE LA JALLERE""BP 260""33300 BORDEAUX""33300""33063""Bordeaux""99000""France"nullnullnullnullnullnull"318003502401098""SA24""Organisme de Sécurité Sociale"
1889827"10110910345""18003502401098""18003502401098""R10100000398898"2"CAISSE NATIONALE DE L'ASSURANC…"DRSM DIRECTION REG. DU SERVICE…nullnull"207"null"R""Rue""FONTAINEBLEAU""BP""40011 MONT-DE-MARSAN""40011""40192""Mont-de-Marsan"nullnullnullnullnullnullnullnull"318003502401098""SA24""Organisme de Sécurité Sociale"
" ] }, "execution_count": 64, @@ -518,37 +482,18 @@ "output_type": "execute_result" } ], - "execution_count": 64 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-19T22:46:07.974271Z", - "start_time": "2025-08-19T22:46:07.943280Z" - } - }, - "cell_type": "code", - "source": [ - "df4.write_csv(f\"{folder}{output_file}-Sites_Variations{output_extension}\",\n", - " separator='|',\n", - " quote_style=\"never\",\n", - " line_terminator='\\n')\n" - ], - "id": "c1fd01e419f4ccc9", - "outputs": [], - "execution_count": 65 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-08-19T22:46:12.781888Z", - "start_time": "2025-08-19T22:46:08.306776Z" - } - }, - "cell_type": "code", "source": [ "%%time\n", - "df5 = (\n", + "all_columns = df.columns\n", + "start_col = 'Raison sociale site'\n", + "end_col = \"Libellé secteur d'activité\"\n", + "start_col_index = all_columns.index(start_col)\n", + "end_col_index = all_columns.index(end_col)\n", + "site_info_cols = all_columns[start_col_index : end_col_index + 1]\n", + "if \"Autorité d'enregistrement\" in site_info_cols:\n", + " site_info_cols.remove(\"Autorité d'enregistrement\")\n", + "\n", + "df4 = (\n", " df\n", " .filter(pd.col('Numéro FINESS site').is_null())\n", " .filter(\n", @@ -561,10 +506,10 @@ " ).alias('Site_Identifier')\n", " )\n", " .with_columns(\n", - " pd.struct(site_info_cols).n_unique().over(['Site_Identifier']).alias('Site_Info_Variations_Count')\n", + " pd.struct(site_info_cols).n_unique().over(['Identifiant PP', 'Site_Identifier']).alias('Site_Info_Variations_Count')\n", " )\n", " .filter(pd.col('Site_Info_Variations_Count') > 1)\n", - " .sort(['Site_Identifier', 'index'])\n", + " .sort(['Identifiant PP', 'Site_Identifier', 'index'])\n", " .select(['index', \n", " 'Identifiant PP', \n", " 'Site_Identifier', \n", @@ -572,9 +517,37 @@ " 'Identifiant technique de la structure', \n", " 'Site_Info_Variations_Count'] + site_info_cols)\n", ")\n", - "df5\n" - ], + "df4" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "c1fd01e419f4ccc9", + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-19T22:46:07.974271Z", + "start_time": "2025-08-19T22:46:07.943280Z" + } + }, + "outputs": [], + "source": [ + "df4.write_csv(f\"{folder}{output_file}-Sites_Variations{output_extension}\",\n", + " separator='|',\n", + " quote_style=\"never\",\n", + " line_terminator='\\n')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, "id": "7838523925fc85ee", + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-19T22:46:12.781888Z", + "start_time": "2025-08-19T22:46:08.306776Z" + } + }, "outputs": [ { "name": "stdout", @@ -586,6 +559,16 @@ }, { "data": { + "text/html": [ + "
\n", + "shape: (4_190, 31)
indexIdentifiant PPSite_IdentifierNuméro SIRET siteIdentifiant technique de la structureSite_Info_Variations_CountRaison sociale siteEnseigne commerciale siteComplément destinataire (coord. structure)Complément point géographique (coord. structure)Numéro Voie (coord. structure)Indice répétition voie (coord. structure)Code type de voie (coord. structure)Libellé type de voie (coord. structure)Libellé Voie (coord. structure)Mention distribution (coord. structure)Bureau cedex (coord. structure)Code postal (coord. structure)Code commune (coord. structure)Libellé commune (coord. structure)Code pays (coord. structure)Libellé pays (coord. structure)Téléphone (coord. structure)Téléphone 2 (coord. structure)Télécopie (coord. structure)Adresse e-mail (coord. structure)Code Département (structure)Libellé Département (structure)Ancien identifiant de la structureCode secteur d'activitéLibellé secteur d'activité
u32strstrstrstru32strstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstr
127508"10100002293""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
285182"10001806768""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
466100"10104800411""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
722626"10103687157""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
826390"10001796597""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
793647"10109869403""98452619400019""98452619400019""R10100000673943"2"SELARL CENTRE DE PODOLOGIE SPO…"SELARL CENTRE DE PODOLOGIE SPO…nullnull"8"null"R""Rue""GEORGES NEGREVERGNE"null"33700 MERIGNAC""33700""33281""Mérignac""99000""France"nullnullnullnullnullnull"398452619400019""SA09""Exercice en Société"
1500531"10101293263""98452619400019""98452619400019""R10100000673943"2"SELARL CENTRE DE PODOLOGIE SPO…"SELARL CENTRE DE PODOLOGIE SPO…nullnull"8"null"R""Rue""GEORGES NEGREVERGNE"null"33700 MERIGNAC""33700""33281""Mérignac""99000""France"nullnullnullnullnullnull"398452619400019""SA09""Exercice en Société"
1852947"10108015131""98452619400019""98452619400019""R10100000673943"2"SELARL CENTRE DE PODOLOGIE SPO…"SELARL CENTRE DE PODOLOGIE SPO…nullnull"8"null"R""Rue""GEORGES NEGREVERGNE"null"33700 MERIGNAC""33700""33281""Mérignac""99000""France"nullnullnullnullnullnull"398452619400019""SA09""Exercice en Société"
1571945"10107235243""99882350430834""99882350430834""R10100000554688"2"ADECCO FRANCE""ADECCO"null"PARC VALMY PARK AVENUE BAT A 1…"8""D"nullnull"RUE JEANNE BARRET"null"21000 DIJON""21000""21231""Dijon""99000""France"nullnullnullnullnullnull"399882350430834""SA11""Entreprise d'intérim"
1590681"10108693036""99882350430834""99882350430834""R10100000413248"2"ADECCO MEDICAL"nullnullnull"8""D""R""Rue""JEANNE BARRET"null"21000 DIJON""21000""21231""Dijon""99000""France"nullnullnullnullnullnull"399882350430834""SA11""Entreprise d'intérim"
" + ], "text/plain": [ "shape: (4_190, 31)\n", "┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n", @@ -633,16 +616,6 @@ "│ ┆ 6 ┆ 30834 ┆ 30834 ┆ ┆ ┆ 430834 ┆ ┆ e │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ d'intérim │\n", "└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘" - ], - "text/html": [ - "
\n", - "shape: (4_190, 31)
indexIdentifiant PPSite_IdentifierNuméro SIRET siteIdentifiant technique de la structureSite_Info_Variations_CountRaison sociale siteEnseigne commerciale siteComplément destinataire (coord. structure)Complément point géographique (coord. structure)Numéro Voie (coord. structure)Indice répétition voie (coord. structure)Code type de voie (coord. structure)Libellé type de voie (coord. structure)Libellé Voie (coord. structure)Mention distribution (coord. structure)Bureau cedex (coord. structure)Code postal (coord. structure)Code commune (coord. structure)Libellé commune (coord. structure)Code pays (coord. structure)Libellé pays (coord. structure)Téléphone (coord. structure)Téléphone 2 (coord. structure)Télécopie (coord. structure)Adresse e-mail (coord. structure)Code Département (structure)Libellé Département (structure)Ancien identifiant de la structureCode secteur d'activitéLibellé secteur d'activité
u32strstrstrstru32strstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstr
127508"10100002293""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
285182"10001806768""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
466100"10104800411""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
722626"10103687157""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
826390"10001796597""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
793647"10109869403""98452619400019""98452619400019""R10100000673943"2"SELARL CENTRE DE PODOLOGIE SPO…"SELARL CENTRE DE PODOLOGIE SPO…nullnull"8"null"R""Rue""GEORGES NEGREVERGNE"null"33700 MERIGNAC""33700""33281""Mérignac""99000""France"nullnullnullnullnullnull"398452619400019""SA09""Exercice en Société"
1500531"10101293263""98452619400019""98452619400019""R10100000673943"2"SELARL CENTRE DE PODOLOGIE SPO…"SELARL CENTRE DE PODOLOGIE SPO…nullnull"8"null"R""Rue""GEORGES NEGREVERGNE"null"33700 MERIGNAC""33700""33281""Mérignac""99000""France"nullnullnullnullnullnull"398452619400019""SA09""Exercice en Société"
1852947"10108015131""98452619400019""98452619400019""R10100000673943"2"SELARL CENTRE DE PODOLOGIE SPO…"SELARL CENTRE DE PODOLOGIE SPO…nullnull"8"null"R""Rue""GEORGES NEGREVERGNE"null"33700 MERIGNAC""33700""33281""Mérignac""99000""France"nullnullnullnullnullnull"398452619400019""SA09""Exercice en Société"
1571945"10107235243""99882350430834""99882350430834""R10100000554688"2"ADECCO FRANCE""ADECCO"null"PARC VALMY PARK AVENUE BAT A 1…"8""D"nullnull"RUE JEANNE BARRET"null"21000 DIJON""21000""21231""Dijon""99000""France"nullnullnullnullnullnull"399882350430834""SA11""Entreprise d'intérim"
1590681"10108693036""99882350430834""99882350430834""R10100000413248"2"ADECCO MEDICAL"nullnullnull"8""D""R""Rue""JEANNE BARRET"null"21000 DIJON""21000""21231""Dijon""99000""France"nullnullnullnullnullnull"399882350430834""SA11""Entreprise d'intérim"
" ] }, "execution_count": 66, @@ -650,75 +623,63 @@ "output_type": "execute_result" } ], - "execution_count": 66 + "source": [ + "%%time\n", + "df5 = (\n", + " df\n", + " .filter(pd.col('Numéro FINESS site').is_null())\n", + " .filter(\n", + " pd.col('Numéro SIRET site').is_not_null() | pd.col('Identifiant technique de la structure').is_not_null()\n", + " )\n", + " .with_columns(\n", + " pd.coalesce(\n", + " pd.col('Numéro SIRET site'),\n", + " pd.col('Identifiant technique de la structure')\n", + " ).alias('Site_Identifier')\n", + " )\n", + " .with_columns(\n", + " pd.struct(site_info_cols).n_unique().over(['Site_Identifier']).alias('Site_Info_Variations_Count')\n", + " )\n", + " .filter(pd.col('Site_Info_Variations_Count') > 1)\n", + " .sort(['Site_Identifier', 'index'])\n", + " .select(['index', \n", + " 'Identifiant PP', \n", + " 'Site_Identifier', \n", + " 'Numéro SIRET site', \n", + " 'Identifiant technique de la structure', \n", + " 'Site_Info_Variations_Count'] + site_info_cols)\n", + ")\n", + "df5\n" + ] }, { + "cell_type": "code", + "execution_count": 67, + "id": "416184f32f973a71", "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:13.670911Z", "start_time": "2025-08-19T22:46:13.655386Z" } }, - "cell_type": "code", + "outputs": [], "source": [ "df5.write_csv(f\"{folder}{output_file}-Sites_Variations_Global{output_extension}\",\n", " separator='|',\n", " quote_style=\"never\",\n", " line_terminator='\\n')\n" - ], - "id": "416184f32f973a71", - "outputs": [], - "execution_count": 67 + ] }, { + "cell_type": "code", + "execution_count": 68, + "id": "84549f83ce5e92f", "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:17.023811Z", "start_time": "2025-08-19T22:46:14.032470Z" } }, - "cell_type": "code", - "source": [ - "%%time\n", - "df6 = (\n", - " df\n", - " .with_columns(\n", - " pd.coalesce(\n", - " pd.col('Numéro FINESS site'),\n", - " pd.col('Numéro SIRET site'),\n", - " pd.col('Identifiant technique de la structure')\n", - " ).alias('Site_Identifier_Global')\n", - " )\n", - " .filter(pd.col('Site_Identifier_Global').is_not_null())\n", - " .with_columns(\n", - " pd.struct([\n", - " \"Libellé profession\",\n", - " \"Libellé savoir-faire\",\n", - " \"Libellé mode exercice\",\n", - " \"Libellé rôle\",\n", - " \"Libellé genre activité\"\n", - " ]).n_unique().over([\"Identifiant PP\", \"Site_Identifier_Global\"]).alias(\"Activites_Count\")\n", - " )\n", - " .filter(pd.col(\"Activites_Count\") > 1)\n", - " .sort([\"Identifiant PP\", \"Site_Identifier_Global\", \"index\"])\n", - " .select([\n", - " \"index\",\n", - " \"Identifiant PP\",\n", - " \"Site_Identifier_Global\",\n", - " \"Numéro FINESS site\",\n", - " \"Numéro SIRET site\",\n", - " \"Identifiant technique de la structure\",\n", - " \"Activites_Count\",\n", - " \"Libellé profession\",\n", - " \"Libellé savoir-faire\",\n", - " \"Libellé mode exercice\",\n", - " \"Libellé rôle\",\n", - " \"Libellé genre activité\"\n", - " ])\n", - ")\n", - "df6\n" - ], - "id": "84549f83ce5e92f", "outputs": [ { "name": "stdout", @@ -730,6 +691,16 @@ }, { "data": { + "text/html": [ + "
\n", + "shape: (25_389, 12)
indexIdentifiant PPSite_Identifier_GlobalNuméro FINESS siteNuméro SIRET siteIdentifiant technique de la structureActivites_CountLibellé professionLibellé savoir-faireLibellé mode exerciceLibellé rôleLibellé genre activité
u32strstrstrstrstru32strstrstrstrstr
1352840"10000017979""130786445""130786445""30247736900011""F130786445"2"Ostéopathe"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
1893982"10000017979""130786445""130786445""30247736900011""F130786445"2"Sage-Femme"null"Salarié""Fonction non définie""Activité standard de soin ou d…
270269"10000070283""39784090100011"null"39784090100011""R10000002500225"2"Chirurgien-Dentiste"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
811380"10000070283""39784090100011"null"39784090100011""R10000002500225"2"Chirurgien-Dentiste"null"Salarié""Fonction non définie""Activité non soignante"
540586"10000086842""860012228""860012228""13001256000038""F860012228"2"Psychologue"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
538572"10111123542""970400016""970400016""26974214400034""F970400016"2"Psychologue"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
538882"10111252994""250006954""250006954""26250176000264""F250006954"2"Psychothérapeute"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
2162932"10111252994""250006954""250006954""26250176000264""F250006954"2"Psychologue"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
2163446"10111293121""490540218""490540218""77568873211159""F490540218"2"Infirmier"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
2163447"10111293121""490540218""490540218""77568873211159""F490540218"2"Infirmier"null"Salarié""Cadre de santé de proximité""Activité non soignante"
" + ], "text/plain": [ "shape: (25_389, 12)\n", "┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n", @@ -779,16 +750,6 @@ "│ ┆ 1 ┆ ┆ ┆ ┆ ┆ ┆ santé de ┆ non │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ proximité ┆ soignante │\n", "└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘" - ], - "text/html": [ - "
\n", - "shape: (25_389, 12)
indexIdentifiant PPSite_Identifier_GlobalNuméro FINESS siteNuméro SIRET siteIdentifiant technique de la structureActivites_CountLibellé professionLibellé savoir-faireLibellé mode exerciceLibellé rôleLibellé genre activité
u32strstrstrstrstru32strstrstrstrstr
1352840"10000017979""130786445""130786445""30247736900011""F130786445"2"Ostéopathe"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
1893982"10000017979""130786445""130786445""30247736900011""F130786445"2"Sage-Femme"null"Salarié""Fonction non définie""Activité standard de soin ou d…
270269"10000070283""39784090100011"null"39784090100011""R10000002500225"2"Chirurgien-Dentiste"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
811380"10000070283""39784090100011"null"39784090100011""R10000002500225"2"Chirurgien-Dentiste"null"Salarié""Fonction non définie""Activité non soignante"
540586"10000086842""860012228""860012228""13001256000038""F860012228"2"Psychologue"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
538572"10111123542""970400016""970400016""26974214400034""F970400016"2"Psychologue"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
538882"10111252994""250006954""250006954""26250176000264""F250006954"2"Psychothérapeute"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
2162932"10111252994""250006954""250006954""26250176000264""F250006954"2"Psychologue"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
2163446"10111293121""490540218""490540218""77568873211159""F490540218"2"Infirmier"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
2163447"10111293121""490540218""490540218""77568873211159""F490540218"2"Infirmier"null"Salarié""Cadre de santé de proximité""Activité non soignante"
" ] }, "execution_count": 68, @@ -796,54 +757,75 @@ "output_type": "execute_result" } ], - "execution_count": 68 + "source": [ + "%%time\n", + "df6 = (\n", + " df\n", + " .with_columns(\n", + " pd.coalesce(\n", + " pd.col('Numéro FINESS site'),\n", + " pd.col('Numéro SIRET site'),\n", + " pd.col('Identifiant technique de la structure')\n", + " ).alias('Site_Identifier_Global')\n", + " )\n", + " .filter(pd.col('Site_Identifier_Global').is_not_null())\n", + " .with_columns(\n", + " pd.struct([\n", + " \"Libellé profession\",\n", + " \"Libellé savoir-faire\",\n", + " \"Libellé mode exercice\",\n", + " \"Libellé rôle\",\n", + " \"Libellé genre activité\"\n", + " ]).n_unique().over([\"Identifiant PP\", \"Site_Identifier_Global\"]).alias(\"Activites_Count\")\n", + " )\n", + " .filter(pd.col(\"Activites_Count\") > 1)\n", + " .sort([\"Identifiant PP\", \"Site_Identifier_Global\", \"index\"])\n", + " .select([\n", + " \"index\",\n", + " \"Identifiant PP\",\n", + " \"Site_Identifier_Global\",\n", + " \"Numéro FINESS site\",\n", + " \"Numéro SIRET site\",\n", + " \"Identifiant technique de la structure\",\n", + " \"Activites_Count\",\n", + " \"Libellé profession\",\n", + " \"Libellé savoir-faire\",\n", + " \"Libellé mode exercice\",\n", + " \"Libellé rôle\",\n", + " \"Libellé genre activité\"\n", + " ])\n", + ")\n", + "df6\n" + ] }, { + "cell_type": "code", + "execution_count": 69, + "id": "6f7025a7c08b54b4", "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:17.851427Z", "start_time": "2025-08-19T22:46:17.796168Z" } }, - "cell_type": "code", + "outputs": [], "source": [ "df6.write_csv(f\"{folder}{output_file}-Multiple_Activities_Per_Site{output_extension}\",\n", " separator='|',\n", " quote_style=\"never\",\n", " line_terminator='\\n')\n" - ], - "id": "6f7025a7c08b54b4", - "outputs": [], - "execution_count": 69 + ] }, { + "cell_type": "code", + "execution_count": 70, + "id": "b18d9ba71ba63d9d", "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:19.535052Z", "start_time": "2025-08-19T22:46:18.015194Z" } }, - "cell_type": "code", - "source": [ - "%%time\n", - "df7 = (\n", - " df\n", - " .with_columns(\n", - " pd.col(\"Libellé profession\").n_unique().over(\"Identifiant PP\").alias(\"Profession_Count\")\n", - " )\n", - " .filter(pd.col(\"Profession_Count\") > 1)\n", - " .sort([\"Identifiant PP\", \"index\"])\n", - " .select([\n", - " \"index\",\n", - " \"Identifiant PP\",\n", - " \"Profession_Count\",\n", - " \"Libellé profession\",\n", - " \"Libellé savoir-faire\"\n", - " ])\n", - ")\n", - "df7\n" - ], - "id": "b18d9ba71ba63d9d", "outputs": [ { "name": "stdout", @@ -855,6 +837,16 @@ }, { "data": { + "text/html": [ + "
\n", + "shape: (88_845, 5)
indexIdentifiant PPProfession_CountLibellé professionLibellé savoir-faire
u32stru32strstr
74"10000013150"2"Médecin""Psychiatrie"
269913"10000013150"2"Psychothérapeute"null
1352840"10000017979"2"Ostéopathe"null
1893982"10000017979"2"Sage-Femme"null
811125"10000029966"2"Sage-Femme"null
1352396"10111320304"2"Psychothérapeute"null
269545"10111320379"2"Psychothérapeute"null
810601"10111320379"2"Psychologue"null
539711"10111321468"2"Psychothérapeute"null
1352414"10111321468"2"Psychologue"null
" + ], "text/plain": [ "shape: (88_845, 5)\n", "┌─────────┬────────────────┬──────────────────┬────────────────────┬──────────────────────┐\n", @@ -874,16 +866,6 @@ "│ 539711 ┆ 10111321468 ┆ 2 ┆ Psychothérapeute ┆ null │\n", "│ 1352414 ┆ 10111321468 ┆ 2 ┆ Psychologue ┆ null │\n", "└─────────┴────────────────┴──────────────────┴────────────────────┴──────────────────────┘" - ], - "text/html": [ - "
\n", - "shape: (88_845, 5)
indexIdentifiant PPProfession_CountLibellé professionLibellé savoir-faire
u32stru32strstr
74"10000013150"2"Médecin""Psychiatrie"
269913"10000013150"2"Psychothérapeute"null
1352840"10000017979"2"Ostéopathe"null
1893982"10000017979"2"Sage-Femme"null
811125"10000029966"2"Sage-Femme"null
1352396"10111320304"2"Psychothérapeute"null
269545"10111320379"2"Psychothérapeute"null
810601"10111320379"2"Psychologue"null
539711"10111321468"2"Psychothérapeute"null
1352414"10111321468"2"Psychologue"null
" ] }, "execution_count": 70, @@ -891,32 +873,50 @@ "output_type": "execute_result" } ], - "execution_count": 70 + "source": [ + "%%time\n", + "df7 = (\n", + " df\n", + " .with_columns(\n", + " pd.col(\"Libellé profession\").n_unique().over(\"Identifiant PP\").alias(\"Profession_Count\")\n", + " )\n", + " .filter(pd.col(\"Profession_Count\") > 1)\n", + " .sort([\"Identifiant PP\", \"index\"])\n", + " .select([\n", + " \"index\",\n", + " \"Identifiant PP\",\n", + " \"Profession_Count\",\n", + " \"Libellé profession\",\n", + " \"Libellé savoir-faire\"\n", + " ])\n", + ")\n", + "df7\n" + ] }, { + "cell_type": "code", + "execution_count": 71, + "id": "85be468fd3f461d1", "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:19.893214Z", "start_time": "2025-08-19T22:46:19.851765Z" } }, - "cell_type": "code", + "outputs": [], "source": [ "df7.write_csv(f\"{folder}{output_file}-Multiple_Professions{output_extension}\",\n", " separator='|',\n", " quote_style=\"never\",\n", " line_terminator='\\n')\n" - ], - "id": "85be468fd3f461d1", - "outputs": [], - "execution_count": 71 + ] } ], "metadata": { "kernelspec": { - "name": "python3", + "display_name": "Python 3 (ipykernel)", "language": "python", - "display_name": "Python 3 (ipykernel)" + "name": "python3" } }, "nbformat": 4, diff --git a/Table_Réf_Professionnels_inconsistencies.xlsx b/Table_Réf_Professionnels_inconsistencies.xlsx index 675a028..e6a0861 100644 Binary files a/Table_Réf_Professionnels_inconsistencies.xlsx and b/Table_Réf_Professionnels_inconsistencies.xlsx differ