{ "cells": [ { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:45:53.794236Z", "start_time": "2025-08-19T22:45:51.445477Z" } }, "cell_type": "code", "source": [ "%%time\n", "# NOTE: `pd` is bound to Polars in this notebook (not pandas); later cells rely on this alias.\n", "import polars as pd\n", "import csv\n", "\n", "# Input location and the naming pieces reused by the export cells further down.\n", "folder = \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"\n", "input_file = \"Table_Réf_Professionnels_250815.txt\"\n", "output_file = \"Table_Réf_Professionnels_inconsistencies\"\n", "output_extension = \".csv\"\n", "\n", "# Pipe-delimited file read without quote handling; empty fields are loaded as nulls and\n", "# infer_schema_length=0 keeps every column as a string.\n", "df = pd.read_csv(\n", "    folder + input_file,\n", "    separator='|',\n", "    quote_char=None,\n", "    null_values='',\n", "    infer_schema_length=0,\n", ").with_row_index('index')  # row counter used by later cells for sorting\n", "df.columns\n" ], "id": "58db5082e27759f7", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: total: 13 s\n", "Wall time: 2.33 s\n" ] }, { "data": { "text/plain": [ "['index',\n", " \"Type d'identifiant PP\",\n", " 'Identifiant PP',\n", " 'Identification nationale PP',\n", " \"Code civilité d'exercice\",\n", " \"Libellé civilité d'exercice\",\n", " 'Code civilité',\n", " 'Libellé civilité',\n", " \"Nom d'exercice\",\n", " \"Prénom d'exercice\",\n", " 'Code profession',\n", " 'Libellé profession',\n", " 'Code catégorie professionnelle',\n", " 'Libellé catégorie professionnelle',\n", " 'Code type savoir-faire',\n", " 'Libellé type savoir-faire',\n", " 'Code savoir-faire',\n", " 'Libellé savoir-faire',\n", " 'Code mode exercice',\n", " 'Libellé mode exercice',\n", " 'Numéro SIRET site',\n", " 'Numéro SIREN site',\n", " 'Numéro FINESS site',\n", " 'Numéro FINESS établissement juridique',\n", " 'Identifiant technique de la structure',\n", " 'Raison sociale site',\n", " 'Enseigne commerciale site',\n", " 'Complément destinataire (coord. structure)',\n", " 'Complément point géographique (coord. structure)',\n", " 'Numéro Voie (coord. structure)',\n", " 'Indice répétition voie (coord. structure)',\n", " 'Code type de voie (coord. 
structure)',\n", " 'Libellé type de voie (coord. structure)',\n", " 'Libellé Voie (coord. structure)',\n", " 'Mention distribution (coord. structure)',\n", " 'Bureau cedex (coord. structure)',\n", " 'Code postal (coord. structure)',\n", " 'Code commune (coord. structure)',\n", " 'Libellé commune (coord. structure)',\n", " 'Code pays (coord. structure)',\n", " 'Libellé pays (coord. structure)',\n", " 'Téléphone (coord. structure)',\n", " 'Téléphone 2 (coord. structure)',\n", " 'Télécopie (coord. structure)',\n", " 'Adresse e-mail (coord. structure)',\n", " 'Code Département (structure)',\n", " 'Libellé Département (structure)',\n", " 'Ancien identifiant de la structure',\n", " \"Autorité d'enregistrement\",\n", " \"Code secteur d'activité\",\n", " \"Libellé secteur d'activité\",\n", " 'Code section tableau pharmaciens',\n", " 'Libellé section tableau pharmaciens',\n", " 'Code rôle',\n", " 'Libellé rôle',\n", " 'Code genre activité',\n", " 'Libellé genre activité',\n", " '']" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 57 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:45:54.952210Z", "start_time": "2025-08-19T22:45:53.873718Z" } }, "cell_type": "code", "source": [ "%%time\n", "# Strict name check: keep the rows of every 'Identifiant PP' that appears with more than one\n", "# distinct \"last name + first name\" pair, compared exactly as written (case and accents included).\n", "df1 = (\n", "    df\n", "    .with_columns(\n", "        # Number of distinct exact spellings found for each professional.\n", "        (pd.col(\"Nom d'exercice\") + \" \" + pd.col(\"Prénom d'exercice\"))\n", "        .n_unique()\n", "        .over(\"Identifiant PP\")\n", "        .alias(\"Count\")\n", "    )\n", "    .filter(pd.col(\"Count\") > 1)\n", "    .sort(\"Identifiant PP\", \"index\")\n", "    .select([\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\"])\n", ")\n", "df1\n" ], "id": "7d9b7562c09955", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: total: 4.83 s\n", "Wall time: 1.07 s\n" ] }, { "data": { "text/plain": [ "shape: (9_108, 5)\n", "┌─────────┬────────────────┬───────┬────────────────┬───────────────────┐\n", "│ 
index ┆ Identifiant PP ┆ Count ┆ Nom d'exercice ┆ Prénom d'exercice │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ u32 ┆ str ┆ u32 ┆ str ┆ str │\n", "╞═════════╪════════════════╪═══════╪════════════════╪═══════════════════╡\n", "│ 1352933 ┆ 10000034180 ┆ 2 ┆ DUWAT-GEORGES ┆ GHISLAINE │\n", "│ 1352934 ┆ 10000034180 ┆ 2 ┆ GEORGES ┆ GHISLAINE │\n", "│ 261 ┆ 10000040062 ┆ 2 ┆ MEYER ┆ Nicolas │\n", "│ 262 ┆ 10000040062 ┆ 2 ┆ MEYER ┆ Nicolas │\n", "│ 811196 ┆ 10000040062 ┆ 2 ┆ MEYER ┆ NICOLAS │\n", "│ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 2162425 ┆ 10111110721 ┆ 2 ┆ BARREAU ┆ Nadège │\n", "│ 268237 ┆ 10111112636 ┆ 2 ┆ GIRAUDET ┆ MEGGIE │\n", "│ 1892318 ┆ 10111112636 ┆ 2 ┆ GIRAUDET ┆ Meggie │\n", "│ 269544 ┆ 10111320304 ┆ 2 ┆ Sengel ┆ Coralie │\n", "│ 1352396 ┆ 10111320304 ┆ 2 ┆ SENGEL ┆ Coralie │\n", "└─────────┴────────────────┴───────┴────────────────┴───────────────────┘" ], "text/html": [ "
\n", "shape: (9_108, 5)
indexIdentifiant PPCountNom d'exercicePrénom d'exercice
u32stru32strstr
1352933"10000034180"2"DUWAT-GEORGES""GHISLAINE"
1352934"10000034180"2"GEORGES""GHISLAINE"
261"10000040062"2"MEYER""Nicolas"
262"10000040062"2"MEYER""Nicolas"
811196"10000040062"2"MEYER""NICOLAS"
2162425"10111110721"2"BARREAU""Nadège"
268237"10111112636"2"GIRAUDET""MEGGIE"
1892318"10111112636"2"GIRAUDET""Meggie"
269544"10111320304"2"Sengel""Coralie"
1352396"10111320304"2"SENGEL""Coralie"
" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 58 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:45:55.090712Z", "start_time": "2025-08-19T22:45:55.072647Z" } }, "cell_type": "code", "source": [ "# Save the strict name variations as pipe-separated text, without quoting, using LF line endings.\n", "df1.write_csv(\n", "    folder + output_file + \"-Names_Variations_Strict\" + output_extension,\n", "    separator='|',\n", "    quote_style=\"never\",\n", "    line_terminator='\\n',\n", ")\n" ], "id": "c418a6ea7abd77b", "outputs": [], "execution_count": 59 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:45:56.962873Z", "start_time": "2025-08-19T22:45:55.259223Z" } }, "cell_type": "code", "source": [ "%%time\n", "# Case-insensitive name check: both name parts are lower-cased before counting the distinct\n", "# \"last name + first name\" pairs per 'Identifiant PP', so capitalisation alone is not reported.\n", "df2 = (\n", "    df\n", "    .with_columns(\n", "        (\n", "            pd.col(\"Nom d'exercice\").str.to_lowercase()\n", "            + \" \"\n", "            + pd.col(\"Prénom d'exercice\").str.to_lowercase()\n", "        )\n", "        .n_unique()\n", "        .over(\"Identifiant PP\")\n", "        .alias(\"Count\")\n", "    )\n", "    .filter(pd.col(\"Count\") > 1)\n", "    .sort(\"Identifiant PP\", \"index\")\n", "    .select([\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\"])\n", ")\n", "df2\n" ], "id": "9d94b716364356c7", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: total: 5.06 s\n", "Wall time: 1.68 s\n" ] }, { "data": { "text/plain": [ "shape: (5_426, 5)\n", "┌─────────┬────────────────┬───────┬────────────────┬───────────────────┐\n", "│ index ┆ Identifiant PP ┆ Count ┆ Nom d'exercice ┆ Prénom d'exercice │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ u32 ┆ str ┆ u32 ┆ str ┆ str │\n", "╞═════════╪════════════════╪═══════╪════════════════╪═══════════════════╡\n", "│ 1352933 ┆ 10000034180 ┆ 2 ┆ DUWAT-GEORGES ┆ GHISLAINE │\n", "│ 1352934 ┆ 10000034180 ┆ 2 ┆ GEORGES ┆ GHISLAINE │\n", "│ 1353009 ┆ 10000046051 ┆ 2 ┆ STUDER ┆ AGNES │\n", "│ 1623173 ┆ 10000046051 ┆ 2 ┆ JURION ┆ AGNES │\n", "│ 270462 ┆ 10000101518 ┆ 2 ┆ BARREYRE ┆ SANDRINE │\n", "│ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 1619731 ┆ 
10110987236 ┆ 2 ┆ ROGIER ┆ MATHILDE │\n", "│ 808810 ┆ 10111077417 ┆ 2 ┆ DOUVIER ┆ FRANCETTE │\n", "│ 2161999 ┆ 10111077417 ┆ 2 ┆ D'ELLOY ┆ FRANCETTE │\n", "│ 538415 ┆ 10111110721 ┆ 2 ┆ ROCHEPEAU ┆ Nadège │\n", "│ 2162425 ┆ 10111110721 ┆ 2 ┆ BARREAU ┆ Nadège │\n", "└─────────┴────────────────┴───────┴────────────────┴───────────────────┘" ], "text/html": [ "
\n", "shape: (5_426, 5)
indexIdentifiant PPCountNom d'exercicePrénom d'exercice
u32stru32strstr
1352933"10000034180"2"DUWAT-GEORGES""GHISLAINE"
1352934"10000034180"2"GEORGES""GHISLAINE"
1353009"10000046051"2"STUDER""AGNES"
1623173"10000046051"2"JURION""AGNES"
270462"10000101518"2"BARREYRE""SANDRINE"
1619731"10110987236"2"ROGIER""MATHILDE"
808810"10111077417"2"DOUVIER""FRANCETTE"
2161999"10111077417"2"D'ELLOY""FRANCETTE"
538415"10111110721"2"ROCHEPEAU""Nadège"
2162425"10111110721"2"BARREAU""Nadège"
" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 60 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:45:57.433036Z", "start_time": "2025-08-19T22:45:57.417970Z" } }, "cell_type": "code", "source": [ "df2.write_csv(f\"{folder}{output_file}-Names_Variations_Insensitive{output_extension}\",\n", " separator='|',\n", " quote_style=\"never\",\n", " line_terminator='\\n')\n" ], "id": "18aab4499103491a", "outputs": [], "execution_count": 61 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:02.915526Z", "start_time": "2025-08-19T22:45:57.710258Z" } }, "cell_type": "code", "source": [ "%%time\n", "# Normalized name check: names are compared after lower-casing, accent folding, replacing\n", "# every remaining non-alphanumeric character with a space and collapsing whitespace runs.\n", "# Rows are kept for each 'Identifiant PP' that still shows more than one distinct cleaned name.\n", "df3 = (\n", "    df\n", "    .with_columns(\n", "        (\n", "            (pd.col(\"Nom d'exercice\") + \" \" + pd.col(\"Prénom d'exercice\"))\n", "            .str.to_lowercase()\n", "            # Accent folding\n", "            .str.replace_all(\"à|á|â|ã|ä|å\", \"a\", literal=False)\n", "            .str.replace_all(\"ç\", \"c\", literal=False)\n", "            .str.replace_all(\"è|é|ê|ë\", \"e\", literal=False)\n", "            .str.replace_all(\"ì|í|î|ï\", \"i\", literal=False)\n", "            .str.replace_all(\"ñ\", \"n\", literal=False)\n", "            .str.replace_all(\"ò|ó|ô|õ|ö\", \"o\", literal=False)\n", "            .str.replace_all(\"ù|ú|û|ü\", \"u\", literal=False)\n", "            .str.replace_all(\"ý|ÿ\", \"y\", literal=False)\n", "            # Replace non-alphanumeric characters with a space, then collapse whitespace runs.\n", "            # These are raw strings, so the whitespace class needs a single backslash: the\n", "            # previous r\"\\\\s\" was read by the regex engine as a literal backslash followed by\n", "            # the letter s, so runs of spaces were never collapsed.\n", "            .str.replace_all(r\"[^a-z0-9\\s]\", \" \", literal=False)\n", "            .str.replace_all(r\"\\s+\", \" \", literal=False)\n", "            .str.strip_chars()\n", "        ).alias(\"Nom_Prénom_Nettoyé\")\n", "    )\n", "    .with_columns(\n", "        pd.col(\"Nom_Prénom_Nettoyé\").n_unique().over(\"Identifiant PP\").alias(\"Count\")\n", "    )\n", "    .filter(pd.col(\"Count\") > 1)\n", "    .sort([\"Identifiant PP\", \"index\"])\n", "    .select(\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\", \"Nom_Prénom_Nettoyé\")\n", ")\n", "df3\n" ], "id": "8e4e3e22f16fea1c", "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "CPU times: total: 8.59 s\n", "Wall time: 5.19 s\n" ] }, { "data": { "text/plain": [ "shape: (3_584, 6)\n", "┌─────────┬────────────────┬───────┬────────────────┬───────────────────┬─────────────────────────┐\n", "│ index ┆ Identifiant PP ┆ Count ┆ Nom d'exercice ┆ Prénom d'exercice ┆ Nom_Prénom_Nettoyé │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ u32 ┆ str ┆ u32 ┆ str ┆ str ┆ str │\n", "╞═════════╪════════════════╪═══════╪════════════════╪═══════════════════╪═════════════════════════╡\n", "│ 1352933 ┆ 10000034180 ┆ 2 ┆ DUWAT-GEORGES ┆ GHISLAINE ┆ duwat georges ghislaine │\n", "│ 1352934 ┆ 10000034180 ┆ 2 ┆ GEORGES ┆ GHISLAINE ┆ georges ghislaine │\n", "│ 1353009 ┆ 10000046051 ┆ 2 ┆ STUDER ┆ AGNES ┆ studer agnes │\n", "│ 1623173 ┆ 10000046051 ┆ 2 ┆ JURION ┆ AGNES ┆ jurion agnes │\n", "│ 270462 ┆ 10000101518 ┆ 2 ┆ BARREYRE ┆ SANDRINE ┆ barreyre sandrine │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 1619731 ┆ 10110987236 ┆ 2 ┆ ROGIER ┆ MATHILDE ┆ rogier mathilde │\n", "│ 808810 ┆ 10111077417 ┆ 2 ┆ DOUVIER ┆ FRANCETTE ┆ douvier francette │\n", "│ 2161999 ┆ 10111077417 ┆ 2 ┆ D'ELLOY ┆ FRANCETTE ┆ d elloy francette │\n", "│ 538415 ┆ 10111110721 ┆ 2 ┆ ROCHEPEAU ┆ Nadège ┆ rochepeau nadege │\n", "│ 2162425 ┆ 10111110721 ┆ 2 ┆ BARREAU ┆ Nadège ┆ barreau nadege │\n", "└─────────┴────────────────┴───────┴────────────────┴───────────────────┴─────────────────────────┘" ], "text/html": [ "
\n", "shape: (3_584, 6)
indexIdentifiant PPCountNom d'exercicePrénom d'exerciceNom_Prénom_Nettoyé
u32stru32strstrstr
1352933"10000034180"2"DUWAT-GEORGES""GHISLAINE""duwat georges ghislaine"
1352934"10000034180"2"GEORGES""GHISLAINE""georges ghislaine"
1353009"10000046051"2"STUDER""AGNES""studer agnes"
1623173"10000046051"2"JURION""AGNES""jurion agnes"
270462"10000101518"2"BARREYRE""SANDRINE""barreyre sandrine"
1619731"10110987236"2"ROGIER""MATHILDE""rogier mathilde"
808810"10111077417"2"DOUVIER""FRANCETTE""douvier francette"
2161999"10111077417"2"D'ELLOY""FRANCETTE""d elloy francette"
538415"10111110721"2"ROCHEPEAU""Nadège""rochepeau nadege"
2162425"10111110721"2"BARREAU""Nadège""barreau nadege"
" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 62 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:03.290835Z", "start_time": "2025-08-19T22:46:03.280259Z" } }, "cell_type": "code", "source": [ "# Save the normalized name variations as pipe-separated text, without quoting, using LF line endings.\n", "df3.write_csv(\n", "    folder + output_file + \"-Names_Variations_Normalized\" + output_extension,\n", "    separator='|',\n", "    quote_style=\"never\",\n", "    line_terminator='\\n',\n", ")\n" ], "id": "aab2ae2e91a7190c", "outputs": [], "execution_count": 63 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:07.814563Z", "start_time": "2025-08-19T22:46:03.493442Z" } }, "cell_type": "code", "source": [ "%%time\n", "# Descriptive site columns to compare: everything from 'Raison sociale site' through\n", "# \"Libellé secteur d'activité\" in file order, leaving out \"Autorité d'enregistrement\".\n", "all_columns = df.columns\n", "start_col, end_col = 'Raison sociale site', \"Libellé secteur d'activité\"\n", "start_col_index, end_col_index = (all_columns.index(col) for col in (start_col, end_col))\n", "site_info_cols = [\n", "    col\n", "    for col in all_columns[start_col_index : end_col_index + 1]\n", "    if col != \"Autorité d'enregistrement\"\n", "]\n", "\n", "# Per-professional site check: among rows without a FINESS number, the site is identified by\n", "# its SIRET or, failing that, by the technical structure id. Rows are kept when the same\n", "# ('Identifiant PP', site) pair shows more than one distinct combination of site_info_cols.\n", "df4 = (\n", "    df\n", "    .with_columns(\n", "        pd.coalesce(\n", "            pd.col('Numéro SIRET site'),\n", "            pd.col('Identifiant technique de la structure'),\n", "        ).alias('Site_Identifier')\n", "    )\n", "    .filter(\n", "        pd.col('Numéro FINESS site').is_null() & pd.col('Site_Identifier').is_not_null()\n", "    )\n", "    .with_columns(\n", "        pd.struct(site_info_cols)\n", "        .n_unique()\n", "        .over(['Identifiant PP', 'Site_Identifier'])\n", "        .alias('Site_Info_Variations_Count')\n", "    )\n", "    .filter(pd.col('Site_Info_Variations_Count') > 1)\n", "    .sort('Identifiant PP', 'Site_Identifier', 'index')\n", "    .select([\n", "        'index',\n", "        'Identifiant PP',\n", "        'Site_Identifier',\n", "        'Numéro SIRET site',\n", "        'Identifiant technique de la structure',\n", "        'Site_Info_Variations_Count',\n", "        *site_info_cols,\n", "    ])\n", ")\n", "df4" ], "id": "3c2f2bb5fc3c2a5e", "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ "CPU times: total: 18.1 s\n", "Wall time: 4.29 s\n" ] }, { "data": { "text/plain": [ "shape: (98, 31)\n", "┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n", "│ index ┆ Identifian ┆ Site_Iden ┆ Numéro ┆ … ┆ Libellé ┆ Ancien ┆ Code ┆ Libellé │\n", "│ --- ┆ t PP ┆ tifier ┆ SIRET ┆ ┆ Départeme ┆ identifia ┆ secteur ┆ secteur │\n", "│ u32 ┆ --- ┆ --- ┆ site ┆ ┆ nt (struc ┆ nt de la ┆ d'activit ┆ d'activit │\n", "│ ┆ str ┆ str ┆ --- ┆ ┆ ture… ┆ struc… ┆ é ┆ é │\n", "│ ┆ ┆ ┆ str ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ ┆ ┆ ┆ ┆ ┆ str ┆ str ┆ str ┆ str │\n", "╞═════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n", "│ 270597 ┆ 1000011638 ┆ 309802205 ┆ 309802205 ┆ … ┆ null ┆ 330980220 ┆ SA28 ┆ Asso et │\n", "│ ┆ 3 ┆ 00505 ┆ 00505 ┆ ┆ ┆ 500505 ┆ ┆ orga huma │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ nitaire │\n", "│ 1353470 ┆ 1000011638 ┆ 309802205 ┆ 309802205 ┆ … ┆ null ┆ 330980220 ┆ SA28 ┆ Asso et │\n", "│ ┆ 3 ┆ 00505 ┆ 00505 ┆ ┆ ┆ 500505 ┆ ┆ orga huma │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ nitaire │\n", "│ 4214 ┆ 1000053630 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n", "│ ┆ 9 ┆ 02369 ┆ 02369 ┆ ┆ ┆ 402369 ┆ ┆ de │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n", "│ 1627221 ┆ 1000053630 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n", "│ ┆ 9 ┆ 02369 ┆ 02369 ┆ ┆ ┆ 402369 ┆ ┆ de │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n", "│ 816501 ┆ 1000066718 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n", "│ ┆ 7 ┆ 02369 ┆ 02369 ┆ ┆ ┆ 402369 ┆ ┆ de │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 532248 ┆ 1011059216 ┆ 880859350 ┆ 880859350 ┆ … ┆ null ┆ 388085935 ┆ SA32 ┆ Fab. │\n", "│ ┆ 8 ┆ 00014 ┆ 00014 ┆ ┆ ┆ 000014 ┆ ┆ Exploit. │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. 
│\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n", "│ 1344743 ┆ 1011059749 ┆ 130008006 ┆ 130008006 ┆ … ┆ null ┆ 313000800 ┆ SA24 ┆ Organisme │\n", "│ ┆ 8 ┆ 00038 ┆ 00038 ┆ ┆ ┆ 600038 ┆ ┆ de │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n", "│ 2156205 ┆ 1011059749 ┆ 130008006 ┆ 130008006 ┆ … ┆ null ┆ 313000800 ┆ SA24 ┆ Organisme │\n", "│ ┆ 8 ┆ 00038 ┆ 00038 ┆ ┆ ┆ 600038 ┆ ┆ de │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n", "│ 1618789 ┆ 1011091034 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n", "│ ┆ 5 ┆ 01098 ┆ 01098 ┆ ┆ ┆ 401098 ┆ ┆ de │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n", "│ 1889827 ┆ 1011091034 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n", "│ ┆ 5 ┆ 01098 ┆ 01098 ┆ ┆ ┆ 401098 ┆ ┆ de │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n", "└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘" ], "text/html": [ "
\n", "shape: (98, 31)
indexIdentifiant PPSite_IdentifierNuméro SIRET siteIdentifiant technique de la structureSite_Info_Variations_CountRaison sociale siteEnseigne commerciale siteComplément destinataire (coord. structure)Complément point géographique (coord. structure)Numéro Voie (coord. structure)Indice répétition voie (coord. structure)Code type de voie (coord. structure)Libellé type de voie (coord. structure)Libellé Voie (coord. structure)Mention distribution (coord. structure)Bureau cedex (coord. structure)Code postal (coord. structure)Code commune (coord. structure)Libellé commune (coord. structure)Code pays (coord. structure)Libellé pays (coord. structure)Téléphone (coord. structure)Téléphone 2 (coord. structure)Télécopie (coord. structure)Adresse e-mail (coord. structure)Code Département (structure)Libellé Département (structure)Ancien identifiant de la structureCode secteur d'activitéLibellé secteur d'activité
u32strstrstrstru32strstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstr
270597"10000116383""30980220500505""30980220500505""R10100000198782"2"OEUVRES HOSPITALIERES FRANCAIS…"ORDRE DE MALTE FRANCE""ORDRE DE MALTE FRANCE"null"42"nullnullnull"RUE DES VOLONTAIRES"null"75015 PARIS""75015""75056""Paris""99000""France"nullnullnullnullnullnull"330980220500505""SA28""Asso et orga humanitaire"
1353470"10000116383""30980220500505""30980220500505""R10100000779807"2"OEUVRE HOSP FRANC DE L'ORDRE D…nullnullnull"49"null"R""Rue""DE LA CHAPELLE"null"75018 PARIS 18E  ARRONDISSEMEN…"75018""75118""Paris 18e  Arrondissement"nullnullnullnullnullnullnullnull"330980220500505""SA28""Asso et orga humanitaire"
4214"10000536309""18003502402369""18003502402369""R10100000050224"2"DRSM PAYS DE LA LOIREELSM 44""SITE NANTES"nullnull"9"null"R""Rue""GAETAN RONDEAU""BP""44203 NANTES""44203""44109""Nantes"nullnullnullnullnullnullnullnull"318003502402369""SA24""Organisme de Sécurité Sociale"
1627221"10000536309""18003502402369""18003502402369""R10100000049799"2"DRSM PAYS DE LOIREELSM 49""SITE CHOLET"nullnull"2"null"R""Rue""SAINT ELOI""BP""49321 CHOLET""49321""49099""Cholet"nullnullnullnullnullnullnullnull"318003502402369""SA24""Organisme de Sécurité Sociale"
816501"10000667187""18003502402369""18003502402369""R10100000049794"3"DRSM NORD PICARDIEELSM 59""SITE MAUBEUGE"nullnullnullnull"PL""Place""DE WATTIGNIES""BP""59603 MAUBEUGE""59603""59392""Maubeuge"nullnullnullnullnullnullnullnull"318003502402369""SA24""Organisme de Sécurité Sociale"
532248"10110592168""88085935000014""88085935000014""R10100000325887"2"FAREVA PAU"nullnullnullnullnull"AV""Avenue""DU BEARN"null"64320 IDRON""64320""64269""Idron"nullnull"0559402100"null"0559402119"nullnullnull"388085935000014""SA32""Fab. Exploit. Import. Méd. DM"
1344743"10110597498""13000800600038""13000800600038""R10100000097229"2"AGENCE REGIONALE SANTE PAYS LO…nullnullnull"17"null"BD""Boulevard""GASTON DOUMERGUE"null"44262 NANTES""44262""44109""Nantes"nullnullnullnullnullnullnullnull"313000800600038""SA24""Organisme de Sécurité Sociale"
2156205"10110597498""13000800600038""13000800600038""R10100000097229"2"AGENCE REGIONALE SANTE PAYS LO…null"DELEGATION TERRITORIALE"null"2"null"BD""Boulevard""MURAT"null"53000 LAVAL""53000""53130""Laval"nullnullnullnullnullnullnullnull"313000800600038""SA24""Organisme de Sécurité Sociale"
1618789"10110910345""18003502401098""18003502401098""R10100000398898"2"CAISSE NATIONALE DE L'ASSURANC…"DRSM DIRECTION REG. DU SERVICE…"QUARTIER DU LAC"null"80"nullnullnull"AVENUE DE LA JALLERE""BP 260""33300 BORDEAUX""33300""33063""Bordeaux""99000""France"nullnullnullnullnullnull"318003502401098""SA24""Organisme de Sécurité Sociale"
1889827"10110910345""18003502401098""18003502401098""R10100000398898"2"CAISSE NATIONALE DE L'ASSURANC…"DRSM DIRECTION REG. DU SERVICE…nullnull"207"null"R""Rue""FONTAINEBLEAU""BP""40011 MONT-DE-MARSAN""40011""40192""Mont-de-Marsan"nullnullnullnullnullnullnullnull"318003502401098""SA24""Organisme de Sécurité Sociale"
" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 64 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:07.974271Z", "start_time": "2025-08-19T22:46:07.943280Z" } }, "cell_type": "code", "source": [ "# Save the per-professional site variations as pipe-separated text, without quoting, using LF line endings.\n", "df4.write_csv(\n", "    folder + output_file + \"-Sites_Variations\" + output_extension,\n", "    separator='|',\n", "    quote_style=\"never\",\n", "    line_terminator='\\n',\n", ")\n" ], "id": "c1fd01e419f4ccc9", "outputs": [], "execution_count": 65 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:12.781888Z", "start_time": "2025-08-19T22:46:08.306776Z" } }, "cell_type": "code", "source": [ "%%time\n", "# Global site check: among rows without a FINESS number, the site is identified by its SIRET\n", "# or, failing that, by the technical structure id. Rows are kept when one site identifier\n", "# shows more than one distinct combination of site_info_cols (built in an earlier cell),\n", "# whichever professional the rows belong to.\n", "df5 = (\n", "    df\n", "    .with_columns(\n", "        pd.coalesce(\n", "            pd.col('Numéro SIRET site'),\n", "            pd.col('Identifiant technique de la structure'),\n", "        ).alias('Site_Identifier')\n", "    )\n", "    .filter(\n", "        pd.col('Numéro FINESS site').is_null() & pd.col('Site_Identifier').is_not_null()\n", "    )\n", "    .with_columns(\n", "        pd.struct(site_info_cols)\n", "        .n_unique()\n", "        .over(['Site_Identifier'])\n", "        .alias('Site_Info_Variations_Count')\n", "    )\n", "    .filter(pd.col('Site_Info_Variations_Count') > 1)\n", "    .sort('Site_Identifier', 'index')\n", "    .select([\n", "        'index',\n", "        'Identifiant PP',\n", "        'Site_Identifier',\n", "        'Numéro SIRET site',\n", "        'Identifiant technique de la structure',\n", "        'Site_Info_Variations_Count',\n", "        *site_info_cols,\n", "    ])\n", ")\n", "df5\n" ], "id": "7838523925fc85ee", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: total: 19.8 s\n", "Wall time: 4.43 s\n" ] }, { "data": { "text/plain": [ "shape: (4_190, 31)\n", "┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n", "│ index ┆ Identifian ┆ Site_Iden ┆ Numéro ┆ … ┆ Libellé ┆ Ancien ┆ Code ┆ Libellé │\n", "│ --- ┆ t PP ┆ tifier ┆ SIRET ┆ ┆ Départeme ┆ identifia ┆ secteur ┆ secteur │\n", "│ u32 ┆ --- ┆ 
--- ┆ site ┆ ┆ nt (struc ┆ nt de la ┆ d'activit ┆ d'activit │\n", "│ ┆ str ┆ str ┆ --- ┆ ┆ ture… ┆ struc… ┆ é ┆ é │\n", "│ ┆ ┆ ┆ str ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ ┆ ┆ ┆ ┆ ┆ str ┆ str ┆ str ┆ str │\n", "╞═════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n", "│ 127508 ┆ 1010000229 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n", "│ ┆ 3 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n", "│ 285182 ┆ 1000180676 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n", "│ ┆ 8 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n", "│ 466100 ┆ 1010480041 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n", "│ ┆ 1 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n", "│ 722626 ┆ 1010368715 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n", "│ ┆ 7 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n", "│ 826390 ┆ 1000179659 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n", "│ ┆ 7 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. 
DM │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 793647 ┆ 1010986940 ┆ 984526194 ┆ 984526194 ┆ … ┆ null ┆ 398452619 ┆ SA09 ┆ Exercice │\n", "│ ┆ 3 ┆ 00019 ┆ 00019 ┆ ┆ ┆ 400019 ┆ ┆ en │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Société │\n", "│ 1500531 ┆ 1010129326 ┆ 984526194 ┆ 984526194 ┆ … ┆ null ┆ 398452619 ┆ SA09 ┆ Exercice │\n", "│ ┆ 3 ┆ 00019 ┆ 00019 ┆ ┆ ┆ 400019 ┆ ┆ en │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Société │\n", "│ 1852947 ┆ 1010801513 ┆ 984526194 ┆ 984526194 ┆ … ┆ null ┆ 398452619 ┆ SA09 ┆ Exercice │\n", "│ ┆ 1 ┆ 00019 ┆ 00019 ┆ ┆ ┆ 400019 ┆ ┆ en │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Société │\n", "│ 1571945 ┆ 1010723524 ┆ 998823504 ┆ 998823504 ┆ … ┆ null ┆ 399882350 ┆ SA11 ┆ Entrepris │\n", "│ ┆ 3 ┆ 30834 ┆ 30834 ┆ ┆ ┆ 430834 ┆ ┆ e │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ d'intérim │\n", "│ 1590681 ┆ 1010869303 ┆ 998823504 ┆ 998823504 ┆ … ┆ null ┆ 399882350 ┆ SA11 ┆ Entrepris │\n", "│ ┆ 6 ┆ 30834 ┆ 30834 ┆ ┆ ┆ 430834 ┆ ┆ e │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ d'intérim │\n", "└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘" ], "text/html": [ "
\n", "shape: (4_190, 31)
indexIdentifiant PPSite_IdentifierNuméro SIRET siteIdentifiant technique de la structureSite_Info_Variations_CountRaison sociale siteEnseigne commerciale siteComplément destinataire (coord. structure)Complément point géographique (coord. structure)Numéro Voie (coord. structure)Indice répétition voie (coord. structure)Code type de voie (coord. structure)Libellé type de voie (coord. structure)Libellé Voie (coord. structure)Mention distribution (coord. structure)Bureau cedex (coord. structure)Code postal (coord. structure)Code commune (coord. structure)Libellé commune (coord. structure)Code pays (coord. structure)Libellé pays (coord. structure)Téléphone (coord. structure)Téléphone 2 (coord. structure)Télécopie (coord. structure)Adresse e-mail (coord. structure)Code Département (structure)Libellé Département (structure)Ancien identifiant de la structureCode secteur d'activitéLibellé secteur d'activité
u32strstrstrstru32strstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstrstr
127508"10100002293""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
285182"10001806768""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
466100"10104800411""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
722626"10103687157""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
826390"10001796597""05650171100115""05650171100115""R10000001502146"2"BECTON DICKINSON FRANCE"nullnullnull"11"null"R""Rue""RUE ARISTIDE BERGES"null"38801 LE PONT DE CLAIX CEDEX""38801""38317""Le Pont-de-Claix""99000""France"nullnullnullnullnullnull"305650171100115""SA32""Fab. Exploit. Import. Méd. DM"
793647"10109869403""98452619400019""98452619400019""R10100000673943"2"SELARL CENTRE DE PODOLOGIE SPO…"SELARL CENTRE DE PODOLOGIE SPO…nullnull"8"null"R""Rue""GEORGES NEGREVERGNE"null"33700 MERIGNAC""33700""33281""Mérignac""99000""France"nullnullnullnullnullnull"398452619400019""SA09""Exercice en Société"
1500531"10101293263""98452619400019""98452619400019""R10100000673943"2"SELARL CENTRE DE PODOLOGIE SPO…"SELARL CENTRE DE PODOLOGIE SPO…nullnull"8"null"R""Rue""GEORGES NEGREVERGNE"null"33700 MERIGNAC""33700""33281""Mérignac""99000""France"nullnullnullnullnullnull"398452619400019""SA09""Exercice en Société"
1852947"10108015131""98452619400019""98452619400019""R10100000673943"2"SELARL CENTRE DE PODOLOGIE SPO…"SELARL CENTRE DE PODOLOGIE SPO…nullnull"8"null"R""Rue""GEORGES NEGREVERGNE"null"33700 MERIGNAC""33700""33281""Mérignac""99000""France"nullnullnullnullnullnull"398452619400019""SA09""Exercice en Société"
1571945"10107235243""99882350430834""99882350430834""R10100000554688"2"ADECCO FRANCE""ADECCO"null"PARC VALMY PARK AVENUE BAT A 1…"8""D"nullnull"RUE JEANNE BARRET"null"21000 DIJON""21000""21231""Dijon""99000""France"nullnullnullnullnullnull"399882350430834""SA11""Entreprise d'intérim"
1590681"10108693036""99882350430834""99882350430834""R10100000413248"2"ADECCO MEDICAL"nullnullnull"8""D""R""Rue""JEANNE BARRET"null"21000 DIJON""21000""21231""Dijon""99000""France"nullnullnullnullnullnull"399882350430834""SA11""Entreprise d'intérim"
" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 66 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:13.670911Z", "start_time": "2025-08-19T22:46:13.655386Z" } }, "cell_type": "code", "source": [ "# Save the global site variations as pipe-separated text, without quoting, using LF line endings.\n", "df5.write_csv(\n", "    folder + output_file + \"-Sites_Variations_Global\" + output_extension,\n", "    separator='|',\n", "    quote_style=\"never\",\n", "    line_terminator='\\n',\n", ")\n" ], "id": "416184f32f973a71", "outputs": [], "execution_count": 67 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:17.023811Z", "start_time": "2025-08-19T22:46:14.032470Z" } }, "cell_type": "code", "source": [ "%%time\n", "# Activity check: the site is identified by the first non-null value among FINESS, SIRET and\n", "# the technical structure id; rows with no identifier at all are dropped. Rows are kept when\n", "# the same ('Identifiant PP', site) pair shows more than one distinct combination of the five\n", "# activity labels (profession, savoir-faire, mode exercice, rôle, genre activité).\n", "df6 = (\n", "    df\n", "    .with_columns(\n", "        pd.coalesce(\n", "            pd.col('Numéro FINESS site'),\n", "            pd.col('Numéro SIRET site'),\n", "            pd.col('Identifiant technique de la structure'),\n", "        ).alias('Site_Identifier_Global')\n", "    )\n", "    .drop_nulls(subset='Site_Identifier_Global')\n", "    .with_columns(\n", "        pd.struct(\n", "            \"Libellé profession\",\n", "            \"Libellé savoir-faire\",\n", "            \"Libellé mode exercice\",\n", "            \"Libellé rôle\",\n", "            \"Libellé genre activité\",\n", "        )\n", "        .n_unique()\n", "        .over([\"Identifiant PP\", \"Site_Identifier_Global\"])\n", "        .alias(\"Activites_Count\")\n", "    )\n", "    .filter(pd.col(\"Activites_Count\") > 1)\n", "    .sort(\"Identifiant PP\", \"Site_Identifier_Global\", \"index\")\n", "    .select(\n", "        \"index\",\n", "        \"Identifiant PP\",\n", "        \"Site_Identifier_Global\",\n", "        \"Numéro FINESS site\",\n", "        \"Numéro SIRET site\",\n", "        \"Identifiant technique de la structure\",\n", "        \"Activites_Count\",\n", "        \"Libellé profession\",\n", "        \"Libellé savoir-faire\",\n", "        \"Libellé mode exercice\",\n", "        \"Libellé rôle\",\n", "        \"Libellé genre activité\",\n", "    )\n", ")\n", "df6\n" ], "id": "84549f83ce5e92f", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: total: 11 s\n", "Wall time: 2.96 s\n" ] }, { "data": { "text/plain": [ "shape: (25_389, 12)\n", 
"┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n", "│ index ┆ Identifian ┆ Site_Iden ┆ Numéro ┆ … ┆ Libellé ┆ Libellé ┆ Libellé ┆ Libellé │\n", "│ --- ┆ t PP ┆ tifier_Gl ┆ FINESS ┆ ┆ savoir-fa ┆ mode ┆ rôle ┆ genre │\n", "│ u32 ┆ --- ┆ obal ┆ site ┆ ┆ ire ┆ exercice ┆ --- ┆ activité │\n", "│ ┆ str ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ str ┆ --- │\n", "│ ┆ ┆ str ┆ str ┆ ┆ str ┆ str ┆ ┆ str │\n", "╞═════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n", "│ 1352840 ┆ 1000001797 ┆ 130786445 ┆ 130786445 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n", "│ ┆ 9 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n", "│ 1893982 ┆ 1000001797 ┆ 130786445 ┆ 130786445 ┆ … ┆ null ┆ Salarié ┆ Fonction ┆ Activité │\n", "│ ┆ 9 ┆ ┆ ┆ ┆ ┆ ┆ non ┆ standard │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ définie ┆ de soin │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n", "│ 270269 ┆ 1000007028 ┆ 397840901 ┆ null ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n", "│ ┆ 3 ┆ 00011 ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n", "│ 811380 ┆ 1000007028 ┆ 397840901 ┆ null ┆ … ┆ null ┆ Salarié ┆ Fonction ┆ Activité │\n", "│ ┆ 3 ┆ 00011 ┆ ┆ ┆ ┆ ┆ non ┆ non │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ définie ┆ soignante │\n", "│ 540586 ┆ 1000008684 ┆ 860012228 ┆ 860012228 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n", "│ ┆ 2 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 538572 ┆ 1011112354 ┆ 970400016 ┆ 970400016 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n", "│ ┆ 2 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n", "│ 538882 ┆ 1011125299 ┆ 250006954 ┆ 250006954 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n", "│ ┆ 4 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n", "│ ┆ ┆ 
┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n", "│ 2162932 ┆ 1011125299 ┆ 250006954 ┆ 250006954 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n", "│ ┆ 4 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n", "│ 2163446 ┆ 1011129312 ┆ 490540218 ┆ 490540218 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n", "│ ┆ 1 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n", "│ 2163447 ┆ 1011129312 ┆ 490540218 ┆ 490540218 ┆ … ┆ null ┆ Salarié ┆ Cadre de ┆ Activité │\n", "│ ┆ 1 ┆ ┆ ┆ ┆ ┆ ┆ santé de ┆ non │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ proximité ┆ soignante │\n", "└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘" ], "text/html": [ "
\n", "shape: (25_389, 12)
indexIdentifiant PPSite_Identifier_GlobalNuméro FINESS siteNuméro SIRET siteIdentifiant technique de la structureActivites_CountLibellé professionLibellé savoir-faireLibellé mode exerciceLibellé rôleLibellé genre activité
u32strstrstrstrstru32strstrstrstrstr
1352840"10000017979""130786445""130786445""30247736900011""F130786445"2"Ostéopathe"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
1893982"10000017979""130786445""130786445""30247736900011""F130786445"2"Sage-Femme"null"Salarié""Fonction non définie""Activité standard de soin ou d…
270269"10000070283""39784090100011"null"39784090100011""R10000002500225"2"Chirurgien-Dentiste"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
811380"10000070283""39784090100011"null"39784090100011""R10000002500225"2"Chirurgien-Dentiste"null"Salarié""Fonction non définie""Activité non soignante"
540586"10000086842""860012228""860012228""13001256000038""F860012228"2"Psychologue"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
538572"10111123542""970400016""970400016""26974214400034""F970400016"2"Psychologue"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
538882"10111252994""250006954""250006954""26250176000264""F250006954"2"Psychothérapeute"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
2162932"10111252994""250006954""250006954""26250176000264""F250006954"2"Psychologue"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
2163446"10111293121""490540218""490540218""77568873211159""F490540218"2"Infirmier"null"Salarié""Salarié en poste fixe""Activité standard de soin ou d…
2163447"10111293121""490540218""490540218""77568873211159""F490540218"2"Infirmier"null"Salarié""Cadre de santé de proximité""Activité non soignante"
" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 68 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:17.851427Z", "start_time": "2025-08-19T22:46:17.796168Z" } }, "cell_type": "code", "source": [ "df6.write_csv(f\"{folder}{output_file}-Multiple_Activities_Per_Site{output_extension}\",\n", " separator='|',\n", " quote_style=\"never\",\n", " line_terminator='\\n')\n" ], "id": "6f7025a7c08b54b4", "outputs": [], "execution_count": 69 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:19.535052Z", "start_time": "2025-08-19T22:46:18.015194Z" } }, "cell_type": "code", "source": [ "%%time\n", "df7 = (\n", " df\n", " .with_columns(\n", " pd.col(\"Libellé profession\").n_unique().over(\"Identifiant PP\").alias(\"Profession_Count\")\n", " )\n", " .filter(pd.col(\"Profession_Count\") > 1)\n", " .sort([\"Identifiant PP\", \"index\"])\n", " .select([\n", " \"index\",\n", " \"Identifiant PP\",\n", " \"Profession_Count\",\n", " \"Libellé profession\",\n", " \"Libellé savoir-faire\"\n", " ])\n", ")\n", "df7\n" ], "id": "b18d9ba71ba63d9d", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: total: 5.33 s\n", "Wall time: 1.5 s\n" ] }, { "data": { "text/plain": [ "shape: (88_845, 5)\n", "┌─────────┬────────────────┬──────────────────┬────────────────────┬──────────────────────┐\n", "│ index ┆ Identifiant PP ┆ Profession_Count ┆ Libellé profession ┆ Libellé savoir-faire │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ u32 ┆ str ┆ u32 ┆ str ┆ str │\n", "╞═════════╪════════════════╪══════════════════╪════════════════════╪══════════════════════╡\n", "│ 74 ┆ 10000013150 ┆ 2 ┆ Médecin ┆ Psychiatrie │\n", "│ 269913 ┆ 10000013150 ┆ 2 ┆ Psychothérapeute ┆ null │\n", "│ 1352840 ┆ 10000017979 ┆ 2 ┆ Ostéopathe ┆ null │\n", "│ 1893982 ┆ 10000017979 ┆ 2 ┆ Sage-Femme ┆ null │\n", "│ 811125 ┆ 10000029966 ┆ 2 ┆ Sage-Femme ┆ null │\n", "│ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 1352396 ┆ 10111320304 ┆ 2 ┆ 
Psychothérapeute ┆ null │\n", "│ 269545 ┆ 10111320379 ┆ 2 ┆ Psychothérapeute ┆ null │\n", "│ 810601 ┆ 10111320379 ┆ 2 ┆ Psychologue ┆ null │\n", "│ 539711 ┆ 10111321468 ┆ 2 ┆ Psychothérapeute ┆ null │\n", "│ 1352414 ┆ 10111321468 ┆ 2 ┆ Psychologue ┆ null │\n", "└─────────┴────────────────┴──────────────────┴────────────────────┴──────────────────────┘" ], "text/html": [ "
\n", "shape: (88_845, 5)
indexIdentifiant PPProfession_CountLibellé professionLibellé savoir-faire
u32stru32strstr
74"10000013150"2"Médecin""Psychiatrie"
269913"10000013150"2"Psychothérapeute"null
1352840"10000017979"2"Ostéopathe"null
1893982"10000017979"2"Sage-Femme"null
811125"10000029966"2"Sage-Femme"null
1352396"10111320304"2"Psychothérapeute"null
269545"10111320379"2"Psychothérapeute"null
810601"10111320379"2"Psychologue"null
539711"10111321468"2"Psychothérapeute"null
1352414"10111321468"2"Psychologue"null
" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 70 }, { "metadata": { "ExecuteTime": { "end_time": "2025-08-19T22:46:19.893214Z", "start_time": "2025-08-19T22:46:19.851765Z" } }, "cell_type": "code", "source": [ "df7.write_csv(f\"{folder}{output_file}-Multiple_Professions{output_extension}\",\n", " separator='|',\n", " quote_style=\"never\",\n", " line_terminator='\\n')\n" ], "id": "85be468fd3f461d1", "outputs": [], "execution_count": 71 } ], "metadata": { "kernelspec": { "name": "python3", "language": "python", "display_name": "Python 3 (ipykernel)" } }, "nbformat": 4, "nbformat_minor": 5 }