Files
professionals_from_sante_fr/Professionals_Activities_Inconsistencies.ipynb
2026-03-05 11:11:10 +00:00

925 lines
80 KiB
Plaintext

{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:45:53.794236Z",
"start_time": "2025-08-19T22:45:51.445477Z"
}
},
"cell_type": "code",
"source": [
"%%time\n",
"# NOTE(review): polars is imported under the alias `pd`, which is conventionally used for pandas.\n",
"# The cells below call pd.col / pd.struct / pd.coalesce, so the alias must stay consistent notebook-wide.\n",
"import polars as pd\n",
"# NOTE(review): csv is not referenced by the cells visible in this notebook section.\n",
"import csv\n",
"# Input location and output naming; the export cells below build their file paths from\n",
"# folder + output_file + a per-report suffix + output_extension.\n",
"# NOTE(review): absolute, machine-specific Windows path - must be adapted on any other machine.\n",
"folder = \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"\n",
"input_file = \"Table_Réf_Professionnels_250815.txt\"\n",
"output_file = \"Table_Réf_Professionnels_inconsistencies\"\n",
"output_extension = \".csv\"\n",
"# Pipe-separated text file: quote handling is disabled (quote_char=None) and empty fields are read as null.\n",
"df = pd.read_csv(f\"{folder}{input_file}\",\n",
" separator='|',\n",
" quote_char=None,\n",
" null_values='',\n",
" infer_schema_length=0) # Read all columns as strings\n",
"# Add a row-number column named 'index'; the analysis cells below sort on it and include it in their output.\n",
"df = df.with_row_index('index')\n",
"# Show the column names as the cell output.\n",
"df.columns\n"
],
"id": "58db5082e27759f7",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 13 s\n",
"Wall time: 2.33 s\n"
]
},
{
"data": {
"text/plain": [
"['index',\n",
" \"Type d'identifiant PP\",\n",
" 'Identifiant PP',\n",
" 'Identification nationale PP',\n",
" \"Code civilité d'exercice\",\n",
" \"Libellé civilité d'exercice\",\n",
" 'Code civilité',\n",
" 'Libellé civilité',\n",
" \"Nom d'exercice\",\n",
" \"Prénom d'exercice\",\n",
" 'Code profession',\n",
" 'Libellé profession',\n",
" 'Code catégorie professionnelle',\n",
" 'Libellé catégorie professionnelle',\n",
" 'Code type savoir-faire',\n",
" 'Libellé type savoir-faire',\n",
" 'Code savoir-faire',\n",
" 'Libellé savoir-faire',\n",
" 'Code mode exercice',\n",
" 'Libellé mode exercice',\n",
" 'Numéro SIRET site',\n",
" 'Numéro SIREN site',\n",
" 'Numéro FINESS site',\n",
" 'Numéro FINESS établissement juridique',\n",
" 'Identifiant technique de la structure',\n",
" 'Raison sociale site',\n",
" 'Enseigne commerciale site',\n",
" 'Complément destinataire (coord. structure)',\n",
" 'Complément point géographique (coord. structure)',\n",
" 'Numéro Voie (coord. structure)',\n",
" 'Indice répétition voie (coord. structure)',\n",
" 'Code type de voie (coord. structure)',\n",
" 'Libellé type de voie (coord. structure)',\n",
" 'Libellé Voie (coord. structure)',\n",
" 'Mention distribution (coord. structure)',\n",
" 'Bureau cedex (coord. structure)',\n",
" 'Code postal (coord. structure)',\n",
" 'Code commune (coord. structure)',\n",
" 'Libellé commune (coord. structure)',\n",
" 'Code pays (coord. structure)',\n",
" 'Libellé pays (coord. structure)',\n",
" 'Téléphone (coord. structure)',\n",
" 'Téléphone 2 (coord. structure)',\n",
" 'Télécopie (coord. structure)',\n",
" 'Adresse e-mail (coord. structure)',\n",
" 'Code Département (structure)',\n",
" 'Libellé Département (structure)',\n",
" 'Ancien identifiant de la structure',\n",
" \"Autorité d'enregistrement\",\n",
" \"Code secteur d'activité\",\n",
" \"Libellé secteur d'activité\",\n",
" 'Code section tableau pharmaciens',\n",
" 'Libellé section tableau pharmaciens',\n",
" 'Code rôle',\n",
" 'Libellé rôle',\n",
" 'Code genre activité',\n",
" 'Libellé genre activité',\n",
" '']"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 57
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:45:54.952210Z",
"start_time": "2025-08-19T22:45:53.873718Z"
}
},
"cell_type": "code",
"source": [
"%%time\n",
"# Strict name check: for every 'Identifiant PP', count the distinct exact\n",
"# \"<Nom d'exercice> <Prénom d'exercice>\" strings and keep the rows of identifiers that have\n",
"# more than one. Any difference (case, accents, punctuation) counts as a variation here.\n",
"df1 = (\n",
"    df.with_columns(\n",
"        (pd.col(\"Nom d'exercice\") + \" \" + pd.col(\"Prénom d'exercice\"))\n",
"        .n_unique()\n",
"        .over(\"Identifiant PP\")\n",
"        .alias(\"Count\")\n",
"    )\n",
"    .filter(pd.col(\"Count\") > 1)\n",
"    .sort(\"Identifiant PP\", \"index\")\n",
"    .select([\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\"])\n",
")\n",
"df1\n"
],
"id": "7d9b7562c09955",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 4.83 s\n",
"Wall time: 1.07 s\n"
]
},
{
"data": {
"text/plain": [
"shape: (9_108, 5)\n",
"┌─────────┬────────────────┬───────┬────────────────┬───────────────────┐\n",
"│ index ┆ Identifiant PP ┆ Count ┆ Nom d'exercice ┆ Prénom d'exercice │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ u32 ┆ str ┆ u32 ┆ str ┆ str │\n",
"╞═════════╪════════════════╪═══════╪════════════════╪═══════════════════╡\n",
"│ 1352933 ┆ 10000034180 ┆ 2 ┆ DUWAT-GEORGES ┆ GHISLAINE │\n",
"│ 1352934 ┆ 10000034180 ┆ 2 ┆ GEORGES ┆ GHISLAINE │\n",
"│ 261 ┆ 10000040062 ┆ 2 ┆ MEYER ┆ Nicolas │\n",
"│ 262 ┆ 10000040062 ┆ 2 ┆ MEYER ┆ Nicolas │\n",
"│ 811196 ┆ 10000040062 ┆ 2 ┆ MEYER ┆ NICOLAS │\n",
"│ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 2162425 ┆ 10111110721 ┆ 2 ┆ BARREAU ┆ Nadège │\n",
"│ 268237 ┆ 10111112636 ┆ 2 ┆ GIRAUDET ┆ MEGGIE │\n",
"│ 1892318 ┆ 10111112636 ┆ 2 ┆ GIRAUDET ┆ Meggie │\n",
"│ 269544 ┆ 10111320304 ┆ 2 ┆ Sengel ┆ Coralie │\n",
"│ 1352396 ┆ 10111320304 ┆ 2 ┆ SENGEL ┆ Coralie │\n",
"└─────────┴────────────────┴───────┴────────────────┴───────────────────┘"
],
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (9_108, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Count</th><th>Nom d&#x27;exercice</th><th>Prénom d&#x27;exercice</th></tr><tr><td>u32</td><td>str</td><td>u32</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>1352933</td><td>&quot;10000034180&quot;</td><td>2</td><td>&quot;DUWAT-GEORGES&quot;</td><td>&quot;GHISLAINE&quot;</td></tr><tr><td>1352934</td><td>&quot;10000034180&quot;</td><td>2</td><td>&quot;GEORGES&quot;</td><td>&quot;GHISLAINE&quot;</td></tr><tr><td>261</td><td>&quot;10000040062&quot;</td><td>2</td><td>&quot;MEYER&quot;</td><td>&quot;Nicolas&quot;</td></tr><tr><td>262</td><td>&quot;10000040062&quot;</td><td>2</td><td>&quot;MEYER&quot;</td><td>&quot;Nicolas&quot;</td></tr><tr><td>811196</td><td>&quot;10000040062&quot;</td><td>2</td><td>&quot;MEYER&quot;</td><td>&quot;NICOLAS&quot;</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>2162425</td><td>&quot;10111110721&quot;</td><td>2</td><td>&quot;BARREAU&quot;</td><td>&quot;Nadège&quot;</td></tr><tr><td>268237</td><td>&quot;10111112636&quot;</td><td>2</td><td>&quot;GIRAUDET&quot;</td><td>&quot;MEGGIE&quot;</td></tr><tr><td>1892318</td><td>&quot;10111112636&quot;</td><td>2</td><td>&quot;GIRAUDET&quot;</td><td>&quot;Meggie&quot;</td></tr><tr><td>269544</td><td>&quot;10111320304&quot;</td><td>2</td><td>&quot;Sengel&quot;</td><td>&quot;Coralie&quot;</td></tr><tr><td>1352396</td><td>&quot;10111320304&quot;</td><td>2</td><td>&quot;SENGEL&quot;</td><td>&quot;Coralie&quot;</td></tr></tbody></table></div>"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 58
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:45:55.090712Z",
"start_time": "2025-08-19T22:45:55.072647Z"
}
},
"cell_type": "code",
"source": [
"# Export the strict name variations: pipe-separated, fields never quoted, LF line endings.\n",
"df1.write_csv(\n",
"    f\"{folder}{output_file}-Names_Variations_Strict{output_extension}\",\n",
"    line_terminator='\\n',\n",
"    quote_style=\"never\",\n",
"    separator='|',\n",
")\n"
],
"id": "c418a6ea7abd77b",
"outputs": [],
"execution_count": 59
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:45:56.962873Z",
"start_time": "2025-08-19T22:45:55.259223Z"
}
},
"cell_type": "code",
"source": [
"%%time\n",
"# Case-insensitive name check: same as the strict check, but both name parts are lowercased\n",
"# before counting, so differences in letter case alone are not reported.\n",
"df2 = (\n",
"    df.with_columns(\n",
"        (\n",
"            pd.col(\"Nom d'exercice\").str.to_lowercase()\n",
"            + \" \"\n",
"            + pd.col(\"Prénom d'exercice\").str.to_lowercase()\n",
"        )\n",
"        .n_unique()\n",
"        .over(\"Identifiant PP\")\n",
"        .alias(\"Count\")\n",
"    )\n",
"    .filter(pd.col(\"Count\") > 1)\n",
"    .sort(\"Identifiant PP\", \"index\")\n",
"    .select([\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\"])\n",
")\n",
"df2\n"
],
"id": "9d94b716364356c7",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 5.06 s\n",
"Wall time: 1.68 s\n"
]
},
{
"data": {
"text/plain": [
"shape: (5_426, 5)\n",
"┌─────────┬────────────────┬───────┬────────────────┬───────────────────┐\n",
"│ index ┆ Identifiant PP ┆ Count ┆ Nom d'exercice ┆ Prénom d'exercice │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ u32 ┆ str ┆ u32 ┆ str ┆ str │\n",
"╞═════════╪════════════════╪═══════╪════════════════╪═══════════════════╡\n",
"│ 1352933 ┆ 10000034180 ┆ 2 ┆ DUWAT-GEORGES ┆ GHISLAINE │\n",
"│ 1352934 ┆ 10000034180 ┆ 2 ┆ GEORGES ┆ GHISLAINE │\n",
"│ 1353009 ┆ 10000046051 ┆ 2 ┆ STUDER ┆ AGNES │\n",
"│ 1623173 ┆ 10000046051 ┆ 2 ┆ JURION ┆ AGNES │\n",
"│ 270462 ┆ 10000101518 ┆ 2 ┆ BARREYRE ┆ SANDRINE │\n",
"│ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 1619731 ┆ 10110987236 ┆ 2 ┆ ROGIER ┆ MATHILDE │\n",
"│ 808810 ┆ 10111077417 ┆ 2 ┆ DOUVIER ┆ FRANCETTE │\n",
"│ 2161999 ┆ 10111077417 ┆ 2 ┆ D'ELLOY ┆ FRANCETTE │\n",
"│ 538415 ┆ 10111110721 ┆ 2 ┆ ROCHEPEAU ┆ Nadège │\n",
"│ 2162425 ┆ 10111110721 ┆ 2 ┆ BARREAU ┆ Nadège │\n",
"└─────────┴────────────────┴───────┴────────────────┴───────────────────┘"
],
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (5_426, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Count</th><th>Nom d&#x27;exercice</th><th>Prénom d&#x27;exercice</th></tr><tr><td>u32</td><td>str</td><td>u32</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>1352933</td><td>&quot;10000034180&quot;</td><td>2</td><td>&quot;DUWAT-GEORGES&quot;</td><td>&quot;GHISLAINE&quot;</td></tr><tr><td>1352934</td><td>&quot;10000034180&quot;</td><td>2</td><td>&quot;GEORGES&quot;</td><td>&quot;GHISLAINE&quot;</td></tr><tr><td>1353009</td><td>&quot;10000046051&quot;</td><td>2</td><td>&quot;STUDER&quot;</td><td>&quot;AGNES&quot;</td></tr><tr><td>1623173</td><td>&quot;10000046051&quot;</td><td>2</td><td>&quot;JURION&quot;</td><td>&quot;AGNES&quot;</td></tr><tr><td>270462</td><td>&quot;10000101518&quot;</td><td>2</td><td>&quot;BARREYRE&quot;</td><td>&quot;SANDRINE&quot;</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>1619731</td><td>&quot;10110987236&quot;</td><td>2</td><td>&quot;ROGIER&quot;</td><td>&quot;MATHILDE&quot;</td></tr><tr><td>808810</td><td>&quot;10111077417&quot;</td><td>2</td><td>&quot;DOUVIER&quot;</td><td>&quot;FRANCETTE&quot;</td></tr><tr><td>2161999</td><td>&quot;10111077417&quot;</td><td>2</td><td>&quot;D&#x27;ELLOY&quot;</td><td>&quot;FRANCETTE&quot;</td></tr><tr><td>538415</td><td>&quot;10111110721&quot;</td><td>2</td><td>&quot;ROCHEPEAU&quot;</td><td>&quot;Nadège&quot;</td></tr><tr><td>2162425</td><td>&quot;10111110721&quot;</td><td>2</td><td>&quot;BARREAU&quot;</td><td>&quot;Nadège&quot;</td></tr></tbody></table></div>"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 60
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:45:57.433036Z",
"start_time": "2025-08-19T22:45:57.417970Z"
}
},
"cell_type": "code",
"source": [
"# Export the case-insensitive name variations: pipe-separated, fields never quoted, LF line endings.\n",
"df2.write_csv(\n",
"    f\"{folder}{output_file}-Names_Variations_Insensitive{output_extension}\",\n",
"    line_terminator='\\n',\n",
"    quote_style=\"never\",\n",
"    separator='|',\n",
")\n"
],
"id": "18aab4499103491a",
"outputs": [],
"execution_count": 61
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:46:02.915526Z",
"start_time": "2025-08-19T22:45:57.710258Z"
}
},
"cell_type": "code",
"source": [
"%%time\n",
"# Normalized name check: build a cleaned \"<nom> <prénom>\" key (lowercased, accents stripped,\n",
"# punctuation turned into spaces, whitespace collapsed and trimmed), then keep the rows of every\n",
"# 'Identifiant PP' that still has more than one distinct cleaned key.\n",
"df3 = (\n",
"    df\n",
"    .with_columns(\n",
"        (\n",
"            (pd.col(\"Nom d'exercice\") + \" \" + pd.col(\"Prénom d'exercice\"))\n",
"            .str.to_lowercase()\n",
"            # Accent normalization (applied after lowercasing, so only lowercase forms are listed)\n",
"            .str.replace_all(\"à|á|â|ã|ä|å\", \"a\", literal=False)\n",
"            .str.replace_all(\"ç\", \"c\", literal=False)\n",
"            .str.replace_all(\"è|é|ê|ë\", \"e\", literal=False)\n",
"            .str.replace_all(\"ì|í|î|ï\", \"i\", literal=False)\n",
"            .str.replace_all(\"ñ\", \"n\", literal=False)\n",
"            .str.replace_all(\"ò|ó|ô|õ|ö\", \"o\", literal=False)\n",
"            .str.replace_all(\"ù|ú|û|ü\", \"u\", literal=False)\n",
"            .str.replace_all(\"ý|ÿ\", \"y\", literal=False)\n",
"            # Replace every character that is not a-z, 0-9 or whitespace with a space,\n",
"            # then collapse each run of whitespace into a single space.\n",
"            # Gotcha: in these raw strings the whitespace class needs a SINGLE backslash; a doubled\n",
"            # backslash makes the regex match a literal backslash followed by 's', which leaves runs\n",
"            # of spaces uncollapsed and reports e.g. 'a - b' and 'a-b' as different names.\n",
"            .str.replace_all(r\"[^a-z0-9\\s]\", \" \", literal=False)\n",
"            .str.replace_all(r\"\\s+\", \" \", literal=False)\n",
"            .str.strip_chars()\n",
"        ).alias(\"Nom_Prénom_Nettoyé\")\n",
"    )\n",
"    .with_columns(\n",
"        pd.col(\"Nom_Prénom_Nettoyé\").n_unique().over(\"Identifiant PP\").alias(\"Count\")\n",
"    )\n",
"    .filter(pd.col(\"Count\") > 1)\n",
"    .sort([\"Identifiant PP\", \"index\"])\n",
"    .select(\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\", \"Nom_Prénom_Nettoyé\")\n",
")\n",
"df3\n"
],
"id": "8e4e3e22f16fea1c",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 8.59 s\n",
"Wall time: 5.19 s\n"
]
},
{
"data": {
"text/plain": [
"shape: (3_584, 6)\n",
"┌─────────┬────────────────┬───────┬────────────────┬───────────────────┬─────────────────────────┐\n",
"│ index ┆ Identifiant PP ┆ Count ┆ Nom d'exercice ┆ Prénom d'exercice ┆ Nom_Prénom_Nettoyé │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ u32 ┆ str ┆ u32 ┆ str ┆ str ┆ str │\n",
"╞═════════╪════════════════╪═══════╪════════════════╪═══════════════════╪═════════════════════════╡\n",
"│ 1352933 ┆ 10000034180 ┆ 2 ┆ DUWAT-GEORGES ┆ GHISLAINE ┆ duwat georges ghislaine │\n",
"│ 1352934 ┆ 10000034180 ┆ 2 ┆ GEORGES ┆ GHISLAINE ┆ georges ghislaine │\n",
"│ 1353009 ┆ 10000046051 ┆ 2 ┆ STUDER ┆ AGNES ┆ studer agnes │\n",
"│ 1623173 ┆ 10000046051 ┆ 2 ┆ JURION ┆ AGNES ┆ jurion agnes │\n",
"│ 270462 ┆ 10000101518 ┆ 2 ┆ BARREYRE ┆ SANDRINE ┆ barreyre sandrine │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 1619731 ┆ 10110987236 ┆ 2 ┆ ROGIER ┆ MATHILDE ┆ rogier mathilde │\n",
"│ 808810 ┆ 10111077417 ┆ 2 ┆ DOUVIER ┆ FRANCETTE ┆ douvier francette │\n",
"│ 2161999 ┆ 10111077417 ┆ 2 ┆ D'ELLOY ┆ FRANCETTE ┆ d elloy francette │\n",
"│ 538415 ┆ 10111110721 ┆ 2 ┆ ROCHEPEAU ┆ Nadège ┆ rochepeau nadege │\n",
"│ 2162425 ┆ 10111110721 ┆ 2 ┆ BARREAU ┆ Nadège ┆ barreau nadege │\n",
"└─────────┴────────────────┴───────┴────────────────┴───────────────────┴─────────────────────────┘"
],
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (3_584, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Count</th><th>Nom d&#x27;exercice</th><th>Prénom d&#x27;exercice</th><th>Nom_Prénom_Nettoyé</th></tr><tr><td>u32</td><td>str</td><td>u32</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>1352933</td><td>&quot;10000034180&quot;</td><td>2</td><td>&quot;DUWAT-GEORGES&quot;</td><td>&quot;GHISLAINE&quot;</td><td>&quot;duwat georges ghislaine&quot;</td></tr><tr><td>1352934</td><td>&quot;10000034180&quot;</td><td>2</td><td>&quot;GEORGES&quot;</td><td>&quot;GHISLAINE&quot;</td><td>&quot;georges ghislaine&quot;</td></tr><tr><td>1353009</td><td>&quot;10000046051&quot;</td><td>2</td><td>&quot;STUDER&quot;</td><td>&quot;AGNES&quot;</td><td>&quot;studer agnes&quot;</td></tr><tr><td>1623173</td><td>&quot;10000046051&quot;</td><td>2</td><td>&quot;JURION&quot;</td><td>&quot;AGNES&quot;</td><td>&quot;jurion agnes&quot;</td></tr><tr><td>270462</td><td>&quot;10000101518&quot;</td><td>2</td><td>&quot;BARREYRE&quot;</td><td>&quot;SANDRINE&quot;</td><td>&quot;barreyre sandrine&quot;</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>1619731</td><td>&quot;10110987236&quot;</td><td>2</td><td>&quot;ROGIER&quot;</td><td>&quot;MATHILDE&quot;</td><td>&quot;rogier mathilde&quot;</td></tr><tr><td>808810</td><td>&quot;10111077417&quot;</td><td>2</td><td>&quot;DOUVIER&quot;</td><td>&quot;FRANCETTE&quot;</td><td>&quot;douvier francette&quot;</td></tr><tr><td>2161999</td><td>&quot;10111077417&quot;</td><td>2</td><td>&quot;D&#x27;ELLOY&quot;</td><td>&quot;FRANCETTE&quot;</td><td>&quot;d elloy francette&quot;</td></tr><tr><td>538415</td><td>&quot;10111110721&quot;</td><td>2</td><td>&quot;ROCHEPEAU&quot;</td><td>&quot;Nadège&quot;</td><td>&quot;rochepeau 
nadege&quot;</td></tr><tr><td>2162425</td><td>&quot;10111110721&quot;</td><td>2</td><td>&quot;BARREAU&quot;</td><td>&quot;Nadège&quot;</td><td>&quot;barreau nadege&quot;</td></tr></tbody></table></div>"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 62
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:46:03.290835Z",
"start_time": "2025-08-19T22:46:03.280259Z"
}
},
"cell_type": "code",
"source": [
"# Export the normalized name variations: pipe-separated, fields never quoted, LF line endings.\n",
"df3.write_csv(\n",
"    f\"{folder}{output_file}-Names_Variations_Normalized{output_extension}\",\n",
"    line_terminator='\\n',\n",
"    quote_style=\"never\",\n",
"    separator='|',\n",
")\n"
],
"id": "aab2ae2e91a7190c",
"outputs": [],
"execution_count": 63
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:46:07.814563Z",
"start_time": "2025-08-19T22:46:03.493442Z"
}
},
"cell_type": "code",
"source": [
"%%time\n",
"# Site-description columns: every column from 'Raison sociale site' through\n",
"# \"Libellé secteur d'activité\" (inclusive, in file order), minus \"Autorité d'enregistrement\".\n",
"all_columns = df.columns\n",
"start_col, end_col = 'Raison sociale site', \"Libellé secteur d'activité\"\n",
"start_col_index, end_col_index = all_columns.index(start_col), all_columns.index(end_col)\n",
"site_info_cols = [\n",
"    name\n",
"    for name in all_columns[start_col_index : end_col_index + 1]\n",
"    if name != \"Autorité d'enregistrement\"\n",
"]\n",
"\n",
"# Per (professional, site) check: among rows without a FINESS number but with a SIRET or a\n",
"# technical structure id, keep the rows of every ('Identifiant PP', 'Site_Identifier') pair whose\n",
"# site-description columns take more than one distinct combination of values.\n",
"df4 = (\n",
"    df\n",
"    .filter(\n",
"        pd.col('Numéro FINESS site').is_null()\n",
"        & (\n",
"            pd.col('Numéro SIRET site').is_not_null()\n",
"            | pd.col('Identifiant technique de la structure').is_not_null()\n",
"        )\n",
"    )\n",
"    # Site key: the SIRET when present, otherwise the technical structure id.\n",
"    .with_columns(\n",
"        pd.coalesce(\n",
"            pd.col('Numéro SIRET site'),\n",
"            pd.col('Identifiant technique de la structure'),\n",
"        ).alias('Site_Identifier')\n",
"    )\n",
"    .with_columns(\n",
"        pd.struct(site_info_cols)\n",
"        .n_unique()\n",
"        .over(['Identifiant PP', 'Site_Identifier'])\n",
"        .alias('Site_Info_Variations_Count')\n",
"    )\n",
"    .filter(pd.col('Site_Info_Variations_Count') > 1)\n",
"    .sort('Identifiant PP', 'Site_Identifier', 'index')\n",
"    .select(\n",
"        [\n",
"            'index',\n",
"            'Identifiant PP',\n",
"            'Site_Identifier',\n",
"            'Numéro SIRET site',\n",
"            'Identifiant technique de la structure',\n",
"            'Site_Info_Variations_Count',\n",
"            *site_info_cols,\n",
"        ]\n",
"    )\n",
")\n",
"df4"
],
"id": "3c2f2bb5fc3c2a5e",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 18.1 s\n",
"Wall time: 4.29 s\n"
]
},
{
"data": {
"text/plain": [
"shape: (98, 31)\n",
"┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n",
"│ index ┆ Identifian ┆ Site_Iden ┆ Numéro ┆ … ┆ Libellé ┆ Ancien ┆ Code ┆ Libellé │\n",
"│ --- ┆ t PP ┆ tifier ┆ SIRET ┆ ┆ Départeme ┆ identifia ┆ secteur ┆ secteur │\n",
"│ u32 ┆ --- ┆ --- ┆ site ┆ ┆ nt (struc ┆ nt de la ┆ d'activit ┆ d'activit │\n",
"│ ┆ str ┆ str ┆ --- ┆ ┆ ture… ┆ struc… ┆ é ┆ é │\n",
"│ ┆ ┆ ┆ str ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ ┆ ┆ ┆ ┆ ┆ str ┆ str ┆ str ┆ str │\n",
"╞═════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n",
"│ 270597 ┆ 1000011638 ┆ 309802205 ┆ 309802205 ┆ … ┆ null ┆ 330980220 ┆ SA28 ┆ Asso et │\n",
"│ ┆ 3 ┆ 00505 ┆ 00505 ┆ ┆ ┆ 500505 ┆ ┆ orga huma │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ nitaire │\n",
"│ 1353470 ┆ 1000011638 ┆ 309802205 ┆ 309802205 ┆ … ┆ null ┆ 330980220 ┆ SA28 ┆ Asso et │\n",
"│ ┆ 3 ┆ 00505 ┆ 00505 ┆ ┆ ┆ 500505 ┆ ┆ orga huma │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ nitaire │\n",
"│ 4214 ┆ 1000053630 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n",
"│ ┆ 9 ┆ 02369 ┆ 02369 ┆ ┆ ┆ 402369 ┆ ┆ de │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
"│ 1627221 ┆ 1000053630 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n",
"│ ┆ 9 ┆ 02369 ┆ 02369 ┆ ┆ ┆ 402369 ┆ ┆ de │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
"│ 816501 ┆ 1000066718 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n",
"│ ┆ 7 ┆ 02369 ┆ 02369 ┆ ┆ ┆ 402369 ┆ ┆ de │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 532248 ┆ 1011059216 ┆ 880859350 ┆ 880859350 ┆ … ┆ null ┆ 388085935 ┆ SA32 ┆ Fab. │\n",
"│ ┆ 8 ┆ 00014 ┆ 00014 ┆ ┆ ┆ 000014 ┆ ┆ Exploit. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
"│ 1344743 ┆ 1011059749 ┆ 130008006 ┆ 130008006 ┆ … ┆ null ┆ 313000800 ┆ SA24 ┆ Organisme │\n",
"│ ┆ 8 ┆ 00038 ┆ 00038 ┆ ┆ ┆ 600038 ┆ ┆ de │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
"│ 2156205 ┆ 1011059749 ┆ 130008006 ┆ 130008006 ┆ … ┆ null ┆ 313000800 ┆ SA24 ┆ Organisme │\n",
"│ ┆ 8 ┆ 00038 ┆ 00038 ┆ ┆ ┆ 600038 ┆ ┆ de │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
"│ 1618789 ┆ 1011091034 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n",
"│ ┆ 5 ┆ 01098 ┆ 01098 ┆ ┆ ┆ 401098 ┆ ┆ de │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
"│ 1889827 ┆ 1011091034 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n",
"│ ┆ 5 ┆ 01098 ┆ 01098 ┆ ┆ ┆ 401098 ┆ ┆ de │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
"└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘"
],
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (98, 31)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Site_Identifier</th><th>Numéro SIRET site</th><th>Identifiant technique de la structure</th><th>Site_Info_Variations_Count</th><th>Raison sociale site</th><th>Enseigne commerciale site</th><th>Complément destinataire (coord. structure)</th><th>Complément point géographique (coord. structure)</th><th>Numéro Voie (coord. structure)</th><th>Indice répétition voie (coord. structure)</th><th>Code type de voie (coord. structure)</th><th>Libellé type de voie (coord. structure)</th><th>Libellé Voie (coord. structure)</th><th>Mention distribution (coord. structure)</th><th>Bureau cedex (coord. structure)</th><th>Code postal (coord. structure)</th><th>Code commune (coord. structure)</th><th>Libellé commune (coord. structure)</th><th>Code pays (coord. structure)</th><th>Libellé pays (coord. structure)</th><th>Téléphone (coord. structure)</th><th>Téléphone 2 (coord. structure)</th><th>Télécopie (coord. structure)</th><th>Adresse e-mail (coord. 
structure)</th><th>Code Département (structure)</th><th>Libellé Département (structure)</th><th>Ancien identifiant de la structure</th><th>Code secteur d&#x27;activité</th><th>Libellé secteur d&#x27;activité</th></tr><tr><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>270597</td><td>&quot;10000116383&quot;</td><td>&quot;30980220500505&quot;</td><td>&quot;30980220500505&quot;</td><td>&quot;R10100000198782&quot;</td><td>2</td><td>&quot;OEUVRES HOSPITALIERES FRANCAIS…</td><td>&quot;ORDRE DE MALTE FRANCE&quot;</td><td>&quot;ORDRE DE MALTE FRANCE&quot;</td><td>null</td><td>&quot;42&quot;</td><td>null</td><td>null</td><td>null</td><td>&quot;RUE DES VOLONTAIRES&quot;</td><td>null</td><td>&quot;75015 PARIS&quot;</td><td>&quot;75015&quot;</td><td>&quot;75056&quot;</td><td>&quot;Paris&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;330980220500505&quot;</td><td>&quot;SA28&quot;</td><td>&quot;Asso et orga humanitaire&quot;</td></tr><tr><td>1353470</td><td>&quot;10000116383&quot;</td><td>&quot;30980220500505&quot;</td><td>&quot;30980220500505&quot;</td><td>&quot;R10100000779807&quot;</td><td>2</td><td>&quot;OEUVRE HOSP FRANC DE L&#x27;ORDRE D…</td><td>null</td><td>null</td><td>null</td><td>&quot;49&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;DE LA CHAPELLE&quot;</td><td>null</td><td>&quot;75018 PARIS 18E&nbsp;&nbsp;ARRONDISSEMEN…</td><td>&quot;75018&quot;</td><td>&quot;75118&quot;</td><td>&quot;Paris 
18e&nbsp;&nbsp;Arrondissement&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;330980220500505&quot;</td><td>&quot;SA28&quot;</td><td>&quot;Asso et orga humanitaire&quot;</td></tr><tr><td>4214</td><td>&quot;10000536309&quot;</td><td>&quot;18003502402369&quot;</td><td>&quot;18003502402369&quot;</td><td>&quot;R10100000050224&quot;</td><td>2</td><td>&quot;DRSM PAYS DE LA LOIREELSM 44&quot;</td><td>&quot;SITE NANTES&quot;</td><td>null</td><td>null</td><td>&quot;9&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;GAETAN RONDEAU&quot;</td><td>&quot;BP&quot;</td><td>&quot;44203 NANTES&quot;</td><td>&quot;44203&quot;</td><td>&quot;44109&quot;</td><td>&quot;Nantes&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;318003502402369&quot;</td><td>&quot;SA24&quot;</td><td>&quot;Organisme de Sécurité Sociale&quot;</td></tr><tr><td>1627221</td><td>&quot;10000536309&quot;</td><td>&quot;18003502402369&quot;</td><td>&quot;18003502402369&quot;</td><td>&quot;R10100000049799&quot;</td><td>2</td><td>&quot;DRSM PAYS DE LOIREELSM 49&quot;</td><td>&quot;SITE CHOLET&quot;</td><td>null</td><td>null</td><td>&quot;2&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;SAINT ELOI&quot;</td><td>&quot;BP&quot;</td><td>&quot;49321 CHOLET&quot;</td><td>&quot;49321&quot;</td><td>&quot;49099&quot;</td><td>&quot;Cholet&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;318003502402369&quot;</td><td>&quot;SA24&quot;</td><td>&quot;Organisme de Sécurité Sociale&quot;</td></tr><tr><td>816501</td><td>&quot;10000667187&quot;</td><td>&quot;18003502402369&quot;</td><td>&quot;18003502402369&quot;</td><td>&quot;R10100000049794&quot;</td><td>3</td><td>&quot;DRSM NORD PICARDIEELSM 59&quot;</td><td>&quot;SITE 
MAUBEUGE&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;PL&quot;</td><td>&quot;Place&quot;</td><td>&quot;DE WATTIGNIES&quot;</td><td>&quot;BP&quot;</td><td>&quot;59603 MAUBEUGE&quot;</td><td>&quot;59603&quot;</td><td>&quot;59392&quot;</td><td>&quot;Maubeuge&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;318003502402369&quot;</td><td>&quot;SA24&quot;</td><td>&quot;Organisme de Sécurité Sociale&quot;</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>532248</td><td>&quot;10110592168&quot;</td><td>&quot;88085935000014&quot;</td><td>&quot;88085935000014&quot;</td><td>&quot;R10100000325887&quot;</td><td>2</td><td>&quot;FAREVA PAU&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;AV&quot;</td><td>&quot;Avenue&quot;</td><td>&quot;DU BEARN&quot;</td><td>null</td><td>&quot;64320 IDRON&quot;</td><td>&quot;64320&quot;</td><td>&quot;64269&quot;</td><td>&quot;Idron&quot;</td><td>null</td><td>null</td><td>&quot;0559402100&quot;</td><td>null</td><td>&quot;0559402119&quot;</td><td>null</td><td>null</td><td>null</td><td>&quot;388085935000014&quot;</td><td>&quot;SA32&quot;</td><td>&quot;Fab. Exploit. Import. Méd. 
DM&quot;</td></tr><tr><td>1344743</td><td>&quot;10110597498&quot;</td><td>&quot;13000800600038&quot;</td><td>&quot;13000800600038&quot;</td><td>&quot;R10100000097229&quot;</td><td>2</td><td>&quot;AGENCE REGIONALE SANTE PAYS LO…</td><td>null</td><td>null</td><td>null</td><td>&quot;17&quot;</td><td>null</td><td>&quot;BD&quot;</td><td>&quot;Boulevard&quot;</td><td>&quot;GASTON DOUMERGUE&quot;</td><td>null</td><td>&quot;44262 NANTES&quot;</td><td>&quot;44262&quot;</td><td>&quot;44109&quot;</td><td>&quot;Nantes&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;313000800600038&quot;</td><td>&quot;SA24&quot;</td><td>&quot;Organisme de Sécurité Sociale&quot;</td></tr><tr><td>2156205</td><td>&quot;10110597498&quot;</td><td>&quot;13000800600038&quot;</td><td>&quot;13000800600038&quot;</td><td>&quot;R10100000097229&quot;</td><td>2</td><td>&quot;AGENCE REGIONALE SANTE PAYS LO…</td><td>null</td><td>&quot;DELEGATION TERRITORIALE&quot;</td><td>null</td><td>&quot;2&quot;</td><td>null</td><td>&quot;BD&quot;</td><td>&quot;Boulevard&quot;</td><td>&quot;MURAT&quot;</td><td>null</td><td>&quot;53000 LAVAL&quot;</td><td>&quot;53000&quot;</td><td>&quot;53130&quot;</td><td>&quot;Laval&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;313000800600038&quot;</td><td>&quot;SA24&quot;</td><td>&quot;Organisme de Sécurité Sociale&quot;</td></tr><tr><td>1618789</td><td>&quot;10110910345&quot;</td><td>&quot;18003502401098&quot;</td><td>&quot;18003502401098&quot;</td><td>&quot;R10100000398898&quot;</td><td>2</td><td>&quot;CAISSE NATIONALE DE L&#x27;ASSURANC…</td><td>&quot;DRSM DIRECTION REG. 
DU SERVICE…</td><td>&quot;QUARTIER DU LAC&quot;</td><td>null</td><td>&quot;80&quot;</td><td>null</td><td>null</td><td>null</td><td>&quot;AVENUE DE LA JALLERE&quot;</td><td>&quot;BP 260&quot;</td><td>&quot;33300 BORDEAUX&quot;</td><td>&quot;33300&quot;</td><td>&quot;33063&quot;</td><td>&quot;Bordeaux&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;318003502401098&quot;</td><td>&quot;SA24&quot;</td><td>&quot;Organisme de Sécurité Sociale&quot;</td></tr><tr><td>1889827</td><td>&quot;10110910345&quot;</td><td>&quot;18003502401098&quot;</td><td>&quot;18003502401098&quot;</td><td>&quot;R10100000398898&quot;</td><td>2</td><td>&quot;CAISSE NATIONALE DE L&#x27;ASSURANC…</td><td>&quot;DRSM DIRECTION REG. DU SERVICE…</td><td>null</td><td>null</td><td>&quot;207&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;FONTAINEBLEAU&quot;</td><td>&quot;BP&quot;</td><td>&quot;40011 MONT-DE-MARSAN&quot;</td><td>&quot;40011&quot;</td><td>&quot;40192&quot;</td><td>&quot;Mont-de-Marsan&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;318003502401098&quot;</td><td>&quot;SA24&quot;</td><td>&quot;Organisme de Sécurité Sociale&quot;</td></tr></tbody></table></div>"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 64
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:46:07.974271Z",
"start_time": "2025-08-19T22:46:07.943280Z"
}
},
"cell_type": "code",
"source": [
"# Export the per-professional site variations: pipe-separated, fields never quoted, LF line endings.\n",
"df4.write_csv(\n",
"    f\"{folder}{output_file}-Sites_Variations{output_extension}\",\n",
"    line_terminator='\\n',\n",
"    quote_style=\"never\",\n",
"    separator='|',\n",
")\n"
],
"id": "c1fd01e419f4ccc9",
"outputs": [],
"execution_count": 65
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:46:12.781888Z",
"start_time": "2025-08-19T22:46:08.306776Z"
}
},
"cell_type": "code",
"source": [
"%%time\n",
"# Per-site check across all professionals: same row selection and site key as the\n",
"# per-professional check, but the distinct site-description combinations are counted per\n",
"# 'Site_Identifier' only. Relies on `site_info_cols` defined in an earlier cell.\n",
"df5 = (\n",
"    df\n",
"    .filter(\n",
"        pd.col('Numéro FINESS site').is_null()\n",
"        & (\n",
"            pd.col('Numéro SIRET site').is_not_null()\n",
"            | pd.col('Identifiant technique de la structure').is_not_null()\n",
"        )\n",
"    )\n",
"    # Site key: the SIRET when present, otherwise the technical structure id.\n",
"    .with_columns(\n",
"        pd.coalesce(\n",
"            pd.col('Numéro SIRET site'),\n",
"            pd.col('Identifiant technique de la structure'),\n",
"        ).alias('Site_Identifier')\n",
"    )\n",
"    .with_columns(\n",
"        pd.struct(site_info_cols)\n",
"        .n_unique()\n",
"        .over(['Site_Identifier'])\n",
"        .alias('Site_Info_Variations_Count')\n",
"    )\n",
"    .filter(pd.col('Site_Info_Variations_Count') > 1)\n",
"    .sort('Site_Identifier', 'index')\n",
"    .select(\n",
"        [\n",
"            'index',\n",
"            'Identifiant PP',\n",
"            'Site_Identifier',\n",
"            'Numéro SIRET site',\n",
"            'Identifiant technique de la structure',\n",
"            'Site_Info_Variations_Count',\n",
"            *site_info_cols,\n",
"        ]\n",
"    )\n",
")\n",
"df5\n"
],
"id": "7838523925fc85ee",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 19.8 s\n",
"Wall time: 4.43 s\n"
]
},
{
"data": {
"text/plain": [
"shape: (4_190, 31)\n",
"┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n",
"│ index ┆ Identifian ┆ Site_Iden ┆ Numéro ┆ … ┆ Libellé ┆ Ancien ┆ Code ┆ Libellé │\n",
"│ --- ┆ t PP ┆ tifier ┆ SIRET ┆ ┆ Départeme ┆ identifia ┆ secteur ┆ secteur │\n",
"│ u32 ┆ --- ┆ --- ┆ site ┆ ┆ nt (struc ┆ nt de la ┆ d'activit ┆ d'activit │\n",
"│ ┆ str ┆ str ┆ --- ┆ ┆ ture… ┆ struc… ┆ é ┆ é │\n",
"│ ┆ ┆ ┆ str ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ ┆ ┆ ┆ ┆ ┆ str ┆ str ┆ str ┆ str │\n",
"╞═════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n",
"│ 127508 ┆ 1010000229 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n",
"│ ┆ 3 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
"│ 285182 ┆ 1000180676 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n",
"│ ┆ 8 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
"│ 466100 ┆ 1010480041 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n",
"│ ┆ 1 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
"│ 722626 ┆ 1010368715 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n",
"│ ┆ 7 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
"│ 826390 ┆ 1000179659 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n",
"│ ┆ 7 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 793647 ┆ 1010986940 ┆ 984526194 ┆ 984526194 ┆ … ┆ null ┆ 398452619 ┆ SA09 ┆ Exercice │\n",
"│ ┆ 3 ┆ 00019 ┆ 00019 ┆ ┆ ┆ 400019 ┆ ┆ en │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Société │\n",
"│ 1500531 ┆ 1010129326 ┆ 984526194 ┆ 984526194 ┆ … ┆ null ┆ 398452619 ┆ SA09 ┆ Exercice │\n",
"│ ┆ 3 ┆ 00019 ┆ 00019 ┆ ┆ ┆ 400019 ┆ ┆ en │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Société │\n",
"│ 1852947 ┆ 1010801513 ┆ 984526194 ┆ 984526194 ┆ … ┆ null ┆ 398452619 ┆ SA09 ┆ Exercice │\n",
"│ ┆ 1 ┆ 00019 ┆ 00019 ┆ ┆ ┆ 400019 ┆ ┆ en │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Société │\n",
"│ 1571945 ┆ 1010723524 ┆ 998823504 ┆ 998823504 ┆ … ┆ null ┆ 399882350 ┆ SA11 ┆ Entrepris │\n",
"│ ┆ 3 ┆ 30834 ┆ 30834 ┆ ┆ ┆ 430834 ┆ ┆ e │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ d'intérim │\n",
"│ 1590681 ┆ 1010869303 ┆ 998823504 ┆ 998823504 ┆ … ┆ null ┆ 399882350 ┆ SA11 ┆ Entrepris │\n",
"│ ┆ 6 ┆ 30834 ┆ 30834 ┆ ┆ ┆ 430834 ┆ ┆ e │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ d'intérim │\n",
"└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘"
],
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (4_190, 31)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Site_Identifier</th><th>Numéro SIRET site</th><th>Identifiant technique de la structure</th><th>Site_Info_Variations_Count</th><th>Raison sociale site</th><th>Enseigne commerciale site</th><th>Complément destinataire (coord. structure)</th><th>Complément point géographique (coord. structure)</th><th>Numéro Voie (coord. structure)</th><th>Indice répétition voie (coord. structure)</th><th>Code type de voie (coord. structure)</th><th>Libellé type de voie (coord. structure)</th><th>Libellé Voie (coord. structure)</th><th>Mention distribution (coord. structure)</th><th>Bureau cedex (coord. structure)</th><th>Code postal (coord. structure)</th><th>Code commune (coord. structure)</th><th>Libellé commune (coord. structure)</th><th>Code pays (coord. structure)</th><th>Libellé pays (coord. structure)</th><th>Téléphone (coord. structure)</th><th>Téléphone 2 (coord. structure)</th><th>Télécopie (coord. structure)</th><th>Adresse e-mail (coord. 
structure)</th><th>Code Département (structure)</th><th>Libellé Département (structure)</th><th>Ancien identifiant de la structure</th><th>Code secteur d&#x27;activité</th><th>Libellé secteur d&#x27;activité</th></tr><tr><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>127508</td><td>&quot;10100002293&quot;</td><td>&quot;05650171100115&quot;</td><td>&quot;05650171100115&quot;</td><td>&quot;R10000001502146&quot;</td><td>2</td><td>&quot;BECTON DICKINSON FRANCE&quot;</td><td>null</td><td>null</td><td>null</td><td>&quot;11&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;RUE ARISTIDE BERGES&quot;</td><td>null</td><td>&quot;38801 LE PONT DE CLAIX CEDEX&quot;</td><td>&quot;38801&quot;</td><td>&quot;38317&quot;</td><td>&quot;Le Pont-de-Claix&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;305650171100115&quot;</td><td>&quot;SA32&quot;</td><td>&quot;Fab. Exploit. Import. Méd. 
DM&quot;</td></tr><tr><td>285182</td><td>&quot;10001806768&quot;</td><td>&quot;05650171100115&quot;</td><td>&quot;05650171100115&quot;</td><td>&quot;R10000001502146&quot;</td><td>2</td><td>&quot;BECTON DICKINSON FRANCE&quot;</td><td>null</td><td>null</td><td>null</td><td>&quot;11&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;RUE ARISTIDE BERGES&quot;</td><td>null</td><td>&quot;38801 LE PONT DE CLAIX CEDEX&quot;</td><td>&quot;38801&quot;</td><td>&quot;38317&quot;</td><td>&quot;Le Pont-de-Claix&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;305650171100115&quot;</td><td>&quot;SA32&quot;</td><td>&quot;Fab. Exploit. Import. Méd. DM&quot;</td></tr><tr><td>466100</td><td>&quot;10104800411&quot;</td><td>&quot;05650171100115&quot;</td><td>&quot;05650171100115&quot;</td><td>&quot;R10000001502146&quot;</td><td>2</td><td>&quot;BECTON DICKINSON FRANCE&quot;</td><td>null</td><td>null</td><td>null</td><td>&quot;11&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;RUE ARISTIDE BERGES&quot;</td><td>null</td><td>&quot;38801 LE PONT DE CLAIX CEDEX&quot;</td><td>&quot;38801&quot;</td><td>&quot;38317&quot;</td><td>&quot;Le Pont-de-Claix&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;305650171100115&quot;</td><td>&quot;SA32&quot;</td><td>&quot;Fab. Exploit. Import. Méd. 
DM&quot;</td></tr><tr><td>722626</td><td>&quot;10103687157&quot;</td><td>&quot;05650171100115&quot;</td><td>&quot;05650171100115&quot;</td><td>&quot;R10000001502146&quot;</td><td>2</td><td>&quot;BECTON DICKINSON FRANCE&quot;</td><td>null</td><td>null</td><td>null</td><td>&quot;11&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;RUE ARISTIDE BERGES&quot;</td><td>null</td><td>&quot;38801 LE PONT DE CLAIX CEDEX&quot;</td><td>&quot;38801&quot;</td><td>&quot;38317&quot;</td><td>&quot;Le Pont-de-Claix&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;305650171100115&quot;</td><td>&quot;SA32&quot;</td><td>&quot;Fab. Exploit. Import. Méd. DM&quot;</td></tr><tr><td>826390</td><td>&quot;10001796597&quot;</td><td>&quot;05650171100115&quot;</td><td>&quot;05650171100115&quot;</td><td>&quot;R10000001502146&quot;</td><td>2</td><td>&quot;BECTON DICKINSON FRANCE&quot;</td><td>null</td><td>null</td><td>null</td><td>&quot;11&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;RUE ARISTIDE BERGES&quot;</td><td>null</td><td>&quot;38801 LE PONT DE CLAIX CEDEX&quot;</td><td>&quot;38801&quot;</td><td>&quot;38317&quot;</td><td>&quot;Le Pont-de-Claix&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;305650171100115&quot;</td><td>&quot;SA32&quot;</td><td>&quot;Fab. Exploit. Import. Méd. 
DM&quot;</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>793647</td><td>&quot;10109869403&quot;</td><td>&quot;98452619400019&quot;</td><td>&quot;98452619400019&quot;</td><td>&quot;R10100000673943&quot;</td><td>2</td><td>&quot;SELARL CENTRE DE PODOLOGIE SPO…</td><td>&quot;SELARL CENTRE DE PODOLOGIE SPO…</td><td>null</td><td>null</td><td>&quot;8&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;GEORGES NEGREVERGNE&quot;</td><td>null</td><td>&quot;33700 MERIGNAC&quot;</td><td>&quot;33700&quot;</td><td>&quot;33281&quot;</td><td>&quot;Mérignac&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;398452619400019&quot;</td><td>&quot;SA09&quot;</td><td>&quot;Exercice en Société&quot;</td></tr><tr><td>1500531</td><td>&quot;10101293263&quot;</td><td>&quot;98452619400019&quot;</td><td>&quot;98452619400019&quot;</td><td>&quot;R10100000673943&quot;</td><td>2</td><td>&quot;SELARL CENTRE DE PODOLOGIE SPO…</td><td>&quot;SELARL CENTRE DE PODOLOGIE SPO…</td><td>null</td><td>null</td><td>&quot;8&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;GEORGES NEGREVERGNE&quot;</td><td>null</td><td>&quot;33700 
MERIGNAC&quot;</td><td>&quot;33700&quot;</td><td>&quot;33281&quot;</td><td>&quot;Mérignac&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;398452619400019&quot;</td><td>&quot;SA09&quot;</td><td>&quot;Exercice en Société&quot;</td></tr><tr><td>1852947</td><td>&quot;10108015131&quot;</td><td>&quot;98452619400019&quot;</td><td>&quot;98452619400019&quot;</td><td>&quot;R10100000673943&quot;</td><td>2</td><td>&quot;SELARL CENTRE DE PODOLOGIE SPO…</td><td>&quot;SELARL CENTRE DE PODOLOGIE SPO…</td><td>null</td><td>null</td><td>&quot;8&quot;</td><td>null</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;GEORGES NEGREVERGNE&quot;</td><td>null</td><td>&quot;33700 MERIGNAC&quot;</td><td>&quot;33700&quot;</td><td>&quot;33281&quot;</td><td>&quot;Mérignac&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;398452619400019&quot;</td><td>&quot;SA09&quot;</td><td>&quot;Exercice en Société&quot;</td></tr><tr><td>1571945</td><td>&quot;10107235243&quot;</td><td>&quot;99882350430834&quot;</td><td>&quot;99882350430834&quot;</td><td>&quot;R10100000554688&quot;</td><td>2</td><td>&quot;ADECCO FRANCE&quot;</td><td>&quot;ADECCO&quot;</td><td>null</td><td>&quot;PARC VALMY PARK AVENUE BAT A 1…</td><td>&quot;8&quot;</td><td>&quot;D&quot;</td><td>null</td><td>null</td><td>&quot;RUE JEANNE BARRET&quot;</td><td>null</td><td>&quot;21000 DIJON&quot;</td><td>&quot;21000&quot;</td><td>&quot;21231&quot;</td><td>&quot;Dijon&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;399882350430834&quot;</td><td>&quot;SA11&quot;</td><td>&quot;Entreprise 
d&#x27;intérim&quot;</td></tr><tr><td>1590681</td><td>&quot;10108693036&quot;</td><td>&quot;99882350430834&quot;</td><td>&quot;99882350430834&quot;</td><td>&quot;R10100000413248&quot;</td><td>2</td><td>&quot;ADECCO MEDICAL&quot;</td><td>null</td><td>null</td><td>null</td><td>&quot;8&quot;</td><td>&quot;D&quot;</td><td>&quot;R&quot;</td><td>&quot;Rue&quot;</td><td>&quot;JEANNE BARRET&quot;</td><td>null</td><td>&quot;21000 DIJON&quot;</td><td>&quot;21000&quot;</td><td>&quot;21231&quot;</td><td>&quot;Dijon&quot;</td><td>&quot;99000&quot;</td><td>&quot;France&quot;</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>&quot;399882350430834&quot;</td><td>&quot;SA11&quot;</td><td>&quot;Entreprise d&#x27;intérim&quot;</td></tr></tbody></table></div>"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 66
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:46:13.670911Z",
"start_time": "2025-08-19T22:46:13.655386Z"
}
},
"cell_type": "code",
"source": [
"df5.write_csv(f\"{folder}{output_file}-Sites_Variations_Global{output_extension}\",\n",
" separator='|',\n",
" quote_style=\"never\",\n",
" line_terminator='\\n')\n"
],
"id": "416184f32f973a71",
"outputs": [],
"execution_count": 67
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:46:17.023811Z",
"start_time": "2025-08-19T22:46:14.032470Z"
}
},
"cell_type": "code",
"source": [
"%%time\n",
"df6 = (\n",
" df\n",
" .with_columns(\n",
" pd.coalesce(\n",
" pd.col('Numéro FINESS site'),\n",
" pd.col('Numéro SIRET site'),\n",
" pd.col('Identifiant technique de la structure')\n",
" ).alias('Site_Identifier_Global')\n",
" )\n",
" .filter(pd.col('Site_Identifier_Global').is_not_null())\n",
" .with_columns(\n",
" pd.struct([\n",
" \"Libellé profession\",\n",
" \"Libellé savoir-faire\",\n",
" \"Libellé mode exercice\",\n",
" \"Libellé rôle\",\n",
" \"Libellé genre activité\"\n",
" ]).n_unique().over([\"Identifiant PP\", \"Site_Identifier_Global\"]).alias(\"Activites_Count\")\n",
" )\n",
" .filter(pd.col(\"Activites_Count\") > 1)\n",
" .sort([\"Identifiant PP\", \"Site_Identifier_Global\", \"index\"])\n",
" .select([\n",
" \"index\",\n",
" \"Identifiant PP\",\n",
" \"Site_Identifier_Global\",\n",
" \"Numéro FINESS site\",\n",
" \"Numéro SIRET site\",\n",
" \"Identifiant technique de la structure\",\n",
" \"Activites_Count\",\n",
" \"Libellé profession\",\n",
" \"Libellé savoir-faire\",\n",
" \"Libellé mode exercice\",\n",
" \"Libellé rôle\",\n",
" \"Libellé genre activité\"\n",
" ])\n",
")\n",
"df6\n"
],
"id": "84549f83ce5e92f",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 11 s\n",
"Wall time: 2.96 s\n"
]
},
{
"data": {
"text/plain": [
"shape: (25_389, 12)\n",
"┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n",
"│ index ┆ Identifian ┆ Site_Iden ┆ Numéro ┆ … ┆ Libellé ┆ Libellé ┆ Libellé ┆ Libellé │\n",
"│ --- ┆ t PP ┆ tifier_Gl ┆ FINESS ┆ ┆ savoir-fa ┆ mode ┆ rôle ┆ genre │\n",
"│ u32 ┆ --- ┆ obal ┆ site ┆ ┆ ire ┆ exercice ┆ --- ┆ activité │\n",
"│ ┆ str ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ str ┆ --- │\n",
"│ ┆ ┆ str ┆ str ┆ ┆ str ┆ str ┆ ┆ str │\n",
"╞═════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n",
"│ 1352840 ┆ 1000001797 ┆ 130786445 ┆ 130786445 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
"│ ┆ 9 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
"│ 1893982 ┆ 1000001797 ┆ 130786445 ┆ 130786445 ┆ … ┆ null ┆ Salarié ┆ Fonction ┆ Activité │\n",
"│ ┆ 9 ┆ ┆ ┆ ┆ ┆ ┆ non ┆ standard │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ définie ┆ de soin │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
"│ 270269 ┆ 1000007028 ┆ 397840901 ┆ null ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
"│ ┆ 3 ┆ 00011 ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
"│ 811380 ┆ 1000007028 ┆ 397840901 ┆ null ┆ … ┆ null ┆ Salarié ┆ Fonction ┆ Activité │\n",
"│ ┆ 3 ┆ 00011 ┆ ┆ ┆ ┆ ┆ non ┆ non │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ définie ┆ soignante │\n",
"│ 540586 ┆ 1000008684 ┆ 860012228 ┆ 860012228 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
"│ ┆ 2 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 538572 ┆ 1011112354 ┆ 970400016 ┆ 970400016 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
"│ ┆ 2 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
"│ 538882 ┆ 1011125299 ┆ 250006954 ┆ 250006954 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
"│ ┆ 4 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
"│ 2162932 ┆ 1011125299 ┆ 250006954 ┆ 250006954 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
"│ ┆ 4 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
"│ 2163446 ┆ 1011129312 ┆ 490540218 ┆ 490540218 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
"│ ┆ 1 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
"│ 2163447 ┆ 1011129312 ┆ 490540218 ┆ 490540218 ┆ … ┆ null ┆ Salarié ┆ Cadre de ┆ Activité │\n",
"│ ┆ 1 ┆ ┆ ┆ ┆ ┆ ┆ santé de ┆ non │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ proximité ┆ soignante │\n",
"└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘"
],
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (25_389, 12)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Site_Identifier_Global</th><th>Numéro FINESS site</th><th>Numéro SIRET site</th><th>Identifiant technique de la structure</th><th>Activites_Count</th><th>Libellé profession</th><th>Libellé savoir-faire</th><th>Libellé mode exercice</th><th>Libellé rôle</th><th>Libellé genre activité</th></tr><tr><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>1352840</td><td>&quot;10000017979&quot;</td><td>&quot;130786445&quot;</td><td>&quot;130786445&quot;</td><td>&quot;30247736900011&quot;</td><td>&quot;F130786445&quot;</td><td>2</td><td>&quot;Ostéopathe&quot;</td><td>null</td><td>&quot;Salarié&quot;</td><td>&quot;Salarié en poste fixe&quot;</td><td>&quot;Activité standard de soin ou d…</td></tr><tr><td>1893982</td><td>&quot;10000017979&quot;</td><td>&quot;130786445&quot;</td><td>&quot;130786445&quot;</td><td>&quot;30247736900011&quot;</td><td>&quot;F130786445&quot;</td><td>2</td><td>&quot;Sage-Femme&quot;</td><td>null</td><td>&quot;Salarié&quot;</td><td>&quot;Fonction non définie&quot;</td><td>&quot;Activité standard de soin ou d…</td></tr><tr><td>270269</td><td>&quot;10000070283&quot;</td><td>&quot;39784090100011&quot;</td><td>null</td><td>&quot;39784090100011&quot;</td><td>&quot;R10000002500225&quot;</td><td>2</td><td>&quot;Chirurgien-Dentiste&quot;</td><td>null</td><td>&quot;Salarié&quot;</td><td>&quot;Salarié en poste fixe&quot;</td><td>&quot;Activité standard de soin ou d…</td></tr><tr><td>811380</td><td>&quot;10000070283&quot;</td><td>&quot;39784090100011&quot;</td><td>null</td><td>&quot;39784090100011&quot;</td><td>&quot;R10000002500225&quot;</td><td>2</td><td>&quot;Chirurgien-Dentiste&quot;</td><td>null</td><td>&quot;Salarié&quot;</td><td>&quot;Fonction non définie&quot;</td><td>&quot;Activité non 
soignante&quot;</td></tr><tr><td>540586</td><td>&quot;10000086842&quot;</td><td>&quot;860012228&quot;</td><td>&quot;860012228&quot;</td><td>&quot;13001256000038&quot;</td><td>&quot;F860012228&quot;</td><td>2</td><td>&quot;Psychologue&quot;</td><td>null</td><td>&quot;Salarié&quot;</td><td>&quot;Salarié en poste fixe&quot;</td><td>&quot;Activité standard de soin ou d…</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>538572</td><td>&quot;10111123542&quot;</td><td>&quot;970400016&quot;</td><td>&quot;970400016&quot;</td><td>&quot;26974214400034&quot;</td><td>&quot;F970400016&quot;</td><td>2</td><td>&quot;Psychologue&quot;</td><td>null</td><td>&quot;Salarié&quot;</td><td>&quot;Salarié en poste fixe&quot;</td><td>&quot;Activité standard de soin ou d…</td></tr><tr><td>538882</td><td>&quot;10111252994&quot;</td><td>&quot;250006954&quot;</td><td>&quot;250006954&quot;</td><td>&quot;26250176000264&quot;</td><td>&quot;F250006954&quot;</td><td>2</td><td>&quot;Psychothérapeute&quot;</td><td>null</td><td>&quot;Salarié&quot;</td><td>&quot;Salarié en poste fixe&quot;</td><td>&quot;Activité standard de soin ou d…</td></tr><tr><td>2162932</td><td>&quot;10111252994&quot;</td><td>&quot;250006954&quot;</td><td>&quot;250006954&quot;</td><td>&quot;26250176000264&quot;</td><td>&quot;F250006954&quot;</td><td>2</td><td>&quot;Psychologue&quot;</td><td>null</td><td>&quot;Salarié&quot;</td><td>&quot;Salarié en poste fixe&quot;</td><td>&quot;Activité standard de soin ou d…</td></tr><tr><td>2163446</td><td>&quot;10111293121&quot;</td><td>&quot;490540218&quot;</td><td>&quot;490540218&quot;</td><td>&quot;77568873211159&quot;</td><td>&quot;F490540218&quot;</td><td>2</td><td>&quot;Infirmier&quot;</td><td>null</td><td>&quot;Salarié&quot;</td><td>&quot;Salarié en poste fixe&quot;</td><td>&quot;Activité standard de soin ou 
d…</td></tr><tr><td>2163447</td><td>&quot;10111293121&quot;</td><td>&quot;490540218&quot;</td><td>&quot;490540218&quot;</td><td>&quot;77568873211159&quot;</td><td>&quot;F490540218&quot;</td><td>2</td><td>&quot;Infirmier&quot;</td><td>null</td><td>&quot;Salarié&quot;</td><td>&quot;Cadre de santé de proximité&quot;</td><td>&quot;Activité non soignante&quot;</td></tr></tbody></table></div>"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 68
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:46:17.851427Z",
"start_time": "2025-08-19T22:46:17.796168Z"
}
},
"cell_type": "code",
"source": [
"df6.write_csv(f\"{folder}{output_file}-Multiple_Activities_Per_Site{output_extension}\",\n",
" separator='|',\n",
" quote_style=\"never\",\n",
" line_terminator='\\n')\n"
],
"id": "6f7025a7c08b54b4",
"outputs": [],
"execution_count": 69
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:46:19.535052Z",
"start_time": "2025-08-19T22:46:18.015194Z"
}
},
"cell_type": "code",
"source": [
"%%time\n",
"df7 = (\n",
" df\n",
" .with_columns(\n",
" pd.col(\"Libellé profession\").n_unique().over(\"Identifiant PP\").alias(\"Profession_Count\")\n",
" )\n",
" .filter(pd.col(\"Profession_Count\") > 1)\n",
" .sort([\"Identifiant PP\", \"index\"])\n",
" .select([\n",
" \"index\",\n",
" \"Identifiant PP\",\n",
" \"Profession_Count\",\n",
" \"Libellé profession\",\n",
" \"Libellé savoir-faire\"\n",
" ])\n",
")\n",
"df7\n"
],
"id": "b18d9ba71ba63d9d",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 5.33 s\n",
"Wall time: 1.5 s\n"
]
},
{
"data": {
"text/plain": [
"shape: (88_845, 5)\n",
"┌─────────┬────────────────┬──────────────────┬────────────────────┬──────────────────────┐\n",
"│ index ┆ Identifiant PP ┆ Profession_Count ┆ Libellé profession ┆ Libellé savoir-faire │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ u32 ┆ str ┆ u32 ┆ str ┆ str │\n",
"╞═════════╪════════════════╪══════════════════╪════════════════════╪══════════════════════╡\n",
"│ 74 ┆ 10000013150 ┆ 2 ┆ Médecin ┆ Psychiatrie │\n",
"│ 269913 ┆ 10000013150 ┆ 2 ┆ Psychothérapeute ┆ null │\n",
"│ 1352840 ┆ 10000017979 ┆ 2 ┆ Ostéopathe ┆ null │\n",
"│ 1893982 ┆ 10000017979 ┆ 2 ┆ Sage-Femme ┆ null │\n",
"│ 811125 ┆ 10000029966 ┆ 2 ┆ Sage-Femme ┆ null │\n",
"│ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 1352396 ┆ 10111320304 ┆ 2 ┆ Psychothérapeute ┆ null │\n",
"│ 269545 ┆ 10111320379 ┆ 2 ┆ Psychothérapeute ┆ null │\n",
"│ 810601 ┆ 10111320379 ┆ 2 ┆ Psychologue ┆ null │\n",
"│ 539711 ┆ 10111321468 ┆ 2 ┆ Psychothérapeute ┆ null │\n",
"│ 1352414 ┆ 10111321468 ┆ 2 ┆ Psychologue ┆ null │\n",
"└─────────┴────────────────┴──────────────────┴────────────────────┴──────────────────────┘"
],
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (88_845, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Profession_Count</th><th>Libellé profession</th><th>Libellé savoir-faire</th></tr><tr><td>u32</td><td>str</td><td>u32</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>74</td><td>&quot;10000013150&quot;</td><td>2</td><td>&quot;Médecin&quot;</td><td>&quot;Psychiatrie&quot;</td></tr><tr><td>269913</td><td>&quot;10000013150&quot;</td><td>2</td><td>&quot;Psychothérapeute&quot;</td><td>null</td></tr><tr><td>1352840</td><td>&quot;10000017979&quot;</td><td>2</td><td>&quot;Ostéopathe&quot;</td><td>null</td></tr><tr><td>1893982</td><td>&quot;10000017979&quot;</td><td>2</td><td>&quot;Sage-Femme&quot;</td><td>null</td></tr><tr><td>811125</td><td>&quot;10000029966&quot;</td><td>2</td><td>&quot;Sage-Femme&quot;</td><td>null</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>1352396</td><td>&quot;10111320304&quot;</td><td>2</td><td>&quot;Psychothérapeute&quot;</td><td>null</td></tr><tr><td>269545</td><td>&quot;10111320379&quot;</td><td>2</td><td>&quot;Psychothérapeute&quot;</td><td>null</td></tr><tr><td>810601</td><td>&quot;10111320379&quot;</td><td>2</td><td>&quot;Psychologue&quot;</td><td>null</td></tr><tr><td>539711</td><td>&quot;10111321468&quot;</td><td>2</td><td>&quot;Psychothérapeute&quot;</td><td>null</td></tr><tr><td>1352414</td><td>&quot;10111321468&quot;</td><td>2</td><td>&quot;Psychologue&quot;</td><td>null</td></tr></tbody></table></div>"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 70
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T22:46:19.893214Z",
"start_time": "2025-08-19T22:46:19.851765Z"
}
},
"cell_type": "code",
"source": [
"df7.write_csv(f\"{folder}{output_file}-Multiple_Professions{output_extension}\",\n",
" separator='|',\n",
" quote_style=\"never\",\n",
" line_terminator='\\n')\n"
],
"id": "85be468fd3f461d1",
"outputs": [],
"execution_count": 71
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"language": "python",
"display_name": "Python 3 (ipykernel)"
}
},
"nbformat": 4,
"nbformat_minor": 5
}