941 lines
81 KiB
Plaintext
941 lines
81 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "58db5082e27759f7",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:45:53.794236Z",
|
|
"start_time": "2025-08-19T22:45:51.445477Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: total: 5.73 s\n",
|
|
"Wall time: 511 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"['index',\n",
|
|
" \"Type d'identifiant PP\",\n",
|
|
" 'Identifiant PP',\n",
|
|
" 'Identification nationale PP',\n",
|
|
" \"Code civilité d'exercice\",\n",
|
|
" \"Libellé civilité d'exercice\",\n",
|
|
" 'Code civilité',\n",
|
|
" 'Libellé civilité',\n",
|
|
" \"Nom d'exercice\",\n",
|
|
" \"Prénom d'exercice\",\n",
|
|
" 'Code profession',\n",
|
|
" 'Libellé profession',\n",
|
|
" 'Code catégorie professionnelle',\n",
|
|
" 'Libellé catégorie professionnelle',\n",
|
|
" 'Code type savoir-faire',\n",
|
|
" 'Libellé type savoir-faire',\n",
|
|
" 'Code savoir-faire',\n",
|
|
" 'Libellé savoir-faire',\n",
|
|
" 'Code mode exercice',\n",
|
|
" 'Libellé mode exercice',\n",
|
|
" 'Numéro SIRET site',\n",
|
|
" 'Numéro SIREN site',\n",
|
|
" 'Numéro FINESS site',\n",
|
|
" 'Numéro FINESS établissement juridique',\n",
|
|
" 'Identifiant technique de la structure',\n",
|
|
" 'Raison sociale site',\n",
|
|
" 'Enseigne commerciale site',\n",
|
|
" 'Complément destinataire (coord. structure)',\n",
|
|
" 'Complément point géographique (coord. structure)',\n",
|
|
" 'Numéro Voie (coord. structure)',\n",
|
|
" 'Indice répétition voie (coord. structure)',\n",
|
|
" 'Code type de voie (coord. structure)',\n",
|
|
" 'Libellé type de voie (coord. structure)',\n",
|
|
" 'Libellé Voie (coord. structure)',\n",
|
|
" 'Mention distribution (coord. structure)',\n",
|
|
" 'Bureau cedex (coord. structure)',\n",
|
|
" 'Code postal (coord. structure)',\n",
|
|
" 'Code commune (coord. structure)',\n",
|
|
" 'Libellé commune (coord. structure)',\n",
|
|
" 'Code pays (coord. structure)',\n",
|
|
" 'Libellé pays (coord. structure)',\n",
|
|
" 'Téléphone (coord. structure)',\n",
|
|
" 'Téléphone 2 (coord. structure)',\n",
|
|
" 'Télécopie (coord. structure)',\n",
|
|
" 'Adresse e-mail (coord. structure)',\n",
|
|
" 'Code Département (structure)',\n",
|
|
" 'Libellé Département (structure)',\n",
|
|
" 'Ancien identifiant de la structure',\n",
|
|
" \"Autorité d'enregistrement\",\n",
|
|
" \"Code secteur d'activité\",\n",
|
|
" \"Libellé secteur d'activité\",\n",
|
|
" 'Code section tableau pharmaciens',\n",
|
|
" 'Libellé section tableau pharmaciens',\n",
|
|
" 'Code rôle',\n",
|
|
" 'Libellé rôle',\n",
|
|
" 'Code genre activité',\n",
|
|
" 'Libellé genre activité']"
|
|
]
|
|
},
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"%%time\n",
"import polars as pd\n",
"import csv\n",
"\n",
"# Location of the reference table and naming scheme of the report files.\n",
"folder = \"E:\\\\Ziwig Drive\\\\Ziwig Health\\\\Tables de Réf\\\\Professionnels\\\\\"\n",
"input_file = \"Table_Réf_Professionnels_260319.txt\"\n",
"output_file = \"Table_Réf_Professionnels_inconsistencies\"\n",
"output_extension = \".csv\"\n",
"\n",
"# Pipe-delimited, unquoted input. infer_schema_length=0 keeps every column as a\n",
"# string, and empty fields are loaded as null.\n",
"df = pd.read_csv(\n",
"    f\"{folder}{input_file}\",\n",
"    separator='|',\n",
"    quote_char=None,\n",
"    null_values='',\n",
"    infer_schema_length=0,\n",
").with_row_index('index')  # running row number used to point back at source rows\n",
"df.columns\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "7d9b7562c09955",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:45:54.952210Z",
|
|
"start_time": "2025-08-19T22:45:53.873718Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: total: 2.03 s\n",
|
|
"Wall time: 389 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div><style>\n",
|
|
".dataframe > thead > tr,\n",
|
|
".dataframe > tbody > tr {\n",
|
|
" text-align: right;\n",
|
|
" white-space: pre-wrap;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<small>shape: (10_110, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Count</th><th>Nom d'exercice</th><th>Prénom d'exercice</th></tr><tr><td>u32</td><td>str</td><td>u32</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>1393753</td><td>"10000034180"</td><td>2</td><td>"DUWAT-GEORGES"</td><td>"GHISLAINE"</td></tr><tr><td>1393754</td><td>"10000034180"</td><td>2</td><td>"GEORGES"</td><td>"GHISLAINE"</td></tr><tr><td>262</td><td>"10000040062"</td><td>2</td><td>"MEYER"</td><td>"Nicolas"</td></tr><tr><td>263</td><td>"10000040062"</td><td>2</td><td>"MEYER"</td><td>"Nicolas"</td></tr><tr><td>835639</td><td>"10000040062"</td><td>2</td><td>"MEYER"</td><td>"NICOLAS"</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>1947864</td><td>"10111726807"</td><td>2</td><td>"BARES"</td><td>"Valérie"</td></tr><tr><td>275095</td><td>"10111753363"</td><td>2</td><td>"BOUZIDI"</td><td>"MOHAND"</td></tr><tr><td>275096</td><td>"10111753363"</td><td>2</td><td>"BOUZIDI"</td><td>"Mohand"</td></tr><tr><td>1112006</td><td>"10111761572"</td><td>2</td><td>"BOVENS"</td><td>"Brice"</td></tr><tr><td>1669178</td><td>"10111761572"</td><td>2</td><td>"BOVENS"</td><td>"BRICE"</td></tr></tbody></table></div>"
|
|
],
|
|
"text/plain": [
|
|
"shape: (10_110, 5)\n",
|
|
"┌─────────┬────────────────┬───────┬────────────────┬───────────────────┐\n",
|
|
"│ index ┆ Identifiant PP ┆ Count ┆ Nom d'exercice ┆ Prénom d'exercice │\n",
|
|
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
|
|
"│ u32 ┆ str ┆ u32 ┆ str ┆ str │\n",
|
|
"╞═════════╪════════════════╪═══════╪════════════════╪═══════════════════╡\n",
|
|
"│ 1393753 ┆ 10000034180 ┆ 2 ┆ DUWAT-GEORGES ┆ GHISLAINE │\n",
|
|
"│ 1393754 ┆ 10000034180 ┆ 2 ┆ GEORGES ┆ GHISLAINE │\n",
|
|
"│ 262 ┆ 10000040062 ┆ 2 ┆ MEYER ┆ Nicolas │\n",
|
|
"│ 263 ┆ 10000040062 ┆ 2 ┆ MEYER ┆ Nicolas │\n",
|
|
"│ 835639 ┆ 10000040062 ┆ 2 ┆ MEYER ┆ NICOLAS │\n",
|
|
"│ … ┆ … ┆ … ┆ … ┆ … │\n",
|
|
"│ 1947864 ┆ 10111726807 ┆ 2 ┆ BARES ┆ Valérie │\n",
|
|
"│ 275095 ┆ 10111753363 ┆ 2 ┆ BOUZIDI ┆ MOHAND │\n",
|
|
"│ 275096 ┆ 10111753363 ┆ 2 ┆ BOUZIDI ┆ Mohand │\n",
|
|
"│ 1112006 ┆ 10111761572 ┆ 2 ┆ BOVENS ┆ Brice │\n",
|
|
"│ 1669178 ┆ 10111761572 ┆ 2 ┆ BOVENS ┆ BRICE │\n",
|
|
"└─────────┴────────────────┴───────┴────────────────┴───────────────────┘"
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"%%time\n",
"# Strict (case-sensitive) check: keep the rows of every 'Identifiant PP' that is\n",
"# recorded with more than one distinct \"nom prénom\" string.\n",
"df1 = (\n",
"    df.with_columns(\n",
"        # Distinct exact full-name strings per identifier.\n",
"        Count=(pd.col(\"Nom d'exercice\") + \" \" + pd.col(\"Prénom d'exercice\"))\n",
"        .n_unique()\n",
"        .over(\"Identifiant PP\")\n",
"    )\n",
"    .filter(pd.col(\"Count\") > 1)\n",
"    .sort(\"Identifiant PP\", \"index\")\n",
"    .select([\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\"])\n",
")\n",
"df1\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "c418a6ea7abd77b",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:45:55.090712Z",
|
|
"start_time": "2025-08-19T22:45:55.072647Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Export the strict name variations as an unquoted, pipe-delimited file.\n",
"df1.write_csv(\n",
"    f\"{folder}{output_file}-Names_Variations_Strict{output_extension}\",\n",
"    separator='|',\n",
"    line_terminator='\\n',\n",
"    quote_style=\"never\",\n",
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "9d94b716364356c7",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:45:56.962873Z",
|
|
"start_time": "2025-08-19T22:45:55.259223Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: total: 2.31 s\n",
|
|
"Wall time: 509 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div><style>\n",
|
|
".dataframe > thead > tr,\n",
|
|
".dataframe > tbody > tr {\n",
|
|
" text-align: right;\n",
|
|
" white-space: pre-wrap;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<small>shape: (5_847, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Count</th><th>Nom d'exercice</th><th>Prénom d'exercice</th></tr><tr><td>u32</td><td>str</td><td>u32</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>1393753</td><td>"10000034180"</td><td>2</td><td>"DUWAT-GEORGES"</td><td>"GHISLAINE"</td></tr><tr><td>1393754</td><td>"10000034180"</td><td>2</td><td>"GEORGES"</td><td>"GHISLAINE"</td></tr><tr><td>1393833</td><td>"10000046051"</td><td>2</td><td>"STUDER"</td><td>"AGNES"</td></tr><tr><td>1672210</td><td>"10000046051"</td><td>2</td><td>"JURION"</td><td>"AGNES"</td></tr><tr><td>278448</td><td>"10000101518"</td><td>2</td><td>"BARREYRE"</td><td>"SANDRINE"</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>2224008</td><td>"10111558895"</td><td>2</td><td>"QUIROGA TENORIO DE CARVALHO"</td><td>"Rafaela"</td></tr><tr><td>831407</td><td>"10111667787"</td><td>2</td><td>"ANDY"</td><td>"MARIE MORGANE"</td></tr><tr><td>1389569</td><td>"10111667787"</td><td>2</td><td>"BARDIL"</td><td>"Morgane"</td></tr><tr><td>274737</td><td>"10111726807"</td><td>2</td><td>"COURET BARES"</td><td>"Valérie"</td></tr><tr><td>1947864</td><td>"10111726807"</td><td>2</td><td>"BARES"</td><td>"Valérie"</td></tr></tbody></table></div>"
|
|
],
|
|
"text/plain": [
|
|
"shape: (5_847, 5)\n",
|
|
"┌─────────┬────────────────┬───────┬─────────────────────────────┬───────────────────┐\n",
|
|
"│ index ┆ Identifiant PP ┆ Count ┆ Nom d'exercice ┆ Prénom d'exercice │\n",
|
|
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
|
|
"│ u32 ┆ str ┆ u32 ┆ str ┆ str │\n",
|
|
"╞═════════╪════════════════╪═══════╪═════════════════════════════╪═══════════════════╡\n",
|
|
"│ 1393753 ┆ 10000034180 ┆ 2 ┆ DUWAT-GEORGES ┆ GHISLAINE │\n",
|
|
"│ 1393754 ┆ 10000034180 ┆ 2 ┆ GEORGES ┆ GHISLAINE │\n",
|
|
"│ 1393833 ┆ 10000046051 ┆ 2 ┆ STUDER ┆ AGNES │\n",
|
|
"│ 1672210 ┆ 10000046051 ┆ 2 ┆ JURION ┆ AGNES │\n",
|
|
"│ 278448 ┆ 10000101518 ┆ 2 ┆ BARREYRE ┆ SANDRINE │\n",
|
|
"│ … ┆ … ┆ … ┆ … ┆ … │\n",
|
|
"│ 2224008 ┆ 10111558895 ┆ 2 ┆ QUIROGA TENORIO DE CARVALHO ┆ Rafaela │\n",
|
|
"│ 831407 ┆ 10111667787 ┆ 2 ┆ ANDY ┆ MARIE MORGANE │\n",
|
|
"│ 1389569 ┆ 10111667787 ┆ 2 ┆ BARDIL ┆ Morgane │\n",
|
|
"│ 274737 ┆ 10111726807 ┆ 2 ┆ COURET BARES ┆ Valérie │\n",
|
|
"│ 1947864 ┆ 10111726807 ┆ 2 ┆ BARES ┆ Valérie │\n",
|
|
"└─────────┴────────────────┴───────┴─────────────────────────────┴───────────────────┘"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"%%time\n",
"# Case-insensitive check: keep the rows of every 'Identifiant PP' whose\n",
"# lower-cased \"nom prénom\" string takes more than one distinct value.\n",
"df2 = (\n",
"    df.with_columns(\n",
"        # Lower-casing the joined string equals lower-casing each part first.\n",
"        Count=(pd.col(\"Nom d'exercice\") + \" \" + pd.col(\"Prénom d'exercice\"))\n",
"        .str.to_lowercase()\n",
"        .n_unique()\n",
"        .over(\"Identifiant PP\")\n",
"    )\n",
"    .filter(pd.col(\"Count\") > 1)\n",
"    .sort(\"Identifiant PP\", \"index\")\n",
"    .select([\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\"])\n",
")\n",
"df2\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "18aab4499103491a",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:45:57.433036Z",
|
|
"start_time": "2025-08-19T22:45:57.417970Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Export the case-insensitive name variations as an unquoted, pipe-delimited file.\n",
"df2.write_csv(\n",
"    f\"{folder}{output_file}-Names_Variations_Insensitive{output_extension}\",\n",
"    separator='|',\n",
"    line_terminator='\\n',\n",
"    quote_style=\"never\",\n",
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "8e4e3e22f16fea1c",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:46:02.915526Z",
|
|
"start_time": "2025-08-19T22:45:57.710258Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: total: 3.19 s\n",
|
|
"Wall time: 1.42 s\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div><style>\n",
|
|
".dataframe > thead > tr,\n",
|
|
".dataframe > tbody > tr {\n",
|
|
" text-align: right;\n",
|
|
" white-space: pre-wrap;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<small>shape: (3_894, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Count</th><th>Nom d'exercice</th><th>Prénom d'exercice</th><th>Nom_Prénom_Nettoyé</th></tr><tr><td>u32</td><td>str</td><td>u32</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>1393753</td><td>"10000034180"</td><td>2</td><td>"DUWAT-GEORGES"</td><td>"GHISLAINE"</td><td>"duwat georges ghislaine"</td></tr><tr><td>1393754</td><td>"10000034180"</td><td>2</td><td>"GEORGES"</td><td>"GHISLAINE"</td><td>"georges ghislaine"</td></tr><tr><td>1393833</td><td>"10000046051"</td><td>2</td><td>"STUDER"</td><td>"AGNES"</td><td>"studer agnes"</td></tr><tr><td>1672210</td><td>"10000046051"</td><td>2</td><td>"JURION"</td><td>"AGNES"</td><td>"jurion agnes"</td></tr><tr><td>278448</td><td>"10000101518"</td><td>2</td><td>"BARREYRE"</td><td>"SANDRINE"</td><td>"barreyre sandrine"</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>2224008</td><td>"10111558895"</td><td>2</td><td>"QUIROGA TENORIO DE CARVALHO"</td><td>"Rafaela"</td><td>"quiroga tenorio de carvalho ra…</td></tr><tr><td>831407</td><td>"10111667787"</td><td>2</td><td>"ANDY"</td><td>"MARIE MORGANE"</td><td>"andy marie morgane"</td></tr><tr><td>1389569</td><td>"10111667787"</td><td>2</td><td>"BARDIL"</td><td>"Morgane"</td><td>"bardil morgane"</td></tr><tr><td>274737</td><td>"10111726807"</td><td>2</td><td>"COURET BARES"</td><td>"Valérie"</td><td>"couret bares valerie"</td></tr><tr><td>1947864</td><td>"10111726807"</td><td>2</td><td>"BARES"</td><td>"Valérie"</td><td>"bares valerie"</td></tr></tbody></table></div>"
|
|
],
|
|
"text/plain": [
|
|
"shape: (3_894, 6)\n",
|
|
"┌─────────┬────────────────┬───────┬─────────────────────┬───────────────────┬─────────────────────┐\n",
|
|
"│ index ┆ Identifiant PP ┆ Count ┆ Nom d'exercice ┆ Prénom d'exercice ┆ Nom_Prénom_Nettoyé │\n",
|
|
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
|
|
"│ u32 ┆ str ┆ u32 ┆ str ┆ str ┆ str │\n",
|
|
"╞═════════╪════════════════╪═══════╪═════════════════════╪═══════════════════╪═════════════════════╡\n",
|
|
"│ 1393753 ┆ 10000034180 ┆ 2 ┆ DUWAT-GEORGES ┆ GHISLAINE ┆ duwat georges │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ghislaine │\n",
|
|
"│ 1393754 ┆ 10000034180 ┆ 2 ┆ GEORGES ┆ GHISLAINE ┆ georges ghislaine │\n",
|
|
"│ 1393833 ┆ 10000046051 ┆ 2 ┆ STUDER ┆ AGNES ┆ studer agnes │\n",
|
|
"│ 1672210 ┆ 10000046051 ┆ 2 ┆ JURION ┆ AGNES ┆ jurion agnes │\n",
|
|
"│ 278448 ┆ 10000101518 ┆ 2 ┆ BARREYRE ┆ SANDRINE ┆ barreyre sandrine │\n",
|
|
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
|
|
"│ 2224008 ┆ 10111558895 ┆ 2 ┆ QUIROGA TENORIO DE ┆ Rafaela ┆ quiroga tenorio de │\n",
|
|
"│ ┆ ┆ ┆ CARVALHO ┆ ┆ carvalho ra… │\n",
|
|
"│ 831407 ┆ 10111667787 ┆ 2 ┆ ANDY ┆ MARIE MORGANE ┆ andy marie morgane │\n",
|
|
"│ 1389569 ┆ 10111667787 ┆ 2 ┆ BARDIL ┆ Morgane ┆ bardil morgane │\n",
|
|
"│ 274737 ┆ 10111726807 ┆ 2 ┆ COURET BARES ┆ Valérie ┆ couret bares │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ valerie │\n",
|
|
"│ 1947864 ┆ 10111726807 ┆ 2 ┆ BARES ┆ Valérie ┆ bares valerie │\n",
|
|
"└─────────┴────────────────┴───────┴─────────────────────┴───────────────────┴─────────────────────┘"
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"%%time\n",
"# Normalized check: names are compared after lower-casing, folding accents and\n",
"# ligatures, turning punctuation into spaces and collapsing whitespace. Rows are\n",
"# kept for every 'Identifiant PP' that still has more than one distinct cleaned name.\n",
"df3 = (\n",
"    df\n",
"    .with_columns(\n",
"        (\n",
"            (pd.col(\"Nom d'exercice\") + \" \" + pd.col(\"Prénom d'exercice\"))\n",
"            .str.to_lowercase()\n",
"            # Accent folding\n",
"            .str.replace_all(\"à|á|â|ã|ä|å\", \"a\", literal=False)\n",
"            .str.replace_all(\"ç\", \"c\", literal=False)\n",
"            .str.replace_all(\"è|é|ê|ë\", \"e\", literal=False)\n",
"            .str.replace_all(\"ì|í|î|ï\", \"i\", literal=False)\n",
"            .str.replace_all(\"ñ\", \"n\", literal=False)\n",
"            .str.replace_all(\"ò|ó|ô|õ|ö\", \"o\", literal=False)\n",
"            .str.replace_all(\"ù|ú|û|ü\", \"u\", literal=False)\n",
"            .str.replace_all(\"ý|ÿ\", \"y\", literal=False)\n",
"            # Ligatures: expand them, otherwise the non-alphanumeric rule below\n",
"            # would blank them out and split the name.\n",
"            .str.replace_all(\"œ\", \"oe\", literal=False)\n",
"            .str.replace_all(\"æ\", \"ae\", literal=False)\n",
"            # Replace everything that is not a letter, digit or whitespace by a space.\n",
"            # A single backslash is required in the raw string: with a doubled one the\n",
"            # class excludes a literal backslash instead of whitespace.\n",
"            .str.replace_all(r\"[^a-z0-9\\s]\", \" \", literal=False)\n",
"            # Collapse whitespace runs into one space. With a doubled backslash this\n",
"            # pattern matched a backslash followed by 's', so runs were never collapsed.\n",
"            .str.replace_all(r\"\\s+\", \" \", literal=False)\n",
"            .str.strip_chars()\n",
"        ).alias(\"Nom_Prénom_Nettoyé\")\n",
"    )\n",
"    .with_columns(\n",
"        # Distinct cleaned names per identifier.\n",
"        pd.col(\"Nom_Prénom_Nettoyé\").n_unique().over(\"Identifiant PP\").alias(\"Count\")\n",
"    )\n",
"    .filter(pd.col(\"Count\") > 1)\n",
"    .sort([\"Identifiant PP\", \"index\"])\n",
"    .select(\"index\", \"Identifiant PP\", \"Count\", \"Nom d'exercice\", \"Prénom d'exercice\", \"Nom_Prénom_Nettoyé\")\n",
")\n",
"df3\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "aab2ae2e91a7190c",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:46:03.290835Z",
|
|
"start_time": "2025-08-19T22:46:03.280259Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Export the normalized name variations as an unquoted, pipe-delimited file.\n",
"df3.write_csv(\n",
"    f\"{folder}{output_file}-Names_Variations_Normalized{output_extension}\",\n",
"    separator='|',\n",
"    line_terminator='\\n',\n",
"    quote_style=\"never\",\n",
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "3c2f2bb5fc3c2a5e",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:46:07.814563Z",
|
|
"start_time": "2025-08-19T22:46:03.493442Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: total: 3.78 s\n",
|
|
"Wall time: 645 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div><style>\n",
|
|
".dataframe > thead > tr,\n",
|
|
".dataframe > tbody > tr {\n",
|
|
" text-align: right;\n",
|
|
" white-space: pre-wrap;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<small>shape: (81, 31)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Site_Identifier</th><th>Numéro SIRET site</th><th>Identifiant technique de la structure</th><th>Site_Info_Variations_Count</th><th>Raison sociale site</th><th>Enseigne commerciale site</th><th>Complément destinataire (coord. structure)</th><th>Complément point géographique (coord. structure)</th><th>Numéro Voie (coord. structure)</th><th>Indice répétition voie (coord. structure)</th><th>Code type de voie (coord. structure)</th><th>Libellé type de voie (coord. structure)</th><th>Libellé Voie (coord. structure)</th><th>Mention distribution (coord. structure)</th><th>Bureau cedex (coord. structure)</th><th>Code postal (coord. structure)</th><th>Code commune (coord. structure)</th><th>Libellé commune (coord. structure)</th><th>Code pays (coord. structure)</th><th>Libellé pays (coord. structure)</th><th>Téléphone (coord. structure)</th><th>Téléphone 2 (coord. structure)</th><th>Télécopie (coord. structure)</th><th>Adresse e-mail (coord. 
structure)</th><th>Code Département (structure)</th><th>Libellé Département (structure)</th><th>Ancien identifiant de la structure</th><th>Code secteur d'activité</th><th>Libellé secteur d'activité</th></tr><tr><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>278582</td><td>"10000116383"</td><td>"30980220500505"</td><td>"30980220500505"</td><td>"R10100000198782"</td><td>2</td><td>"OEUVRES HOSPITALIERES FRANCAIS…</td><td>"ORDRE DE MALTE FRANCE"</td><td>"ORDRE DE MALTE FRANCE"</td><td>null</td><td>"42"</td><td>null</td><td>null</td><td>null</td><td>"RUE DES VOLONTAIRES"</td><td>null</td><td>"75015 PARIS"</td><td>"75015"</td><td>"75056"</td><td>"Paris"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"330980220500505"</td><td>"SA28"</td><td>"Asso et orga humanitaire"</td></tr><tr><td>1394285</td><td>"10000116383"</td><td>"30980220500505"</td><td>"30980220500505"</td><td>"R10100000779807"</td><td>2</td><td>"OEUVRE HOSP FRANC DE L'ORDRE D…</td><td>null</td><td>null</td><td>null</td><td>"49"</td><td>null</td><td>"R"</td><td>"Rue"</td><td>"DE LA CHAPELLE"</td><td>null</td><td>"75018 PARIS 18E ARRONDISSEMEN…</td><td>"75018"</td><td>"75118"</td><td>"Paris 18e Arrondissement"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"330980220500505"</td><td>"SA28"</td><td>"Asso et orga humanitaire"</td></tr><tr><td>840759</td><td>"10000667187"</td><td>"18003502402369"</td><td>"18003502402369"</td><td>"R10100000049794"</td><td>3</td><td>"DRSM NORD PICARDIEELSM 59"</td><td>"SITE 
MAUBEUGE"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"PL"</td><td>"Place"</td><td>"DE WATTIGNIES"</td><td>"BP"</td><td>"59603 MAUBEUGE"</td><td>"59603"</td><td>"59392"</td><td>"Maubeuge"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"318003502402369"</td><td>"SA24"</td><td>"Organisme de Sécurité Sociale"</td></tr><tr><td>1677281</td><td>"10000667187"</td><td>"18003502402369"</td><td>"18003502402369"</td><td>"R10100000050000"</td><td>3</td><td>"DRSM NORD PICARDIEELSM 59"</td><td>"SITE VALENCIENNES"</td><td>null</td><td>null</td><td>"2"</td><td>null</td><td>"PL"</td><td>"Place"</td><td>"DE LA REPUBLIQUE"</td><td>"BP"</td><td>"59304 VALENCIENNES"</td><td>"59304"</td><td>"59606"</td><td>"Valenciennes"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"318003502402369"</td><td>"SA24"</td><td>"Organisme de Sécurité Sociale"</td></tr><tr><td>1956358</td><td>"10000667187"</td><td>"18003502402369"</td><td>"18003502402369"</td><td>"R10100000049793"</td><td>3</td><td>"DRSM NORD PICARDIEELSM 59"</td><td>"SITE CAMBRAI"</td><td>null</td><td>null</td><td>"10"</td><td>null</td><td>"R"</td><td>"Rue"</td><td>"SAINT LAZARE"</td><td>"BP"</td><td>"59403 CAMBRAI"</td><td>"59403"</td><td>"59122"</td><td>"Cambrai"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"318003502402369"</td><td>"SA24"</td><td>"Organisme de Sécurité 
Sociale"</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>539023</td><td>"10110592168"</td><td>"88085935000014"</td><td>"88085935000014"</td><td>"R10100000325887"</td><td>2</td><td>"FAREVA PAU"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"AV"</td><td>"Avenue"</td><td>"DU BEARN"</td><td>null</td><td>"64320 IDRON"</td><td>"64320"</td><td>"64269"</td><td>"Idron"</td><td>null</td><td>null</td><td>"0559402100"</td><td>null</td><td>"0559402119"</td><td>null</td><td>null</td><td>null</td><td>"388085935000014"</td><td>"SA32"</td><td>"Fab. Exploit. Import. Méd. DM"</td></tr><tr><td>260720</td><td>"10110597498"</td><td>"13000800600038"</td><td>"13000800600038"</td><td>"R10100000097229"</td><td>2</td><td>"AGENCE REGIONALE SANTE PAYS LO…</td><td>null</td><td>"DELEGATION TERRITORIALE"</td><td>null</td><td>"2"</td><td>null</td><td>null</td><td>null</td><td>"BOULEVARD MURAT"</td><td>null</td><td>"53000 LAVAL"</td><td>"53000"</td><td>"53130"</td><td>"Laval"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"313000800600038"</td><td>"SA24"</td><td>"Organisme de Sécurité Sociale"</td></tr><tr><td>1376341</td><td>"10110597498"</td><td>"13000800600038"</td><td>"13000800600038"</td><td>"R10100000097229"</td><td>2</td><td>"AGENCE REGIONALE SANTE PAYS LO…</td><td>null</td><td>null</td><td>null</td><td>"17"</td><td>null</td><td>"BD"</td><td>"Boulevard"</td><td>"GASTON DOUMERGUE"</td><td>null</td><td>"44262 
NANTES"</td><td>"44262"</td><td>"44109"</td><td>"Nantes"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"313000800600038"</td><td>"SA24"</td><td>"Organisme de Sécurité Sociale"</td></tr><tr><td>1658604</td><td>"10110910345"</td><td>"18003502401098"</td><td>"18003502401098"</td><td>"R10100000398898"</td><td>2</td><td>"CAISSE NATIONALE DE L'ASSURANC…</td><td>"DRSM DIRECTION REG. DU SERVICE…</td><td>"QUARTIER DU LAC"</td><td>null</td><td>"80"</td><td>null</td><td>null</td><td>null</td><td>"AVENUE DE LA JALLERE"</td><td>"BP 260"</td><td>"33300 BORDEAUX"</td><td>"33300"</td><td>"33063"</td><td>"Bordeaux"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"318003502401098"</td><td>"SA24"</td><td>"Organisme de Sécurité Sociale"</td></tr><tr><td>1937582</td><td>"10110910345"</td><td>"18003502401098"</td><td>"18003502401098"</td><td>"R10100000398898"</td><td>2</td><td>"CAISSE NATIONALE DE L'ASSURANC…</td><td>"DRSM DIRECTION REG. DU SERVICE…</td><td>null</td><td>null</td><td>"207"</td><td>null</td><td>"R"</td><td>"Rue"</td><td>"FONTAINEBLEAU"</td><td>"BP"</td><td>"40011 MONT-DE-MARSAN"</td><td>"40011"</td><td>"40192"</td><td>"Mont-de-Marsan"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"318003502401098"</td><td>"SA24"</td><td>"Organisme de Sécurité Sociale"</td></tr></tbody></table></div>"
|
|
],
|
|
"text/plain": [
|
|
"shape: (81, 31)\n",
|
|
"┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n",
|
|
"│ index ┆ Identifian ┆ Site_Iden ┆ Numéro ┆ … ┆ Libellé ┆ Ancien ┆ Code ┆ Libellé │\n",
|
|
"│ --- ┆ t PP ┆ tifier ┆ SIRET ┆ ┆ Départeme ┆ identifia ┆ secteur ┆ secteur │\n",
|
|
"│ u32 ┆ --- ┆ --- ┆ site ┆ ┆ nt (struc ┆ nt de la ┆ d'activit ┆ d'activit │\n",
|
|
"│ ┆ str ┆ str ┆ --- ┆ ┆ ture… ┆ struc… ┆ é ┆ é │\n",
|
|
"│ ┆ ┆ ┆ str ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ str ┆ str ┆ str ┆ str │\n",
|
|
"╞═════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n",
|
|
"│ 278582 ┆ 1000011638 ┆ 309802205 ┆ 309802205 ┆ … ┆ null ┆ 330980220 ┆ SA28 ┆ Asso et │\n",
|
|
"│ ┆ 3 ┆ 00505 ┆ 00505 ┆ ┆ ┆ 500505 ┆ ┆ orga huma │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ nitaire │\n",
|
|
"│ 1394285 ┆ 1000011638 ┆ 309802205 ┆ 309802205 ┆ … ┆ null ┆ 330980220 ┆ SA28 ┆ Asso et │\n",
|
|
"│ ┆ 3 ┆ 00505 ┆ 00505 ┆ ┆ ┆ 500505 ┆ ┆ orga huma │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ nitaire │\n",
|
|
"│ 840759 ┆ 1000066718 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n",
|
|
"│ ┆ 7 ┆ 02369 ┆ 02369 ┆ ┆ ┆ 402369 ┆ ┆ de │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
|
|
"│ 1677281 ┆ 1000066718 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n",
|
|
"│ ┆ 7 ┆ 02369 ┆ 02369 ┆ ┆ ┆ 402369 ┆ ┆ de │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
|
|
"│ 1956358 ┆ 1000066718 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n",
|
|
"│ ┆ 7 ┆ 02369 ┆ 02369 ┆ ┆ ┆ 402369 ┆ ┆ de │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
|
|
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
|
|
"│ 539023 ┆ 1011059216 ┆ 880859350 ┆ 880859350 ┆ … ┆ null ┆ 388085935 ┆ SA32 ┆ Fab. │\n",
|
|
"│ ┆ 8 ┆ 00014 ┆ 00014 ┆ ┆ ┆ 000014 ┆ ┆ Exploit. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
|
|
"│ 260720 ┆ 1011059749 ┆ 130008006 ┆ 130008006 ┆ … ┆ null ┆ 313000800 ┆ SA24 ┆ Organisme │\n",
|
|
"│ ┆ 8 ┆ 00038 ┆ 00038 ┆ ┆ ┆ 600038 ┆ ┆ de │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
|
|
"│ 1376341 ┆ 1011059749 ┆ 130008006 ┆ 130008006 ┆ … ┆ null ┆ 313000800 ┆ SA24 ┆ Organisme │\n",
|
|
"│ ┆ 8 ┆ 00038 ┆ 00038 ┆ ┆ ┆ 600038 ┆ ┆ de │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
|
|
"│ 1658604 ┆ 1011091034 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n",
|
|
"│ ┆ 5 ┆ 01098 ┆ 01098 ┆ ┆ ┆ 401098 ┆ ┆ de │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
|
|
"│ 1937582 ┆ 1011091034 ┆ 180035024 ┆ 180035024 ┆ … ┆ null ┆ 318003502 ┆ SA24 ┆ Organisme │\n",
|
|
"│ ┆ 5 ┆ 01098 ┆ 01098 ┆ ┆ ┆ 401098 ┆ ┆ de │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sécurité │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Sociale │\n",
|
|
"└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘"
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"%%time\n",
"# Site-description columns: the contiguous run from 'Raison sociale site' through\n",
"# \"Libellé secteur d'activité\", leaving out \"Autorité d'enregistrement\".\n",
"all_columns = df.columns\n",
"start_col = 'Raison sociale site'\n",
"end_col = \"Libellé secteur d'activité\"\n",
"start_col_index = all_columns.index(start_col)\n",
"end_col_index = all_columns.index(end_col)\n",
"site_info_cols = [\n",
"    name\n",
"    for name in all_columns[start_col_index : end_col_index + 1]\n",
"    if name != \"Autorité d'enregistrement\"\n",
"]\n",
"\n",
"df4 = (\n",
"    df\n",
"    # Rows with no FINESS number that still carry a SIRET or a technical structure id.\n",
"    .filter(\n",
"        pd.col('Numéro FINESS site').is_null()\n",
"        & (\n",
"            pd.col('Numéro SIRET site').is_not_null()\n",
"            | pd.col('Identifiant technique de la structure').is_not_null()\n",
"        )\n",
"    )\n",
"    # SIRET when present, otherwise the technical structure id.\n",
"    .with_columns(\n",
"        Site_Identifier=pd.coalesce(\n",
"            pd.col('Numéro SIRET site'),\n",
"            pd.col('Identifiant technique de la structure'),\n",
"        )\n",
"    )\n",
"    # Distinct combinations of the site-description columns per (person, site) pair.\n",
"    .with_columns(\n",
"        Site_Info_Variations_Count=pd.struct(site_info_cols)\n",
"        .n_unique()\n",
"        .over('Identifiant PP', 'Site_Identifier')\n",
"    )\n",
"    .filter(pd.col('Site_Info_Variations_Count') > 1)\n",
"    .sort('Identifiant PP', 'Site_Identifier', 'index')\n",
"    .select(\n",
"        [\n",
"            'index',\n",
"            'Identifiant PP',\n",
"            'Site_Identifier',\n",
"            'Numéro SIRET site',\n",
"            'Identifiant technique de la structure',\n",
"            'Site_Info_Variations_Count',\n",
"            *site_info_cols,\n",
"        ]\n",
"    )\n",
")\n",
"df4"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "c1fd01e419f4ccc9",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:46:07.974271Z",
|
|
"start_time": "2025-08-19T22:46:07.943280Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Export the site-information variations as an unquoted, pipe-delimited file.\n",
"df4.write_csv(\n",
"    f\"{folder}{output_file}-Sites_Variations{output_extension}\",\n",
"    separator='|',\n",
"    line_terminator='\\n',\n",
"    quote_style=\"never\",\n",
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "7838523925fc85ee",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:46:12.781888Z",
|
|
"start_time": "2025-08-19T22:46:08.306776Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: total: 2.84 s\n",
|
|
"Wall time: 596 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div><style>\n",
|
|
".dataframe > thead > tr,\n",
|
|
".dataframe > tbody > tr {\n",
|
|
" text-align: right;\n",
|
|
" white-space: pre-wrap;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<small>shape: (4_350, 31)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Site_Identifier</th><th>Numéro SIRET site</th><th>Identifiant technique de la structure</th><th>Site_Info_Variations_Count</th><th>Raison sociale site</th><th>Enseigne commerciale site</th><th>Complément destinataire (coord. structure)</th><th>Complément point géographique (coord. structure)</th><th>Numéro Voie (coord. structure)</th><th>Indice répétition voie (coord. structure)</th><th>Code type de voie (coord. structure)</th><th>Libellé type de voie (coord. structure)</th><th>Libellé Voie (coord. structure)</th><th>Mention distribution (coord. structure)</th><th>Bureau cedex (coord. structure)</th><th>Code postal (coord. structure)</th><th>Code commune (coord. structure)</th><th>Libellé commune (coord. structure)</th><th>Code pays (coord. structure)</th><th>Libellé pays (coord. structure)</th><th>Téléphone (coord. structure)</th><th>Téléphone 2 (coord. structure)</th><th>Télécopie (coord. structure)</th><th>Adresse e-mail (coord. 
structure)</th><th>Code Département (structure)</th><th>Libellé Département (structure)</th><th>Ancien identifiant de la structure</th><th>Code secteur d'activité</th><th>Libellé secteur d'activité</th></tr><tr><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>126741</td><td>"10100002293"</td><td>"05650171100115"</td><td>"05650171100115"</td><td>"R10000001502146"</td><td>2</td><td>"BECTON DICKINSON FRANCE"</td><td>null</td><td>null</td><td>null</td><td>"11"</td><td>null</td><td>"R"</td><td>"Rue"</td><td>"RUE ARISTIDE BERGES"</td><td>null</td><td>"38801 LE PONT DE CLAIX CEDEX"</td><td>"38801"</td><td>"38317"</td><td>"Le Pont-de-Claix"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"305650171100115"</td><td>"SA32"</td><td>"Fab. Exploit. Import. Méd. DM"</td></tr><tr><td>292886</td><td>"10001806768"</td><td>"05650171100115"</td><td>"05650171100115"</td><td>"R10000001502146"</td><td>2</td><td>"BECTON DICKINSON FRANCE"</td><td>null</td><td>null</td><td>null</td><td>"11"</td><td>null</td><td>"R"</td><td>"Rue"</td><td>"RUE ARISTIDE BERGES"</td><td>null</td><td>"38801 LE PONT DE CLAIX CEDEX"</td><td>"38801"</td><td>"38317"</td><td>"Le Pont-de-Claix"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"305650171100115"</td><td>"SA32"</td><td>"Fab. Exploit. Import. Méd. 
DM"</td></tr><tr><td>473490</td><td>"10104800411"</td><td>"05650171100115"</td><td>"05650171100115"</td><td>"R10000001502146"</td><td>2</td><td>"BECTON DICKINSON FRANCE"</td><td>null</td><td>null</td><td>null</td><td>"11"</td><td>null</td><td>"R"</td><td>"Rue"</td><td>"RUE ARISTIDE BERGES"</td><td>null</td><td>"38801 LE PONT DE CLAIX CEDEX"</td><td>"38801"</td><td>"38317"</td><td>"Le Pont-de-Claix"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"305650171100115"</td><td>"SA32"</td><td>"Fab. Exploit. Import. Méd. DM"</td></tr><tr><td>738289</td><td>"10103687157"</td><td>"05650171100115"</td><td>"05650171100115"</td><td>"R10000001502146"</td><td>2</td><td>"BECTON DICKINSON FRANCE"</td><td>null</td><td>null</td><td>null</td><td>"11"</td><td>null</td><td>"R"</td><td>"Rue"</td><td>"RUE ARISTIDE BERGES"</td><td>null</td><td>"38801 LE PONT DE CLAIX CEDEX"</td><td>"38801"</td><td>"38317"</td><td>"Le Pont-de-Claix"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"305650171100115"</td><td>"SA32"</td><td>"Fab. Exploit. Import. Méd. DM"</td></tr><tr><td>850421</td><td>"10001796597"</td><td>"05650171100115"</td><td>"05650171100115"</td><td>"R10000001502146"</td><td>2</td><td>"BECTON DICKINSON FRANCE"</td><td>null</td><td>null</td><td>null</td><td>"11"</td><td>null</td><td>"R"</td><td>"Rue"</td><td>"RUE ARISTIDE BERGES"</td><td>null</td><td>"38801 LE PONT DE CLAIX CEDEX"</td><td>"38801"</td><td>"38317"</td><td>"Le Pont-de-Claix"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"305650171100115"</td><td>"SA32"</td><td>"Fab. Exploit. Import. Méd. 
DM"</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>808716</td><td>"10109869403"</td><td>"98452619400019"</td><td>"98452619400019"</td><td>"R10100000673943"</td><td>2</td><td>"SELARL CENTRE DE PODOLOGIE SPO…</td><td>"SELARL CENTRE DE PODOLOGIE SPO…</td><td>null</td><td>null</td><td>"8"</td><td>null</td><td>"R"</td><td>"Rue"</td><td>"GEORGES NEGREVERGNE"</td><td>null</td><td>"33700 MERIGNAC"</td><td>"33700"</td><td>"33281"</td><td>"Mérignac"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"398452619400019"</td><td>"SA09"</td><td>"Exercice en Société"</td></tr><tr><td>1541009</td><td>"10101293263"</td><td>"98452619400019"</td><td>"98452619400019"</td><td>"R10100000673943"</td><td>2</td><td>"SELARL CENTRE DE PODOLOGIE SPO…</td><td>"SELARL CENTRE DE PODOLOGIE SPO…</td><td>null</td><td>null</td><td>"8"</td><td>null</td><td>"R"</td><td>"Rue"</td><td>"GEORGES NEGREVERGNE"</td><td>null</td><td>"33700 MERIGNAC"</td><td>"33700"</td><td>"33281"</td><td>"Mérignac"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"398452619400019"</td><td>"SA09"</td><td>"Exercice en Société"</td></tr><tr><td>1900660</td><td>"10108015131"</td><td>"98452619400019"</td><td>"98452619400019"</td><td>"R10100000673943"</td><td>2</td><td>"SELARL CENTRE DE PODOLOGIE SPO…</td><td>"SELARL CENTRE DE PODOLOGIE SPO…</td><td>null</td><td>null</td><td>"8"</td><td>null</td><td>"R"</td><td>"Rue"</td><td>"GEORGES NEGREVERGNE"</td><td>null</td><td>"33700 
MERIGNAC"</td><td>"33700"</td><td>"33281"</td><td>"Mérignac"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"398452619400019"</td><td>"SA09"</td><td>"Exercice en Société"</td></tr><tr><td>1611683</td><td>"10107235243"</td><td>"99882350430834"</td><td>"99882350430834"</td><td>"R10100000554688"</td><td>2</td><td>"ADECCO FRANCE"</td><td>"ADECCO"</td><td>null</td><td>"PARC VALMY PARK AVENUE BAT A 1…</td><td>"8"</td><td>"D"</td><td>null</td><td>null</td><td>"RUE JEANNE BARRET"</td><td>null</td><td>"21000 DIJON"</td><td>"21000"</td><td>"21231"</td><td>"Dijon"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"399882350430834"</td><td>"SA11"</td><td>"Entreprise d'intérim"</td></tr><tr><td>1630338</td><td>"10108693036"</td><td>"99882350430834"</td><td>"99882350430834"</td><td>"R10100000413248"</td><td>2</td><td>"ADECCO MEDICAL"</td><td>null</td><td>null</td><td>null</td><td>"8"</td><td>"D"</td><td>"R"</td><td>"Rue"</td><td>"JEANNE BARRET"</td><td>null</td><td>"21000 DIJON"</td><td>"21000"</td><td>"21231"</td><td>"Dijon"</td><td>"99000"</td><td>"France"</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>"399882350430834"</td><td>"SA11"</td><td>"Entreprise d'intérim"</td></tr></tbody></table></div>"
|
|
],
|
|
"text/plain": [
|
|
"shape: (4_350, 31)\n",
|
|
"┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n",
|
|
"│ index ┆ Identifian ┆ Site_Iden ┆ Numéro ┆ … ┆ Libellé ┆ Ancien ┆ Code ┆ Libellé │\n",
|
|
"│ --- ┆ t PP ┆ tifier ┆ SIRET ┆ ┆ Départeme ┆ identifia ┆ secteur ┆ secteur │\n",
|
|
"│ u32 ┆ --- ┆ --- ┆ site ┆ ┆ nt (struc ┆ nt de la ┆ d'activit ┆ d'activit │\n",
|
|
"│ ┆ str ┆ str ┆ --- ┆ ┆ ture… ┆ struc… ┆ é ┆ é │\n",
|
|
"│ ┆ ┆ ┆ str ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ str ┆ str ┆ str ┆ str │\n",
|
|
"╞═════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n",
|
|
"│ 126741 ┆ 1010000229 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n",
|
|
"│ ┆ 3 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
|
|
"│ 292886 ┆ 1000180676 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n",
|
|
"│ ┆ 8 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
|
|
"│ 473490 ┆ 1010480041 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n",
|
|
"│ ┆ 1 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
|
|
"│ 738289 ┆ 1010368715 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n",
|
|
"│ ┆ 7 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
|
|
"│ 850421 ┆ 1000179659 ┆ 056501711 ┆ 056501711 ┆ … ┆ null ┆ 305650171 ┆ SA32 ┆ Fab. │\n",
|
|
"│ ┆ 7 ┆ 00115 ┆ 00115 ┆ ┆ ┆ 100115 ┆ ┆ Exploit. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Import. │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Méd. DM │\n",
|
|
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
|
|
"│ 808716 ┆ 1010986940 ┆ 984526194 ┆ 984526194 ┆ … ┆ null ┆ 398452619 ┆ SA09 ┆ Exercice │\n",
|
|
"│ ┆ 3 ┆ 00019 ┆ 00019 ┆ ┆ ┆ 400019 ┆ ┆ en │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Société │\n",
|
|
"│ 1541009 ┆ 1010129326 ┆ 984526194 ┆ 984526194 ┆ … ┆ null ┆ 398452619 ┆ SA09 ┆ Exercice │\n",
|
|
"│ ┆ 3 ┆ 00019 ┆ 00019 ┆ ┆ ┆ 400019 ┆ ┆ en │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Société │\n",
|
|
"│ 1900660 ┆ 1010801513 ┆ 984526194 ┆ 984526194 ┆ … ┆ null ┆ 398452619 ┆ SA09 ┆ Exercice │\n",
|
|
"│ ┆ 1 ┆ 00019 ┆ 00019 ┆ ┆ ┆ 400019 ┆ ┆ en │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ Société │\n",
|
|
"│ 1611683 ┆ 1010723524 ┆ 998823504 ┆ 998823504 ┆ … ┆ null ┆ 399882350 ┆ SA11 ┆ Entrepris │\n",
|
|
"│ ┆ 3 ┆ 30834 ┆ 30834 ┆ ┆ ┆ 430834 ┆ ┆ e │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ d'intérim │\n",
|
|
"│ 1630338 ┆ 1010869303 ┆ 998823504 ┆ 998823504 ┆ … ┆ null ┆ 399882350 ┆ SA11 ┆ Entrepris │\n",
|
|
"│ ┆ 6 ┆ 30834 ┆ 30834 ┆ ┆ ┆ 430834 ┆ ┆ e │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ d'intérim │\n",
|
|
"└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘"
|
|
]
|
|
},
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"%%time\n",
|
|
"# Rows without a FINESS number whose fallback site identifier is associated with\n",
|
|
"# more than one distinct combination of the site_info_cols values.\n",
|
|
"# NOTE(review): `pd` is presumably Polars bound to a pandas-style alias (coalesce/struct/over\n",
|
|
"# are Polars expressions) -- confirm against the import cell.\n",
|
|
"df5 = (\n",
|
|
"    df\n",
|
|
"    # No FINESS number, but at least one of the two fallback identifiers is present.\n",
|
|
"    .filter(\n",
|
|
"        pd.col(\"Numéro FINESS site\").is_null()\n",
|
|
"        & (\n",
|
|
"            pd.col(\"Numéro SIRET site\").is_not_null()\n",
|
|
"            | pd.col(\"Identifiant technique de la structure\").is_not_null()\n",
|
|
"        )\n",
|
|
"    )\n",
|
|
"    # SIRET when available, otherwise the technical structure identifier.\n",
|
|
"    .with_columns(\n",
|
|
"        Site_Identifier=pd.coalesce(\n",
|
|
"            pd.col(\"Numéro SIRET site\"),\n",
|
|
"            pd.col(\"Identifiant technique de la structure\"),\n",
|
|
"        )\n",
|
|
"    )\n",
|
|
"    # Distinct site_info_cols combinations per identifier, broadcast to every row of the group.\n",
|
|
"    .with_columns(\n",
|
|
"        Site_Info_Variations_Count=pd.struct(site_info_cols).n_unique().over(\"Site_Identifier\")\n",
|
|
"    )\n",
|
|
"    .filter(pd.col(\"Site_Info_Variations_Count\") > 1)\n",
|
|
"    .sort([\"Site_Identifier\", \"index\"])\n",
|
|
"    .select(\n",
|
|
"        [\n",
|
|
"            \"index\",\n",
|
|
"            \"Identifiant PP\",\n",
|
|
"            \"Site_Identifier\",\n",
|
|
"            \"Numéro SIRET site\",\n",
|
|
"            \"Identifiant technique de la structure\",\n",
|
|
"            \"Site_Info_Variations_Count\",\n",
|
|
"        ]\n",
|
|
"        + site_info_cols\n",
|
|
"    )\n",
|
|
")\n",
|
|
"df5\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "416184f32f973a71",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:46:13.670911Z",
|
|
"start_time": "2025-08-19T22:46:13.655386Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Write df5 to a pipe-delimited text file; quote_style=\"never\" emits every field unquoted.\n",
|
|
"# NOTE(review): an unquoted value containing '|' or a line break would corrupt its row -- confirm the data cannot contain them.\n",
|
|
"df5.write_csv(\n",
|
|
"    f\"{folder}{output_file}-Sites_Variations_Global{output_extension}\",\n",
|
|
"    separator=\"|\",\n",
|
|
"    quote_style=\"never\",\n",
|
|
"    line_terminator=\"\\n\",\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "84549f83ce5e92f",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:46:17.023811Z",
|
|
"start_time": "2025-08-19T22:46:14.032470Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: total: 3.77 s\n",
|
|
"Wall time: 685 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div><style>\n",
|
|
".dataframe > thead > tr,\n",
|
|
".dataframe > tbody > tr {\n",
|
|
" text-align: right;\n",
|
|
" white-space: pre-wrap;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<small>shape: (25_105, 12)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Site_Identifier_Global</th><th>Numéro FINESS site</th><th>Numéro SIRET site</th><th>Identifiant technique de la structure</th><th>Activites_Count</th><th>Libellé profession</th><th>Libellé savoir-faire</th><th>Libellé mode exercice</th><th>Libellé rôle</th><th>Libellé genre activité</th></tr><tr><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td><td>u32</td><td>str</td><td>str</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>1393660</td><td>"10000017979"</td><td>"130786445"</td><td>"130786445"</td><td>"30247736900011"</td><td>"F130786445"</td><td>2</td><td>"Ostéopathe"</td><td>null</td><td>"Salarié"</td><td>"Salarié en poste fixe"</td><td>"Activité standard de soin ou d…</td></tr><tr><td>1951128</td><td>"10000017979"</td><td>"130786445"</td><td>"130786445"</td><td>"30247736900011"</td><td>"F130786445"</td><td>2</td><td>"Sage-Femme"</td><td>null</td><td>"Salarié"</td><td>"Fonction non définie"</td><td>"Activité standard de soin ou d…</td></tr><tr><td>556685</td><td>"10000086842"</td><td>"860012228"</td><td>"860012228"</td><td>"13001256000038"</td><td>"F860012228"</td><td>2</td><td>"Psychologue"</td><td>null</td><td>"Salarié"</td><td>"Salarié en poste fixe"</td><td>"Activité standard de soin ou d…</td></tr><tr><td>1951541</td><td>"10000086842"</td><td>"860012228"</td><td>"860012228"</td><td>"13001256000038"</td><td>"F860012228"</td><td>2</td><td>"Psychothérapeute"</td><td>null</td><td>"Salarié"</td><td>"Salarié en poste fixe"</td><td>"Activité standard de soin ou d…</td></tr><tr><td>278389</td><td>"10000090869"</td><td>"110000023"</td><td>"110000023"</td><td>"26110002800149"</td><td>"F110000023"</td><td>2</td><td>"Sage-Femme"</td><td>null</td><td>"Salarié"</td><td>"Fonction non définie"</td><td>"Activité standard de soin ou 
d…</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>2225628</td><td>"10111685458"</td><td>"670000025"</td><td>"670000025"</td><td>"26670057400012"</td><td>"F670000025"</td><td>2</td><td>"Chirurgien-Dentiste"</td><td>null</td><td>"Salarié"</td><td>"Salarié en poste fixe"</td><td>"Activité standard de soin ou d…</td></tr><tr><td>275095</td><td>"10111753363"</td><td>"13002370800014"</td><td>null</td><td>"13002370800014"</td><td>"R10100000794696"</td><td>2</td><td>"Infirmier"</td><td>null</td><td>"Salarié"</td><td>"Salarié en poste fixe"</td><td>"Activité standard de soin ou d…</td></tr><tr><td>275096</td><td>"10111753363"</td><td>"13002370800014"</td><td>null</td><td>"13002370800014"</td><td>"R10100000794696"</td><td>2</td><td>"Psychologue"</td><td>null</td><td>"Salarié"</td><td>"Salarié en poste fixe"</td><td>"Activité standard de soin ou d…</td></tr><tr><td>276592</td><td>"10111877634"</td><td>"210987632"</td><td>"210987632"</td><td>"26210006800010"</td><td>"F210987632"</td><td>2</td><td>"Psychothérapeute"</td><td>null</td><td>"Salarié"</td><td>"Salarié en poste fixe"</td><td>"Activité standard de soin ou d…</td></tr><tr><td>1392260</td><td>"10111877634"</td><td>"210987632"</td><td>"210987632"</td><td>"26210006800010"</td><td>"F210987632"</td><td>2</td><td>"Psychologue"</td><td>null</td><td>"Salarié"</td><td>"Salarié en poste fixe"</td><td>"Activité standard de soin ou d…</td></tr></tbody></table></div>"
|
|
],
|
|
"text/plain": [
|
|
"shape: (25_105, 12)\n",
|
|
"┌─────────┬────────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n",
|
|
"│ index ┆ Identifian ┆ Site_Iden ┆ Numéro ┆ … ┆ Libellé ┆ Libellé ┆ Libellé ┆ Libellé │\n",
|
|
"│ --- ┆ t PP ┆ tifier_Gl ┆ FINESS ┆ ┆ savoir-fa ┆ mode ┆ rôle ┆ genre │\n",
|
|
"│ u32 ┆ --- ┆ obal ┆ site ┆ ┆ ire ┆ exercice ┆ --- ┆ activité │\n",
|
|
"│ ┆ str ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ str ┆ --- │\n",
|
|
"│ ┆ ┆ str ┆ str ┆ ┆ str ┆ str ┆ ┆ str │\n",
|
|
"╞═════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n",
|
|
"│ 1393660 ┆ 1000001797 ┆ 130786445 ┆ 130786445 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
|
|
"│ ┆ 9 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
|
|
"│ 1951128 ┆ 1000001797 ┆ 130786445 ┆ 130786445 ┆ … ┆ null ┆ Salarié ┆ Fonction ┆ Activité │\n",
|
|
"│ ┆ 9 ┆ ┆ ┆ ┆ ┆ ┆ non ┆ standard │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ définie ┆ de soin │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
|
|
"│ 556685 ┆ 1000008684 ┆ 860012228 ┆ 860012228 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
|
|
"│ ┆ 2 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
|
|
"│ 1951541 ┆ 1000008684 ┆ 860012228 ┆ 860012228 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
|
|
"│ ┆ 2 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
|
|
"│ 278389 ┆ 1000009086 ┆ 110000023 ┆ 110000023 ┆ … ┆ null ┆ Salarié ┆ Fonction ┆ Activité │\n",
|
|
"│ ┆ 9 ┆ ┆ ┆ ┆ ┆ ┆ non ┆ standard │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ définie ┆ de soin │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
|
|
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
|
|
"│ 2225628 ┆ 1011168545 ┆ 670000025 ┆ 670000025 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
|
|
"│ ┆ 8 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
|
|
"│ 275095 ┆ 1011175336 ┆ 130023708 ┆ null ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
|
|
"│ ┆ 3 ┆ 00014 ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
|
|
"│ 275096 ┆ 1011175336 ┆ 130023708 ┆ null ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
|
|
"│ ┆ 3 ┆ 00014 ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
|
|
"│ 276592 ┆ 1011187763 ┆ 210987632 ┆ 210987632 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
|
|
"│ ┆ 4 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
|
|
"│ 1392260 ┆ 1011187763 ┆ 210987632 ┆ 210987632 ┆ … ┆ null ┆ Salarié ┆ Salarié ┆ Activité │\n",
|
|
"│ ┆ 4 ┆ ┆ ┆ ┆ ┆ ┆ en poste ┆ standard │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fixe ┆ de soin │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ou d… │\n",
|
|
"└─────────┴────────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘"
|
|
]
|
|
},
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"%%time\n",
|
|
"# Practitioner/site pairs that carry more than one distinct activity description.\n",
|
|
"# The label columns that define an activity are declared once and reused for both the\n",
|
|
"# distinct count and the final projection, so the two lists cannot drift apart.\n",
|
|
"activity_label_cols = [\n",
|
|
"    \"Libellé profession\",\n",
|
|
"    \"Libellé savoir-faire\",\n",
|
|
"    \"Libellé mode exercice\",\n",
|
|
"    \"Libellé rôle\",\n",
|
|
"    \"Libellé genre activité\",\n",
|
|
"]\n",
|
|
"df6 = (\n",
|
|
"    df\n",
|
|
"    # First non-null of FINESS, SIRET, technical structure identifier (in that order).\n",
|
|
"    .with_columns(\n",
|
|
"        pd.coalesce(\n",
|
|
"            pd.col(\"Numéro FINESS site\"),\n",
|
|
"            pd.col(\"Numéro SIRET site\"),\n",
|
|
"            pd.col(\"Identifiant technique de la structure\"),\n",
|
|
"        ).alias(\"Site_Identifier_Global\")\n",
|
|
"    )\n",
|
|
"    # Rows with none of the three identifiers cannot be attributed to a site.\n",
|
|
"    .filter(pd.col(\"Site_Identifier_Global\").is_not_null())\n",
|
|
"    # Distinct activity label combinations per (practitioner, site), broadcast to each row.\n",
|
|
"    .with_columns(\n",
|
|
"        pd.struct(activity_label_cols)\n",
|
|
"        .n_unique()\n",
|
|
"        .over([\"Identifiant PP\", \"Site_Identifier_Global\"])\n",
|
|
"        .alias(\"Activites_Count\")\n",
|
|
"    )\n",
|
|
"    .filter(pd.col(\"Activites_Count\") > 1)\n",
|
|
"    .sort([\"Identifiant PP\", \"Site_Identifier_Global\", \"index\"])\n",
|
|
"    .select(\n",
|
|
"        [\n",
|
|
"            \"index\",\n",
|
|
"            \"Identifiant PP\",\n",
|
|
"            \"Site_Identifier_Global\",\n",
|
|
"            \"Numéro FINESS site\",\n",
|
|
"            \"Numéro SIRET site\",\n",
|
|
"            \"Identifiant technique de la structure\",\n",
|
|
"            \"Activites_Count\",\n",
|
|
"        ]\n",
|
|
"        + activity_label_cols\n",
|
|
"    )\n",
|
|
")\n",
|
|
"df6\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "6f7025a7c08b54b4",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:46:17.851427Z",
|
|
"start_time": "2025-08-19T22:46:17.796168Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Write df6 to a pipe-delimited text file; quote_style=\"never\" emits every field unquoted.\n",
|
|
"# NOTE(review): an unquoted value containing '|' or a line break would corrupt its row -- confirm the data cannot contain them.\n",
|
|
"df6.write_csv(\n",
|
|
"    f\"{folder}{output_file}-Multiple_Activities_Per_Site{output_extension}\",\n",
|
|
"    separator=\"|\",\n",
|
|
"    quote_style=\"never\",\n",
|
|
"    line_terminator=\"\\n\",\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "b18d9ba71ba63d9d",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:46:19.535052Z",
|
|
"start_time": "2025-08-19T22:46:18.015194Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: total: 2.53 s\n",
|
|
"Wall time: 483 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div><style>\n",
|
|
".dataframe > thead > tr,\n",
|
|
".dataframe > tbody > tr {\n",
|
|
" text-align: right;\n",
|
|
" white-space: pre-wrap;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<small>shape: (91_163, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>index</th><th>Identifiant PP</th><th>Profession_Count</th><th>Libellé profession</th><th>Libellé savoir-faire</th></tr><tr><td>u32</td><td>str</td><td>u32</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>78</td><td>"10000013150"</td><td>2</td><td>"Médecin"</td><td>"Psychiatrie"</td></tr><tr><td>277918</td><td>"10000013150"</td><td>2</td><td>"Psychothérapeute"</td><td>null</td></tr><tr><td>1393660</td><td>"10000017979"</td><td>2</td><td>"Ostéopathe"</td><td>null</td></tr><tr><td>1951128</td><td>"10000017979"</td><td>2</td><td>"Sage-Femme"</td><td>null</td></tr><tr><td>835571</td><td>"10000029966"</td><td>2</td><td>"Sage-Femme"</td><td>null</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>1670279</td><td>"10111847264"</td><td>2</td><td>"Psychologue"</td><td>null</td></tr><tr><td>1391880</td><td>"10111849799"</td><td>2</td><td>"Psychothérapeute"</td><td>null</td></tr><tr><td>1670314</td><td>"10111849799"</td><td>2</td><td>"Psychologue"</td><td>null</td></tr><tr><td>276592</td><td>"10111877634"</td><td>2</td><td>"Psychothérapeute"</td><td>null</td></tr><tr><td>1392260</td><td>"10111877634"</td><td>2</td><td>"Psychologue"</td><td>null</td></tr></tbody></table></div>"
|
|
],
|
|
"text/plain": [
|
|
"shape: (91_163, 5)\n",
|
|
"┌─────────┬────────────────┬──────────────────┬────────────────────┬──────────────────────┐\n",
|
|
"│ index ┆ Identifiant PP ┆ Profession_Count ┆ Libellé profession ┆ Libellé savoir-faire │\n",
|
|
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
|
|
"│ u32 ┆ str ┆ u32 ┆ str ┆ str │\n",
|
|
"╞═════════╪════════════════╪══════════════════╪════════════════════╪══════════════════════╡\n",
|
|
"│ 78 ┆ 10000013150 ┆ 2 ┆ Médecin ┆ Psychiatrie │\n",
|
|
"│ 277918 ┆ 10000013150 ┆ 2 ┆ Psychothérapeute ┆ null │\n",
|
|
"│ 1393660 ┆ 10000017979 ┆ 2 ┆ Ostéopathe ┆ null │\n",
|
|
"│ 1951128 ┆ 10000017979 ┆ 2 ┆ Sage-Femme ┆ null │\n",
|
|
"│ 835571 ┆ 10000029966 ┆ 2 ┆ Sage-Femme ┆ null │\n",
|
|
"│ … ┆ … ┆ … ┆ … ┆ … │\n",
|
|
"│ 1670279 ┆ 10111847264 ┆ 2 ┆ Psychologue ┆ null │\n",
|
|
"│ 1391880 ┆ 10111849799 ┆ 2 ┆ Psychothérapeute ┆ null │\n",
|
|
"│ 1670314 ┆ 10111849799 ┆ 2 ┆ Psychologue ┆ null │\n",
|
|
"│ 276592 ┆ 10111877634 ┆ 2 ┆ Psychothérapeute ┆ null │\n",
|
|
"│ 1392260 ┆ 10111877634 ┆ 2 ┆ Psychologue ┆ null │\n",
|
|
"└─────────┴────────────────┴──────────────────┴────────────────────┴──────────────────────┘"
|
|
]
|
|
},
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"%%time\n",
|
|
"# Practitioners that appear with more than one distinct profession label.\n",
|
|
"df7 = (\n",
|
|
"    df\n",
|
|
"    .with_columns(\n",
|
|
"        # n_unique() counts null as a value of its own; nulls are dropped first so a\n",
|
|
"        # missing label is never counted as an additional profession.\n",
|
|
"        pd.col(\"Libellé profession\")\n",
|
|
"        .drop_nulls()\n",
|
|
"        .n_unique()\n",
|
|
"        .over(\"Identifiant PP\")\n",
|
|
"        .alias(\"Profession_Count\")\n",
|
|
"    )\n",
|
|
"    .filter(pd.col(\"Profession_Count\") > 1)\n",
|
|
"    .sort([\"Identifiant PP\", \"index\"])\n",
|
|
"    .select(\n",
|
|
"        [\n",
|
|
"            \"index\",\n",
|
|
"            \"Identifiant PP\",\n",
|
|
"            \"Profession_Count\",\n",
|
|
"            \"Libellé profession\",\n",
|
|
"            \"Libellé savoir-faire\",\n",
|
|
"        ]\n",
|
|
"    )\n",
|
|
")\n",
|
|
"df7\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "85be468fd3f461d1",
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-08-19T22:46:19.893214Z",
|
|
"start_time": "2025-08-19T22:46:19.851765Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Write df7 to a pipe-delimited text file; quote_style=\"never\" emits every field unquoted.\n",
|
|
"# NOTE(review): an unquoted value containing '|' or a line break would corrupt its row -- confirm the data cannot contain them.\n",
|
|
"df7.write_csv(\n",
|
|
"    f\"{folder}{output_file}-Multiple_Professions{output_extension}\",\n",
|
|
"    separator=\"|\",\n",
|
|
"    quote_style=\"never\",\n",
|
|
"    line_terminator=\"\\n\",\n",
|
|
")\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.14.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|