Initial Commit

This commit is contained in:
2026-03-05 11:11:10 +00:00
commit 0ae48d63f2
16 changed files with 3247 additions and 0 deletions

204
datacompyTest.ipynb Normal file
View File

@@ -0,0 +1,204 @@
{
"cells": [
{
 "cell_type": "code",
 "execution_count": 23,
 "id": "initial_id",
 "metadata": {
  "collapsed": true,
  "ExecuteTime": {
   "end_time": "2024-01-23T18:53:49.676160800Z",
   "start_time": "2024-01-23T18:53:49.620035200Z"
  }
 },
 "outputs": [],
 "source": [
  "# Build two small account tables in-line and compare them with datacompy.\n",
  "from io import StringIO\n",
  "import pandas as pd\n",
  "import datacompy\n",
  "\n",
  "# First table: six rows plus a date_fld column; acct_id 10000001237 occurs twice.\n",
  "data1 = \"\"\"acct_id,dollar_amt,name,float_fld,date_fld\n",
  "10000001234,123.45,George Maharis,14530.1555,2017-01-01\n",
  "10000001235,0.45,Michael Bluth,1,2017-01-01\n",
  "10000001236,1345,George Bluth,,2017-01-01\n",
  "10000001237,123456,Bob Loblaw,345.12,2017-01-01\n",
  "10000001237,123457,Bob Loblaw,345.12,2017-01-01\n",
  "10000001239,1.05,Lucille Bluth,,2017-01-01\n",
  "\"\"\"\n",
  "\n",
  "# Second table: five rows, no date_fld column.\n",
  "data2 = \"\"\"acct_id,dollar_amt,name,float_fld\n",
  "10000001234,123.4,George Michael Bluth,14530.155\n",
  "10000001235,0.45,Michael Bluth,\n",
  "10000001236,1345,George Bluth,1\n",
  "10000001237,123456,Robert Loblaw,345.12\n",
  "10000001238,1.05,Loose Seal Bluth,111\n",
  "\"\"\"\n",
  "\n",
  "# Parse both CSV texts in order; csv_text stays local to the generator,\n",
  "# so only df1 and df2 are added to the kernel namespace here.\n",
  "df1, df2 = (pd.read_csv(StringIO(csv_text)) for csv_text in (data1, data2))\n",
  "\n",
  "# Later cells read their results from this `compare` object.\n",
  "compare = datacompy.Compare(\n",
  "    df1,\n",
  "    df2,\n",
  "    join_columns='acct_id',  # a list of columns may be given instead\n",
  "    abs_tol=0,  # optional; the default is 0\n",
  "    rel_tol=0,  # optional; the default is 0\n",
  "    df1_name='Original',  # optional; the default is 'df1'\n",
  "    df2_name='New',  # optional; the default is 'df2'\n",
  ")\n"
 ]
},
{
"cell_type": "code",
"outputs": [
{
"data": {
"text/plain": " acct_id dollar_amt_df1 dollar_amt_df2 name_df1 \\\n0 10000001234 123.45 123.40 George Maharis \n1 10000001235 0.45 0.45 Michael Bluth \n2 10000001236 1345.00 1345.00 George Bluth \n3 10000001237 123456.00 123456.00 Bob Loblaw \n\n name_df2 float_fld_df1 float_fld_df2 \n0 George Michael Bluth 14530.1555 14530.155 \n1 Michael Bluth 1.0000 NaN \n2 George Bluth NaN 1.000 \n3 Robert Loblaw 345.1200 345.120 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt_df1</th>\n <th>dollar_amt_df2</th>\n <th>name_df1</th>\n <th>name_df2</th>\n <th>float_fld_df1</th>\n <th>float_fld_df2</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10000001234</td>\n <td>123.45</td>\n <td>123.40</td>\n <td>George Maharis</td>\n <td>George Michael Bluth</td>\n <td>14530.1555</td>\n <td>14530.155</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10000001235</td>\n <td>0.45</td>\n <td>0.45</td>\n <td>Michael Bluth</td>\n <td>Michael Bluth</td>\n <td>1.0000</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10000001236</td>\n <td>1345.00</td>\n <td>1345.00</td>\n <td>George Bluth</td>\n <td>George Bluth</td>\n <td>NaN</td>\n <td>1.000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10000001237</td>\n <td>123456.00</td>\n <td>123456.00</td>\n <td>Bob Loblaw</td>\n <td>Robert Loblaw</td>\n <td>345.1200</td>\n <td>345.120</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compare.all_mismatch(ignore_matching_cols=True)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-23T18:53:56.135115400Z",
"start_time": "2024-01-23T18:53:56.086349900Z"
}
},
"id": "2f16ab257397f6c9",
"execution_count": 24
},
{
"cell_type": "code",
"outputs": [
{
"data": {
"text/plain": " acct_id dollar_amt name float_fld date_fld\n4 10000001237 123457.00 Bob Loblaw 345.12 2017-01-01\n5 10000001239 1.05 Lucille Bluth NaN 2017-01-01",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n <th>date_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>4</th>\n <td>10000001237</td>\n <td>123457.00</td>\n <td>Bob Loblaw</td>\n <td>345.12</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>5</th>\n <td>10000001239</td>\n <td>1.05</td>\n <td>Lucille Bluth</td>\n <td>NaN</td>\n <td>2017-01-01</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compare.df1_unq_rows"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-23T18:53:59.793951800Z",
"start_time": "2024-01-23T18:53:59.751624300Z"
}
},
"id": "f38ecf439538fc9b",
"execution_count": 25
},
{
"cell_type": "code",
"outputs": [
{
"data": {
"text/plain": " acct_id dollar_amt name float_fld\n6 10000001238 1.05 Loose Seal Bluth 111.0",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>6</th>\n <td>10000001238</td>\n <td>1.05</td>\n <td>Loose Seal Bluth</td>\n <td>111.0</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compare.df2_unq_rows"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-23T18:54:20.805047600Z",
"start_time": "2024-01-23T18:54:20.777818600Z"
}
},
"id": "b0a4c80da0847ac0",
"execution_count": 26
},
{
"cell_type": "code",
"outputs": [
{
"data": {
"text/plain": " acct_id dollar_amt name float_fld date_fld\n0 10000001234 123.45 George Maharis 14530.1555 2017-01-01\n1 10000001235 0.45 Michael Bluth 1.0000 2017-01-01\n2 10000001236 1345.00 George Bluth NaN 2017-01-01\n3 10000001237 123456.00 Bob Loblaw 345.1200 2017-01-01\n4 10000001237 123457.00 Bob Loblaw 345.1200 2017-01-01\n5 10000001239 1.05 Lucille Bluth NaN 2017-01-01",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n <th>date_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10000001234</td>\n <td>123.45</td>\n <td>George Maharis</td>\n <td>14530.1555</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10000001235</td>\n <td>0.45</td>\n <td>Michael Bluth</td>\n <td>1.0000</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10000001236</td>\n <td>1345.00</td>\n <td>George Bluth</td>\n <td>NaN</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10000001237</td>\n <td>123456.00</td>\n <td>Bob Loblaw</td>\n <td>345.1200</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>4</th>\n <td>10000001237</td>\n <td>123457.00</td>\n <td>Bob Loblaw</td>\n <td>345.1200</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>5</th>\n <td>10000001239</td>\n <td>1.05</td>\n <td>Lucille Bluth</td>\n <td>NaN</td>\n <td>2017-01-01</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-23T18:54:25.595365100Z",
"start_time": "2024-01-23T18:54:25.533925200Z"
}
},
"id": "b9aa33151fa6f235",
"execution_count": 27
},
{
"cell_type": "code",
"outputs": [
{
"data": {
"text/plain": " acct_id dollar_amt name float_fld\n0 10000001234 123.40 George Michael Bluth 14530.155\n1 10000001235 0.45 Michael Bluth NaN\n2 10000001236 1345.00 George Bluth 1.000\n3 10000001237 123456.00 Robert Loblaw 345.120\n4 10000001238 1.05 Loose Seal Bluth 111.000",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10000001234</td>\n <td>123.40</td>\n <td>George Michael Bluth</td>\n <td>14530.155</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10000001235</td>\n <td>0.45</td>\n <td>Michael Bluth</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10000001236</td>\n <td>1345.00</td>\n <td>George Bluth</td>\n <td>1.000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10000001237</td>\n <td>123456.00</td>\n <td>Robert Loblaw</td>\n <td>345.120</td>\n </tr>\n <tr>\n <th>4</th>\n <td>10000001238</td>\n <td>1.05</td>\n <td>Loose Seal Bluth</td>\n <td>111.000</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-23T18:54:28.672000100Z",
"start_time": "2024-01-23T18:54:28.631719300Z"
}
},
"id": "aaa69421db146ed7",
"execution_count": 28
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}