Initial Commit
This commit is contained in:
204
datacompyTest.ipynb
Normal file
204
datacompyTest.ipynb
Normal file
@@ -0,0 +1,204 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-23T18:53:49.676160800Z",
|
||||
"start_time": "2024-01-23T18:53:49.620035200Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from io import StringIO\n",
|
||||
"import pandas as pd\n",
|
||||
"import datacompy\n",
|
||||
"\n",
|
||||
"data1 = \"\"\"acct_id,dollar_amt,name,float_fld,date_fld\n",
|
||||
"10000001234,123.45,George Maharis,14530.1555,2017-01-01\n",
|
||||
"10000001235,0.45,Michael Bluth,1,2017-01-01\n",
|
||||
"10000001236,1345,George Bluth,,2017-01-01\n",
|
||||
"10000001237,123456,Bob Loblaw,345.12,2017-01-01\n",
|
||||
"10000001237,123457,Bob Loblaw,345.12,2017-01-01\n",
|
||||
"10000001239,1.05,Lucille Bluth,,2017-01-01\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"data2 = \"\"\"acct_id,dollar_amt,name,float_fld\n",
|
||||
"10000001234,123.4,George Michael Bluth,14530.155\n",
|
||||
"10000001235,0.45,Michael Bluth,\n",
|
||||
"10000001236,1345,George Bluth,1\n",
|
||||
"10000001237,123456,Robert Loblaw,345.12\n",
|
||||
"10000001238,1.05,Loose Seal Bluth,111\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"df1 = pd.read_csv(StringIO(data1))\n",
|
||||
"df2 = pd.read_csv(StringIO(data2))\n",
|
||||
"\n",
|
||||
"compare = datacompy.Compare(\n",
|
||||
" df1,\n",
|
||||
" df2,\n",
|
||||
" join_columns='acct_id', #You can also specify a list of columns\n",
|
||||
" abs_tol=0, #Optional, defaults to 0\n",
|
||||
" rel_tol=0, #Optional, defaults to 0\n",
|
||||
" df1_name='Original', #Optional, defaults to 'df1'\n",
|
||||
" df2_name='New' #Optional, defaults to 'df2'\n",
|
||||
" )\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": " acct_id dollar_amt_df1 dollar_amt_df2 name_df1 \\\n0 10000001234 123.45 123.40 George Maharis \n1 10000001235 0.45 0.45 Michael Bluth \n2 10000001236 1345.00 1345.00 George Bluth \n3 10000001237 123456.00 123456.00 Bob Loblaw \n\n name_df2 float_fld_df1 float_fld_df2 \n0 George Michael Bluth 14530.1555 14530.155 \n1 Michael Bluth 1.0000 NaN \n2 George Bluth NaN 1.000 \n3 Robert Loblaw 345.1200 345.120 ",
|
||||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt_df1</th>\n <th>dollar_amt_df2</th>\n <th>name_df1</th>\n <th>name_df2</th>\n <th>float_fld_df1</th>\n <th>float_fld_df2</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10000001234</td>\n <td>123.45</td>\n <td>123.40</td>\n <td>George Maharis</td>\n <td>George Michael Bluth</td>\n <td>14530.1555</td>\n <td>14530.155</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10000001235</td>\n <td>0.45</td>\n <td>0.45</td>\n <td>Michael Bluth</td>\n <td>Michael Bluth</td>\n <td>1.0000</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10000001236</td>\n <td>1345.00</td>\n <td>1345.00</td>\n <td>George Bluth</td>\n <td>George Bluth</td>\n <td>NaN</td>\n <td>1.000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10000001237</td>\n <td>123456.00</td>\n <td>123456.00</td>\n <td>Bob Loblaw</td>\n <td>Robert Loblaw</td>\n <td>345.1200</td>\n <td>345.120</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"compare.all_mismatch(ignore_matching_cols=True)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-23T18:53:56.135115400Z",
|
||||
"start_time": "2024-01-23T18:53:56.086349900Z"
|
||||
}
|
||||
},
|
||||
"id": "2f16ab257397f6c9",
|
||||
"execution_count": 24
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": " acct_id dollar_amt name float_fld date_fld\n4 10000001237 123457.00 Bob Loblaw 345.12 2017-01-01\n5 10000001239 1.05 Lucille Bluth NaN 2017-01-01",
|
||||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n <th>date_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>4</th>\n <td>10000001237</td>\n <td>123457.00</td>\n <td>Bob Loblaw</td>\n <td>345.12</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>5</th>\n <td>10000001239</td>\n <td>1.05</td>\n <td>Lucille Bluth</td>\n <td>NaN</td>\n <td>2017-01-01</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"compare.df1_unq_rows"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-23T18:53:59.793951800Z",
|
||||
"start_time": "2024-01-23T18:53:59.751624300Z"
|
||||
}
|
||||
},
|
||||
"id": "f38ecf439538fc9b",
|
||||
"execution_count": 25
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": " acct_id dollar_amt name float_fld\n6 10000001238 1.05 Loose Seal Bluth 111.0",
|
||||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>6</th>\n <td>10000001238</td>\n <td>1.05</td>\n <td>Loose Seal Bluth</td>\n <td>111.0</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"compare.df2_unq_rows"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-23T18:54:20.805047600Z",
|
||||
"start_time": "2024-01-23T18:54:20.777818600Z"
|
||||
}
|
||||
},
|
||||
"id": "b0a4c80da0847ac0",
|
||||
"execution_count": 26
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": " acct_id dollar_amt name float_fld date_fld\n0 10000001234 123.45 George Maharis 14530.1555 2017-01-01\n1 10000001235 0.45 Michael Bluth 1.0000 2017-01-01\n2 10000001236 1345.00 George Bluth NaN 2017-01-01\n3 10000001237 123456.00 Bob Loblaw 345.1200 2017-01-01\n4 10000001237 123457.00 Bob Loblaw 345.1200 2017-01-01\n5 10000001239 1.05 Lucille Bluth NaN 2017-01-01",
|
||||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n <th>date_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10000001234</td>\n <td>123.45</td>\n <td>George Maharis</td>\n <td>14530.1555</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10000001235</td>\n <td>0.45</td>\n <td>Michael Bluth</td>\n <td>1.0000</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10000001236</td>\n <td>1345.00</td>\n <td>George Bluth</td>\n <td>NaN</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10000001237</td>\n <td>123456.00</td>\n <td>Bob Loblaw</td>\n <td>345.1200</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>4</th>\n <td>10000001237</td>\n <td>123457.00</td>\n <td>Bob Loblaw</td>\n <td>345.1200</td>\n <td>2017-01-01</td>\n </tr>\n <tr>\n <th>5</th>\n <td>10000001239</td>\n <td>1.05</td>\n <td>Lucille Bluth</td>\n <td>NaN</td>\n <td>2017-01-01</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||
},
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df1"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-23T18:54:25.595365100Z",
|
||||
"start_time": "2024-01-23T18:54:25.533925200Z"
|
||||
}
|
||||
},
|
||||
"id": "b9aa33151fa6f235",
|
||||
"execution_count": 27
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": " acct_id dollar_amt name float_fld\n0 10000001234 123.40 George Michael Bluth 14530.155\n1 10000001235 0.45 Michael Bluth NaN\n2 10000001236 1345.00 George Bluth 1.000\n3 10000001237 123456.00 Robert Loblaw 345.120\n4 10000001238 1.05 Loose Seal Bluth 111.000",
|
||||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>acct_id</th>\n <th>dollar_amt</th>\n <th>name</th>\n <th>float_fld</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10000001234</td>\n <td>123.40</td>\n <td>George Michael Bluth</td>\n <td>14530.155</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10000001235</td>\n <td>0.45</td>\n <td>Michael Bluth</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10000001236</td>\n <td>1345.00</td>\n <td>George Bluth</td>\n <td>1.000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10000001237</td>\n <td>123456.00</td>\n <td>Robert Loblaw</td>\n <td>345.120</td>\n </tr>\n <tr>\n <th>4</th>\n <td>10000001238</td>\n <td>1.05</td>\n <td>Loose Seal Bluth</td>\n <td>111.000</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||
},
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df2"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-23T18:54:28.672000100Z",
|
||||
"start_time": "2024-01-23T18:54:28.631719300Z"
|
||||
}
|
||||
},
|
||||
"id": "aaa69421db146ed7",
|
||||
"execution_count": 28
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user