{ "cells": [ { "cell_type": "code", "execution_count": 23, "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2024-01-23T18:53:49.676160800Z", "start_time": "2024-01-23T18:53:49.620035200Z" } }, "outputs": [], "source": [ "\n", "from io import StringIO\n", "import pandas as pd\n", "import datacompy\n", "\n", "data1 = \"\"\"acct_id,dollar_amt,name,float_fld,date_fld\n", "10000001234,123.45,George Maharis,14530.1555,2017-01-01\n", "10000001235,0.45,Michael Bluth,1,2017-01-01\n", "10000001236,1345,George Bluth,,2017-01-01\n", "10000001237,123456,Bob Loblaw,345.12,2017-01-01\n", "10000001237,123457,Bob Loblaw,345.12,2017-01-01\n", "10000001239,1.05,Lucille Bluth,,2017-01-01\n", "\"\"\"\n", "\n", "data2 = \"\"\"acct_id,dollar_amt,name,float_fld\n", "10000001234,123.4,George Michael Bluth,14530.155\n", "10000001235,0.45,Michael Bluth,\n", "10000001236,1345,George Bluth,1\n", "10000001237,123456,Robert Loblaw,345.12\n", "10000001238,1.05,Loose Seal Bluth,111\n", "\"\"\"\n", "\n", "df1 = pd.read_csv(StringIO(data1))\n", "df2 = pd.read_csv(StringIO(data2))\n", "\n", "compare = datacompy.Compare(\n", " df1,\n", " df2,\n", " join_columns='acct_id', #You can also specify a list of columns\n", " abs_tol=0, #Optional, defaults to 0\n", " rel_tol=0, #Optional, defaults to 0\n", " df1_name='Original', #Optional, defaults to 'df1'\n", " df2_name='New' #Optional, defaults to 'df2'\n", " )\n" ] }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": " acct_id dollar_amt_df1 dollar_amt_df2 name_df1 \\\n0 10000001234 123.45 123.40 George Maharis \n1 10000001235 0.45 0.45 Michael Bluth \n2 10000001236 1345.00 1345.00 George Bluth \n3 10000001237 123456.00 123456.00 Bob Loblaw \n\n name_df2 float_fld_df1 float_fld_df2 \n0 George Michael Bluth 14530.1555 14530.155 \n1 Michael Bluth 1.0000 NaN \n2 George Bluth NaN 1.000 \n3 Robert Loblaw 345.1200 345.120 ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
acct_iddollar_amt_df1dollar_amt_df2name_df1name_df2float_fld_df1float_fld_df2
010000001234123.45123.40George MaharisGeorge Michael Bluth14530.155514530.155
1100000012350.450.45Michael BluthMichael Bluth1.0000NaN
2100000012361345.001345.00George BluthGeorge BluthNaN1.000
310000001237123456.00123456.00Bob LoblawRobert Loblaw345.1200345.120
\n
" }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "compare.all_mismatch(ignore_matching_cols=True)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-01-23T18:53:56.135115400Z", "start_time": "2024-01-23T18:53:56.086349900Z" } }, "id": "2f16ab257397f6c9", "execution_count": 24 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": " acct_id dollar_amt name float_fld date_fld\n4 10000001237 123457.00 Bob Loblaw 345.12 2017-01-01\n5 10000001239 1.05 Lucille Bluth NaN 2017-01-01", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
acct_iddollar_amtnamefloat_flddate_fld
410000001237123457.00Bob Loblaw345.122017-01-01
5100000012391.05Lucille BluthNaN2017-01-01
\n
" }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "compare.df1_unq_rows" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-01-23T18:53:59.793951800Z", "start_time": "2024-01-23T18:53:59.751624300Z" } }, "id": "f38ecf439538fc9b", "execution_count": 25 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": " acct_id dollar_amt name float_fld\n6 10000001238 1.05 Loose Seal Bluth 111.0", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
acct_iddollar_amtnamefloat_fld
6100000012381.05Loose Seal Bluth111.0
\n
" }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "compare.df2_unq_rows" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-01-23T18:54:20.805047600Z", "start_time": "2024-01-23T18:54:20.777818600Z" } }, "id": "b0a4c80da0847ac0", "execution_count": 26 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": " acct_id dollar_amt name float_fld date_fld\n0 10000001234 123.45 George Maharis 14530.1555 2017-01-01\n1 10000001235 0.45 Michael Bluth 1.0000 2017-01-01\n2 10000001236 1345.00 George Bluth NaN 2017-01-01\n3 10000001237 123456.00 Bob Loblaw 345.1200 2017-01-01\n4 10000001237 123457.00 Bob Loblaw 345.1200 2017-01-01\n5 10000001239 1.05 Lucille Bluth NaN 2017-01-01", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
acct_iddollar_amtnamefloat_flddate_fld
010000001234123.45George Maharis14530.15552017-01-01
1100000012350.45Michael Bluth1.00002017-01-01
2100000012361345.00George BluthNaN2017-01-01
310000001237123456.00Bob Loblaw345.12002017-01-01
410000001237123457.00Bob Loblaw345.12002017-01-01
5100000012391.05Lucille BluthNaN2017-01-01
\n
" }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-01-23T18:54:25.595365100Z", "start_time": "2024-01-23T18:54:25.533925200Z" } }, "id": "b9aa33151fa6f235", "execution_count": 27 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": " acct_id dollar_amt name float_fld\n0 10000001234 123.40 George Michael Bluth 14530.155\n1 10000001235 0.45 Michael Bluth NaN\n2 10000001236 1345.00 George Bluth 1.000\n3 10000001237 123456.00 Robert Loblaw 345.120\n4 10000001238 1.05 Loose Seal Bluth 111.000", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
acct_iddollar_amtnamefloat_fld
010000001234123.40George Michael Bluth14530.155
1100000012350.45Michael BluthNaN
2100000012361345.00George Bluth1.000
310000001237123456.00Robert Loblaw345.120
4100000012381.05Loose Seal Bluth111.000
\n
" }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-01-23T18:54:28.672000100Z", "start_time": "2024-01-23T18:54:28.631719300Z" } }, "id": "aaa69421db146ed7", "execution_count": 28 } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }