diff --git a/tests/correlatedPattern/basic/CoMineTest.ipynb b/tests/correlatedPattern/basic/CoMineTest.ipynb new file mode 100644 index 00000000..c28c9872 --- /dev/null +++ b/tests/correlatedPattern/basic/CoMineTest.ipynb @@ -0,0 +1,665 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pami in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (2024.5.29.2)\n", + "Requirement already satisfied: psutil in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (5.9.8)\n", + "Requirement already satisfied: pandas in c:\\python310\\lib\\site-packages (from pami) (2.2.1)\n", + "Requirement already satisfied: plotly in c:\\python310\\lib\\site-packages (from pami) (5.18.0)\n", + "Requirement already satisfied: matplotlib in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (3.9.0)\n", + "Requirement already satisfied: resource in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (0.2.1)\n", + "Requirement already satisfied: validators in c:\\python310\\lib\\site-packages (from pami) (0.28.3)\n", + "Requirement already satisfied: urllib3 in c:\\python310\\lib\\site-packages (from pami) (2.2.1)\n", + "Requirement already satisfied: Pillow in c:\\python310\\lib\\site-packages (from pami) (10.3.0)\n", + "Requirement already satisfied: numpy in c:\\python310\\lib\\site-packages (from pami) (1.26.4)\n", + "Requirement already satisfied: sphinx in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (7.3.7)\n", + "Requirement already satisfied: sphinx-rtd-theme in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (2.0.0)\n", + "Requirement already satisfied: discord.py in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (2.3.2)\n", + 
"Requirement already satisfied: networkx in c:\\python310\\lib\\site-packages (from pami) (3.3)\n", + "Requirement already satisfied: deprecated in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (1.2.14)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in c:\\python310\\lib\\site-packages (from deprecated->pami) (1.16.0)\n", + "Requirement already satisfied: aiohttp<4,>=3.7.4 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from discord.py->pami) (3.9.5)\n", + "Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from matplotlib->pami) (1.2.1)\n", + "Requirement already satisfied: cycler>=0.10 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from matplotlib->pami) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in c:\\python310\\lib\\site-packages (from matplotlib->pami) (4.52.4)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in c:\\python310\\lib\\site-packages (from matplotlib->pami) (1.4.5)\n", + "Requirement already satisfied: packaging>=20.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from matplotlib->pami) (24.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in c:\\python310\\lib\\site-packages (from matplotlib->pami) (3.1.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in c:\\python310\\lib\\site-packages (from matplotlib->pami) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in c:\\python310\\lib\\site-packages (from pandas->pami) (2023.3.post1)\n", + "Requirement already satisfied: tzdata>=2022.7 in c:\\python310\\lib\\site-packages (from pandas->pami) (2023.4)\n", + "Requirement already satisfied: tenacity>=6.2.0 in c:\\python310\\lib\\site-packages (from plotly->pami) (8.2.3)\n", + "Requirement already satisfied: JsonForm>=0.0.2 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from 
resource->pami) (0.0.2)\n", + "Requirement already satisfied: JsonSir>=0.0.2 in c:\\python310\\lib\\site-packages (from resource->pami) (0.0.2)\n", + "Requirement already satisfied: python-easyconfig>=0.1.0 in c:\\python310\\lib\\site-packages (from resource->pami) (0.1.7)\n", + "Requirement already satisfied: sphinxcontrib-applehelp in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.8)\n", + "Requirement already satisfied: sphinxcontrib-devhelp in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.6)\n", + "Requirement already satisfied: sphinxcontrib-jsmath in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.1)\n", + "Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in c:\\python310\\lib\\site-packages (from sphinx->pami) (2.0.5)\n", + "Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.9 in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.1.10)\n", + "Requirement already satisfied: sphinxcontrib-qthelp in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.7)\n", + "Requirement already satisfied: Jinja2>=3.0 in c:\\python310\\lib\\site-packages (from sphinx->pami) (3.1.2)\n", + "Requirement already satisfied: Pygments>=2.14 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (2.18.0)\n", + "Requirement already satisfied: docutils<0.22,>=0.18.1 in c:\\python310\\lib\\site-packages (from sphinx->pami) (0.20.1)\n", + "Requirement already satisfied: snowballstemmer>=2.0 in c:\\python310\\lib\\site-packages (from sphinx->pami) (2.2.0)\n", + "Requirement already satisfied: babel>=2.9 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (2.15.0)\n", + "Requirement already satisfied: alabaster~=0.7.14 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (0.7.16)\n", + "Requirement already satisfied: imagesize>=1.3 in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.4.1)\n", + 
"Requirement already satisfied: requests>=2.25.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (2.32.2)\n", + "Requirement already satisfied: tomli>=2 in c:\\python310\\lib\\site-packages (from sphinx->pami) (2.0.1)\n", + "Requirement already satisfied: colorama>=0.4.5 in c:\\python310\\lib\\site-packages (from sphinx->pami) (0.4.6)\n", + "Requirement already satisfied: sphinxcontrib-jquery<5,>=4 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx-rtd-theme->pami) (4.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in c:\\python310\\lib\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in c:\\python310\\lib\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in c:\\python310\\lib\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (4.0.3)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in c:\\python310\\lib\\site-packages (from Jinja2>=3.0->sphinx->pami) (2.1.1)\n", + "Requirement already satisfied: jsonschema in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from JsonForm>=0.0.2->resource->pami) (4.22.0)\n", + "Requirement already satisfied: six>=1.5 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from python-dateutil>=2.7->matplotlib->pami) 
(1.16.0)\n", + "Requirement already satisfied: PyYAML in c:\\python310\\lib\\site-packages (from python-easyconfig>=0.1.0->resource->pami) (6.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\python310\\lib\\site-packages (from requests>=2.25.0->sphinx->pami) (3.0.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in c:\\python310\\lib\\site-packages (from requests>=2.25.0->sphinx->pami) (3.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from requests>=2.25.0->sphinx->pami) (2024.2.2)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in c:\\python310\\lib\\site-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (0.18.1)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: Ignoring invalid distribution - (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -ip (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -p (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution - (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -ip (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -p (c:\\python310\\lib\\site-packages)\n" + ] + } + ], + "source": [ + "!pip install -U pami" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test cases for T10I4D100K Datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test case 1 for 
T10I4D100K Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Frequent patterns were generated successfully using CoMine algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 CoMine 500 575 13.170256 474042368\n", + "1 CoMine 1000 375 14.911338 405041152\n", + "2 CoMine 1500 237 8.679268 390664192\n", + "3 CoMine 2000 155 8.170271 309084160\n", + "4 CoMine 2500 107 6.989860 302297088\n" + ] + } + ], + "source": [ + "from PAMI.correlatedPattern.basic import CoMine as alg\n", + "import pandas as pd\n", + "# Set the input parameters\n", + "URL = 'https://u-aizu.ac.jp/~udayrage/datasets/transactionalDatabases/Transactional_T10I4D100K.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "minAllConf = 0.5\n", + "# Initialize a data frame to store the results of CoMine\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.CoMine(URL, minSup=minSupCount, sep=separator, minAllConf=minAllConf)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['CoMine', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS(),]\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using CoMine algorithm\")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", 
+ "metadata": {}, + "source": [ + "### Test case 2 for T10I4D100K Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Frequent patterns were generated successfully using CoMine algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 CoMine 500 575 9.224247 475312128\n", + "1 CoMine 1000 375 8.205463 405835776\n", + "2 CoMine 1500 237 8.248886 377323520\n", + "3 CoMine 2000 155 5.561130 366460928\n", + "4 CoMine 2500 107 3.269074 342110208\n" + ] + } + ], + "source": [ + "from PAMI.correlatedPattern.basic import CoMine as alg\n", + "import pandas as pd\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_T10I4D100K.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "minAllConf = 0.5\n", + "# Initialize a data frame to store the results of CoMine\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.CoMine(inputFile, minSup=minSupCount, sep=separator, minAllConf=minAllConf)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['CoMine', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS(),]\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using CoMine algorithm\")\n", + "print(result)" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "### Test case 3 for T10I4D100K Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Frequent patterns were generated successfully using CoMine algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 CoMine 500 575 9.048601 508465152\n", + "1 CoMine 1000 375 7.117447 448401408\n", + "2 CoMine 1500 237 7.068889 377303040\n", + "3 CoMine 2000 155 7.174149 371314688\n", + "4 CoMine 2500 107 4.165333 354504704\n" + ] + } + ], + "source": [ + "from PAMI.correlatedPattern.basic import CoMine as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_T10I4D100K.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "minAllConf = 0.5\n", + "\n", + "# Read the CSV file into a DataFrame\n", + "df = pd.read_csv(inputFile, header=[0], sep=' ')\n", + "df = df.rename(columns={df.columns[0]: \"Transactions\"})\n", + "\n", + "# Ensure all data in the \"Transaction\" column is string before splitting\n", + "df[\"Transactions\"] = df[\"Transactions\"].astype(str)\n", + "\n", + "# Preprocess the dataset\n", + "# transactions = df[\"Transaction\"].apply(lambda x: x.split(separator)).tolist()\n", + "transactions = df['Transactions'].apply(lambda x: x.split(separator))\n", + "transactions = pd.DataFrame(transactions, columns=['Transactions'])\n", + "\n", + "# Initialize a data frame to store the results of CoMine\n", + "result = 
pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.CoMine(transactions, minSup=minSupCount, sep=separator, minAllConf=minAllConf)\n", + " obj.mine()\n", + "\n", + " # Get the patterns found\n", + " patterns = obj.getPatterns()\n", + "\n", + " # Store the results in the data frame\n", + " result.loc[len(result)] = ['CoMine', minSupCount, len(patterns), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(\"Frequent patterns were generated successfully using CoMine algorithm\")\n", + "print(result)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Cases for Retail Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 1 for Retail Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Frequent patterns were generated successfully using CoMine algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 CoMine 100 1872 15.773522 435101696\n", + "1 CoMine 200 809 11.169929 407351296\n", + "2 CoMine 300 419 9.317302 403980288\n", + "3 CoMine 400 275 8.696479 398663680\n", + "4 CoMine 500 186 7.090744 395563008\n" + ] + } + ], + "source": [ + "from PAMI.correlatedPattern.basic import CoMine as alg\n", + "import pandas as pd\n", + "# Set the input parameters\n", + "URL = 
'https://u-aizu.ac.jp/~udayrage/datasets/transactionalDatabases/Transactional_retail.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [100, 200, 300, 400, 500]\n", + "minAllConf = 0.5\n", + "# Initialize a data frame to store the results of CoMine\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.CoMine(URL, minSup=minSupCount, sep=separator, minAllConf=minAllConf)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['CoMine', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS(),]\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using CoMine algorithm\")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 2 for Retail Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Frequent patterns were generated successfully using CoMine algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 CoMine 100 1872 10.039421 434905088\n", + "1 CoMine 200 809 6.405442 458797056\n", + "2 CoMine 300 419 4.890616 455753728\n", + "3 CoMine 400 275 4.109501 433479680\n", + "4 CoMine 500 186 3.501025 428417024\n" + ] + } + ], + "source": [ + "from PAMI.correlatedPattern.basic import CoMine as alg\n", + "import pandas as pd\n", + "# 
Set the input parameters\n", + "inputFile = 'Transactional_retail.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [100, 200, 300, 400, 500]\n", + "minAllConf = 0.5\n", + "# Initialize a data frame to store the results of CoMine\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.CoMine(inputFile, minSup=minSupCount, sep=separator, minAllConf=minAllConf)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['CoMine', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS(),]\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using CoMine algorithm\")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 3 for Retail Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Frequent patterns were generated successfully using CoMine algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 CoMine 100 1871 10.264555 447922176\n", + "1 CoMine 200 808 8.216191 440242176\n", + "2 CoMine 300 419 4.190705 436215808\n", + "3 CoMine 400 275 3.876611 432005120\n", + "4 CoMine 500 186 2.952072 428527616\n" + ] + } + ], + "source": [ + "from PAMI.correlatedPattern.basic import CoMine as alg\n", + "import pandas as pd\n", + "\n", + "# Set 
the input parameters\n", + "inputFile = 'Transactional_retail.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [100, 200, 300, 400, 500]\n", + "minAllConf = 0.5\n", + "\n", + "# Read the CSV file into a DataFrame\n", + "df = pd.read_csv(inputFile, header=[0], sep=' ')\n", + "df = df.rename(columns={df.columns[0]: \"Transactions\"})\n", + "\n", + "# Ensure all data in the \"Transaction\" column is string before splitting\n", + "df[\"Transactions\"] = df[\"Transactions\"].astype(str)\n", + "\n", + "# Preprocess the dataset\n", + "# transactions = df[\"Transaction\"].apply(lambda x: x.split(separator)).tolist()\n", + "transactions = df['Transactions'].apply(lambda x: x.split(separator))\n", + "transactions = pd.DataFrame(transactions, columns=['Transactions'])\n", + "\n", + "# Initialize a data frame to store the results of CoMine\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.CoMine(transactions, minSup=minSupCount, sep=separator, minAllConf=minAllConf)\n", + " obj.mine()\n", + "\n", + " # Get the patterns found\n", + " patterns = obj.getPatterns()\n", + "\n", + " # Store the results in the data frame\n", + " result.loc[len(result)] = ['CoMine', minSupCount, len(patterns), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(\"Frequent patterns were generated successfully using CoMine algorithm\")\n", + "print(result)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Cases for Chess Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 1 for chess Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correlated patterns were generated successfully using 
CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Frequent patterns were generated successfully using CoMine algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 CoMine 100 1322393 37.272396 647032832\n", + "1 CoMine 200 1322387 38.555083 649850880\n", + "2 CoMine 300 1322385 35.034727 644644864\n", + "3 CoMine 400 1322381 33.540815 645607424\n", + "4 CoMine 500 1322379 30.238773 642048000\n" + ] + } + ], + "source": [ + "from PAMI.correlatedPattern.basic import CoMine as alg\n", + "import pandas as pd\n", + "# Set the input parameters\n", + "URL = 'https://u-aizu.ac.jp/~udayrage/datasets/transactionalDatabases/Transactional_chess.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [100, 200, 300, 400, 500]\n", + "minAllConf = 0.5\n", + "# Initialize a data frame to store the results of CoMine\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.CoMine(URL, minSup=minSupCount, sep=separator, minAllConf=minAllConf)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['CoMine', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS(),]\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using CoMine algorithm\")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 2 for chess Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": 
[ + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Frequent patterns were generated successfully using CoMine algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 CoMine 100 1322393 29.589390 651730944\n", + "1 CoMine 200 1322387 29.159429 646467584\n", + "2 CoMine 300 1322385 29.959191 639229952\n", + "3 CoMine 400 1322381 32.925684 636133376\n", + "4 CoMine 500 1322379 28.632365 636116992\n" + ] + } + ], + "source": [ + "from PAMI.correlatedPattern.basic import CoMine as alg\n", + "import pandas as pd\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_chess1.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [100, 200, 300, 400, 500]\n", + "minAllConf = 0.5\n", + "# Initialize a data frame to store the results of CoMine\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.CoMine(inputFile, minSup=minSupCount, sep=separator, minAllConf=minAllConf)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['CoMine', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS(),]\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using CoMine algorithm\")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 3 for chess Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Correlated patterns were generated successfully using CoMine algorithm\n", + "Frequent patterns were generated successfully using CoMine algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 CoMine 100 1321743 29.900726 583147520\n", + "1 CoMine 200 1321737 26.131184 964255744\n", + "2 CoMine 300 1321735 27.905186 963428352\n", + "3 CoMine 400 1321731 26.916327 953618432\n", + "4 CoMine 500 1321729 29.389611 949780480\n" + ] + } + ], + "source": [ + "from PAMI.correlatedPattern.basic import CoMine as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_chess1.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [100, 200, 300, 400, 500]\n", + "minAllConf = 0.5\n", + "\n", + "# Read the CSV file into a DataFrame\n", + "df = pd.read_csv(inputFile, header=[0], sep=' ')\n", + "df = df.rename(columns={df.columns[0]: \"Transactions\"})\n", + "\n", + "# Ensure all data in the \"Transaction\" column is string before splitting\n", + "df[\"Transactions\"] = df[\"Transactions\"].astype(str)\n", + "\n", + "# Preprocess the dataset\n", + "# transactions = df[\"Transaction\"].apply(lambda x: x.split(separator)).tolist()\n", + "transactions = df['Transactions'].apply(lambda x: x.split(separator))\n", + "transactions = pd.DataFrame(transactions, columns=['Transactions'])\n", + "\n", + "# Initialize a data frame to store the results of CoMine\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = 
alg.CoMine(transactions, minSup=minSupCount, sep=separator, minAllConf=minAllConf)\n", + " obj.mine()\n", + "\n", + " # Get the patterns found\n", + " patterns = obj.getPatterns()\n", + "\n", + " # Store the results in the data frame\n", + " result.loc[len(result)] = ['CoMine', minSupCount, len(patterns), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(\"Frequent patterns were generated successfully using CoMine algorithm\")\n", + "print(result)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/correlatedPattern/basic/CoMineTest.py b/tests/correlatedPattern/basic/CoMineTest.py new file mode 100644 index 00000000..56e52b1a --- /dev/null +++ b/tests/correlatedPattern/basic/CoMineTest.py @@ -0,0 +1,758 @@ +# CoMine is one of the fundamental algorithm to discover correlated patterns in a transactional database. 
+# +# **Importing this algorithm into a Python program** +# -------------------------------------------------------- +# +# +# from PAMI.correlatedPattern.basic import CoMine as alg +# +# obj = alg.CoMine(iFile, minSup, minAllConf, sep) +# +# obj.mine() +# +# Patterns = obj.getPatterns() +# +# print("Total number of Patterns:", len(Patterns)) +# +# obj.save(oFile) +# +# Df = obj.getPatternsAsDataFrame() +# +# memUSS = obj.getMemoryUSS() +# +# print("Total Memory in USS:", memUSS) +# +# memRSS = obj.getMemoryRSS() +# +# print("Total Memory in RSS:", memRSS) +# +# run = obj.getRuntime() +# +# print("Total ExecutionTime in seconds:", run) +# + + +__copyright__ = """ +Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
class _Node:
    """
    A single node of the correlatedPatternTree.

    :Attributes:

        itemId : int
            item stored in the node (-1 until an item is assigned)
        counter : int
            support count carried by the node (starts at 1)
        parent : _Node
            parent of the node, None until it is linked
        child : list
            children of the node
        nodeLink : _Node
            next node that carries the same itemId, None until it is linked

    :Methods:

        getChild(id1)
            return the direct child whose itemId equals id1
    """

    def __init__(self) -> None:
        self.itemId, self.counter = -1, 1
        self.parent, self.child, self.nodeLink = None, [], None

    def getChild(self, id1) -> Union[None, '_Node']:
        """
        Look up a direct child by item id.

        :param id1: item id to search for
        :type id1: int
        :return: the first child whose itemId equals id1, or None when there is no such child
        :rtype: _Node
        """
        return next((kid for kid in self.child if kid.itemId == id1), None)
def createHeaderList(self, mapSupport: Dict[int, int], minSup: int) -> None:
    """
    Build the header list of the tree from the items that satisfy minSup.

    The items are kept in the iteration order of ``mapSupport``; they are not sorted by support.

    :param mapSupport: items mapped to their supports
    :type mapSupport: dict
    :param minSup: smallest support an item needs in order to enter the header list
    :type minSup: int or float
    :return: None
    """
    # The earlier version also built a support-sorted list of every key and tested each kept
    # item against it. That test was always true (every key of mapSupport is in that list),
    # so it only added a quadratic scan; the resulting header list is unchanged.
    self.headerList = [item for item, support in mapSupport.items() if support >= minSup]
If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. + :param minAllConf: float : + The user can specify minAllConf values within the range (0, 1). + :param sep: str : + This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator. + + :Attributes: + + memoryUSS : float + To store the total amount of USS memory consumed by the program + memoryRSS : float + To store the total amount of RSS memory consumed by the program + startTime:float + To record the start time of the mining process + endTime:float + To record the completion time of the mining process + minSup : int + The user given minSup + minAllConf: float + The user given minimum all confidence Ratio(should be in range of 0 to 1) + Database : list + To store the transactions of a database in list + mapSupport : Dictionary + To maintain the information of item and their frequency + lno : int + it represents the total no of transactions + tree : class + it represents the Tree class + itemSetCount : int + it represents the total no of patterns + finalPatterns : dict + it represents to store the patterns + itemSetBuffer : list + it represents the store the items in mining + maxPatternLength : int + it represents the constraint for pattern length + + Execution methods + ================= + + **Terminal command** + + .. code-block:: console + + Format: + + (.venv) $ python3 CoMine.py + + Example Usage: + + (.venv) $ python3 CoMine.py sampleTDB.txt output.txt 0.25 0.2 + + .. note:: minSup can be specified in support count or a value between 0 and 1. + + **Calling from a python program** + + .. 
code-block:: python + + from PAMI.correlatedPattern.basic import CoMine as alg + + obj = alg.CoMine(iFile, minSup, minAllConf,sep) + + obj.mine() + + patterns = obj.getPatterns() + + print("Total number of Patterns:", len(patterns)) + + obj.savePatterns(oFile) + + df = obj.getPatternsAsDataFrame() + + memUSS = obj.getMemoryUSS() + + print("Total Memory in USS:", memUSS) + + memRSS = obj.getMemoryRSS() + + print("Total Memory in RSS", memRSS) + + run = obj.getRuntime() + + print("Total ExecutionTime in seconds:", run) + + Credits + ======= + + The complete program was written by B.Sai Chitra under the supervision of Professor Rage Uday Kiran. + + """ + + _startTime = float() + _endTime = float() + _minSup = float() + _finalPatterns = {} + _iFile = " " + _oFile = " " + _memoryUSS = float() + _memoryRSS = float() + _minAllConf = 0.0 + _Database = [] + _mapSupport = {} + _lno = 0 + _tree = str() + _itemSetBuffer = None + _fpNodeTempBuffer = [] + _itemSetCount = 0 + _maxPatternLength = 1000 + _sep = "\t" + + def __init__(self, iFile: Union[str, _pd.DataFrame], minSup: Union[int, float, str], minAllConf: float, + sep: str = "\t") -> None: + """ + param iFile: give the input file + type iFile: str or DataFrame or url + param minSup: minimum support + type minSup: int or float + param sep: Delimiter of input file + type sep: str + """ + + super().__init__(iFile, minSup, minAllConf, sep) + + def _creatingItemSets(self) -> None: + """ + Storing the complete transactions of the database/input file in a database variable + """ + self._Database = [] + if isinstance(self._iFile, _ab._pd.DataFrame): + if self._iFile.empty: + print("its empty..") + i = self._iFile.columns.values.tolist() + if 'Transactions' in i: + self._Database = self._iFile['Transactions'].tolist() + if isinstance(self._iFile, str): + if _ab._validators.url(self._iFile): + data = _ab._urlopen(self._iFile) + for line in data: + line.strip() + line = line.decode("utf-8") + temp = [i.rstrip() for i in 
def _getRatio(self, prefix: List[int], prefixLength: int, s: int) -> float:
    """
    Compute the all-confidence value of an itemset.

    The value is ``s`` divided by the largest single-item support (taken from ``self._mapSupport``)
    among the first ``prefixLength`` items of ``prefix``.

    :param prefix: list holding the items of the pattern
    :type prefix: list
    :param prefixLength: number of leading entries of prefix that belong to the pattern
    :type prefixLength: int
    :param s: support of the pattern
    :type s: int
    :return: all-confidence of the pattern
    :rtype: float
    """
    # The leading 0 reproduces the starting value of the running maximum, so an empty
    # pattern still ends in a division by zero exactly as before.
    itemSupports = [self._mapSupport.get(prefix[k]) for k in range(prefixLength)]
    return s / max([0] + itemSupports)
def _saveAllCombinations(self, tempBuffer, s, position, prefix, prefixLength) -> None:
    """
    Save every non-empty combination of the items found on a single branch of the correlatedPatternTree.

    Each combination is appended to the first ``prefixLength`` entries of ``prefix`` and handed to
    ``self._saveItemSet`` together with the support ``s``.

    :param tempBuffer: nodes of the single branch; only the first ``position`` entries are used
    :type tempBuffer: list
    :param s: support passed on for every combination
    :type s: int
    :param position: number of nodes of tempBuffer that belong to the branch
    :type position: int
    :param prefix: working list whose first ``prefixLength`` entries form the current pattern prefix
    :type prefix: list
    :param prefixLength: number of leading entries of prefix that are in use
    :type prefixLength: int
    :return: None
    """
    branchItems = [tempBuffer[j].itemId for j in range(position)]
    for mask in range(1, 1 << position):
        chosen = [branchItems[j] for j in range(position) if mask & (1 << j)]
        # Overwrite the tail of the shared prefix instead of inserting into it: inserting left
        # every earlier combination behind as stale entries, so the list kept growing with each
        # combination. Only the leading entries counted by the length argument are read
        # by _saveItemSet, so replacing the tail yields the same saved patterns.
        prefix[prefixLength:] = chosen
        self._saveItemSet(prefix, prefixLength + len(chosen), s)
currentNode.child[0] + if singlePath is True: + self._saveAllCombinations(self._fpNodeTempBuffer, s, position, prefix, prefixLength) + else: + for i in reversed(correlatedPatternTree.headerList): + item = i + support = mapSupport[i] + betaSupport = support + prefix.insert(prefixLength, item) + self._saveItemSet(prefix, prefixLength + 1, betaSupport) + if prefixLength + 1 < self._maxPatternLength: + prefixPaths = [] + path = correlatedPatternTree.mapItemNodes.get(item) + mapSupportBeta = {} + while path is not None: + if path.parent.itemId != -1: + prefixPath = [] + prefixPath.append(path) + pathCount = path.counter + parent1 = path.parent + while parent1.itemId != -1: + prefixPath.append(parent1) + if mapSupportBeta.get(parent1.itemId) is None: + mapSupportBeta[parent1.itemId] = pathCount + else: + mapSupportBeta[parent1.itemId] = mapSupportBeta[parent1.itemId] + pathCount + parent1 = parent1.parent + prefixPaths.append(prefixPath) + path = path.nodeLink + treeBeta = _Tree() + for k in prefixPaths: + treeBeta.addPrefixPath(k, mapSupportBeta, self._minSup) + if len(treeBeta.root.child) > 0: + treeBeta.createHeaderList(mapSupportBeta, self._minSup) + self._correlatedPatternGrowthGenerate(treeBeta, prefix, prefixLength + 1, mapSupportBeta) + + @deprecated( + "It is recommended to use 'mine()' instead of 'startMine()' for mining process. 
def mine(self) -> None:
    """
    Run the complete CoMine mining process.

    The database is loaded, minSup is converted with ``_convert``, the support of every single item
    is counted, items below minSup are dropped, the correlatedPatternTree is built from the remaining
    items and then mined. Start/end time and the USS/RSS memory figures are recorded.

    :return: None
    :raises Exception: if ``self._iFile`` is None
    """
    self._startTime = _ab._time.time()
    if self._iFile is None:
        raise Exception("Please enter the file path or file name:")
    self._creatingItemSets()
    self._minSup = self._convert(self._minSup)
    self._tree = _Tree()
    self._finalPatterns = {}
    # Per-run state: the class-level defaults are a shared list and a counter that is only ever
    # incremented, so start every run (and every instance) from a clean slate.
    self._itemSetCount = 0
    self._fpNodeTempBuffer = []
    self._correlatedOneItem()
    self._mapSupport = {k: v for k, v in self._mapSupport.items() if v >= self._minSup}
    # One global rank for all frequent items (support descending, ties in first-seen order).
    # Ordering each transaction only by support left equal-support items in their
    # per-transaction order, so the same pair could be inserted in both orders in different
    # branches and its joint support would then be split between two patterns.
    ordered = sorted(self._mapSupport, key=self._mapSupport.get, reverse=True)
    rank = {item: position for position, item in enumerate(ordered)}
    for transaction in self._Database:
        # Dictionary membership is O(1); the previous test scanned a list for every item.
        frequent = sorted((item for item in transaction if item in rank), key=rank.__getitem__)
        self._tree.addTransaction(frequent)
    self._tree.createHeaderList(self._mapSupport, self._minSup)
    if len(self._tree.headerList) > 0:
        self._itemSetBuffer = []
        self._correlatedPatternGrowthGenerate(self._tree, self._itemSetBuffer, 0, self._mapSupport)
    print("Correlated patterns were generated successfully using CoMine algorithm")
    self._endTime = _ab._time.time()
    process = _ab._psutil.Process(_ab._os.getpid())
    self._memoryUSS = process.memory_full_info().uss
    self._memoryRSS = process.memory_info().rss
def save(self, outFile) -> None:
    """
    Write the complete set of correlated patterns to an output file.

    One pattern is written per line as the tab-separated items, followed by ``:support`` and
    ``:all-confidence``.

    :param outFile: name of the output file
    :type outFile: str
    :return: None
    """
    self._oFile = outFile
    # The context manager closes (and flushes) the file even if a write fails; the handle was
    # previously left open.
    with open(self._oFile, 'w+') as writer:
        for pattern, measures in self._finalPatterns.items():
            items = "\t".join(str(item) for item in pattern).strip()
            patternsAndSupport = items + ":" + str(measures[0]) + ":" + str(measures[1])
            writer.write("%s \n" % patternsAndSupport)
float(_ab._sys.argv[3]) + oFile = _ab._sys.argv[4] + sep = _ab._sys.argv[5] + if sep == "\\t": + sep = "\t" + print("Input File:", iFile) + print("Minimum Support List:", minSup) + print("Minimum All Confidence:", minAllConf) + print("Output File:", oFile) + print("Separator:", sep) + _ap = CoMine(iFile=iFile, minSup=minSup, minAllConf=minAllConf, sep=sep) + _ap.mine() + _ap.save(oFile) + print("Total number of Frequent Patterns:", len(_ap.getPatterns())) + print("Total Memory in USS:", _ap.getMemoryUSS()) + print("Total Memory in RSS:", _ap.getMemoryRSS()) + print("Total ExecutionTime in ms:", _ap.getRuntime()) + elif len(_ab._sys.argv) == 5: + iFile = _ab._sys.argv[1] + minSup = _ab._sys.argv[2] + minAllConf = float(_ab._sys.argv[3]) + oFile = _ab._sys.argv[4] + + print("Input File:", iFile) + print("Minimum Support List:", minSup) + print("Minimum All Confidence:", minAllConf) + print("Output File:", oFile) + + _ap = CoMine(iFile=iFile, minSup=minSup, minAllConf=minAllConf) + _ap.mine() + _ap.save(oFile) + + print("Total number of Frequent Patterns:", len(_ap.getPatterns())) + print("Total Memory in USS:", _ap.getMemoryUSS()) + print("Total Memory in RSS:", _ap.getMemoryRSS()) + print("Total ExecutionTime in ms:", _ap.getRuntime()) + + + else: + raise WrongNumberOfArguments( + "Please provide four arguments: iFile, minSup and oFile \n""or Please provide five arguments: iFile, minSup, oFile and sep" + ) diff --git a/tests/frequentPattern/basic/ECLATDiffsetTest.ipynb b/tests/frequentPattern/basic/ECLATDiffsetTest.ipynb new file mode 100644 index 00000000..6234e821 --- /dev/null +++ b/tests/frequentPattern/basic/ECLATDiffsetTest.ipynb @@ -0,0 +1,653 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pami in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (2024.5.29.2)\n", + "Requirement already 
satisfied: psutil in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (5.9.8)\n", + "Requirement already satisfied: pandas in c:\\python310\\lib\\site-packages (from pami) (2.2.1)\n", + "Requirement already satisfied: plotly in c:\\python310\\lib\\site-packages (from pami) (5.18.0)\n", + "Requirement already satisfied: matplotlib in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (3.9.0)\n", + "Requirement already satisfied: resource in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (0.2.1)\n", + "Requirement already satisfied: validators in c:\\python310\\lib\\site-packages (from pami) (0.28.3)\n", + "Requirement already satisfied: urllib3 in c:\\python310\\lib\\site-packages (from pami) (2.2.1)\n", + "Requirement already satisfied: Pillow in c:\\python310\\lib\\site-packages (from pami) (10.3.0)\n", + "Requirement already satisfied: numpy in c:\\python310\\lib\\site-packages (from pami) (1.26.4)\n", + "Requirement already satisfied: sphinx in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (7.3.7)\n", + "Requirement already satisfied: sphinx-rtd-theme in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (2.0.0)\n", + "Requirement already satisfied: discord.py in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (2.3.2)\n", + "Requirement already satisfied: networkx in c:\\python310\\lib\\site-packages (from pami) (3.3)\n", + "Requirement already satisfied: deprecated in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (1.2.14)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in c:\\python310\\lib\\site-packages (from deprecated->pami) (1.16.0)\n", + "Requirement already satisfied: aiohttp<4,>=3.7.4 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from discord.py->pami) (3.9.5)\n", + "Requirement already satisfied: 
contourpy>=1.0.1 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from matplotlib->pami) (1.2.1)\n", + "Requirement already satisfied: cycler>=0.10 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from matplotlib->pami) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in c:\\python310\\lib\\site-packages (from matplotlib->pami) (4.52.4)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in c:\\python310\\lib\\site-packages (from matplotlib->pami) (1.4.5)\n", + "Requirement already satisfied: packaging>=20.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from matplotlib->pami) (24.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in c:\\python310\\lib\\site-packages (from matplotlib->pami) (3.1.2)\n", + "Requirement already satisfied: python-dateutil>=2.7 in c:\\python310\\lib\\site-packages (from matplotlib->pami) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in c:\\python310\\lib\\site-packages (from pandas->pami) (2023.3.post1)\n", + "Requirement already satisfied: tzdata>=2022.7 in c:\\python310\\lib\\site-packages (from pandas->pami) (2023.4)\n", + "Requirement already satisfied: tenacity>=6.2.0 in c:\\python310\\lib\\site-packages (from plotly->pami) (8.2.3)\n", + "Requirement already satisfied: JsonForm>=0.0.2 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from resource->pami) (0.0.2)\n", + "Requirement already satisfied: JsonSir>=0.0.2 in c:\\python310\\lib\\site-packages (from resource->pami) (0.0.2)\n", + "Requirement already satisfied: python-easyconfig>=0.1.0 in c:\\python310\\lib\\site-packages (from resource->pami) (0.1.7)\n", + "Requirement already satisfied: sphinxcontrib-applehelp in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.8)\n", + "Requirement already satisfied: sphinxcontrib-devhelp in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.6)\n", + "Requirement already satisfied: 
sphinxcontrib-jsmath in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.1)\n", + "Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in c:\\python310\\lib\\site-packages (from sphinx->pami) (2.0.5)\n", + "Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.9 in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.1.10)\n", + "Requirement already satisfied: sphinxcontrib-qthelp in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.7)\n", + "Requirement already satisfied: Jinja2>=3.0 in c:\\python310\\lib\\site-packages (from sphinx->pami) (3.1.2)\n", + "Requirement already satisfied: Pygments>=2.14 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (2.18.0)\n", + "Requirement already satisfied: docutils<0.22,>=0.18.1 in c:\\python310\\lib\\site-packages (from sphinx->pami) (0.20.1)\n", + "Requirement already satisfied: snowballstemmer>=2.0 in c:\\python310\\lib\\site-packages (from sphinx->pami) (2.2.0)\n", + "Requirement already satisfied: babel>=2.9 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (2.15.0)\n", + "Requirement already satisfied: alabaster~=0.7.14 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (0.7.16)\n", + "Requirement already satisfied: imagesize>=1.3 in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.4.1)\n", + "Requirement already satisfied: requests>=2.25.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (2.32.2)\n", + "Requirement already satisfied: tomli>=2 in c:\\python310\\lib\\site-packages (from sphinx->pami) (2.0.1)\n", + "Requirement already satisfied: colorama>=0.4.5 in c:\\python310\\lib\\site-packages (from sphinx->pami) (0.4.6)\n", + "Requirement already satisfied: sphinxcontrib-jquery<5,>=4 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx-rtd-theme->pami) (4.1)\n", + 
"Requirement already satisfied: aiosignal>=1.1.2 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in c:\\python310\\lib\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in c:\\python310\\lib\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in c:\\python310\\lib\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (4.0.3)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in c:\\python310\\lib\\site-packages (from Jinja2>=3.0->sphinx->pami) (2.1.1)\n", + "Requirement already satisfied: jsonschema in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from JsonForm>=0.0.2->resource->pami) (4.22.0)\n", + "Requirement already satisfied: six>=1.5 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from python-dateutil>=2.7->matplotlib->pami) (1.16.0)\n", + "Requirement already satisfied: PyYAML in c:\\python310\\lib\\site-packages (from python-easyconfig>=0.1.0->resource->pami) (6.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\python310\\lib\\site-packages (from requests>=2.25.0->sphinx->pami) (3.0.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in c:\\python310\\lib\\site-packages (from requests>=2.25.0->sphinx->pami) (3.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from 
requests>=2.25.0->sphinx->pami) (2024.2.2)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in c:\\python310\\lib\\site-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (0.18.1)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: Ignoring invalid distribution - (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -ip (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -p (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution - (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -ip (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -p (c:\\python310\\lib\\site-packages)\n" + ] + } + ], + "source": [ + "!pip install -U pami" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test cases for T10I4D100K Datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test case 1 for T10I4D100K Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully 
using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLATDiffset algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLATDiffset 1500 237 333.679456 1138515968\n", + "1 ECLATDiffset 2000 155 152.367364 824270848\n", + "2 ECLATDiffset 2500 107 78.776027 622489600\n", + "3 ECLATDiffset 3000 60 24.674447 425213952\n", + "4 ECLATDiffset 3500 40 12.997767 341245952\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLATDiffset as alg\n", + "import pandas as pd\n", + "# Set the input parameters\n", + "URL = 'https://u-aizu.ac.jp/~udayrage/datasets/transactionalDatabases/Transactional_T10I4D100K.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [1500, 2000, 2500, 3000, 3500]\n", + "# Initialize a data frame to store the results of ECLATDiffset\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLATDiffset(URL, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLATDiffset', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS(),]\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using ECLATDiffset algorithm\")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test case 2 for T10I4D100K Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were 
generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLATDiffset algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLATDiffset 1500 237 293.271607 1018994688\n", + "1 ECLATDiffset 2000 155 110.202376 844914688\n", + "2 ECLATDiffset 2500 107 57.827067 642691072\n", + "3 ECLATDiffset 3000 60 19.332573 444780544\n", + "4 ECLATDiffset 3500 40 8.569055 372994048\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLATDiffset as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_T10I4D100K.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [1500, 2000, 2500, 3000, 3500]\n", + "\n", + "# Initialize a data frame to store the results of ECLATDiffset\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLATDiffset(inputFile, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLATDiffset', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using ECLATDiffset algorithm\")\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test case 3 for T10I4D100K Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns 
were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLATDiffset algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLATDiffset 1500 237 287.429025 806281216\n", + "1 ECLATDiffset 2000 155 126.705970 945033216\n", + "2 ECLATDiffset 2500 107 61.042020 732131328\n", + "3 ECLATDiffset 3000 60 21.202885 546054144\n", + "4 ECLATDiffset 3500 40 9.498319 462487552\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLATDiffset as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_T10I4D100K.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [1500, 2000, 2500, 3000, 3500]\n", + "\n", + "# Read the CSV file into a DataFrame\n", + "df = pd.read_csv(inputFile, header=[0], sep=' ')\n", + "df = df.rename(columns={df.columns[0]: \"Transactions\"})\n", + "\n", + "# Ensure all data in the \"Transaction\" column is string before splitting\n", + "df[\"Transactions\"] = df[\"Transactions\"].astype(str)\n", + "\n", + "# Preprocess the dataset\n", + "# transactions = df[\"Transaction\"].apply(lambda x: x.split(separator)).tolist()\n", + "transactions = df['Transactions'].apply(lambda x: x.split(separator))\n", + "transactions = pd.DataFrame(transactions, columns=['Transactions'])\n", + "\n", + "# Initialize a data frame to store the results of ECLATDiffset\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLATDiffset(transactions, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " \n", + " # Get the patterns found\n", + " patterns = obj.getPatterns()\n", + " \n", + " # 
Store the results in the data frame\n", + " result.loc[len(result)] = ['ECLATDiffset', minSupCount, len(patterns), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(\"Frequent patterns were generated successfully using ECLATDiffset algorithm\")\n", + "print(result)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Cases for Retail Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 1 for Retail Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLATDiffset algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLATDiffset 500 468 274.229504 805797888\n", + "1 ECLATDiffset 1000 135 28.987289 914071552\n", + "2 ECLATDiffset 1500 68 9.670796 533106688\n", + "3 ECLATDiffset 2000 45 8.276896 394313728\n", + "4 ECLATDiffset 2500 34 6.801466 333340672\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLATDiffset as alg\n", + "import pandas as pd\n", + "# Set the input parameters\n", + "URL = 'https://u-aizu.ac.jp/~udayrage/datasets/transactionalDatabases/Transactional_retail.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "# Initialize a data frame to store the results of ECLATDiffset\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "# Execute the 
algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLATDiffset(URL, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLATDiffset', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS(),]\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using ECLATDiffset algorithm\")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 2 for Retail Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLATDiffset algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLATDiffset 500 468 244.238339 3028475904\n", + "1 ECLATDiffset 1000 135 23.635539 1029160960\n", + "2 ECLATDiffset 1500 68 5.130960 657547264\n", + "3 ECLATDiffset 2000 45 3.080636 509980672\n", + "4 ECLATDiffset 2500 34 2.160706 456544256\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLATDiffset as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_retail.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "\n", + "# Initialize a data frame to store the results of ECLATDiffset\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 
'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLATDiffset(inputFile, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLATDiffset', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using ECLATDiffset algorithm\")\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 3 for Retail Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLATDiffset algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLATDiffset 500 468 242.327266 3014520832\n", + "1 ECLATDiffset 1000 135 21.199661 1033453568\n", + "2 ECLATDiffset 1500 68 4.248463 651497472\n", + "3 ECLATDiffset 2000 45 2.794304 513015808\n", + "4 ECLATDiffset 2500 34 1.727216 449040384\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLATDiffset as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_retail.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "\n", + "# Read the CSV file into a DataFrame\n", + "df = 
pd.read_csv(inputFile, header=[0], sep=' ')\n", + "df = df.rename(columns={df.columns[0]: \"Transactions\"})\n", + "\n", + "# Ensure all data in the \"Transaction\" column is string before splitting\n", + "df[\"Transactions\"] = df[\"Transactions\"].astype(str)\n", + "\n", + "# Preprocess the dataset\n", + "# transactions = df[\"Transaction\"].apply(lambda x: x.split(separator)).tolist()\n", + "transactions = df['Transactions'].apply(lambda x: x.split(separator))\n", + "transactions = pd.DataFrame(transactions, columns=['Transactions'])\n", + "\n", + "# Initialize a data frame to store the results of ECLATDiffset\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLATDiffset(transactions, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " \n", + " # Get the patterns found\n", + " patterns = obj.getPatterns()\n", + " \n", + " # Store the results in the data frame\n", + " result.loc[len(result)] = ['ECLATDiffset', minSupCount, len(patterns), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(\"Frequent patterns were generated successfully using ECLATDiffset algorithm\")\n", + "print(result)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Cases for Chess Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 1 for chess Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLATDiffset algorithm\n", + " algorithm minSup patterns runtime 
memory\n", + "0 ECLATDiffset 2500 11474 5.830794 483733504\n", + "1 ECLATDiffset 3000 152 2.120591 117514240\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLATDiffset as alg\n", + "import pandas as pd\n", + "# Set the input parameters\n", + "URL = 'https://u-aizu.ac.jp/~udayrage/datasets/transactionalDatabases/Transactional_chess.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [2500, 3000]\n", + "# Initialize a data frame to store the results of ECLATDiffset\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLATDiffset(URL, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLATDiffset', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS(),]\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using ECLATDiffset algorithm\")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 2 for chess Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLATDiffset algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLATDiffset 2500 11474 3.284947 484827136\n", + "1 ECLATDiffset 3000 152 0.085676 119513088\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLATDiffset as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_chess1.csv'\n", + "separator = 
'\\t'\n", + "minimumSupportCountList = [2500, 3000]\n", + "\n", + "# Initialize a data frame to store the results of ECLATDiffset\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLATDiffset(inputFile, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLATDiffset', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using ECLATDiffset algorithm\")\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 3 for chess Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLAT Diffset algorithm\n", + "Frequent patterns were generated successfully using ECLATDiffset algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLATDiffset 2500 11401 3.016511 487473152\n", + "1 ECLATDiffset 3000 151 0.063394 126992384\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLATDiffset as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_chess1.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [2500, 3000]\n", + "\n", + "# Read the CSV file into a DataFrame\n", + "df = pd.read_csv(inputFile, header=[0], sep=' ')\n", + "df = df.rename(columns={df.columns[0]: \"Transactions\"})\n", + "\n", + "# Ensure all data in the \"Transaction\" column is string before splitting\n", + "df[\"Transactions\"] = 
df[\"Transactions\"].astype(str)\n", + "\n", + "# Preprocess the dataset\n", + "# transactions = df[\"Transaction\"].apply(lambda x: x.split(separator)).tolist()\n", + "transactions = df['Transactions'].apply(lambda x: x.split(separator))\n", + "transactions = pd.DataFrame(transactions, columns=['Transactions'])\n", + "\n", + "# Initialize a data frame to store the results of ECLATDiffset\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLATDiffset(transactions, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " \n", + " # Get the patterns found\n", + " patterns = obj.getPatterns()\n", + " \n", + " # Store the results in the data frame\n", + " result.loc[len(result)] = ['ECLATDiffset', minSupCount, len(patterns), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(\"Frequent patterns were generated successfully using ECLATDiffset algorithm\")\n", + "print(result)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/frequentPattern/basic/ECLATDiffsetTest.py b/tests/frequentPattern/basic/ECLATDiffsetTest.py new file mode 100644 index 00000000..fdab6259 --- /dev/null +++ b/tests/frequentPattern/basic/ECLATDiffsetTest.py @@ -0,0 +1,431 @@ +# ECLATDiffest uses diffset to extract the frequent patterns in a transactional database. 
#
# **Importing this algorithm into a python program**
#
#             import PAMI.frequentPattern.basic.ECLATDiffset as alg
#
#             iFile = 'sampleDB.txt'
#
#             minSup = 10  # can also be specified between 0 and 1
#
#             obj = alg.ECLATDiffset(iFile, minSup)
#
#             obj.mine()
#
#             frequentPatterns = obj.getPatterns()
#
#             print("Total number of Frequent Patterns:", len(frequentPatterns))
#
#             obj.save(oFile)
#
#             Df = obj.getPatternsAsDataFrame()
#
#             memUSS = obj.getMemoryUSS()
#
#             print("Total Memory in USS:", memUSS)
#
#             memRSS = obj.getMemoryRSS()
#
#             print("Total Memory in RSS", memRSS)
#
#             run = obj.getRuntime()
#
#             print("Total ExecutionTime in seconds:", run)
#


__copyright__ = """
Copyright (C)  2021 Rage Uday Kiran

     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation, either version 3 of the License, or
     (at your option) any later version.

     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""

from PAMI.frequentPattern.basic import abstract as _ab
from deprecated import deprecated


class WrongNumberOfArguments(Exception):
    """Raised by the command-line driver when the argument count is not 3 or 4."""


class ECLATDiffset(_ab._frequentPatterns):
    """
    :**Description**:   ECLATDiffset uses diffset to extract the frequent patterns in a transactional database.

    :**Reference**:  KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining
            August 2003 Pages 326–335 https://doi.org/10.1145/956750.956788

    :**Parameters**:    - **iFile** (*str or URL or dataFrame*) -- *Name of the Input file to mine complete set of frequent patterns.*
                        - **oFile** (*str*) -- *Name of the output file to store complete set of frequent patterns*
                        - **minSup** (*int or float or str*) -- *The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. A float (or a string containing '.') is treated as a proportion and multiplied by the number of transactions.*
                        - **sep** (*str*) -- **This variable is used to distinguish items from one another in a transaction. The default separator is tab space. However, the users can override their default separator.**

    :**Attributes**:    - **startTime** (*float*) -- *To record the start time of the mining process.*
                        - **endTime** (*float*) -- *To record the end time of the mining process.*
                        - **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable. Each key is a tab-joined pattern and each value is the list [support, diffset].*
                        - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.*
                        - **memoryRSS** *(float*) -- *To store the total amount of RSS memory consumed by the program.*
                        - **Database** (*list*) -- *To store the transactions of a database in list.*


    Execution methods
    =================

    **Terminal command**

    .. code-block:: console

      Format:

      (.venv) $ python3 ECLATDiffset.py <inputFile> <minSup> <outputFile> [<sep>]

      Example Usage:

      (.venv) $ python3 ECLATDiffset.py sampleDB.txt 10 patterns.txt

    .. note:: minSup can be specified in support count or a value between 0 and 1. On the command line a value containing '.' is interpreted as a proportion.


    **Calling from a python program**

    .. code-block:: python

            import PAMI.frequentPattern.basic.ECLATDiffset as alg

            iFile = 'sampleDB.txt'

            minSup = 10  # can also be specified between 0 and 1

            obj = alg.ECLATDiffset(iFile, minSup)

            obj.mine()

            frequentPatterns = obj.getPatterns()

            print("Total number of Frequent Patterns:", len(frequentPatterns))

            obj.save(oFile)

            Df = obj.getPatternsAsDataFrame()

            memUSS = obj.getMemoryUSS()

            print("Total Memory in USS:", memUSS)

            memRSS = obj.getMemoryRSS()

            print("Total Memory in RSS", memRSS)

            run = obj.getRuntime()

            print("Total ExecutionTime in seconds:", run)


    Credits:
    ========

    The complete program was written by Kundai and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran.

    """

    # Class-level defaults. The mutable ones ({} / [] / set()) are shared
    # between instances until mine() rebinds them on the instance, which it
    # does at the start of every run.
    _minSup = float()
    _startTime = float()
    _endTime = float()
    _finalPatterns = {}
    _iFile = " "
    _oFile = " "
    _sep = " "
    _memoryUSS = float()
    _memoryRSS = float()
    _Database = []
    _diffSets = {}
    _trans_set = set()

    def _parseTransaction(self, line):
        """
        Split one raw text line into its items.

        :param line: one line of the input, already decoded to str
        :type line: str
        :return: the items of the line with trailing whitespace removed and empty items dropped
        :rtype: list
        """
        parts = (part.rstrip() for part in line.split(self._sep))
        return [part for part in parts if part]

    def _creatingItemSets(self):
        """
        Storing the complete transactions of the database/input file in a database variable

        The input may be a DataFrame with a 'Transactions' column, a URL, or a
        path to a local text file. Every input line becomes one transaction,
        including blank lines (which become empty transactions).
        """
        self._Database = []
        if isinstance(self._iFile, _ab._pd.DataFrame):
            if self._iFile.empty:
                print("its empty..")
            if 'Transactions' in self._iFile.columns:
                # Each cell is iterated item by item later on, so it is
                # expected to already be a collection of items.
                self._Database = self._iFile['Transactions'].tolist()
        if isinstance(self._iFile, str):
            if _ab._validators.url(self._iFile):
                data = _ab._urlopen(self._iFile)
                for line in data:
                    self._Database.append(self._parseTransaction(line.decode("utf-8")))
            else:
                try:
                    with open(self._iFile, 'r', encoding='utf-8') as f:
                        for line in f:
                            self._Database.append(self._parseTransaction(line))
                except IOError:
                    print("File Not Found")
                    quit()

    def _convert(self, value):
        """

        To convert the user specified minSup value

        Integers are used as an absolute support count. Floats, and strings
        containing '.', are treated as a proportion of the database size, so
        the database must be loaded before this is called.

        :param value: user specified minSup value
        :return: converted type
        """
        if isinstance(value, int):
            value = int(value)
        elif isinstance(value, float):
            value = len(self._Database) * value
        elif isinstance(value, str):
            if '.' in value:
                value = len(self._Database) * float(value)
            else:
                value = int(value)
        return value

    def _getUniqueItemList(self):
        """
        Build the diffset of every frequent single item.

        Transactions are numbered from 1. For each item whose support reaches
        minSup, ``self._diffSets[item]`` is set to ``[support, diffset]`` where
        the diffset is the set of transaction numbers NOT containing the item.

        :return: the frequent single items in sorted order
        :rtype: list
        """
        # tidSets maps every item to the set of transaction numbers containing it.
        # A single pass is enough: the tid-sets are sets, so re-scanning the
        # database cannot change them.
        tidSets = {}
        for transNum, transaction in enumerate(self._Database, start=1):
            self._trans_set.add(transNum)
            for item in transaction:
                tidSets.setdefault(item, set()).add(transNum)

        # uniqueItem will store all frequent 1 items
        uniqueItem = []
        for item, tids in tidSets.items():
            support = len(tids)
            if support >= self._minSup:
                self._diffSets[item] = [support, self._trans_set.difference(tids)]
                uniqueItem.append(item)
        uniqueItem.sort()
        return uniqueItem

    def _runDeclat(self, candidateList):
        """

        It will generate the combinations of frequent items

        Two candidates are joined when they share everything but their last
        item. Every frequent join is stored in ``self._diffSets`` and the
        method recurses on the newly created candidates. Nothing is returned.

        :param candidateList: the frequent patterns of the current length, each a whitespace-separated string of items
        :type candidateList: list
        """

        newList = []
        for i in range(len(candidateList)):
            item1 = candidateList[i]
            iPrefix = item1.split()[:-1]
            for j in range(i + 1, len(candidateList)):
                item2 = candidateList[j]
                jList = item2.split()
                if iPrefix != jList[:-1]:
                    # Candidates sharing a prefix are expected to be adjacent,
                    # so the first mismatch ends the scan for item1.
                    break
                # diffset(PXY) = diffset(PY) - diffset(PX);
                # support(PXY) = support(PX) - |diffset(PXY)|
                unionDiffSet = self._diffSets[item2][1].difference(self._diffSets[item1][1])
                unionSup = self._diffSets[item1][0] - len(unionDiffSet)
                if unionSup >= self._minSup:
                    newKey = item1 + "\t" + jList[-1]
                    self._diffSets[newKey] = [unionSup, unionDiffSet]
                    newList.append(newKey)

        if newList:
            self._runDeclat(newList)

    @deprecated(
        "It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.")
    def startMine(self):
        """
        Frequent pattern mining process will start from here
        """
        self.mine()

    def mine(self):
        """
        Frequent pattern mining process will start from here

        Loads the database, converts minSup, mines all frequent patterns and
        records the runtime and memory figures returned by the getters.

        :raises Exception: if the input file or minSup is None
        """

        self._startTime = _ab._time.time()
        self._Database = []
        self._finalPatterns = {}
        self._diffSets = {}
        self._trans_set = set()
        if self._iFile is None:
            raise Exception("Please enter the file path or file name:")
        if self._minSup is None:
            raise Exception("Please enter the Minimum Support")
        self._creatingItemSets()
        # NOTE(review): the converted value overwrites self._minSup, so calling
        # mine() again after a proportional minSup rescales the already
        # converted count by the database size a second time.
        self._minSup = self._convert(self._minSup)
        uniqueItemList = self._getUniqueItemList()
        self._runDeclat(uniqueItemList)
        self._finalPatterns = self._diffSets
        self._endTime = _ab._time.time()
        process = _ab._psutil.Process(_ab._os.getpid())
        self._memoryUSS = process.memory_full_info().uss
        self._memoryRSS = process.memory_info().rss
        print("Frequent patterns were generated successfully using ECLAT Diffset algorithm")

    def getMemoryUSS(self):
        """

        Total amount of USS memory consumed by the mining process will be retrieved from this function

        :return: returning USS memory consumed by the mining process
        :rtype: float
        """

        return self._memoryUSS

    def getMemoryRSS(self):
        """

        Total amount of RSS memory consumed by the mining process will be retrieved from this function

        :return: returning RSS memory consumed by the mining process
        :rtype: float
        """

        return self._memoryRSS

    def getRuntime(self):
        """

        Calculating the total amount of runtime taken by the mining process

        :return: returning total amount of runtime taken by the mining process, in seconds
        :rtype: float
        """

        return self._endTime - self._startTime

    def getPatternsAsDataFrame(self):
        """

        Storing final frequent patterns in a dataframe

        :return: returning frequent patterns in a dataframe with the columns 'Patterns' (items joined by spaces) and 'Support'
        :rtype: pd.DataFrame
        """

        data = [[pattern.replace('\t', ' '), info[0]]
                for pattern, info in self._finalPatterns.items()]
        return _ab._pd.DataFrame(data, columns=['Patterns', 'Support'])

    def save(self, outFile):
        """

        Complete set of frequent patterns will be loaded in to an output file

        One line is written per pattern, in the form ``pattern:support``.

        :param outFile: name of the output file
        :type outFile: csvfile
        """
        self._oFile = outFile
        # The context manager guarantees the file is flushed and closed.
        with open(self._oFile, 'w+') as writer:
            for pattern, info in self._finalPatterns.items():
                patternsAndSupport = pattern.strip() + ":" + str(info[0])
                writer.write("%s \n" % patternsAndSupport)

    def getPatterns(self):
        """

        Function to send the set of frequent patterns after completion of the mining process

        :return: returning frequent patterns, each mapped to the list [support, diffset]
        :rtype: dict
        """
        return self._finalPatterns

    def printResults(self):
        """
        This function is used to print the results
        """
        print("Total number of Frequent Patterns:", len(self.getPatterns()))
        print("Total Memory in USS:", self.getMemoryUSS())
        print("Total Memory in RSS", self.getMemoryRSS())
        # getRuntime() is a difference of time.time() values, i.e. seconds.
        print("Total ExecutionTime in seconds:", self.getRuntime())


if __name__ == "__main__":
    print("Number of arguments:", len(_ab._sys.argv))
    print("Arguments:", _ab._sys.argv)

    # Accepted forms: iFile minSup oFile  |  iFile minSup oFile sep
    if len(_ab._sys.argv) not in (4, 5):
        raise WrongNumberOfArguments(
            "Please provide three arguments: iFile, minSup and oFile \n""or Please provide four arguments: iFile, minSup, oFile and sep")

    iFile = _ab._sys.argv[1]
    minSup = _ab._sys.argv[2]
    oFile = _ab._sys.argv[3]
    print("Input File:", iFile)
    print("Minimum Support List:", minSup)
    print("Output File:", oFile)

    if len(_ab._sys.argv) == 5:
        sep = _ab._sys.argv[4]
        # A literal backslash-t typed in the shell stands for a tab.
        if sep == "\\t":
            sep = "\t"
        print("Separator:", sep)
        _ap = ECLATDiffset(iFile=iFile, minSup=minSup, sep=sep)
    else:
        _ap = ECLATDiffset(iFile=iFile, minSup=minSup)

    _ap.mine()
    _ap.save(oFile)

    print("Total number of Frequent Patterns:", len(_ap.getPatterns()))
    print("Total Memory in USS:", _ap.getMemoryUSS())
    print("Total Memory in RSS:", _ap.getMemoryRSS())
    print("Total ExecutionTime in seconds:", _ap.getRuntime())

# ---------------------------------------------------------------------------
# End of ECLATDiffsetTest.py. The remainder of this span is the beginning of
# the next file in the patch, kept verbatim (as comments) so nothing is lost:
# ---------------------------------------------------------------------------
# diff --git a/tests/frequentPattern/basic/ECLATTest.ipynb b/tests/frequentPattern/basic/ECLATTest.ipynb new file mode 100644 index 00000000..005abf76 --- /dev/null +++ b/tests/frequentPattern/basic/ECLATTest.ipynb @@ -0,0 +1,665 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pami in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (2024.5.29.2)\n", + "Requirement already satisfied: networkx in c:\\python310\\lib\\site-packages (from pami) (3.3)\n", + "Requirement already satisfied: sphinx in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (7.3.7)\n", + "Requirement already satisfied: discord.py in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (2.3.2)\n", + "Requirement already satisfied: deprecated in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (1.2.14)\n", + "Requirement already satisfied: psutil in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (5.9.8)\n", +
"Requirement already satisfied: resource in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (0.2.1)\n", + "Requirement already satisfied: validators in c:\\python310\\lib\\site-packages (from pami) (0.28.3)\n", + "Requirement already satisfied: sphinx-rtd-theme in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (2.0.0)\n", + "Requirement already satisfied: Pillow in c:\\python310\\lib\\site-packages (from pami) (10.3.0)\n", + "Requirement already satisfied: plotly in c:\\python310\\lib\\site-packages (from pami) (5.18.0)\n", + "Requirement already satisfied: matplotlib in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from pami) (3.9.0)\n", + "Requirement already satisfied: pandas in c:\\python310\\lib\\site-packages (from pami) (2.2.1)\n", + "Requirement already satisfied: numpy in c:\\python310\\lib\\site-packages (from pami) (1.26.4)\n", + "Requirement already satisfied: urllib3 in c:\\python310\\lib\\site-packages (from pami) (2.2.1)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in c:\\python310\\lib\\site-packages (from deprecated->pami) (1.16.0)\n", + "Requirement already satisfied: aiohttp<4,>=3.7.4 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from discord.py->pami) (3.9.5)\n", + "Requirement already satisfied: fonttools>=4.22.0 in c:\\python310\\lib\\site-packages (from matplotlib->pami) (4.52.4)\n", + "Requirement already satisfied: packaging>=20.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from matplotlib->pami) (24.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from matplotlib->pami) (1.2.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in c:\\python310\\lib\\site-packages (from matplotlib->pami) (2.8.2)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in c:\\python310\\lib\\site-packages (from 
matplotlib->pami) (3.1.2)\n", + "Requirement already satisfied: cycler>=0.10 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from matplotlib->pami) (0.12.1)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in c:\\python310\\lib\\site-packages (from matplotlib->pami) (1.4.5)\n", + "Requirement already satisfied: tzdata>=2022.7 in c:\\python310\\lib\\site-packages (from pandas->pami) (2023.4)\n", + "Requirement already satisfied: pytz>=2020.1 in c:\\python310\\lib\\site-packages (from pandas->pami) (2023.3.post1)\n", + "Requirement already satisfied: tenacity>=6.2.0 in c:\\python310\\lib\\site-packages (from plotly->pami) (8.2.3)\n", + "Requirement already satisfied: JsonForm>=0.0.2 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from resource->pami) (0.0.2)\n", + "Requirement already satisfied: python-easyconfig>=0.1.0 in c:\\python310\\lib\\site-packages (from resource->pami) (0.1.7)\n", + "Requirement already satisfied: JsonSir>=0.0.2 in c:\\python310\\lib\\site-packages (from resource->pami) (0.0.2)\n", + "Requirement already satisfied: snowballstemmer>=2.0 in c:\\python310\\lib\\site-packages (from sphinx->pami) (2.2.0)\n", + "Requirement already satisfied: sphinxcontrib-jsmath in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.1)\n", + "Requirement already satisfied: imagesize>=1.3 in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.4.1)\n", + "Requirement already satisfied: Jinja2>=3.0 in c:\\python310\\lib\\site-packages (from sphinx->pami) (3.1.2)\n", + "Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in c:\\python310\\lib\\site-packages (from sphinx->pami) (2.0.5)\n", + "Requirement already satisfied: docutils<0.22,>=0.18.1 in c:\\python310\\lib\\site-packages (from sphinx->pami) (0.20.1)\n", + "Requirement already satisfied: babel>=2.9 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (2.15.0)\n", + "Requirement already 
satisfied: requests>=2.25.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (2.32.2)\n", + "Requirement already satisfied: sphinxcontrib-applehelp in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.8)\n", + "Requirement already satisfied: colorama>=0.4.5 in c:\\python310\\lib\\site-packages (from sphinx->pami) (0.4.6)\n", + "Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.9 in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.1.10)\n", + "Requirement already satisfied: sphinxcontrib-devhelp in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.6)\n", + "Requirement already satisfied: sphinxcontrib-qthelp in c:\\python310\\lib\\site-packages (from sphinx->pami) (1.0.7)\n", + "Requirement already satisfied: tomli>=2 in c:\\python310\\lib\\site-packages (from sphinx->pami) (2.0.1)\n", + "Requirement already satisfied: Pygments>=2.14 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (2.18.0)\n", + "Requirement already satisfied: alabaster~=0.7.14 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx->pami) (0.7.16)\n", + "Requirement already satisfied: sphinxcontrib-jquery<5,>=4 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from sphinx-rtd-theme->pami) (4.1)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in c:\\python310\\lib\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.4.1)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (4.0.3)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in c:\\python310\\lib\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.9.4)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in c:\\python310\\lib\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (6.0.5)\n", + "Requirement 
already satisfied: attrs>=17.3.0 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (23.2.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from aiohttp<4,>=3.7.4->discord.py->pami) (1.3.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in c:\\python310\\lib\\site-packages (from Jinja2>=3.0->sphinx->pami) (2.1.1)\n", + "Requirement already satisfied: jsonschema in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from JsonForm>=0.0.2->resource->pami) (4.22.0)\n", + "Requirement already satisfied: six>=1.5 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from python-dateutil>=2.7->matplotlib->pami) (1.16.0)\n", + "Requirement already satisfied: PyYAML in c:\\python310\\lib\\site-packages (from python-easyconfig>=0.1.0->resource->pami) (6.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\python310\\lib\\site-packages (from requests>=2.25.0->sphinx->pami) (3.0.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from requests>=2.25.0->sphinx->pami) (2024.2.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in c:\\python310\\lib\\site-packages (from requests>=2.25.0->sphinx->pami) (3.7)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in c:\\python310\\lib\\site-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (0.18.1)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in c:\\users\\aechx\\appdata\\roaming\\python\\python310\\site-packages (from jsonschema->JsonForm>=0.0.2->resource->pami) (0.35.1)\n" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "WARNING: Ignoring invalid distribution -ip (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution - (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -ip (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution - (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -ip (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution - (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -ip (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution - (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -ip (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution - (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -ip (c:\\python310\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution - (c:\\python310\\lib\\site-packages)\n", + "WARNING: You are using pip version 22.0.4; however, version 24.0 is available.\n", + "You should consider upgrading via the 'C:\\Python310\\python.exe -m pip install --upgrade pip' command.\n" + ] + } + ], + "source": [ + "!pip install -U pami" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test cases T10I4D100K Datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test case 1 for T10I4D100K Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent 
patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLAT 500 1072 10.576422 264699904\n", + "1 ECLAT 1000 385 7.181708 244805632\n", + "2 ECLAT 1500 237 6.447231 232861696\n", + "3 ECLAT 2000 155 7.549860 219901952\n", + "4 ECLAT 2500 107 4.145388 214679552\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLAT as alg\n", + "import pandas as pd\n", + "# Set the input parameters\n", + "URL = 'https://u-aizu.ac.jp/~udayrage/datasets/transactionalDatabases/Transactional_T10I4D100K.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "# Initialize a data frame to store the results of ECLAT\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLAT(URL, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLAT', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS(),]\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using ECLAT algorithm\")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test case 2 for T10I4D100K Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated 
successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLAT 500 1072 6.536374 264773632\n", + "1 ECLAT 1000 385 5.514250 244793344\n", + "2 ECLAT 1500 237 2.417863 233717760\n", + "3 ECLAT 2000 155 1.515320 219537408\n", + "4 ECLAT 2500 107 1.047624 215359488\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLAT as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_T10I4D100K.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "\n", + "# Initialize a data frame to store the results of ECLAT\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLAT(inputFile, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLAT', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using ECLAT algorithm\")\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test case 3 for T10I4D100K Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", 
+ "Frequent patterns were generated successfully using ECLAT algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLAT 500 1072 6.981791 307707904\n", + "1 ECLAT 1000 385 3.813990 286265344\n", + "2 ECLAT 1500 237 2.092535 280530944\n", + "3 ECLAT 2000 155 1.278450 269508608\n", + "4 ECLAT 2500 107 0.886711 263536640\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLAT as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_T10I4D100K.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "\n", + "# Read the CSV file into a DataFrame\n", + "df = pd.read_csv(inputFile, header=[0], sep=' ')\n", + "df = df.rename(columns={df.columns[0]: \"Transactions\"})\n", + "\n", + "# Ensure all data in the \"Transaction\" column is string before splitting\n", + "df[\"Transactions\"] = df[\"Transactions\"].astype(str)\n", + "\n", + "# Preprocess the dataset\n", + "# transactions = df[\"Transaction\"].apply(lambda x: x.split(separator)).tolist()\n", + "transactions = df['Transactions'].apply(lambda x: x.split(separator))\n", + "transactions = pd.DataFrame(transactions, columns=['Transactions'])\n", + "\n", + "# Initialize a data frame to store the results of ECLAT\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLAT(transactions, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " \n", + " # Get the patterns found\n", + " patterns = obj.getPatterns()\n", + " \n", + " # Store the results in the data frame\n", + " result.loc[len(result)] = ['ECLAT', minSupCount, len(patterns), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(\"Frequent patterns were generated successfully using ECLAT algorithm\")\n", + "print(result)\n" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Cases Retail Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 1 for Retail Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLAT 500 468 5.637216 316071936\n", + "1 ECLAT 1000 135 4.174073 306638848\n", + "2 ECLAT 1500 68 4.112242 307113984\n", + "3 ECLAT 2000 45 3.880633 301191168\n", + "4 ECLAT 2500 34 4.444733 301608960\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLAT as alg\n", + "import pandas as pd\n", + "# Set the input parameters\n", + "URL = 'https://u-aizu.ac.jp/~udayrage/datasets/transactionalDatabases/Transactional_retail.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "\n", + "# Initialize a data frame to store the results of ECLAT\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLAT(URL, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLAT', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(f\"Frequent 
patterns were generated successfully using ECLAT algorithm\")\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 2 for Retail Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLAT 500 468 1.081722 317181952\n", + "1 ECLAT 1000 135 0.668655 307216384\n", + "2 ECLAT 1500 68 0.585485 307830784\n", + "3 ECLAT 2000 45 0.600223 300044288\n", + "4 ECLAT 2500 34 0.782643 300445696\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLAT as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_retail.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "\n", + "# Initialize a data frame to store the results of ECLAT\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLAT(inputFile, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLAT', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using ECLAT 
algorithm\")\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 3 for Retail Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLAT 500 468 0.764652 285491200\n", + "1 ECLAT 1000 135 0.315166 272891904\n", + "2 ECLAT 1500 68 0.248909 273596416\n", + "3 ECLAT 2000 45 0.245186 273661952\n", + "4 ECLAT 2500 34 0.239663 273735680\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLAT as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_retail.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [500, 1000, 1500, 2000, 2500]\n", + "\n", + "# Read the CSV file into a DataFrame\n", + "df = pd.read_csv(inputFile, header=[0], sep=' ')\n", + "df = df.rename(columns={df.columns[0]: \"Transactions\"})\n", + "\n", + "# Ensure all data in the \"Transaction\" column is string before splitting\n", + "df[\"Transactions\"] = df[\"Transactions\"].astype(str)\n", + "\n", + "# Preprocess the dataset\n", + "# transactions = df[\"Transaction\"].apply(lambda x: x.split(separator)).tolist()\n", + "transactions = df['Transactions'].apply(lambda x: x.split(separator))\n", + "transactions = pd.DataFrame(transactions, columns=['Transactions'])\n", + "\n", + "# Initialize a data frame to store the results of ECLAT\n", + "result = 
pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLAT(transactions, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " \n", + " # Get the patterns found\n", + " patterns = obj.getPatterns()\n", + " \n", + " # Store the results in the data frame\n", + " result.loc[len(result)] = ['ECLAT', minSupCount, len(patterns), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(\"Frequent patterns were generated successfully using ECLAT algorithm\")\n", + "print(result)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Cases for Chess Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 1 for chess Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLAT 2500 11474 4.202990 1750896640\n", + "1 ECLAT 3000 152 2.192795 263335936\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLAT as alg\n", + "import pandas as pd\n", + "# Set the input parameters\n", + "URL = 'https://u-aizu.ac.jp/~udayrage/datasets/transactionalDatabases/Transactional_chess.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [2500, 3000]\n", + "# Initialize a data frame to store the results of ECLAT\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " 
obj = alg.ECLAT(URL, minSup=minSupCount, sep=separator,)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLAT', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS(),]\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using ECLAT algorithm\")\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Case 2 for chess Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLAT 2500 11474 2.007263 1749270528\n", + "1 ECLAT 3000 152 0.126143 262852608\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLAT as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_chess1.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [2500, 3000]\n", + "\n", + "# Initialize a data frame to store the results of ECLAT\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLAT(inputFile, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " # Store the results in the data frame\n", + " result.loc[result.shape[0]] = ['ECLAT', minSupCount, len(obj.getPatterns()), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(f\"Frequent patterns were generated successfully using ECLAT algorithm\")\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "## Test Case 3 for chess Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + "Frequent patterns were generated successfully using ECLAT algorithm\n", + " algorithm minSup patterns runtime memory\n", + "0 ECLAT 2500 11401 2.706934 1671409664\n", + "1 ECLAT 3000 151 0.078167 195690496\n" + ] + } + ], + "source": [ + "import PAMI.frequentPattern.basic.ECLAT as alg\n", + "import pandas as pd\n", + "\n", + "# Set the input parameters\n", + "inputFile = 'Transactional_chess1.csv'\n", + "separator = '\\t'\n", + "minimumSupportCountList = [2500, 3000]\n", + "\n", + "# Read the CSV file into a DataFrame\n", + "df = pd.read_csv(inputFile, header=[0], sep=' ')\n", + "df = df.rename(columns={df.columns[0]: \"Transactions\"})\n", + "\n", + "# Ensure all data in the \"Transaction\" column is string before splitting\n", + "df[\"Transactions\"] = df[\"Transactions\"].astype(str)\n", + "\n", + "# Preprocess the dataset\n", + "# transactions = df[\"Transaction\"].apply(lambda x: x.split(separator)).tolist()\n", + "transactions = df['Transactions'].apply(lambda x: x.split(separator))\n", + "transactions = pd.DataFrame(transactions, columns=['Transactions'])\n", + "\n", + "# Initialize a data frame to store the results of ECLAT\n", + "result = pd.DataFrame(columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'])\n", + "\n", + "# Execute the algorithm at different minSup values\n", + "for minSupCount in minimumSupportCountList:\n", + " obj = alg.ECLAT(transactions, minSup=minSupCount, sep=separator)\n", + " obj.mine()\n", + " \n", + " # Get the patterns found\n", + " patterns = obj.getPatterns()\n", + " \n", + " # Store the results in the data frame\n", + " 
result.loc[len(result)] = ['ECLAT', minSupCount, len(patterns), obj.getRuntime(), obj.getMemoryRSS()]\n", + "\n", + "# Print the results\n", + "print(\"Frequent patterns were generated successfully using ECLAT algorithm\")\n", + "print(result)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/frequentPattern/basic/ECLATTest.py b/tests/frequentPattern/basic/ECLATTest.py new file mode 100644 index 00000000..7ae8c90c --- /dev/null +++ b/tests/frequentPattern/basic/ECLATTest.py @@ -0,0 +1,449 @@ +# ECLAT is one of the fundamental algorithm to discover frequent patterns in a transactional database. +# +# **Importing this algorithm into a python program** +# ------------------------------------------------------------------ +# +# import PAMI.frequentPattern.basic.ECLAT as alg +# +# obj = alg.ECLAT(iFile, minSup) +# +# obj.mine() +# +# frequentPatterns = obj.getPatterns() +# +# print("Total number of Frequent Patterns:", len(frequentPatterns)) +# +# obj.save(oFile) +# +# Df = obj.getPatternInDataFrame() +# +# memUSS = obj.getMemoryUSS() +# +# print("Total Memory in USS:", memUSS) +# +# memRSS = obj.getMemoryRSS() +# +# print("Total Memory in RSS", memRSS) +# +# run = obj.getRuntime() +# +# print("Total ExecutionTime in seconds:", run) +# + + + + + +__copyright__ = """ +Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" + +from PAMI.frequentPattern.basic import abstract as _ab +from typing import List, Dict, Tuple, Set, Union, Any, Generator +from deprecated import deprecated + + +class WrongNumberOfArguments(Exception): + pass + +class ECLAT(_ab._frequentPatterns): + """ + :Description: ECLAT is one of the fundamental algorithm to discover frequent patterns in a transactional database. + + :Reference: Mohammed Javeed Zaki: Scalable Algorithms for Association Mining. IEEE Trans. Knowl. Data Eng. 12(3): + 372-390 (2000), https://ieeexplore.ieee.org/document/846291 + + :param iFile: str : + Name of the Input file to mine complete set of frequent pattern's + :param oFile: str : + Name of the output file to store complete set of frequent patterns + :param minSup: int or float or str : + The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. + :param sep: str : + This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator. 
+ + :Attributes: + + startTime : float + To record the start time of the mining process + + endTime : float + To record the completion time of the mining process + + finalPatterns : dict + Storing the complete set of patterns in a dictionary variable + + memoryUSS : float + To store the total amount of USS memory consumed by the program + + memoryRSS : float + To store the total amount of RSS memory consumed by the program + + Database : list + To store the transactions of a database in list + + + **Methods to execute code on terminal** + ------------------------------------------ + + .. code-block:: console + + Format: + + (.venv) $ python3 ECLAT.py + + Example Usage: + + (.venv) $ python3 ECLAT.py sampleDB.txt patterns.txt 10.0 + + .. note:: minSup will be considered in percentage of database transactions + + + **Importing this algorithm into a python program** + ------------------------------------------------------------------ + .. code-block:: python + + import PAMI.frequentPattern.basic.ECLAT as alg + + obj = alg.ECLAT(iFile, minSup) + + obj.mine() + + frequentPatterns = obj.getPatterns() + + print("Total number of Frequent Patterns:", len(frequentPatterns)) + + obj.save(oFile) + + Df = obj.getPatternInDataFrame() + + memUSS = obj.getMemoryUSS() + + print("Total Memory in USS:", memUSS) + + memRSS = obj.getMemoryRSS() + + print("Total Memory in RSS", memRSS) + + run = obj.getRuntime() + + print("Total ExecutionTime in seconds:", run) + + + **Credits:** + ---------------------- + + The complete program was written by Kundai under the supervision of Professor Rage Uday Kiran. 
+ + """ + + _minSup = float() + _startTime = float() + _endTime = float() + _finalPatterns = {} + _iFile = " " + _oFile = " " + _sep = " " + _memoryUSS = float() + _memoryRSS = float() + _Database = [] + + def _creatingItemSets(self) -> float: + """ + Storing the complete transactions of the database/input file in a database variable + + :return: the complete transactions of the database/input file in a database variable + + :rtype: float + """ + self._Database = [] + if isinstance(self._iFile, _ab._pd.DataFrame): + if self._iFile.empty: + print("its empty..") + i = self._iFile.columns.values.tolist() + if 'Transactions' in i: + self._Database = self._iFile['Transactions'].tolist() + if isinstance(self._iFile, str): + if _ab._validators.url(self._iFile): + data = _ab._urlopen(self._iFile) + for line in data: + line.strip() + line = line.decode("utf-8") + temp = [i.rstrip() for i in line.split(self._sep)] + temp = [x for x in temp if x] + self._Database.append(temp) + else: + try: + with open(self._iFile, 'r', encoding='utf-8') as f: + for line in f: + line.strip() + temp = [i.rstrip() for i in line.split(self._sep)] + temp = [x for x in temp if x] + self._Database.append(temp) + except IOError: + print("File Not Found") + quit() + + def _getUniqueItemList(self) -> list: + """ + + Generating one frequent patterns + + :return: list of unique patterns + + :rtype: list + + """ + self._finalPatterns = {} + candidate = {} + uniqueItem = [] + for i in range(len(self._Database)): + for j in range(len(self._Database[i])): + if self._Database[i][j] not in candidate: + candidate[self._Database[i][j]] = {i} + else: + candidate[self._Database[i][j]].add(i) + for key, value in candidate.items(): + supp = len(value) + if supp >= self._minSup: + self._finalPatterns[key] = [value] + uniqueItem.append(key) + uniqueItem.sort() + return uniqueItem + + def _generateFrequentPatterns(self, candidateFrequent: list) -> None: + """ + + It will generate the combinations of frequent items + + 
:param candidateFrequent :it represents the items with their respective transaction identifiers + + :type candidateFrequent: list + + :return: None + + """ + new_freqList = [] + for i in range(0, len(candidateFrequent)): + item1 = candidateFrequent[i] + i1_list = item1.split() + for j in range(i + 1, len(candidateFrequent)): + item2 = candidateFrequent[j] + i2_list = item2.split() + if i1_list[:-1] == i2_list[:-1]: + interSet = self._finalPatterns[item1][0].intersection(self._finalPatterns[item2][0]) + if len(interSet) >= self._minSup: + newKey = item1 + "\t" + i2_list[-1] + self._finalPatterns[newKey] = [interSet] + new_freqList.append(newKey) + else: break + + if len(new_freqList) > 0: + self._generateFrequentPatterns(new_freqList) + + def _convert(self, value) -> float: + """ + + To convert the user specified minSup value + + :param value: user specified minSup value + + :return: converted type + + :rtype: float + + """ + if type(value) is int: + value = int(value) + if type(value) is float: + value = (len(self._Database) * value) + if type(value) is str: + if '.' in value: + value = float(value) + value = (len(self._Database) * value) + else: + value = int(value) + return value + + @deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. 
Starting from January 2025, 'startMine()' will be completely terminated.") + def startMine(self) -> None: + """ + Frequent pattern mining process will start from here + """ + + self.mine() + + def mine(self) -> None: + """ + Frequent pattern mining process will start from here + """ + + self._startTime = _ab._time.time() + if self._iFile is None: + raise Exception("Please enter the file path or file name:") + if self._minSup is None: + raise Exception("Please enter the Minimum Support") + self._creatingItemSets() + self._minSup = self._convert(self._minSup) + uniqueItemList = self._getUniqueItemList() + self._generateFrequentPatterns(uniqueItemList) + for x, y in self._finalPatterns.items(): + self._finalPatterns[x] = len(y[0]) + self._endTime = _ab._time.time() + process = _ab._psutil.Process(_ab._os.getpid()) + self._memoryUSS = float() + self._memoryRSS = float() + self._memoryUSS = process.memory_full_info().uss + self._memoryRSS = process.memory_info().rss + print("Frequent patterns were generated successfully using ECLAT algorithm") + + def getMemoryUSS(self) -> float: + """ + + Total amount of USS memory consumed by the mining process will be retrieved from this function + + :return: returning USS memory consumed by the mining process + + :rtype: float + + """ + + return self._memoryUSS + + def getMemoryRSS(self) -> float: + """ + + Total amount of RSS memory consumed by the mining process will be retrieved from this function + + :return: returning RSS memory consumed by the mining process + + :rtype: float + + """ + + return self._memoryRSS + + def getRuntime(self) -> float: + """ + Calculating the total amount of runtime taken by the mining process + + :return: returning total amount of runtime taken by the mining process + + :rtype: float + """ + + return self._endTime - self._startTime + + def getPatternsAsDataFrame(self) -> _ab._pd.DataFrame: + """ + + Storing final frequent patterns in a dataframe + + :return: returning frequent patterns in a dataframe 
+ + :rtype: pd.DataFrame + + """ + + dataFrame = {} + data = [] + for a, b in self._finalPatterns.items(): + data.append([a.replace('\t', ' '), b]) + dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) + return dataFrame + + def save(self, outFile: str) -> None: + """ + + Complete set of frequent patterns will be loaded in to an output file + + :param outFile: name of the output file + + :type outFile: csvfile + + :return: None + + """ + self._oFile = outFile + writer = open(self._oFile, 'w+') + for x, y in self._finalPatterns.items(): + patternsAndSupport = x.strip() + ":" + str(y) + writer.write("%s \n" % patternsAndSupport) + + def getPatterns(self) -> dict: + """ + Function to send the set of frequent patterns after completion of the mining process + + :return: returning frequent patterns + + :rtype: dict + """ + return self._finalPatterns + + def printResults(self) -> None: + """ + Function used to print the results + """ + print("Total number of Frequent Patterns:", len(self.getPatterns())) + print("Total Memory in USS:", self.getMemoryUSS()) + print("Total Memory in RSS", self.getMemoryRSS()) + print("Total ExecutionTime in ms:", self.getRuntime()) + + +if __name__ == "__main__": + print("Number of arguments:", len(_ab._sys.argv)) + print("Arguments:", _ab._sys.argv) + + if len(_ab._sys.argv) == 5: + iFile = _ab._sys.argv[1] + minSup = _ab._sys.argv[2] + oFile = _ab._sys.argv[3] + sep = _ab._sys.argv[4] + if sep == "\\t": + sep = "\t" + print("Input File:", iFile) + print("Minimum Support List:", minSup) + print("Output File:", oFile) + print("Separator:", sep) + _ap = ECLAT(iFile=iFile, minSup=minSup, sep=sep) + _ap.mine() + _ap.save(oFile) + print("Total number of Frequent Patterns:", len(_ap.getPatterns())) + print("Total Memory in USS:", _ap.getMemoryUSS()) + print("Total Memory in RSS:", _ap.getMemoryRSS()) + print("Total ExecutionTime in ms:", _ap.getRuntime()) + elif len(_ab._sys.argv) == 4: + iFile = _ab._sys.argv[1] + minSup = 
_ab._sys.argv[2] + oFile = _ab._sys.argv[3] + + print("Input File:", iFile) + print("Minimum Support List:", minSup) + print("Output File:", oFile) + + _ap = ECLAT(iFile=iFile, minSup=minSup) + _ap.mine() + _ap.save(oFile) + + print("Total number of Frequent Patterns:", len(_ap.getPatterns())) + print("Total Memory in USS:", _ap.getMemoryUSS()) + print("Total Memory in RSS:", _ap.getMemoryRSS()) + print("Total ExecutionTime in ms:", _ap.getRuntime()) + + + else: + raise WrongNumberOfArguments( + "Please provide three arguments: iFile, minSup and oFile \n""or Please provide four arguments: iFile, minSup, oFile and sep") \ No newline at end of file