diff --git a/docs/sources/user_guide/frequent_patterns/association_rules.ipynb b/docs/sources/user_guide/frequent_patterns/association_rules.ipynb index 856d44649..4a4156fda 100644 --- a/docs/sources/user_guide/frequent_patterns/association_rules.ipynb +++ b/docs/sources/user_guide/frequent_patterns/association_rules.ipynb @@ -209,9 +209,114 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
supportitemsets
01.0(Kidney Beans)
10.8(Eggs)
20.6(Yogurt)
30.6(Milk)
40.6(Onion)
50.8(Kidney Beans, Eggs)
60.6(Kidney Beans, Yogurt)
70.6(Kidney Beans, Milk)
80.6(Onion, Eggs)
90.6(Kidney Beans, Onion)
100.6(Kidney Beans, Onion, Eggs)
\n", + "
" + ], + "text/plain": [ + " support itemsets\n", + "0 1.0 (Kidney Beans)\n", + "1 0.8 (Eggs)\n", + "2 0.6 (Yogurt)\n", + "3 0.6 (Milk)\n", + "4 0.6 (Onion)\n", + "5 0.8 (Kidney Beans, Eggs)\n", + "6 0.6 (Kidney Beans, Yogurt)\n", + "7 0.6 (Kidney Beans, Milk)\n", + "8 0.6 (Onion, Eggs)\n", + "9 0.6 (Kidney Beans, Onion)\n", + "10 0.6 (Kidney Beans, Onion, Eggs)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import pandas as pd\n", "from mlxtend.preprocessing import TransactionEncoder\n", @@ -245,13 +350,316 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/marcelo/anaconda3/envs/analysis/lib/python3.10/site-packages/mlxtend/frequent_patterns/association_rules.py:182: RuntimeWarning: invalid value encountered in divide\n", + " cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftrepresentativityleverageconvictionzhangs_metricjaccardcertaintykulczynski
0(Kidney Beans)(Eggs)1.00.80.80.801.001.00.001.00.00.800.0000.900
1(Eggs)(Kidney Beans)0.81.00.81.001.001.00.00inf0.00.800.0000.900
2(Yogurt)(Kidney Beans)0.61.00.61.001.001.00.00inf0.00.600.0000.800
3(Milk)(Kidney Beans)0.61.00.61.001.001.00.00inf0.00.600.0000.800
4(Onion)(Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.875
5(Eggs)(Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
6(Onion)(Kidney Beans)0.61.00.61.001.001.00.00inf0.00.600.0000.800
7(Kidney Beans, Onion)(Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.875
8(Kidney Beans, Eggs)(Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
9(Onion, Eggs)(Kidney Beans)0.61.00.61.001.001.00.00inf0.00.600.0000.800
10(Onion)(Kidney Beans, Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.875
11(Eggs)(Kidney Beans, Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
\n", + "
" + ], + "text/plain": [ + " antecedents consequents antecedent support \\\n", + "0 (Kidney Beans) (Eggs) 1.0 \n", + "1 (Eggs) (Kidney Beans) 0.8 \n", + "2 (Yogurt) (Kidney Beans) 0.6 \n", + "3 (Milk) (Kidney Beans) 0.6 \n", + "4 (Onion) (Eggs) 0.6 \n", + "5 (Eggs) (Onion) 0.8 \n", + "6 (Onion) (Kidney Beans) 0.6 \n", + "7 (Kidney Beans, Onion) (Eggs) 0.6 \n", + "8 (Kidney Beans, Eggs) (Onion) 0.8 \n", + "9 (Onion, Eggs) (Kidney Beans) 0.6 \n", + "10 (Onion) (Kidney Beans, Eggs) 0.6 \n", + "11 (Eggs) (Kidney Beans, Onion) 0.8 \n", + "\n", + " consequent support support confidence lift representativity leverage \\\n", + "0 0.8 0.8 0.80 1.00 1.0 0.00 \n", + "1 1.0 0.8 1.00 1.00 1.0 0.00 \n", + "2 1.0 0.6 1.00 1.00 1.0 0.00 \n", + "3 1.0 0.6 1.00 1.00 1.0 0.00 \n", + "4 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "5 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "6 1.0 0.6 1.00 1.00 1.0 0.00 \n", + "7 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "8 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "9 1.0 0.6 1.00 1.00 1.0 0.00 \n", + "10 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "11 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "\n", + " conviction zhangs_metric jaccard certainty kulczynski \n", + "0 1.0 0.0 0.80 0.000 0.900 \n", + "1 inf 0.0 0.80 0.000 0.900 \n", + "2 inf 0.0 0.60 0.000 0.800 \n", + "3 inf 0.0 0.60 0.000 0.800 \n", + "4 inf 0.5 0.75 1.000 0.875 \n", + "5 1.6 1.0 0.75 0.375 0.875 \n", + "6 inf 0.0 0.60 0.000 0.800 \n", + "7 inf 0.5 0.75 1.000 0.875 \n", + "8 1.6 1.0 0.75 0.375 0.875 \n", + "9 inf 0.0 0.60 0.000 0.800 \n", + "10 inf 0.5 0.75 1.000 0.875 \n", + "11 1.6 1.0 0.75 0.375 0.875 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from mlxtend.frequent_patterns import association_rules\n", "\n", - "association_rules(frequent_itemsets, metric=\"confidence\", min_threshold=0.7)" + "association_rules(frequent_itemsets, metric=\"confidence\", min_threshold=0.7, num_itemsets=len(df.index))" ] }, { @@ -270,34 +678,186 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rules = association_rules(frequent_itemsets, metric=\"lift\", min_threshold=1.2)\n", - "rules" - ] - }, - { - "cell_type": "markdown", + "execution_count": 5, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftrepresentativityleverageconvictionzhangs_metricjaccardcertaintykulczynski
0(Onion)(Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.875
1(Eggs)(Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
2(Kidney Beans, Onion)(Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.875
3(Kidney Beans, Eggs)(Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
4(Onion)(Kidney Beans, Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.875
5(Eggs)(Kidney Beans, Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
\n", + "
" + ], + "text/plain": [ + " antecedents consequents antecedent support \\\n", + "0 (Onion) (Eggs) 0.6 \n", + "1 (Eggs) (Onion) 0.8 \n", + "2 (Kidney Beans, Onion) (Eggs) 0.6 \n", + "3 (Kidney Beans, Eggs) (Onion) 0.8 \n", + "4 (Onion) (Kidney Beans, Eggs) 0.6 \n", + "5 (Eggs) (Kidney Beans, Onion) 0.8 \n", + "\n", + " consequent support support confidence lift representativity leverage \\\n", + "0 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "1 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "2 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "3 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "4 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "5 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "\n", + " conviction zhangs_metric jaccard certainty kulczynski \n", + "0 inf 0.5 0.75 1.000 0.875 \n", + "1 1.6 1.0 0.75 0.375 0.875 \n", + "2 inf 0.5 0.75 1.000 0.875 \n", + "3 1.6 1.0 0.75 0.375 0.875 \n", + "4 inf 0.5 0.75 1.000 0.875 \n", + "5 1.6 1.0 0.75 0.375 0.875 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "Pandas `DataFrames` make it easy to filter the results further. Let's say we are ony interested in rules that satisfy the following criteria:\n", - "\n", - "1. at least 2 antecedents\n", - "2. a confidence > 0.75\n", - "3. a lift score > 1.2\n", - "\n", - "We could compute the antecedent length as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rules[\"antecedent_len\"] = rules[\"antecedents\"].apply(lambda x: len(x))\n", + "rules = association_rules(frequent_itemsets, metric=\"lift\", min_threshold=1.2, num_itemsets=len(df.index))\n", "rules" ] }, @@ -305,74 +865,514 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Then, we can use pandas' selection syntax as shown below:" + "Pandas `DataFrames` make it easy to filter the results further. Let's say we are ony interested in rules that satisfy the following criteria:\n", + "\n", + "1. at least 2 antecedents\n", + "2. a confidence > 0.75\n", + "3. a lift score > 1.2\n", + "\n", + "We could compute the antecedent length as follows:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftrepresentativityleverageconvictionzhangs_metricjaccardcertaintykulczynskiantecedent_len
0(Onion)(Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.8751
1(Eggs)(Onion)0.80.60.60.751.251.00.121.61.00.750.3750.8751
2(Kidney Beans, Onion)(Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.8752
3(Kidney Beans, Eggs)(Onion)0.80.60.60.751.251.00.121.61.00.750.3750.8752
4(Onion)(Kidney Beans, Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.8751
5(Eggs)(Kidney Beans, Onion)0.80.60.60.751.251.00.121.61.00.750.3750.8751
\n", + "
" + ], + "text/plain": [ + " antecedents consequents antecedent support \\\n", + "0 (Onion) (Eggs) 0.6 \n", + "1 (Eggs) (Onion) 0.8 \n", + "2 (Kidney Beans, Onion) (Eggs) 0.6 \n", + "3 (Kidney Beans, Eggs) (Onion) 0.8 \n", + "4 (Onion) (Kidney Beans, Eggs) 0.6 \n", + "5 (Eggs) (Kidney Beans, Onion) 0.8 \n", + "\n", + " consequent support support confidence lift representativity leverage \\\n", + "0 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "1 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "2 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "3 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "4 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "5 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "\n", + " conviction zhangs_metric jaccard certainty kulczynski antecedent_len \n", + "0 inf 0.5 0.75 1.000 0.875 1 \n", + "1 1.6 1.0 0.75 0.375 0.875 1 \n", + "2 inf 0.5 0.75 1.000 0.875 2 \n", + "3 1.6 1.0 0.75 0.375 0.875 2 \n", + "4 inf 0.5 0.75 1.000 0.875 1 \n", + "5 1.6 1.0 0.75 0.375 0.875 1 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "rules[ (rules['antecedent_len'] >= 2) &\n", - " (rules['confidence'] > 0.75) &\n", - " (rules['lift'] > 1.2) ]" + "rules[\"antecedent_len\"] = rules[\"antecedents\"].apply(lambda x: len(x))\n", + "rules" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Similarly, using the Pandas API, we can select entries based on the \"antecedents\" or \"consequents\" columns:" + "Then, we can use pandas' selection syntax as shown below:" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rules[rules['antecedents'] == {'Eggs', 'Kidney Beans'}]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Frozensets**\n", - "\n", - "Note that the entries in the \"itemsets\" column are of type `frozenset`, which is built-in Python type that is similar to a Python `set` but immutable, which makes it more efficient for certain query or comparison operations (https://docs.python.org/3.6/library/stdtypes.html#frozenset). Since `frozenset`s are sets, the item order does not matter. I.e., the query\n", - "\n", - "`rules[rules['antecedents'] == {'Eggs', 'Kidney Beans'}]`\n", - " \n", - "is equivalent to any of the following three\n", - "\n", - "- `rules[rules['antecedents'] == {'Kidney Beans', 'Eggs'}]`\n", - "- `rules[rules['antecedents'] == frozenset(('Eggs', 'Kidney Beans'))]`\n", - "- `rules[rules['antecedents'] == frozenset(('Kidney Beans', 'Eggs'))]`\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Example 3 -- Frequent Itemsets with Incomplete Antecedent and Consequent Information" - ] - }, - { - "cell_type": "markdown", + "execution_count": 7, "metadata": {}, - "source": [ - "Most metrics computed by `association_rules` depends on the consequent and antecedent support score of a given rule provided in the frequent itemset input DataFrame. Consider the following example:" - ] - }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftrepresentativityleverageconvictionzhangs_metricjaccardcertaintykulczynskiantecedent_len
2(Kidney Beans, Onion)(Eggs)0.60.80.61.01.251.00.12inf0.50.751.00.8752
\n", + "
" + ], + "text/plain": [ + " antecedents consequents antecedent support consequent support \\\n", + "2 (Kidney Beans, Onion) (Eggs) 0.6 0.8 \n", + "\n", + " support confidence lift representativity leverage conviction \\\n", + "2 0.6 1.0 1.25 1.0 0.12 inf \n", + "\n", + " zhangs_metric jaccard certainty kulczynski antecedent_len \n", + "2 0.5 0.75 1.0 0.875 2 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rules[ (rules['antecedent_len'] >= 2) &\n", + " (rules['confidence'] > 0.75) &\n", + " (rules['lift'] > 1.2) ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Similarly, using the Pandas API, we can select entries based on the \"antecedents\" or \"consequents\" columns:" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftrepresentativityleverageconvictionzhangs_metricjaccardcertaintykulczynskiantecedent_len
3(Kidney Beans, Eggs)(Onion)0.80.60.60.751.251.00.121.61.00.750.3750.8752
\n", + "
" + ], + "text/plain": [ + " antecedents consequents antecedent support consequent support \\\n", + "3 (Kidney Beans, Eggs) (Onion) 0.8 0.6 \n", + "\n", + " support confidence lift representativity leverage conviction \\\n", + "3 0.6 0.75 1.25 1.0 0.12 1.6 \n", + "\n", + " zhangs_metric jaccard certainty kulczynski antecedent_len \n", + "3 1.0 0.75 0.375 0.875 2 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rules[rules['antecedents'] == {'Eggs', 'Kidney Beans'}]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Frozensets**\n", + "\n", + "Note that the entries in the \"itemsets\" column are of type `frozenset`, which is built-in Python type that is similar to a Python `set` but immutable, which makes it more efficient for certain query or comparison operations (https://docs.python.org/3.6/library/stdtypes.html#frozenset). Since `frozenset`s are sets, the item order does not matter. I.e., the query\n", + "\n", + "`rules[rules['antecedents'] == {'Eggs', 'Kidney Beans'}]`\n", + " \n", + "is equivalent to any of the following three\n", + "\n", + "- `rules[rules['antecedents'] == {'Kidney Beans', 'Eggs'}]`\n", + "- `rules[rules['antecedents'] == frozenset(('Eggs', 'Kidney Beans'))]`\n", + "- `rules[rules['antecedents'] == frozenset(('Kidney Beans', 'Eggs'))]`\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 3 -- Frequent Itemsets with Incomplete Antecedent and Consequent Information" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Most metrics computed by `association_rules` depends on the consequent and antecedent support score of a given rule provided in the frequent itemset input DataFrame. Consider the following example:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
itemsetssupport
0[177, 176]0.253623
1[177, 179]0.253623
2[176, 178]0.217391
3[176, 179]0.217391
4[93, 100]0.181159
5[177, 178]0.108696
6[177, 176, 178]0.108696
\n", + "
" + ], + "text/plain": [ + " itemsets support\n", + "0 [177, 176] 0.253623\n", + "1 [177, 179] 0.253623\n", + "2 [176, 178] 0.217391\n", + "3 [176, 179] 0.217391\n", + "4 [93, 100] 0.181159\n", + "5 [177, 178] 0.108696\n", + "6 [177, 176, 178] 0.108696" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import pandas as pd\n", "\n", @@ -409,28 +1409,609 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from mlxtend.frequent_patterns import association_rules\n", - "\n", - "res = association_rules(freq_itemsets, support_only=True, min_threshold=0.1)\n", - "res" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To clean up the representation, you may want to do the following:" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftrepresentativityleverageconvictionzhangs_metricjaccardcertaintykulczynski
0(176)(177)NaNNaN0.253623NaNNaNNaNNaNNaNNaNNaNNaNNaN
1(177)(176)NaNNaN0.253623NaNNaNNaNNaNNaNNaNNaNNaNNaN
2(179)(177)NaNNaN0.253623NaNNaNNaNNaNNaNNaNNaNNaNNaN
3(177)(179)NaNNaN0.253623NaNNaNNaNNaNNaNNaNNaNNaNNaN
4(176)(178)NaNNaN0.217391NaNNaNNaNNaNNaNNaNNaNNaNNaN
5(178)(176)NaNNaN0.217391NaNNaNNaNNaNNaNNaNNaNNaNNaN
6(176)(179)NaNNaN0.217391NaNNaNNaNNaNNaNNaNNaNNaNNaN
7(179)(176)NaNNaN0.217391NaNNaNNaNNaNNaNNaNNaNNaNNaN
8(100)(93)NaNNaN0.181159NaNNaNNaNNaNNaNNaNNaNNaNNaN
9(93)(100)NaNNaN0.181159NaNNaNNaNNaNNaNNaNNaNNaNNaN
10(178)(177)NaNNaN0.108696NaNNaNNaNNaNNaNNaNNaNNaNNaN
11(177)(178)NaNNaN0.108696NaNNaNNaNNaNNaNNaNNaNNaNNaN
12(176, 178)(177)NaNNaN0.108696NaNNaNNaNNaNNaNNaNNaNNaNNaN
13(176, 177)(178)NaNNaN0.108696NaNNaNNaNNaNNaNNaNNaNNaNNaN
14(178, 177)(176)NaNNaN0.108696NaNNaNNaNNaNNaNNaNNaNNaNNaN
15(176)(178, 177)NaNNaN0.108696NaNNaNNaNNaNNaNNaNNaNNaNNaN
16(178)(176, 177)NaNNaN0.108696NaNNaNNaNNaNNaNNaNNaNNaNNaN
17(177)(176, 178)NaNNaN0.108696NaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " antecedents consequents antecedent support consequent support support \\\n", + "0 (176) (177) NaN NaN 0.253623 \n", + "1 (177) (176) NaN NaN 0.253623 \n", + "2 (179) (177) NaN NaN 0.253623 \n", + "3 (177) (179) NaN NaN 0.253623 \n", + "4 (176) (178) NaN NaN 0.217391 \n", + "5 (178) (176) NaN NaN 0.217391 \n", + "6 (176) (179) NaN NaN 0.217391 \n", + "7 (179) (176) NaN NaN 0.217391 \n", + "8 (100) (93) NaN NaN 0.181159 \n", + "9 (93) (100) NaN NaN 0.181159 \n", + "10 (178) (177) NaN NaN 0.108696 \n", + "11 (177) (178) NaN NaN 0.108696 \n", + "12 (176, 178) (177) NaN NaN 0.108696 \n", + "13 (176, 177) (178) NaN NaN 0.108696 \n", + "14 (178, 177) (176) NaN NaN 0.108696 \n", + "15 (176) (178, 177) NaN NaN 0.108696 \n", + "16 (178) (176, 177) NaN NaN 0.108696 \n", + "17 (177) (176, 178) NaN NaN 0.108696 \n", + "\n", + " confidence lift representativity leverage conviction zhangs_metric \\\n", + "0 NaN NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN NaN \n", + "5 NaN NaN NaN NaN NaN NaN \n", + "6 NaN NaN NaN NaN NaN NaN \n", + "7 NaN NaN NaN NaN NaN NaN \n", + "8 NaN NaN NaN NaN NaN NaN \n", + "9 NaN NaN NaN NaN NaN NaN \n", + "10 NaN NaN NaN NaN NaN NaN \n", + "11 NaN NaN NaN NaN NaN NaN \n", + "12 NaN NaN NaN NaN NaN NaN \n", + "13 NaN NaN NaN NaN NaN NaN \n", + "14 NaN NaN NaN NaN NaN NaN \n", + "15 NaN NaN NaN NaN NaN NaN \n", + "16 NaN NaN NaN NaN NaN NaN \n", + "17 NaN NaN NaN NaN NaN NaN \n", + "\n", + " jaccard certainty kulczynski \n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "5 NaN NaN NaN \n", + "6 NaN NaN NaN \n", + "7 NaN NaN NaN \n", + "8 NaN NaN NaN \n", + "9 NaN NaN NaN \n", + "10 NaN NaN NaN \n", + "11 NaN NaN NaN \n", + "12 NaN NaN NaN \n", + "13 NaN NaN NaN \n", + "14 NaN NaN NaN \n", + "15 NaN NaN NaN \n", + "16 NaN NaN NaN \n", + "17 NaN NaN NaN " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from mlxtend.frequent_patterns import association_rules\n", + "\n", + "res = association_rules(freq_itemsets, support_only=True, min_threshold=0.1, num_itemsets=0)\n", + "res" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To clean up the representation, you may want to do the following:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
antecedentsconsequentssupport
0(176)(177)0.253623
1(177)(176)0.253623
2(179)(177)0.253623
3(177)(179)0.253623
4(176)(178)0.217391
5(178)(176)0.217391
6(176)(179)0.217391
7(179)(176)0.217391
8(100)(93)0.181159
9(93)(100)0.181159
10(178)(177)0.108696
11(177)(178)0.108696
12(176, 178)(177)0.108696
13(176, 177)(178)0.108696
14(178, 177)(176)0.108696
15(176)(178, 177)0.108696
16(178)(176, 177)0.108696
17(177)(176, 178)0.108696
\n", + "
" + ], + "text/plain": [ + " antecedents consequents support\n", + "0 (176) (177) 0.253623\n", + "1 (177) (176) 0.253623\n", + "2 (179) (177) 0.253623\n", + "3 (177) (179) 0.253623\n", + "4 (176) (178) 0.217391\n", + "5 (178) (176) 0.217391\n", + "6 (176) (179) 0.217391\n", + "7 (179) (176) 0.217391\n", + "8 (100) (93) 0.181159\n", + "9 (93) (100) 0.181159\n", + "10 (178) (177) 0.108696\n", + "11 (177) (178) 0.108696\n", + "12 (176, 178) (177) 0.108696\n", + "13 (176, 177) (178) 0.108696\n", + "14 (178, 177) (176) 0.108696\n", + "15 (176) (178, 177) 0.108696\n", + "16 (178) (176, 177) 0.108696\n", + "17 (177) (176, 178) 0.108696" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "res = res[['antecedents', 'consequents', 'support']]\n", "res" @@ -453,9 +2034,184 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftrepresentativityleverageconvictionzhangs_metricjaccardcertaintykulczynski
0(Onion)(Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.875
1(Eggs)(Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
2(Kidney Beans, Onion)(Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.875
3(Kidney Beans, Eggs)(Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
4(Onion)(Kidney Beans, Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.875
5(Eggs)(Kidney Beans, Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
\n", + "
" + ], + "text/plain": [ + " antecedents consequents antecedent support \\\n", + "0 (Onion) (Eggs) 0.6 \n", + "1 (Eggs) (Onion) 0.8 \n", + "2 (Kidney Beans, Onion) (Eggs) 0.6 \n", + "3 (Kidney Beans, Eggs) (Onion) 0.8 \n", + "4 (Onion) (Kidney Beans, Eggs) 0.6 \n", + "5 (Eggs) (Kidney Beans, Onion) 0.8 \n", + "\n", + " consequent support support confidence lift representativity leverage \\\n", + "0 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "1 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "2 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "3 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "4 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "5 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "\n", + " conviction zhangs_metric jaccard certainty kulczynski \n", + "0 inf 0.5 0.75 1.000 0.875 \n", + "1 1.6 1.0 0.75 0.375 0.875 \n", + "2 inf 0.5 0.75 1.000 0.875 \n", + "3 1.6 1.0 0.75 0.375 0.875 \n", + "4 inf 0.5 0.75 1.000 0.875 \n", + "5 1.6 1.0 0.75 0.375 0.875 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import pandas as pd\n", "from mlxtend.preprocessing import TransactionEncoder\n", @@ -474,7 +2230,7 @@ "df = pd.DataFrame(te_ary, columns=te.columns_)\n", "\n", "frequent_itemsets = fpgrowth(df, min_support=0.6, use_colnames=True)\n", - "rules = association_rules(frequent_itemsets, metric=\"lift\", min_threshold=1.2)\n", + "rules = association_rules(frequent_itemsets, metric=\"lift\", min_threshold=1.2, num_itemsets=len(df.index))\n", "rules" ] }, @@ -487,9 +2243,164 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
antecedentsconsequentsantecedent supportconsequent supportsupportconfidenceliftrepresentativityleverageconvictionzhangs_metricjaccardcertaintykulczynski
0(Onion)(Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.875
1(Eggs)(Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
3(Kidney Beans, Eggs)(Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
4(Onion)(Kidney Beans, Eggs)0.60.80.61.001.251.00.12inf0.50.751.0000.875
5(Eggs)(Kidney Beans, Onion)0.80.60.60.751.251.00.121.61.00.750.3750.875
\n", + "
" + ], + "text/plain": [ + " antecedents consequents antecedent support \\\n", + "0 (Onion) (Eggs) 0.6 \n", + "1 (Eggs) (Onion) 0.8 \n", + "3 (Kidney Beans, Eggs) (Onion) 0.8 \n", + "4 (Onion) (Kidney Beans, Eggs) 0.6 \n", + "5 (Eggs) (Kidney Beans, Onion) 0.8 \n", + "\n", + " consequent support support confidence lift representativity leverage \\\n", + "0 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "1 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "3 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "4 0.8 0.6 1.00 1.25 1.0 0.12 \n", + "5 0.6 0.6 0.75 1.25 1.0 0.12 \n", + "\n", + " conviction zhangs_metric jaccard certainty kulczynski \n", + "0 inf 0.5 0.75 1.000 0.875 \n", + "1 1.6 1.0 0.75 0.375 0.875 \n", + "3 1.6 1.0 0.75 0.375 0.875 \n", + "4 inf 0.5 0.75 1.000 0.875 \n", + "5 1.6 1.0 0.75 0.375 0.875 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "antecedent_sele = rules['antecedents'] == frozenset({'Onion', 'Kidney Beans'}) # or frozenset({'Kidney Beans', 'Onion'})\n", "consequent_sele = rules['consequents'] == frozenset({'Eggs'})\n", @@ -507,24 +2418,26 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", + "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", + " df.iloc[idx[i], col[i]] = np.nan\n", + "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", " df.iloc[idx[i], col[i]] = np.nan\n", - "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", + "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", " df.iloc[idx[i], col[i]] = np.nan\n", - "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", + "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", " df.iloc[idx[i], col[i]] = np.nan\n", - "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", + "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", " df.iloc[idx[i], col[i]] = np.nan\n", - "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", + "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", " df.iloc[idx[i], col[i]] = np.nan\n", - "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", + "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n", " df.iloc[idx[i], col[i]] = np.nan\n" ] }, @@ -573,21 +2486,21 @@ " True\n", " True\n", " True\n", - " NaN\n", - " False\n", " True\n", + " False\n", + " NaN\n", " \n", " \n", " 1\n", " False\n", - " NaN\n", - " True\n", - " True\n", " False\n", " True\n", - " False\n", " True\n", + " False\n", + " NaN\n", + " NaN\n", " True\n", + " NaN\n", " False\n", " NaN\n", " \n", @@ -595,15 +2508,15 @@ " 2\n", " True\n", " False\n", - " NaN\n", - " NaN\n", + " False\n", + " True\n", " False\n", " True\n", " True\n", " False\n", " False\n", " False\n", - " NaN\n", + " False\n", " \n", " \n", " 3\n", @@ -613,7 +2526,7 @@ " False\n", " False\n", " True\n", - " NaN\n", + " True\n", " False\n", " False\n", " True\n", @@ -624,13 +2537,13 @@ " False\n", " NaN\n", " False\n", - " NaN\n", " True\n", + " NaN\n", " True\n", " False\n", " False\n", - " True\n", - " False\n", + " NaN\n", + " NaN\n", " False\n", " \n", " \n", @@ -638,22 +2551,22 @@ "" ], "text/plain": [ - " Apple Corn Dill Eggs Ice cream Kidney Beans Milk Nutmeg Onion \\\n", - "0 False False False True False True True True NaN \n", - "1 False NaN True True False True False True True \n", - "2 True False NaN NaN False True True False False \n", - "3 False True False False False True NaN False False \n", - "4 False NaN False NaN True True False False True \n", - "\n", - " Unicorn Yogurt \n", - "0 False True \n", - "1 False NaN \n", - "2 False NaN \n", - "3 True True \n", - "4 False False " + " Apple Corn Dill Eggs Ice cream Kidney Beans Milk Nutmeg Onion \\\n", + "0 False False False True False True True True True \n", + "1 False False True True False NaN NaN True NaN \n", + "2 True False False True False True True False False \n", + "3 False True False False False True True False False \n", + "4 False NaN False True NaN True False False NaN \n", + "\n", + " Unicorn Yogurt \n", + "0 False NaN \n", + "1 False NaN \n", + "2 False False \n", + "3 True True \n", + "4 NaN False " ] }, - "execution_count": 30, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -695,14 +2608,14 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\User\\OneDrive\\Υπολογιστής\\KONSTANTINOS-LAPTOP-C3JUH2VS\\KONSTANTINOS\\PROJECTS\\mlxtend\\mlxtend\\frequent_patterns\\association_rules.py:172: RuntimeWarning: invalid value encountered in divide\n", + "/home/marcelo/anaconda3/envs/analysis/lib/python3.10/site-packages/mlxtend/frequent_patterns/association_rules.py:182: RuntimeWarning: invalid value encountered in divide\n", " cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom)\n" ] }, @@ -746,37 +2659,37 @@ " \n", " \n", " 0\n", - " (Yogurt)\n", + " (Eggs)\n", " (Kidney Beans)\n", - " 0.666667\n", + " 0.80\n", " 1.0\n", - " 0.666667\n", + " 0.75\n", " 1.0\n", " 1.0\n", - " 0.6\n", - " 0.0\n", + " 0.8\n", + " -0.05\n", " inf\n", + " -0.25\n", + " 0.714286\n", " 0.0\n", - " 0.666667\n", - " 0.0\n", - " 0.833333\n", + " 0.875\n", " \n", " \n", " 1\n", - " (Eggs)\n", + " (Milk)\n", " (Kidney Beans)\n", - " 0.666667\n", + " 0.75\n", " 1.0\n", - " 0.666667\n", + " 0.75\n", " 1.0\n", " 1.0\n", - " 0.6\n", - " 0.0\n", + " 0.8\n", + " 0.00\n", " inf\n", + " 0.00\n", + " 0.750000\n", " 0.0\n", - " 0.666667\n", - " 0.0\n", - " 0.833333\n", + " 0.875\n", " \n", " \n", "\n", @@ -784,19 +2697,19 @@ ], "text/plain": [ " antecedents consequents antecedent support consequent support \\\n", - "0 (Yogurt) (Kidney Beans) 0.666667 1.0 \n", - "1 (Eggs) (Kidney Beans) 0.666667 1.0 \n", + "0 (Eggs) (Kidney Beans) 0.80 1.0 \n", + "1 (Milk) (Kidney Beans) 0.75 1.0 \n", "\n", - " support confidence lift representativity leverage conviction \\\n", - "0 0.666667 1.0 1.0 0.6 0.0 inf \n", - "1 0.666667 1.0 1.0 0.6 0.0 inf \n", + " support confidence lift representativity leverage conviction \\\n", + "0 0.75 1.0 1.0 0.8 -0.05 inf \n", + "1 0.75 1.0 1.0 0.8 0.00 inf \n", "\n", " zhangs_metric jaccard certainty kulczynski \n", - "0 0.0 0.666667 0.0 0.833333 \n", - "1 0.0 0.666667 0.0 0.833333 " + "0 -0.25 0.714286 0.0 0.875 \n", + "1 0.00 0.750000 0.0 0.875 " ] }, - "execution_count": 31, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -804,7 +2717,7 @@ "source": [ "frequent_itemsets = fpgrowth(df, min_support=0.6, null_values = True, use_colnames=True)\n", "# frequent_itemsets = fpmax(df, min_support=0.6, null_values = True, use_colnames=True)\n", - "rules = association_rules(frequent_itemsets, df, len(df), null_values = True, metric=\"confidence\", min_threshold=0.8)\n", + "rules = association_rules(frequent_itemsets, len(df), df, null_values = True, metric=\"confidence\", min_threshold=0.8)\n", "rules" ] }, @@ -829,7 +2742,7 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -843,7 +2756,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.10.13" }, "toc": { "nav_menu": {},