diff --git a/docs/sources/user_guide/frequent_patterns/association_rules.ipynb b/docs/sources/user_guide/frequent_patterns/association_rules.ipynb
index 856d44649..4a4156fda 100644
--- a/docs/sources/user_guide/frequent_patterns/association_rules.ipynb
+++ b/docs/sources/user_guide/frequent_patterns/association_rules.ipynb
@@ -209,9 +209,114 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " support | \n",
+ " itemsets | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1.0 | \n",
+ " (Kidney Beans) | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.8 | \n",
+ " (Eggs) | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.6 | \n",
+ " (Yogurt) | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.6 | \n",
+ " (Milk) | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.6 | \n",
+ " (Onion) | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 0.8 | \n",
+ " (Kidney Beans, Eggs) | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 0.6 | \n",
+ " (Kidney Beans, Yogurt) | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 0.6 | \n",
+ " (Kidney Beans, Milk) | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 0.6 | \n",
+ " (Onion, Eggs) | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 0.6 | \n",
+ " (Kidney Beans, Onion) | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 0.6 | \n",
+ " (Kidney Beans, Onion, Eggs) | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " support itemsets\n",
+ "0 1.0 (Kidney Beans)\n",
+ "1 0.8 (Eggs)\n",
+ "2 0.6 (Yogurt)\n",
+ "3 0.6 (Milk)\n",
+ "4 0.6 (Onion)\n",
+ "5 0.8 (Kidney Beans, Eggs)\n",
+ "6 0.6 (Kidney Beans, Yogurt)\n",
+ "7 0.6 (Kidney Beans, Milk)\n",
+ "8 0.6 (Onion, Eggs)\n",
+ "9 0.6 (Kidney Beans, Onion)\n",
+ "10 0.6 (Kidney Beans, Onion, Eggs)"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import pandas as pd\n",
"from mlxtend.preprocessing import TransactionEncoder\n",
@@ -245,13 +350,316 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/marcelo/anaconda3/envs/analysis/lib/python3.10/site-packages/mlxtend/frequent_patterns/association_rules.py:182: RuntimeWarning: invalid value encountered in divide\n",
+ " cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " antecedents | \n",
+ " consequents | \n",
+ " antecedent support | \n",
+ " consequent support | \n",
+ " support | \n",
+ " confidence | \n",
+ " lift | \n",
+ " representativity | \n",
+ " leverage | \n",
+ " conviction | \n",
+ " zhangs_metric | \n",
+ " jaccard | \n",
+ " certainty | \n",
+ " kulczynski | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " (Kidney Beans) | \n",
+ " (Eggs) | \n",
+ " 1.0 | \n",
+ " 0.8 | \n",
+ " 0.8 | \n",
+ " 0.80 | \n",
+ " 1.00 | \n",
+ " 1.0 | \n",
+ " 0.00 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.80 | \n",
+ " 0.000 | \n",
+ " 0.900 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " (Eggs) | \n",
+ " (Kidney Beans) | \n",
+ " 0.8 | \n",
+ " 1.0 | \n",
+ " 0.8 | \n",
+ " 1.00 | \n",
+ " 1.00 | \n",
+ " 1.0 | \n",
+ " 0.00 | \n",
+ " inf | \n",
+ " 0.0 | \n",
+ " 0.80 | \n",
+ " 0.000 | \n",
+ " 0.900 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " (Yogurt) | \n",
+ " (Kidney Beans) | \n",
+ " 0.6 | \n",
+ " 1.0 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.00 | \n",
+ " 1.0 | \n",
+ " 0.00 | \n",
+ " inf | \n",
+ " 0.0 | \n",
+ " 0.60 | \n",
+ " 0.000 | \n",
+ " 0.800 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " (Milk) | \n",
+ " (Kidney Beans) | \n",
+ " 0.6 | \n",
+ " 1.0 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.00 | \n",
+ " 1.0 | \n",
+ " 0.00 | \n",
+ " inf | \n",
+ " 0.0 | \n",
+ " 0.60 | \n",
+ " 0.000 | \n",
+ " 0.800 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " (Onion) | \n",
+ " (Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " (Eggs) | \n",
+ " (Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " (Onion) | \n",
+ " (Kidney Beans) | \n",
+ " 0.6 | \n",
+ " 1.0 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.00 | \n",
+ " 1.0 | \n",
+ " 0.00 | \n",
+ " inf | \n",
+ " 0.0 | \n",
+ " 0.60 | \n",
+ " 0.000 | \n",
+ " 0.800 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " (Kidney Beans, Onion) | \n",
+ " (Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " (Kidney Beans, Eggs) | \n",
+ " (Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " (Onion, Eggs) | \n",
+ " (Kidney Beans) | \n",
+ " 0.6 | \n",
+ " 1.0 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.00 | \n",
+ " 1.0 | \n",
+ " 0.00 | \n",
+ " inf | \n",
+ " 0.0 | \n",
+ " 0.60 | \n",
+ " 0.000 | \n",
+ " 0.800 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " (Onion) | \n",
+ " (Kidney Beans, Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " (Eggs) | \n",
+ " (Kidney Beans, Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " antecedents consequents antecedent support \\\n",
+ "0 (Kidney Beans) (Eggs) 1.0 \n",
+ "1 (Eggs) (Kidney Beans) 0.8 \n",
+ "2 (Yogurt) (Kidney Beans) 0.6 \n",
+ "3 (Milk) (Kidney Beans) 0.6 \n",
+ "4 (Onion) (Eggs) 0.6 \n",
+ "5 (Eggs) (Onion) 0.8 \n",
+ "6 (Onion) (Kidney Beans) 0.6 \n",
+ "7 (Kidney Beans, Onion) (Eggs) 0.6 \n",
+ "8 (Kidney Beans, Eggs) (Onion) 0.8 \n",
+ "9 (Onion, Eggs) (Kidney Beans) 0.6 \n",
+ "10 (Onion) (Kidney Beans, Eggs) 0.6 \n",
+ "11 (Eggs) (Kidney Beans, Onion) 0.8 \n",
+ "\n",
+ " consequent support support confidence lift representativity leverage \\\n",
+ "0 0.8 0.8 0.80 1.00 1.0 0.00 \n",
+ "1 1.0 0.8 1.00 1.00 1.0 0.00 \n",
+ "2 1.0 0.6 1.00 1.00 1.0 0.00 \n",
+ "3 1.0 0.6 1.00 1.00 1.0 0.00 \n",
+ "4 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "5 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "6 1.0 0.6 1.00 1.00 1.0 0.00 \n",
+ "7 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "8 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "9 1.0 0.6 1.00 1.00 1.0 0.00 \n",
+ "10 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "11 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "\n",
+ " conviction zhangs_metric jaccard certainty kulczynski \n",
+ "0 1.0 0.0 0.80 0.000 0.900 \n",
+ "1 inf 0.0 0.80 0.000 0.900 \n",
+ "2 inf 0.0 0.60 0.000 0.800 \n",
+ "3 inf 0.0 0.60 0.000 0.800 \n",
+ "4 inf 0.5 0.75 1.000 0.875 \n",
+ "5 1.6 1.0 0.75 0.375 0.875 \n",
+ "6 inf 0.0 0.60 0.000 0.800 \n",
+ "7 inf 0.5 0.75 1.000 0.875 \n",
+ "8 1.6 1.0 0.75 0.375 0.875 \n",
+ "9 inf 0.0 0.60 0.000 0.800 \n",
+ "10 inf 0.5 0.75 1.000 0.875 \n",
+ "11 1.6 1.0 0.75 0.375 0.875 "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from mlxtend.frequent_patterns import association_rules\n",
"\n",
- "association_rules(frequent_itemsets, metric=\"confidence\", min_threshold=0.7)"
+ "association_rules(frequent_itemsets, metric=\"confidence\", min_threshold=0.7, num_itemsets=len(df.index))"
]
},
{
@@ -270,34 +678,186 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "rules = association_rules(frequent_itemsets, metric=\"lift\", min_threshold=1.2)\n",
- "rules"
- ]
- },
- {
- "cell_type": "markdown",
+ "execution_count": 5,
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " antecedents | \n",
+ " consequents | \n",
+ " antecedent support | \n",
+ " consequent support | \n",
+ " support | \n",
+ " confidence | \n",
+ " lift | \n",
+ " representativity | \n",
+ " leverage | \n",
+ " conviction | \n",
+ " zhangs_metric | \n",
+ " jaccard | \n",
+ " certainty | \n",
+ " kulczynski | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " (Onion) | \n",
+ " (Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " (Eggs) | \n",
+ " (Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " (Kidney Beans, Onion) | \n",
+ " (Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " (Kidney Beans, Eggs) | \n",
+ " (Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " (Onion) | \n",
+ " (Kidney Beans, Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " (Eggs) | \n",
+ " (Kidney Beans, Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " antecedents consequents antecedent support \\\n",
+ "0 (Onion) (Eggs) 0.6 \n",
+ "1 (Eggs) (Onion) 0.8 \n",
+ "2 (Kidney Beans, Onion) (Eggs) 0.6 \n",
+ "3 (Kidney Beans, Eggs) (Onion) 0.8 \n",
+ "4 (Onion) (Kidney Beans, Eggs) 0.6 \n",
+ "5 (Eggs) (Kidney Beans, Onion) 0.8 \n",
+ "\n",
+ " consequent support support confidence lift representativity leverage \\\n",
+ "0 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "1 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "2 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "3 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "4 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "5 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "\n",
+ " conviction zhangs_metric jaccard certainty kulczynski \n",
+ "0 inf 0.5 0.75 1.000 0.875 \n",
+ "1 1.6 1.0 0.75 0.375 0.875 \n",
+ "2 inf 0.5 0.75 1.000 0.875 \n",
+ "3 1.6 1.0 0.75 0.375 0.875 \n",
+ "4 inf 0.5 0.75 1.000 0.875 \n",
+ "5 1.6 1.0 0.75 0.375 0.875 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "Pandas `DataFrames` make it easy to filter the results further. Let's say we are ony interested in rules that satisfy the following criteria:\n",
- "\n",
- "1. at least 2 antecedents\n",
- "2. a confidence > 0.75\n",
- "3. a lift score > 1.2\n",
- "\n",
- "We could compute the antecedent length as follows:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "rules[\"antecedent_len\"] = rules[\"antecedents\"].apply(lambda x: len(x))\n",
+ "rules = association_rules(frequent_itemsets, metric=\"lift\", min_threshold=1.2, num_itemsets=len(df.index))\n",
"rules"
]
},
@@ -305,74 +865,514 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Then, we can use pandas' selection syntax as shown below:"
+ "Pandas `DataFrames` make it easy to filter the results further. Let's say we are only interested in rules that satisfy the following criteria:\n",
+ "\n",
+ "1. at least 2 antecedents\n",
+ "2. a confidence > 0.75\n",
+ "3. a lift score > 1.2\n",
+ "\n",
+ "We could compute the antecedent length as follows:"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " antecedents | \n",
+ " consequents | \n",
+ " antecedent support | \n",
+ " consequent support | \n",
+ " support | \n",
+ " confidence | \n",
+ " lift | \n",
+ " representativity | \n",
+ " leverage | \n",
+ " conviction | \n",
+ " zhangs_metric | \n",
+ " jaccard | \n",
+ " certainty | \n",
+ " kulczynski | \n",
+ " antecedent_len | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " (Onion) | \n",
+ " (Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " (Eggs) | \n",
+ " (Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " (Kidney Beans, Onion) | \n",
+ " (Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " (Kidney Beans, Eggs) | \n",
+ " (Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " (Onion) | \n",
+ " (Kidney Beans, Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " (Eggs) | \n",
+ " (Kidney Beans, Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " antecedents consequents antecedent support \\\n",
+ "0 (Onion) (Eggs) 0.6 \n",
+ "1 (Eggs) (Onion) 0.8 \n",
+ "2 (Kidney Beans, Onion) (Eggs) 0.6 \n",
+ "3 (Kidney Beans, Eggs) (Onion) 0.8 \n",
+ "4 (Onion) (Kidney Beans, Eggs) 0.6 \n",
+ "5 (Eggs) (Kidney Beans, Onion) 0.8 \n",
+ "\n",
+ " consequent support support confidence lift representativity leverage \\\n",
+ "0 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "1 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "2 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "3 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "4 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "5 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "\n",
+ " conviction zhangs_metric jaccard certainty kulczynski antecedent_len \n",
+ "0 inf 0.5 0.75 1.000 0.875 1 \n",
+ "1 1.6 1.0 0.75 0.375 0.875 1 \n",
+ "2 inf 0.5 0.75 1.000 0.875 2 \n",
+ "3 1.6 1.0 0.75 0.375 0.875 2 \n",
+ "4 inf 0.5 0.75 1.000 0.875 1 \n",
+ "5 1.6 1.0 0.75 0.375 0.875 1 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "rules[ (rules['antecedent_len'] >= 2) &\n",
- " (rules['confidence'] > 0.75) &\n",
- " (rules['lift'] > 1.2) ]"
+ "rules[\"antecedent_len\"] = rules[\"antecedents\"].apply(lambda x: len(x))\n",
+ "rules"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Similarly, using the Pandas API, we can select entries based on the \"antecedents\" or \"consequents\" columns:"
+ "Then, we can use pandas' selection syntax as shown below:"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "rules[rules['antecedents'] == {'Eggs', 'Kidney Beans'}]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Frozensets**\n",
- "\n",
- "Note that the entries in the \"itemsets\" column are of type `frozenset`, which is built-in Python type that is similar to a Python `set` but immutable, which makes it more efficient for certain query or comparison operations (https://docs.python.org/3.6/library/stdtypes.html#frozenset). Since `frozenset`s are sets, the item order does not matter. I.e., the query\n",
- "\n",
- "`rules[rules['antecedents'] == {'Eggs', 'Kidney Beans'}]`\n",
- " \n",
- "is equivalent to any of the following three\n",
- "\n",
- "- `rules[rules['antecedents'] == {'Kidney Beans', 'Eggs'}]`\n",
- "- `rules[rules['antecedents'] == frozenset(('Eggs', 'Kidney Beans'))]`\n",
- "- `rules[rules['antecedents'] == frozenset(('Kidney Beans', 'Eggs'))]`\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Example 3 -- Frequent Itemsets with Incomplete Antecedent and Consequent Information"
- ]
- },
- {
- "cell_type": "markdown",
+ "execution_count": 7,
"metadata": {},
- "source": [
- "Most metrics computed by `association_rules` depends on the consequent and antecedent support score of a given rule provided in the frequent itemset input DataFrame. Consider the following example:"
- ]
- },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " antecedents | \n",
+ " consequents | \n",
+ " antecedent support | \n",
+ " consequent support | \n",
+ " support | \n",
+ " confidence | \n",
+ " lift | \n",
+ " representativity | \n",
+ " leverage | \n",
+ " conviction | \n",
+ " zhangs_metric | \n",
+ " jaccard | \n",
+ " certainty | \n",
+ " kulczynski | \n",
+ " antecedent_len | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " (Kidney Beans, Onion) | \n",
+ " (Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.0 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.0 | \n",
+ " 0.875 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " antecedents consequents antecedent support consequent support \\\n",
+ "2 (Kidney Beans, Onion) (Eggs) 0.6 0.8 \n",
+ "\n",
+ " support confidence lift representativity leverage conviction \\\n",
+ "2 0.6 1.0 1.25 1.0 0.12 inf \n",
+ "\n",
+ " zhangs_metric jaccard certainty kulczynski antecedent_len \n",
+ "2 0.5 0.75 1.0 0.875 2 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rules[ (rules['antecedent_len'] >= 2) &\n",
+ " (rules['confidence'] > 0.75) &\n",
+ " (rules['lift'] > 1.2) ]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Similarly, using the Pandas API, we can select entries based on the \"antecedents\" or \"consequents\" columns:"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " antecedents | \n",
+ " consequents | \n",
+ " antecedent support | \n",
+ " consequent support | \n",
+ " support | \n",
+ " confidence | \n",
+ " lift | \n",
+ " representativity | \n",
+ " leverage | \n",
+ " conviction | \n",
+ " zhangs_metric | \n",
+ " jaccard | \n",
+ " certainty | \n",
+ " kulczynski | \n",
+ " antecedent_len | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " (Kidney Beans, Eggs) | \n",
+ " (Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " antecedents consequents antecedent support consequent support \\\n",
+ "3 (Kidney Beans, Eggs) (Onion) 0.8 0.6 \n",
+ "\n",
+ " support confidence lift representativity leverage conviction \\\n",
+ "3 0.6 0.75 1.25 1.0 0.12 1.6 \n",
+ "\n",
+ " zhangs_metric jaccard certainty kulczynski antecedent_len \n",
+ "3 1.0 0.75 0.375 0.875 2 "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rules[rules['antecedents'] == {'Eggs', 'Kidney Beans'}]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Frozensets**\n",
+ "\n",
+ "Note that the entries in the \"itemsets\" column are of type `frozenset`, which is a built-in Python type that is similar to a Python `set` but immutable, which makes it more efficient for certain query or comparison operations (https://docs.python.org/3.6/library/stdtypes.html#frozenset). Since `frozenset`s are sets, the item order does not matter. I.e., the query\n",
+ "\n",
+ "`rules[rules['antecedents'] == {'Eggs', 'Kidney Beans'}]`\n",
+ " \n",
+ "is equivalent to any of the following three\n",
+ "\n",
+ "- `rules[rules['antecedents'] == {'Kidney Beans', 'Eggs'}]`\n",
+ "- `rules[rules['antecedents'] == frozenset(('Eggs', 'Kidney Beans'))]`\n",
+ "- `rules[rules['antecedents'] == frozenset(('Kidney Beans', 'Eggs'))]`\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Example 3 -- Frequent Itemsets with Incomplete Antecedent and Consequent Information"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Most metrics computed by `association_rules` depend on the consequent and antecedent support score of a given rule provided in the frequent itemset input DataFrame. Consider the following example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " itemsets | \n",
+ " support | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " [177, 176] | \n",
+ " 0.253623 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " [177, 179] | \n",
+ " 0.253623 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " [176, 178] | \n",
+ " 0.217391 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " [176, 179] | \n",
+ " 0.217391 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " [93, 100] | \n",
+ " 0.181159 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " [177, 178] | \n",
+ " 0.108696 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " [177, 176, 178] | \n",
+ " 0.108696 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " itemsets support\n",
+ "0 [177, 176] 0.253623\n",
+ "1 [177, 179] 0.253623\n",
+ "2 [176, 178] 0.217391\n",
+ "3 [176, 179] 0.217391\n",
+ "4 [93, 100] 0.181159\n",
+ "5 [177, 178] 0.108696\n",
+ "6 [177, 176, 178] 0.108696"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import pandas as pd\n",
"\n",
@@ -409,28 +1409,609 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from mlxtend.frequent_patterns import association_rules\n",
- "\n",
- "res = association_rules(freq_itemsets, support_only=True, min_threshold=0.1)\n",
- "res"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "To clean up the representation, you may want to do the following:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " antecedents | \n",
+ " consequents | \n",
+ " antecedent support | \n",
+ " consequent support | \n",
+ " support | \n",
+ " confidence | \n",
+ " lift | \n",
+ " representativity | \n",
+ " leverage | \n",
+ " conviction | \n",
+ " zhangs_metric | \n",
+ " jaccard | \n",
+ " certainty | \n",
+ " kulczynski | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " (176) | \n",
+ " (177) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.253623 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " (177) | \n",
+ " (176) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.253623 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " (179) | \n",
+ " (177) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.253623 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " (177) | \n",
+ " (179) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.253623 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " (176) | \n",
+ " (178) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.217391 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " (178) | \n",
+ " (176) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.217391 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " (176) | \n",
+ " (179) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.217391 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " (179) | \n",
+ " (176) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.217391 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " (100) | \n",
+ " (93) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.181159 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " (93) | \n",
+ " (100) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.181159 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " (178) | \n",
+ " (177) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.108696 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " (177) | \n",
+ " (178) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.108696 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " (176, 178) | \n",
+ " (177) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.108696 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " (176, 177) | \n",
+ " (178) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.108696 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " (178, 177) | \n",
+ " (176) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.108696 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " (176) | \n",
+ " (178, 177) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.108696 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " (178) | \n",
+ " (176, 177) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.108696 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " (177) | \n",
+ " (176, 178) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.108696 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " antecedents consequents antecedent support consequent support support \\\n",
+ "0 (176) (177) NaN NaN 0.253623 \n",
+ "1 (177) (176) NaN NaN 0.253623 \n",
+ "2 (179) (177) NaN NaN 0.253623 \n",
+ "3 (177) (179) NaN NaN 0.253623 \n",
+ "4 (176) (178) NaN NaN 0.217391 \n",
+ "5 (178) (176) NaN NaN 0.217391 \n",
+ "6 (176) (179) NaN NaN 0.217391 \n",
+ "7 (179) (176) NaN NaN 0.217391 \n",
+ "8 (100) (93) NaN NaN 0.181159 \n",
+ "9 (93) (100) NaN NaN 0.181159 \n",
+ "10 (178) (177) NaN NaN 0.108696 \n",
+ "11 (177) (178) NaN NaN 0.108696 \n",
+ "12 (176, 178) (177) NaN NaN 0.108696 \n",
+ "13 (176, 177) (178) NaN NaN 0.108696 \n",
+ "14 (178, 177) (176) NaN NaN 0.108696 \n",
+ "15 (176) (178, 177) NaN NaN 0.108696 \n",
+ "16 (178) (176, 177) NaN NaN 0.108696 \n",
+ "17 (177) (176, 178) NaN NaN 0.108696 \n",
+ "\n",
+ " confidence lift representativity leverage conviction zhangs_metric \\\n",
+ "0 NaN NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN NaN NaN \n",
+ "3 NaN NaN NaN NaN NaN NaN \n",
+ "4 NaN NaN NaN NaN NaN NaN \n",
+ "5 NaN NaN NaN NaN NaN NaN \n",
+ "6 NaN NaN NaN NaN NaN NaN \n",
+ "7 NaN NaN NaN NaN NaN NaN \n",
+ "8 NaN NaN NaN NaN NaN NaN \n",
+ "9 NaN NaN NaN NaN NaN NaN \n",
+ "10 NaN NaN NaN NaN NaN NaN \n",
+ "11 NaN NaN NaN NaN NaN NaN \n",
+ "12 NaN NaN NaN NaN NaN NaN \n",
+ "13 NaN NaN NaN NaN NaN NaN \n",
+ "14 NaN NaN NaN NaN NaN NaN \n",
+ "15 NaN NaN NaN NaN NaN NaN \n",
+ "16 NaN NaN NaN NaN NaN NaN \n",
+ "17 NaN NaN NaN NaN NaN NaN \n",
+ "\n",
+ " jaccard certainty kulczynski \n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "5 NaN NaN NaN \n",
+ "6 NaN NaN NaN \n",
+ "7 NaN NaN NaN \n",
+ "8 NaN NaN NaN \n",
+ "9 NaN NaN NaN \n",
+ "10 NaN NaN NaN \n",
+ "11 NaN NaN NaN \n",
+ "12 NaN NaN NaN \n",
+ "13 NaN NaN NaN \n",
+ "14 NaN NaN NaN \n",
+ "15 NaN NaN NaN \n",
+ "16 NaN NaN NaN \n",
+ "17 NaN NaN NaN "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from mlxtend.frequent_patterns import association_rules\n",
+ "\n",
+ "res = association_rules(freq_itemsets, support_only=True, min_threshold=0.1, num_itemsets=0)\n",
+ "res"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To clean up the representation, you may want to do the following:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " antecedents | \n",
+ " consequents | \n",
+ " support | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " (176) | \n",
+ " (177) | \n",
+ " 0.253623 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " (177) | \n",
+ " (176) | \n",
+ " 0.253623 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " (179) | \n",
+ " (177) | \n",
+ " 0.253623 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " (177) | \n",
+ " (179) | \n",
+ " 0.253623 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " (176) | \n",
+ " (178) | \n",
+ " 0.217391 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " (178) | \n",
+ " (176) | \n",
+ " 0.217391 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " (176) | \n",
+ " (179) | \n",
+ " 0.217391 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " (179) | \n",
+ " (176) | \n",
+ " 0.217391 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " (100) | \n",
+ " (93) | \n",
+ " 0.181159 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " (93) | \n",
+ " (100) | \n",
+ " 0.181159 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " (178) | \n",
+ " (177) | \n",
+ " 0.108696 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " (177) | \n",
+ " (178) | \n",
+ " 0.108696 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " (176, 178) | \n",
+ " (177) | \n",
+ " 0.108696 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " (176, 177) | \n",
+ " (178) | \n",
+ " 0.108696 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " (178, 177) | \n",
+ " (176) | \n",
+ " 0.108696 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " (176) | \n",
+ " (178, 177) | \n",
+ " 0.108696 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " (178) | \n",
+ " (176, 177) | \n",
+ " 0.108696 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " (177) | \n",
+ " (176, 178) | \n",
+ " 0.108696 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " antecedents consequents support\n",
+ "0 (176) (177) 0.253623\n",
+ "1 (177) (176) 0.253623\n",
+ "2 (179) (177) 0.253623\n",
+ "3 (177) (179) 0.253623\n",
+ "4 (176) (178) 0.217391\n",
+ "5 (178) (176) 0.217391\n",
+ "6 (176) (179) 0.217391\n",
+ "7 (179) (176) 0.217391\n",
+ "8 (100) (93) 0.181159\n",
+ "9 (93) (100) 0.181159\n",
+ "10 (178) (177) 0.108696\n",
+ "11 (177) (178) 0.108696\n",
+ "12 (176, 178) (177) 0.108696\n",
+ "13 (176, 177) (178) 0.108696\n",
+ "14 (178, 177) (176) 0.108696\n",
+ "15 (176) (178, 177) 0.108696\n",
+ "16 (178) (176, 177) 0.108696\n",
+ "17 (177) (176, 178) 0.108696"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"res = res[['antecedents', 'consequents', 'support']]\n",
"res"
@@ -453,9 +2034,184 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " antecedents | \n",
+ " consequents | \n",
+ " antecedent support | \n",
+ " consequent support | \n",
+ " support | \n",
+ " confidence | \n",
+ " lift | \n",
+ " representativity | \n",
+ " leverage | \n",
+ " conviction | \n",
+ " zhangs_metric | \n",
+ " jaccard | \n",
+ " certainty | \n",
+ " kulczynski | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " (Onion) | \n",
+ " (Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " (Eggs) | \n",
+ " (Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " (Kidney Beans, Onion) | \n",
+ " (Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " (Kidney Beans, Eggs) | \n",
+ " (Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " (Onion) | \n",
+ " (Kidney Beans, Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " (Eggs) | \n",
+ " (Kidney Beans, Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " antecedents consequents antecedent support \\\n",
+ "0 (Onion) (Eggs) 0.6 \n",
+ "1 (Eggs) (Onion) 0.8 \n",
+ "2 (Kidney Beans, Onion) (Eggs) 0.6 \n",
+ "3 (Kidney Beans, Eggs) (Onion) 0.8 \n",
+ "4 (Onion) (Kidney Beans, Eggs) 0.6 \n",
+ "5 (Eggs) (Kidney Beans, Onion) 0.8 \n",
+ "\n",
+ " consequent support support confidence lift representativity leverage \\\n",
+ "0 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "1 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "2 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "3 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "4 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "5 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "\n",
+ " conviction zhangs_metric jaccard certainty kulczynski \n",
+ "0 inf 0.5 0.75 1.000 0.875 \n",
+ "1 1.6 1.0 0.75 0.375 0.875 \n",
+ "2 inf 0.5 0.75 1.000 0.875 \n",
+ "3 1.6 1.0 0.75 0.375 0.875 \n",
+ "4 inf 0.5 0.75 1.000 0.875 \n",
+ "5 1.6 1.0 0.75 0.375 0.875 "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import pandas as pd\n",
"from mlxtend.preprocessing import TransactionEncoder\n",
@@ -474,7 +2230,7 @@
"df = pd.DataFrame(te_ary, columns=te.columns_)\n",
"\n",
"frequent_itemsets = fpgrowth(df, min_support=0.6, use_colnames=True)\n",
- "rules = association_rules(frequent_itemsets, metric=\"lift\", min_threshold=1.2)\n",
+ "rules = association_rules(frequent_itemsets, metric=\"lift\", min_threshold=1.2, num_itemsets=len(df.index))\n",
"rules"
]
},
@@ -487,9 +2243,164 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " antecedents | \n",
+ " consequents | \n",
+ " antecedent support | \n",
+ " consequent support | \n",
+ " support | \n",
+ " confidence | \n",
+ " lift | \n",
+ " representativity | \n",
+ " leverage | \n",
+ " conviction | \n",
+ " zhangs_metric | \n",
+ " jaccard | \n",
+ " certainty | \n",
+ " kulczynski | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " (Onion) | \n",
+ " (Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " (Eggs) | \n",
+ " (Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " (Kidney Beans, Eggs) | \n",
+ " (Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " (Onion) | \n",
+ " (Kidney Beans, Eggs) | \n",
+ " 0.6 | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 1.00 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " inf | \n",
+ " 0.5 | \n",
+ " 0.75 | \n",
+ " 1.000 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " (Eggs) | \n",
+ " (Kidney Beans, Onion) | \n",
+ " 0.8 | \n",
+ " 0.6 | \n",
+ " 0.6 | \n",
+ " 0.75 | \n",
+ " 1.25 | \n",
+ " 1.0 | \n",
+ " 0.12 | \n",
+ " 1.6 | \n",
+ " 1.0 | \n",
+ " 0.75 | \n",
+ " 0.375 | \n",
+ " 0.875 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " antecedents consequents antecedent support \\\n",
+ "0 (Onion) (Eggs) 0.6 \n",
+ "1 (Eggs) (Onion) 0.8 \n",
+ "3 (Kidney Beans, Eggs) (Onion) 0.8 \n",
+ "4 (Onion) (Kidney Beans, Eggs) 0.6 \n",
+ "5 (Eggs) (Kidney Beans, Onion) 0.8 \n",
+ "\n",
+ " consequent support support confidence lift representativity leverage \\\n",
+ "0 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "1 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "3 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "4 0.8 0.6 1.00 1.25 1.0 0.12 \n",
+ "5 0.6 0.6 0.75 1.25 1.0 0.12 \n",
+ "\n",
+ " conviction zhangs_metric jaccard certainty kulczynski \n",
+ "0 inf 0.5 0.75 1.000 0.875 \n",
+ "1 1.6 1.0 0.75 0.375 0.875 \n",
+ "3 1.6 1.0 0.75 0.375 0.875 \n",
+ "4 inf 0.5 0.75 1.000 0.875 \n",
+ "5 1.6 1.0 0.75 0.375 0.875 "
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"antecedent_sele = rules['antecedents'] == frozenset({'Onion', 'Kidney Beans'}) # or frozenset({'Kidney Beans', 'Onion'})\n",
"consequent_sele = rules['consequents'] == frozenset({'Eggs'})\n",
@@ -507,24 +2418,26 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
+ "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
+ " df.iloc[idx[i], col[i]] = np.nan\n",
+ "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
" df.iloc[idx[i], col[i]] = np.nan\n",
- "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
+ "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
" df.iloc[idx[i], col[i]] = np.nan\n",
- "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
+ "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
" df.iloc[idx[i], col[i]] = np.nan\n",
- "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
+ "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
" df.iloc[idx[i], col[i]] = np.nan\n",
- "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
+ "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
" df.iloc[idx[i], col[i]] = np.nan\n",
- "C:\\Users\\User\\AppData\\Local\\Temp\\ipykernel_10132\\2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
+ "/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
" df.iloc[idx[i], col[i]] = np.nan\n"
]
},
@@ -573,21 +2486,21 @@
" True | \n",
" True | \n",
" True | \n",
- " NaN | \n",
- " False | \n",
" True | \n",
+ " False | \n",
+ " NaN | \n",
" \n",
" \n",
" 1 | \n",
" False | \n",
- " NaN | \n",
- " True | \n",
- " True | \n",
" False | \n",
" True | \n",
- " False | \n",
" True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
" True | \n",
+ " NaN | \n",
" False | \n",
" NaN | \n",
"
\n",
@@ -595,15 +2508,15 @@
" 2 | \n",
" True | \n",
" False | \n",
- " NaN | \n",
- " NaN | \n",
+ " False | \n",
+ " True | \n",
" False | \n",
" True | \n",
" True | \n",
" False | \n",
" False | \n",
" False | \n",
- " NaN | \n",
+ " False | \n",
" \n",
" \n",
" 3 | \n",
@@ -613,7 +2526,7 @@
" False | \n",
" False | \n",
" True | \n",
- " NaN | \n",
+ " True | \n",
" False | \n",
" False | \n",
" True | \n",
@@ -624,13 +2537,13 @@
" False | \n",
" NaN | \n",
" False | \n",
- " NaN | \n",
" True | \n",
+ " NaN | \n",
" True | \n",
" False | \n",
" False | \n",
- " True | \n",
- " False | \n",
+ " NaN | \n",
+ " NaN | \n",
" False | \n",
"
\n",
" \n",
@@ -638,22 +2551,22 @@
""
],
"text/plain": [
- " Apple Corn Dill Eggs Ice cream Kidney Beans Milk Nutmeg Onion \\\n",
- "0 False False False True False True True True NaN \n",
- "1 False NaN True True False True False True True \n",
- "2 True False NaN NaN False True True False False \n",
- "3 False True False False False True NaN False False \n",
- "4 False NaN False NaN True True False False True \n",
- "\n",
- " Unicorn Yogurt \n",
- "0 False True \n",
- "1 False NaN \n",
- "2 False NaN \n",
- "3 True True \n",
- "4 False False "
+ " Apple Corn Dill Eggs Ice cream Kidney Beans Milk Nutmeg Onion \\\n",
+ "0 False False False True False True True True True \n",
+ "1 False False True True False NaN NaN True NaN \n",
+ "2 True False False True False True True False False \n",
+ "3 False True False False False True True False False \n",
+ "4 False NaN False True NaN True False False NaN \n",
+ "\n",
+ " Unicorn Yogurt \n",
+ "0 False NaN \n",
+ "1 False NaN \n",
+ "2 False False \n",
+ "3 True True \n",
+ "4 NaN False "
]
},
- "execution_count": 30,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -695,14 +2608,14 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "C:\\Users\\User\\OneDrive\\Υπολογιστής\\KONSTANTINOS-LAPTOP-C3JUH2VS\\KONSTANTINOS\\PROJECTS\\mlxtend\\mlxtend\\frequent_patterns\\association_rules.py:172: RuntimeWarning: invalid value encountered in divide\n",
+ "/home/marcelo/anaconda3/envs/analysis/lib/python3.10/site-packages/mlxtend/frequent_patterns/association_rules.py:182: RuntimeWarning: invalid value encountered in divide\n",
" cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom)\n"
]
},
@@ -746,37 +2659,37 @@
" \n",
" \n",
" 0 | \n",
- " (Yogurt) | \n",
+ " (Eggs) | \n",
" (Kidney Beans) | \n",
- " 0.666667 | \n",
+ " 0.80 | \n",
" 1.0 | \n",
- " 0.666667 | \n",
+ " 0.75 | \n",
" 1.0 | \n",
" 1.0 | \n",
- " 0.6 | \n",
- " 0.0 | \n",
+ " 0.8 | \n",
+ " -0.05 | \n",
" inf | \n",
+ " -0.25 | \n",
+ " 0.714286 | \n",
" 0.0 | \n",
- " 0.666667 | \n",
- " 0.0 | \n",
- " 0.833333 | \n",
+ " 0.875 | \n",
"
\n",
" \n",
" 1 | \n",
- " (Eggs) | \n",
+ " (Milk) | \n",
" (Kidney Beans) | \n",
- " 0.666667 | \n",
+ " 0.75 | \n",
" 1.0 | \n",
- " 0.666667 | \n",
+ " 0.75 | \n",
" 1.0 | \n",
" 1.0 | \n",
- " 0.6 | \n",
- " 0.0 | \n",
+ " 0.8 | \n",
+ " 0.00 | \n",
" inf | \n",
+ " 0.00 | \n",
+ " 0.750000 | \n",
" 0.0 | \n",
- " 0.666667 | \n",
- " 0.0 | \n",
- " 0.833333 | \n",
+ " 0.875 | \n",
"
\n",
" \n",
"\n",
@@ -784,19 +2697,19 @@
],
"text/plain": [
" antecedents consequents antecedent support consequent support \\\n",
- "0 (Yogurt) (Kidney Beans) 0.666667 1.0 \n",
- "1 (Eggs) (Kidney Beans) 0.666667 1.0 \n",
+ "0 (Eggs) (Kidney Beans) 0.80 1.0 \n",
+ "1 (Milk) (Kidney Beans) 0.75 1.0 \n",
"\n",
- " support confidence lift representativity leverage conviction \\\n",
- "0 0.666667 1.0 1.0 0.6 0.0 inf \n",
- "1 0.666667 1.0 1.0 0.6 0.0 inf \n",
+ " support confidence lift representativity leverage conviction \\\n",
+ "0 0.75 1.0 1.0 0.8 -0.05 inf \n",
+ "1 0.75 1.0 1.0 0.8 0.00 inf \n",
"\n",
" zhangs_metric jaccard certainty kulczynski \n",
- "0 0.0 0.666667 0.0 0.833333 \n",
- "1 0.0 0.666667 0.0 0.833333 "
+ "0 -0.25 0.714286 0.0 0.875 \n",
+ "1 0.00 0.750000 0.0 0.875 "
]
},
- "execution_count": 31,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -804,7 +2717,7 @@
"source": [
"frequent_itemsets = fpgrowth(df, min_support=0.6, null_values = True, use_colnames=True)\n",
"# frequent_itemsets = fpmax(df, min_support=0.6, null_values = True, use_colnames=True)\n",
- "rules = association_rules(frequent_itemsets, df, len(df), null_values = True, metric=\"confidence\", min_threshold=0.8)\n",
+ "rules = association_rules(frequent_itemsets, len(df), df, null_values = True, metric=\"confidence\", min_threshold=0.8)\n",
"rules"
]
},
@@ -829,7 +2742,7 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -843,7 +2756,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.12.7"
+ "version": "3.10.13"
},
"toc": {
"nav_menu": {},