Download as pdf or txt
Download as pdf or txt
You are on page 1of 6

KDD & Data Mining

Lab Experiment No 7 : FP Growth Algorithm

Name:-Prathamesh More

PRN:- 20200802214

Aim : To perform FP Growth Algorithm on the given dataset (market-basket-optimisation.csv) using

1. By creating functions.
2. By using NumPy and FP Growth library.

And validating the results

By Creating Function

In [ ]: import numpy as np
import pandas as pd
from future import division, print_function
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules
import warnings
warnings.filterwarnings('ignore')

In [ ]: %pip install mlxtend --upgrade


In [ ]: # Define the Node class for the FP-Tree
class Node:
def init (self, item, count, parent):
self.item = item
self.count = count
self.parent = parent
self.children = {}

def increment(self, count):


self.count += count

# Create the FP-Tree


def create_fp_tree(data, min_support):
# Count the frequency of each item in the dataset
item_counts = {}
for transaction in data:
for item in transaction:
if item in item_counts:
item_counts[item] += 1
else:
item_counts[item] = 1

# Remove infrequent items from the dataset


data = [[item for item in transaction if item_counts[item] >= min_support] for trans
Loading [MathJax]/extensions/Safe.js
# Sort the items in each transaction by their frequency
data = [sorted(transaction, key=lambda item: item_counts[item], reverse=True) for tr

# Create the root node of the FP-Tree


root = Node(None, 0, None)

# Add each transaction to the FP-Tree


for transaction in data:
current_node = root
for item in transaction:
if item in current_node.children:
child_node = current_node.children[item]
child_node.increment(1)
else:
child_node = Node(item, 1, current_node)
current_node.children[item] = child_node
current_node = child_node

return root, item_counts

In [ ]: # Define the FP-Growth algorithm


def fp_growth(data, min_support):
# Create the FP-Tree
root, item_counts = create_fp_tree(data, min_support)

# Mine the FP-Tree for frequent itemsets


itemset_list = []
mine_fp_tree(root, [], itemset_list, min_support)

# Return the frequent itemsets and their counts


return itemset_list

# Define the function to recursively mine the FP-Tree for frequent itemsets
def mine_fp_tree(node, prefix, itemset_list, min_support):
if node.count >= min_support:
itemset = prefix + [node.item]
itemset_list.append((itemset, node.count))
for child_node in node.children.values():
mine_fp_tree(child_node, prefix + [node.item], itemset_list, min_support)

In [ ]: df = pd.read_csv('/content/Market_Basket_Optimisation.csv', header=None)

transaction = []

for i in df.itertuples():
l = set(list(i))
transaction.append([i for i in l if (str(i)!="nan" and type(i)!=int)])

len(transaction)

7501
Out[ ]:

In [ ]: itemsets = fp_growth(transaction,150)
itemsets

Loading [MathJax]/extensions/Safe.js
[([None, 'mineral water'], 1788),
Out[ ]:
([None, 'mineral water', 'eggs'], 382),
([None, 'mineral water', 'spaghetti'], 341),
([None, 'mineral water', 'chocolate'], 174),
([None, 'eggs'], 966),
([None, 'eggs', 'french fries'], 184),
([None, 'eggs', 'spaghetti'], 167),
([None, 'french fries'], 714),
([None, 'spaghetti'], 691),
([None, 'cookies'], 305),
([None, 'chocolate'], 434),
([None, 'green tea'], 360),
([None, 'escalope'], 177),
([None, 'milk'], 215)]

Using Libraries in-built func

In [ ]: df = pd.read_csv("/content/Market_Basket_Optimisation.csv", names=[i for i in range(20)]


df

Out[ ]: 0 1 2 3 4 5 6 7 8 9 10 11

whole low
vegetables green cottage energy tomato green
0 shrimp almonds avocado weat yams fat
mix grapes cheese drink juice tea
flour yogurt

1 burgers meatballs eggs NaN NaN NaN NaN NaN NaN NaN NaN NaN

2 chutney NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

3 turkey avocado NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

mineral energy whole


4 milk green tea NaN NaN NaN NaN NaN NaN NaN
water bar wheat rice

... ... ... ... ... ... ... ... ... ... ... ... ...
fresh
7496 butter light mayo NaN NaN NaN NaN NaN NaN NaN NaN NaN
bread

frozen french green


7497 burgers eggs magazines NaN NaN NaN NaN NaN NaN
vegetables fries tea

7498 chicken NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

7499 escalope green tea NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

frozen yogurt low fat NaN NaN NaN NaN NaN NaN NaN NaN
7500 eggs
smoothie cake yogurt

7501 rows × 20 columns

In [ ]: transaction = []

for i in df.itertuples():
l = set(list(i))
transaction.append([i for i in l if (str(i)!="nan" and type(i)!=int)])

len(transaction)

7501
Out[ ]:

In [ ]: t = TransactionEncoder()
t_arr = t.fit_transform(transaction)

data = pd.DataFrame(t_arr, columns=t.columns_)


data
Loading [MathJax]/extensions/Safe.js
Out[ ]: antioxydant babies barbecue black
asparagus almonds asparagus avocado bacon blueberries ...
juice food sauce tea

0 False True True False True False False False False False ...

1 False False False False False False False False False False ...

2 False False False False False False False False False False ...

3 False False False False True False False False False False ...

4 False False False False False False False False False False ...

... ... ... ... ... ... ... ... ... ... ... ...

7496 False False False False False False False False False False ...

7497 False False False False False False False False False False ...

7498 False False False False False False False False False False ...

7499 False False False False False False False False False False ...

7500 False False False False False False False False False False ...

7501 rows × 120 columns

In [ ]: res = fpgrowth(data, min_support=0.05, use_colnames=True)


res

Loading [MathJax]/extensions/Safe.js
Out[ ]: support itemsets

0 0.238368 (mineral water)

1 0.132116 (green tea)

2 0.076523 (low fat yogurt)

3 0.071457 (shrimp)

4 0.065858 (olive oil)

5 0.063325 (frozen smoothie)

6 0.179709 (eggs)

7 0.087188 (burgers)

8 0.062525 (turkey)

9 0.129583 (milk)

10 0.058526 (whole wheat rice)

11 0.170911 (french fries)

12 0.050527 (soup)

13 0.174110 (spaghetti)

14 0.095321 (frozen vegetables)

15 0.080389 (cookies)

16 0.051060 (cooking oil)

17 0.163845 (chocolate)

18 0.059992 (chicken)

19 0.068391 (tomatoes)

20 0.095054 (pancakes)

21 0.052393 (grated cheese)

22 0.098254 (ground beef)

23 0.079323 (escalope)

24 0.081056 (cake)

25 0.050927 (mineral water, eggs)

26 0.059725 (mineral water, spaghetti)

27 0.052660 (mineral water, chocolate)

In [ ]: res = association_rules(res,metric="confidence", min_threshold=0.06)


res

Loading [MathJax]/extensions/Safe.js
Out[ ]: antecedent consequent
antecedents consequents support confidence lift leverage conviction
support support

(mineral
0 (eggs) 0.238368 0.179709 0.050927 0.213647 1.188845 0.008090 1.043158
water)

(mineral
1 (eggs) 0.179709 0.238368 0.050927 0.283383 1.188845 0.008090 1.062815
water)
(mineral
2 (spaghetti) 0.238368 0.174110 0.059725 0.250559 1.439085 0.018223 1.102008
water)

(mineral
3 (spaghetti) 0.174110 0.238368 0.059725 0.343032 1.439085 0.018223 1.159314
water)
(mineral
4 (chocolate) 0.238368 0.163845 0.052660 0.220917 1.348332 0.013604 1.073256
water)

(mineral
5 (chocolate) 0.163845 0.238368 0.052660 0.321400 1.348332 0.013604 1.122357
water)

Loading [MathJax]/extensions/Safe.js

You might also like