Skip to content

Commit

Permalink
Merge pull request #360 from Tarun-Sreepada/main
Browse files Browse the repository at this point in the history
Removed printing code, testing code, and the low-memory (bitset mining) code
  • Loading branch information
vanithakattumuri authored Apr 10, 2024
2 parents 1a8c9de + 3f0fe7b commit 6cb318a
Showing 1 changed file with 60 additions and 93 deletions.
153 changes: 60 additions & 93 deletions PAMI/frequentPattern/basic/Apriori.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,7 @@ def _convert(self, value: Union[int, float, str]) -> Union[int, float]:
else:
value = int(value)
return value

def _lowMemory(self) -> None:
print()


@deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.")
def startMine(self) -> None:
Expand Down Expand Up @@ -264,67 +262,67 @@ def bitPacker(self, data, maxIndex):

return packed_bits

# @profile
def mineLowMemory(self) -> None:
"""
Frequent pattern mining process will start from here
# Bitset implementation
"""
self._startTime = _ab._time.time()

self._Database = []

self._creatingItemSets()

self._minSup = self._convert(self._minSup)

items = {}
index = 0
for line in self._Database:
for item in line:
if tuple([item]) in items:
items[tuple([item])].append(index)
else:
items[tuple([item])] = [index]
index += 1

# sort by length in descending order
items = dict(sorted(items.items(), key=lambda x: len(x[1]), reverse=True))
cands = []
for key in items:
if len(items[key]) >= self._minSup:
self._finalPatterns[key] = len(items[key])
cands.append(key)
items[key] = self.bitPacker(items[key], index)
else:
break

while cands:
newCands = []
for i in range(len(cands)):
for j in range(i + 1, len(cands)):
if cands[i][:-1] == cands[j][:-1]:
newCand = tuple(cands[i] + tuple([cands[j][-1]]))
intersection = items[tuple([newCand[0]])]
for k in range(1, len(newCand)):
intersection &= items[tuple([newCand[k]])]
count = int.bit_count(intersection)
if count >= self._minSup:
# items[newCand] = intersection
newCands.append(newCand)
self._finalPatterns[newCand] = count
else:
break
# # @profile
# def mineLowMemory(self) -> None:
# """
# Frequent pattern mining process will start from here
# # Bitset implementation
# """
# self._startTime = _ab._time.time()

# self._Database = []

# self._creatingItemSets()

# self._minSup = self._convert(self._minSup)

# items = {}
# index = 0
# for line in self._Database:
# for item in line:
# if tuple([item]) in items:
# items[tuple([item])].append(index)
# else:
# items[tuple([item])] = [index]
# index += 1

# # sort by length in descending order
# items = dict(sorted(items.items(), key=lambda x: len(x[1]), reverse=True))
# cands = []
# for key in items:
# if len(items[key]) >= self._minSup:
# self._finalPatterns[key] = len(items[key])
# cands.append(key)
# items[key] = self.bitPacker(items[key], index)
# else:
# break

# while cands:
# newCands = []
# for i in range(len(cands)):
# for j in range(i + 1, len(cands)):
# if cands[i][:-1] == cands[j][:-1]:
# newCand = tuple(cands[i] + tuple([cands[j][-1]]))
# intersection = items[tuple([newCand[0]])]
# for k in range(1, len(newCand)):
# intersection &= items[tuple([newCand[k]])]
# count = int.bit_count(intersection)
# if count >= self._minSup:
# # items[newCand] = intersection
# newCands.append(newCand)
# self._finalPatterns[newCand] = count
# else:
# break

cands = newCands
# cands = newCands

self._endTime = _ab._time.time()
process = _ab._psutil.Process(_ab._os.getpid())
self._memoryUSS = float()
self._memoryRSS = float()
self._memoryUSS = process.memory_full_info().uss
self._memoryRSS = process.memory_info().rss
print("Frequent patterns were generated successfully using Apriori algorithm ")
# self._endTime = _ab._time.time()
# process = _ab._psutil.Process(_ab._os.getpid())
# self._memoryUSS = float()
# self._memoryRSS = float()
# self._memoryUSS = process.memory_full_info().uss
# self._memoryRSS = process.memory_info().rss
# print("Frequent patterns were generated successfully using Apriori algorithm ")

def mine(self) -> None:
"""
Expand Down Expand Up @@ -501,34 +499,3 @@ def printResults(self) -> None:
print("Total ExecutionTime in ms:", _ap.getRuntime())
else:
print("Error! The number of input parameters do not match the total number of parameters provided")


minUtils = [150]

for minUtil in minUtils:
file = "/Users/tarunsreepada/Downloads/Transactional_T10I4D100K.csv"
obj = Apriori(file, minUtil, sep='\t')
obj.mineLowMemory()
# obj.mine()
print("Total number of Frequent Patterns:", len(obj.getPatterns()))
print("Total Memory in USS:", obj.getMemoryUSS())
print("Total Memory in RSS", obj.getMemoryRSS())
print("Total ExecutionTime in seconds:", obj.getRuntime())

# print()

# obj.mine()
# print("Total number of Frequent Patterns:", len(obj.getPatterns()))
# print("Total Memory in USS:", obj.getMemoryUSS())
# print("Total Memory in RSS", obj.getMemoryRSS())
# print("Total ExecutionTime in seconds:", obj.getRuntime())

# print()

obj = Apriori(file, minUtil, sep='\t')
obj.mine()
# obj.mine()
print("Total number of Frequent Patterns:", len(obj.getPatterns()))
print("Total Memory in USS:", obj.getMemoryUSS())
print("Total Memory in RSS", obj.getMemoryRSS())
print("Total ExecutionTime in seconds:", obj.getRuntime())

0 comments on commit 6cb318a

Please sign in to comment.