-
Notifications
You must be signed in to change notification settings - Fork 0
/
classify.py
40 lines (28 loc) · 1.4 KB
/
classify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import pandas as pd
import nltk
import numpy as np
from nltk.tokenize import word_tokenize
from nltk import tokenize
# Read the training CSV; whitespace that follows a delimiter is discarded.
csvdata = pd.read_csv(r'training_data.csv', delimiter=",", skipinitialspace=True)
# Materialise the frame as a numpy array, then hold its rows in a plain list.
csvdata1 = np.array(csvdata)
train = list(csvdata1)
######################################################################
# Naive Bayes text classification
######################################################################
def predict(line):
    """Classify *line* with a Naive Bayes model trained on ``train``.

    Each row of the module-level ``train`` list is read as
    ``(statement, label)``: ``row[0]`` is tokenized and ``row[1]`` is used
    as the class label. The vocabulary is the set of lower-cased tokens
    across all training statements, and every sample is represented as a
    ``{word: bool}`` presence map over that vocabulary.

    The model is retrained on every call.

    Args:
        line: Text to classify. Inputs of length 0 or 1 are not classified.

    Returns:
        The predicted label, or ``None`` when the input is too short or
        when classification fails. The label is also printed as
        ``label: <label>``; on failure ``Failed.`` is printed instead.
    """
    try:
        # Skip the (expensive) training step when there is nothing to classify.
        if len(line) <= 1:
            return None

        # Tokenize each training statement exactly once. Tokens are
        # lower-cased so they are comparable with the lower-cased vocabulary;
        # comparing raw tokens would leave the feature for any capitalised
        # training word permanently False.
        samples = [
            ({token.lower() for token in word_tokenize(row[0])}, row[1])
            for row in train
        ]
        wordlist = set().union(*(tokens for tokens, _ in samples))

        training_features = [
            ({word: (word in tokens) for word in wordlist}, label)
            for tokens, label in samples
        ]
        classifier = nltk.NaiveBayesClassifier.train(training_features)

        # Tokenize the input once; set membership keeps each lookup O(1).
        line_tokens = set(word_tokenize(line.lower()))
        test_data_features = {word: (word in line_tokens) for word in wordlist}

        label = classifier.classify(test_data_features)
        print("label:", label)
        return label
    except Exception:
        # Best-effort behaviour is preserved, but KeyboardInterrupt and
        # SystemExit are no longer swallowed as a bare ``except:`` would do.
        print("Failed.")
        return None
# Run the classifier once on a sample sentence when the script is executed.
predict(
    "I confirmed who will attend the FLL outreach, still need to confirm "
    "who will be able to go and get more specific response from Liquid "
    "Oxygen. The LO team wants to do some outreach events also, so we "
    "decided that they could do this event with us."
)