X = []
for data in dataset:
vector = []
for word in freq_words:
if word in nltk.word_tokenize(data):
vector.append(1)
else:
vector.append(0)
X.append(vector)
X = np.asarray(X)
# Creating the Bag of Words model
word2count = {}
for data in dataset:
words = nltk.word_tokenize(data)
for word in words:
if word not in word2count.keys():
word2count[word] = 1
else:
word2count[word] += 1