Skip to content

Commit cf252cf

Browse files
authored
Merge pull request parulnith#1 from alvations/patch-1
Specify encoding when opening file.
2 parents 486eca8 + 2009804 commit cf252cf

File tree

1 file changed

+16
-11
lines changed

1 file changed

+16
-11
lines changed

chatbot.py

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,22 +3,29 @@
33

44
# # Meet Robo: your friend
55

6-
import nltk
6+
import io
7+
import random
8+
import string # to process standard python strings
79
import warnings
8-
warnings.filterwarnings("ignore")
910

10-
# nltk.download() # for downloading packages
11+
from sklearn.feature_extraction.text import TfidfVectorizer
12+
from sklearn.metrics.pairwise import cosine_similarity
1113

1214
import numpy as np
13-
import random
14-
import string # to process standard python strings
1515

16+
import nltk
17+
from nltk.stem import WordNetLemmatizer
18+
19+
warnings.filterwarnings("ignore")
1620

17-
f=open('chatbot.txt','r',errors = 'ignore')
18-
raw=f.read()
19-
raw=raw.lower()# converts to lowercase
21+
nltk.download('popular', quiet=True) # for downloading packages
22+
# The 'popular' collection already includes the following packages.
2023
#nltk.download('punkt') # first-time use only
2124
#nltk.download('wordnet') # first-time use only
25+
26+
with open('chatbot.txt','r', encoding='utf8', errors ='ignore') as fin:
27+
raw = fin.read().lower()
28+
2229
sent_tokens = nltk.sent_tokenize(raw)# converts to list of sentences
2330
word_tokens = nltk.word_tokenize(raw)# converts to list of words
2431

@@ -29,7 +36,7 @@
2936
word_tokens[:5]
3037

3138

32-
lemmer = nltk.stem.WordNetLemmatizer()
39+
lemmer = WordNetLemmatizer()
3340
def LemTokens(tokens):
3441
return [lemmer.lemmatize(token) for token in tokens]
3542
remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)
@@ -50,8 +57,6 @@ def greeting(sentence):
5057
return random.choice(GREETING_RESPONSES)
5158

5259

53-
from sklearn.feature_extraction.text import TfidfVectorizer
54-
from sklearn.metrics.pairwise import cosine_similarity
5560

5661

5762
# Generating response

0 commit comments

Comments
 (0)