3
3
4
4
# # Meet Robo: your friend
5
5
6
- import nltk
6
+ import io
7
+ import random
8
+ import string # to process standard python strings
7
9
import warnings
8
- warnings .filterwarnings ("ignore" )
9
10
10
- # nltk.download() # for downloading packages
11
+ from sklearn .feature_extraction .text import TfidfVectorizer
12
+ from sklearn .metrics .pairwise import cosine_similarity
11
13
12
14
import numpy as np
13
- import random
14
- import string # to process standard python strings
15
15
16
+ import nltk
17
+ from nltk .stem import WordNetLemmatizer
18
+
19
# One-time environment setup: silence library warnings, fetch the NLTK
# data bundle, and load the chatbot corpus into memory.
warnings.filterwarnings("ignore")

# 'popular' already bundles punkt, wordnet, etc., so the individual
# downloads commented out below are redundant.
nltk.download('popular', quiet=True)
#nltk.download('punkt') # first-time use only
#nltk.download('wordnet') # first-time use only

# Read the whole corpus once and normalise it to lowercase; the context
# manager guarantees the file handle is closed.
with open('chatbot.txt', 'r', encoding='utf8', errors='ignore') as fin:
    raw = fin.read().lower()
22
29
# Break the lowercased corpus into the two token granularities the bot
# needs: whole sentences (candidate responses) and individual words.
sent_tokens = nltk.sent_tokenize(raw)  # converts to list of sentences
word_tokens = nltk.word_tokenize(raw)  # converts to list of words

# Notebook-style peek at the first few word tokens; has no effect when
# the file is run as a plain script.
word_tokens[:5]
30
37
31
38
32
- lemmer = nltk . stem . WordNetLemmatizer ()
39
# Shared WordNet lemmatizer used by the normalisation helpers below.
lemmer = WordNetLemmatizer()


def LemTokens(tokens):
    """Return a new list containing each token of *tokens* lemmatized."""
    return list(map(lemmer.lemmatize, tokens))
42
# Translation table mapping every ASCII punctuation code point to None,
# for use with str.translate() to strip punctuation before tokenizing.
remove_punct_dict = {ord(ch): None for ch in string.punctuation}
@@ -50,8 +57,6 @@ def greeting(sentence):
50
57
return random .choice (GREETING_RESPONSES )
51
58
52
59
53
- from sklearn .feature_extraction .text import TfidfVectorizer
54
- from sklearn .metrics .pairwise import cosine_similarity
55
60
56
61
57
62
# Generating response
0 commit comments