10 examples of 'nltk stopwords' in Python

Every line of the 'nltk stopwords' code snippets below is scanned for vulnerabilities by our machine learning engine, which combs millions of open-source libraries to help keep your Python code secure.


def stopwords_nltk():
    # NLTK's built-in English stopword list
    from nltk.corpus import stopwords

    return stopwords.words('english')
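This assumes the NLTK stopword corpus is already present locally; it needs a one-time download first. A minimal sketch:

import nltk
nltk.download('stopwords')  # one-time fetch of the corpus

from nltk.corpus import stopwords
print(stopwords.words('english')[:5])  # ['i', 'me', 'my', 'myself', 'we']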
def remove_stopwords(tokens, language):
    """
    Remove Chinese stopwords.
    """
    from .stopwords import stopwords

    # drop the stopwords (set difference discards order and duplicates)
    tokens = set(tokens) - set(stopwords)

    return tokens
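A standalone sketch of the same set-difference idea, with a tiny inline list standing in for the package-relative `.stopwords` module (which isn't shown here). Note that returning a set discards token order and duplicates, which may matter for downstream processing:

stopwords = ['的', '了', '是']  # stand-in for `from .stopwords import stopwords`
tokens = ['我', '是', '学生', '的']
print(set(tokens) - set(stopwords))  # {'我', '学生'}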
def stop_words_stem(self, stop_words=None):
    if stop_words is not None:
        stop_words_ = stop_words
    else:
        stop_words_ = self.stop_words
    # stem and deduplicate, so the list can match already-stemmed tokens
    return list(set([stem(word) for word in stop_words_]))
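The bare `stem` helper is not shown in this snippet; NLTK's PorterStemmer is a plausible stand-in for trying it out:

from nltk.stem import PorterStemmer

stem = PorterStemmer().stem  # assumption: any str -> str stemmer works here
stop_words = ['running', 'having', 'does']
print(sorted({stem(w) for w in stop_words}))  # ['doe', 'have', 'run']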
def remove_stopwords(tokens, stopwords=STOPWORDS):
    """Remove stopwords using list from `gensim.parsing.preprocessing.STOPWORDS`.

    Parameters
    ----------
    tokens : iterable of str
        Sequence of tokens.
    stopwords : iterable of str, optional
        Sequence of stopwords.

    Returns
    -------
    list of str
        List of tokens without `stopwords`.

    """
    return [token for token in tokens if token not in stopwords]
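Assuming the function above is in scope, a quick call might look like this; `STOPWORDS` is a frozenset of common English words shipped with gensim:

from gensim.parsing.preprocessing import STOPWORDS

print(remove_stopwords(['the', 'quick', 'brown', 'fox'], STOPWORDS))
# ['quick', 'brown', 'fox']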
def _remove_stop_words(phrase):
    # trim stopword tokens from the front of the phrase...
    while len(phrase) > 0 and (phrase[0].is_stop
            or str(phrase[0]).strip().lower() in Stopwords.get_words()):
        phrase = phrase[1:]
    # ...and from the back, leaving interior stopwords in place
    while len(phrase) > 0 and (phrase[-1].is_stop
            or str(phrase[-1]).strip().lower() in Stopwords.get_words()):
        phrase = phrase[:-1]
    return phrase
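The `.is_stop` attribute suggests spaCy tokens, so a spaCy Span is a natural input; the project-specific `Stopwords.get_words()` lookup is stubbed out here (both are assumptions):

import spacy

class Stopwords:                    # stub for the project's own lookup
    @staticmethod
    def get_words():
        return set()

nlp = spacy.load("en_core_web_sm")  # requires the small English model
doc = nlp("the quick brown fox in the park")
print(_remove_stop_words(doc[:]))   # quick brown fox in the park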
@staticmethod
def tokenize(text):
    return [word for word in word_tokenize(text.lower()) if word not in stopwords.words('english')]
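One caveat: `stopwords.words('english')` is re-evaluated for every token. Hoisting the list into a set keeps the behavior but makes each lookup O(1); a sketch:

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize  # needs nltk's 'punkt' data downloaded once

STOP_SET = set(stopwords.words('english'))

def tokenize(text):
    return [w for w in word_tokenize(text.lower()) if w not in STOP_SET]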
@staticmethod
def removeStopWords(words):
    # load one stopword per line from the dictionary file
    stopwords = []
    with open("stopwords.dic", encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            if len(line) > 0:
                stopwords.append(line)
    # keep only non-empty words that are not in the stopword list
    rwords = []
    for word in words:
        if word not in stopwords and len(word.strip()) > 0:
            rwords.append(word)
    return rwords
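"stopwords.dic" is expected to hold one stopword per line. A hypothetical round trip, assuming removeStopWords is reachable at module level (in the original it is a @staticmethod on some class):

with open("stopwords.dic", "w", encoding="utf-8") as f:
    f.write("the\nan\na\n")  # toy dictionary file

print(removeStopWords(["the", "cat", "sat"]))  # ['cat', 'sat']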
def cleanTokens(self, removeStopwords=True):
    # a plain method rather than a @property, since properties cannot take arguments
    clean = [token for token in self.tokens if token not in punctuation]
    clean = [token.lower() for token in clean]
    clean = [token for token in clean if token.isalpha()]
    if removeStopwords:
        clean = self.removeStopwords(clean)
    return clean
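A minimal host class so the method above can be exercised standalone; the class name, the `tokens` attribute, and the `removeStopwords` helper are all assumptions reconstructed from context:

from string import punctuation
from nltk.corpus import stopwords

class TextDoc:
    def __init__(self, tokens):
        self.tokens = tokens

    def removeStopwords(self, tokens):
        stops = set(stopwords.words('english'))
        return [t for t in tokens if t not in stops]

    cleanTokens = cleanTokens  # bind the function defined above as a method

print(TextDoc(['The', 'cat', ',', 'sat', '!']).cleanTokens())  # ['cat', 'sat']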
def stopwords_filter(string):
    # strip tashkeel (vowel diacritics) because the stopword list
    # contains voweled words
    text = araby.strip_tashkeel(string)
    word_tokenizer = WordTokenizer("arabic")
    tokens = word_tokenizer.tokenize(text)

    # filter stop words
    no_stops = [w for w in tokens if w not in ARABIC_STOPS]

    return no_stops
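Here `araby` comes from the pyarabic package. For a quick standalone check, pyarabic's own `araby.tokenize` can stand in for the snippet's WordTokenizer, and a tiny stop list replaces ARABIC_STOPS (both assumptions):

from pyarabic import araby

ARABIC_STOPS = {"هذا", "هو"}  # stand-in; the original uses a full corpus list

text = araby.strip_tashkeel("هذا هو النصُ")
tokens = araby.tokenize(text)
print([w for w in tokens if w not in ARABIC_STOPS])  # ['النص']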
def _load_stopwords():
    # one stopword per line; strip trailing newlines and spaces
    with open('./files/stopword.txt', 'r', encoding='utf-8') as fr:
        lines = [line.strip('\r\n ') for line in fr]
    return lines
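A hypothetical call site; './files/stopword.txt' is expected to hold one stopword per line in UTF-8, so the sketch writes a toy file first:

import os
os.makedirs('./files', exist_ok=True)
with open('./files/stopword.txt', 'w', encoding='utf-8') as fw:
    fw.write('的\n了\n')

stop_set = set(_load_stopwords())  # set membership beats scanning a list
print([w for w in ['好', '的'] if w not in stop_set])  # ['好']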
