10 examples of 'nltk.download('stopwords')' in Python

Every one of these 'nltk.download('stopwords')' code snippets is scanned for vulnerabilities by our machine learning engine, which combs millions of open source libraries to help keep your Python code secure.

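For reference, here is a minimal sketch of the call this page is named after, assuming a working network connection; the printed slice is purely illustrative:

import nltk
from nltk.corpus import stopwords

# Fetch the stopwords corpus into the first writable directory on
# nltk.data.path; the call is a no-op if the corpus is already present.
nltk.download('stopwords')

print(stopwords.words('english')[:5])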

import logging
from nltk import download


def downloadNLTKConll2000Corpus():
    logger = logging.getLogger('collective.classification')
    logger.info("Downloading NLTK's conll2000 corpus")
    download('conll2000')

from nltk.corpus import stopwords


def stopwords_nltk():
    # NLTK's built-in English stopword list (requires the 'stopwords' corpus)
    return stopwords.words('english')
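
To show how such a helper is typically used, here is a small sketch (the sample sentence is invented) that filters stopwords out of a tokenized string; on recent NLTK releases word_tokenize may also need the 'punkt_tab' resource:

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

nltk.download('stopwords')
nltk.download('punkt')  # tokenizer models used by word_tokenize

stop_words = set(stopwords.words('english'))
tokens = word_tokenize("This is just an example sentence to filter.")
print([t for t in tokens if t.lower() not in stop_words])
# -> roughly ['example', 'sentence', 'filter', '.']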

import click
import nltk


@click.command('download')
def download():
    '''
    Install required libraries.
    Note this library will install nltk dependencies into your
    user directory.
    '''

    click.echo("Installing nltk packages into your user directories in " +
               "the following order of existence (first found):\n" +
               '\n'.join(nltk.data.path))

    extensions = [("taggers", "averaged_perceptron_tagger"),
                  ("corpora", "wordnet"),
                  ("tokenizers", "punkt")]

    # Only fetch the packages that are not already on nltk.data.path
    missing = check_packages_exist(extensions)

    for ext_tuple in missing:
        nltk.download(ext_tuple[1])
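
The check_packages_exist helper is not shown in the snippet; a minimal sketch of what it might do, assuming it returns the (category, name) tuples whose data is missing, can lean on nltk.data.find, which raises LookupError for absent resources:

import nltk


def check_packages_exist(extensions):
    # Hypothetical helper: keep the (category, name) pairs whose data
    # cannot be located anywhere on nltk.data.path.
    missing = []
    for category, name in extensions:
        try:
            nltk.data.find('{}/{}'.format(category, name))
        except LookupError:
            missing.append((category, name))
    return missing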

def _download(self, data_sets=DATA_SETS):
    """
    Download the given `data_sets`.

    Args:
        data_sets: a list of (type, name) tuples describing the
            datasets to download
    """

    for data_set_type, data_set_name in data_sets:
        remote_file = data_set_name + SpeechCorpusProvider.SET_FILE_EXTENSION
        self._download_if_not_exists(remote_file)
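
_download_if_not_exists lives elsewhere in SpeechCorpusProvider; a plausible sketch of the method, where self._data_directory and the BASE_URL constant are assumptions rather than the original code:

import os
import urllib.request


def _download_if_not_exists(self, remote_file):
    # Hypothetical implementation: only fetch the file when it is not
    # already cached in the provider's data directory.
    local_path = os.path.join(self._data_directory, remote_file)
    if not os.path.exists(local_path):
        urllib.request.urlretrieve(
            SpeechCorpusProvider.BASE_URL + remote_file, local_path)
    return local_path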

import numpy as np


def load_glove(data_dir_path=None, embedding_dim=None):
    """
    Load the GloVe model (and download it first if it does not exist in data_dir_path).
    :param data_dir_path: directory into which the GloVe model files are downloaded and stored
    :param embedding_dim: dimension of the word embeddings; available dimensions are 50, 100, 200 and 300, default is 100
    :return: the GloVe word embeddings as a dict mapping word -> vector
    """
    if embedding_dim is None:
        embedding_dim = 100
    if data_dir_path is None:
        data_dir_path = '.'  # guard: the original would crash concatenating None

    glove_file_path = data_dir_path + "/glove.6B." + str(embedding_dim) + "d.txt"
    download_glove(data_dir_path, glove_file_path)
    _word2em = {}
    # Each line holds a word followed by its embedding components
    with open(glove_file_path, mode='rt', encoding='utf8') as file:
        for line in file:
            words = line.strip().split()
            word = words[0]
            embeds = np.array(words[1:], dtype=np.float32)
            _word2em[word] = embeds
    return _word2em
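
download_glove is an external helper; a minimal sketch, assuming it fetches the standard glove.6B archive from the well-known Stanford NLP mirror and unpacks it into the data directory:

import os
import urllib.request
import zipfile

GLOVE_6B_URL = 'http://nlp.stanford.edu/data/glove.6B.zip'


def download_glove(data_dir_path, glove_file_path):
    # Sketch: download and unpack the archive only when the requested
    # .txt file is not already present locally.
    if os.path.exists(glove_file_path):
        return
    zip_path = os.path.join(data_dir_path, 'glove.6B.zip')
    if not os.path.exists(zip_path):
        urllib.request.urlretrieve(GLOVE_6B_URL, zip_path)
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(data_dir_path)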

import os
from unittest import mock

# `GloVe` and `urlretrieve_side_effect` are defined by the module under
# test; the GloVe(name=..., dim=..., cache=...) signature appears to
# match PyTorch-NLP's word_to_vector API.


@mock.patch("urllib.request.urlretrieve")
def test_glove_6b_50(mock_urlretrieve):
    directory = 'tests/_test_data/glove/'

    # Make sure URL has a 200 status
    mock_urlretrieve.side_effect = urlretrieve_side_effect

    # Attempt to parse a subset of GloVe
    vectors = GloVe(name="6B", dim="50", cache=directory)
    assert len(vectors['the']) == 50

    # Test with the unknown characters
    assert len(vectors['漢字']) == 50

    # Clean up the cached tensor file
    os.remove(directory + 'glove.6B.50d.txt.pt')
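
urlretrieve_side_effect is a test fixture defined elsewhere; a hypothetical sketch that serves a small local sample instead of hitting the network (the fixture path is invented):

import shutil


def urlretrieve_side_effect(url, filename=None, **kwargs):
    # Hypothetical mock: copy a small local GloVe sample to the path the
    # downloader expected, instead of performing a real HTTP download.
    shutil.copy('tests/_test_data/glove.6B.50d.sample.txt', filename)
    return filename, None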

import numpy as np

STOP_WORDS = None  # populated by load_stopwords()


def load_stopwords():
    """Loads the stopwords.txt file into an array"""
    print('loading stopwords')
    with open('stopwords.txt') as f:
        global STOP_WORDS
        STOP_WORDS = np.array(f.read().splitlines())
    print('loaded stopwords')

def _load_stopwords():
    # One stopword per line; strip trailing newlines and spaces
    with open('./files/stopword.txt', 'r', encoding='utf-8') as fr:
        lines = [line.strip('\r\n ') for line in fr]
    return lines

def demo():
    from nltk.corpus import webtext  # the module is nltk.corpus, not nltk.corpora
    from itertools import islice

    # webtext.raw returns a string, so each `word` here is actually a
    # single character; this prints the first 100 characters of the
    # 'wine' corpus (run nltk.download('webtext') first).
    for word in islice(webtext.raw('wine'), 0, 100):
        print(word, end=' ')

import numpy as np


def get_glove_vectors(vocab):
    """
    Extracts GloVe vectors from the seed file only for words present in vocab.
    """
    print('Looking for GLOVE seeds')
    glove_vectors = {}
    found = 0
    with open(GLOVE_FILE, 'r') as glove_file:
        for i, line in enumerate(glove_file):
            utils.write_status(i + 1, 0)
            tokens = line.strip().split()
            word = tokens[0]
            if vocab.get(word):
                vector = [float(e) for e in tokens[1:]]
                glove_vectors[word] = np.array(vector)
                found += 1
    print('\n')
    return glove_vectors
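
GLOVE_FILE and utils.write_status come from the surrounding project; a hypothetical progress helper in the same spirit might look like this (the names and output format are assumptions):

import sys


def write_status(current, total):
    # Hypothetical progress reporter: rewrite a single terminal line;
    # total == 0 means the line count is not known in advance.
    if total:
        sys.stdout.write('\rProcessing %d/%d' % (current, total))
    else:
        sys.stdout.write('\rProcessing line %d' % current)
    sys.stdout.flush()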
