mirror of
https://github.com/RYDE-WORK/MediaCrawler.git
synced 2026-02-02 22:43:29 +08:00
Merge pull request #370 from Jasonyang2014/wordcloud-filter-whitespace
词云过滤空白字符
This commit is contained in:
commit
85727da1d3
@@ -27,7 +27,7 @@ class AsyncWordCloudGenerator:
 
     async def generate_word_frequency_and_cloud(self, data, save_words_prefix):
         all_text = ' '.join(item['content'] for item in data)
-        words = [word for word in jieba.lcut(all_text) if word not in self.stop_words]
+        words = [word for word in jieba.lcut(all_text) if word not in self.stop_words and len(word.strip()) > 0]
         word_freq = Counter(words)
 
         # Save word frequency to file
Loading…
x
Reference in New Issue
Block a user