-
Notifications
You must be signed in to change notification settings - Fork 285
Expand file tree
/
Copy pathdemo_output_data.py
More file actions
23 lines (21 loc) · 808 Bytes
/
demo_output_data.py
File metadata and controls
23 lines (21 loc) · 808 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import pandas as pd
from scattertext import SampleCorpora, whitespace_nlp_with_sentences, produce_scattertext_explorer
from scattertext.CorpusFromPandas import CorpusFromPandas
from scattertext.Scalers import dense_rank
# Demo: obtain the plotted term data from scattertext as a DataFrame
# and print each term alongside its x/y values.

# Sample data set shipped with scattertext; the corpus construction below
# reads its 'party' and 'text' columns.
convention_df = SampleCorpora.ConventionData2012.get_data()

# Configure the corpus factory first, then build the corpus from it.
corpus_factory = CorpusFromPandas(
    convention_df,
    category_col='party',
    text_col='text',
    nlp=whitespace_nlp_with_sentences,
)
corpus = corpus_factory.build()

# Options for the explorer call, gathered in one place.
# NOTE(review): return_data=True presumably makes the call hand back the
# underlying data structure rather than rendered output -- confirm against
# the scattertext documentation.
explorer_options = dict(
    category='democrat',
    category_name='Democratic',
    not_category_name='Republican',
    minimum_term_frequency=5,
    pmi_threshold_coefficient=8,
    transform=dense_rank,
    return_data=True,
)
raw_data = produce_scattertext_explorer(corpus, **explorer_options)

# Tabulate the entries stored under the 'data' key and keep only the term
# and its two coordinate columns.
term_records = pd.DataFrame(raw_data['data'])
df = term_records[['term', 'x', 'y']]
print(df)