# IMPORT DEPENDENCIES
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import multiprocessing
import pyLDAvis.gensim_models
# READ DATA AND EXCLUDE ROWS WITH NULL VALUES
# Load the merged top-100 chart file and, in the same expression, drop every
# row that has a missing value in any column.
song_data = pd.read_csv('../content/merged_finaltop100_revised.csv').dropna()
song_data
Unnamed: 0 | track_id | artist_names | track_name | source | rank | weeks_on_chart | streams | country | danceability | ... | duration_ms | time_signature | album_release_date | lyrics | lyrics_trans | continent | iso_alpha3 | len_words_orig | len_words_trans | lyrics_clean | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0yLdNVWF3Srea0uzk55zFn | Miley Cyrus | Flowers | Columbia | 1 | 5 | 124198 | United Arab Emirates | 0.707 | ... | 200455.0 | 4.0 | 2023-01-13 | We were good, we were gold\nKinda dream that c... | we were good we were gold kinda dream that can... | Asia | ARE | 334 | 334 | good gold dream sell right til build home watc... |
1 | 1 | 1Qrg8KqiBpW07V7PNxwwwL | SZA | Kill Bill | Top Dawg Entertainment/RCA Records | 2 | 10 | 106927 | United Arab Emirates | 0.644 | ... | 153947.0 | 4.0 | 2022-12-08 | I'm still a fan even though I was salty\nHate ... | im still a fan even though i was salty hate to... | Asia | ARE | 362 | 362 | fan even though salty hate see broad know happ... |
2 | 2 | 6AQbmUe0Qwf5PZnt4HmTXv | PinkPantheress, Ice Spice | Boy's a liar Pt. 2 | Warner Records | 3 | 2 | 83627 | United Arab Emirates | 0.696 | ... | 131013.0 | 4.0 | 2023-02-03 | Take a look inside your heart\nIs there any ro... | take a look inside your heart is there any roo... | Asia | ARE | 372 | 372 | take look inside heart room room would hold br... |
3 | 3 | 0WtM2NBVQNNJLh6scP13H8 | Rema, Selena Gomez | Calm Down (with Selena Gomez) | Mavin Records / Jonzing World | 4 | 25 | 79714 | United Arab Emirates | 0.801 | ... | 239318.0 | 4.0 | 2022-08-25 | Vibez\nOh, no\nAnother banger\nBaby, calm down... | vibez oh no another banger baby calm down calm... | Asia | ARE | 495 | 495 | another banger baby calm calm girl body put he... |
4 | 4 | 2dHHgzDwk4BJdRwy9uXhTO | Metro Boomin, The Weeknd, 21 Savage | Creepin' (with The Weeknd & 21 Savage) | Republic Records | 5 | 11 | 79488 | United Arab Emirates | 0.715 | ... | 221520.0 | 4.0 | 2022-12-02 | Ooh, ooh-ooh\nOoh-ooh-ooh, ooh, ooh-ooh (Just ... | ooh oohooh oohoohooh ooh oohooh just cant beli... | Asia | ARE | 458 | 456 | believe man want somebody say saw person kiss ... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7295 | 7295 | 7ErtOGQ9DwyQa3lwP77j4u | Ruger | Asiwaju | Columbia | 96 | 4 | 54026 | South Africa | 0.727 | ... | 216000.0 | 4.0 | 2022-11-14 | Cook that thing\nMan getting high till I fade ... | cook that thing man getting high till i fade o... | Africa | ZAF | 591 | 591 | cook thing man get high fade pure way hullabal... |
7296 | 7296 | 4EI8VuxUuIHKfafU72emqz | Mariah Carey | We Belong Together | Island Records | 97 | 50 | 53828 | South Africa | 0.840 | ... | 201400.0 | 4.0 | 2005 | Sweet love, yeah\nI didn't mean it when I said... | sweet love yeah i didnt mean it when i said i ... | Africa | ZAF | 497 | 497 | sweet love yeah didnt mean say didnt love tigh... |
7297 | 7297 | 3Puq6i4xIRH4lrPvJxIC83 | Deep London, Nkosazana Daughter, Murumba Pitch... | Piano Ngijabulise | Cycad Wave | 98 | 14 | 53752 | South Africa | 0.835 | ... | 416037.0 | 4.0 | 2022-09-30 | Okokuqala ukuhlakanipha\nUkumesaba uJehova\nAy... | first is wisdom to fear jehovah they dont hear... | Africa | ZAF | 256 | 414 | first wisdom fear hear child piano first wisdo... |
7298 | 7298 | 7DQMBUK4oX9gV1qIzpoRz6 | Aymos | Mama | DJs Production | 99 | 14 | 53733 | South Africa | 0.802 | ... | 450304.0 | 4.0 | 2022-08-12 | Mama mama mama mama\nMama mama mama mama\nMama... | mother mother mother mother mother mother moth... | Africa | ZAF | 410 | 466 | mother mother mother mother mother mother moth... |
7299 | 7299 | 5emv0gRnEk4dVXWETqewAC | TOSS, Young Stunna, Tyler ICU | Tetema | Toss Indabakabani | 100 | 2 | 53252 | South Africa | 0.835 | ... | 378979.0 | 4.0 | 2023-02-03 | Alright\nAsambe, ishuu\nUm'sebenzi ngiyawazi\n... | okay lets go dude i know the work im going to ... | Africa | ZAF | 414 | 519 | let dude know work go closet go break bone saw... |
6809 rows × 30 columns
# GET NUMBER OF UNIQUE TOKENS
# Split each cleaned lyric on whitespace and flatten the pieces into one token
# list, then de-duplicate it to obtain the vocabulary size.
text = [token for lyric in song_data['lyrics_clean'] for token in lyric.split()]
new_set = set(text)
print("# of unique tokens:", len(new_set))
# of unique tokens: 11006
!pip install bertopic
# IMPORT DEPENDENCIES
from bertopic import BERTopic
from sentence_transformers import SentenceTransformer
from umap import UMAP
from sklearn.feature_extraction.text import CountVectorizer
from gensim.models import CoherenceModel
from gensim import corpora
# CONVERT TO LIST
# One entry per song, taken from the `lyrics_clean` column.
doc = song_data['lyrics_clean'].tolist()
# PREP EMBEDDINGS
# Encode every document once with the all-MiniLM-L6-v2 sentence-transformer so
# the vectors can be passed explicitly to BERTopic when the model is fitted.
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = sentence_model.encode(doc, show_progress_bar=False)
# TRAIN BERTopic MODEL
# Disabled: fit with BERTopic's default configuration. The configured model
# defined further down in this notebook is the one that is actually trained.
#topic_model = BERTopic().fit(doc, embeddings)
In BERTopic, UMAP is used to reduce the dimensionality of the document embeddings (here, the sentence-transformer vectors) before they are clustered into topics. Working in a lower-dimensional space helps the clustering step group similar documents together, speeds up processing, and reduces noise, which in turn tends to produce cleaner topics.
# DEFINE UMAP
# Dimensionality-reduction step handed to BERTopic. random_state is pinned so
# repeated runs start from the same seed.
umap_model = UMAP(
    n_neighbors=10,
    n_components=20,
    min_dist=0.0,
    metric='cosine',
    random_state=0,
)
# DEFINE VECTORIZER
# Bag-of-words vectorizer that filters scikit-learn's English stop-word list.
vectorizer_model = CountVectorizer(stop_words="english")
# DEFINE BERTOPIC MODEL
# Requests 20 topics with 20 words each and per-document topic probabilities.
# NOTE(review): the BERTopic docs say n_gram_range is not used when a custom
# vectorizer_model is supplied, so (1, 2) probably has no effect here - confirm,
# and set ngram_range on the CountVectorizer if bigrams are wanted.
bertopic_model = BERTopic(
    umap_model=umap_model,
    verbose=True,
    nr_topics=20,
    n_gram_range=(1, 2),
    top_n_words=20,
    vectorizer_model=vectorizer_model,
    calculate_probabilities=True,
    language="English",
)
%%time
# RUN BERTOPIC MODEL
# Fit on the lyrics using the precomputed sentence embeddings. `topics` holds
# the topic assigned to each document; `probabilities` is returned because the
# model was created with calculate_probabilities=True.
topics, probabilities = bertopic_model.fit_transform(doc, embeddings)
2023-03-10 04:08:28,316 - BERTopic - Reduced dimensionality 2023-03-10 04:09:07,529 - BERTopic - Clustered reduced embeddings 2023-03-10 04:09:10,922 - BERTopic - Reduced number of topics from 227 to 20
CPU times: user 1min 23s, sys: 1.25 s, total: 1min 24s Wall time: 1min 25s
# LIST OF TOPICS
# Summary table with one row per topic: topic id, document count and the
# generated topic name. Topic -1 is the label BERTopic uses for outliers
# (per the BERTopic docs).
bertopic_model.get_topic_info()
Topic | Count | Name | |
---|---|---|---|
0 | -1 | 1551 | -1_like_want_know_come |
1 | 0 | 4229 | 0_know_like_yeah_love |
2 | 1 | 141 | 1_main_mein_tera_mere |
3 | 2 | 117 | 2_dey_calm_wan_girl |
4 | 3 | 115 | 3_heat_wave_harry_june |
5 | 4 | 109 | 4_guy_like_happen_fret |
6 | 5 | 97 | 5_round_everybody_problem_teatime |
7 | 6 | 79 | 6_mature_drug_idea_therapist |
8 | 7 | 64 | 7_shake_dum_dance_hand |
9 | 8 | 60 | 8_dick_easy_hoe_spiteful |
10 | 9 | 45 | 9_wish_ditto_know_say |
11 | 10 | 42 | 10_lover_need_golden_hour |
12 | 11 | 32 | 11_reserve_desperate_adventure_undress |
13 | 12 | 30 | 12_deep_cortina_rust_ford |
14 | 13 | 27 | 13_hopeless_hold_safe_place |
15 | 14 | 19 | 14_spit_face_phase_love |
16 | 15 | 17 | 15_snap_heart_cause_need |
17 | 16 | 13 | 16_gay_topless_man_swing |
18 | 17 | 11 | 17_whip_shut_area_shutdown |
19 | 18 | 11 | 18_umbrella_rain_shine_stand |
# VISUALIZE RESULTS
# Build the intertopic distance map and save it as a standalone HTML file
# instead of rendering it inline (the inline call is left disabled below).
#bertopic_model.visualize_topics()
fig = bertopic_model.visualize_topics()
fig.write_html("intdist.html")
Figure 3.4: BERTopic Intertopic Distance Map
Most of the lyrics appear to belong to topic 0, which appears to represent the topic of 'love'. This makes sense because the data contains the popular songs for the week of February 16, 2023 (2.16.2023), during the Valentine's Day season.
# PLOT BAR CHART
# Build the per-topic word-score bar chart and save it as a standalone HTML
# file (the inline call is left disabled below). Note that `fig` is rebound here.
#bertopic_model.visualize_barchart()
fig = bertopic_model.visualize_barchart()
fig.write_html("barbert.html")
Figure 3.5: BERTopic Topic Word Scores
Coherence Scores
# PREPROCESS DOCS
# Concatenate all lyrics assigned to the same topic into one pseudo-document
# per topic; these per-topic documents are the reference texts for coherence.
documents = pd.DataFrame({"Document": doc,
                          "ID": range(len(doc)),
                          "Topic": topics})
documents_per_topic = documents.groupby(['Topic'], as_index=False).agg({'Document': ' '.join})
# NOTE: _preprocess_text is a private BERTopic helper and may change between releases.
cleaned_docs = bertopic_model._preprocess_text(documents_per_topic.Document.values)
# EXTRACT VECTORIZER AND ANALYZER FROM BERTopic
# Reusing the model's own analyzer keeps tokenisation (incl. stop-word removal)
# identical to the one used to build the topic representations.
vectorizer = bertopic_model.vectorizer_model
analyzer = vectorizer.build_analyzer()
# EXTRACT FEATURES FOR TOPIC COHERENCE EVALUATION
words = vectorizer.get_feature_names_out()
tokens = [analyzer(topic_doc) for topic_doc in cleaned_docs]
dictionary = corpora.Dictionary(tokens)
corpus = [dictionary.doc2bow(token) for token in tokens]
# Collect the top words of every real topic. The ids are taken from the actual
# assignments and only the outlier topic (-1) is skipped. The previous
# range(len(set(topics)) - 2) also dropped the last real topic, and a range()
# silently assumes that an outlier topic exists and that ids are contiguous.
topic_ids = sorted(topic for topic in set(topics) if topic != -1)
topic_words = [[word for word, _ in bertopic_model.get_topic(topic)]
               for topic in topic_ids]
# EVALUATE
# C_V coherence of the BERTopic topic words, computed by gensim against the
# per-topic token lists, bag-of-words corpus and dictionary built above.
coherence_model = CoherenceModel(topics=topic_words,
texts=tokens,
corpus=corpus,
dictionary=dictionary,
coherence='c_v')
coherence = coherence_model.get_coherence()
coherence
0.5787268298101792
# EVALUATE
# Same evaluation with the UMass coherence measure. This rebinds
# `coherence_model` and `coherence`, replacing the C_V values from the cell above.
coherence_model = CoherenceModel(topics=topic_words,
texts=tokens,
corpus=corpus,
dictionary=dictionary,
coherence='u_mass')
coherence = coherence_model.get_coherence()
coherence
-0.22810554022264162
The coherence scores obtained using BERTopic are higher than the ones computed for LDA. This suggests that the BERTopic model has more clearly defined and interpretable topics than the LDA model. Comparing BERTopic's intertopic distance plot with the same plot generated for LDA, we can easily see that similar topics are clustered more closely together in BERTopic.
!pip install top2vec
# IMPORT DEPENDENCIES
from top2vec import Top2Vec
# CONVERT TO LIST
# Flat list with one `lyrics_clean` entry per song (not a list of lists);
# this list is what Top2Vec receives as its documents below.
doc= song_data["lyrics_clean"].values.tolist()
!pip install top2vec[sentence_encoders]
%%time
# TRAIN WITH SLOWEST LEARNING RATE (DEEP-LEARN)
top2vec = Top2Vec(doc, embedding_model="universal-sentence-encoder",
speed="deep-learn", ngram_vocab=True,
ngram_vocab_args={"connector_words": "phrases.ENGLISH_CONNECTOR_WORDS"},
workers=multiprocessing.cpu_count())
# TOTAL NUMBER OF TOPICS
# Number of topics found by the trained Top2Vec model.
top2vec.get_num_topics()
125
# GET TOPICS
# Returns three aligned arrays: the keyword list of each topic, the score of
# each keyword, and the topic numbers.
top2vec.get_topics()
(array([['learn ballad', 'cry beg', 'poem fall', ..., 'wait patiently', 'hug everywhere', 'sorrow begin'], ['poppin', 'jiggy jiggy', 'song poppin', ..., 'actin naughty', 'grind chop', 'lullaby choke'], ['cry beg', 'wake desire', 'lullaby choke', ..., 'deserve lover', 'suck suck', 'suck smash'], ..., ['bad memory', 'forgetfulness happen', 'will liquor', ..., 'bleed death', 'hard liquor', 'kiss disaster'], ['safe safe', 'keep safe', 'safe sound', ..., 'teardrop fall', 'feat dean', 'run bulletproof'], ['remember yesterday', 'send music', 'mourn mourn', ..., 'realize well', 'heart glow', 'amor']], dtype='<U25'), array([[0.30762494, 0.2911715 , 0.28021342, ..., 0.21185015, 0.21178201, 0.2109958 ], [0.27774128, 0.27477336, 0.25841653, ..., 0.1986558 , 0.19845141, 0.19789112], [0.24927759, 0.24156025, 0.24144508, ..., 0.19047771, 0.1903212 , 0.1900802 ], ..., [0.29280716, 0.27520993, 0.2652002 , ..., 0.20401493, 0.20398518, 0.20319232], [0.3177513 , 0.3144007 , 0.2839837 , ..., 0.18589859, 0.1857444 , 0.18569712], [0.23880085, 0.23874806, 0.22661242, ..., 0.1822575 , 0.18217489, 0.18139187]], dtype=float32), array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122]))
# GET TOPIC SIZE (# OF DOCS IN EACH TOPIC)
# topic_sizes[i] is the number of documents in topic topic_nums[i]; the sizes
# shown below are in descending order.
topic_sizes, topic_nums = top2vec.get_topic_sizes()
topic_sizes
array([693, 676, 223, 175, 140, 133, 120, 114, 111, 110, 106, 93, 89, 85, 83, 80, 75, 74, 74, 73, 70, 69, 68, 67, 67, 67, 66, 58, 56, 54, 54, 53, 52, 52, 50, 49, 49, 48, 48, 46, 46, 45, 45, 45, 42, 42, 41, 40, 40, 39, 38, 38, 38, 37, 35, 34, 34, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 30, 29, 29, 29, 29, 29, 29, 29, 28, 27, 27, 26, 26, 26, 26, 25, 25, 25, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, 16, 16, 15, 14])
# GET TOPIC SIZE (# OF DOCS IN EACH TOPIC)
# Pair every topic number with its size and report the first ten pairs.
for topic_num, topic_size in list(zip(topic_nums, topic_sizes))[:10]:
    print(f"Topic Num {topic_num} has {topic_size} documents.")
Topic Num 0 has 693 documents. Topic Num 1 has 676 documents. Topic Num 2 has 223 documents. Topic Num 3 has 175 documents. Topic Num 4 has 140 documents. Topic Num 5 has 133 documents. Topic Num 6 has 120 documents. Topic Num 7 has 114 documents. Topic Num 8 has 111 documents. Topic Num 9 has 110 documents.
# KEYWORDS FOR EACH TOPIC
# 2-D array of strings: one row of keywords (single words and n-gram phrases)
# per topic.
top2vec.topic_words
array([['jiggy jiggy', 'poppin', 'kiss deceive', ..., 'sit cock', 'push pus', 'sing lullaby'], ['learn ballad', 'cry beg', 'poem fall', ..., 'taffy forgive', 'swim silence', 'sorrow begin'], ['rhyme', 'suck suck', 'lullaby choke', ..., 'cheat throw', 'whore', 'grr'], ..., ['bad memory', 'forgetfulness happen', 'will liquor', ..., 'bleed death', 'hard liquor', 'kiss disaster'], ['tonight oxygen', 'breathe breathe', 'breathe', ..., 'song butterfly', 'jump cover', 'heart glow'], ['pay gym', 'must hustle', 'walk woof', ..., 'grind', 'buttock squeeze', 'beat fast']], dtype='<U25')
# CHECK FIRST 2 TOPICS WITH MOST DOCS
# Print the keyword list of topic 0, then topic 1.
for leading_topic in (0, 1):
    print(top2vec.topic_words[leading_topic])
['learn ballad' 'cry beg' 'poem fall' 'mourn mourn' 'write poem' 'hide emotion' 'lullaby choke' 'heartbreak anniversary' 'amor' 'think linger' 'kiss disaster' 'tattoo amor' 'fade fade' 'sit sentimental' 'deserve lover' 'cry' 'heart adore' 'sing lullaby' 'rest peace' 'wake desire' 'devotion deep' 'smile mockingbird' 'eternal sadness' 'tear describe' 'lyric' 'summertime sadness' 'sunrise moonlight' 'scarf amor' 'sorrow many' 'monotonous rhythm' 'teardrop fall' 'disappear unthinkable' 'kiss deceive' 'song butterfly' 'trouble sentimental' 'lionheart heart' 'rain khair' 'difficult cry' 'heal wind' 'temptation whim' 'wind caress' 'beg pardon' 'moony tune' 'snooze miss' 'tear drop' 'sit humbly' 'envy silent' 'wait patiently' 'hug everywhere' 'sorrow begin'] ['poppin' 'jiggy jiggy' 'song poppin' 'whore whore' 'make rowdy' 'rhyme' 'fag blow' 'suck nymphet' 'beg pardon' 'wick wack' 'kiss deceive' 'must hustle' 'pistol poppin' 'suck savage' 'dum dam' 'suck suck' 'loser loser' 'snatch snatch' 'pimp hoe' 'cry beg' 'scratch yell' 'cuss rap' 'call greedy' 'naughty desire' 'young thug' 'fail moan' 'back poppin' 'pussy ratchet' 'drop guarantee' 'pull tune' 'push pus' 'drop drop' 'hoe rap' 'mambo bad' 'sit humbly' 'cheat throw' 'banger' 'strive' 'lyric' 'monotonous rhythm' 'stupid slap' 'guess slut' 'stick deadbeat' 'addict gamble' 'win pimp' 'taffy forgive' 'kiss disaster' 'actin naughty' 'grind chop' 'lullaby choke']
# REDUCE NUM OF TOPICS TO 10
# Merge the original topics down to 10, then print the updated keyword lists
# of reduced topic 0 and reduced topic 1.
top2vec.hierarchical_topic_reduction(num_topics=10)
for reduced_topic in (0, 1):
    print(top2vec.topic_words_reduced[reduced_topic])
['jiggy jiggy' 'poppin' 'song poppin' 'rhyme' 'whore whore' 'lullaby choke' 'suck nymphet' 'mambo bad' 'beg pardon' 'kiss deceive' 'cry beg' 'fail moan' 'suck suck' 'naughty desire' 'lyric' 'scratch yell' 'back poppin' 'make rowdy' 'touch ditty' 'monotonous rhythm' 'actin naughty' 'learn ballad' 'kiss disaster' 'fag blow' 'suck savage' 'drop drop' 'mess groove' 'rain bop' 'wick wack' 'guess slut' 'poem fall' 'dum dam' 'sing lullaby' 'must hustle' 'pistol poppin' 'succulent succulent' 'taffy forgive' 'tuck jewel' 'suck smash' 'temptation whim' 'trick feint' 'pull tune' 'bonbon grape' 'sex repeat' 'cheat throw' 'snatch snatch' 'loser loser' 'naughty' 'sing toto' 'listen song'] ['cry beg' 'sing lullaby' 'sit humbly' 'lullaby choke' 'wake desire' 'jiggy jiggy' 'wake scream' 'learn ballad' 'suck nymphet' 'sit cock' 'kiss deceive' 'scream' 'pray god' 'lick lick' 'tell dumb' 'sit sentimental' 'smile mockingbird' 'listen song' 'darling expect' 'wait patiently' 'closer whisper' 'snooze miss' 'listen classic' 'need lover' 'eat repeat' 'babe darling' 'kiss hopefully' 'whore whore' 'deserve lover' 'song poppin' 'call greedy' 'sing toto' 'buy mockingbird' 'scratch yell' 'sing nana' 'sex repeat' 'touch yell' 'lyric' 'suck suck' 'patience wan' 'scream laugh' 'hush little' 'naughty desire' 'walk woof' 'raise suspicion' 'listen blonde' 'beg' 'put slowly' 'hide emotion' 'stay alarm']
# PLOT
# Word cloud of the top words of topic 0.
# NOTE(review): called without `reduced=True`, so this presumably plots the
# original topic 0 rather than one of the 10 reduced topics - confirm which
# set is intended.
top2vec.generate_topic_wordcloud(0)
Figure 3.6: Top2vec Topic 0 Top words
# SEARCH FOR A SAMPLE TOPIC 0 DOC
# Fetch 2 documents for topic 0; the result is a tuple of
# (documents, scores, document ids).
# NOTE(review): `reduced=True` is not passed, so this presumably queries the
# original (non-reduced) topic 0 - confirm.
top2vec.search_documents_by_topic(0, num_docs=2)
(array(['tell stay side promise day fragile could much make love fade away heartache song use hum together pick accident make tear well baby miss every day always mind make break many day lonely day miss day love even though want ever earnest see rush dream understand first day say let make cry much eye swell mind today mind confuse heart ache knee hand day walk side side wait way back maybe do there nothing leave connect love story bitter meet choose love wind carry apart ache heart quickly wipe eye tomorrow forget might also like sadness hide alone night reply write message line wonder long time understand give wholeheartedly lie say let', 'like raindrop fall winter lake cold heart sadness speechless see remember rainy afternoon laugh together much fun know know meet time sit next last time know say incompatible know blue sky tomorrow become late star let think every night time keep precious memory one delete promise make cool windy afternoon please engrave farewell word could say dandelion follow wind please roll away one know life many unpredictable thing every footstep every heartbeat wish turn back back separation person beside let together day day flower know meet time sit next last time know say incompatible know blue sky tomorrow become late star let think every night time keep precious memory one delete promise make cool windy afternoon please engrave farewell word could say dandelion follow wind please roll away happiness sunset watch rain porch one give much sincere love life people live petal carry say let fly beautiful paradise instead heart dear let fly beautiful paradise instead heart dear might also like'], dtype=object), array([0.8442501, 0.8221216], dtype=float32), array([6658, 6696]))
# PLOT
# Word cloud of the top words of topic 1.
top2vec.generate_topic_wordcloud(1)
Figure 3.7: Top2vec Topic 1 Top words
# SEARCH FOR A SAMPLE TOPIC 1 DOC
# Fetch 2 documents for topic 1 as (documents, scores, document ids).
top2vec.search_documents_by_topic(1, num_docs=2)
(array(['wake morning hallelujah money bag back gang steady fire leave gap mix monster eye dirty fit see smell do shall lead baby girl clean tear currency bullion van outside club tell say need protection hop black challenger road leadin dynasty know ourself know know tin tin tin money long make bless seize sha guy lady lady lady pull pool party torus order might also like pull brand new beamer know money like river dey get liver aint nobody bigger pelu yeah come spray pepper spray pepper yeah know say regular dey give keep hot like summer bring drama burst medulla automatic want party bring like twenty show place new designer steady everything get god grace make bless seize yes yeah sha thing say rap sell til start pay money come back track play get need application fee track make pastor pray cause know high take like park chart vacancy want patiently step room energy wey carry enter never lose know dish chop far go fit fathom nothing close random dey beg pardon leave right dribble like girl boy dey crush body move god victory certain get nothing tin tin tin money long make bless seize sha leadin side people sure die family life keep guy life nobody dim shine win end time', 'hell come slap even get involve name get bigger bigger house bigger bigger everyone look whats garden look wouldnt anything child silent long time take open mouth know job go even face necessary lot people lose tell didnt believe selfish take away everything much worry much stress enough hand need weapon mess gang use street shop today wake strange girl problem turn matter important leave home night spit think rapper say always come street many remain human there something wrong come tell face wherever always black cloud like time also booze spend many time alone didnt find real friend long time behind one know year get dude come far away money work hard mind enough series success many part life take promise work girl wait love run crazy much blow mind hope put money head feel like dark extra 
light like new make tour everyone flex everyone gangster go hurt get say fair kind always mess satan might also like rapper say always come street many remain human there something wrong come tell face cut face cover come beautiful wake plane land guy block one make bleed care say open henny spread money cut feel like go die fly sun family freeze thank god good papa never give cut everyone say year would number second phone ring loudly wait girl tonight girl ski head pussy power say one one make come true dream wall cold mother water cold pocket empty bowl empty house full trauma lot will tire give son never cry time stage come alone come say go go go go go go say yeah yeah'], dtype=object), array([0.88646424, 0.8779525 ], dtype=float32), array([4328, 2872]))
# PLOT
# Word cloud of the top words of topic 2.
top2vec.generate_topic_wordcloud(2)
Figure 3.8: Top2vec Topic 2 Top words
# SEARCH FOR A SAMPLE TOPIC 2 DOC
# Fetch 2 documents for topic 2 as (documents, scores, document ids).
# NOTE(review): the two hits shown below are identical lyrics with different
# document ids, presumably the same song charting in more than one country.
top2vec.search_documents_by_topic(2, num_docs=2)
(array(['tell want sleep havent able since leave hey get arrive think want forget another know go lie go lie walk think want forget want repeat baby lose go today saw wear pal gym baby real plastic put nit talk romantic think every day one change know screw relationship bad baby know fight rude thing county sea view wake day go fifty eight beach never think would last day baby active puppet motorboat let see see around hey get get get think erase another taste go lie go lie drink think want forget want repeat sound baby like anyone sin anyone sin tell want put today baby net want decide look look slow little dog finish inside need decide look look slow little dog finish inside give know give water thirsty today want invest spend spend substance three see ticket low might also like tell fall asleep havent able since leave hey get get get think erase another taste go lie go lie walk think want forget want repeat baby whoa go fifty eight beach never think would last day whoa', 'tell want sleep havent able since leave hey get arrive think want forget another know go lie go lie walk think want forget want repeat baby lose go today saw wear pal gym baby real plastic put nit talk romantic think every day one change know screw relationship bad baby know fight rude thing county sea view wake day go fifty eight beach never think would last day baby active puppet motorboat let see see around hey get get get think erase another taste go lie go lie drink think want forget want repeat sound baby like anyone sin anyone sin tell want put today baby net want decide look look slow little dog finish inside need decide look look slow little dog finish inside give know give water thirsty today want invest spend spend substance three see ticket low might also like tell fall asleep havent able since leave hey get get get think erase another taste go lie go lie walk think want forget want repeat baby whoa go fifty eight beach never think would last day whoa'], dtype=object), 
array([0.84387624, 0.84387624], dtype=float32), array([5919, 575]))
# PLOT
# Word cloud of the top words of topic 3.
top2vec.generate_topic_wordcloud(3)
Figure 3.9: Top2vec Topic 3 Top words
# SEARCH FOR A SAMPLE TOPIC 3 DOC
# Fetch 2 documents for topic 3 as (documents, scores, document ids).
top2vec.search_documents_by_topic(3, num_docs=2)
(array(['money goal let head water pen leaf wet sock unstoppable continue sketcher nearby everyone van three day balloon puke like bottle guarantee cost press key sure floss stir fry hungry lie belly get fatter eat stew spit fire smoke businessman public transport pain lot misery safe anyway someone go dough ever look dough gunner donate think feel pain skeer think loot yes philosophize would tell know anyway sit come street ask cause sick time look many hit luckily wear face mask clap barrel jail escape long corona almost start fund rapper contact pin also cash coke would fall run home think baseball player boy pit make gap get present scene father day first one already hard second one go thing kick do much better keep head toe tight drive tight new valve measure know hot guy close road titty rain suit tenner little men brother know thing brother live already grab little little grab brother trifle get code little bitch neighborhood striker brother like drive boy run armor buy pipe bike little one mine long time eat extensively want hear anything time right turn gray day never come sometimes also understand stay carry corpse bag brother tear big shopper open must sweat friend keep moan dude check radar brother say cross border give another hour neighborhood open garage lie bed next wife think whats guy think get clothe smoke tobacco greenhouse buy golf eight could buy usually always quiet whore go sell money might also like', 'money goal let head water pen leaf wet sock unstoppable continue sketcher nearby everyone van three day balloon puke like bottle guarantee cost press key sure floss stir fry hungry lie belly get fatter eat stew spit fire smoke businessman public transport pain lot misery safe anyway someone go dough ever look dough gunner donate think feel pain skeer think loot yes philosophize would tell know anyway sit come street ask cause sick time look many hit luckily wear face mask clap barrel jail escape long corona almost start fund rapper contact 
pin also cash coke would fall run home think baseball player boy pit make gap get present scene father day first one already hard second one go thing kick do much better keep head toe tight drive tight new valve measure know hot guy close road titty rain suit tenner little men brother know thing brother live already grab little little grab brother trifle get code little bitch neighborhood striker brother like drive boy run armor buy pipe bike little one mine long time eat extensively want hear anything time right turn gray day never come sometimes also understand stay carry corpse bag brother tear big shopper open must sweat friend keep moan dude check radar brother say cross border give another hour neighborhood open garage lie bed next wife think whats guy think get clothe smoke tobacco greenhouse buy golf eight could buy usually always quiet whore go sell money might also like'], dtype=object), array([0.8698076, 0.8698076], dtype=float32), array([4516, 472]))
# PLOT
# Word cloud of the top words of topic 4.
top2vec.generate_topic_wordcloud(4)
# SEARCH FOR A SAMPLE TOPIC 4 DOC
# Fetch 2 documents for topic 4 as (documents, scores, document ids).
top2vec.search_documents_by_topic(4, num_docs=2)
(array(['get hit house lick tell wit home invasion persuasive persuasive nine five know dream life like rapper like rapper back wrapper cool cool go tell tell usher let burn come let burn come hot sauce top park car start thing free mind free mind freeze verse see dollar sign see dollar sign like easy silver spoon know come never know never know reverend revenue berry hallelujah pick poison tell everybody gon respect shooter one front gun life forever one front gun forever canal alleyway say money tree perfect place shade feel dollar might main bitch feel dollar might say come feel dollar might make lane switch feel dollar might turn rich feel dream life like rapper like rapper new school way school know big earl rational rational rational back reality poor another casualty war two bullet uncle tony head tony head say one day tour never would belt never ease pain would ease pain purchase day jerkin day jerkin pull church gang sign window offend say hood pot gold pot gold gon crash nobody home see ticket low might also like berry hallelujah pick poison tell everybody gon respect shooter one front gun life forever one front gun forever canal alleyway say money tree perfect place shade feel dollar might main bitch feel dollar might say come feel dollar might make lane switch feel dollar might turn rich feel last one get dough one bucket head hoe way hit street break code way hit brake patrol way last one get dough way love one bucket head hoe way hit street break code way hit brake patrol way rock project pick pocket miss stock liquor pistol poppin soda aint turkey hope lord forgive pot cocaine residue every day else thug cheese government provide daughter way get drum get band like parade drop work bush hope boy see stash tell truth last time might see garden grass aint cut serpent blood bitch pussy drug good break promise steal watch tell time take tell kick foot locker street heater dungaree dream shade money tree berry hallelujah pick poison tell everybody gon 
respect shooter one front gun life forever one front gun forever canal alleyway say money tree perfect place shade feel bring car back man another appointment figure back time anyways look get house man man one good get thing bring car back fade good look listen girl girl want body want body cause get big fat girl girl want body want body cause big fat girl want body cause big see high hell aint even damn domino bring car back somebody say domino', 'get hit house lick tell wit home invasion persuasive persuasive nine five know dream life like rapper like rapper back wrapper cool cool go tell tell usher let burn come let burn come hot sauce top park car start thing free mind free mind freeze verse see dollar sign see dollar sign like easy silver spoon know come never know never know reverend revenue berry hallelujah pick poison tell everybody gon respect shooter one front gun life forever one front gun forever canal alleyway say money tree perfect place shade feel dollar might main bitch feel dollar might say come feel dollar might make lane switch feel dollar might turn rich feel dream life like rapper like rapper new school way school know big earl rational rational rational back reality poor another casualty war two bullet uncle tony head tony head say one day tour never would belt never ease pain would ease pain purchase day jerkin day jerkin pull church gang sign window offend say hood pot gold pot gold gon crash nobody home see ticket low might also like berry hallelujah pick poison tell everybody gon respect shooter one front gun life forever one front gun forever canal alleyway say money tree perfect place shade feel dollar might main bitch feel dollar might say come feel dollar might make lane switch feel dollar might turn rich feel last one get dough one bucket head hoe way hit street break code way hit brake patrol way last one get dough way love one bucket head hoe way hit street break code way hit brake patrol way rock project pick pocket miss stock 
liquor pistol poppin soda aint turkey hope lord forgive pot cocaine residue every day else thug cheese government provide daughter way get drum get band like parade drop work bush hope boy see stash tell truth last time might see garden grass aint cut serpent blood bitch pussy drug good break promise steal watch tell time take tell kick foot locker street heater dungaree dream shade money tree berry hallelujah pick poison tell everybody gon respect shooter one front gun life forever one front gun forever canal alleyway say money tree perfect place shade feel bring car back man another appointment figure back time anyways look get house man man one good get thing bring car back fade good look listen girl girl want body want body cause get big fat girl girl want body want body cause big fat girl want body cause big see high hell aint even damn domino bring car back somebody say domino'], dtype=object), array([0.91754365, 0.91754365], dtype=float32), array([3818, 4755]))
Figure 3.10: Top2vec Topic 4 Top words
# Are there any topics about love?
# SEARCH FOR TOPICS USING KEYWORDS
# Ask Top2Vec for the 2 best-matching topics for the keyword "love" and print
# the keyword list of each match. This rebinds `topic_words` and `topic_nums`.
topic_words, word_scores, topic_scores, topic_nums = top2vec.search_topics(
    keywords=["love"],
    num_topics=2,
)
for match_rank in (0, 1):
    print(topic_words[match_rank])
['deserve lover' 'amor' 'tear' 'heartbreak anniversary' 'cry beg' 'scarf amor' 'lover fighter' 'heal wind' 'learn ballad' 'beat romance' 'love' 'cry' 'tear drop' 'need lover' 'lover' 'heart adore' 'teardrop fall' 'save tear' 'trouble sentimental' 'poem fall' 'tear use' 'emotional cause' 'bonbon grape' 'heart lionheart' 'point cry' 'mourn mourn' 'cry buy' 'tear apart' 'lionheart heart' 'stitch bae' 'lullaby choke' 'wind caress' 'tear cheek' 'tattoo amor' 'hurt' 'favorite song' 'heal' 'mary amor' 'wipe tear' 'taffy forgive' 'crush heart' 'difficult cry' 'song sing' 'forgiveness rancor' 'scooter scream' 'song poppin' 'kiss disaster' 'victim push' 'forgive forgive' 'remix twice'] ['amor' 'deserve lover' 'heart adore' 'scarf amor' 'kiss deceive' 'tattoo amor' 'underground amor' 'closer whisper' 'love' 'heart flutter' 'need lover' 'learn ballad' 'devotion deep' 'jiggy jiggy' 'trance sexy' 'poem fall' 'wake desire' 'dance sensual' 'lullaby choke' 'song butterfly' 'probably adore' 'cry beg' 'song poppin' 'lover' 'succulent succulent' 'beg pardon' 'poppin' 'eye sensual' 'lover fighter' 'bonbon grape' 'bolero wonder' 'kiss hopefully' 'lust weekend' 'wonder wonder' 'flame passion' 'touch yell' 'teardrop fall' 'lick lick' 'lyric' 'rush rush' 'actin excite' 'beat romance' 'wind caress' 'kiss disaster' 'pussy deep' 'tuck jewel' 'talk romantic' 'crush heart' 'mou lene' 'mambo']
# Are there any topics about hate?
# KEYWORD TOPIC SEARCH: "hate"
# Ask the model for the two topics that match the keyword and unpack the
# four parallel results (words, word scores, topic scores, topic numbers).
topic_words, word_scores, topic_scores, topic_nums = top2vec.search_topics(
    keywords=["hate"],
    num_topics=2,
)
# Print the word list of each of the two returned topics, one per line.
print(topic_words[0], topic_words[1], sep="\n")
['everybody stare' 'gloomy face' 'exhaust always' 'envy silent' 'glance spiteful' 'hide emotion' 'narcissism disguise' 'spiteful crazy' 'silent burden' 'crazy daydream' 'madness begin' 'howl pajama' 'wake desire' 'wake scream' 'scream' 'ignore sun' 'everybody problem' 'silent attention' 'moonlight sunrise' 'frown' 'devilish smile' 'sometimes silence' 'darkness find' 'anger wind' 'spiteful' 'face daylight' 'exhaust' 'scream laugh' 'sorrow begin' 'hide repress' 'sunrise moonlight' 'bile bile' 'madness' 'happy hate' 'mirror mirror' 'stare' 'pull tune' 'call greedy' 'ordinary danger' 'want riddle' 'turn headlight' 'feel rebellious' 'paranoia cause' 'mirror road' 'antihero' 'hater wit' 'narcissism' 'ignore' 'lose snooze' 'outburst crazy'] ['cry beg' 'heartless could' 'wait patiently' 'cry buy' 'cry' 'believe pity' 'sade regret' 'difficult cry' 'leave remorse' 'decision feel' 'sad aware' 'fool shame' 'heartless' 'could heartless' 'regret pray' 'regret courage' 'shame fool' 'regret familiar' 'happy hate' 'feel rebellious' 'spiteful crazy' 'morat mad' 'regret forgive' 'sad' 'mourn mourn' 'scoundrel doubtful' 'glance spiteful' 'beg pardon' 'complacent decision' 'regret' 'remind damn' 'repress hurt' 'tear' 'super sad' 'deserve lover' 'mailbox hurt' 'point cry' 'regret young' 'sympathy hope' 'upset' 'imagine respect' 'sadness' 'feel' 'beg' 'spite realize' 'darling expect' 'view rude' 'fool' 'desperate damn' 'rest patient']
# Are there any topics about race?
# KEYWORD TOPIC SEARCH: "race"
# Ask the model for the two topics that match the keyword and unpack the
# four parallel results (words, word scores, topic scores, topic numbers).
topic_words, word_scores, topic_scores, topic_nums = top2vec.search_topics(
    keywords=["race"],
    num_topics=2,
)
# Print the word list of each of the two returned topics, one per line.
print(topic_words[0], topic_words[1], sep="\n")
['jiggy jiggy' 'poppin' 'song poppin' 'feat nicky' 'kiss hopefully' 'back poppin' 'mambo bad' 'must hustle' 'show heroin' 'rah rah' 'weekend' 'kiss deceive' 'guess slut' 'favorite tune' 'nicky jam' 'night funk' 'moony tune' 'make rowdy' 'wonder wonder' 'dum dam' 'universe moonshine' 'rhyme' 'crunk light' 'sex repeat' 'trance sexy' 'funk funk' 'rah' 'anything wah' 'puff daddy' 'suck nymphet' 'lullaby choke' 'funk' 'contagious dope' 'dead rave' 'bae bae' 'suck suck' 'darling expect' 'remix ultra' 'bet levitate' 'remix monte' 'rain bop' 'mine bonbon' 'finer' 'hopefully' 'funny remember' 'moonlight tequila' 'polish wish' 'shoot dope' 'dope dope' 'top gra'] ['song poppin' 'lullaby choke' 'rain bop' 'jiggy jiggy' 'sing lullaby' 'poppin' 'teardrop fall' 'sing toto' 'mambo bad' 'moony tune' 'dum dam' 'rhyme' 'listen song' 'feat nicky' 'puff daddy' 'learn ballad' 'toto' 'wish heather' 'naughty desire' 'beg pardon' 'poem fall' 'sha sha' 'song butterfly' 'moonlight tequila' 'favorite song' 'bae bae' 'toto wet' 'coconut fall' 'kilo kilo' 'babe rock' 'drop drop' 'trance trance' 'mambo kitten' 'tuck jewel' 'bop bop' 'trance sexy' 'mambo' 'show heroin' 'rock babe' 'intention song' 'favorite tune' 'mambo end' 'lyric' 'cuss rap' 'actin naughty' 'here song' 'pineapple feat' 'back poppin' 'apologize mistake' 'moonlight sunrise']
# Are there any topics about laws?
# KEYWORD TOPIC SEARCH: "law"
# Ask the model for the two topics that match the keyword and unpack the
# four parallel results (words, word scores, topic scores, topic numbers).
topic_words, word_scores, topic_scores, topic_nums = top2vec.search_topics(
    keywords=["law"],
    num_topics=2,
)
# Print the word list of each of the two returned topics, one per line.
print(topic_words[0], topic_words[1], sep="\n")
['lie congratulate' 'suit congratulate' 'well act' 'act' 'congratulate congratulate' 'well congratulate' 'congratulate' 'band congratulate' 'behave badly' 'fool claim' 'sincere' 'fool play' 'kiss deceive' 'trick feint' 'fool shame' 'behave' 'owe act' 'cry beg' 'sit humbly' 'smile mockingbird' 'magical act' 'behave pretty' 'laugh willy' 'tell dumb' 'sound sincere' 'ring sing' 'lie truly' 'continue show' 'sing' 'listen legend' 'smile prick' 'raise suspicion' 'fail moan' 'celebrate' 'act revenge' 'catch layer' 'pull celebrate' 'realize well' 'sing hallelujah' 'admit mistake' 'sit cock' 'apologize mistake' 'clap back' 'sing ring' 'sick fake' 'fool' 'lie' 'sell sincere' 'cheat law' 'slap slap'] ['everybody stare' 'gloomy face' 'exhaust always' 'envy silent' 'glance spiteful' 'hide emotion' 'narcissism disguise' 'spiteful crazy' 'silent burden' 'crazy daydream' 'madness begin' 'howl pajama' 'wake desire' 'wake scream' 'scream' 'ignore sun' 'everybody problem' 'silent attention' 'moonlight sunrise' 'frown' 'devilish smile' 'sometimes silence' 'darkness find' 'anger wind' 'spiteful' 'face daylight' 'exhaust' 'scream laugh' 'sorrow begin' 'hide repress' 'sunrise moonlight' 'bile bile' 'madness' 'happy hate' 'mirror mirror' 'stare' 'pull tune' 'call greedy' 'ordinary danger' 'want riddle' 'turn headlight' 'feel rebellious' 'paranoia cause' 'mirror road' 'antihero' 'hater wit' 'narcissism' 'ignore' 'lose snooze' 'outburst crazy']
# Are there any topics about family?
# KEYWORD TOPIC SEARCH: "family"
# Ask the model for the two topics that match the keyword and unpack the
# four parallel results (words, word scores, topic scores, topic numbers).
topic_words, word_scores, topic_scores, topic_nums = top2vec.search_topics(
    keywords=["family"],
    num_topics=2,
)
# Print the word list of each of the two returned topics, one per line.
print(topic_words[0], topic_words[1], sep="\n")
['stitch bae' 'lullaby choke' 'bleed death' 'cry beg' 'lyric' 'babe darling' 'sing lullaby' 'jiggy jiggy' 'suck savage' 'learn ballad' 'hater wit' 'suck suck' 'suck nymphet' 'bae bae' 'tear drop' 'sing nana' 'poppin' 'babe' 'scratch yell' 'stick deadbeat' 'gon respond' 'turn unfaithful' 'baby uncontrollable' 'whore whore' 'squeal squeal' 'yam yam' 'suck smash' 'mush yeah' 'song poppin' 'bleed' 'hater' 'gush bleed' 'closer whisper' 'plan bleed' 'play bae' 'mere mere' 'knit knit' 'accelerate heartbeat' 'nan nan' 'bae' 'cut bleed' 'sha sha' 'monotonous rhythm' 'bear whoa' 'babe rock' 'cry' 'fragile fragile' 'push pus' 'die green' 'amor'] ['heart lionheart' 'lionheart heart' 'heartless' 'could heartless' 'heartless could' 'heart harder' 'heart adore' 'heart flutter' 'heart' 'woman heartless' 'lullaby choke' 'snap snap' 'lyric' 'cry beg' 'heartbeat' 'song poppin' 'accelerate heartbeat' 'dishevel heart' 'heartbreak anniversary' 'sing lullaby' 'crush heart' 'jiggy jiggy' 'poppin' 'snap' 'teardrop fall' 'tear drop' 'honey honey' 'heart glow' 'song butterfly' 'cry buy' 'intention song' 'hush little' 'cry' 'puff daddy' 'listen song' 'might snap' 'temptation whim' 'moony tune' 'hoe rap' 'dum dam' 'thump thump' 'poem fall' 'pierce heart' 'flower firework' 'honey maybe' 'heal wind' 'deserve lover' 'sing toto' 'peace sign' 'make chorus']
# Are there any topics about guns?
# KEYWORD TOPIC SEARCH: "gun"
# Ask the model for the two topics that match the keyword and unpack the
# four parallel results (words, word scores, topic scores, topic numbers).
topic_words, word_scores, topic_scores, topic_nums = top2vec.search_topics(
    keywords=["gun"],
    num_topics=2,
)
# Print the word list of each of the two returned topics, one per line.
print(topic_words[0], topic_words[1], sep="\n")
['thug live' 'thug' 'young thug' 'sneaker thug' 'pussy chopper' 'cardboard thug' 'die rather' 'run motorcycle' 'thug shooter' 'buy mockingbird' 'cut dope' 'fag blow' 'run away' 'make rowdy' 'rob gangster' 'hood jump' 'wick wack' 'run bulletproof' 'shoot dope' 'try backfire' 'stay alarm' 'addict footprint' 'please disown' 'go thin' 'suck nymphet' 'ride dink' 'scooter scream' 'feel rebellious' 'wrap bye' 'bone crank' 'poppin' 'pistol poppin' 'gangster' 'touch yell' 'swish ignition' 'buy zip' 'shy spark' 'jump jump' 'snooze miss' 'slap slap' 'idiot dead' 'sheriff crazy' 'addict gamble' 'move slow' 'wake desire' 'suck savage' 'gang murder' 'gunshot babe' 'shady chop' 'wake scream'] ['bad memory' 'forgetfulness happen' 'will liquor' 'mambo bad' 'crash huh' 'brain' 'lose' 'sing toto' 'funny remember' 'lose control' 'head' 'drink rum' 'shoot tequila' 'lullaby choke' 'alcohol confess' 'brain brainwash' 'lose due' 'tequila' 'head blast' 'drink alcohol' 'mix tequila' 'toto' 'beer memory' 'sober' 'hit zombie' 'listen song' 'head toe' 'eye hypnotize' 'round head' 'memory ecstasy' 'brain cabbage' 'liquor' 'whiskey' 'sing lullaby' 'addict gamble' 'gin gin' 'stubborn head' 'waste liquor' 'drum head' 'gin bother' 'memory' 'moonlight tequila' 'sing song' 'drink tonic' 'loss' 'able forget' 'drink sex' 'bleed death' 'hard liquor' 'kiss disaster']
# Are there any topics about violence?
# KEYWORD TOPIC SEARCH: "violence"
# Ask the model for the two topics that match the keyword and unpack the
# four parallel results (words, word scores, topic scores, topic numbers).
topic_words, word_scores, topic_scores, topic_nums = top2vec.search_topics(
    keywords=["violence"],
    num_topics=2,
)
# Print the word list of each of the two returned topics, one per line.
print(topic_words[0], topic_words[1], sep="\n")
['tear drop' 'cry beg' 'deserve lover' 'amor' 'heartbreak anniversary' 'torment slap' 'fade fade' 'scratch yell' 'teardrop fall' 'hit hit' 'point cry' 'find ugly' 'drop drop' 'jaw drop' 'love' 'tattoo amor' 'yell yell' 'poem fall' 'stupid slap' 'kiss disaster' 'slap slap' 'learn ballad' 'touch yell' 'crush heart' 'slap' 'tear' 'cry' 'need lover' 'scream' 'save tear' 'tear cheek' 'heart adore' 'hide emotion' 'break bully' 'drop' 'closer whisper' 'scarf amor' 'disappear unthinkable' 'yell unknown' 'tear describe' 'yell' 'run away' 'violence alright' 'hug hug' 'kiss deceive' 'violence' 'smack smack' 'abuse loop' 'accelerate heartbeat' 'bleed death'] ['keep silent' 'sit humbly' 'yell yell' 'yell unknown' 'sit cock' 'cry beg' 'call greedy' 'touch yell' 'keep quiet' 'hide repress' 'scratch yell' 'hide emotion' 'wait patiently' 'call wallflower' 'sit sidewalk' 'yell' 'sit' 'suck nymphet' 'make rowdy' 'shut shut' 'flag listen' 'scream' 'hide' 'listen classic' 'poppin' 'sit sentimental' 'tell dumb' 'deserve lover' 'buy mockingbird' 'everyone shout' 'suck suck' 'slap' 'walk woof' 'stay stray' 'lock room' 'need lover' 'stop' 'go non' 'ignore' 'silent attention' 'listen zero' 'lonely lonely' 'stand please' 'fool play' 'slap slap' 'underground amor' 'stop invisible' 'kiss deceive' 'listen blonde' 'keep advance']
There are no topics about religion, politics, gender, education, marriage, or justice.
# FREE-TEXT DOCUMENT QUERY: 'gun policy in the US'
# Retrieve two documents for the query text, together with their scores
# and document ids.
documents, doc_scores, doc_ids = top2vec.query_documents(
    'gun policy in the US',
    num_docs=2,
)
# Only the first returned document is printed, even though two are requested.
print(documents[0])
timon dope weapon iron flesh bone need lay hand already child use bigger one swear would rather take say everything fine glory say studio twenty air become tense problem weve already understand account banca card full debt unlimited two eighty run touch limit business hope delivery well batman pebble cellophane parcel thank god make know would around see use get way might also like dope weapon hooligan casino cousin deal dope weapon
# FREE-TEXT DOCUMENT QUERY: 'transgender'
# Retrieve two documents for the query text, together with their scores
# and document ids.
documents, doc_scores, doc_ids = top2vec.query_documents(
    'transgender',
    num_docs=2,
)
# Print both returned documents, first one on top, one per line.
print(documents[0], documents[1], sep="\n")
yeah tomboy look handle take hook yeah mood coke like look toe name tattoo get drink hate cranky boy frown want blonde doll doll idle til call crazy kite whats loss yeah try without mercy lose yeah yeah there single scar body like love rather rotten perfume yeah tomboy yeah tomboy attitude yeah tomboy play would rather film raise prince queendom right like dance love friend sometimes wear without cigarette like drink whiskey would change hell see ticket low might also like say crazy whats loss yeah chew without mercy lose yeah yeah like love there even single tear eye want smile trivial word yeah tomboy yeah tomboy attitude yeah tomboy say get get song right get mean tomboy ready yeah three two one neither man woman man woman neither man woman idle neither man woman man woman neither man woman love tomboy seem take eye especially python move way hold thing wan hold thing baby let take look lord want girl whole world know wrong since man focus definitely give say know get say know get woman bed listen quick say want back home tell say car break mile girl would home soon lord want girl whole world know wrong since man focus definitely give say know get say know get woman bed might also like girl wayward like way forward back look like two planet world alright girl famous swear playboy burst brain much swear fit break seem take eye especially python move way hold thing wan hold thing baby let take look lord want girl whole world know wrong since man focus definitely give say know get say know get woman bed say know get say know get woman bed
# FREE-TEXT DOCUMENT QUERY: 'social injustice'
# Retrieve two documents for the query text, together with their scores
# and document ids.
documents, doc_scores, doc_ids = top2vec.query_documents(
    'social injustice',
    num_docs=2,
)
# Print both returned documents, first one on top, one per line.
print(documents[0], documents[1], sep="\n")
matter game come tell amount win panama nail hand yes know divider plot say get bag even distribution home game house style three knock nothing push lot upside want come yes always ready game game yeah wait come life behind even whole city somewhere life account balance demon taxman whatever say day come soon tell queen city belong street whole team go make progress take everything lose last year please give motive accord language police know burden many year limbo limbo game king king couple demon enemy miss lady make move except grip feel like feel like one lady enough need hoe also put bulk skinny tho gang lot flow start always dope give everything could even didnt work try know wouldnt end market five oclock get source first cross line waste time much could fool anything school end never warm anywhere game didnt even pellet go hard find chemistry might also like day come soon tell queen city belong street whole team go make progress take everything lose last year please give motive accord language police know burden many year limbo limbo game king king want mansion dollar smile sit roof terrace film sip cocktail skin color sleep late pool want mansion dollar smile sit roof terrace film sip cocktail skin color sleep late pool yeah sleep yeah damn like suddenly everyone want chi pay carry price ask rise eighteen carat bitch thin layer marzipan skin old teacher say foreigner good woman technology well business chase forest run forest run trust single ever since dick fashion week young age polo shirt mobile phone front mirror look great photo look female filthy rich cheat standard job visit lie garden keep rap street war want mansion dollar smile sit roof terrace film sip cocktail skin color sleep late pool want mansion dollar smile sit roof terrace film sip cocktail skin color sleep late pool come heartbreaker glass marc come baby who sex guide jacket sheepskin gangster rapper pull face like coffin bearer target delinquent evil like like golfer club luggage 
everyone like shindy chi drive around young woman like drive instructor convertible smell like code read math book read vogue right cool sex symbol get golden tan jet ski sex six sixth last sexy womanizer peel egg might also like want mansion dollar smile sit roof terrace film sip cocktail skin color sleep late pool want mansion dollar smile sit roof terrace film sip cocktail skin color sleep late pool
# FREE-TEXT DOCUMENT QUERY: 'abortion rights'
# Retrieve two documents for the query text, together with their scores
# and document ids.
documents, doc_scores, doc_ids = top2vec.query_documents(
    'abortion rights',
    num_docs=2,
)
# Only the first returned document is printed, even though two are requested.
print(documents[0])
tell love love exist ugh ugh ugh see think would fall love anyone want see side baby free want baby completely want baby want see side baby free baby love one two want baby want see side baby free baby love one two want baby want baby want baby want baby want baby want see side baby free want baby completely want baby want baby want baby want baby want baby forget love ala afraid confess simply smoke cigarette understand much hell look exit vain picture might also like
# FREE-TEXT DOCUMENT QUERY: 'higher education'
# Retrieve two documents for the query text, together with their scores
# and document ids.
documents, doc_scores, doc_ids = top2vec.query_documents(
    'higher education',
    num_docs=2,
)
# Print both returned documents, first one on top, one per line.
print(documents[0], documents[1], sep="\n")
hood jump get high murder mane time hood jump get high murder mane time hood jump get high hood jump get high hood jump get high hood jump get high hood jump get high murder mane time hood jump get high murder mane hood jump high hood jump get high hood jump get high hood jump get high hood jump get high murder mane time hood jump get high murder mane time hood jump get high murder mane time hood jump get high murder mane time hood jump get high hood jump get high hood jump get high hood jump get might also like superior crown clave whatever long get even cool get storm subside say youd come back wait decide find whatever take although endless highway even come run take come fence stair superior nothing else shoe scatter heart burn mark map superior superior down whatever thousand time bigger might also like last light path make move forward everything else leave behind set fire although endless highway even come run take come fence stair superior nothing else shoe scatter heart burn mark map superior superior although endless highway even come run take come fence stair superior nothing else shoe scatter heart burn mark map superior superior
!jupyter nbconvert --to html Topic_Analysis_II.ipynb
[NbConvertApp] Converting notebook Topic_Analysis_II.ipynb to html [NbConvertApp] Writing 4291315 bytes to Topic_Analysis_II.html