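pymi allows you to compute MI scores for word pairs, print a sentence as an MI-based binary tree, segment a sentence by an MI threshold, save the MI dictionary to a file, and compute a distribution of word percentages over a range of thresholds. Example session: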
from pymi import PyMi
import seaborn as sns
sentences = [
['i', 'enjoy', 'cooking', 'delicious', 'meals', 'for', 'my', 'friends', 'and', 'family'],
['colorless', 'green', 'sleeps', 'furiously'],
['the', 'sun', 'is', 'shining', 'brightly'],
['i', 'love', 'to', 'travel', 'and', 'explore', 'new', 'places'],
['music', 'makes', 'me', 'feel', 'alive'],
['coffee', 'is', 'my', 'go-to', 'morning', 'drink'],
['rainy', 'days', 'are', 'perfect', 'for', 'staying', 'in', 'and', 'reading', 'a', 'book'],
['hiking', 'in', 'the', 'mountains', 'is', 'a', 'great', 'way', 'to', 'unwind'],
['learning', 'new', 'things', 'is', 'always', 'exciting'],
['the', 'sound', 'of', 'waves', 'crashing', 'on', 'the', 'beach', 'is', 'so', 'soothing'],
['i', "can't", 'resist', 'a', 'good', 'piece', 'of', 'chocolate'],
['artistic', 'expression', 'is', 'a', 'beautiful', 'way', 'to', 'communicate'],
["i'm", 'always', 'up', 'for', 'a', 'fun', 'adventure'],
['the', 'smell', 'of', 'freshly', 'baked', 'bread', 'is', 'irresistible'],
['a', 'good', 'workout', 'can', 'boost', 'your', 'mood'],
['exploring', 'the', 'night', 'sky', 'with', 'a', 'telescope', 'is', 'fascinating'],
['spending', 'time', 'in', 'nature', 'recharges', 'my', 'energy'],
['singing', 'in', 'the', 'shower', 'is', 'my', 'guilty', 'pleasure'],
['i', 'believe', 'in', 'the', 'power', 'of', 'positive', 'thinking'],
['laughter', 'is', 'the', 'best', 'medicine'],
['helping', 'others', 'is', 'a', 'noble', 'pursuit'],
['dancing', 'is', 'a', 'great', 'way', 'to', 'express', 'yourself'],
['a', 'good', 'book', 'can', 'transport', 'you', 'to', 'another', 'world'],
['a', 'warm', 'cup', 'of', 'tea', 'is', 'perfect', 'for', 'a', 'cold', 'day'],
['i', 'enjoy', 'solving', 'challenging', 'puzzles'],
['animals', 'bring', 'joy', 'to', 'my', 'life'],
['traveling', 'allows', 'you', 'to', 'experience', 'different', 'cultures'],
['i', 'appreciate', 'the', 'beauty', 'of', 'a', 'starry', 'night'],
['meditation', 'helps', 'me', 'find', 'inner', 'peace'],
['spending', 'time', 'with', 'loved', 'ones', 'is', 'priceless'],
['learning', 'from', 'your', 'mistakes', 'is', 'important'],
['the', 'aroma', 'of', 'fresh', 'flowers', 'is', 'delightful'],
['i', 'find', 'solace', 'in', 'the', 'sound', 'of', 'a', 'babbling', 'brook'],
['challenges', 'make', 'us', 'stronger'],
['the', 'feeling', 'of', 'sand', 'between', 'your', 'toes', 'is', 'wonderful'],
['a', 'smile', 'can', 'brighten', "someone's", 'day'],
['i', 'believe', 'in', 'the', 'magic', 'of', 'the', 'universe'],
['sharing', 'a', 'meal', 'with', 'friends', 'is', 'a', 'special', 'moment'],
['learning', 'to', 'play', 'a', 'musical', 'instrument', 'is', 'fulfilling'],
['the', 'excitement', 'of', 'a', 'rollercoaster', 'ride', 'is', 'exhilarating'],
['kindness', 'is', 'a', 'virtue', 'we', 'should', 'all', 'practice'],
['watching', 'a', 'sunset', 'is', 'a', 'breathtaking', 'experience'],
['i', 'enjoy', 'the', 'thrill', 'of', 'a', 'good', 'mystery'],
['wandering', 'through', 'a', 'forest', 'is', 'a', 'tranquil', 'experience'],
['a', 'well-cooked', 'meal', 'is', 'a', 'work', 'of', 'art'],
['hugs', 'can', 'convey', 'more', 'than', 'words'],
['the', 'gentle', 'touch', 'of', 'a', 'loved', 'one', 'is', 'comforting'],
['a', 'good', 'movie', 'can', 'captivate', 'your', 'imagination'],
['candles', 'create', 'a', 'cozy', 'atmosphere']
]
mi_model = PyMi(sentences, use_pickle=False)
Getting bigrams...
100%|██████████| 49/49 [00:00<00:00, 56508.36it/s]
Getting word counts...
100%|██████████| 49/49 [00:00<00:00, 147538.33it/s]
mi_model.get_mi(['believe', 'in'])
0.03662289713349294
mi_model.sentence_to_tree(mi_model.documents[0], type_='ami').print()
                                                  _____.0097____
                                                 |              |
              _________________________________.0207_         .0134_________
             |                                       |       |              |
    _______.0223_______________________             for     my         ___.0160_____
   |                                   |                              |             |
 .0518__                  ___________.0309__                       friends       _.0223___
|       |                |                  |                                   |         |
i     enjoy         ___.0309____          meals                                and     family
                   |            |
                cooking     delicious
mi_model.segment_sentence(mi_model.documents[0], type_='ami', threshold=.03, seg=' ')
['i enjoy', 'cooking delicious meals', 'for', 'my', 'friends', 'and', 'family']
mi_model.sentence_to_tree(mi_model.documents[0], type_='mi').print()
     _______________________________________________________21.079_____
    |                                                                  |
 46.844_______________________________________________              42.159__________
|                                                     |            |                |
i               ___________________________________105.39_        my          ___70.266______
               |                                          |                  |               |
          __140.53_________________________              for              friends        _140.53___
         |                                 |                                            |          |
       enjoy                 ___________421.59__                                       and      family
                            |                   |
                      ___421.59____           meals
                     |             |
                  cooking      delicious
mi_model.segment_sentence(mi_model.documents[0], type_='mi', threshold=140, seg=' ')
['i', 'enjoy cooking delicious meals', 'for', 'my', 'friends', 'and family']
mi_model = PyMi('demo_docs_eng.pickle', use_pickle=True)
Getting bigrams...
100%|██████████| 49/49 [00:00<00:00, 84333.56it/s]
Getting word counts...
100%|██████████| 49/49 [00:00<00:00, 347751.09it/s]
mi_model.save_mi_to_file(file_name='demo_docs_eng_mi_dic.pickle', type_='mi')
Found existing mi file with 281 ngrams.
0it [00:00, ?it/s]
mi_model.mi_dic
{('good', 'book'): 42.15981012658228, ('staying', 'in'): 60.22830018083182, ('my', 'go-to'): 84.31962025316456, ('helping', 'others'): 421.5981012658228, ('us', 'stronger'): 421.5981012658228, ('the', 'shower'): 23.42211673699016, ('captivate', 'your'): 105.3995253164557, ('a', 'babbling'): 14.053270042194093, ('piece', 'of'): 30.11415009041591, ('are', 'perfect'): 210.7990506329114, ('is', 'important'): 16.863924050632914, ('the', 'aroma'): 23.42211673699016, ('of', 'sand'): 30.11415009041591, ('of', 'positive'): 30.11415009041591, ('convey', 'more'): 421.5981012658228, ('expression', 'is'): 16.863924050632914, ('cozy', 'atmosphere'): 421.5981012658228, ('the', 'thrill'): 23.42211673699016, ('in', 'and'): 20.076100060277277, ('animals', 'bring'): 421.5981012658228, ('with', 'loved'): 70.26635021097047, ('joy', 'to'): 52.69976265822785, ('we', 'should'): 421.5981012658228, ('telescope', 'is'): 16.863924050632914, ('a', 'warm'): 14.053270042194093, ('and', 'family'): 140.53270042194094, ('laughter', 'is'): 16.863924050632914, ('your', 'mistakes'): 105.3995253164557, ('smile', 'can'): 84.31962025316456, ('of', 'chocolate'): 30.11415009041591, ('more', 'than'): 421.5981012658228, ('good', 'movie'): 84.31962025316456, ('friends', 'and'): 70.26635021097047, ('can', 'brighten'): 84.31962025316456, ('reading', 'a'): 14.053270042194093, ('find', 'inner'): 210.7990506329114, ('experience', 'different'): 140.53270042194094, ('a', 'beautiful'): 14.053270042194093, ('allows', 'you'): 210.7990506329114, ('make', 'us'): 421.5981012658228, ('learning', 'from'): 140.53270042194094, ('sky', 'with'): 140.53270042194094, ('one', 'is'): 16.863924050632914, ('the', 'night'): 11.71105836849508, ('hiking', 'in'): 60.22830018083182, ('the', 'gentle'): 23.42211673699016, ('singing', 'in'): 60.22830018083182, ('is', 'a'): 5.059177215189874, ('magic', 'of'): 30.11415009041591, ('a', 'rollercoaster'): 14.053270042194093, ('to', 'my'): 10.53995253164557, ('my', 'friends'): 42.15981012658228, 
('bread', 'is'): 16.863924050632914, ('can', 'transport'): 84.31962025316456, ('to', 'express'): 52.69976265822785, ('of', 'a'): 5.019025015069319, ('rainy', 'days'): 421.5981012658228, ('music', 'makes'): 421.5981012658228, ('sound', 'of'): 30.11415009041591, ('with', 'friends'): 70.26635021097047, ('excitement', 'of'): 30.11415009041591, ('the', 'beach'): 23.42211673699016, ('up', 'for'): 105.3995253164557, ('a', 'cozy'): 14.053270042194093, ('learning', 'new'): 70.26635021097047, ('positive', 'thinking'): 421.5981012658228, ('warm', 'cup'): 421.5981012658228, ('enjoy', 'the'): 7.8073722456633865, ('candles', 'create'): 421.5981012658228, ('recharges', 'my'): 84.31962025316456, ('of', 'waves'): 30.11415009041591, ('i', 'enjoy'): 46.84423347398032, ('another', 'world'): 421.5981012658228, ('time', 'with'): 70.26635021097047, ('a', 'fun'): 14.053270042194093, ('great', 'way'): 140.53270042194094, ('feeling', 'of'): 30.11415009041591, ('fun', 'adventure'): 421.5981012658228, ('can', 'boost'): 84.31962025316456, ('sharing', 'a'): 14.053270042194093, ('is', 'always'): 8.431962025316457, ('shower', 'is'): 16.863924050632914, ('believe', 'in'): 60.22830018083182, ('so', 'soothing'): 421.5981012658228, ('is', 'fulfilling'): 16.863924050632914, ('morning', 'drink'): 421.5981012658228, ('i', 'find'): 23.42211673699016, ('watching', 'a'): 14.053270042194093, ('i', 'appreciate'): 46.84423347398032, ('breathtaking', 'experience'): 140.53270042194094, ('way', 'to'): 52.699762658227854, ('bring', 'joy'): 421.5981012658228, ('loved', 'ones'): 210.7990506329114, ('a', 'sunset'): 14.053270042194093, ('things', 'is'): 16.863924050632914, ('sunset', 'is'): 16.863924050632914, ('a', 'great'): 14.053270042194093, ('to', 'unwind'): 52.69976265822785, ('than', 'words'): 421.5981012658228, ('to', 'experience'): 17.566587552742618, ('challenging', 'puzzles'): 421.5981012658228, ('best', 'medicine'): 421.5981012658228, ('in', 'nature'): 60.22830018083182, ('is', 'shining'): 
16.863924050632914, ('traveling', 'allows'): 421.5981012658228, ('good', 'mystery'): 84.31962025316456, ('meditation', 'helps'): 421.5981012658228, ('smell', 'of'): 30.11415009041591, ('cup', 'of'): 30.11415009041591, ('a', 'tranquil'): 14.053270042194093, ('special', 'moment'): 421.5981012658228, ('to', 'play'): 52.69976265822785, ('a', 'breathtaking'): 14.053270042194093, ('coffee', 'is'): 16.863924050632914, ('the', 'feeling'): 23.42211673699016, ('exploring', 'the'): 23.42211673699016, ('my', 'life'): 84.31962025316456, ('appreciate', 'the'): 23.42211673699016, ('colorless', 'green'): 421.5981012658228, ('flowers', 'is'): 16.863924050632914, ('meals', 'for'): 105.3995253164557, ('new', 'things'): 210.7990506329114, ('for', 'a'): 7.0266350210970465, ("i'm", 'always'): 210.7990506329114, ('a', 'book'): 7.0266350210970465, ('your', 'mood'): 105.3995253164557, ('my', 'energy'): 84.31962025316456, ('mountains', 'is'): 16.863924050632914, ('tea', 'is'): 16.863924050632914, ("can't", 'resist'): 421.5981012658228, ('is', 'perfect'): 8.431962025316457, ('hugs', 'can'): 84.31962025316456, ('noble', 'pursuit'): 421.5981012658228, ('is', 'priceless'): 16.863924050632914, ("someone's", 'day'): 210.7990506329114, ('well-cooked', 'meal'): 210.7990506329114, ('inner', 'peace'): 421.5981012658228, ('for', 'staying'): 105.3995253164557, ('is', 'my'): 6.745569620253166, ('makes', 'me'): 210.7990506329114, ('good', 'piece'): 84.31962025316456, ('brighten', "someone's"): 421.5981012658228, ('new', 'places'): 210.7990506329114, ('you', 'to'): 52.69976265822785, ('crashing', 'on'): 421.5981012658228, ('a', 'musical'): 14.053270042194093, ('a', 'well-cooked'): 14.053270042194093, ('freshly', 'baked'): 421.5981012658228, ('dancing', 'is'): 16.863924050632914, ('i', 'love'): 46.84423347398032, ('different', 'cultures'): 421.5981012658228, ('a', 'work'): 14.053270042194093, ('the', 'excitement'): 23.42211673699016, ('is', 'irresistible'): 16.863924050632914, ('perfect', 'for'): 
105.3995253164557, ('express', 'yourself'): 421.5981012658228, ('days', 'are'): 421.5981012658228, ('beach', 'is'): 16.863924050632914, ('beautiful', 'way'): 140.53270042194094, ('power', 'of'): 30.11415009041591, ('is', 'the'): 0.9368846694796062, ('should', 'all'): 421.5981012658228, ('to', 'travel'): 52.69976265822785, ('baked', 'bread'): 421.5981012658228, ('forest', 'is'): 16.863924050632914, ('time', 'in'): 30.11415009041591, ('of', 'freshly'): 30.11415009041591, ('nature', 'recharges'): 421.5981012658228, ('of', 'the'): 1.6730083383564396, ('feel', 'alive'): 421.5981012658228, ('the', 'magic'): 23.42211673699016, ('green', 'sleeps'): 421.5981012658228, ('on', 'the'): 23.42211673699016, ('solving', 'challenging'): 421.5981012658228, ('sand', 'between'): 421.5981012658228, ('helps', 'me'): 210.7990506329114, ('is', 'wonderful'): 16.863924050632914, ('instrument', 'is'): 16.863924050632914, ('shining', 'brightly'): 421.5981012658228, ('sun', 'is'): 16.863924050632914, ('wandering', 'through'): 421.5981012658228, ('good', 'workout'): 84.31962025316456, ('is', 'fascinating'): 16.863924050632914, ('of', 'art'): 30.11415009041591, ('a', 'good'): 14.053270042194097, ('i', "can't"): 46.84423347398032, ('a', 'virtue'): 14.053270042194093, ('guilty', 'pleasure'): 421.5981012658228, ('a', 'starry'): 14.053270042194093, ('starry', 'night'): 210.7990506329114, ('meal', 'with'): 70.26635021097047, ('is', 'delightful'): 16.863924050632914, ('the', 'power'): 23.42211673699016, ('the', 'smell'): 23.42211673699016, ('virtue', 'we'): 421.5981012658228, ('and', 'reading'): 140.53270042194094, ('a', 'smile'): 14.053270042194093, ('from', 'your'): 105.3995253164557, ('resist', 'a'): 14.053270042194093, ('solace', 'in'): 60.22830018083182, ('mistakes', 'is'): 16.863924050632914, ('a', 'forest'): 14.053270042194093, ('through', 'a'): 14.053270042194093, ('the', 'best'): 23.42211673699016, ('me', 'find'): 105.3995253164557, ('the', 'sound'): 23.42211673699016, ('your', 'toes'): 
105.3995253164557, ('a', 'loved'): 7.0266350210970465, ('of', 'fresh'): 30.11415009041591, ('touch', 'of'): 30.11415009041591, ('movie', 'can'): 84.31962025316456, ('always', 'up'): 210.7990506329114, ('ones', 'is'): 16.863924050632914, ('cold', 'day'): 210.7990506329114, ('others', 'is'): 16.863924050632914, ('love', 'to'): 52.69976265822785, ('me', 'feel'): 210.7990506329114, ('and', 'explore'): 140.53270042194094, ('of', 'tea'): 30.11415009041591, ('rollercoaster', 'ride'): 421.5981012658228, ('for', 'my'): 21.07990506329114, ('i', 'believe'): 46.84423347398032, ('workout', 'can'): 84.31962025316456, ('spending', 'time'): 210.7990506329114, ('find', 'solace'): 210.7990506329114, ('babbling', 'brook'): 421.5981012658228, ('musical', 'instrument'): 421.5981012658228, ('is', 'exhilarating'): 16.863924050632914, ('is', 'comforting'): 16.863924050632914, ('your', 'imagination'): 105.3995253164557, ('explore', 'new'): 210.7990506329114, ('travel', 'and'): 140.53270042194094, ('challenges', 'make'): 421.5981012658228, ('beauty', 'of'): 30.11415009041591, ('create', 'a'): 14.053270042194093, ('to', 'another'): 52.69976265822785, ('delicious', 'meals'): 421.5981012658228, ('sleeps', 'furiously'): 421.5981012658228, ('the', 'sun'): 23.42211673699016, ('work', 'of'): 30.11415009041591, ('enjoy', 'cooking'): 140.53270042194094, ('transport', 'you'): 210.7990506329114, ('a', 'noble'): 14.053270042194093, ('the', 'universe'): 23.42211673699016, ('enjoy', 'solving'): 140.53270042194094, ('is', 'so'): 16.863924050632914, ('a', 'telescope'): 14.053270042194093, ('my', 'guilty'): 84.31962025316456, ('fresh', 'flowers'): 421.5981012658228, ('kindness', 'is'): 16.863924050632914, ('tranquil', 'experience'): 140.53270042194094, ('to', 'communicate'): 52.69976265822785, ('cooking', 'delicious'): 421.5981012658228, ('waves', 'crashing'): 421.5981012658228, ('boost', 'your'): 105.3995253164557, ('can', 'convey'): 84.31962025316456, ('always', 'exciting'): 210.7990506329114, ('go-to', 
'morning'): 421.5981012658228, ('loved', 'one'): 210.7990506329114, ('night', 'sky'): 210.7990506329114, ('the', 'beauty'): 23.42211673699016, ('in', 'the'): 16.7300833835644, ('with', 'a'): 4.684423347398032, ('toes', 'is'): 16.863924050632914, ('a', 'special'): 14.053270042194093, ('play', 'a'): 14.053270042194093, ('gentle', 'touch'): 421.5981012658228, ('book', 'can'): 42.15981012658228, ('ride', 'is'): 16.863924050632914, ('learning', 'to'): 17.566587552742618, ('the', 'mountains'): 23.42211673699016, ('aroma', 'of'): 30.11415009041591, ('a', 'meal'): 7.0266350210970465, ('thrill', 'of'): 30.11415009041591, ('artistic', 'expression'): 421.5981012658228, ('between', 'your'): 105.3995253164557, ('meal', 'is'): 8.431962025316457, ('a', 'cold'): 14.053270042194093, ('friends', 'is'): 8.431962025316457, ('can', 'captivate'): 84.31962025316456, ('all', 'practice'): 421.5981012658228}
distribution = mi_model.get_distribution(0, 1000, type_='mi')
threshold:1000.0; word percentage:last––0.14243823070353684 this––0.14243823070353684: 100%|██████████| 150/150 [00:00<00:00, 552.55it/s]
sns.lineplot(
x=distribution['threshold'],
y=distribution['mean_word_percentage']
)
<Axes: >