diff --git a/README.md b/README.md index 039ea77..bcc1bb4 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,9 @@ Output: ```python from emosent import get_emoji_sentiment_rank + +# This function returns the emoji sentiment rank +# mapped to the specified character. get_emoji_sentiment_rank('😂') ``` Output: @@ -66,6 +69,41 @@ Output: 'sentiment_score': 0.221} ``` +#### Example 3 + +```python +from emosent import get_emoji_sentiment_rank_multiple + +# Parses the input text character by character and +# extracts emoji sentiment ranks and their respective positions in the text. +get_emoji_sentiment_rank_multiple('well done buddy! 😁👏') +``` +Output: +```python +# Here, the emojis are found at positions 17 and 18 in the specified text. +[{'text_position': 17, + 'emoji_sentiment_rank': {'unicode_codepoint': '0x1f601', + 'occurrences': 2189, + 'position': 0.796151187, + 'negative': 278.0, + 'neutral': 648.0, + 'positive': 1263.0, + 'unicode_name': 'GRINNING FACE WITH SMILING EYES', + 'unicode_block': 'Emoticons', + 'sentiment_score': 0.45}}, + {'text_position': 18, + 'emoji_sentiment_rank': {'unicode_codepoint': '0x1f44f', + 'occurrences': 2336, + 'position': 0.787130164, + 'negative': 243.0, + 'neutral': 634.0, + 'positive': 1459.0, + 'unicode_name': 'CLAPPING HANDS SIGN', + 'unicode_block': 'Miscellaneous Symbols and Pictographs', + 'sentiment_score': 0.521}}] + +``` + ## Versioning We use [SemVer](http://semver.org/) for versioning. For the versions available, @@ -75,4 +113,4 @@ see the ## License This project is licensed under the MIT License - see the -[LICENSE.txt](LICENSE.txt) file for more details. \ No newline at end of file +[LICENSE.txt](LICENSE.txt) file for more details. diff --git a/emosent/__init__.py b/emosent/__init__.py index 8ed4fa8..8b65be6 100644 --- a/emosent/__init__.py +++ b/emosent/__init__.py @@ -1,3 +1,4 @@ """ Package Initialization file. """ -from emosent.emosent import EMOJI_SENTIMENT_DICT, get_emoji_sentiment_rank +from emosent.emosent import EMOJI_SENTIMENT_DICT, get_emoji_sentiment_rank, \ + get_emoji_sentiment_rank_multiple diff --git a/emosent/emosent.py b/emosent/emosent.py index e5c50ac..819e603 100644 --- a/emosent/emosent.py +++ b/emosent/emosent.py @@ -14,6 +14,7 @@ import csv import logging from os import path +import re logging.basicConfig( level=logging.INFO, @@ -27,10 +28,9 @@ def _build_dict_from_csv(csv_path): emoji_sentiment_rankings = {} - # MrMindy: - # Adding the encoding. At least on Windows, I tested it before, the terminal displays an error that implies - # with the charset. Adding the UTF-8, everything runs smoothly. - + # Explicit use of UTF-8 encoding is required while reading Emojis from CSV + # to avoid errors in systems where UTF-8 is not the default encoding (e.g. Windows). + # Credits to MrMindy for this fix. with open(csv_path, newline='', encoding='utf-8') as csv_file: csv_reader = csv.reader(csv_file) _header_row = next(csv_reader) @@ -63,10 +63,32 @@ def _build_dict_from_csv(csv_path): return emoji_sentiment_rankings -def get_emoji_sentiment_rank(emoji): - """ Returns the Sentiment Data mapped to the specified Emoji. """ +def get_emoji_sentiment_rank(char): + """ Returns the emoji sentiment rank mapped to the specified character. """ + + return EMOJI_SENTIMENT_DICT.get(char.strip()) + + +def get_emoji_sentiment_rank_multiple(text): + """ + Parses the input text character by character and extracts emoji + sentiment ranks and their respective positions in the text. + """ + + emoji_results = [] + + for index, char in enumerate(text.strip()): + if char.isalnum(): + continue + + sentiment_rank = EMOJI_SENTIMENT_DICT.get(char) + if sentiment_rank: + emoji_results.append({ + 'text_position': index, + 'emoji_sentiment_rank': sentiment_rank + }) - return EMOJI_SENTIMENT_DICT[emoji] + return emoji_results EMOJI_SENTIMENT_DICT = _build_dict_from_csv( diff --git a/run_tests b/run_tests new file mode 100755 index 0000000..c2278d5 --- /dev/null +++ b/run_tests @@ -0,0 +1,2 @@ +#!/bin/sh +python -m unittest tests/test_emosent.py diff --git a/setup.py b/setup.py index e1adb5f..af89b70 100644 --- a/setup.py +++ b/setup.py @@ -7,20 +7,20 @@ with open( path.join(path.abspath(path.dirname(__file__)), 'README.md'), - encoding='utf-8') as f: - LONG_DESCRIPTION = f.read() + encoding='utf-8' + ) as readme_file: + LONG_DESCRIPTION = readme_file.read() setup( name='emosent-py', packages=['emosent'], - version='0.1.6', + version='0.1.7', license='MIT', description='Python module to get Sentiment Rankings for Unicode Emojis.', long_description=LONG_DESCRIPTION, long_description_content_type='text/markdown', - author='Fintel Labs Inc.', - author_email='omkar@fintel.ai', - url='https://fintel.ai', + author='Omkar P', + url='https://github.com/omkar-foss/emosent-py', download_url=( 'https://github.com/FintelLabs/emosent-py/archive/master.zip' ), diff --git a/tests/test_emosent.py b/tests/test_emosent.py new file mode 100644 index 0000000..0bb90b5 --- /dev/null +++ b/tests/test_emosent.py @@ -0,0 +1,85 @@ +""" + Unit tests for this project. +""" + +import unittest +from emosent import get_emoji_sentiment_rank, get_emoji_sentiment_rank_multiple + + +class TestEmosent(unittest.TestCase): + + def test_basic_ranking(self): + print('\ntest_basic_ranking: It checks for basic ranking functionality\n') + emoji = '❤' + expected_result = { + 'unicode_codepoint': '0x2764', + 'occurrences': 8050, + 'position': 0.746943086, + 'negative': 355.0, + 'neutral': 1334.0, + 'positive': 6361.0, + 'unicode_name': 'HEAVY BLACK HEART', + 'unicode_block': 'Dingbats', + 'sentiment_score': 0.746 + } + result = get_emoji_sentiment_rank(emoji) + self.assertDictEqual(result, expected_result) + + emoji = '' + result = get_emoji_sentiment_rank(emoji) + self.assertIsNone(result) + + emoji = ' ' + result = get_emoji_sentiment_rank(emoji) + self.assertIsNone(result) + + def test_emojis_text_extraction(self): + print( + '\ntest_emojis_text_extraction: It checks for multiple emojis in ' + 'text functionality\n' + ) + text = 'that is amazing! 😂❤' + expected_result = [ + { + 'text_position': 17, + 'emoji_sentiment_rank': { + 'unicode_codepoint': '0x1f602', + 'occurrences': 14622, + 'position': 0.805100583, + 'negative': 3614.0, + 'neutral': 4163.0, + 'positive': 6845.0, + 'unicode_name': 'FACE WITH TEARS OF JOY', + 'unicode_block': 'Emoticons', + 'sentiment_score': 0.221 + } + }, + { + 'text_position': 18, + 'emoji_sentiment_rank': { + 'unicode_codepoint': '0x2764', + 'occurrences': 8050, + 'position': 0.746943086, + 'negative': 355.0, + 'neutral': 1334.0, + 'positive': 6361.0, + 'unicode_name': 'HEAVY BLACK HEART', + 'unicode_block': 'Dingbats', + 'sentiment_score': 0.746 + } + } + ] + result = get_emoji_sentiment_rank_multiple(text) + self.assertListEqual(result, expected_result) + + text = '' + result = get_emoji_sentiment_rank_multiple(text) + self.assertListEqual(result, []) + + text = ' ' + result = get_emoji_sentiment_rank_multiple(text) + self.assertListEqual(result, []) + + +if __name__ == '__main__': + unittest.main()