Add support for extracting multiple emojis in text and tests

omkar-foss · Jul 6, 2023 · b295f19 · b295f19
1 parent e5788e7
commit b295f19
Show file tree

Hide file tree

Showing 6 changed files with 163 additions and 15 deletions.
diff --git a/README.md b/README.md
@@ -51,6 +51,9 @@ Output:
 
 ```python
 from emosent import get_emoji_sentiment_rank
+
+# Returns the emoji sentiment rank mapped to the specified character,
+# or None if the character has no entry in the sentiment ranking data.
 get_emoji_sentiment_rank('😂')
 ```
 Output:
@@ -66,6 +69,41 @@ Output:
  'sentiment_score': 0.221}
 ```
 
+#### Example 3
+
+```python
+from emosent import get_emoji_sentiment_rank_multiple
+
+# Parses the text (after stripping surrounding whitespace) character by character
+# and returns each emoji's sentiment rank with its 0-based position in that text.
+get_emoji_sentiment_rank_multiple('well done buddy! 😁👏')
+```
+Output:
+```python 
+# Here, the emojis are found at 0-based positions 17 and 18 in the specified text.
+[{'text_position': 17,
+  'emoji_sentiment_rank': {'unicode_codepoint': '0x1f601',
+   'occurrences': 2189,
+   'position': 0.796151187,
+   'negative': 278.0,
+   'neutral': 648.0,
+   'positive': 1263.0,
+   'unicode_name': 'GRINNING FACE WITH SMILING EYES',
+   'unicode_block': 'Emoticons',
+   'sentiment_score': 0.45}},
+ {'text_position': 18,
+  'emoji_sentiment_rank': {'unicode_codepoint': '0x1f44f',
+   'occurrences': 2336,
+   'position': 0.787130164,
+   'negative': 243.0,
+   'neutral': 634.0,
+   'positive': 1459.0,
+   'unicode_name': 'CLAPPING HANDS SIGN',
+   'unicode_block': 'Miscellaneous Symbols and Pictographs',
+   'sentiment_score': 0.521}}]
+
+```
+
 ## Versioning
 
 We use [SemVer](http://semver.org/) for versioning. For the versions available, 
@@ -75,4 +113,4 @@ see the
 ## License
 
 This project is licensed under the MIT License - see the 
-[LICENSE.txt](LICENSE.txt) file for more details.
+[LICENSE.txt](LICENSE.txt) file for more details.
diff --git a/emosent/__init__.py b/emosent/__init__.py
@@ -1,3 +1,4 @@
 """ Package Initialization file. """
 
-from emosent.emosent import EMOJI_SENTIMENT_DICT, get_emoji_sentiment_rank
+from emosent.emosent import EMOJI_SENTIMENT_DICT, get_emoji_sentiment_rank, \
+    get_emoji_sentiment_rank_multiple
diff --git a/emosent/emosent.py b/emosent/emosent.py
@@ -14,6 +14,7 @@
 import csv
 import logging
 from os import path
+import re
 
 logging.basicConfig(
     level=logging.INFO,
@@ -27,10 +28,9 @@ def _build_dict_from_csv(csv_path):
 
     emoji_sentiment_rankings = {}
 
-    # MrMindy:
-    # Adding the encoding. At least on Windows, I tested it before, the terminal displays an error that implies
-    # with the charset. Adding the UTF-8, everything runs smoothly.
-
+    # Explicit UTF-8 encoding is required when reading emojis from the CSV,
+    # to avoid errors on systems where UTF-8 is not the default encoding (e.g. Windows).
+    # Credits to MrMindy for this fix.
     with open(csv_path, newline='', encoding='utf-8') as csv_file:
         csv_reader = csv.reader(csv_file)
         _header_row = next(csv_reader)
@@ -63,10 +63,32 @@ def _build_dict_from_csv(csv_path):
     return emoji_sentiment_rankings
 
 
-def get_emoji_sentiment_rank(emoji):
-    """ Returns the Sentiment Data mapped to the specified Emoji. """
+def get_emoji_sentiment_rank(char):
+    """ Returns the emoji sentiment rank mapped to the specified character. """
+
+    return EMOJI_SENTIMENT_DICT.get(char.strip())
+
+
+def get_emoji_sentiment_rank_multiple(text):
+    """
+        Parses the input text character by character and extracts emoji 
+        sentiment ranks and their respective positions in the text.
+    """
+
+    emoji_results = []
+
+    for index, char in enumerate(text.strip()):
+        if char.isalnum():
+            continue
+
+        sentiment_rank = EMOJI_SENTIMENT_DICT.get(char)
+        if sentiment_rank:
+            emoji_results.append({
+                'text_position': index,
+                'emoji_sentiment_rank': sentiment_rank
+            })
 
-    return EMOJI_SENTIMENT_DICT[emoji]
+    return emoji_results
 
 
 EMOJI_SENTIMENT_DICT = _build_dict_from_csv(

diff --git a/run_tests b/run_tests
@@ -0,0 +1,2 @@
+#!/bin/sh
+python -m unittest tests/test_emosent.py
diff --git a/setup.py b/setup.py
@@ -7,20 +7,20 @@
 
 with open(
         path.join(path.abspath(path.dirname(__file__)), 'README.md'),
-        encoding='utf-8') as f:
-    LONG_DESCRIPTION = f.read()
+        encoding='utf-8'
+    ) as readme_file:
+    LONG_DESCRIPTION = readme_file.read()
 
 setup(
     name='emosent-py',
     packages=['emosent'],
-    version='0.1.6',
+    version='0.1.7',
     license='MIT',
     description='Python module to get Sentiment Rankings for Unicode Emojis.',
     long_description=LONG_DESCRIPTION,
     long_description_content_type='text/markdown',
-    author='Fintel Labs Inc.',
-    author_email='[email protected]',
-    url='https://fintel.ai',
+    author='Omkar P',
+    url='https://github.com/omkar-foss/emosent-py',
     download_url=(
         'https://github.com/FintelLabs/emosent-py/archive/master.zip'
     ),

diff --git a/tests/test_emosent.py b/tests/test_emosent.py
@@ -0,0 +1,85 @@
+"""
+    Unit tests for this project.
+"""
+
+import unittest
+from emosent import get_emoji_sentiment_rank, get_emoji_sentiment_rank_multiple
+
+
+class TestEmosent(unittest.TestCase):
+
+    def test_basic_ranking(self):
+        print('\ntest_basic_ranking: It checks for basic ranking functionality\n')
+        emoji = '❤'
+        expected_result = {
+            'unicode_codepoint': '0x2764',
+            'occurrences': 8050,
+            'position': 0.746943086,
+            'negative': 355.0,
+            'neutral': 1334.0,
+            'positive': 6361.0,
+            'unicode_name': 'HEAVY BLACK HEART',
+            'unicode_block': 'Dingbats',
+            'sentiment_score': 0.746
+        }
+        result = get_emoji_sentiment_rank(emoji)
+        self.assertDictEqual(result, expected_result)
+
+        emoji = ''
+        result = get_emoji_sentiment_rank(emoji)
+        self.assertIsNone(result)
+
+        emoji = '   '
+        result = get_emoji_sentiment_rank(emoji)
+        self.assertIsNone(result)
+
+    def test_emojis_text_extraction(self):
+        print(
+            '\ntest_emojis_text_extraction: It checks for multiple emojis in '
+            'text functionality\n'
+        )
+        text = 'that is amazing! 😂❤'
+        expected_result = [
+            {
+                'text_position': 17,
+                'emoji_sentiment_rank': {
+                    'unicode_codepoint': '0x1f602',
+                    'occurrences': 14622,
+                    'position': 0.805100583,
+                    'negative': 3614.0,
+                    'neutral': 4163.0,
+                    'positive': 6845.0,
+                    'unicode_name': 'FACE WITH TEARS OF JOY',
+                    'unicode_block': 'Emoticons',
+                    'sentiment_score': 0.221
+                }
+            },
+            {
+                'text_position': 18,
+                'emoji_sentiment_rank': {
+                    'unicode_codepoint': '0x2764',
+                    'occurrences': 8050,
+                    'position': 0.746943086,
+                    'negative': 355.0,
+                    'neutral': 1334.0,
+                    'positive': 6361.0,
+                    'unicode_name': 'HEAVY BLACK HEART',
+                    'unicode_block': 'Dingbats',
+                    'sentiment_score': 0.746
+                }
+            }
+        ]
+        result = get_emoji_sentiment_rank_multiple(text)
+        self.assertListEqual(result, expected_result)
+
+        text = ''
+        result = get_emoji_sentiment_rank_multiple(text)
+        self.assertListEqual(result, [])
+
+        text = '   '
+        result = get_emoji_sentiment_rank_multiple(text)
+        self.assertListEqual(result, [])
+
+
+if __name__ == '__main__':
+    unittest.main()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		#!/bin/sh
		python -m unittest tests/test_emosent.py