Skip to content

Commit

Permalink
Add support for extracting multiple emojis in text and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
omkar-foss committed Jul 6, 2023
1 parent e5788e7 commit b295f19
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 15 deletions.
40 changes: 39 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ Output:

```python
from emosent import get_emoji_sentiment_rank

# This function returns the emoji sentiment rank
# mapped to the specified character.
get_emoji_sentiment_rank('😂')
```
Output:
Expand All @@ -66,6 +69,41 @@ Output:
'sentiment_score': 0.221}
```

#### Example 3

```python
from emosent import get_emoji_sentiment_rank_multiple

# Parses the input text character by character and
# extracts emoji sentiment ranks and their respective positions in the text.
get_emoji_sentiment_rank_multiple('well done buddy! 😁👏')
```
Output:
```python
# Here, the emojis are found at zero-based positions 17 and 18 in the specified text.
[{'text_position': 17,
'emoji_sentiment_rank': {'unicode_codepoint': '0x1f601',
'occurrences': 2189,
'position': 0.796151187,
'negative': 278.0,
'neutral': 648.0,
'positive': 1263.0,
'unicode_name': 'GRINNING FACE WITH SMILING EYES',
'unicode_block': 'Emoticons',
'sentiment_score': 0.45}},
{'text_position': 18,
'emoji_sentiment_rank': {'unicode_codepoint': '0x1f44f',
'occurrences': 2336,
'position': 0.787130164,
'negative': 243.0,
'neutral': 634.0,
'positive': 1459.0,
'unicode_name': 'CLAPPING HANDS SIGN',
'unicode_block': 'Miscellaneous Symbols and Pictographs',
'sentiment_score': 0.521}}]

```

## Versioning

We use [SemVer](http://semver.org/) for versioning. For the versions available,
Expand All @@ -75,4 +113,4 @@ see the
## License

This project is licensed under the MIT License - see the
[LICENSE.txt](LICENSE.txt) file for more details.
[LICENSE.txt](LICENSE.txt) file for more details.
3 changes: 2 additions & 1 deletion emosent/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
""" Package Initialization file. """

from emosent.emosent import EMOJI_SENTIMENT_DICT, get_emoji_sentiment_rank
from emosent.emosent import EMOJI_SENTIMENT_DICT, get_emoji_sentiment_rank, \
get_emoji_sentiment_rank_multiple
36 changes: 29 additions & 7 deletions emosent/emosent.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import csv
import logging
from os import path
import re

logging.basicConfig(
level=logging.INFO,
Expand All @@ -27,10 +28,9 @@ def _build_dict_from_csv(csv_path):

emoji_sentiment_rankings = {}

# MrMindy:
# Adding the encoding. At least on Windows, I tested it before, the terminal displays an error that implies
# with the charset. Adding the UTF-8, everything runs smoothly.

# Explicit use of UTF-8 encoding is required while reading Emojis from CSV
# to avoid errors in systems where UTF-8 is not the default encoding (e.g. Windows).
# Credits to MrMindy for this fix.
with open(csv_path, newline='', encoding='utf-8') as csv_file:
csv_reader = csv.reader(csv_file)
_header_row = next(csv_reader)
Expand Down Expand Up @@ -63,10 +63,32 @@ def _build_dict_from_csv(csv_path):
return emoji_sentiment_rankings


def get_emoji_sentiment_rank(emoji):
""" Returns the Sentiment Data mapped to the specified Emoji. """
def get_emoji_sentiment_rank(char):
    """
    Looks up the emoji sentiment rank for a single character.

    Whitespace surrounding ``char`` is removed before the lookup. The
    matching entry of EMOJI_SENTIMENT_DICT is returned, or None when the
    dictionary has no entry for the character.
    """

    lookup_key = char.strip()
    return EMOJI_SENTIMENT_DICT.get(lookup_key)


def get_emoji_sentiment_rank_multiple(text):
    """
    Parses the input text character by character and extracts emoji
    sentiment ranks and their respective positions in the text.

    Returns a list of dicts, in order of appearance, each with the keys:

    - 'text_position': zero-based index of the emoji in ``text`` exactly
      as it was passed in (leading whitespace is counted).
    - 'emoji_sentiment_rank': the EMOJI_SENTIMENT_DICT entry for that
      character.

    Characters without an entry in EMOJI_SENTIMENT_DICT are ignored, so
    the result is an empty list for empty or whitespace-only text.
    """

    # Leading/trailing whitespace is not scanned, but the reported index
    # must still refer to the caller's original string, so remember how
    # many leading characters were stripped and offset the index by it.
    leading_stripped = len(text) - len(text.lstrip())
    stripped_text = text.strip()

    emoji_results = []

    for index, char in enumerate(stripped_text, start=leading_stripped):
        # Letters and digits are skipped up front, without a lookup.
        if char.isalnum():
            continue

        sentiment_rank = EMOJI_SENTIMENT_DICT.get(char)
        if sentiment_rank:
            emoji_results.append({
                'text_position': index,
                'emoji_sentiment_rank': sentiment_rank,
            })

    return emoji_results


EMOJI_SENTIMENT_DICT = _build_dict_from_csv(
Expand Down
2 changes: 2 additions & 0 deletions run_tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
python -m unittest tests/test_emosent.py
12 changes: 6 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,20 @@

with open(
path.join(path.abspath(path.dirname(__file__)), 'README.md'),
encoding='utf-8') as f:
LONG_DESCRIPTION = f.read()
encoding='utf-8'
) as readme_file:
LONG_DESCRIPTION = readme_file.read()

setup(
name='emosent-py',
packages=['emosent'],
version='0.1.6',
version='0.1.7',
license='MIT',
description='Python module to get Sentiment Rankings for Unicode Emojis.',
long_description=LONG_DESCRIPTION,
long_description_content_type='text/markdown',
author='Fintel Labs Inc.',
author_email='[email protected]',
url='https://fintel.ai',
author='Omkar P',
url='https://github.com/omkar-foss/emosent-py',
download_url=(
'https://github.com/FintelLabs/emosent-py/archive/master.zip'
),
Expand Down
85 changes: 85 additions & 0 deletions tests/test_emosent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""
Unit tests for this project.
"""

import unittest
from emosent import get_emoji_sentiment_rank, get_emoji_sentiment_rank_multiple


class TestEmosent(unittest.TestCase):
    """ Checks the single-character and multi-emoji lookup functions. """

    # Expected sentiment rank entries, shared by the assertions below.
    _HEAVY_BLACK_HEART_RANK = {
        'unicode_codepoint': '0x2764',
        'occurrences': 8050,
        'position': 0.746943086,
        'negative': 355.0,
        'neutral': 1334.0,
        'positive': 6361.0,
        'unicode_name': 'HEAVY BLACK HEART',
        'unicode_block': 'Dingbats',
        'sentiment_score': 0.746
    }
    _FACE_WITH_TEARS_OF_JOY_RANK = {
        'unicode_codepoint': '0x1f602',
        'occurrences': 14622,
        'position': 0.805100583,
        'negative': 3614.0,
        'neutral': 4163.0,
        'positive': 6845.0,
        'unicode_name': 'FACE WITH TEARS OF JOY',
        'unicode_block': 'Emoticons',
        'sentiment_score': 0.221
    }

    def test_basic_ranking(self):
        """ A known emoji yields its rank; blank input yields None. """
        print('\ntest_basic_ranking: It checks for basic ranking functionality\n')

        self.assertDictEqual(
            get_emoji_sentiment_rank('❤'),
            self._HEAVY_BLACK_HEART_RANK
        )

        # Empty and whitespace-only input have no entry to return.
        for blank_input in ('', ' '):
            self.assertIsNone(get_emoji_sentiment_rank(blank_input))

    def test_emojis_text_extraction(self):
        """ Every emoji in a sentence is reported with its position. """
        banner = (
            '\ntest_emojis_text_extraction: It checks for multiple emojis in '
            'text functionality\n'
        )
        print(banner)

        expected_matches = [
            {
                'text_position': 17,
                'emoji_sentiment_rank': self._FACE_WITH_TEARS_OF_JOY_RANK
            },
            {
                'text_position': 18,
                'emoji_sentiment_rank': self._HEAVY_BLACK_HEART_RANK
            }
        ]
        self.assertListEqual(
            get_emoji_sentiment_rank_multiple('that is amazing! 😂❤'),
            expected_matches
        )

        # Empty and whitespace-only text contain no emojis at all.
        for blank_text in ('', ' '):
            self.assertListEqual(
                get_emoji_sentiment_rank_multiple(blank_text),
                []
            )


if __name__ == '__main__':
unittest.main()

0 comments on commit b295f19

Please sign in to comment.