|
| 1 | +import re |
| 2 | +import spacy |
| 3 | +from datetime import datetime, timedelta |
| 4 | + |
class RegNERModel:
    """Rule-based entity detector built from regular expressions.

    Every ``detect_*`` method returns a list of entity dicts with the keys
    ``name`` (the matched or resolved text), ``start`` / ``end`` (character
    offsets of the match in the input sentence) and ``score`` (always 1.0,
    since these are deterministic rules).
    """

    # Local part, "@", domain and an alphabetic top-level label. The "+"
    # quantifiers reject degenerate hits such as "@." and stop a trailing
    # sentence period from being swallowed into the address.
    _EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]+")

    # 12-hour clock value with optional minutes and optional am/pm marker.
    # NOTE(review): both optional parts may be absent, so a bare "5" is
    # reported as a time; kept as-is to preserve existing behaviour.
    _TIME_RE = re.compile(
        r"\b(?:1[0-2]|0?[1-9])(?::[0-5][0-9])?(?:\s?[ap]m)?\b",
        re.IGNORECASE,
    )

    # Ten-digit numbers grouped 3-3-4 with optional parentheses/separators.
    _PHONE_RE = re.compile(r"(\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4})")

    # A number (optionally decimal), whitespace, then a word-like token.
    _NUMBER_UNIT_RE = re.compile(r"(?<!\d)(\d+(?:\.\d+)?)(?:\s+)(\w+)(?!\d)")

    # Relative date expressions in English and romanised Hindi. Word
    # boundaries keep "kal" from firing inside longer words and "next" from
    # firing inside e.g. "context".
    _DATE_PATTERNS = (
        re.compile(
            r"\b(next|agle)\s+(monday|tuesday|wednesday|thursday|friday"
            r"|saturday|sunday|somvar|mangalwar|budhwar|guruwar|shukrawar"
            r"|shaniwar|raviwar)\b",
            re.IGNORECASE,
        ),
        re.compile(r"\b(kal)\b", re.IGNORECASE),
        re.compile(
            r"\b(next|agle)\s+(week|month|year|hafte|mahine|saal)\b",
            re.IGNORECASE,
        ),
    )

    # Weekday name -> datetime.weekday() index (Monday == 0).
    _WEEKDAYS = {
        "monday": 0, "somvar": 0,
        "tuesday": 1, "mangalwar": 1,
        "wednesday": 2, "budhwar": 2,
        "thursday": 3, "guruwar": 3,
        "friday": 4, "shukrawar": 4,
        "saturday": 5, "shaniwar": 5,
        "sunday": 6, "raviwar": 6,
    }

    def __init__(self):
        """Load the spaCy ``en_core_web_sm`` pipeline and report success.

        The pipeline is stored on ``self.nlp``; none of the regex detectors
        in this class read it.
        """
        self.nlp = spacy.load("en_core_web_sm")

        print("Model loaded successfully")

    @staticmethod
    def _entity(name, start, end):
        """Build one entity dict in the shape shared by every detector."""
        return {"name": name, "start": start, "end": end, "score": 1.0}

    def _regex_entities(self, pattern, sentence):
        """Return one entity per match of ``pattern``, named by the match."""
        return [
            self._entity(match.group(), match.start(), match.end())
            for match in pattern.finditer(sentence)
        ]

    def detect_email(self, sentence):
        """Return an entity for every e-mail address found in ``sentence``."""
        return self._regex_entities(self._EMAIL_RE, sentence)

    def detect_time(self, sentence):
        """Return an entity for every clock time found in ``sentence``."""
        return self._regex_entities(self._TIME_RE, sentence)

    def detect_phone_numbers(self, sentence):
        """Return an entity for every phone number found in ``sentence``."""
        return self._regex_entities(self._PHONE_RE, sentence)

    def detect_numbers_with_units(self, sentence, phone_numbers):
        """Return ``<number> <unit>`` entities that are not phone numbers.

        Args:
            sentence: Text to scan.
            phone_numbers: Phone entities as produced by
                ``detect_phone_numbers`` (dicts with ``start`` / ``end``).
                Plain strings are also accepted and compared against the
                numeric text itself.

        Returns:
            Entity dicts whose ``name`` is the number and the following
            token joined by a single space.
        """
        phone_numbers = phone_numbers or ()
        phone_spans = [
            (phone["start"], phone["end"])
            for phone in phone_numbers
            if isinstance(phone, dict)
        ]
        phone_literals = {
            phone for phone in phone_numbers if isinstance(phone, str)
        }

        numbers_with_units = []
        for match in self._NUMBER_UNIT_RE.finditer(sentence):
            number, unit = match.groups()
            num_start, num_end = match.span(1)
            # A digit run lying inside a detected phone number is a fragment
            # of that phone number, not a quantity.
            inside_phone = any(
                start <= num_start and num_end <= end
                for start, end in phone_spans
            )
            if inside_phone or number in phone_literals:
                continue
            numbers_with_units.append(
                self._entity(f"{number} {unit}", match.start(), match.end())
            )

        return numbers_with_units

    def _resolve_relative_date(self, groups, today):
        """Map lower-cased regex groups to a concrete date, or ``None``."""
        if groups[0] == "kal":
            return today + timedelta(days=1)

        target = groups[1]
        if target in self._WEEKDAYS:
            # Strictly in the future: 1..7 days ahead, so asking for today's
            # own weekday yields the same weekday one week later.
            days_ahead = (self._WEEKDAYS[target] - today.weekday() - 1) % 7 + 1
            return today + timedelta(days=days_ahead)
        if target in ("week", "hafte"):
            # today + (7 - weekday) is next Monday; +6 lands on the Sunday
            # that closes that week.
            return today + timedelta(days=(7 - today.weekday()) + 6)
        if target in ("month", "mahine"):
            # Roll over to January of the following year from December.
            if today.month == 12:
                return today.replace(year=today.year + 1, month=1, day=1)
            return today.replace(month=today.month + 1, day=1)
        if target in ("year", "saal"):
            return today.replace(year=today.year + 1, month=1, day=1)
        return None

    def detect_dates(self, sentence, today=None):
        """Resolve relative date phrases in ``sentence`` to ``DD-MM-YYYY``.

        Recognised phrases: ``next|agle <weekday>``, ``kal`` (resolved as
        tomorrow) and ``next|agle week|month|year`` with their romanised
        Hindi equivalents. Matching is case-insensitive.

        Args:
            sentence: Text to scan.
            today: Reference date; defaults to ``datetime.now()``.

        Returns:
            Entity dicts whose ``name`` is the resolved date and whose
            offsets cover the matched phrase. Results are grouped by
            pattern (weekdays, then ``kal``, then week/month/year).
        """
        if today is None:
            today = datetime.now()

        detected_dates = []
        for pattern in self._DATE_PATTERNS:
            for match in pattern.finditer(sentence):
                groups = tuple(group.lower() for group in match.groups())
                resolved = self._resolve_relative_date(groups, today)
                if resolved is None:
                    continue
                detected_dates.append(
                    self._entity(
                        resolved.strftime("%d-%m-%Y"),
                        match.start(),
                        match.end(),
                    )
                )

        return detected_dates

    def inference(self, sentence):
        """Run every detector and return the non-empty results by type.

        Returns:
            Dict mapping ``email``, ``time``, ``phone_number``,
            ``number_with_unit`` and ``date`` to their entity lists; types
            with no matches are omitted.
        """
        detected_phone_numbers = self.detect_phone_numbers(sentence)
        candidates = (
            ("email", self.detect_email(sentence)),
            ("time", self.detect_time(sentence)),
            ("phone_number", detected_phone_numbers),
            (
                "number_with_unit",
                self.detect_numbers_with_units(sentence, detected_phone_numbers),
            ),
            ("date", self.detect_dates(sentence)),
        )
        return {label: found for label, found in candidates if found}
0 commit comments