-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathP1_bigram_probability.cpp
59 lines (51 loc) · 2.02 KB
/
P1_bigram_probability.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <map>
using namespace std;
// Splits `sentence` into whitespace-delimited tokens, preserving their order.
// Runs of whitespace count as a single separator; a sentence with no
// non-whitespace characters yields an empty vector.
std::vector<std::string> tokenize(const std::string& sentence) {
    std::istringstream reader(sentence);
    std::vector<std::string> pieces;
    // Formatted extraction skips leading whitespace and stops at the next
    // whitespace character, so each successful read is exactly one token.
    for (std::string piece; reader >> piece;) {
        pieces.push_back(piece);
    }
    return pieces;
}
// Scores `sentence` with an unsmoothed bigram model estimated from `corpus`.
//
// Every corpus sentence and the test sentence are wrapped in "<s>" / "</s>"
// boundary markers before tokenizing. Each conditional probability is
//     P(next | prev) = count(prev, next) / count(prev)
// where the counts come from the corpus only. The sentence probability is the
// product of these conditionals over all adjacent token pairs.
//
// Side effect: prints a header line and one line per bigram to std::cout.
//
// Returns the product of the conditional probabilities, or 0 as soon as a
// bigram that never occurs in the corpus is encountered (no smoothing).
double calculateBigramProbability(const std::vector<std::string>& corpus, const std::string& sentence) {
    using Bigram = std::pair<std::string, std::string>;
    std::map<Bigram, int> pairFreq;
    std::map<std::string, int> unigramFreq;

    // Training pass: count every token, and every adjacent token pair.
    for (const std::string& line : corpus) {
        const std::vector<std::string> words = tokenize("<s> " + line + " </s>");
        for (std::size_t pos = 0; pos < words.size(); ++pos) {
            ++unigramFreq[words[pos]];
            if (pos + 1 < words.size()) {
                ++pairFreq[Bigram(words[pos], words[pos + 1])];
            }
        }
    }

    // Scoring pass over the boundary-wrapped test sentence.
    const std::vector<std::string> query = tokenize("<s> " + sentence + " </s>");
    double product = 1.0;
    std::cout << "\nBigram Probabilities for the test sentence:\n";
    for (std::size_t pos = 0; pos + 1 < query.size(); ++pos) {
        const std::string& prev = query[pos];
        const std::string& next = query[pos + 1];
        const auto hit = pairFreq.find(Bigram(prev, next));

        // An unseen bigram zeroes the whole product, so stop right away.
        if (hit == pairFreq.end()) {
            std::cout << "P(" << next << " | " << prev << ") = 0 (Bigram not found in corpus)" << std::endl;
            return 0;
        }

        // `prev` was counted as a unigram when this bigram was counted, so the
        // lookup cannot miss and the denominator is never zero.
        const double conditional = static_cast<double>(hit->second) / unigramFreq.at(prev);
        std::cout << "P(" << next << " | " << prev << ") = " << conditional << std::endl;
        product *= conditional;
    }
    return product;
}
// Demo driver: builds a three-sentence training corpus, scores one test
// sentence with calculateBigramProbability, and prints the resulting value.
int main() {
    const std::vector<std::string> trainingSentences = {
        "There is a big garden",
        "Children play in a garden",
        "They play inside beautiful garden"
    };
    const std::string query = "They play in a big garden";

    // Compute first: the scorer prints its own per-bigram lines, which must
    // appear before the summary line below.
    const double score = calculateBigramProbability(trainingSentences, query);

    std::cout << "\nFinal Probability of the sentence \"" << query << "\": " << score << std::endl;
    return 0;
}