Skip to content

Commit b957d5c

Browse files
committed
Add lemmas
1 parent 2d91892 commit b957d5c

File tree

8 files changed

+72
-33
lines changed

8 files changed

+72
-33
lines changed

sentiment-analysis.userprefs

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,18 @@
11
<Properties StartupConfiguration="{40F7DC2F-CD82-41E6-9747-ADC082D03CB2}|Default">
2-
<MonoDevelop.Ide.Workbench ActiveDocument="sentiment-analysis/Core/Database/Service/WordsFilterService.cs">
2+
<MonoDevelop.Ide.Workbench ActiveDocument="sentiment-analysis/Program.cs">
33
<Files>
4-
<File FileName="sentiment-analysis/Program.cs" Line="12" Column="20" />
4+
<File FileName="sentiment-analysis/Program.cs" Line="70" Column="31" />
55
<File FileName="sentiment-analysis/Config/Common/TelegramConfig.cs" Line="13" Column="35" />
66
<File FileName="sentiment-analysis/Core/TelegramSender.cs" Line="8" Column="1" />
7-
<File FileName="sentiment-analysis/Core/Parser.cs" Line="2" Column="29" />
8-
<File FileName="sentiment-analysis/Core/Database/Service/WordsFilterService.cs" Line="57" Column="41" />
7+
<File FileName="sentiment-analysis/Core/Parser.cs" Line="41" Column="52" />
8+
<File FileName="sentiment-analysis/Core/Database/Service/WordsFilterService.cs" Line="13" Column="9" />
9+
<File FileName="sentiment-analysis/Core/PostParser.cs" Line="116" Column="90" />
10+
<File FileName="sentiment-analysis/Core/ToLemmasConverter.cs" Line="8" Column="58" />
11+
<File FileName="sentiment-analysis/Core/Analysis/InputPostAnalisator.cs" Line="32" Column="18" />
12+
<File FileName="sentiment-analysis/Config/Site/AbstractSiteConfig.cs" Line="30" Column="23" />
13+
<File FileName="sentiment-analysis/Core/Analysis/Analizators/AnalizatorFactory.cs" Line="26" Column="43" />
14+
<File FileName="sentiment-analysis/Core/Analysis/Analizators/GrowthAnalizator.cs" Line="6" Column="46" />
915
</Files>
10-
<Pads>
11-
<Pad Id="ProjectPad">
12-
<State name="__root__">
13-
<Node name="sentiment-analysis" expanded="True">
14-
<Node name="sentiment-analysis" expanded="True">
15-
<Node name="Config" selected="True" />
16-
</Node>
17-
</Node>
18-
</State>
19-
</Pad>
20-
</Pads>
2116
</MonoDevelop.Ide.Workbench>
2217
<MonoDevelop.Ide.Workspace ActiveConfiguration="Debug|x86" />
2318
<MonoDevelop.Ide.DebuggingService.Breakpoints>

sentiment-analysis/Config/Site/CoindeskConfig.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ public CoindeskConfig()
77
baseUrl = "https://www.coindesk.com/";
88
titleCssSelector = "div:not(#coindesk_follow_us_widget-2) > h3";
99
timeCssSelector = "p.timeauthor > time";
10-
hrefCssSelector = "div > a.fade";
10+
hrefCssSelector = "div a.fade";
1111
nextPagePostfix = "page/{0}/";
1212
}
1313
}

sentiment-analysis/Core/Analysis/InputPostAnalisator.cs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using sentimentanalysis.Core;
23
using System.Collections.Generic;
34
using sentimentanalysis.Core.Database.Entity;
45
using sentimentanalysis.Core.Analysis.Service;
@@ -8,9 +9,12 @@ namespace sentimentanalysis.Core.Analysis
89
public class InputPostAnalisator
910
{
1011
protected SentimentCoeficientCalculator sentimentCoeficientCalculator;
12+
protected ToLemmasConverter toLemmaConverter;
1113

12-
public InputPostAnalisator(SentimentCoeficientCalculator sentimentCoeficientCalculator)
14+
public InputPostAnalisator(ToLemmasConverter toLemmaConverter,
15+
SentimentCoeficientCalculator sentimentCoeficientCalculator)
1316
{
17+
this.toLemmaConverter = toLemmaConverter;
1418
this.sentimentCoeficientCalculator = sentimentCoeficientCalculator;
1519
}
1620

@@ -38,14 +42,15 @@ private float getPostCoeficient(string[] words, bool isDebug = false)
3842

3943
foreach (string word in words)
4044
{
41-
float coeficient = getWordCoeficient(word);
45+
string lemmatizedWord = toLemmaConverter.ToLemma(word);
46+
float coeficient = getWordCoeficient(lemmatizedWord);
4247
postCoeficient += coeficient;
4348

4449
if (Math.Abs(coeficient) > .000001)
4550
{
4651
if (isDebug)
4752
{
48-
Console.WriteLine("{0}: {1}", word, coeficient);
53+
Console.WriteLine("{0}: {1}", lemmatizedWord, coeficient);
4954
}
5055

5156
estimatedWordsCount++;

sentiment-analysis/Core/Parser.cs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using sentimentanalysis.Config;
1+
using sentimentanalysis.Core;
2+
using sentimentanalysis.Config;
23
using sentimentanalysis.Core.Site;
34
using sentimentanalysis.Core.Site.Iterator;
45
using sentimentanalysis.Core.Site.Generator;
@@ -16,6 +17,7 @@ public class Parser
1617
protected UrlGenerator urlGenerator;
1718
protected WebClient webClient;
1819
protected CoreConfig config;
20+
protected ToLemmasConverter toLemmaConverter;
1921

2022
public Parser(PostService postService,
2123
CurrencyValueService currencyValueService,
@@ -27,12 +29,16 @@ public Parser(PostService postService,
2729
this.webClient = new WebClient();
2830
this.webPagesIterator = new WebPagesIterator(urlGenerator, webClient);
2931
this.config = config;
32+
this.toLemmaConverter = new ToLemmasConverter();
3033
}
3134

3235
public void Parse()
3336
{
3437
Post post = postService.SelectLastRecord(config);
35-
PostParser postParser = new PostParser(postService, webPagesIterator, config);
38+
PostParser postParser = new PostParser(postService,
39+
webPagesIterator,
40+
toLemmaConverter,
41+
config);
3642

3743
if (null != post)
3844
{

sentiment-analysis/Core/PostParser.cs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using System;
22
using System.Net;
33
using AngleSharp.Dom;
4+
using sentimentanalysis.Core;
45
using sentimentanalysis.Config;
56
using sentimentanalysis.Core.Site;
67
using sentimentanalysis.Core.Site.Entity;
@@ -14,14 +15,17 @@ public class PostParser
1415
{
1516
protected PostService postService;
1617
protected WebPagesIterator webPagesIterator;
18+
protected ToLemmasConverter toLemmaConverter;
1719
protected CoreConfig config;
1820

1921
public PostParser(PostService postService,
20-
WebPagesIterator webPagesIterator,
22+
WebPagesIterator webPagesIterator,
23+
ToLemmasConverter toLemmaConverter,
2124
CoreConfig config)
2225
{
2326
this.postService = postService;
2427
this.webPagesIterator = webPagesIterator;
28+
this.toLemmaConverter = toLemmaConverter;
2529
this.config = config;
2630
}
2731

@@ -79,16 +83,19 @@ private void insertData(IHtmlCollection<IElement> titles,
7983
IHtmlCollection<IElement> times,
8084
IHtmlCollection<IElement> hrefs)
8185
{
86+
Console.WriteLine(hrefs.Length + "; " + titles.Length);
8287
for (int i = 0, l = titles.Length; i < l; i++)
8388
{
8489
string timeString = times[i].GetAttribute("datetime");
8590
string title = titles[i].TextContent;
8691
string href = hrefs[i].GetAttribute("href");
8792

88-
if (0 == timeString.Length || 0 == title.Length) continue;
93+
if (0 == timeString.Length || 0 == title.Length || 0 == href.Length) continue;
8994

9095
DateTime time = new TimeParser(timeString).GetDateTime();
91-
postService.Insert(new Post(title, href, time, config));
96+
string lemmatizedTitle = toLemmaConverter.ToLemma(title);
97+
98+
postService.Insert(new Post(lemmatizedTitle, href, time, config));
9299
}
93100
}
94101

sentiment-analysis/Core/ToLemmasConverter.cs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
using System;
2+
namespace sentimentanalysis.Core
3+
{
4+
/// <summary>
/// Reduces an English word to a crude lemma by rewriting a small set of
/// plural suffixes ("sses", "ies", "ss", "s").
/// </summary>
public class ToLemmasConverter
{
    /// <summary>
    /// Rewrites the trailing plural suffix of <paramref name="str"/>.
    /// Rules are tried longest suffix first and the first match wins:
    /// "sses" becomes "ss", "ies" becomes "i", "ss" is kept as is,
    /// and a single trailing "s" is removed.
    /// </summary>
    /// <param name="str">
    /// The text to convert. Only the end of the string is examined, so when
    /// several words are passed only the final suffix is rewritten.
    /// Matching is case-sensitive.
    /// </param>
    /// <returns>
    /// The string with its trailing suffix rewritten, or the input unchanged
    /// when it is null, empty, or matches no rule.
    /// </returns>
    public string ToLemma(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        // Only the tail of the string is trimmed. String.Replace would rewrite
        // every occurrence of the suffix text, corrupting the inside of the
        // word (e.g. "sees" would become "ee" instead of "see").
        if (str.EndsWith("sses", StringComparison.Ordinal))
        {
            // "sses" -> "ss": drop the trailing "es".
            return str.Substring(0, str.Length - 2);
        }

        if (str.EndsWith("ies", StringComparison.Ordinal))
        {
            // "ies" -> "i": drop the trailing "es".
            return str.Substring(0, str.Length - 2);
        }

        if (str.EndsWith("ss", StringComparison.Ordinal))
        {
            // "ss" -> "ss": must be checked before the single "s" rule so
            // words such as "class" are left untouched.
            return str;
        }

        if (str.EndsWith("s", StringComparison.Ordinal))
        {
            return str.Substring(0, str.Length - 1);
        }

        return str;
    }
}
24+
}

sentiment-analysis/Program.cs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ public static void Main(string[] args)
4242

4343
SentimentCoeficientCalculator calcuator =
4444
new SentimentCoeficientCalculator(wordService, postExtremumService);
45-
InputPostAnalisator inputPostAnalizator = new InputPostAnalisator(calcuator);
45+
InputPostAnalisator inputPostAnalizator =
46+
new InputPostAnalisator(new ToLemmasConverter(), calcuator);
4647

4748
/********** I **********/
4849
//Parser parser = new Parser(postService, currencyValueService, config);
@@ -69,17 +70,17 @@ public static void Main(string[] args)
6970
/********** III **********/
7071

7172
/********** IV **********/
72-
//ConfigEntity lastPostTime = configService.Get(ConfigEntity.LAST_POST_TIME);
73-
//TimeParser timeParser = new TimeParser(lastPostTime.Value);
74-
//List<Post> posts = postService.GetPostsSinceDate(timeParser.GetDateTime(), config);
73+
ConfigEntity lastPostTime = configService.Get(ConfigEntity.LAST_POST_TIME);
74+
TimeParser timeParser = new TimeParser(lastPostTime.Value);
75+
List<Post> posts = postService.GetPostsSinceDate(timeParser.GetDateTime(), config);
7576

76-
//Dictionary<Post, float> estimatedPosts = inputPostAnalizator.GetPosts(posts);
77+
Dictionary<Post, float> estimatedPosts = inputPostAnalizator.GetPosts(posts);
7778

78-
//IEnumerable<KeyValuePair<Post, float>> orderedEstimatedPosts = estimatedPosts.Where(pair => Math.Abs(pair.Value) >= 0.02);
79-
//TelegramSender telegramSender = new TelegramSender(config);
80-
//List<Task<Message>> taskList = telegramSender.SendEstimatedPosts(orderedEstimatedPosts);
79+
IEnumerable<KeyValuePair<Post, float>> orderedEstimatedPosts = estimatedPosts.Where(pair => Math.Abs(pair.Value) >= 0.02);
80+
TelegramSender telegramSender = new TelegramSender(config);
81+
List<Task<Message>> taskList = telegramSender.SendEstimatedPosts(orderedEstimatedPosts);
8182

82-
//Task.WaitAll(taskList.ToArray());
83+
Task.WaitAll(taskList.ToArray());
8384
/********** IV **********/
8485

8586
connection.Close();

sentiment-analysis/sentiment-analysis.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@
120120
<Compile Include="Core\Analysis\Service\SentimentCoeficientCalculator.cs" />
121121
<Compile Include="Config\Common\TelegramConfig.cs" />
122122
<Compile Include="Core\TelegramSender.cs" />
123+
<Compile Include="Core\ToLemmasConverter.cs" />
123124
</ItemGroup>
124125
<ItemGroup>
125126
<None Include="packages.config" />

0 commit comments

Comments
 (0)