-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.cs
More file actions
88 lines (72 loc) · 3.29 KB
/
parser.cs
File metadata and controls
88 lines (72 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using ConsoleApplication1;
namespace ConsoleApplication1
{
/// <summary>
/// Loads a dependency-annotated XML corpus into memory and accumulates
/// statistics over child/head word pairs.
/// </summary>
/// <remarks>
/// Layout read by <see cref="parse"/>: sentence elements (<c>S</c>) are
/// grandchildren of the document root, and each contains word elements
/// (<c>W</c>) carrying the <c>DOM</c>, <c>FEAT</c>, <c>ID</c>, <c>LEMMA</c> and
/// <c>LINK</c> attributes.
/// </remarks>
class parser
{
    // XML element names.
    private const string XML_NODE_WORD = "W";
    private const string XML_NODE_SENTENCE = "S";

    // Value of the DOM attribute that marks a word with no head.
    private const string XML_ROOT_NODE = "_root";

    // XML attribute names.
    private const string SENTENCE_ATTR_ID = "ID";
    private const string WORD_ATTR_DOM = "DOM";
    private const string WORD_ATTR_FEAT = "FEAT";
    private const string WORD_ATTR_ID = "ID";
    private const string WORD_ATTR_LEMMA = "LEMMA";
    private const string WORD_ATTR_LINK = "LINK";

    // Numeric head id stored for root words. NOTE(review): a word whose DOM
    // attribute is literally "0" is indistinguishable from a root word.
    private const int ROOT_DOM = 0;

    // Parsed sentences keyed by their ID attribute; rebuilt by every parse() call.
    private Dictionary<int, sentence> sentenceMap;

    /// <summary>
    /// Creates a parser and immediately loads <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileName">Path or URI of the XML document to load.</param>
    public parser(string fileName)
    {
        parse(fileName);
    }

    /// <summary>
    /// Loads the XML document and replaces the current sentence map with its
    /// contents.
    /// </summary>
    /// <param name="fileName">Path or URI of the XML document to load.</param>
    /// <exception cref="FormatException">
    /// A word has no <c>DOM</c> attribute, a sentence has no <c>ID</c>
    /// attribute, or a numeric attribute is not a valid integer.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// Two words in one sentence, or two sentences, share the same id.
    /// </exception>
    public void parse(string fileName)
    {
        XDocument doc = XDocument.Load(fileName);
        sentenceMap = new Dictionary<int, sentence>();

        foreach (XElement sentenceElement in doc.Root.Elements().Elements(XML_NODE_SENTENCE))
        {
            Dictionary<int, word> wordsMap = new Dictionary<int, word>();

            foreach (XElement wordElement in sentenceElement.Elements(XML_NODE_WORD))
            {
                // The (string) cast yields null for an absent attribute, and
                // Convert.ToInt32(null, ...) yields 0, so a word without an ID
                // attribute is stored under id 0.
                word w = new word(
                    readDom(wordElement),
                    (string)wordElement.Attribute(WORD_ATTR_FEAT),
                    toInt((string)wordElement.Attribute(WORD_ATTR_ID)),
                    (string)wordElement.Attribute(WORD_ATTR_LEMMA),
                    (string)wordElement.Attribute(WORD_ATTR_LINK));
                wordsMap.Add(w.id, w);
            }

            sentence s = new sentence(
                toInt(requireAttribute(sentenceElement, SENTENCE_ATTR_ID)),
                sentenceElement.Value,
                wordsMap);
            sentenceMap.Add(s.id, s);
        }
    }

    /// <summary>
    /// For every non-root word of every parsed sentence, builds the key
    /// <c>child.featValues[0] + delimiter + head.featValues[0]</c> and
    /// increments its counter in <c>main.stats</c>.
    /// </summary>
    /// <remarks>
    /// The delimiter is <c>"&lt;"</c> when the child's id is lower than its
    /// head's id and <c>"&gt;"</c> otherwise.
    /// </remarks>
    /// <exception cref="KeyNotFoundException">
    /// A word refers to a head id that is not present in its sentence.
    /// </exception>
    public void getStats()
    {
        foreach (KeyValuePair<int, sentence> sentenceEntry in sentenceMap)
        {
            var words = sentenceEntry.Value.wordsMap;

            foreach (KeyValuePair<int, word> wordEntry in words)
            {
                word child = wordEntry.Value;
                if (child.dom == ROOT_DOM)
                {
                    // Root words have no head, so there is no pair to count.
                    continue;
                }

                word parent;
                if (!words.TryGetValue(child.dom, out parent))
                {
                    throw new KeyNotFoundException(
                        "Sentence " + sentenceEntry.Key + ": word " + child.id +
                        " refers to head " + child.dom + ", which is not in the sentence.");
                }

                string delimiter = child.id < parent.id ? "<" : ">";
                string bigram = child.featValues[0] + delimiter + parent.featValues[0]; // word pair key

                // Record the pair in the shared statistics dictionary.
                if (main.stats.ContainsKey(bigram))
                {
                    main.stats[bigram]++;
                }
                else
                {
                    main.stats.Add(bigram, 1);
                }
            }
        }
    }

    // Reads a word's DOM attribute: the root marker maps to ROOT_DOM, anything else is parsed as the head id.
    private static int readDom(XElement wordElement)
    {
        string dom = requireAttribute(wordElement, WORD_ATTR_DOM);
        return dom == XML_ROOT_NODE ? ROOT_DOM : toInt(dom);
    }

    // Returns the value of a mandatory attribute, or throws a FormatException naming what is missing.
    private static string requireAttribute(XElement element, string attributeName)
    {
        XAttribute attribute = element.Attribute(attributeName);
        if (attribute == null)
        {
            throw new FormatException(
                "Element <" + element.Name + "> is missing the required attribute '" +
                attributeName + "'.");
        }
        return attribute.Value;
    }

    // Culture-independent integer conversion for XML data; a null input yields 0.
    private static int toInt(string text)
    {
        return Convert.ToInt32(text, System.Globalization.CultureInfo.InvariantCulture);
    }
}
}