Skip to content

Commit be7a899

Browse files
authored
SyntaxNet cleanups (tensorflow#281)
Cleanup changes for syntaxnet.
1 parent 0a40f8d commit be7a899

38 files changed

+140
-122
lines changed

syntaxnet/syntaxnet/BUILD

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
# A syntactic parser and part-of-speech tagger in TensorFlow.
33

44
package(
5-
default_visibility = ["//visibility:private"],
5+
default_visibility = [
6+
"//visibility:private",
7+
],
68
features = ["-layering_check"],
79
)
810

@@ -108,9 +110,9 @@ cc_library(
108110
srcs = ["test_main.cc"],
109111
linkopts = ["-lm"],
110112
deps = [
113+
"//external:gtest",
111114
"@org_tensorflow//tensorflow/core:lib",
112115
"@org_tensorflow//tensorflow/core:testlib",
113-
"//external:gtest",
114116
],
115117
)
116118

@@ -130,6 +132,7 @@ cc_library(
130132
srcs = ["text_formats.cc"],
131133
deps = [
132134
":document_format",
135+
":sentence_proto",
133136
],
134137
alwayslink = 1,
135138
)
@@ -150,7 +153,6 @@ cc_library(
150153
deps = [
151154
":feature_extractor_proto",
152155
":fml_parser",
153-
":kbest_syntax_proto",
154156
":sentence_proto",
155157
":task_context",
156158
],
@@ -166,7 +168,6 @@ cc_library(
166168
deps = [
167169
":document_format",
168170
":feature_extractor_proto",
169-
":kbest_syntax_proto",
170171
":proto_io",
171172
":sentence_proto",
172173
":task_context",
@@ -182,6 +183,7 @@ cc_library(
182183
deps = [
183184
":dictionary_proto",
184185
":feature_extractor",
186+
":sentence_proto",
185187
":shared_store",
186188
":term_frequency_map",
187189
":utils",
@@ -263,6 +265,7 @@ cc_library(
263265
deps = [
264266
":kbest_syntax_proto",
265267
":registry",
268+
":sentence_proto",
266269
":shared_store",
267270
":task_context",
268271
":term_frequency_map",
@@ -279,6 +282,7 @@ cc_library(
279282
":dictionary_proto",
280283
":sentence_proto",
281284
":task_context",
285+
":task_spec_proto",
282286
":term_frequency_map",
283287
":test_main",
284288
],
@@ -294,7 +298,6 @@ cc_library(
294298
":parser_transitions",
295299
":registry",
296300
":sentence_features",
297-
":sentence_proto",
298301
":task_context",
299302
":term_frequency_map",
300303
":workspace",
@@ -325,6 +328,7 @@ cc_library(
325328
":feature_extractor",
326329
":parser_features",
327330
":parser_transitions",
331+
":sentence_proto",
328332
":sparse_proto",
329333
":task_context",
330334
":task_spec_proto",
@@ -344,6 +348,7 @@ cc_library(
344348
":parser_transitions",
345349
":sentence_batch",
346350
":sentence_proto",
351+
":sparse_proto",
347352
":task_context",
348353
":task_spec_proto",
349354
],
@@ -360,7 +365,6 @@ cc_library(
360365
":sentence_batch",
361366
":sentence_proto",
362367
":task_context",
363-
":task_spec_proto",
364368
":text_formats",
365369
],
366370
alwayslink = 1,
@@ -370,13 +374,13 @@ cc_library(
370374
name = "lexicon_builder",
371375
srcs = ["lexicon_builder.cc"],
372376
deps = [
377+
":dictionary_proto",
373378
":document_format",
374379
":parser_features",
375380
":parser_transitions",
376381
":sentence_batch",
377382
":sentence_proto",
378383
":task_context",
379-
":task_spec_proto",
380384
":text_formats",
381385
],
382386
alwayslink = 1,
@@ -429,6 +433,11 @@ filegroup(
429433
],
430434
)
431435

436+
filegroup(
437+
name = "parsey_data",
438+
srcs = glob(["models/parsey_mcparseface/*"]),
439+
)
440+
432441
cc_test(
433442
name = "shared_store_test",
434443
size = "small",
@@ -464,6 +473,8 @@ cc_test(
464473
deps = [
465474
":parser_transitions",
466475
":populate_test_inputs",
476+
":sentence_proto",
477+
":task_spec_proto",
467478
":test_main",
468479
],
469480
)
@@ -476,6 +487,8 @@ cc_test(
476487
deps = [
477488
":parser_transitions",
478489
":populate_test_inputs",
490+
":sentence_proto",
491+
":task_spec_proto",
479492
":test_main",
480493
],
481494
)
@@ -519,10 +532,10 @@ py_library(
519532
name = "graph_builder",
520533
srcs = ["graph_builder.py"],
521534
deps = [
522-
"@org_tensorflow//tensorflow:tensorflow_py",
523-
"@org_tensorflow//tensorflow/core:protos_all_py",
524535
":load_parser_ops_py",
525536
":parser_ops",
537+
"@org_tensorflow//tensorflow:tensorflow_py",
538+
"@org_tensorflow//tensorflow/core:protos_all_py",
526539
],
527540
)
528541

syntaxnet/syntaxnet/affix.cc

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,14 @@ static const int kInitialBuckets = 1024;
3939
// Fill factor for term and affix hash maps.
4040
static const int kFillFactor = 2;
4141

42-
int TermHash(string term) {
42+
int TermHash(const string &term) {
4343
return utils::Hash32(term.data(), term.size(), 0xDECAF);
4444
}
4545

4646
// Copies a substring of a Unicode text to a string.
47-
static void UnicodeSubstring(UnicodeText::const_iterator start,
48-
UnicodeText::const_iterator end, string *result) {
47+
static void UnicodeSubstring(const UnicodeText::const_iterator &start,
48+
const UnicodeText::const_iterator &end,
49+
string *result) {
4950
result->clear();
5051
result->append(start.utf8_data(), end.utf8_data() - start.utf8_data());
5152
}
@@ -79,7 +80,7 @@ void AffixTable::Read(const AffixTableEntry &table_entry) {
7980
const auto &affix_entry = table_entry.affix(affix_id);
8081
CHECK_GE(affix_entry.length(), 0);
8182
CHECK_LE(affix_entry.length(), max_length_);
82-
CHECK(FindAffix(affix_entry.form()) == NULL); // forbid duplicates
83+
CHECK(FindAffix(affix_entry.form()) == nullptr); // forbid duplicates
8384
Affix *affix = AddNewAffix(affix_entry.form(), affix_entry.length());
8485
CHECK_EQ(affix->id(), affix_id);
8586
}
@@ -117,7 +118,7 @@ void AffixTable::Write(AffixTableEntry *table_entry) const {
117118
affix_entry->set_form(affix->form());
118119
affix_entry->set_length(affix->length());
119120
affix_entry->set_shorter_id(
120-
affix->shorter() == NULL ? -1 : affix->shorter()->id());
121+
affix->shorter() == nullptr ? -1 : affix->shorter()->id());
121122
}
122123
}
123124

@@ -137,7 +138,7 @@ Affix *AffixTable::AddAffixesForWord(const char *word, size_t size) {
137138
// Determine longest affix.
138139
int affix_len = length;
139140
if (affix_len > max_length_) affix_len = max_length_;
140-
if (affix_len == 0) return NULL;
141+
if (affix_len == 0) return nullptr;
141142

142143
// Find start and end of longest affix.
143144
UnicodeText::const_iterator start, end;
@@ -150,25 +151,25 @@ Affix *AffixTable::AddAffixesForWord(const char *word, size_t size) {
150151
}
151152

152153
// Try to find successively shorter affixes.
153-
Affix *top = NULL;
154-
Affix *ancestor = NULL;
154+
Affix *top = nullptr;
155+
Affix *ancestor = nullptr;
155156
string s;
156157
while (affix_len > 0) {
157158
// Try to find affix in table.
158159
UnicodeSubstring(start, end, &s);
159160
Affix *affix = FindAffix(s);
160-
if (affix == NULL) {
161+
if (affix == nullptr) {
161162
// Affix not found, add new one to table.
162163
affix = AddNewAffix(s, affix_len);
163164

164165
// Update ancestor chain.
165-
if (ancestor != NULL) ancestor->set_shorter(affix);
166+
if (ancestor != nullptr) ancestor->set_shorter(affix);
166167
ancestor = affix;
167-
if (top == NULL) top = affix;
168+
if (top == nullptr) top = affix;
168169
} else {
169170
// Affix found. Update ancestor if needed and return match.
170-
if (ancestor != NULL) ancestor->set_shorter(affix);
171-
if (top == NULL) top = affix;
171+
if (ancestor != nullptr) ancestor->set_shorter(affix);
172+
if (top == nullptr) top = affix;
172173
break;
173174
}
174175

@@ -187,15 +188,15 @@ Affix *AffixTable::AddAffixesForWord(const char *word, size_t size) {
187188

188189
Affix *AffixTable::GetAffix(int id) const {
189190
if (id < 0 || id >= static_cast<int>(affixes_.size())) {
190-
return NULL;
191+
return nullptr;
191192
} else {
192193
return affixes_[id];
193194
}
194195
}
195196

196197
string AffixTable::AffixForm(int id) const {
197198
Affix *affix = GetAffix(id);
198-
if (affix == NULL) {
199+
if (affix == nullptr) {
199200
return "";
200201
} else {
201202
return affix->form();
@@ -204,7 +205,7 @@ string AffixTable::AffixForm(int id) const {
204205

205206
int AffixTable::AffixId(const string &form) const {
206207
Affix *affix = FindAffix(form);
207-
if (affix == NULL) {
208+
if (affix == nullptr) {
208209
return -1;
209210
} else {
210211
return affix->id();
@@ -234,11 +235,11 @@ Affix *AffixTable::FindAffix(const string &form) const {
234235

235236
// Try to find affix in hash table.
236237
Affix *affix = buckets_[hash & (buckets_.size() - 1)];
237-
while (affix != NULL) {
238+
while (affix != nullptr) {
238239
if (strcmp(affix->form_.c_str(), form.c_str()) == 0) return affix;
239240
affix = affix->next_;
240241
}
241-
return NULL;
242+
return nullptr;
242243
}
243244

244245
void AffixTable::Resize(int size_hint) {
@@ -250,7 +251,7 @@ void AffixTable::Resize(int size_hint) {
250251
// Distribute affixes in new buckets.
251252
buckets_.resize(new_size);
252253
for (size_t i = 0; i < buckets_.size(); ++i) {
253-
buckets_[i] = NULL;
254+
buckets_[i] = nullptr;
254255
}
255256
for (size_t i = 0; i < affixes_.size(); ++i) {
256257
Affix *affix = affixes_[i];

syntaxnet/syntaxnet/affix.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
1313
limitations under the License.
1414
==============================================================================*/
1515

16-
#ifndef $TARGETDIR_AFFIX_H_
17-
#define $TARGETDIR_AFFIX_H_
16+
#ifndef SYNTAXNET_AFFIX_H_
17+
#define SYNTAXNET_AFFIX_H_
1818

1919
#include <stddef.h>
2020
#include <string>
@@ -40,7 +40,11 @@ class Affix {
4040
private:
4141
friend class AffixTable;
4242
Affix(int id, const char *form, int length)
43-
: id_(id), length_(length), form_(form), shorter_(NULL), next_(NULL) {}
43+
: id_(id),
44+
length_(length),
45+
form_(form),
46+
shorter_(nullptr),
47+
next_(nullptr) {}
4448

4549
public:
4650
// Returns unique id of affix.
@@ -152,4 +156,4 @@ class AffixTable {
152156

153157
} // namespace syntaxnet
154158

155-
#endif // $TARGETDIR_AFFIX_H_
159+
#endif // SYNTAXNET_AFFIX_H_

syntaxnet/syntaxnet/arc_standard_transitions.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ limitations under the License.
3030

3131
#include <string>
3232

33-
#include "syntaxnet/utils.h"
3433
#include "syntaxnet/parser_state.h"
3534
#include "syntaxnet/parser_transitions.h"
35+
#include "syntaxnet/utils.h"
3636
#include "tensorflow/core/lib/strings/strcat.h"
3737

3838
namespace syntaxnet {

syntaxnet/syntaxnet/arc_standard_transitions_test.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@ limitations under the License.
1515

1616
#include <memory>
1717
#include <string>
18-
#include <gmock/gmock.h>
19-
20-
#include "syntaxnet/utils.h"
2118
#include "syntaxnet/parser_state.h"
2219
#include "syntaxnet/parser_transitions.h"
2320
#include "syntaxnet/populate_test_inputs.h"
2421
#include "syntaxnet/sentence.pb.h"
2522
#include "syntaxnet/task_context.h"
2623
#include "syntaxnet/task_spec.pb.h"
2724
#include "syntaxnet/term_frequency_map.h"
25+
#include "syntaxnet/utils.h"
26+
#include <gmock/gmock.h>
27+
2828
#include "tensorflow/core/lib/core/status.h"
2929
#include "tensorflow/core/platform/env.h"
3030
#include "tensorflow/core/platform/test.h"

syntaxnet/syntaxnet/base.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
1313
limitations under the License.
1414
==============================================================================*/
1515

16-
#ifndef $TARGETDIR_BASE_H_
17-
#define $TARGETDIR_BASE_H_
16+
#ifndef SYNTAXNET_BASE_H_
17+
#define SYNTAXNET_BASE_H_
1818

1919
#include <functional>
2020
#include <string>
@@ -50,4 +50,4 @@ using std::string;
5050

5151
// namespace syntaxnet
5252

53-
#endif // $TARGETDIR_BASE_H_
53+
#endif // SYNTAXNET_BASE_H_

syntaxnet/syntaxnet/beam_reader_ops.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ limitations under the License.
2424
#include "syntaxnet/base.h"
2525
#include "syntaxnet/parser_state.h"
2626
#include "syntaxnet/parser_transitions.h"
27-
#include "syntaxnet/sentence_batch.h"
2827
#include "syntaxnet/sentence.pb.h"
28+
#include "syntaxnet/sentence_batch.h"
2929
#include "syntaxnet/shared_store.h"
3030
#include "syntaxnet/sparse.pb.h"
3131
#include "syntaxnet/task_context.h"

syntaxnet/syntaxnet/document_filters.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ class DocumentSource : public OpKernel {
8484
mutex_lock lock(mu_);
8585
Sentence *document;
8686
vector<Sentence *> document_batch;
87-
while ((document = corpus_->Read()) != NULL) {
87+
while ((document = corpus_->Read()) != nullptr) {
8888
document_batch.push_back(document);
8989
if (static_cast<int>(document_batch.size()) == batch_size_) {
9090
OutputDocuments(context, &document_batch);

0 commit comments

Comments
 (0)