Skip to content

Commit

Permalink
remove useless function and merge files
Browse files Browse the repository at this point in the history
  • Loading branch information
vcaesar committed Sep 20, 2021
1 parent 69d10a4 commit be4ddfd
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 115 deletions.
61 changes: 0 additions & 61 deletions seg.go

This file was deleted.

25 changes: 25 additions & 0 deletions seg_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,28 @@ func Join(text []Text) string {
}
return string(b)
}

// printTokens renders the first numTokens entries of tokens as one string.
//
// For every selected token, each element of its text is converted to a string
// and written back to back; a single space is written after every token, so a
// non-empty result always ends with a space. With numTokens <= 0 the result
// is the empty string.
//
// numTokens must not exceed len(tokens) and the selected entries must be
// non-nil: indexing past the slice or dereferencing a nil token panics.
func printTokens(tokens []*Token, numTokens int) (output string) {
	// Accumulate into a byte slice. Appending is amortized linear, whereas
	// growing a string with += re-copies the whole prefix on every step.
	var buf []byte
	for i := 0; i < numTokens; i++ {
		for _, word := range tokens[i].text {
			// string(word) is the same conversion the result has always been
			// built from; append accepts a string source for a []byte target.
			buf = append(buf, string(word)...)
		}
		buf = append(buf, ' ')
	}
	return string(buf)
}

// toWords converts each argument into a Text holding that string's bytes and
// returns them in argument order.
//
// The result is never nil: calling toWords with no arguments yields an empty,
// non-nil slice.
func toWords(strings ...string) []Text {
	// The final length is known up front, so allocate once instead of letting
	// append grow the slice step by step.
	words := make([]Text, len(strings))
	for i, s := range strings {
		words[i] = []byte(s)
	}
	return words
}

// bytesToString concatenates the given texts into one string, writing a "/"
// after every element (including the last one). An empty or nil input yields
// the empty string.
func bytesToString(bytes []Text) (output string) {
	// Collect into a byte slice so the cost stays linear in the output size;
	// repeated string += would re-copy everything written so far each time.
	var buf []byte
	for _, b := range bytes {
		buf = append(buf, b...)
		buf = append(buf, '/')
	}
	return string(buf)
}
2 changes: 1 addition & 1 deletion segmenter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func TestGetVer(t *testing.T) {
ver := GetVersion()

tt.Expect(t, Version, ver)
expect(t, Version, ver)
tt.Expect(t, Version, ver)
tt.Equal(t, Version, ver)
}

Expand Down
53 changes: 0 additions & 53 deletions test_utils.go

This file was deleted.

45 changes: 45 additions & 0 deletions token.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,51 @@

package gse

// AnalyzeToken is the structure that carries the analysis information of one
// segment.
type AnalyzeToken struct {
	// Start is the position in the text at which the segment begins.
	// NOTE(review): End is presumably the matching end position; whether it
	// is inclusive is not visible here — confirm where the value is filled.
	Start, End int

	// NOTE(review): presumably the ordinal position of the segment and its
	// length; units are not shown in this file section — confirm.
	Position, Len int

	// NOTE(review): presumably a category label for the segment and the
	// segment's own text — meanings inferred from the names, confirm.
	Type, Text string

	// NOTE(review): presumably the word frequency — confirm.
	Freq float64
	// NOTE(review): presumably the part-of-speech tag — confirm.
	Pos string
}

// Segment is a single segmented word within a text.
type Segment struct {
	// start is the byte offset in the text at which the segment begins;
	// end is the byte offset at which it finishes, the end offset itself
	// not being part of the segment.
	start, end int

	// NOTE(review): presumably the ordinal position of the segment; it is
	// assigned outside this section of the file — confirm.
	Position int

	// token holds the token information of the segment.
	token *Token
}

// Start returns the byte offset in the text at which the segment begins.
func (s *Segment) Start() int { return s.start }

// End returns the byte offset in the text at which the segment finishes; the
// returned offset itself is not part of the segment.
func (s *Segment) End() int { return s.end }

// Token returns the token information of the segment.
func (s *Segment) Token() *Token { return s.token }

// Text 字串类型,可以用来表达
// 1. 一个字元,比如 "世" 又如 "界", 英文的一个字元是一个词
// 2. 一个分词,比如 "世界" 又如 "人口"
Expand Down

0 comments on commit be4ddfd

Please sign in to comment.