Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update users channels and messages via API #102

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions internal/slacklog/api.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
package slacklog

import (
"encoding/json"
"fmt"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
)

func reverse(slice []interface{}) {
halfLen := len(slice) / 2
for i := 0; i < halfLen; i++ {
j := len(slice) - i - 1
slice[i], slice[j] = slice[j], slice[i]
}
}

func DownloadEntitiesToFile(slackToken string, apiMethod string, extraParams map[string]string, jsonKey string, isReverse bool, destJSONFilePath string) error {
results, err := getPaginatedEntites(slackToken, apiMethod, extraParams, jsonKey, isReverse)
if err != nil {
return err
}

if len(results) == 0 {
return nil
}

destDir := filepath.Dir(destJSONFilePath)
err = os.MkdirAll(destDir, 0777)
if err != nil {
return err
}

w, err := os.Create(destJSONFilePath)
if err != nil {
return err
}
defer w.Close()

encoder := json.NewEncoder(w)
encoder.SetEscapeHTML(false)
err = encoder.Encode(&results)
if err != nil {
return err
}

return nil
}

func getPaginatedEntites(slackToken string, apiMethod string, extraParams map[string]string, jsonKey string, isReverse bool) ([]interface{}, error) {
// Slack API の pagination の仕様: https://api.slack.com/docs/pagination
var results []interface{}
var nextCursor string

pageNum := 1
for {
var json map[string]interface{}

log.Printf("GET %v page %d of %s ... ", extraParams, pageNum, apiMethod)
params := map[string]string{
"token": slackToken,
"cursor": nextCursor,
}
for key, value := range extraParams {
params[key] = value
}
err := httpGetJSON("https://slack.com/api/"+apiMethod, params, &json)
if err != nil {
return nil, err
}

responseOK := true
okResponseValue, ok := json["ok"]
if ok {
responseOK, ok = okResponseValue.(bool)
if !ok {
responseOK = false
}
} else {
responseOK = false
}
if !responseOK {
errorMessage, ok := json["error"].(string)
if ok {
return nil, fmt.Errorf("Error response: %s", errorMessage)
}
return nil, fmt.Errorf("Error response: %v", json)
}

targetItem, ok := json[jsonKey]
if !ok {
return nil, fmt.Errorf("Key not found in response:" + jsonKey)
}
list, ok := targetItem.([]interface{})
if !ok {
return nil, fmt.Errorf("Unknown type of value:" + jsonKey)
}

log.Printf("fetched %s count: %d\n", jsonKey, len(list))

results = append(results, list...)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

patination が必要だと考えてるものを、メモリ上で全部合成するのはあんま嬉しくなさげ。
以下みたいに都度ファイルに出力してしまったほうが良いのでは?

もしくはドキュメントコメントにあんま大きなものは取らないで、みたいに書くべきか?

[
{"entity": "123456"},
{"entity": "123456"},
{"entity": "123456"},

{"entity": "123456"},
{"entity": "123456"},
{"entity": "123456"},

]

reverse との兼ね合いはあるが…そもそもJSONに書き出す段階で reverse する=順序を気にする必要ってなんなんだろう?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

どうせ読み込む時は全部メモリに読みますが…しかも全期間分のものを同時に。ダウンロード時にだけそこを気にする必要ありますかね?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

いまここから手を付けておけばその方向性が示せますよね。
generatorも今の全部メモリに乗せる方式は破綻が見えてるので、直さなきゃいけないですから。

downloaderは SQLiteとかのローカルのDBに逐次乗せる。
generatorはそれを切り出して1ページずつHTML生成
みたいなのが目指すべきゴールかなと。

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

generator 側はともかく、こちらは1チャンネル/1日分です。1チャンネルの1日分のログのサイズはたかが知れていると思っているんですが(実際に 400 件以上の発言があった場合でも JSON で 300KB に満たない、メモリに展開しても 1MB も取ることはなさそう)、そこを今頑張る必要あるでしょうか?
この部分は過去の全ログを処理する generator と違って、常に1日分なので今後ログが蓄積していっても処理する量が増えて行ったりはしません。
メモリも基本的に潤沢な今の時代にそこを気にするのは早すぎる最適化を感じてしまいます。


nextCursor = getNextCursor(json)
if nextCursor == "" {
break
}
pageNum += 1
}

if isReverse {
reverse(results)
}

return results, nil
}

func getNextCursor(json map[string]interface{}) string {
metadataValue, ok := json["response_metadata"]
if !ok {
return ""
}
metadata, ok := metadataValue.(map[string]interface{})
if !ok {
return ""
}
nextCursorValue, ok := metadata["next_cursor"]
if !ok {
return ""
}
nextCursor, ok := nextCursorValue.(string)
if !ok {
return ""
}
return nextCursor
}

func httpGetJSON(rawurl string, params map[string]string, dst interface{}) error {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

APIの汎用性を考えると paramsmap[string][]string のが良いかもしれない。
同じ名前の query string ってなんだよって思うけれど url.Values はそれを考慮した設計になってるし、
まれにそういうAPIもある。

当然 values.Set ではなくて values.Add で地道に構築する形になる。

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

そのケースは使わないだろうと判断して省略しました。

url, err := url.Parse(rawurl)
if err != nil {
return err
}

values := url.Query()
for name, value := range params {
values.Set(name, value)
}
url.RawQuery = values.Encode()

resp, err := http.Get(url.String())
if err != nil {
return err
}
defer resp.Body.Close()

if resp.StatusCode/100 != 2 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

これ(特にGET)は普通に 200 で比較したほうが良いんじゃないかな?

Suggested change
if resp.StatusCode/100 != 2 {
if resp.StatusCode == http.StatusOK {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

整数除算なので、

if resp.StatusCode < 200 || resp.StatusCode >= 300 {

が同等なコードですね

Copy link
Member

@koron koron May 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Getで 201(created) や 204(no content) って明らかに異常な状態なので
それを十把一絡げで扱うのはよろしくないって話でもあるのです。

なので 200 (OK) でダイレクト比較でほかのケースは落とすのが良いです。

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

なるほど、たしかにそうですね

return fmt.Errorf("[%s]: %s", resp.Status, url.String())
}

err = json.NewDecoder(resp.Body).Decode(dst)
if err != nil {
return err
}

return nil
}
60 changes: 60 additions & 0 deletions subcmd/fetch_messages.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package subcmd

import (
"fmt"
"os"
"path/filepath"
"strconv"
"time"

"github.com/vim-jp/slacklog-generator/internal/slacklog"
)

func FetchMessages(args []string) error {
slackToken := os.Getenv("SLACK_TOKEN")
if slackToken == "" {
return fmt.Errorf("$SLACK_TOKEN required")
}

if len(args) < 2 {
fmt.Println("Usage: go run scripts/main.go update-messages {data-dir} {yyyy-mm-dd}")
return nil
}

dataDir := filepath.Clean(args[0])

loc, err := time.LoadLocation("Asia/Tokyo")
if err != nil {
return err
}

date, err := time.ParseInLocation("2006-01-02", args[1], loc)
if err != nil {
return err
}
dateEndTime := date.AddDate(0, 0, 1)

channelsJSONPath := filepath.Join(dataDir, "channels.json")
channelTable, err := slacklog.NewChannelTable(channelsJSONPath, []string{"*"})
if err != nil {
return err
}

for _, channel := range channelTable.Channels {
channelDir := filepath.Join(dataDir, channel.ID)

extraParams := map[string]string{
"channel": channel.ID,
"oldest": strconv.FormatInt(date.Unix(), 10) + ".000000",
"latest": strconv.FormatInt(dateEndTime.Unix(), 10) + ".000000",
"limit": "200",
}
outFile := filepath.Join(channelDir, date.Format("2006-01-02")+".json")
err := slacklog.DownloadEntitiesToFile(slackToken, "conversations.history", extraParams, "messages", true, outFile)
if err != nil {
return err
}
}

return nil
}
11 changes: 10 additions & 1 deletion subcmd/subcmd.go
Original file line number Diff line number Diff line change
@@ -13,7 +13,10 @@ func Run() error {
convert-exported-logs
download-emoji
download-files
generate-html`)
fetch-messages
generate-html
update-channel-list
update-user-list`)
return nil
}

@@ -26,8 +29,14 @@ func Run() error {
return DownloadEmoji(args)
case "download-files":
return DownloadFiles(args)
case "fetch-messages":
return FetchMessages(args)
case "generate-html":
return GenerateHTML(args)
case "update-channel-list":
return UpdateChannelList(args)
case "update-user-list":
return UpdateUserList(args)
}

return fmt.Errorf("unknown subcmd: %s", subCmdName)
30 changes: 30 additions & 0 deletions subcmd/update_channel_list.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package subcmd

import (
"fmt"
"os"
"path/filepath"

"github.com/vim-jp/slacklog-generator/internal/slacklog"
)

func UpdateChannelList(args []string) error {
slackToken := os.Getenv("SLACK_TOKEN")
if slackToken == "" {
return fmt.Errorf("$SLACK_TOKEN required")
}

if len(args) < 1 {
fmt.Println("Usage: go run scripts/main.go update-channel-list {out-file}")
return nil
}

outFile := filepath.Clean(args[0])

err := slacklog.DownloadEntitiesToFile(slackToken, "conversations.list", nil, "channels", false, outFile)
if err != nil {
return err
}

return nil
}
30 changes: 30 additions & 0 deletions subcmd/update_user_list.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package subcmd

import (
"fmt"
"os"
"path/filepath"

"github.com/vim-jp/slacklog-generator/internal/slacklog"
)

func UpdateUserList(args []string) error {
slackToken := os.Getenv("SLACK_TOKEN")
if slackToken == "" {
return fmt.Errorf("$SLACK_TOKEN required")
}

if len(args) < 1 {
fmt.Println("Usage: go run scripts/main.go update-user-list {out-file}")
return nil
}

outFile := filepath.Clean(args[0])

err := slacklog.DownloadEntitiesToFile(slackToken, "users.list", nil, "members", false, outFile)
if err != nil {
return err
}

return nil
}