-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathinsert_txt.go
147 lines (126 loc) · 3.37 KB
/
insert_txt.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
package main
import (
"context"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"github.com/jackc/pgx/v5"
)
// Package-level configuration: the database connection string and the
// command-line flags. The flag variables hold their default values until
// flag.Parse is called.
var (
	// DATABASE_URL is the connection string passed to pgx.Connect. It is read
	// from the DATABASE_URL environment variable and falls back to a
	// hard-coded localhost URL when that variable is unset or empty.
	// NOTE(review): the fallback embeds a password in source; confirm it is
	// only ever a local development credential.
	DATABASE_URL = orDefault(
		os.Getenv("DATABASE_URL"),
		"postgres://postgres:Postgres2022!@localhost:5432/scrapjobs",
	)

	// fromFlag (-from) is the folder whose *.json files are imported. When it
	// is empty the jobs are read from standard input instead.
	fromFlag = flag.String("from", "", "Insert from this folder")

	// truncateFlag (-truncate-table) requests truncating the jobs table
	// before the import.
	truncateFlag = flag.Bool("truncate-table", false, "Truncate the table before inserting the new data")

	// updateNewFlag (-update-tags) requests removing the "new" tag from the
	// rows already stored before the import.
	updateNewFlag = flag.Bool("update-tags", false, "Delete \"new\" tag from existing entries before adding the new entries")

	// dryRunFlag (-dry-run) defaults to true: data-modifying statements are
	// only logged unless the flag is explicitly set to false.
	dryRunFlag = flag.Bool("dry-run", true, "Do not do anything, print what whould be done instead")
)
// orDefault returns s when it is non-empty and def otherwise.
func orDefault(s, def string) string {
	if s != "" {
		return s
	}
	return def
}
// Jobs is one job entry as decoded from the input JSON. Its fields map
// one-to-one onto the title, descrip, url, tags and metadata columns that
// main writes to the jobs table.
type Jobs struct {
	// Title is decoded from the "title" key.
	Title string `json:"title"`
	// Descrip is decoded from the "descrip" key.
	Descrip string `json:"descrip"`
	// Url is decoded from the "url" key; the insert statement in main uses
	// the url column as its ON CONFLICT target.
	Url string `json:"url"`
	// Tags is decoded from the "tags" key.
	Tags []string `json:"tags"`
	// Metadata is decoded from the "metadata" key as string-to-string pairs.
	Metadata map[string]string `json:"metadata"`
}
// readJobs loads the job entries to import and returns a pointer to the
// resulting slice.
//
// When the -from flag is set, every file matching <from>/*.json is read and
// decoded as a single Jobs object. A file that cannot be read is logged and
// skipped, so it contributes no entry to the result. When -from is empty,
// standard input is read to the end and decoded as a JSON array of Jobs.
//
// It panics if the glob pattern is malformed, if standard input cannot be
// read, or if any input fails to decode as JSON.
func readJobs() *[]Jobs {
	var jobs []Jobs

	if *fromFlag == "" {
		// Read json from stdin
		log.Printf("Reading JSON jobs from the stdin\n")
		raw, err := io.ReadAll(os.Stdin)
		if err != nil {
			panic(err)
		}
		if err := json.Unmarshal(raw, &jobs); err != nil {
			panic(err)
		}
		return &jobs
	}

	jsonFiles, err := filepath.Glob(filepath.Join(*fromFlag, "*.json"))
	if err != nil {
		panic(err)
	}

	// Entries are appended rather than written by index so that a skipped
	// file does not leave a zero-value Jobs behind for the caller to insert.
	jobs = make([]Jobs, 0, len(jsonFiles))
	for _, name := range jsonFiles {
		// os.ReadFile opens, reads and closes the file in one call, so no
		// descriptor stays open across iterations and read errors surface
		// together with open errors.
		raw, err := os.ReadFile(name)
		if err != nil {
			log.Printf("JSON file error: %s: '%s'\n", err, name)
			continue
		}
		var job Jobs
		if err := json.Unmarshal(raw, &job); err != nil {
			panic(err)
		}
		jobs = append(jobs, job)
	}
	return &jobs
}
// main connects to the database at DATABASE_URL, logs the server version,
// optionally truncates the jobs table (-truncate-table) and removes the "new"
// tag from the stored rows (-update-tags), and then upserts every job returned
// by readJobs with the "new" tag appended.
//
// While -dry-run is set, no data-modifying statement is executed; the intended
// actions are only logged. A failed truncate or tag update panics. A failed
// insert is logged and the import continues with the next job.
func main() {
	flag.Parse()
	ctx := context.Background()

	log.Printf("Connecting to the db\n")
	conn, err := pgx.Connect(ctx, DATABASE_URL)
	if err != nil {
		panic(err)
	}
	// Registered right after connecting so the connection is closed on every
	// return and panic path below.
	defer conn.Close(ctx)

	var version string
	if err := conn.QueryRow(ctx, "select version()").Scan(&version); err != nil {
		// log.Fatalf exits without running deferred calls, so close first.
		conn.Close(ctx)
		log.Fatalf("QueryRow failed: %v\n", err)
	}
	log.Printf("Version: %s\n", version)

	// Truncate the table
	if *truncateFlag {
		log.Printf("Truncating the table")
		if !*dryRunFlag {
			if _, err := conn.Exec(ctx, "truncate table jobs"); err != nil {
				log.Panic(err)
			}
		} else {
			log.Printf("Wound truncate the jobs table")
		}
	}

	if *updateNewFlag {
		log.Printf("Updating \"new\" tags")
		qry := "update jobs set tags = array_remove(tags, 'new')"
		if !*dryRunFlag {
			if _, err := conn.Exec(ctx, qry); err != nil {
				log.Panic(err)
			}
		} else {
			log.Printf("Wound update the tags: %s", qry)
		}
	}

	for _, data := range *readJobs() {
		if *dryRunFlag {
			log.Printf("Wound insert or update the job: %s", data.Title)
			continue
		}

		// Append the *new* tag for the new imported data. On a url conflict
		// the statement stores the tags without "new", so only rows inserted
		// for the first time carry it.
		tags := append(data.Tags, "new")
		_, err := conn.Exec(ctx,
			`INSERT INTO jobs (title, descrip, url, tags, metadata)
			VALUES($1, $2, $3, $4, $5)
			ON CONFLICT (url) DO UPDATE
			SET title = $1,
			descrip = $2,
			tags = array_remove($4, 'new'),
			metadata = $5
			`,
			data.Title, data.Descrip, data.Url, tags, data.Metadata)
		if err != nil {
			// Report the failure instead of claiming success, and keep
			// importing the remaining jobs.
			log.Printf("Insert failed for %q (%s): %v", data.Title, data.Url, err)
			continue
		}
		fmt.Println(data.Title, "inserted")
	}

	// Returning normally exits with status 0 and, unlike os.Exit, lets the
	// deferred conn.Close run.
}