Skip to content

Commit 2aa9dac

Browse files
committed
Rewrite insert_txt.py in go
1 parent de154e6 commit 2aa9dac

File tree

5 files changed

+153
-39
lines changed

5 files changed

+153
-39
lines changed

insert_txt.py

-39
This file was deleted.

tools/README.md

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# insert_txt.go
2+
3+
Small tool to insert the output of the scrapers (in the `../output/`
4+
folder) into the database.
5+
6+
Reads the database URL from the `DATABASE_URL` environment variable.

tools/go.mod

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
module tools
2+
3+
go 1.22.4
4+
5+
require (
6+
github.com/jackc/pgx v3.6.2+incompatible
7+
github.com/jackc/pgx/v5 v5.7.1
8+
)
9+
10+
require (
11+
github.com/cockroachdb/apd v1.1.0 // indirect
12+
github.com/gofrs/uuid v4.4.0+incompatible // indirect
13+
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733 // indirect
14+
github.com/jackc/pgpassfile v1.0.0 // indirect
15+
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
16+
github.com/lib/pq v1.10.9 // indirect
17+
github.com/pkg/errors v0.9.1 // indirect
18+
github.com/shopspring/decimal v1.4.0 // indirect
19+
golang.org/x/crypto v0.27.0 // indirect
20+
golang.org/x/text v0.18.0 // indirect
21+
)

tools/go.sum

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I=
2+
github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ=
3+
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
5+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
6+
github.com/gofrs/uuid v4.4.0+incompatible h1:3qXRTX8/NbyulANqlc0lchS1gqAVxRgsuW1YrTJupqA=
7+
github.com/gofrs/uuid v4.4.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
8+
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733 h1:vr3AYkKovP8uR8AvSGGUK1IDqRa5lAAvEkZG1LKaCRc=
9+
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733/go.mod h1:WrMFNQdiFJ80sQsxDoMokWK1W5TQtxBFNpzWTD84ibQ=
10+
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
11+
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
12+
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
13+
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
14+
github.com/jackc/pgx v3.6.2+incompatible h1:2zP5OD7kiyR3xzRYMhOcXVvkDZsImVXfj+yIyTQf3/o=
15+
github.com/jackc/pgx v3.6.2+incompatible/go.mod h1:0ZGrqGqkRlliWnWB4zKnWtjbSWbGkVEFm4TeybAXq+I=
16+
github.com/jackc/pgx/v5 v5.7.1 h1:x7SYsPBYDkHDksogeSmZZ5xzThcTgRz++I5E+ePFUcs=
17+
github.com/jackc/pgx/v5 v5.7.1/go.mod h1:e7O26IywZZ+naJtWWos6i6fvWK+29etgITqrqHLfoZA=
18+
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
19+
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
20+
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
21+
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
22+
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
23+
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
24+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
25+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
26+
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
27+
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
28+
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
29+
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
30+
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
31+
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
32+
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
33+
golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A=
34+
golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70=
35+
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
36+
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
37+
golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224=
38+
golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
39+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
40+
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
41+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
42+
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

tools/insert_txt.go

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"flag"
7+
"fmt"
8+
"io"
9+
"log"
10+
"os"
11+
"path/filepath"
12+
13+
"github.com/jackc/pgx/v5"
14+
)
15+
16+
// DATABASE_URL is the PostgreSQL connection string used by this tool. It is
// read from the DATABASE_URL environment variable; when that value is empty,
// a default pointing at a database on localhost is used instead.
//
// NOTE(review): the fallback embeds a password in source — confirm it is only
// ever a throwaway local credential.
var DATABASE_URL = orDefault(
	os.Getenv("DATABASE_URL"),
	"postgres://postgres:Postgres2022!@localhost:5432/scrapjobs",
)
19+
20+
// var nFlag *int = flag.Int("n", 1234, "help message for flag n")
21+
22+
// orDefault returns s unless it is the empty string, in which case it
// returns def.
func orDefault(s, def string) string {
	if s != "" {
		return s
	}
	return def
}
29+
30+
func main() {
31+
flag.Parse()
32+
33+
conn, err := pgx.Connect(context.Background(), DATABASE_URL)
34+
if err != nil {
35+
panic(err)
36+
}
37+
log.Printf("Connected!!!\n")
38+
39+
var version string
40+
err = conn.QueryRow(context.Background(),
41+
"select version()").Scan(&version)
42+
if err != nil {
43+
fmt.Fprintf(os.Stderr, "QueryRow failed: %v\n", err)
44+
os.Exit(1)
45+
}
46+
fmt.Printf("Version: %s\n", version)
47+
defer conn.Close(context.Background())
48+
49+
var matches []string
50+
if matches, err = filepath.Glob("../output/*.json"); err != nil {
51+
panic(err)
52+
}
53+
for _, val := range matches {
54+
jsonFile, err := os.Open(val)
55+
if err != nil {
56+
continue
57+
}
58+
defer jsonFile.Close()
59+
60+
type Jobs struct {
61+
Title string `json:"title"`
62+
Descrip string `json:"descrip"`
63+
Url string `json:"url"`
64+
Tags []string `json:"tags"`
65+
}
66+
67+
bytes, err := io.ReadAll(jsonFile)
68+
var data Jobs
69+
json.Unmarshal(bytes, &data)
70+
71+
conn.Exec(context.Background(),
72+
"INSERT INTO jobs (title, descrip, url, tags) VALUES($1, $2, $3, $4)",
73+
data.Title, data.Descrip, data.Url, data.Tags)
74+
75+
fmt.Println(data.Title, "inserted")
76+
}
77+
78+
os.Exit(0)
79+
80+
// cleanup the jobs table
81+
//conn.Exec(context.Background(),
82+
// "truncate jobs")
83+
84+
}

0 commit comments

Comments
 (0)