feat: initial version

This commit is contained in:
2020-02-07 20:59:25 +01:00
commit 3aaa69175c
12 changed files with 19277 additions and 0 deletions
+11
View File
@@ -0,0 +1,11 @@
root = true
[*]
end_of_line = lf
insert_final_newline = true
charset = utf-8
trim_trailing_whitespace = true
[*.go]
indent_style = tab
indent_size = 2
+1
View File
@@ -0,0 +1 @@
.idea
+33
View File
@@ -0,0 +1,33 @@
# Docker settings shared by all jobs; DOCKER_HOST points at the docker:dind
# service declared under `services` below.
variables:
DOCKER_HOST: tcp://docker:2375
DOCKER_DRIVER: overlay2
# Pipeline stages, in the order they are listed.
stages:
- build
- deploy-prod
# Image used for the jobs. The `build`, `push` and `deploy` commands in the
# scripts below are presumably provided by this image — confirm.
image: buildtool/build-tools:0.0.13
services:
- docker:dind
# build job: runs `build` then `push`, and keeps release/, coverage.html and
# k8s as job artifacts.
build:
stage: build
script:
- build
- push
artifacts:
paths:
- release/
- coverage.html
- k8s
# deploy-prod job: runs `deploy prod` for the "prod" environment, on the
# master branch only.
deploy-prod:
stage: deploy-prod
before_script:
- echo Deploy to prod
script:
- deploy prod
only:
- master
environment:
name: prod
+24
View File
@@ -0,0 +1,24 @@
# Build stage: checks, tests and compiles the dancefetcher binary.
FROM golang:1.13 as build
WORKDIR /build
# cgo is disabled for every step that does not override it.
ENV CGO_ENABLED=0
ADD . /build
# Fail the build when `go mod tidy -v` prints at least one line containing "unused".
RUN if [ $(go mod tidy -v 2>&1 | grep -c unused) != 0 ]; then echo "Unused modules, please run 'go mod tidy'"; exit 1; fi
# NOTE(review): `go fmt` rewrites files in place rather than failing on
# unformatted code, so this step looks like it cannot break the build — confirm intent.
RUN go fmt ./...
RUN go vet ./...
# Run the tests with the race detector and write an atomic-mode coverage
# profile that covers every package in the module (-coverpkg gets the
# comma-joined output of `go list ./...`). CGO_ENABLED=1 is set for this step
# only, presumably because -race needs cgo.
RUN CGO_ENABLED=1 go test -mod=readonly -race -coverprofile=.testCoverage.txt.tmp -covermode=atomic -coverpkg=$(go list ./... | tr '\n' , | sed 's/,$//') ./...
# Drop profile lines that mention generated.go or _gen.go.
RUN cat .testCoverage.txt.tmp | grep -v generated.go | grep -v _gen.go > .testCoverage.txt
# Write the HTML coverage report to coverage.html.
RUN go tool cover -html=.testCoverage.txt -o coverage.html
# Print the per-function coverage summary to the build log.
RUN go tool cover -func=.testCoverage.txt
# Compile the linux/amd64 binary to /release/dancefetcher with the "prod"
# build tag; -ldflags '-w -s' omits DWARF debug info and the symbol table.
RUN GOOS=linux GOARCH=amd64 go build \
-tags prod \
-a -installsuffix cgo \
-mod=readonly \
-o /release/dancefetcher \
-ldflags '-w -s' \
./cmd/dancefetcher/dancefetcher.go
# Runtime stage: an empty base image holding only the CA bundle and the binary.
FROM scratch
COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
COPY --from=build /release/dancefetcher /
CMD ["/dancefetcher"]
+16
View File
@@ -0,0 +1,16 @@
package main
import (
"fmt"
"gitlab.com/unboundsoftware/dancefinder/dancefetcher/pkg"
"os"
)
// exitFunc is what main calls to terminate the process. It is a package-level
// variable initialised to os.Exit rather than a direct call, presumably so
// tests can substitute it — confirm against the test suite.
var exitFunc = os.Exit
// main runs the fetcher through pkg.Start. When Start reports an error, the
// error is printed on its own line to standard output and the process is
// terminated with status 1 via exitFunc.
func main() {
	err := pkg.Start()
	if err == nil {
		return
	}
	fmt.Printf("%v\n", err)
	exitFunc(1)
}
+11
View File
@@ -0,0 +1,11 @@
module gitlab.com/unboundsoftware/dancefinder/dancefetcher
go 1.13
require (
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect
github.com/gocolly/colly/v2 v2.0.1
github.com/goodsign/monday v0.0.0-20191222141057-7672e75c119d
gopkg.in/alecthomas/kingpin.v2 v2.2.6
)
+66
View File
@@ -0,0 +1,66 @@
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d h1:UQZhZ2O0vMHr2cI+DC1Mbh0TJxzA3RcLoMsFw+aXw7E=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/antchfx/htmlquery v1.0.0 h1:O5IXz8fZF3B3MW+B33MZWbTHBlYmcfw0BAxgErHuaMA=
github.com/antchfx/htmlquery v1.0.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
github.com/antchfx/xmlquery v1.0.0 h1:YuEPqexGG2opZKNc9JU3Zw6zFXwC47wNcy6/F8oKsrM=
github.com/antchfx/xmlquery v1.0.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
github.com/antchfx/xpath v1.0.0 h1:Q5gFgh2O40VTSwMOVbFE7nFNRBu3tS21Tn0KAWeEjtk=
github.com/antchfx/xpath v1.0.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/gocolly/colly/v2 v2.0.1 h1:GGPzBEdrEsavhzVK00FQXMMHBHRpwrbbCCcEKM/0Evw=
github.com/gocolly/colly/v2 v2.0.1/go.mod h1:ePrRZlJcLTU2C/f8pJzXfkdBtBDHL5hOaKLcBoiJcq8=
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/goodsign/monday v0.0.0-20191222141057-7672e75c119d h1:5URHmFBBzalNLBkj/RSv5NG2rnojpuA1doVnE+rrx/I=
github.com/goodsign/monday v0.0.0-20191222141057-7672e75c119d/go.mod h1:u85tpSNZDCgHeHt42TxSKGXsjpz+bhxvRbSkLpewiaA=
github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg=
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=
github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80 h1:Ao/3l156eZf2AW5wK8a7/smtodRU+gha3+BeqJ69lRk=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I=
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+16
View File
@@ -0,0 +1,16 @@
# CronJob that runs the dancefetcher container on the schedule '0 0 * * *'
# (minute 0, hour 0, every day).
# NOTE(review): batch/v1beta1 CronJob is no longer served as of Kubernetes
# 1.25 (batch/v1 replaces it) — confirm the target cluster version.
apiVersion: batch/v1beta1
kind: CronJob
metadata:
name: dancefetcher
spec:
schedule: '0 0 * * *'
# Forbid: a new run is skipped while the previous one is still running.
concurrencyPolicy: Forbid
jobTemplate:
spec:
template:
spec:
containers:
- name: dancefetcher
imagePullPolicy: Always
# ${COMMIT} is a placeholder; presumably substituted by the deploy tooling — confirm.
image: registry.gitlab.com/unboundsoftware/dancefinder/dancefetcher:${COMMIT}
restartPolicy: OnFailure
+234
View File
@@ -0,0 +1,234 @@
package pkg
import (
"bytes"
"encoding/json"
"fmt"
"github.com/gocolly/colly/v2"
"github.com/goodsign/monday"
"gopkg.in/alecthomas/kingpin.v2"
"io/ioutil"
"net/http"
"regexp"
"sort"
"strconv"
"strings"
"time"
)
// Start scrapes every danslogen.se listing page and pushes the collected
// events to the Dancefinder API.
//
// The API base URL is read from the --api flag or the API environment
// variable and defaults to "http://dancefinder". Pages are parsed one at a
// time in the order listed; the first page that fails aborts the run and its
// error is returned. When at least one event was found, the events are
// sorted by date (stable sort) and POSTed as JSON to "<api>/event", with the
// earliest event date sent as the "start" field.
//
// Start returns an error when a page cannot be parsed, when the payload
// cannot be encoded, or when the HTTP request itself fails. A response with a
// status other than 200 is only reported on standard output; it does not
// make Start fail.
func Start() error {
	api := kingpin.Flag("api", "URL to use to connect to Dancefinder API").Envar("API").Default("http://dancefinder").String()
	kingpin.Parse()
	fmt.Printf("Will push new events to '%s'\n", *api)

	urls := []string{
		"http://www.danslogen.se/dansprogram/lan/blekinge",
		"http://www.danslogen.se/dansprogram/lan/dalarna",
		"http://www.danslogen.se/dansprogram/lan/gotland",
		"http://www.danslogen.se/dansprogram/lan/gavleborg",
		"http://www.danslogen.se/dansprogram/lan/halland",
		"http://www.danslogen.se/dansprogram/lan/jamtland",
		"http://www.danslogen.se/dansprogram/lan/jonkoping",
		"http://www.danslogen.se/dansprogram/lan/kalmar",
		"http://www.danslogen.se/dansprogram/lan/kronoberg",
		"http://www.danslogen.se/dansprogram/lan/norrbotten",
		"http://www.danslogen.se/dansprogram/lan/skane",
		"http://www.danslogen.se/dansprogram/lan/stockholm",
		"http://www.danslogen.se/dansprogram/lan/sodermanland",
		"http://www.danslogen.se/dansprogram/lan/uppsala",
		"http://www.danslogen.se/dansprogram/lan/varmland",
		"http://www.danslogen.se/dansprogram/lan/vasterbotten",
		"http://www.danslogen.se/dansprogram/lan/vasternorrland",
		"http://www.danslogen.se/dansprogram/lan/vastmanland",
		// NOTE(review): "vasta_gotalan" does not follow the spelling pattern of
		// the other county slugs — verify against the site before changing it.
		"http://www.danslogen.se/dansprogram/lan/vasta_gotalan",
		"http://www.danslogen.se/dansprogram/lan/orebro",
		"http://www.danslogen.se/dansprogram/lan/ostergotland",
		"http://www.danslogen.se/dansprogram/batar",
	}

	var events []Event
	for _, url := range urls {
		e, err := parse(url)
		if err != nil {
			return err
		}
		events = append(events, e...)
	}
	if len(events) == 0 {
		// Nothing to push.
		return nil
	}

	sort.SliceStable(events, func(i, j int) bool {
		return time.Time(events[i].Date).Before(time.Time(events[j].Date))
	})
	data, err := json.Marshal(&Events{
		Start:  time.Time(events[0].Date).Format("2006-01-02"),
		Events: events,
	})
	if err != nil {
		// Previously this error was silently overwritten by the HTTP call below.
		return fmt.Errorf("encoding events: %w", err)
	}

	response, err := http.Post(fmt.Sprintf("%s/event", *api), "application/json", bytes.NewBuffer(data))
	if err != nil {
		fmt.Printf("Error: %+v\n", err)
		return err
	}
	// The body must be closed on every path so the connection is released.
	defer response.Body.Close()

	if response.StatusCode != 200 {
		// Best effort: the body is only used for diagnostics, so a read error
		// just results in a shorter (or empty) message.
		body, _ := ioutil.ReadAll(response.Body)
		fmt.Printf("Error: %+v\n", response.Status)
		fmt.Printf("Body: %+v\n", string(body))
	}
	return nil
}
// Regular expressions used by parse, compiled once instead of on every
// callback invocation.
var (
	// hiddenRowStyleRe matches a CSS line of the form
	// "tr.<class> {... font-size: 0" and captures <class>.
	hiddenRowStyleRe = regexp.MustCompile(`tr\.(?P<style>.*) {.*font-size: 0`)
	// digitDotDigitRe matches any text containing a digit, a dot and a digit.
	digitDotDigitRe = regexp.MustCompile(`.*\d\.\d.*`)
)

// parse visits url with a colly collector built from opts and returns the
// events found in the page's table rows.
//
// Three handlers cooperate:
//   - "style": collects the row classes whose CSS rule sets font-size to 0;
//     rows carrying one of those classes are skipped later.
//   - "h2": a heading with the exact text "Inga danser funna" marks the page
//     as empty, after which no table rows are processed.
//   - "table tr": the first row with non-blank <th> cells defines the column
//     headers (a header with colspan="2" is recorded twice). Later rows are
//     either month headers (a <td> with a colspan attribute, parsed as a
//     Swedish month name with or without a year) or event rows.
//
// An event's date is built from the most recent month header and the row's
// day-of-month; until a month header is seen, the current month and year are
// used. Rows whose day is not an integer are dropped silently.
//
// The only error returned is the one from collector.Visit.
func parse(url string, opts ...colly.CollectorOption) ([]Event, error) {
	fmt.Printf("Parsing %s\n", url)
	collector := colly.NewCollector(opts...)

	var ignoredStyles []string
	empty := false
	collector.OnHTML("style", func(e *colly.HTMLElement) {
		for _, line := range strings.Split(e.Text, "\n") {
			if m := hiddenRowStyleRe.FindStringSubmatch(line); m != nil {
				ignoredStyles = append(ignoredStyles, m[1])
			}
		}
	})
	collector.OnHTML("h2", func(e *colly.HTMLElement) {
		if e.Text == "Inga danser funna" {
			empty = true
		}
	})

	var headers []string
	var events []Event
	// now doubles as the "current month" cursor: month header rows move it to
	// the first day of the month they announce.
	now := time.Now()
	collector.OnHTML("table tr", func(e *colly.HTMLElement) {
		if empty {
			return
		}

		// Until headers are known, every row is only inspected for <th> cells.
		if len(headers) == 0 {
			e.ForEach("th", func(_ int, cell *colly.HTMLElement) {
				text := strings.TrimSpace(cell.Text)
				if len(text) == 0 {
					return
				}
				headers = append(headers, text)
				if cell.Attr("colspan") == "2" {
					// A header spanning two cells occupies two column slots.
					headers = append(headers, text)
				}
			})
			return
		}

		class := e.Attr("class")
		if len(class) != 0 && ignored(class, ignoredStyles) {
			return
		}

		var parts []string
		e.ForEachWithBreak("td", func(_ int, cell *colly.HTMLElement) bool {
			if len(cell.Attr("colspan")) == 0 {
				parts = append(parts, cell.Text)
				return true
			}

			// A cell with a colspan attribute is treated as a month header and
			// always ends the scan of this row, whether or not it parses.
			label := strings.TrimSpace(cell.Text)
			monthOnly, err := monday.ParseInLocation("January", label, now.Location(), monday.LocaleSvSE)
			if err == nil {
				// Month name without a year: keep the year of the cursor.
				now = time.Date(now.Year(), monthOnly.Month(), 1, 0, 0, 0, 0, now.Location())
				return false
			}
			withYear, err := monday.ParseInLocation("January 2006", label, now.Location(), monday.LocaleSvSE)
			if err == nil {
				now = time.Date(withYear.Year(), withYear.Month(), 1, 0, 0, 0, 0, now.Location())
			}
			return false
		})
		if len(parts) == 0 {
			return
		}

		// Keep the first three cells and the last cell unconditionally; any
		// other cell is kept only when it is non-empty.
		var fields []string
		for i, p := range parts {
			if i <= 2 || i == len(parts)-1 || len(p) != 0 {
				fields = append(fields, p)
			}
		}
		if len(fields) < 3 {
			return
		}

		// When the band column contains digit-dot-digit, the cell at index 2
		// is removed so the remaining cells shift left.
		// NOTE(review): presumably an extra time-like cell pushed the columns
		// out of alignment — confirm against real pages.
		if digitDotDigitRe.MatchString(value("Dansband", headers, fields)) {
			fields = append(fields[:2], fields[3:]...)
		}

		day, err := strconv.Atoi(value("Datum", headers, fields))
		if err != nil {
			return
		}
		events = append(events, Event{
			Date:         LocalDate(time.Date(now.Year(), now.Month(), day, 0, 0, 0, 0, now.Location())),
			Time:         strings.ReplaceAll(strings.ReplaceAll(value("Tid", headers, fields), " ", ":"), ".", ":"),
			Band:         value("Dansband", headers, fields),
			Place:        value("Dansställe", headers, fields),
			City:         value("Ort", headers, fields),
			Municipality: value("Kommun", headers, fields),
			State:        value("Län", headers, fields),
			Extra:        value("Övrigt", headers, fields),
		})
	})

	if err := collector.Visit(url); err != nil {
		return nil, err
	}
	collector.Wait()
	return events, nil
}
// value returns the whitespace-trimmed entry of values that belongs to the
// first header equal to name.
//
// For every column except "Datum" the entry is read at the header's own
// index. For "Datum" it is read one position to the right of the first
// matching header (presumably because that header spans two cells and the
// day-of-month sits in the second one — confirm against the page layout).
//
// An empty string is returned when no header matches, and also when the row
// has fewer cells than the lookup needs; a short row no longer causes an
// index-out-of-range panic.
func value(name string, headers, values []string) string {
	for i, h := range headers {
		if h != name {
			continue
		}
		idx := i
		if name == "Datum" {
			idx++
		}
		if idx >= len(values) {
			return ""
		}
		return strings.TrimSpace(values[idx])
	}
	return ""
}
// ignored reports whether class is exactly equal to one of the entries in
// styles. An empty or nil styles list never matches.
func ignored(class string, styles []string) bool {
	for i := 0; i < len(styles); i++ {
		if styles[i] == class {
			return true
		}
	}
	return false
}
// Events is the JSON payload that Start posts to the API's /event endpoint.
type Events struct {
	// Start is the date of the earliest event in Events, formatted as
	// YYYY-MM-DD.
Start string `json:"start"`
	// Events holds the scraped events; Start sorts them by date before sending.
Events []Event `json:"events"`
}
// Event is a single row scraped from a listing page. Apart from Date, each
// field holds the trimmed text of the table column named in its comment.
type Event struct {
	// Date is the day of the event; it is serialised as YYYY-MM-DD.
Date LocalDate `json:"date"`
	// Time is the "Tid" column with spaces and dots replaced by colons.
Time string `json:"time"`
	// Band is the "Dansband" column.
Band string `json:"band"`
	// Place is the "Dansställe" column.
Place string `json:"place"`
	// City is the "Ort" column.
City string `json:"city"`
	// Municipality is the "Kommun" column.
Municipality string `json:"municipality"`
	// State is the "Län" column.
State string `json:"state"`
	// Extra is the "Övrigt" column.
Extra string `json:"extra"`
}
// String renders the event on a single line in a struct-literal-like form,
// with the date written as date("YYYY-MM-DD") and every other field quoted.
func (e *Event) String() string {
	const layout = `{Date: date("%s"), Time: "%s", Band: "%s", Place: "%s", City: "%s", Municipality: "%s", State: "%s", Extra: "%s"}`
	day := time.Time(e.Date).Format("2006-01-02")
	return fmt.Sprintf(
		layout,
		day,
		e.Time,
		e.Band,
		e.Place,
		e.City,
		e.Municipality,
		e.State,
		e.Extra,
	)
}
// LocalDate is a time.Time that is rendered as a plain calendar date
// (YYYY-MM-DD), both by Format and when encoded as JSON.
type LocalDate time.Time

// MarshalJSON encodes the date as a JSON string holding the YYYY-MM-DD text
// produced by Format.
func (l LocalDate) MarshalJSON() ([]byte, error) {
	return json.Marshal(l.Format())
}

// Format returns the date as YYYY-MM-DD.
func (l LocalDate) Format() string {
	const layout = "2006-01-02"
	t := time.Time(l)
	return t.Format(layout)
}
+1017
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff