232 lines
6.8 KiB
Go
232 lines
6.8 KiB
Go
package pkg
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"regexp"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/alecthomas/kingpin/v2"
|
|
"github.com/gocolly/colly/v2"
|
|
"github.com/goodsign/monday"
|
|
)
|
|
|
|
func Start() error {
|
|
api := kingpin.Flag("api", "URL to use to connect to Dancefinder API").Envar("API").Default("http://dancefinder").String()
|
|
kingpin.Parse()
|
|
|
|
fmt.Printf("Will push new events to '%s'\n", *api)
|
|
|
|
urls := []string{
|
|
"http://www.danslogen.se/dansprogram/lan/blekinge",
|
|
"http://www.danslogen.se/dansprogram/lan/dalarna",
|
|
"http://www.danslogen.se/dansprogram/lan/gotland",
|
|
"http://www.danslogen.se/dansprogram/lan/gavleborg",
|
|
"http://www.danslogen.se/dansprogram/lan/halland",
|
|
"http://www.danslogen.se/dansprogram/lan/jamtland",
|
|
"http://www.danslogen.se/dansprogram/lan/jonkoping",
|
|
"http://www.danslogen.se/dansprogram/lan/kalmar",
|
|
"http://www.danslogen.se/dansprogram/lan/kronoberg",
|
|
"http://www.danslogen.se/dansprogram/lan/norrbotten",
|
|
"http://www.danslogen.se/dansprogram/lan/skane",
|
|
"http://www.danslogen.se/dansprogram/lan/stockholm",
|
|
"http://www.danslogen.se/dansprogram/lan/sodermanland",
|
|
"http://www.danslogen.se/dansprogram/lan/uppsala",
|
|
"http://www.danslogen.se/dansprogram/lan/varmland",
|
|
"http://www.danslogen.se/dansprogram/lan/vasterbotten",
|
|
"http://www.danslogen.se/dansprogram/lan/vasternorrland",
|
|
"http://www.danslogen.se/dansprogram/lan/vastmanland",
|
|
"http://www.danslogen.se/dansprogram/lan/vasta_gotalan",
|
|
"http://www.danslogen.se/dansprogram/lan/orebro",
|
|
"http://www.danslogen.se/dansprogram/lan/ostergotland",
|
|
"http://www.danslogen.se/dansprogram/batar",
|
|
}
|
|
|
|
var events []Event
|
|
for _, url := range urls {
|
|
if e, err := parse(url); err != nil {
|
|
return err
|
|
} else {
|
|
events = append(events, e...)
|
|
}
|
|
}
|
|
|
|
if len(events) > 0 {
|
|
sort.SliceStable(events, func(i, j int) bool {
|
|
return time.Time(events[i].Date).Before(time.Time(events[j].Date))
|
|
})
|
|
|
|
data, err := json.Marshal(&Events{
|
|
Start: events[0].Date,
|
|
Events: events,
|
|
})
|
|
buff := bytes.NewBuffer(data)
|
|
response, err := http.Post(fmt.Sprintf("%s/event", *api), "application/json", buff)
|
|
if err != nil {
|
|
fmt.Printf("Error: %+v\n", err)
|
|
return err
|
|
}
|
|
if response.StatusCode != 200 {
|
|
body, _ := ioutil.ReadAll(response.Body)
|
|
fmt.Printf("Error: %+v\n", response.Status)
|
|
fmt.Printf("Body: %+v\n", string(body))
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func parse(url string, opts ...colly.CollectorOption) ([]Event, error) {
|
|
fmt.Printf("Parsing %s\n", url)
|
|
|
|
collector := colly.NewCollector(opts...)
|
|
|
|
var ignoredStyles []string
|
|
empty := false
|
|
collector.OnHTML("style", func(e *colly.HTMLElement) {
|
|
r := regexp.MustCompile(`tr\.(?P<style>.*) {.*font-size: 0`)
|
|
parts := strings.Split(e.Text, "\n")
|
|
for _, part := range parts {
|
|
if r.MatchString(part) {
|
|
ignoredStyles = append(ignoredStyles, r.FindStringSubmatch(part)[1])
|
|
}
|
|
}
|
|
})
|
|
collector.OnHTML("h2", func(e *colly.HTMLElement) {
|
|
if e.Text == "Inga danser hittades" {
|
|
empty = true
|
|
}
|
|
})
|
|
var headers []string
|
|
var events []Event
|
|
now := time.Now()
|
|
collector.OnHTML("table tr", func(e *colly.HTMLElement) {
|
|
if !empty {
|
|
if len(headers) == 0 {
|
|
e.ForEach("th", func(i int, cell *colly.HTMLElement) {
|
|
headers = append(headers, strings.TrimSpace(cell.Text))
|
|
if cell.Attr("colspan") == "2" {
|
|
headers = append(headers, strings.TrimSpace(cell.Text))
|
|
}
|
|
})
|
|
} else {
|
|
class := e.Attr("class")
|
|
if len(class) == 0 || !ignored(class, ignoredStyles) {
|
|
var parts []string
|
|
e.ForEachWithBreak("td", func(i int, cell *colly.HTMLElement) bool {
|
|
if len(cell.Attr("colspan")) > 0 {
|
|
re := regexp.MustCompile(`\s+`)
|
|
dateString := re.ReplaceAllString(strings.TrimSpace(cell.Text), " ")
|
|
date, err := monday.ParseInLocation("January", dateString, now.Location(), monday.LocaleSvSE)
|
|
if err != nil {
|
|
date, err = monday.ParseInLocation("January 2006", dateString, now.Location(), monday.LocaleSvSE)
|
|
if err != nil {
|
|
return false
|
|
} else {
|
|
now = time.Date(date.Year(), date.Month(), 1, 0, 0, 0, 0, now.Location())
|
|
}
|
|
} else {
|
|
now = time.Date(now.Year(), date.Month(), 1, 0, 0, 0, 0, now.Location())
|
|
}
|
|
return false
|
|
} else {
|
|
parts = append(parts, cell.Text)
|
|
}
|
|
return true
|
|
})
|
|
if len(parts) > 0 {
|
|
fields := parts
|
|
|
|
if len(fields) >= 3 {
|
|
band := value("Dansband", headers, fields)
|
|
if regexp.MustCompile(`.*\d\.\d.*`).MatchString(band) {
|
|
fmt.Printf("Removing field 2\n")
|
|
fields = append(fields[:2], fields[3:]...)
|
|
}
|
|
date := value("Datum", headers, fields)
|
|
day, err := strconv.Atoi(date)
|
|
if err != nil {
|
|
fmt.Printf("Unable to parse '%s' as an int\n", date)
|
|
return
|
|
}
|
|
event := Event{
|
|
Date: LocalDate(time.Date(now.Year(), now.Month(), day, 0, 0, 0, 0, now.Location())),
|
|
Time: strings.ReplaceAll(strings.ReplaceAll(value("Tid", headers, fields), " ", ":"), ".", ":"),
|
|
Band: value("Dansband", headers, fields),
|
|
Place: value("Dansställe", headers, fields),
|
|
City: value("Ort", headers, fields),
|
|
Municipality: value("Kommun", headers, fields),
|
|
State: value("Län", headers, fields),
|
|
Extra: value("Övrigt", headers, fields),
|
|
}
|
|
events = append(events, event)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
})
|
|
if err := collector.Visit(url); err != nil {
|
|
return nil, err
|
|
}
|
|
collector.Wait()
|
|
return events, nil
|
|
}
|
|
|
|
// value returns the whitespace-trimmed cell from values that sits under the
// first header equal to name.
//
// For the header "Datum" the cell one position to the right of the header is
// returned instead of the cell directly under it.
//
// It returns "" when no header matches, or when the row has too few cells for
// the resolved position (instead of panicking on a short row).
func value(name string, headers, values []string) string {
	for i, h := range headers {
		if h != name {
			continue
		}

		idx := i
		if name == "Datum" {
			idx = i + 1
		}

		// Rows may contain fewer cells than there are headers.
		if idx >= len(values) {
			return ""
		}
		return strings.TrimSpace(values[idx])
	}
	return ""
}
|
|
|
|
// ignored reports whether class is exactly equal to one of the entries in
// styles. An empty or nil styles slice never matches.
func ignored(class string, styles []string) bool {
	for idx := 0; idx < len(styles); idx++ {
		if styles[idx] == class {
			return true
		}
	}
	return false
}
|
|
|
|
type Events struct {
|
|
Start LocalDate `json:"start"`
|
|
Events []Event `json:"events"`
|
|
}
|
|
|
|
type Event struct {
|
|
Date LocalDate `json:"date"`
|
|
Time string `json:"time"`
|
|
Band string `json:"band"`
|
|
Place string `json:"place"`
|
|
City string `json:"city"`
|
|
Municipality string `json:"municipality"`
|
|
State string `json:"state"`
|
|
Extra string `json:"extra"`
|
|
}
|
|
|
|
func (e *Event) String() string {
|
|
return fmt.Sprintf(`{Date: date("%s"), Time: "%s", Band: "%s", Place: "%s", City: "%s", Municipality: "%s", State: "%s", Extra: "%s"}`, time.Time(e.Date).Format("2006-01-02"), e.Time, e.Band, e.Place, e.City, e.Municipality, e.State, e.Extra)
|
|
}
|
|
|
|
// LocalDate is a time.Time that is rendered as a date only (YYYY-MM-DD).
type LocalDate time.Time

// MarshalJSON encodes the date as a JSON string holding the result of Format.
func (l LocalDate) MarshalJSON() ([]byte, error) {
	return json.Marshal(l.Format())
}

// Format returns the date as YYYY-MM-DD; the time-of-day part is dropped.
func (l LocalDate) Format() string {
	const layout = "2006-01-02"
	t := time.Time(l)
	return t.Format(layout)
}
|