Files
dancefetcher/pkg/main.go
T

231 lines
6.8 KiB
Go

package pkg
import (
"bytes"
"encoding/json"
"fmt"
"github.com/gocolly/colly/v2"
"github.com/goodsign/monday"
"gopkg.in/alecthomas/kingpin.v2"
"io/ioutil"
"net/http"
"regexp"
"sort"
"strconv"
"strings"
"time"
)
// Start scrapes every configured danslogen.se programme page, sorts the
// collected events by date and posts them as JSON to the Dancefinder API.
//
// The API base URL is taken from the --api flag or the API environment
// variable and defaults to "http://dancefinder"; the events are posted to
// "<api>/event".
//
// It returns the first error encountered while scraping, encoding or posting.
// A non-200 response from the API is printed to stdout but is not reported as
// an error. If no events were found nothing is posted and nil is returned.
func Start() error {
	api := kingpin.Flag("api", "URL to use to connect to Dancefinder API").Envar("API").Default("http://dancefinder").String()
	kingpin.Parse()
	fmt.Printf("Will push new events to '%s'\n", *api)

	urls := []string{
		"http://www.danslogen.se/dansprogram/lan/blekinge",
		"http://www.danslogen.se/dansprogram/lan/dalarna",
		"http://www.danslogen.se/dansprogram/lan/gotland",
		"http://www.danslogen.se/dansprogram/lan/gavleborg",
		"http://www.danslogen.se/dansprogram/lan/halland",
		"http://www.danslogen.se/dansprogram/lan/jamtland",
		"http://www.danslogen.se/dansprogram/lan/jonkoping",
		"http://www.danslogen.se/dansprogram/lan/kalmar",
		"http://www.danslogen.se/dansprogram/lan/kronoberg",
		"http://www.danslogen.se/dansprogram/lan/norrbotten",
		"http://www.danslogen.se/dansprogram/lan/skane",
		"http://www.danslogen.se/dansprogram/lan/stockholm",
		"http://www.danslogen.se/dansprogram/lan/sodermanland",
		"http://www.danslogen.se/dansprogram/lan/uppsala",
		"http://www.danslogen.se/dansprogram/lan/varmland",
		"http://www.danslogen.se/dansprogram/lan/vasterbotten",
		"http://www.danslogen.se/dansprogram/lan/vasternorrland",
		"http://www.danslogen.se/dansprogram/lan/vastmanland",
		// NOTE(review): "vasta_gotalan" may be a misspelling of the county
		// name — confirm against the site before changing it.
		"http://www.danslogen.se/dansprogram/lan/vasta_gotalan",
		"http://www.danslogen.se/dansprogram/lan/orebro",
		"http://www.danslogen.se/dansprogram/lan/ostergotland",
		"http://www.danslogen.se/dansprogram/batar",
	}

	var events []Event
	for _, url := range urls {
		e, err := parse(url)
		if err != nil {
			return err
		}
		events = append(events, e...)
	}
	if len(events) == 0 {
		return nil
	}

	// Stable sort keeps the scrape order for events on the same date.
	sort.SliceStable(events, func(i, j int) bool {
		return time.Time(events[i].Date).Before(time.Time(events[j].Date))
	})

	data, err := json.Marshal(&Events{
		Start:  events[0].Date,
		Events: events,
	})
	if err != nil {
		return fmt.Errorf("encoding events: %w", err)
	}

	response, err := http.Post(fmt.Sprintf("%s/event", *api), "application/json", bytes.NewBuffer(data))
	if err != nil {
		fmt.Printf("Error: %+v\n", err)
		return err
	}
	// The body must always be closed so the underlying connection is released.
	defer response.Body.Close()

	if response.StatusCode != 200 {
		// The body is only read for diagnostics, so a read error is ignored
		// and whatever was read is printed.
		body, _ := ioutil.ReadAll(response.Body)
		fmt.Printf("Error: %+v\n", response.Status)
		fmt.Printf("Body: %+v\n", string(body))
	}
	return nil
}
// parse fetches url with a colly collector built from opts and extracts one
// Event per data row of the tables on the page.
//
// Page handling, as implemented below:
//   - <style> elements are scanned line by line for "tr.<class> {...font-size: 0"
//     rules; rows carrying such a class are skipped.
//   - an <h2> with the exact text "Inga danser hittades" marks the page as
//     empty, and no table rows are processed.
//   - the first row containing <th> cells defines the column headers; a
//     header with colspan="2" is recorded twice so that header indexes line
//     up with the data cells.
//   - a row containing a <td> with a colspan attribute is treated as a month
//     heading ("January" or "January 2006" in Swedish) and moves the current
//     month (and, when given, year) used for the rows that follow.
//
// Rows with fewer than three collected cells, or whose day cannot be parsed
// as an integer, are skipped. The returned error is the one reported by
// collector.Visit.
func parse(url string, opts ...colly.CollectorOption) ([]Event, error) {
	fmt.Printf("Parsing %s\n", url)
	collector := colly.NewCollector(opts...)

	// Compiled once per call instead of once per style element, cell or row.
	hiddenRowRule := regexp.MustCompile(`tr\.(?P<style>.*) {.*font-size: 0`)
	whitespaceRun := regexp.MustCompile(`\s+`)
	digitDotDigit := regexp.MustCompile(`.*\d\.\d.*`)

	var ignoredStyles []string
	empty := false
	collector.OnHTML("style", func(e *colly.HTMLElement) {
		for _, line := range strings.Split(e.Text, "\n") {
			if m := hiddenRowRule.FindStringSubmatch(line); m != nil {
				ignoredStyles = append(ignoredStyles, m[1])
			}
		}
	})
	collector.OnHTML("h2", func(e *colly.HTMLElement) {
		if e.Text == "Inga danser hittades" {
			empty = true
		}
	})

	var headers []string
	var events []Event
	// now tracks the month/year that the rows currently being read belong to;
	// it starts at the current time and is moved by month heading rows.
	now := time.Now()
	collector.OnHTML("table tr", func(e *colly.HTMLElement) {
		if empty {
			return
		}

		// Until a header row has been seen, only look for header cells.
		if len(headers) == 0 {
			e.ForEach("th", func(i int, cell *colly.HTMLElement) {
				headers = append(headers, strings.TrimSpace(cell.Text))
				if cell.Attr("colspan") == "2" {
					headers = append(headers, strings.TrimSpace(cell.Text))
				}
			})
			return
		}

		class := e.Attr("class")
		if len(class) != 0 && ignored(class, ignoredStyles) {
			return
		}

		var fields []string
		e.ForEachWithBreak("td", func(i int, cell *colly.HTMLElement) bool {
			if len(cell.Attr("colspan")) == 0 {
				fields = append(fields, cell.Text)
				return true
			}

			// A cell with colspan is a month heading; stop reading this row
			// whether or not the heading could be parsed.
			dateString := whitespaceRun.ReplaceAllString(strings.TrimSpace(cell.Text), " ")
			if month, err := monday.ParseInLocation("January", dateString, now.Location(), monday.LocaleSvSE); err == nil {
				// Month only: keep the year currently being tracked.
				now = time.Date(now.Year(), month.Month(), 1, 0, 0, 0, 0, now.Location())
				return false
			}
			if monthYear, err := monday.ParseInLocation("January 2006", dateString, now.Location(), monday.LocaleSvSE); err == nil {
				now = time.Date(monthYear.Year(), monthYear.Month(), 1, 0, 0, 0, 0, now.Location())
			}
			return false
		})
		if len(fields) < 3 {
			return
		}

		// If the band column contains a digit-dot-digit pattern, the third
		// cell is dropped so the remaining cells shift one column left.
		// NOTE(review): presumably an extra time-like cell in some rows —
		// confirm against the site's markup.
		band := value("Dansband", headers, fields)
		if digitDotDigit.MatchString(band) {
			fmt.Printf("Removing field 2\n")
			fields = append(fields[:2], fields[3:]...)
		}

		date := value("Datum", headers, fields)
		day, err := strconv.Atoi(date)
		if err != nil {
			fmt.Printf("Unable to parse '%s' as an int\n", date)
			return
		}

		events = append(events, Event{
			Date: LocalDate(time.Date(now.Year(), now.Month(), day, 0, 0, 0, 0, now.Location())),
			// Normalise "HH MM" and "HH.MM" to "HH:MM".
			Time:         strings.ReplaceAll(strings.ReplaceAll(value("Tid", headers, fields), " ", ":"), ".", ":"),
			Band:         value("Dansband", headers, fields),
			Place:        value("Dansställe", headers, fields),
			City:         value("Ort", headers, fields),
			Municipality: value("Kommun", headers, fields),
			State:        value("Län", headers, fields),
			Extra:        value("Övrigt", headers, fields),
		})
	})

	if err := collector.Visit(url); err != nil {
		return nil, err
	}
	collector.Wait()
	return events, nil
}
// value returns the whitespace-trimmed cell from values that sits under the
// first header equal to name.
//
// For the "Datum" header the cell one position to the right of the header is
// returned instead of the cell directly under it.
//
// It returns "" when no header matches name, or when the row has too few
// cells to contain the requested column (instead of panicking on an
// out-of-range index).
func value(name string, headers, values []string) string {
	for i, h := range headers {
		if h != name {
			continue
		}
		idx := i
		if name == "Datum" {
			idx = i + 1
		}
		if idx >= len(values) {
			// Short row: the column is simply missing.
			return ""
		}
		return strings.TrimSpace(values[idx])
	}
	return ""
}
// ignored reports whether class is exactly equal to one of the entries in
// styles. An empty or nil styles slice never matches.
func ignored(class string, styles []string) bool {
	for i := 0; i < len(styles); i++ {
		if styles[i] == class {
			return true
		}
	}
	return false
}
// Events is the JSON payload that Start posts to the API: the list of
// scraped events together with a start date.
type Events struct {
// Start is set by Start to the date of the first event after sorting.
Start LocalDate `json:"start"`
// Events holds the events, encoded under the "events" key.
Events []Event `json:"events"`
}
// Event is a single scraped table row. parse fills each string field from
// the table column named in the field's comment.
type Event struct {
// Date is built from the day in the "Datum" column and the month/year
// tracked while parsing.
Date LocalDate `json:"date"`
// Time comes from the "Tid" column, with spaces and dots replaced by ":".
Time string `json:"time"`
// Band comes from the "Dansband" column.
Band string `json:"band"`
// Place comes from the "Dansställe" column.
Place string `json:"place"`
// City comes from the "Ort" column.
City string `json:"city"`
// Municipality comes from the "Kommun" column.
Municipality string `json:"municipality"`
// State comes from the "Län" column.
State string `json:"state"`
// Extra comes from the "Övrigt" column.
Extra string `json:"extra"`
}
// String renders the event on one line as a brace-delimited list of
// "Field: value" pairs, with the date formatted as YYYY-MM-DD inside date("...").
func (e *Event) String() string {
	day := time.Time(e.Date).Format("2006-01-02")
	return fmt.Sprintf(
		`{Date: date("%s"), Time: "%s", Band: "%s", Place: "%s", City: "%s", Municipality: "%s", State: "%s", Extra: "%s"}`,
		day,
		e.Time,
		e.Band,
		e.Place,
		e.City,
		e.Municipality,
		e.State,
		e.Extra,
	)
}
// LocalDate is a time.Time that is formatted and JSON-encoded as a date only
// (YYYY-MM-DD).
type LocalDate time.Time

// MarshalJSON encodes the date as a JSON string in YYYY-MM-DD form.
func (l LocalDate) MarshalJSON() ([]byte, error) {
	return json.Marshal(l.Format())
}

// Format returns the date formatted as YYYY-MM-DD.
func (l LocalDate) Format() string {
	const layout = "2006-01-02"
	t := time.Time(l)
	return t.Format(layout)
}