feat: initial version

This commit is contained in:
2020-02-07 20:59:25 +01:00
commit 3aaa69175c
12 changed files with 19277 additions and 0 deletions
+234
View File
@@ -0,0 +1,234 @@
package pkg
import (
"bytes"
"encoding/json"
"fmt"
"github.com/gocolly/colly/v2"
"github.com/goodsign/monday"
"gopkg.in/alecthomas/kingpin.v2"
"io/ioutil"
"net/http"
"regexp"
"sort"
"strconv"
"strings"
"time"
)
// Start scrapes every configured danslogen.se listing page, sorts the
// collected events by date and POSTs them as one JSON document to the
// Dancefinder API at "<api>/event".
//
// The API base URL comes from the --api flag or the API environment variable
// and defaults to "http://dancefinder". When no events were scraped nothing is
// posted and nil is returned.
//
// An error is returned when a page cannot be parsed, when the payload cannot
// be encoded, when the POST fails, or when the API answers with a status other
// than 200.
func Start() error {
	api := kingpin.Flag("api", "URL to use to connect to Dancefinder API").Envar("API").Default("http://dancefinder").String()
	kingpin.Parse()
	fmt.Printf("Will push new events to '%s'\n", *api)

	urls := []string{
		"http://www.danslogen.se/dansprogram/lan/blekinge",
		"http://www.danslogen.se/dansprogram/lan/dalarna",
		"http://www.danslogen.se/dansprogram/lan/gotland",
		"http://www.danslogen.se/dansprogram/lan/gavleborg",
		"http://www.danslogen.se/dansprogram/lan/halland",
		"http://www.danslogen.se/dansprogram/lan/jamtland",
		"http://www.danslogen.se/dansprogram/lan/jonkoping",
		"http://www.danslogen.se/dansprogram/lan/kalmar",
		"http://www.danslogen.se/dansprogram/lan/kronoberg",
		"http://www.danslogen.se/dansprogram/lan/norrbotten",
		"http://www.danslogen.se/dansprogram/lan/skane",
		"http://www.danslogen.se/dansprogram/lan/stockholm",
		"http://www.danslogen.se/dansprogram/lan/sodermanland",
		"http://www.danslogen.se/dansprogram/lan/uppsala",
		"http://www.danslogen.se/dansprogram/lan/varmland",
		"http://www.danslogen.se/dansprogram/lan/vasterbotten",
		"http://www.danslogen.se/dansprogram/lan/vasternorrland",
		"http://www.danslogen.se/dansprogram/lan/vastmanland",
		// NOTE(review): "vasta_gotalan" looks like it may be a misspelling;
		// confirm against the site's actual URL before changing it.
		"http://www.danslogen.se/dansprogram/lan/vasta_gotalan",
		"http://www.danslogen.se/dansprogram/lan/orebro",
		"http://www.danslogen.se/dansprogram/lan/ostergotland",
		"http://www.danslogen.se/dansprogram/batar",
	}

	var events []Event
	for _, url := range urls {
		e, err := parse(url)
		if err != nil {
			return err
		}
		events = append(events, e...)
	}
	if len(events) == 0 {
		return nil
	}

	// Stable sort keeps the scrape order for events on the same date.
	sort.SliceStable(events, func(i, j int) bool {
		return time.Time(events[i].Date).Before(time.Time(events[j].Date))
	})

	data, err := json.Marshal(&Events{
		// After sorting, the first event carries the earliest date.
		Start:  time.Time(events[0].Date).Format("2006-01-02"),
		Events: events,
	})
	if err != nil {
		return fmt.Errorf("encoding events: %w", err)
	}

	response, err := http.Post(fmt.Sprintf("%s/event", *api), "application/json", bytes.NewBuffer(data))
	if err != nil {
		fmt.Printf("Error: %+v\n", err)
		return err
	}
	// The body must always be closed, otherwise the connection is leaked.
	defer response.Body.Close()

	if response.StatusCode != 200 {
		// The body is only read for diagnostics, so a read failure is not
		// reported separately; the status error below is what matters.
		body, _ := ioutil.ReadAll(response.Body)
		fmt.Printf("Error: %+v\n", response.Status)
		fmt.Printf("Body: %+v\n", string(body))
		return fmt.Errorf("posting events: unexpected status %s", response.Status)
	}
	return nil
}
// parse visits url with a colly collector and returns the events found in the
// rows of the page's tables. opts are passed straight to colly.NewCollector.
//
// How the page is interpreted:
//   - <style> elements are scanned for "tr.<class> {... font-size: 0" rules;
//     rows carrying such a class are skipped.
//   - An <h2> whose text is exactly "Inga danser funna" marks the page as
//     empty, after which table rows are ignored.
//   - The first table row seen provides the column headers (its <th> cells).
//   - A row whose <td> has a colspan attribute is treated as a month header
//     ("January" or "January 2006" in Swedish) and moves the current
//     month/year used for the rows that follow.
//   - Any other row becomes an Event if its day-of-month column is numeric.
//
// The error from collector.Visit is returned unchanged.
func parse(url string, opts ...colly.CollectorOption) ([]Event, error) {
	fmt.Printf("Parsing %s\n", url)
	collector := colly.NewCollector(opts...)

	// Compiled once per call rather than once per <style> element / table row.
	hiddenRowStyle := regexp.MustCompile(`tr\.(?P<style>.*) {.*font-size: 0`)
	digitDotDigit := regexp.MustCompile(`.*\d\.\d.*`)

	var ignoredStyles []string
	empty := false

	collector.OnHTML("style", func(e *colly.HTMLElement) {
		for _, line := range strings.Split(e.Text, "\n") {
			if m := hiddenRowStyle.FindStringSubmatch(line); m != nil {
				ignoredStyles = append(ignoredStyles, m[1])
			}
		}
	})

	collector.OnHTML("h2", func(e *colly.HTMLElement) {
		if e.Text == "Inga danser funna" {
			empty = true
		}
	})

	var headers []string
	var events []Event
	// now tracks the month (and year) the rows currently being read belong to.
	// It starts at the current time and is moved by month-header rows.
	now := time.Now()

	collector.OnHTML("table tr", func(e *colly.HTMLElement) {
		if empty {
			return
		}

		// Until headers have been collected, every row is tried as the header row.
		if len(headers) == 0 {
			e.ForEach("th", func(i int, cell *colly.HTMLElement) {
				name := strings.TrimSpace(cell.Text)
				if len(name) == 0 {
					return
				}
				headers = append(headers, name)
				// A header spanning two columns is listed twice so header
				// indexes keep lining up with the cell indexes.
				if cell.Attr("colspan") == "2" {
					headers = append(headers, name)
				}
			})
			return
		}

		class := e.Attr("class")
		if len(class) != 0 && ignored(class, ignoredStyles) {
			return
		}

		var parts []string
		e.ForEachWithBreak("td", func(i int, cell *colly.HTMLElement) bool {
			if len(cell.Attr("colspan")) == 0 {
				parts = append(parts, cell.Text)
				return true
			}

			// Month header: stop reading cells and update the current month.
			label := strings.TrimSpace(cell.Text)
			month, err := monday.ParseInLocation("January", label, now.Location(), monday.LocaleSvSE)
			if err == nil {
				// Month name only: keep the year we already have.
				now = time.Date(now.Year(), month.Month(), 1, 0, 0, 0, 0, now.Location())
				return false
			}
			monthYear, err := monday.ParseInLocation("January 2006", label, now.Location(), monday.LocaleSvSE)
			if err == nil {
				now = time.Date(monthYear.Year(), monthYear.Month(), 1, 0, 0, 0, 0, now.Location())
			}
			return false
		})
		if len(parts) == 0 {
			return
		}

		// Always keep the first three cells and the last one; drop any other
		// cell that is empty.
		var fields []string
		for i, p := range parts {
			if i <= 2 || i == len(parts)-1 || len(p) != 0 {
				fields = append(fields, p)
			}
		}
		if len(fields) < 3 {
			return
		}

		// When the band column holds a digit.digit value, the third field is
		// removed so the remaining columns shift left by one.
		// NOTE(review): presumably a time landed in the band column; confirm.
		band := value("Dansband", headers, fields)
		if digitDotDigit.MatchString(band) {
			fields = append(fields[:2], fields[3:]...)
		}

		day, err := strconv.Atoi(value("Datum", headers, fields))
		if err != nil {
			// Rows without a numeric day are not events.
			return
		}

		events = append(events, Event{
			Date:         LocalDate(time.Date(now.Year(), now.Month(), day, 0, 0, 0, 0, now.Location())),
			Time:         strings.ReplaceAll(strings.ReplaceAll(value("Tid", headers, fields), " ", ":"), ".", ":"),
			Band:         value("Dansband", headers, fields),
			Place:        value("Dansställe", headers, fields),
			City:         value("Ort", headers, fields),
			Municipality: value("Kommun", headers, fields),
			State:        value("Län", headers, fields),
			Extra:        value("Övrigt", headers, fields),
		})
	})

	if err := collector.Visit(url); err != nil {
		return nil, err
	}
	collector.Wait()
	return events, nil
}
// value returns the whitespace-trimmed entry of values that sits in the column
// named name, using headers to find the column index. Only the first header
// equal to name is considered.
//
// For the "Datum" column the entry one position to the right of the matching
// header is returned instead.
// NOTE(review): presumably because that header spans two columns and the day
// number is in the second one; confirm against the page layout.
//
// An empty string is returned when name is not among headers or when values is
// too short to contain the column.
func value(name string, headers, values []string) string {
	for i, h := range headers {
		if h != name {
			continue
		}
		idx := i
		if name == "Datum" {
			idx = i + 1
		}
		// A row can have fewer fields than there are headers; treat the
		// missing column as empty instead of panicking.
		if idx >= len(values) {
			return ""
		}
		return strings.TrimSpace(values[idx])
	}
	return ""
}
// ignored reports whether class is exactly equal to one of the entries in
// styles. It returns false for an empty or nil styles slice.
func ignored(class string, styles []string) bool {
	for idx := 0; idx < len(styles); idx++ {
		if styles[idx] == class {
			return true
		}
	}
	return false
}
// Events is the JSON document that Start posts to the API's /event endpoint.
type Events struct {
// Start is a date in YYYY-MM-DD form; Start() sets it to the date of the
// earliest event in Events.
Start string `json:"start"`
// Events holds the scraped events; Start() sorts them by date before posting.
Events []Event `json:"events"`
}
// Event is a single row scraped from a listing table. Apart from Date, each
// field is the trimmed text of the table column named in its comment.
type Event struct {
// Date is built from the current month header and the day in the "Datum" column.
Date LocalDate `json:"date"`
// Time is the "Tid" column with spaces and dots replaced by colons.
Time string `json:"time"`
// Band is the "Dansband" column.
Band string `json:"band"`
// Place is the "Dansställe" column.
Place string `json:"place"`
// City is the "Ort" column.
City string `json:"city"`
// Municipality is the "Kommun" column.
Municipality string `json:"municipality"`
// State is the "Län" column.
State string `json:"state"`
// Extra is the "Övrigt" column.
Extra string `json:"extra"`
}
// String renders the event as a struct-literal-like text, with the date
// formatted as YYYY-MM-DD and wrapped in date("...").
func (e *Event) String() string {
	const layout = `{Date: date("%s"), Time: "%s", Band: "%s", Place: "%s", City: "%s", Municipality: "%s", State: "%s", Extra: "%s"}`
	day := time.Time(e.Date).Format("2006-01-02")
	return fmt.Sprintf(
		layout,
		day,
		e.Time,
		e.Band,
		e.Place,
		e.City,
		e.Municipality,
		e.State,
		e.Extra,
	)
}
// LocalDate is a time.Time that is rendered as a calendar date without a
// time-of-day component.
type LocalDate time.Time

// MarshalJSON encodes the date as a JSON string in YYYY-MM-DD form.
func (l LocalDate) MarshalJSON() ([]byte, error) {
	return json.Marshal(l.Format())
}

// Format returns the date as a YYYY-MM-DD string.
func (l LocalDate) Format() string {
	const layout = "2006-01-02"
	t := time.Time(l)
	return t.Format(layout)
}
+1017
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff