norikae/src/scrape.go

65 行
1.4 KiB
Go
Raw 通常表示 履歴

package src
2023-06-20 07:17:33 +09:00
import (
"log"
"net/http"
2023-06-22 19:56:48 +09:00
"time"
"net"
"fmt"
2023-06-20 07:17:33 +09:00
"github.com/gocolly/colly"
)
func Scrape (gurl string) []Route {
2024-02-16 17:37:16 +09:00
ua := "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
"AppleWebKit/537.36 (KHTML, like Gecko) " +
"Chrome/110.0.0.0 Safari/537.36"
2023-06-22 19:56:48 +09:00
sc := colly.NewCollector(
colly.AllowURLRevisit(),
colly.Async(true),
)
2023-06-20 07:17:33 +09:00
2023-06-22 19:56:48 +09:00
sc.WithTransport(&http.Transport {
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
DualStack: true,
}).DialContext,
ForceAttemptHTTP2: true,
MaxIdleConns: 100,
IdleConnTimeout: 90 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
})
2023-06-20 07:17:33 +09:00
2023-06-22 19:56:48 +09:00
sc.OnRequest(func(r *colly.Request) {
r.Headers.Set("User-Agent", ua)
2024-02-16 17:37:16 +09:00
r.Headers.Set(
"Accept",
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
)
2023-06-22 19:56:48 +09:00
r.Headers.Set("Accept-Language", "en-US,en;q=0.5")
})
2023-06-20 07:17:33 +09:00
2023-06-22 19:56:48 +09:00
sc.OnError(func(_ *colly.Response, err error) {
log.Fatal("エラー:", err)
})
var routeArr []Route
2023-06-20 07:17:33 +09:00
2023-06-22 19:56:48 +09:00
for i := 1; i <= 3; i++ {
route := fmt.Sprintf("div#route%02d", i)
sc.OnHTML("div.elmRouteDetail " + route, func (e *colly.HTMLElement) {
2024-02-16 17:37:16 +09:00
Routes := getRouteDetail(e)
2023-06-22 19:56:48 +09:00
routeArr = append(routeArr, Routes)
2023-06-20 07:17:33 +09:00
})
2023-06-22 19:56:48 +09:00
}
sc.Visit(gurl)
sc.Wait()
2023-06-20 07:17:33 +09:00
2023-06-22 19:56:48 +09:00
return routeArr
2023-06-20 07:17:33 +09:00
}