2024-02-16 16:21:06 +09:00
|
|
|
package src
|
2023-06-20 07:17:33 +09:00
|
|
|
|
|
|
|
import (
|
|
|
|
"log"
|
|
|
|
"net/http"
|
2023-06-22 19:56:48 +09:00
|
|
|
"time"
|
|
|
|
"net"
|
|
|
|
"fmt"
|
2023-06-20 07:17:33 +09:00
|
|
|
|
|
|
|
"github.com/gocolly/colly"
|
|
|
|
)
|
|
|
|
|
2024-02-16 16:21:06 +09:00
|
|
|
func Scrape (gurl string) []Route {
|
2024-02-16 17:37:16 +09:00
|
|
|
ua := "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
|
|
|
"AppleWebKit/537.36 (KHTML, like Gecko) " +
|
|
|
|
"Chrome/110.0.0.0 Safari/537.36"
|
|
|
|
|
2023-06-22 19:56:48 +09:00
|
|
|
sc := colly.NewCollector(
|
|
|
|
colly.AllowURLRevisit(),
|
|
|
|
colly.Async(true),
|
|
|
|
)
|
2023-06-20 07:17:33 +09:00
|
|
|
|
2023-06-22 19:56:48 +09:00
|
|
|
sc.WithTransport(&http.Transport {
|
|
|
|
Proxy: http.ProxyFromEnvironment,
|
|
|
|
DialContext: (&net.Dialer{
|
|
|
|
Timeout: 30 * time.Second,
|
|
|
|
KeepAlive: 30 * time.Second,
|
|
|
|
DualStack: true,
|
|
|
|
}).DialContext,
|
|
|
|
ForceAttemptHTTP2: true,
|
|
|
|
MaxIdleConns: 100,
|
|
|
|
IdleConnTimeout: 90 * time.Second,
|
|
|
|
TLSHandshakeTimeout: 10 * time.Second,
|
|
|
|
ExpectContinueTimeout: 1 * time.Second,
|
|
|
|
})
|
2023-06-20 07:17:33 +09:00
|
|
|
|
2023-06-22 19:56:48 +09:00
|
|
|
sc.OnRequest(func(r *colly.Request) {
|
|
|
|
r.Headers.Set("User-Agent", ua)
|
2024-02-16 17:37:16 +09:00
|
|
|
r.Headers.Set(
|
|
|
|
"Accept",
|
|
|
|
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
|
|
)
|
2023-06-22 19:56:48 +09:00
|
|
|
r.Headers.Set("Accept-Language", "en-US,en;q=0.5")
|
|
|
|
})
|
2023-06-20 07:17:33 +09:00
|
|
|
|
2023-06-22 19:56:48 +09:00
|
|
|
sc.OnError(func(_ *colly.Response, err error) {
|
|
|
|
log.Fatal("エラー:", err)
|
|
|
|
})
|
|
|
|
|
|
|
|
var routeArr []Route
|
2023-06-20 07:17:33 +09:00
|
|
|
|
2023-06-22 19:56:48 +09:00
|
|
|
for i := 1; i <= 3; i++ {
|
|
|
|
route := fmt.Sprintf("div#route%02d", i)
|
|
|
|
sc.OnHTML("div.elmRouteDetail " + route, func (e *colly.HTMLElement) {
|
2024-02-16 17:37:16 +09:00
|
|
|
Routes := getRouteDetail(e)
|
2023-06-22 19:56:48 +09:00
|
|
|
routeArr = append(routeArr, Routes)
|
2023-06-20 07:17:33 +09:00
|
|
|
})
|
2023-06-22 19:56:48 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
sc.Visit(gurl)
|
|
|
|
sc.Wait()
|
2023-06-20 07:17:33 +09:00
|
|
|
|
2023-06-22 19:56:48 +09:00
|
|
|
return routeArr
|
2023-06-20 07:17:33 +09:00
|
|
|
}
|