SVNからのミラー
This commit is contained in:
115
page.go
Normal file
115
page.go
Normal file
@@ -0,0 +1,115 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
/* ページのタイトル */
|
||||
func gettitle(str string) string {
|
||||
re := regexp.MustCompile("<title>(.*)</title>")
|
||||
matches := re.FindStringSubmatch(str)
|
||||
if len(matches) > 1 {
|
||||
return matches[1]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func getimg(str string, cnf Config) string {
|
||||
re := regexp.MustCompile(`<img class="NEWS_tempPhoto__picture" src="(.*)" alt="">`)
|
||||
matches := re.FindStringSubmatch(str)
|
||||
if len(matches) > 1 {
|
||||
return strings.Replace(matches[1], "https://", cnf.imgproxy+"/", -1)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func getdesc(str string) string {
|
||||
re := regexp.MustCompile(`<div class="newsArticle">(.*?)</div>`)
|
||||
res := re.ReplaceAllString(str, "")
|
||||
return strip_tags(res)
|
||||
}
|
||||
|
||||
/* 記事の受取 */
|
||||
func get(url string, cnf Config) map[string]string {
|
||||
// デフォルト=エラー
|
||||
res := make(map[string]string)
|
||||
res["title"] = "見つけられない"
|
||||
res["content"] = `
|
||||
<div class="newsArticle"><div class="articleHeading02">
|
||||
<div class="headingArea">
|
||||
<h1>見つけられなかった</h1>
|
||||
</div>
|
||||
</div>
|
||||
<div class="contents clearfix">
|
||||
<div class="article decoratable">
|
||||
<p>ごめんね!</p>
|
||||
</div>
|
||||
</div>
|
||||
`
|
||||
res["img"] = ""
|
||||
res["desc"] = ""
|
||||
res["err"] = ""
|
||||
|
||||
resp, err := http.Get("https://news.mixi.jp" + url)
|
||||
if err != nil {
|
||||
res["err"] = "URLエラー"
|
||||
fmt.Println(res["err"] + ": " + err.Error())
|
||||
return res
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
bytebody, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
res["err"] = "内容はバイトコードとして読み込みに失敗。"
|
||||
fmt.Println(res["err"])
|
||||
return res
|
||||
}
|
||||
|
||||
body, err := EUCJPToUTF8(bytebody)
|
||||
if err != nil {
|
||||
res["err"] = err.Error()
|
||||
fmt.Println(res["err"])
|
||||
return res
|
||||
}
|
||||
|
||||
id, _ := getid(url)
|
||||
|
||||
res["title"] = gettitle(body)
|
||||
if isarticle(url) {
|
||||
if !strings.Contains(body, "newsArticle") {
|
||||
res["content"] = rmebloat(body, cnf)
|
||||
} else {
|
||||
res["img"] = getimg(body, cnf)
|
||||
res["content"] = rmbloat(id, body, cnf)
|
||||
}
|
||||
} else if ispublish(url) {
|
||||
res["content"] = rmpbloat(body, cnf)
|
||||
} else if issubcat(url) {
|
||||
if strings.Contains(body, `<p class="messageAlert">存在しないカテゴリです</p>`) {
|
||||
res["content"] = rmebloat(body, cnf)
|
||||
} else {
|
||||
res["content"] = rmsbloat(body, cnf)
|
||||
}
|
||||
} else if istubayaki(url) {
|
||||
if !strings.Contains(body, "quoteList") {
|
||||
res["content"] = rmebloat(body, cnf)
|
||||
} else {
|
||||
res["content"] = rmqbloat(body, cnf)
|
||||
}
|
||||
} else {
|
||||
if !strings.Contains(body, "注目のニュース") {
|
||||
res["content"] = rmebloat(body, cnf)
|
||||
} else {
|
||||
res["content"] = rmcbloat(body, cnf)
|
||||
}
|
||||
}
|
||||
res["desc"] = getdesc(res["content"])
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
Reference in New Issue
Block a user