From bb8e434000f970137a1ed5e8f17d12b28919f885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AB=8F=E8=A8=AA=E5=AD=90?= Date: Wed, 15 Nov 2023 00:38:04 +0900 Subject: [PATCH] =?UTF-8?q?=E3=81=A4=E3=81=B6=E3=82=84=E3=81=8D=E3=82=92?= =?UTF-8?q?=E8=A6=8B=E3=82=8B=E3=83=9A=E3=83=BC=E3=82=B8=EF=BC=88list=5Fqu?= =?UTF-8?q?ote.pl=EF=BC=89=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- check.go | 50 ++++++++++++++++----- page.go | 10 ++++- rmbloat.go | 46 ++++++++++++++++--- static/style.css | 113 ++++++++++++++++++++++++++++++++++++++++++++--- view/news.html | 1 - 5 files changed, 196 insertions(+), 24 deletions(-) diff --git a/check.go b/check.go index 70e4477..0bb997f 100644 --- a/check.go +++ b/check.go @@ -2,33 +2,63 @@ package main import ( "strings" + "net/url" + "fmt" ) +func getid(u string) (string, error) { + parse, err := url.Parse(u) + if err != nil { + return "", err + } + + params, err := url.ParseQuery(parse.RawQuery) + if err != nil { + return "", err + } + + id, ok := params["id"] + if !ok || len(id) == 0 { + return "", fmt.Errorf("IDを見つけられませんでした。") + } + + return id[0], nil +} + /* 記事かの確認 */ -func isarticle(url string) bool { - chk := strings.Split(url, "=") +func isarticle(u string) bool { + chk := strings.Split(u, "=") return len(chk) > 2 && (chk[0] == "/view_news.pl?id" || chk[0] == "/view_news.pl?from" || chk[0] == "/view_news.pl?media_id" || chk[0] == "/view_news.pl?stkt") } /* 部分圏かの確認 */ -func issubcat(url string) bool { - chk := strings.Split(url, "=") +func issubcat(u string) bool { + chk := strings.Split(u, "=") return len(chk) > 1 && (chk[0] == "/list_news_category.pl?id" || chk[0] == "/list_news_category.pl?page" || chk[0] == "/list_news_category.pl?sort" || chk[0] == "/list_news_category.pl?type" || chk[0] == "/list_news_category.pl?sub_category_id") && - strings.Contains(url, "type=bn") + strings.Contains(u, "type=bn") } /* 部分かの確認 */ -func iscategory(url string) bool { - chk := strings.Split(url, "=") +func iscategory(u string) bool { + chk := strings.Split(u, "=") return len(chk) > 1 && (chk[0] == "/list_news_category.pl?id" || chk[0] == "/list_news_category.pl?sub_category_id" || chk[0] == "/list_news_category?from") && - !strings.Contains(url, "type=bn") + !strings.Contains(u, "type=bn") } /* 出版社かの確認 */ -func ispublish(url string) bool { - chk := strings.Split(url, "=") +func ispublish(u string) bool { + chk := strings.Split(u, "=") return len(chk) > 1 && (chk[0] == "/list_news_media.pl?id" || chk[0] == "/list_news_media.pl?page") } + +/* つぶやきかの確認 */ +func istubayaki(u string) bool { + chk := strings.Split(u, "=") + return len(chk) > 1 && + (chk[0] == "/list_quote.pl?id" || chk[0] == "/list_quote.pl?type" || chk[0] == "/list_quote.pl?sort" || chk[0] == "/list_quote.pl?news_id") && + strings.Contains(u, "type=voice") && + (strings.Contains(u, "sort=post_time") || strings.Contains(u, "sort=feedback_count")) +} diff --git a/page.go b/page.go index 27dbb12..c2b9c79 100644 --- a/page.go +++ b/page.go @@ -77,13 +77,15 @@ func get(url string, cnf Config) map[string]string { return res } + id, _ := getid(url) + res["title"] = gettitle(body) if isarticle(url) { if !strings.Contains(body, "newsArticle") { res["content"] = rmebloat(body, cnf) } else { res["img"] = getimg(body, cnf) - res["content"] = rmbloat(body, cnf) + res["content"] = rmbloat(id, body, cnf) } } else if ispublish(url) { res["content"] = rmpbloat(body, cnf) @@ -93,6 +95,12 @@ func get(url string, cnf Config) map[string]string { } else { res["content"] = rmsbloat(body, cnf) } + } else if istubayaki(url) { + if !strings.Contains(body, "quoteList") { + res["content"] = rmebloat(body, cnf) + } else { + res["content"] = rmqbloat(body, cnf) + } } else { if !strings.Contains(body, "注目のニュース") { res["content"] = rmebloat(body, cnf) diff --git a/rmbloat.go b/rmbloat.go index feb3ffe..fbab1e3 100644 --- a/rmbloat.go +++ b/rmbloat.go @@ -53,7 +53,7 @@ func rmcbloat(body string, cnf Config) string { } body = strings.TrimSpace("
\n" + strings.TrimSpace(body)) + "\n
\n" - return "
" + body + "
" + return "
\nトップへ\n" + body + "
" } /* エラーだけが残るまで消す */ @@ -73,7 +73,36 @@ func rmebloat(body string, cnf Config) string { body = re.ReplaceAllString(body, r.repl) } - body = strings.TrimSpace("
\n" + strings.TrimSpace(body)) + "\n
\n" + body = strings.TrimSpace("
\nトップへ\n" + strings.TrimSpace(body)) + "\n
\n" + return body +} + +/* つばやきだけが残るまで消す */ +func rmqbloat(body string, cnf Config) string { + var re *regexp.Regexp + + rep := []struct { + pat string + repl string + }{ + {`(?s).*?
`, ""}, + {`(?s)
.*?`, ""}, + {`(?s)
.*?
`, `
`}, + {`https://news-image.mixi.net`, cnf.imgproxy + `/news-image.mixi.net`}, + {`https://img.mixi.net`, cnf.imgproxy + `/img.mixi.net`}, + {`https://news.mixi.jp/`, cnf.domain + `/`}, + {`(?s)
.*?
`, ""}, + // {`・ `, ""}, + // {`\[`, ""}, + // {`\]`, ""}, + } + + for _, r := range rep { + re = regexp.MustCompile(r.pat) + body = re.ReplaceAllString(body, r.repl) + } + + body = strings.TrimSpace("
\nトップへ\n" + strings.TrimSpace(body)) + "\n
\n" return body } @@ -100,7 +129,7 @@ func rmsbloat(body string, cnf Config) string { body = re.ReplaceAllString(body, r.repl) } - body = strings.TrimSpace("
\n" + strings.TrimSpace(body)) + "\n
\n" + body = strings.TrimSpace("
\nトップへ\n" + strings.TrimSpace(body)) + "\n
\n" return body } @@ -128,12 +157,12 @@ func rmpbloat(body string, cnf Config) string { body = re.ReplaceAllString(body, r.repl) } - body = strings.TrimSpace("
\n" + strings.TrimSpace(body)) + "\n
\n" + body = strings.TrimSpace("
\nトップへ\n" + strings.TrimSpace(body)) + "\n
\n" return body } /* 記事だけが残るまで消す */ -func rmbloat(body string, cnf Config) string { +func rmbloat(id string, body string, cnf Config) string { var re *regexp.Regexp rep := []struct { @@ -157,7 +186,6 @@ func rmbloat(body string, cnf Config) string { {`