From 5374096c8ac3f676480901113a77ca5af01fdd05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AB=8F=E8=A8=AA=E5=AD=90?= Date: Mon, 13 Nov 2023 15:58:02 +0900 Subject: [PATCH] =?UTF-8?q?=E5=87=BA=E7=89=88=E7=A4=BE=E3=83=9A=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- page.go | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/page.go b/page.go index 39459c6..be1e257 100644 --- a/page.go +++ b/page.go @@ -77,6 +77,12 @@ func isarticle(url string) bool { return len(chk) > 2 && chk[0] == "/view_news.pl?id" } +/* 出版社かの確認 */ +func ispublish(url string) bool { + chk := strings.Split(url, "=") + return len(chk) > 1 && chk[0] == "/list_news_media.pl?id" +} + /* カテゴリーだけが残るまで消す */ func rmcbloat(body string, cnf Config) string { var re *regexp.Regexp @@ -149,6 +155,35 @@ func rmebloat(body string, cnf Config) string { return body } +/* 出版社だけが残るまで消す */ +func rmpbloat(body string, cnf Config) string { + var re *regexp.Regexp + + rep := []struct { + pat string + repl string + }{ + {`(?s).*?
`, ""}, + {`(?s).*?`, ""}, + {`(?s)
.*?

`, `

`}, + {`(?s)

\n" + strings.TrimSpace(body)) + "\n
\n" + return body +} + /* 記事だけが残るまで消す */ func rmbloat(body string, cnf Config) string { var re *regexp.Regexp @@ -174,6 +209,7 @@ func rmbloat(body string, cnf Config) string { {`