2022-11-30 23:08:16 +09:00
|
|
|
package pages
|
|
|
|
|
|
|
|
import (
|
2023-03-22 03:28:20 +09:00
|
|
|
"bytes"
|
2023-02-14 23:54:33 +09:00
|
|
|
"context"
|
2023-04-02 01:53:44 +09:00
|
|
|
"log"
|
|
|
|
"net/http"
|
|
|
|
"os"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"codeberg.org/gothub/gothub/utils"
|
2023-03-22 03:28:20 +09:00
|
|
|
"github.com/bytesparadise/libasciidoc"
|
|
|
|
"github.com/bytesparadise/libasciidoc/pkg/configuration"
|
2022-12-01 01:37:27 +09:00
|
|
|
"github.com/carlmjohnson/requests"
|
2023-02-14 23:54:33 +09:00
|
|
|
"github.com/gocolly/colly"
|
2022-11-30 23:08:16 +09:00
|
|
|
"github.com/gofiber/fiber/v2"
|
2023-04-02 01:53:44 +09:00
|
|
|
rst "github.com/hhatto/gorst"
|
2023-03-22 03:28:20 +09:00
|
|
|
"github.com/honmaple/org-golang"
|
|
|
|
"github.com/m4tty/cajun"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
|
|
"github.com/yuin/goldmark"
|
|
|
|
"github.com/yuin/goldmark/extension"
|
|
|
|
"github.com/yuin/goldmark/parser"
|
|
|
|
"github.com/yuin/goldmark/renderer/html"
|
2022-11-30 23:08:16 +09:00
|
|
|
)
|
|
|
|
|
|
|
|
// Repo holds the repository metadata that HandleRepo scrapes from a GitHub
// repository page and hands to the "repo" template. Every value is kept as
// the text found on the page; nothing is parsed into numbers.
type Repo struct {
	// Fullname is the "user/repo" identifier built from the route parameters.
	Fullname string
	// Description is the text of the sidebar's "p.f4" element.
	Description string
	// Parent is the text of the "span.text-small a" link in the repository
	// header (presumably the upstream of a fork; confirm against the page markup).
	Parent string
	// Stars is the text shown inside the sidebar's stargazers link.
	Stars string
	// Forks is the text shown inside the sidebar's forks link.
	Forks string
	// Watchers is the text shown inside the sidebar's watchers link.
	Watchers string
	// Language lists one entry per language in the sidebar, each formatted as
	// the language name, a space, and the percentage text.
	Language []string
	// License is the text of the sidebar's license link.
	License string
	// DefaultBranch is the branch name shown in the branch/tag switcher of the
	// page that was scraped.
	DefaultBranch string
	// Readme is the text of the "#readme" anchor inside the README box; it is
	// used as the README file name when fetching the raw file.
	Readme string
	// Link is the href of the sidebar's "span.css-truncate a.text-bold" link.
	Link string
	// Tags holds the repository topics, with the "topic:" prefix removed.
	Tags []string
	// Branch is not populated by HandleRepo.
	Branch []string
}
|
|
|
|
|
|
|
|
// RepoFiles describes one entry of the file listing scraped from a GitHub
// repository page by HandleRepo.
type RepoFiles struct {
	// Name is the link text of the entry in the file listing.
	Name string
	// Path is filled with the same link text as Name by HandleRepo.
	Path string
	// Type is "dir" when the entry's icon is labelled "Directory" and "file"
	// otherwise.
	Type string
	// Fullname is the "user/repo" identifier of the repository the entry
	// belongs to.
	Fullname string
	// DefaultBranch is the branch name scraped from the page's branch switcher.
	DefaultBranch string
}
|
|
|
|
|
|
|
|
func HandleRepo(c *fiber.Ctx) error {
|
|
|
|
var repoArray []Repo
|
2022-12-01 01:37:27 +09:00
|
|
|
var repoFilesArray []RepoFiles
|
2023-03-17 01:45:21 +09:00
|
|
|
branchExists := ""
|
|
|
|
if strings.Count(c.Params("branch"), "")-1 > 0 {
|
|
|
|
branchExists = "/tree/" + c.Params("branch")
|
|
|
|
}
|
2023-03-19 19:19:21 +09:00
|
|
|
repoUrl := strings.TrimSuffix(c.Params("repo"), ".git")
|
|
|
|
resp, statusErr := http.Get("https://github.com/" + c.Params("user") + "/" + repoUrl + branchExists)
|
2023-02-14 23:54:33 +09:00
|
|
|
if statusErr != nil {
|
|
|
|
log.Println(statusErr)
|
|
|
|
}
|
|
|
|
if resp.StatusCode == 404 {
|
|
|
|
// I need a better way to do this
|
2022-12-01 01:37:27 +09:00
|
|
|
return c.Status(404).Render("error", fiber.Map{
|
2023-01-06 04:58:41 +09:00
|
|
|
"title": "Error",
|
2023-03-19 19:19:21 +09:00
|
|
|
"error": "Repository " + c.Params("user") + "/" + repoUrl + branchExists + " not found",
|
2022-12-01 01:37:27 +09:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2023-02-14 23:54:33 +09:00
|
|
|
// Scraping
|
2023-02-15 00:21:44 +09:00
|
|
|
Scrape := Repo{}
|
2023-02-14 23:54:33 +09:00
|
|
|
|
|
|
|
UserAgent, ok := os.LookupEnv("GOTHUB_USER_AGENT")
|
|
|
|
if !ok {
|
|
|
|
UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
|
|
|
|
}
|
2022-12-01 01:37:27 +09:00
|
|
|
|
2023-02-14 23:54:33 +09:00
|
|
|
sc := colly.NewCollector(colly.AllowedDomains("github.com"), colly.UserAgent(UserAgent))
|
|
|
|
sc.OnHTML("div.Layout-sidebar", func(e *colly.HTMLElement) {
|
2023-03-19 19:19:21 +09:00
|
|
|
Scrape.Fullname = c.Params("user") + "/" + repoUrl
|
2023-02-14 23:54:33 +09:00
|
|
|
Scrape.Description = e.ChildText("p.f4")
|
2023-03-19 19:19:21 +09:00
|
|
|
Scrape.Stars = e.ChildText("a[href*='/" + c.Params("user") + "/" + repoUrl + "/stargazers' i] strong")
|
|
|
|
Scrape.Watchers = e.ChildText("a[href*='/" + c.Params("user") + "/" + repoUrl + "/watchers' i] strong")
|
|
|
|
Scrape.Forks = e.ChildText("a[href*='/" + c.Params("user") + "/" + repoUrl + "/forks' i] strong")
|
2023-03-16 18:55:17 +09:00
|
|
|
Scrape.Link = e.ChildAttr("span.css-truncate a.text-bold", "href")
|
2023-02-14 23:54:33 +09:00
|
|
|
Scrape.License = e.ChildText("a[data-analytics-event*='{\"category\":\"Repository Overview\",\"action\":\"click\",\"label\":\"location:sidebar;file:license\"}']")
|
2023-03-16 18:55:17 +09:00
|
|
|
e.ForEach("a.topic-tag", func(i int, el *colly.HTMLElement) {
|
|
|
|
Scrape.Tags = append(Scrape.Tags, strings.TrimPrefix(el.Attr("data-octo-dimensions"), "topic:"))
|
|
|
|
})
|
2023-02-14 23:54:33 +09:00
|
|
|
})
|
|
|
|
sc.OnHTML("div#readme", func(e *colly.HTMLElement) {
|
2023-03-22 03:20:39 +09:00
|
|
|
Scrape.Readme = e.ChildText("a[href='#readme']")
|
2023-02-14 23:54:33 +09:00
|
|
|
})
|
2023-02-15 00:21:44 +09:00
|
|
|
sc.OnHTML("div.BorderGrid-cell ul.list-style-none", func(e *colly.HTMLElement) {
|
|
|
|
e.ForEach("li.d-inline .d-inline-flex", func(i int, el *colly.HTMLElement) {
|
|
|
|
Scrape.Language = append(Scrape.Language, el.ChildText("span.text-bold")+" "+el.ChildText("span:contains('%')"))
|
|
|
|
})
|
|
|
|
})
|
2023-02-14 23:54:33 +09:00
|
|
|
sc.OnHTML("div#repository-container-header", func(e *colly.HTMLElement) {
|
|
|
|
Scrape.Parent = e.ChildText("span.text-small a")
|
|
|
|
})
|
|
|
|
sc.OnHTML("summary[title*='Switch branches or tags']", func(e *colly.HTMLElement) {
|
|
|
|
Scrape.DefaultBranch = e.ChildText("span.css-truncate-target")
|
|
|
|
})
|
2023-02-15 21:32:48 +09:00
|
|
|
sc.OnHTML("div.js-details-container div.Details-content--hidden-not-important", func(e *colly.HTMLElement) {
|
|
|
|
e.ForEach("div.js-navigation-item", func(i int, el *colly.HTMLElement) {
|
|
|
|
var FileType string
|
|
|
|
if el.ChildAttr("div.flex-shrink-0 svg", "aria-label") == "Directory" {
|
|
|
|
FileType = "dir"
|
|
|
|
} else {
|
|
|
|
FileType = "file"
|
|
|
|
}
|
|
|
|
repoFilesArray = append(repoFilesArray, RepoFiles{
|
|
|
|
Name: el.ChildText("div.flex-auto span.d-block a.js-navigation-open"),
|
|
|
|
Path: el.ChildText("div.flex-auto span.d-block a.js-navigation-open"),
|
|
|
|
Type: FileType,
|
|
|
|
Fullname: Scrape.Fullname,
|
|
|
|
DefaultBranch: Scrape.DefaultBranch,
|
|
|
|
})
|
|
|
|
})
|
|
|
|
})
|
2023-03-19 19:19:21 +09:00
|
|
|
sc.Visit("https://github.com/" + c.Params("user") + "/" + repoUrl + branchExists)
|
2023-02-14 23:54:33 +09:00
|
|
|
// Add scrape-based info to repoArray
|
|
|
|
repoArray = append(repoArray, Scrape)
|
|
|
|
|
|
|
|
// README
|
|
|
|
var readmee string
|
2022-12-01 01:37:27 +09:00
|
|
|
err := requests.
|
2023-03-19 19:19:21 +09:00
|
|
|
URL("https://raw.githubusercontent.com/" + c.Params("user") + "/" + repoUrl + "/" + Scrape.DefaultBranch + "/" + Scrape.Readme).
|
2022-12-01 01:37:27 +09:00
|
|
|
ToString(&readmee).
|
|
|
|
Fetch(context.Background())
|
|
|
|
if err != nil {
|
|
|
|
readmee = ""
|
|
|
|
log.Println(err)
|
|
|
|
}
|
2023-03-22 03:28:20 +09:00
|
|
|
ext := strings.TrimPrefix(strings.ToLower(Scrape.Readme), "readme.")
|
|
|
|
var mightBeUnsafe []byte
|
|
|
|
if ext == "md" || ext == "markdown" || ext == "mdown" || ext == "mkdn" {
|
|
|
|
md := goldmark.New(
|
|
|
|
goldmark.WithExtensions(extension.GFM),
|
|
|
|
goldmark.WithParserOptions(
|
|
|
|
parser.WithAutoHeadingID(),
|
|
|
|
),
|
|
|
|
goldmark.WithRendererOptions(
|
|
|
|
html.WithHardWraps(),
|
|
|
|
html.WithXHTML(),
|
|
|
|
),
|
|
|
|
)
|
|
|
|
var buf bytes.Buffer
|
|
|
|
if err := md.Convert([]byte(readmee), &buf); err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
mightBeUnsafe = []byte(buf.String())
|
|
|
|
} else if ext == "org" {
|
|
|
|
readmeReader := strings.NewReader(readmee)
|
|
|
|
readme := org.HTML(readmeReader)
|
|
|
|
mightBeUnsafe = []byte(readme)
|
|
|
|
} else if ext == "creole" {
|
|
|
|
output, _ := cajun.Transform(readmee)
|
|
|
|
mightBeUnsafe = []byte(output)
|
|
|
|
} else if ext == "rst" {
|
|
|
|
readmeReader := strings.NewReader(readmee)
|
|
|
|
readmeWriter := &strings.Builder{}
|
|
|
|
p := rst.NewParser(nil)
|
|
|
|
p.ReStructuredText(readmeReader, rst.ToHTML(readmeWriter))
|
|
|
|
mightBeUnsafe = []byte(readmeWriter.String())
|
|
|
|
} else if ext == "asciidoc" || ext == "adoc" || ext == "asc" {
|
|
|
|
readmeReader := strings.NewReader(readmee)
|
|
|
|
readmeWriter := &strings.Builder{}
|
|
|
|
logrus.SetLevel(logrus.ErrorLevel)
|
|
|
|
adocconfig := configuration.NewConfiguration(
|
|
|
|
configuration.WithBackEnd("html5"),
|
|
|
|
configuration.WithHeaderFooter(false))
|
|
|
|
libasciidoc.Convert(readmeReader, readmeWriter, adocconfig)
|
|
|
|
mightBeUnsafe = []byte(readmeWriter.String())
|
|
|
|
} else {
|
|
|
|
readme := "<pre>\n" + readmee + "\n</pre>"
|
|
|
|
mightBeUnsafe = []byte(readme)
|
|
|
|
}
|
2022-12-01 01:37:27 +09:00
|
|
|
// Trust Nobody
|
2023-02-13 18:25:39 +09:00
|
|
|
readmeOutput := utils.UGCPolicy().SanitizeBytes(mightBeUnsafe)
|
2022-12-01 01:37:27 +09:00
|
|
|
|
2022-11-30 23:08:16 +09:00
|
|
|
return c.Render("repo", fiber.Map{
|
2023-04-02 01:53:44 +09:00
|
|
|
"title": c.Params("user") + "/" + repoUrl + branchExists,
|
2022-12-01 01:37:27 +09:00
|
|
|
"repo": repoArray,
|
|
|
|
"files": repoFilesArray,
|
|
|
|
"readme": string(readmeOutput),
|
2022-11-30 23:08:16 +09:00
|
|
|
})
|
|
|
|
}
|