package pages

import (
	"bytes"
	"context"
	"log"
	"net/http"
	"os"
	"strings"

	"codeberg.org/gothub/gothub/utils"
	"github.com/bytesparadise/libasciidoc"
	"github.com/bytesparadise/libasciidoc/pkg/configuration"
	"github.com/carlmjohnson/requests"
	"github.com/gocolly/colly"
	"github.com/gofiber/fiber/v2"
	rst "github.com/hhatto/gorst"
	"github.com/honmaple/org-golang"
	"github.com/m4tty/cajun"
	"github.com/sirupsen/logrus"
	"github.com/yuin/goldmark"
	"github.com/yuin/goldmark/extension"
	"github.com/yuin/goldmark/parser"
	"github.com/yuin/goldmark/renderer/html"
)

// Repo holds the repository metadata scraped from the GitHub page.
type Repo struct {
	Fullname      string
	Description   string
	Parent        string
	Stars         string
	Forks         string
	Watchers      string
	Language      []string
	License       string
	DefaultBranch string
	Readme        string
	Link          string
	Tags          []string
	Branch        []string
}

// RepoFiles describes one entry in the repository file listing.
type RepoFiles struct {
	Name          string
	Path          string
	Type          string
	Fullname      string
	DefaultBranch string
}

func HandleRepo(c *fiber.Ctx) error {
	var repoArray []Repo
	var repoFilesArray []RepoFiles

	// Point at a specific tree when a branch was requested.
	branchExists := ""
	if c.Params("branch") != "" {
		branchExists = "/tree/" + c.Params("branch")
	}

	repoUrl := strings.TrimSuffix(c.Params("repo"), ".git")

	resp, statusErr := http.Get("https://github.com/" + c.Params("user") + "/" + repoUrl + branchExists)
	if statusErr != nil {
		log.Println(statusErr)
		return statusErr
	}
	defer resp.Body.Close()
	if resp.StatusCode == 404 { // I need a better way to do this
		return c.Status(404).Render("error", fiber.Map{
			"title": "Error",
			"error": "Repository " + c.Params("user") + "/" + repoUrl + branchExists + " not found",
		})
	}

	// Scraping
	Scrape := Repo{}
	UserAgent, ok := os.LookupEnv("GOTHUB_USER_AGENT")
	if !ok {
		UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
	}
	sc := colly.NewCollector(colly.AllowedDomains("github.com"), colly.UserAgent(UserAgent))

	// Sidebar: description, star/watcher/fork counts, homepage link, license and topics.
	sc.OnHTML("div.Layout-sidebar", func(e *colly.HTMLElement) {
		Scrape.Fullname = c.Params("user") + "/" + repoUrl
		Scrape.Description = e.ChildText("p.f4")
		Scrape.Stars = e.ChildText("a[href*='/" + c.Params("user") + "/" + repoUrl + "/stargazers' i] strong")
		Scrape.Watchers = e.ChildText("a[href*='/" + c.Params("user") + "/" + repoUrl + "/watchers' i] strong")
		Scrape.Forks = e.ChildText("a[href*='/" + c.Params("user") + "/" + repoUrl + "/forks' i] strong")
		Scrape.Link = e.ChildAttr("span.css-truncate a.text-bold", "href")
		Scrape.License = e.ChildText("a[data-analytics-event*='{\"category\":\"Repository Overview\",\"action\":\"click\",\"label\":\"location:sidebar;file:license\"}']")
		e.ForEach("a.topic-tag", func(i int, el *colly.HTMLElement) {
			Scrape.Tags = append(Scrape.Tags, strings.TrimPrefix(el.Attr("data-octo-dimensions"), "topic:"))
		})
	})

	// Name of the README file, e.g. "README.md".
	sc.OnHTML("div#readme", func(e *colly.HTMLElement) {
		Scrape.Readme = e.ChildText("a[href='#readme']")
	})

	// Language breakdown (name and percentage).
	sc.OnHTML("div.BorderGrid-cell ul.list-style-none", func(e *colly.HTMLElement) {
		e.ForEach("li.d-inline .d-inline-flex", func(i int, el *colly.HTMLElement) {
			Scrape.Language = append(Scrape.Language, el.ChildText("span.text-bold")+" "+el.ChildText("span:contains('%')"))
		})
	})

	// Parent repository, present when this one is a fork.
	sc.OnHTML("div#repository-container-header", func(e *colly.HTMLElement) {
		Scrape.Parent = e.ChildText("span.text-small a")
	})

	// Currently selected (default) branch.
	sc.OnHTML("summary[title*='Switch branches or tags']", func(e *colly.HTMLElement) {
		Scrape.DefaultBranch = e.ChildText("span.css-truncate-target")
	})

	// File listing for the current tree.
	sc.OnHTML("div.js-details-container div.Details-content--hidden-not-important", func(e *colly.HTMLElement) {
		e.ForEach("div.js-navigation-item", func(i int, el *colly.HTMLElement) {
			var FileType string
			if el.ChildAttr("div.flex-shrink-0 svg", "aria-label") == "Directory" {
				FileType = "dir"
			} else {
				FileType = "file"
			}
			repoFilesArray = append(repoFilesArray, RepoFiles{
				Name:          el.ChildText("div.flex-auto span.d-block a.js-navigation-open"),
				Path:          el.ChildText("div.flex-auto span.d-block a.js-navigation-open"),
				Type:          FileType,
				Fullname:      Scrape.Fullname,
				DefaultBranch: Scrape.DefaultBranch,
			})
		})
	})

	sc.Visit("https://github.com/" + c.Params("user") + "/" + repoUrl + branchExists)

	// Add scrape-based info to repoArray
	repoArray = append(repoArray, Scrape)

	// README
	var readmee string
	err := requests.
		URL("https://raw.githubusercontent.com/" + c.Params("user") + "/" + repoUrl + "/" + Scrape.DefaultBranch + "/" + Scrape.Readme).
		ToString(&readmee).
		Fetch(context.Background())
	if err != nil {
		readmee = ""
		log.Println(err)
	}

	// Render the README to HTML according to its file extension.
	ext := strings.TrimPrefix(strings.ToLower(Scrape.Readme), "readme.")
	var mightBeUnsafe []byte
	if ext == "md" || ext == "markdown" || ext == "mdown" || ext == "mkdn" {
		md := goldmark.New(
			goldmark.WithExtensions(extension.GFM),
			goldmark.WithParserOptions(
				parser.WithAutoHeadingID(),
			),
			goldmark.WithRendererOptions(
				html.WithHardWraps(),
				html.WithXHTML(),
			),
		)
		var buf bytes.Buffer
		if err := md.Convert([]byte(readmee), &buf); err != nil {
			log.Println(err)
		}
		mightBeUnsafe = []byte(buf.String())
	} else if ext == "org" {
		readmeReader := strings.NewReader(readmee)
		readme := org.HTML(readmeReader)
		mightBeUnsafe = []byte(readme)
	} else if ext == "creole" {
		output, _ := cajun.Transform(readmee)
		mightBeUnsafe = []byte(output)
	} else if ext == "rst" {
		readmeReader := strings.NewReader(readmee)
		readmeWriter := &strings.Builder{}
		p := rst.NewParser(nil)
		p.ReStructuredText(readmeReader, rst.ToHTML(readmeWriter))
		mightBeUnsafe = []byte(readmeWriter.String())
	} else if ext == "asciidoc" || ext == "adoc" || ext == "asc" {
		readmeReader := strings.NewReader(readmee)
		readmeWriter := &strings.Builder{}
		logrus.SetLevel(logrus.ErrorLevel)
		adocconfig := configuration.NewConfiguration(
			configuration.WithBackEnd("html5"),
			configuration.WithHeaderFooter(false))
		libasciidoc.Convert(readmeReader, readmeWriter, adocconfig)
		mightBeUnsafe = []byte(readmeWriter.String())
	} else {
		// Unknown format: serve the raw text wrapped in a preformatted block.
		readme := "<pre>\n" + readmee + "\n</pre>"
		mightBeUnsafe = []byte(readme)
	}

	// Trust Nobody
	readmeOutput := utils.UGCPolicy().SanitizeBytes(mightBeUnsafe)

	return c.Render("repo", fiber.Map{
		"title":  c.Params("user") + "/" + repoUrl + branchExists,
		"repo":   repoArray,
		"files":  repoFilesArray,
		"readme": string(readmeOutput),
	})
}