// gitlin/pages/repo.go

package pages

import (
	"log"
	"net/http"
	"os"
	"strings"

	"gitler.moe/suwako/gitlin/utils"
	"github.com/gocolly/colly"
	"github.com/gofiber/fiber/v2"
)

// Repo is the repository metadata handed to the "repo" template. Every value
// is kept as the text scraped from the GitHub page, so counters are strings.
type Repo struct {
	// Fullname is "<user>/<repo>". Description and Parent are scraped text
	// (Parent presumably names the upstream of a fork; confirm against the
	// template).
	Fullname, Description, Parent string

	// Counters as displayed on the page. CommitsBehind has its
	// " commits behind" suffix trimmed.
	Stars, Forks, CommitsBehind, Watchers string

	// Language holds one "<name> <percentage>" entry per listed language.
	Language []string

	// License is the sidebar license link text, DefaultBranch the branch shown
	// in the branch switcher, Readme the text of the README anchor and Link
	// the href of the sidebar link.
	License, DefaultBranch, Readme, Link string

	// Tags are the repository topics. Branch is not filled by HandleRepo.
	Tags, Branch []string
}

// RepoFiles describes one entry of a repository's file listing.
type RepoFiles struct {
	// Name and Path are both taken from the entry's link text; Type is "dir"
	// or "file"; Fullname and DefaultBranch are copied from the scraped Repo.
	Name, Path, Type, Fullname, DefaultBranch string
}

// HandleRepo renders the overview page for a GitHub repository.
//
// It reads the "user", "repo" and optional "branch" route parameters, probes
// the matching github.com page, then scrapes that page with colly for the
// repository metadata, language list, file listing and README before
// rendering the "repo" template.
//
// If the probe request fails, the "error" template is rendered with a 502
// status; if GitHub answers 404, the "error" template is rendered with a 404
// status. A failure of the scrape itself is logged and the page is rendered
// with whatever was collected.
func HandleRepo(c *fiber.Ctx) error {
	user := c.Params("user")
	// Accept clone-style URLs such as /user/repo.git.
	repoURL := strings.TrimSuffix(c.Params("repo"), ".git")
	fullname := user + "/" + repoURL

	// branchPath stays empty when no branch was requested.
	branchPath := ""
	if branch := c.Params("branch"); branch != "" {
		branchPath = "/tree/" + branch
	}
	pageURL := "https://github.com/" + fullname + branchPath

	// Probe the page first so a missing repository yields a proper 404.
	// NOTE(review): http.Get uses the default client, which has no timeout.
	resp, err := http.Get(pageURL)
	if err != nil {
		// resp is nil when err is non-nil, so it must not be inspected.
		log.Println(err)
		return c.Status(http.StatusBadGateway).Render("error", fiber.Map{
			"title": "Error",
			"error": "Could not reach GitHub for " + fullname + branchPath,
		})
	}
	status := resp.StatusCode
	// Only the status code is needed; release the connection right away.
	resp.Body.Close()
	if status == http.StatusNotFound {
		return c.Status(http.StatusNotFound).Render("error", fiber.Map{
			"title": "Error",
			"error": "Repository " + fullname + branchPath + " not found",
		})
	}

	userAgent, ok := os.LookupEnv("GITLIN_USER_AGENT")
	if !ok {
		userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
	}

	// Fullname is known up front, so it does not depend on any callback
	// having fired before the file listing is built.
	scraped := Repo{Fullname: fullname}
	var files []RepoFiles
	var readmeOutput string

	sc := colly.NewCollector(colly.AllowedDomains("github.com"), colly.UserAgent(userAgent))

	sc.OnHTML("div.Layout-sidebar", func(e *colly.HTMLElement) {
		// Shared prefix of the case-insensitive href selectors below.
		repoHref := "a[href*='/" + fullname + "/"
		scraped.Description = e.ChildText("p.f4")
		scraped.Stars = e.ChildText(repoHref + "stargazers' i] strong")
		scraped.Watchers = e.ChildText(repoHref + "watchers' i] strong")
		scraped.Forks = e.ChildText(repoHref + "forks' i] strong")
		scraped.Link = e.ChildAttr("span.css-truncate a.text-bold", "href")
		scraped.License = e.ChildText("a[data-analytics-event*='{\"category\":\"Repository Overview\",\"action\":\"click\",\"label\":\"location:sidebar;file:license\"}']")
		e.ForEach("a.topic-tag", func(_ int, el *colly.HTMLElement) {
			scraped.Tags = append(scraped.Tags, strings.TrimPrefix(el.Attr("data-octo-dimensions"), "topic:"))
		})
	})
	sc.OnHTML("div.Box-body div.d-flex div span", func(e *colly.HTMLElement) {
		scraped.CommitsBehind = strings.TrimSuffix(e.ChildText("a"), " commits behind")
	})
	sc.OnHTML("div#readme", func(e *colly.HTMLElement) {
		scraped.Readme = e.ChildText("a[href='#readme']")
	})
	sc.OnHTML("div#readme div.Box-body", func(e *colly.HTMLElement) {
		content, err := e.DOM.Html()
		if err != nil {
			log.Println(err)
			return
		}
		// Sanitize the README, then point GitHub-hosted URLs at this
		// instance: github.com links become relative, camo and raw hosts
		// map to the /camo and /raw routes. The "user-content-" prefix is
		// stripped as well (presumably so anchors match their headings).
		readme := string(utils.UGCPolicy().SanitizeBytes([]byte(content)))
		readme = strings.ReplaceAll(readme, "https://github.com", "")
		readme = strings.ReplaceAll(readme, "user-content-", "")
		readme = strings.ReplaceAll(readme, "https://camo.githubusercontent.com", "/camo")
		readme = strings.ReplaceAll(readme, "https://raw.githubusercontent.com", "/raw")
		readmeOutput = readme
	})
	sc.OnHTML("div.BorderGrid-cell ul.list-style-none", func(e *colly.HTMLElement) {
		e.ForEach("li.d-inline .d-inline-flex", func(_ int, el *colly.HTMLElement) {
			scraped.Language = append(scraped.Language, el.ChildText("span.text-bold")+" "+el.ChildText("span:contains('%')"))
		})
	})
	sc.OnHTML("div#repository-container-header", func(e *colly.HTMLElement) {
		scraped.Parent = e.ChildText("span.text-small a")
	})
	sc.OnHTML("summary[title*='Switch branches or tags']", func(e *colly.HTMLElement) {
		scraped.DefaultBranch = e.ChildText("span.css-truncate-target")
	})
	sc.OnHTML("div.js-details-container div.Details-content--hidden-not-important", func(e *colly.HTMLElement) {
		e.ForEach("div.js-navigation-item", func(_ int, el *colly.HTMLElement) {
			fileType := "file"
			if el.ChildAttr("div.flex-shrink-0 svg", "aria-label") == "Directory" {
				fileType = "dir"
			}
			// Name and Path share the same link text, so query it once.
			name := el.ChildText("div.flex-auto span.d-block a.js-navigation-open")
			files = append(files, RepoFiles{
				Name:          name,
				Path:          name,
				Type:          fileType,
				Fullname:      scraped.Fullname,
				DefaultBranch: scraped.DefaultBranch,
			})
		})
	})

	if err := sc.Visit(pageURL); err != nil {
		// Best effort: still render whatever was scraped before the failure.
		log.Println(err)
	}

	return c.Render("repo", fiber.Map{
		"title":  fullname + branchPath,
		"branch": utils.Branch,
		"repo":   []Repo{scraped},
		"files":  files,
		"readme": readmeOutput,
	})
}
Add repo page Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-11-30 23:08:16 +09:00			`package pages`

			`import (`
The Instance Owner Update Signed-off-by: Odyssium <hi@odyssey346.dev> 2023-04-02 01:53:44 +09:00			`"log"`
			`"net/http"`
			`"os"`
			`"strings"`

GotHub→Gitlin、Codeberg→Gitler 2023-06-06 16:04:50 +09:00			`"gitler.moe/suwako/gitlin/utils"`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`"github.com/gocolly/colly"`
Add repo page Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-11-30 23:08:16 +09:00			`"github.com/gofiber/fiber/v2"`
			`)`

			`type Repo struct {`
			`Fullname string`
			`Description string`
			`Parent string`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`Stars string`
			`Forks string`
add commitsbehind 2023-04-28 15:38:42 +09:00			`CommitsBehind string`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`Watchers string`
add languages used in repo 2023-02-15 00:21:44 +09:00			`Language []string`
Add repo page Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-11-30 23:08:16 +09:00			`License string`
			`DefaultBranch string`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`Readme string`
add support for repo tags and links 2023-03-16 18:55:17 +09:00			`Link string`
			`Tags []string`
Add support for viewing different branches of a repo 2023-03-17 01:45:21 +09:00			`Branch []string`
Add repo page Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-11-30 23:08:16 +09:00			`}`

			`type RepoFiles struct {`
Initial repo view. Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-12-01 01:37:27 +09:00			`Name string`
			`Path string`
			`Type string`
			`Fullname string`
			`DefaultBranch string`
Add repo page Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-11-30 23:08:16 +09:00			`}`

			`func HandleRepo(c *fiber.Ctx) error {`
			`var repoArray []Repo`
Initial repo view. Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-12-01 01:37:27 +09:00			`var repoFilesArray []RepoFiles`
move to scraping readme for dirview and fileview as well 2023-04-27 21:57:44 +09:00			`var readmeOutput string`
Add support for viewing different branches of a repo 2023-03-17 01:45:21 +09:00			`branchExists := ""`
			`if strings.Count(c.Params("branch"), "")-1 > 0 {`
			`branchExists = "/tree/" + c.Params("branch")`
			`}`
Load even when repoUrl ends with .git 2023-03-19 19:19:21 +09:00			`repoUrl := strings.TrimSuffix(c.Params("repo"), ".git")`
			`resp, statusErr := http.Get("https://github.com/" + c.Params("user") + "/" + repoUrl + branchExists)`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`if statusErr != nil {`
			`log.Println(statusErr)`
			`}`
			`if resp.StatusCode == 404 {`
			`// I need a better way to do this`
Initial repo view. Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-12-01 01:37:27 +09:00			`return c.Status(404).Render("error", fiber.Map{`
Add titles Signed-off-by: Odyssey <odyssey346@disroot.org> 2023-01-06 04:58:41 +09:00			`"title": "Error",`
Load even when repoUrl ends with .git 2023-03-19 19:19:21 +09:00			`"error": "Repository " + c.Params("user") + "/" + repoUrl + branchExists + " not found",`
Initial repo view. Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-12-01 01:37:27 +09:00			`})`
			`}`

add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`// Scraping`
add languages used in repo 2023-02-15 00:21:44 +09:00			`Scrape := Repo{}`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00
GotHub→Gitlin、Codeberg→Gitler 2023-06-06 16:04:50 +09:00			`UserAgent, ok := os.LookupEnv("GITLIN_USER_AGENT")`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`if !ok {`
			`UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"`
			`}`
Initial repo view. Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-12-01 01:37:27 +09:00
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`sc := colly.NewCollector(colly.AllowedDomains("github.com"), colly.UserAgent(UserAgent))`
			`sc.OnHTML("div.Layout-sidebar", func(e *colly.HTMLElement) {`
Load even when repoUrl ends with .git 2023-03-19 19:19:21 +09:00			`Scrape.Fullname = c.Params("user") + "/" + repoUrl`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`Scrape.Description = e.ChildText("p.f4")`
Load even when repoUrl ends with .git 2023-03-19 19:19:21 +09:00			`Scrape.Stars = e.ChildText("a[href*='/" + c.Params("user") + "/" + repoUrl + "/stargazers' i] strong")`
			`Scrape.Watchers = e.ChildText("a[href*='/" + c.Params("user") + "/" + repoUrl + "/watchers' i] strong")`
			`Scrape.Forks = e.ChildText("a[href*='/" + c.Params("user") + "/" + repoUrl + "/forks' i] strong")`
add support for repo tags and links 2023-03-16 18:55:17 +09:00			`Scrape.Link = e.ChildAttr("span.css-truncate a.text-bold", "href")`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`Scrape.License = e.ChildText("a[data-analytics-event*='{\"category\":\"Repository Overview\",\"action\":\"click\",\"label\":\"location:sidebar;file:license\"}']")`
add support for repo tags and links 2023-03-16 18:55:17 +09:00			`e.ForEach("a.topic-tag", func(i int, el *colly.HTMLElement) {`
			`Scrape.Tags = append(Scrape.Tags, strings.TrimPrefix(el.Attr("data-octo-dimensions"), "topic:"))`
			`})`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`})`
add commitsbehind 2023-04-28 15:38:42 +09:00			`sc.OnHTML("div.Box-body div.d-flex div span", func(e *colly.HTMLElement) {`
			`Scrape.CommitsBehind = strings.TrimSuffix(e.ChildText("a"), " commits behind")`
			`})`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`sc.OnHTML("div#readme", func(e *colly.HTMLElement) {`
fix Scrape.Readme 2023-03-22 03:20:39 +09:00			`Scrape.Readme = e.ChildText("a[href='#readme']")`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`})`
move to scraping readme for dirview and fileview as well 2023-04-27 21:57:44 +09:00			`sc.OnHTML("div#readme div.Box-body", func(e *colly.HTMLElement) {`
			`Content, _ := e.DOM.Html()`
add image support (with proxying) (closes #38) 2023-04-28 19:19:33 +09:00			`readmeOutput = strings.Replace(strings.Replace(strings.Replace(strings.Replace(string(utils.UGCPolicy().SanitizeBytes([]byte(Content))), "https://github.com", "", -1), "user-content-", "", -1), "https://camo.githubusercontent.com", "/camo", -1), "https://raw.githubusercontent.com", "/raw", -1)`
move to scraping readme for dirview and fileview as well 2023-04-27 21:57:44 +09:00			`})`
add languages used in repo 2023-02-15 00:21:44 +09:00			`sc.OnHTML("div.BorderGrid-cell ul.list-style-none", func(e *colly.HTMLElement) {`
			`e.ForEach("li.d-inline .d-inline-flex", func(i int, el *colly.HTMLElement) {`
			`Scrape.Language = append(Scrape.Language, el.ChildText("span.text-bold")+" "+el.ChildText("span:contains('%')"))`
			`})`
			`})`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`sc.OnHTML("div#repository-container-header", func(e *colly.HTMLElement) {`
			`Scrape.Parent = e.ChildText("span.text-small a")`
			`})`
			`sc.OnHTML("summary[title='Switch branches or tags']", func(e colly.HTMLElement) {`
			`Scrape.DefaultBranch = e.ChildText("span.css-truncate-target")`
			`})`
implement file retrive with scraping 2023-02-15 21:32:48 +09:00			`sc.OnHTML("div.js-details-container div.Details-content--hidden-not-important", func(e *colly.HTMLElement) {`
			`e.ForEach("div.js-navigation-item", func(i int, el *colly.HTMLElement) {`
			`var FileType string`
			`if el.ChildAttr("div.flex-shrink-0 svg", "aria-label") == "Directory" {`
			`FileType = "dir"`
			`} else {`
			`FileType = "file"`
			`}`
			`repoFilesArray = append(repoFilesArray, RepoFiles{`
			`Name: el.ChildText("div.flex-auto span.d-block a.js-navigation-open"),`
			`Path: el.ChildText("div.flex-auto span.d-block a.js-navigation-open"),`
			`Type: FileType,`
			`Fullname: Scrape.Fullname,`
			`DefaultBranch: Scrape.DefaultBranch,`
			`})`
			`})`
			`})`
Load even when repoUrl ends with .git 2023-03-19 19:19:21 +09:00			`sc.Visit("https://github.com/" + c.Params("user") + "/" + repoUrl + branchExists)`
add basic metadata scraping for repos 2023-02-14 23:54:33 +09:00			`// Add scrape-based info to repoArray`
			`repoArray = append(repoArray, Scrape)`
Add repo page Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-11-30 23:08:16 +09:00			`return c.Render("repo", fiber.Map{`
The Instance Owner Update Signed-off-by: Odyssium <hi@odyssey346.dev> 2023-04-02 01:53:44 +09:00			`"title": c.Params("user") + "/" + repoUrl + branchExists,`
specify branch using ldflag (closes #63) 2023-04-29 02:08:04 +09:00			`"branch": utils.Branch,`
Initial repo view. Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-12-01 01:37:27 +09:00			`"repo": repoArray,`
			`"files": repoFilesArray,`
move to scraping readme for dirview and fileview as well 2023-04-27 21:57:44 +09:00			`"readme": readmeOutput,`
Add repo page Signed-off-by: Odyssey <odyssey346@disroot.org> 2022-11-30 23:08:16 +09:00			`})`
			`}`