From 4e3bb59c41d47c0fb51a8bf1b004cf848814ab1d Mon Sep 17 00:00:00 2001 From: Arya Kiran Date: Wed, 8 Feb 2023 15:56:40 +0530 Subject: [PATCH] make the user page mostly work when api is disabled --- pages/user.go | 175 +++++++++++++++++++++++------------------------- views/user.html | 4 +- 2 files changed, 87 insertions(+), 92 deletions(-) diff --git a/pages/user.go b/pages/user.go index 00cf947..e45754f 100644 --- a/pages/user.go +++ b/pages/user.go @@ -2,7 +2,6 @@ package pages import ( "codeberg.org/gothub/gothub/utils" - "fmt" "github.com/carlmjohnson/requests" "github.com/gocolly/colly" "github.com/gofiber/fiber/v2" @@ -38,6 +37,10 @@ type Ratelimit struct { // HandleUser handles the user page. func HandleUser(c *fiber.Ctx) error { + // Declare Array used for displaying data + var userArray []User + + // API user := utils.GetRequest("https://api.github.com/users/" + c.Params("user")) if user.Get("message").String() == "Not Found" { return c.Status(404).Render("error", fiber.Map{ @@ -46,7 +49,7 @@ func HandleUser(c *fiber.Ctx) error { } if strings.Contains(user.Get("message").String(), "rate limit") { // dont wanna get the status code so i'll just do this instead 👍 ratelimitJSON := utils.GetRequest("https://api.github.com/rate_limit") - fmt.Println(ratelimitJSON) + log.Println(ratelimitJSON) var ratelimitArray []Ratelimit ratelimitArray = append(ratelimitArray, Ratelimit{ @@ -54,36 +57,13 @@ func HandleUser(c *fiber.Ctx) error { Limit: ratelimitJSON.Get("resources.core.limit").Int(), }) - fmt.Println(ratelimitArray) + log.Println(ratelimitArray) return c.Render("ratelimit", fiber.Map{ "Title": "GitHub API /users endpoint rate limit exceeded", "ratelimit": ratelimitArray, }) } else { - var userArray []User - - var readmee string - - err := requests. - URL("https://raw.githubusercontent.com/" + c.Params("user") + "/" + c.Params("user") + "/master/README.md"). - ToString(&readmee). - Fetch(context.Background()) - if err != nil { - err2 := requests. 
- URL("https://raw.githubusercontent.com/" + c.Params("user") + "/.github/master/profile/README.md"). - ToString(&readmee). - Fetch(context.Background()) - if err2 != nil { - readmee = "" - log.Println(err) - } - } - - mightBeUnsafe := markdown.ToHTML([]byte(readmee), nil, nil) - - // Trust Nobody - readmeOutput := UGCPolicy().SanitizeBytes(mightBeUnsafe) var link string @@ -99,71 +79,86 @@ func HandleUser(c *fiber.Ctx) error { log.Println("Has no prefix") } } - // scraping - var OrgOrUser string - - Scrape := User{ - Link: link, - Type: user.Get("type").String(), - EwTwitter: user.Get("twitter_username").String(), - Readme: string(readmeOutput), - } - - sc1 := colly.NewCollector(colly.AllowedDomains("github.com")) - - sc1.OnHTML("div[itemtype]", func(e *colly.HTMLElement) { - OrgOrUser = e.Attr("itemtype") - }) - - sc1.Visit("https://github.com/" + c.Params("user") + "/") - sc := colly.NewCollector( - colly.AllowedDomains("github.com"), - ) - - if OrgOrUser == "http://schema.org/Person" { - // Bio - sc.OnHTML("div[data-bio-text]", func(e *colly.HTMLElement) { - Scrape.Bio = e.Attr("data-bio-text") - }) - // Avatar - sc.OnHTML("img[alt*=Avatar]", func(e *colly.HTMLElement) { - Scrape.AvatarUrl = e.Attr("src") - }) - // Metadata (Location/Workplace/Website/Twitter etc.) 
- sc.OnHTML("ul.vcard-details", func(e *colly.HTMLElement) { - Scrape.Location = e.ChildText("li[itemprop*='homeLocation'] span") - Scrape.Company = e.ChildText("li[itemprop*='worksFor'] span") - }) - // Followers/Following - sc.OnHTML("a[href*='https://github.com/"+c.Params("user")+"?tab=followers' i]", func(e *colly.HTMLElement) { - Scrape.Followers, err = strconv.ParseInt(e.ChildText("span"), 10, 64) - }) - sc.OnHTML("a[href*='https://github.com/"+c.Params("user")+"?tab=following' i]", func(e *colly.HTMLElement) { - Scrape.Following, err = strconv.ParseInt(e.ChildText("span"), 10, 64) - }) - // User/Full Name - sc.OnHTML("h1.vcard-names", func(e *colly.HTMLElement) { - Scrape.Login = e.ChildText("span[itemprop*='additionalName']") - Scrape.Name = e.ChildText("span[itemprop*='name']") - }) - - } else { - sc.OnHTML("img[alt*='@"+c.Params("user")+"' i]", func(e *colly.HTMLElement) { - Scrape.AvatarUrl = e.Attr("src") - }) - log.Println("Bio and Location routes cannot be scraped for organizations") - } - sc.Visit("https://github.com/" + c.Params("user") + "/") - userArray = append(userArray, Scrape) - - fmt.Println(userArray) - - - return c.Render("user", fiber.Map{ - "Title": "User " + c.Params("user"), - "user": userArray, - }) } + // User README + var readmee string + + err := requests. + URL("https://raw.githubusercontent.com/" + c.Params("user") + "/" + c.Params("user") + "/master/README.md"). + ToString(&readmee). + Fetch(context.Background()) + if err != nil { + err2 := requests. + URL("https://raw.githubusercontent.com/" + c.Params("user") + "/.github/master/profile/README.md"). + ToString(&readmee). 
+ Fetch(context.Background()) + if err2 != nil { + readmee = "" + log.Println(err) + } + } + mightBeUnsafe := markdown.ToHTML([]byte(readmee), nil, nil) + // Sanitize the user README in case there is any unsafe markup in it + readmeOutput := UGCPolicy().SanitizeBytes(mightBeUnsafe) + + // scraping + Scrape := User{ + //Link: link, + EwTwitter: user.Get("twitter_username").String(), + Readme: string(readmeOutput), + } + + sc1 := colly.NewCollector(colly.AllowedDomains("github.com")) + + sc1.OnHTML("div[itemtype]", func(e *colly.HTMLElement) { + Scrape.Type = e.Attr("itemtype") + }) + + sc1.Visit("https://github.com/" + c.Params("user") + "/") + sc := colly.NewCollector( + colly.AllowedDomains("github.com"), + ) + + if Scrape.Type == "http://schema.org/Person" { + // Bio + sc.OnHTML("div[data-bio-text]", func(e *colly.HTMLElement) { + Scrape.Bio = e.Attr("data-bio-text") + }) + // Avatar + sc.OnHTML("img[alt*=Avatar]", func(e *colly.HTMLElement) { + Scrape.AvatarUrl = e.Attr("src") + }) + // Metadata (Location/Workplace/Website/Twitter etc.) 
+ sc.OnHTML("ul.vcard-details", func(e *colly.HTMLElement) { + Scrape.Location = e.ChildText("li[itemprop*='homeLocation'] span") + Scrape.Company = e.ChildText("li[itemprop*='worksFor'] span") + }) + // Followers/Following + sc.OnHTML("a[href*='https://github.com/"+c.Params("user")+"?tab=followers' i]", func(e *colly.HTMLElement) { + Scrape.Followers, err = strconv.ParseInt(e.ChildText("span"), 10, 64) + }) + sc.OnHTML("a[href*='https://github.com/"+c.Params("user")+"?tab=following' i]", func(e *colly.HTMLElement) { + Scrape.Following, err = strconv.ParseInt(e.ChildText("span"), 10, 64) + }) + // User/Full Name + sc.OnHTML("h1.vcard-names", func(e *colly.HTMLElement) { + Scrape.Login = e.ChildText("span[itemprop*='additionalName']") + Scrape.Name = e.ChildText("span[itemprop*='name']") + }) + + } else { + sc.OnHTML("img[alt*='@"+c.Params("user")+"' i]", func(e *colly.HTMLElement) { + Scrape.AvatarUrl = e.Attr("src") + }) + log.Println("Bio and Location routes cannot be scraped for organizations") + } + sc.Visit("https://github.com/" + c.Params("user") + "/") + userArray = append(userArray, Scrape) + + return c.Render("user", fiber.Map{ + "Title": "User " + c.Params("user"), + "user": userArray, + }) } // copied from bluemonday's GitHub repostiory, with some adaptations diff --git a/views/user.html b/views/user.html index 127751f..ddcb207 100644 --- a/views/user.html +++ b/views/user.html @@ -12,7 +12,7 @@

{{.Name}}

{{ end }}

{{.Login}}

- {{ if eq .Type "User" }} + {{ if eq .Type "http://schema.org/Person" }}

{{.Followers}} followers - {{.Following}} following

{{ else }}

{{.Followers}} followers

@@ -48,4 +48,4 @@

User not found

That user doesn't exist.

{{ end }} - \ No newline at end of file +