From 924a41ddcaa09e88d01caf2b018208bbd4da9745 Mon Sep 17 00:00:00 2001 From: Arya Kiran Date: Mon, 13 Feb 2023 19:29:44 +0530 Subject: [PATCH] add support for organizations --- pages/user.go | 34 +++++++++++++++++++++++++++------- utils/ugcpolicy.go | 5 +++-- views/user.html | 7 +++++-- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/pages/user.go b/pages/user.go index a59ef7a..86a6b18 100644 --- a/pages/user.go +++ b/pages/user.go @@ -23,12 +23,13 @@ type User struct { StatusEmoji string AvatarUrl string Location string + Email string Timezone string Following int64 Followers int64 Link string Social []string - Organization []string + Organizations []string Company string Type string Contributions string @@ -115,7 +116,6 @@ func HandleUser(c *fiber.Ctx) error { // scraping Scrape := User{ - Link: link, Readme: string(readmeOutput), } @@ -131,7 +131,6 @@ func HandleUser(c *fiber.Ctx) error { sc1.Visit("https://github.com/" + c.Params("user") + "/") sc := colly.NewCollector(colly.AllowedDomains("github.com"), colly.UserAgent(UserAgent)) - if Scrape.Type == "http://schema.org/Person" { // Bio sc.OnHTML("div[data-bio-text]", func(e *colly.HTMLElement) { @@ -149,7 +148,7 @@ func HandleUser(c *fiber.Ctx) error { e.ForEach("li[itemprop*='social']", func(i int, el *colly.HTMLElement) { Scrape.Social = append(Scrape.Social, el.ChildText("a.Link--primary")) }) - + Scrape.Link = e.ChildText("a[itemprop*='url']") }) // Followers/Following sc.OnHTML("a[href*='https://github.com/"+c.Params("user")+"?tab=followers' i]", func(e *colly.HTMLElement) { @@ -178,21 +177,42 @@ func HandleUser(c *fiber.Ctx) error { // Organizations sc.OnHTML("div.mt-3", func(e *colly.HTMLElement) { e.ForEach("a[data-hovercard-type*='organization']", func(i int, el *colly.HTMLElement) { - Scrape.Organization = append(Scrape.Organization, el.Attr("aria-label")) + Scrape.Organizations = append(Scrape.Organizations, el.Attr("aria-label")) }) }) } else { + sc.OnHTML("div.container-xl div.flex-md-items-center div.flex-1", func(e *colly.HTMLElement) { + Scrape.Bio = e.ChildText("div.color-fg-muted div") + Scrape.Followers, err = strconv.ParseInt(e.ChildText("a[href*='/orgs/"+c.Params("user")+"/followers' i] span"), 10, 64) + Scrape.Name = e.ChildText("h1.h2") + Scrape.Location = e.ChildText("span[itemprop*='location']") + Scrape.Link = e.ChildText("a[itemprop*='url']") + Scrape.Email = e.ChildText("a[itemprop*='email']") + e.ForEach("a.Link--primary", func(i int, el *colly.HTMLElement) { + Scrape.Social = append(Scrape.Social, el.Attr("href")) + }) + }) sc.OnHTML("img[alt*='@"+c.Params("user")+"' i]", func(e *colly.HTMLElement) { Scrape.AvatarUrl = e.Attr("src") + Scrape.Login = e.Attr("alt") }) - log.Println("Bio and Location routes cannot be scraped for organizations") } sc.Visit("https://github.com/" + c.Params("user") + "/") // Fixing the output a bit Scrape.AvatarUrl = strings.TrimPrefix(Scrape.AvatarUrl, "https://avatars.githubusercontent.com/u/") Scrape.AvatarUrl = "/avatar/" + Scrape.AvatarUrl - Scrape.StatusEmoji = emoji.Parse(":"+Scrape.StatusEmoji+":") + if Scrape.StatusEmoji != "" { + Scrape.StatusEmoji = emoji.Parse(":" + Scrape.StatusEmoji + ":") + } + Scrape.Login = strings.TrimPrefix(Scrape.Login, "@") // Only for orgs + if strings.HasPrefix(Scrape.Link, "https://") { + Scrape.Link = strings.TrimPrefix(Scrape.Link, "https://") + } else if strings.HasPrefix(Scrape.Link, "http://") { + Scrape.Link = strings.TrimPrefix(Scrape.Link, "http://") + } else { + log.Println("Has no prefix") + } // Add scrape-based info to userArray userArray = append(userArray, Scrape) diff --git a/utils/ugcpolicy.go b/utils/ugcpolicy.go index 665aa41..ffcb4a8 100644 --- a/utils/ugcpolicy.go +++ b/utils/ugcpolicy.go @@ -1,8 +1,10 @@ package utils + import ( - "regexp" "github.com/microcosm-cc/bluemonday" + "regexp" ) + // copied from bluemonday's GitHub repostiory, with some adaptations func UGCPolicy() *bluemonday.Policy { @@ -194,4 +196,3 @@ func UGCPolicy() *bluemonday.Policy { return p } - diff --git a/views/user.html b/views/user.html index 8a5eabd..6d052f2 100644 --- a/views/user.html +++ b/views/user.html @@ -29,14 +29,17 @@ {{ if .Link }}

🌐 {{.Link}}

{{ end }} + {{ if .Email }} +

✉️ {{.Email}}

+ {{ end }} {{ if .Social }} {{range .Social}}

🔗 {{.}}

{{ end }} {{ end }} - {{ if .Organization }} + {{ if .Organizations }}

Organizations: - {{range .Organization}} + {{range .Organizations}} {{.}} {{ end }}