// Package pages holds page handlers such as HandleUser.
package pages

import (
	"os"
	"strings"

	"gitler.moe/suwako/gitlin/utils"

	"github.com/enescakir/emoji"
	"github.com/gocolly/colly"
	"github.com/gofiber/fiber/v2"
)

// User is the data scraped from a GitHub profile page (a person or an
// organisation) and handed to the "user" template. Every value is kept as the
// text found in the page; nothing is parsed into numbers.
type User struct {
	Login         string
	Name          string
	Bio           string
	Status        string
	StatusEmoji   string
	AvatarUrl     string
	Location      string
	Email         string
	Timezone      string
	Following     string
	Followers     string
	Link          string
	Social        []string
	Organizations []string
	OrgMembers    []string
	Company       string
	Type          string
	Contributions string
	Readme        string
	ReadmeUrl     string
	MainRepos     []RepoList
	PinOrPopular  string
}

// RepoList describes one entry of the pinned/popular list on a profile page.
// Type is either "Repository" or "Gist".
type RepoList struct {
	Name   string
	Type   string
	Link   string
	Desc   string
	Lang   string
	Stars  string
	Forks  string
	ForkOf string
}

// HandleUser handles the user page.
//
// It scrapes https://github.com/<user>/ (where <user> is the "user" route
// parameter) and renders the "user" template with the collected data. When
// the page's itemtype is neither a person nor an organisation, it renders the
// "error" template with status 404 instead.
//
// The User-Agent sent to GitHub is taken from the GITLIN_USER_AGENT
// environment variable, falling back to a desktop Chrome string.
func HandleUser(c *fiber.Ctx) error {
	const (
		defaultUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
		personType       = "http://schema.org/Person"
		orgType          = "http://schema.org/Code"
	)

	user := c.Params("user")
	profileURL := "https://github.com/" + user + "/"

	userAgent, ok := os.LookupEnv("GITLIN_USER_AGENT")
	if !ok {
		userAgent = defaultUserAgent
	}

	var profile User

	// First pass: only find out what kind of account this is, because the
	// selectors registered below differ between people and organisations.
	// NOTE(review): this means the same page is fetched twice per request.
	probe := colly.NewCollector(colly.AllowedDomains("github.com"), colly.UserAgent(userAgent))
	probe.OnHTML("div[itemscope]", func(e *colly.HTMLElement) {
		profile.Type = e.Attr("itemtype")
	})
	// The error is deliberately discarded: if the callback never fires,
	// profile.Type stays empty and the default branch below answers with 404.
	_ = probe.Visit(profileURL)

	sc := colly.NewCollector(colly.AllowedDomains("github.com"), colly.UserAgent(userAgent))

	switch profile.Type {
	case personType:
		sc.OnHTML("div.js-profile-editable-replace", func(e *colly.HTMLElement) {
			// Main info
			profile.Login = e.ChildText("span[itemprop*='additionalName']")
			profile.Name = e.ChildText("span[itemprop*='name']")
			profile.Bio = e.ChildText("div[data-bio-text] div")
			profile.AvatarUrl = e.ChildAttr("img[alt*='Avatar']", "src")

			// Metadata
			profile.Location = e.ChildText("li[itemprop*='homeLocation'] span")
			profile.Timezone = e.ChildText("li[itemprop*='localTime'] span")
			profile.Company = e.ChildText("li[itemprop*='worksFor'] span")
			profile.Link = e.ChildText("li[itemprop*='url'] a")
			e.ForEach("li[itemprop*='social']", func(_ int, el *colly.HTMLElement) {
				profile.Social = append(profile.Social, el.ChildAttr("a.Link--primary", "href"))
			})

			// Followers/Following
			profile.Followers = e.ChildText("a[href*='https://github.com/" + user + "?tab=followers' i] span")
			profile.Following = e.ChildText("a[href*='https://github.com/" + user + "?tab=following' i] span")

			// Organizations
			e.ForEach("a[data-hovercard-type*='organization']", func(_ int, el *colly.HTMLElement) {
				profile.Organizations = append(profile.Organizations, el.Attr("aria-label"))
			})

			// User status
			profile.Status = e.ChildText("div.user-status-circle-badge div.user-status-message-wrapper div")
			profile.StatusEmoji = e.ChildAttr("div.user-status-circle-badge div.user-status-emoji-container g-emoji", "alias")
		})

		// Contributions
		sc.OnHTML("div.js-yearly-contributions", func(e *colly.HTMLElement) {
			profile.Contributions = e.ChildText("h2")
		})

	case orgType:
		sc.OnHTML("div.container-xl div.flex-md-items-center div.flex-1", func(e *colly.HTMLElement) {
			// Main info
			profile.Name = e.ChildText("h1.h2")
			profile.Bio = e.ChildText("div.color-fg-muted div")

			// Metadata
			profile.Location = e.ChildText("span[itemprop*='location']")
			profile.Email = e.ChildText("a[itemprop*='email']")
			profile.Link = e.ChildText("a[itemprop*='url']")
			e.ForEach("a.Link--primary", func(_ int, el *colly.HTMLElement) {
				profile.Social = append(profile.Social, el.Attr("href"))
			})

			// Followers
			profile.Followers = e.ChildText("a[href*='/orgs/" + user + "/followers' i] span")
		})

		// The alt text carries the login with a leading "@", which is
		// stripped after scraping.
		sc.OnHTML("img[alt*='@"+user+"' i]", func(e *colly.HTMLElement) {
			profile.AvatarUrl = e.Attr("src")
			profile.Login = e.Attr("alt")
		})

		// Org members
		sc.OnHTML("div.clearfix", func(e *colly.HTMLElement) {
			e.ForEach("a[data-hovercard-type*='user'] img", func(_ int, el *colly.HTMLElement) {
				profile.OrgMembers = append(profile.OrgMembers, strings.TrimPrefix(el.Attr("alt"), "@"))
			})
		})

	default:
		return c.Status(fiber.StatusNotFound).Render("error", fiber.Map{
			"error": "User " + user + " not found",
		})
	}

	// Pinned or popular repositories and gists.
	sc.OnHTML("div.js-pinned-items-reorder-container", func(e *colly.HTMLElement) {
		e.ForEach("div.pinned-item-list-item-content", func(_ int, el *colly.HTMLElement) {
			href := el.ChildAttr("div.width-full a", "href")
			repo := RepoList{
				Name: strings.TrimPrefix(href, "/"),
				Link: href,
				Type: "Repository",
				Desc: el.ChildText("p.pinned-item-desc"),
			}

			// The item is a gist only when its link points at the gist
			// host. Deciding this from "/gist" appearing anywhere in the
			// link would also catch ordinary repositories whose owner or
			// name merely starts with "gist".
			if strings.Contains(repo.Name, "https://gist.github.com/") {
				repo.Name = el.ChildAttr("div.width-full a span", "title")
				repo.Link = "/gist" + strings.TrimPrefix(href, "https://gist.github.com")
				repo.Type = "Gist"

				// The description of a gist is built from the text of its
				// <pre> elements, each preceded by a newline.
				var desc strings.Builder
				el.ForEach("div.rounded-bottom-2 div.flex-items-center", func(_ int, file *colly.HTMLElement) {
					desc.WriteString("\n")
					desc.WriteString(file.ChildText("pre"))
				})
				repo.Desc = desc.String()
			}

			repo.Lang = el.ChildText("p.color-fg-muted span[itemprop*='programmingLanguage']")
			repo.Stars = el.ChildText("p.color-fg-muted a[href*='/stargazers' i]")
			repo.Forks = el.ChildText("p.color-fg-muted a[href*='/forks' i]")
			repo.ForkOf = el.ChildText("p.text-small a.Link--muted")

			profile.MainRepos = append(profile.MainRepos, repo)
		})
		profile.PinOrPopular = strings.TrimSuffix(e.ChildText("h2.text-normal"), " repositories")
	})

	// Profile README: sanitise the HTML, then rewrite GitHub URLs. The
	// replacements are applied one after another, in this order.
	sc.OnHTML("article.markdown-body", func(e *colly.HTMLElement) {
		content, err := e.DOM.Html()
		if err != nil {
			// Without the markup there is nothing to show.
			return
		}
		readme := string(utils.UGCPolicy().SanitizeBytes([]byte(content)))
		readme = strings.ReplaceAll(readme, "https://github.com", "")
		readme = strings.ReplaceAll(readme, "user-content-", "")
		readme = strings.ReplaceAll(readme, "https://camo.githubusercontent.com", "/camo")
		readme = strings.ReplaceAll(readme, "https://raw.githubusercontent.com", "/raw")
		profile.Readme = readme
	})

	// NOTE(review): this swaps every "tree" in the path, so an owner, repo or
	// branch name containing "tree" is rewritten as well - confirm whether
	// that matters for the links GitHub emits here.
	sc.OnHTML("div.text-mono", func(e *colly.HTMLElement) {
		profile.ReadmeUrl = strings.ReplaceAll(e.ChildAttr("a", "href"), "tree", "blob")
	})

	// Scraping is best effort: if this request fails the page is rendered
	// with whatever fields were filled in.
	_ = sc.Visit(profileURL)

	// Fixing the output a bit.
	// The avatar needs to live under /avatar so it is proxied.
	profile.AvatarUrl = "/avatar/" + strings.TrimPrefix(profile.AvatarUrl, "https://avatars.githubusercontent.com/u/")
	if profile.StatusEmoji != "" {
		// Convert the emoji alias to an actual emoji.
		profile.StatusEmoji = emoji.Parse(":" + profile.StatusEmoji + ":")
	}
	// Only organisations carry the leading "@".
	profile.Login = strings.TrimPrefix(profile.Login, "@")

	// Remove the scheme from the user's website URL; at most one is removed.
	switch {
	case strings.HasPrefix(profile.Link, "https://"):
		profile.Link = strings.TrimPrefix(profile.Link, "https://")
	case strings.HasPrefix(profile.Link, "http://"):
		profile.Link = strings.TrimPrefix(profile.Link, "http://")
	}

	// The template receives a one-element slice.
	return c.Render("user", fiber.Map{
		"title":  user,
		"branch": utils.Branch,
		"user":   []User{profile},
	})
}