add basic metadata scraping for repos

このコミットが含まれているのは:
Arya Kiran 2023-02-14 20:24:33 +05:30
コミット 8aef883056
この署名に対応する既知のキーがデータベースに存在しません
GPGキーID: 842D12BDA50DF120
3個のファイルの変更68行の追加43行の削除

ファイルの表示

@ -1,27 +1,28 @@
package pages
import (
"context"
"log"
"codeberg.org/gothub/gothub/utils"
"context"
"github.com/carlmjohnson/requests"
"github.com/gocolly/colly"
"github.com/gofiber/fiber/v2"
"github.com/gomarkdown/markdown"
"log"
"net/http"
"os"
)
// Repo holds the repository metadata that HandleRepo collects and hands to
// the "repo" template under the "repo" key.
//
// NOTE(review): this listing interleaves the removed and the added lines of a
// diff hunk, so Stars, Forks and Watchers each appear twice below (first as
// int64, then as string). Only one declaration of each can exist in the real
// file; confirm against the actual source before relying on either type.
type Repo struct {
// Fullname is the "user/repo" identifier of the repository.
Fullname string
// Description is the repository's short description text.
Description string
// HtmlUrl is filled from the API response's "html_url" field.
HtmlUrl string
// Fork is filled from the API response's "fork" flag.
Fork bool
// Parent names the repository this one was forked from; the template
// renders it as a link when it is non-empty.
Parent string
// Numeric counters, as filled from the API's stargazers_count, forks_count
// and watchers_count fields.
Stars int64
Forks int64
Watchers int64
// Textual counters, as filled from the text scraped out of the GitHub
// sidebar links (stargazers, network/members, watchers).
Stars string
Forks string
Watchers string
// Language is filled from the API response's "language" field.
Language string
// License is the license label; the template prints "No license" when it
// is empty.
License string
// DefaultBranch is the branch name used to build the raw README URL.
DefaultBranch string
// Readme is the README file name scraped from the repository page; it is
// appended to the raw README URL and shown as the README heading.
Readme string
}
type RepoFiles struct {
@ -35,14 +36,20 @@ type RepoFiles struct {
func HandleRepo(c *fiber.Ctx) error {
var repoArray []Repo
var repoFilesArray []RepoFiles
// get repo
repo := utils.GetRequest("https://api.github.com/repos/" + c.Params("user") + "/" + c.Params("repo"))
if repo.Get("message").String() == "Not Found" {
resp, statusErr := http.Get("https://github.com/" + c.Params("user") + "/" + c.Params("repo"))
if statusErr != nil {
log.Println(statusErr)
}
if resp.StatusCode == 404 {
// I need a better way to do this
return c.Status(404).Render("error", fiber.Map{
"title": "Error",
"error": "Repository " + c.Params("user") + "/" + c.Params("repo") + " not found",
})
}
// API
repo := utils.GetRequest("https://api.github.com/repos/" + c.Params("user") + "/" + c.Params("repo"))
repoFiles := utils.GetRequest("https://api.github.com/repos/" + c.Params("user") + "/" + c.Params("repo") + "/contents")
bruh := repoFiles.Get("#.@pretty").Array()
for _, item := range bruh {
@ -55,36 +62,54 @@ func HandleRepo(c *fiber.Ctx) error {
})
}
var readmee string
// Scraping
Scrape := Repo{
Language: repo.Get("language").String(),
}
UserAgent, ok := os.LookupEnv("GOTHUB_USER_AGENT")
if !ok {
UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
}
sc := colly.NewCollector(colly.AllowedDomains("github.com"), colly.UserAgent(UserAgent))
sc.OnHTML("div.Layout-sidebar", func(e *colly.HTMLElement) {
Scrape.Fullname = c.Params("user") + "/" + c.Params("repo")
Scrape.Description = e.ChildText("p.f4")
Scrape.Stars = e.ChildText("a[href*='/" + c.Params("user") + "/" + c.Params("repo") + "/stargazers' i] strong")
Scrape.Watchers = e.ChildText("a[href*='/" + c.Params("user") + "/" + c.Params("repo") + "/watchers' i] strong")
Scrape.Forks = e.ChildText("a[href*='/" + c.Params("user") + "/" + c.Params("repo") + "/network/members' i] strong")
Scrape.License = e.ChildText("a[data-analytics-event*='{\"category\":\"Repository Overview\",\"action\":\"click\",\"label\":\"location:sidebar;file:license\"}']")
})
sc.OnHTML("div#readme", func(e *colly.HTMLElement) {
Scrape.Readme = e.ChildText("a[href*='#readme']")
})
sc.OnHTML("div#repository-container-header", func(e *colly.HTMLElement) {
Scrape.Parent = e.ChildText("span.text-small a")
})
sc.OnHTML("summary[title*='Switch branches or tags']", func(e *colly.HTMLElement) {
Scrape.DefaultBranch = e.ChildText("span.css-truncate-target")
})
sc.Visit("https://github.com/" + c.Params("user") + "/" + c.Params("repo") + "/")
// Add scrape-based info to repoArray
repoArray = append(repoArray, Scrape)
// README
var readmee string
err := requests.
URL("https://raw.githubusercontent.com/" + c.Params("user") + "/" + c.Params("repo") + "/" + repo.Get("default_branch").String() + "/README.md").
URL("https://raw.githubusercontent.com/" + c.Params("user") + "/" + c.Params("repo") + "/" + Scrape.DefaultBranch + "/" + Scrape.Readme).
ToString(&readmee).
Fetch(context.Background())
if err != nil {
readmee = ""
log.Println(err)
}
mightBeUnsafe := markdown.ToHTML([]byte(readmee), nil, nil)
// Trust Nobody
readmeOutput := utils.UGCPolicy().SanitizeBytes(mightBeUnsafe)
repoArray = append(repoArray, Repo{
Fullname: repo.Get("full_name").String(),
Description: repo.Get("description").String(),
HtmlUrl: repo.Get("html_url").String(),
Fork: repo.Get("fork").Bool(),
Stars: repo.Get("stargazers_count").Int(),
Forks: repo.Get("forks_count").Int(),
Watchers: repo.Get("watchers_count").Int(),
Language: repo.Get("language").String(),
License: repo.Get("license").Get("name").String(),
Parent: repo.Get("parent").Get("full_name").String(),
DefaultBranch: repo.Get("default_branch").String(),
})
return c.Render("repo", fiber.Map{
"title": "Repository " + c.Params("user") + "/" + c.Params("repo"),
"repo": repoArray,

ファイルの表示

@ -5,7 +5,6 @@ import (
"log"
"net/http"
"os"
"strconv"
"strings"
"codeberg.org/gothub/gothub/utils"
@ -26,8 +25,8 @@ type User struct {
Location string
Email string
Timezone string
Following int64
Followers int64
Following string
Followers string
Link string
Social []string
Organizations []string
@ -108,8 +107,8 @@ func HandleUser(c *fiber.Ctx) error {
Scrape.Social = append(Scrape.Social, el.ChildText("a.Link--primary"))
})
// Followers/Following
Scrape.Followers, err = strconv.ParseInt(e.ChildText("a[href*='https://github.com/"+c.Params("user")+"?tab=followers' i] span"), 10, 64)
Scrape.Following, err = strconv.ParseInt(e.ChildText("a[href*='https://github.com/"+c.Params("user")+"?tab=following' i] span"), 10, 64)
Scrape.Followers = e.ChildText("a[href*='https://github.com/" + c.Params("user") + "?tab=followers' i] span")
Scrape.Following = e.ChildText("a[href*='https://github.com/" + c.Params("user") + "?tab=following' i] span")
// Organizations
e.ForEach("a[data-hovercard-type*='organization']", func(i int, el *colly.HTMLElement) {
Scrape.Organizations = append(Scrape.Organizations, el.Attr("aria-label"))
@ -135,7 +134,7 @@ func HandleUser(c *fiber.Ctx) error {
Scrape.Social = append(Scrape.Social, el.Attr("href"))
})
// Followers
Scrape.Followers, err = strconv.ParseInt(e.ChildText("a[href*='/orgs/"+c.Params("user")+"/followers' i] span"), 10, 64)
Scrape.Followers = e.ChildText("a[href*='/orgs/" + c.Params("user") + "/followers' i] span")
})
sc.OnHTML("img[alt*='@"+c.Params("user")+"' i]", func(e *colly.HTMLElement) {
Scrape.AvatarUrl = e.Attr("src")

ファイルの表示

@ -10,19 +10,16 @@
<div class="userProfile">
<h1>{{.Fullname}}</h1>
{{ if .Fork }}
{{ if .Parent }}
<p>This repository is a fork of <a href="/{{.Parent}}">{{.Parent}}</a>.</p>
{{ end }}
{{ if .Description }}
<p>{{.Description}}</p>
{{ end }}
{{ if .Language}}
{{ if .License }}
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ {{.License}} 🗒️ {{.Language}}</p>
{{ else }}
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ No license 🗒️ {{.Language}}</p>
{{ end }}
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ {{.License}} {{ if .Language }} 🗒️ {{.Language}} {{end}} 🌿 {{.DefaultBranch}}</p>
{{ else }}
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ No license {{ if .Language }} 🗒️ {{.Language}} {{end}} 🌿 {{.DefaultBranch}}</p>
{{ end }}
</div>
{{end}}
@ -44,7 +41,11 @@
{{ end }}
{{ if .readme}}
<div class="userReadme">
<h3>README.md</h3>
{{ if .repo }}
{{ range $key, $value := .repo}}
<h3>{{.Readme}}</h3>
{{end}}
{{end}}
<div class="userReadmeText">
{{ unescape .readme}}
</div>