add basic metadata scraping for repos
このコミットが含まれているのは:
コミット
8aef883056
|
@ -1,27 +1,28 @@
|
||||||
package pages
|
package pages
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"log"
|
|
||||||
|
|
||||||
"codeberg.org/gothub/gothub/utils"
|
"codeberg.org/gothub/gothub/utils"
|
||||||
|
"context"
|
||||||
"github.com/carlmjohnson/requests"
|
"github.com/carlmjohnson/requests"
|
||||||
|
"github.com/gocolly/colly"
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/gomarkdown/markdown"
|
"github.com/gomarkdown/markdown"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Repo struct {
|
type Repo struct {
|
||||||
Fullname string
|
Fullname string
|
||||||
Description string
|
Description string
|
||||||
HtmlUrl string
|
|
||||||
Fork bool
|
|
||||||
Parent string
|
Parent string
|
||||||
Stars int64
|
Stars string
|
||||||
Forks int64
|
Forks string
|
||||||
Watchers int64
|
Watchers string
|
||||||
Language string
|
Language string
|
||||||
License string
|
License string
|
||||||
DefaultBranch string
|
DefaultBranch string
|
||||||
|
Readme string
|
||||||
}
|
}
|
||||||
|
|
||||||
type RepoFiles struct {
|
type RepoFiles struct {
|
||||||
|
@ -35,14 +36,20 @@ type RepoFiles struct {
|
||||||
func HandleRepo(c *fiber.Ctx) error {
|
func HandleRepo(c *fiber.Ctx) error {
|
||||||
var repoArray []Repo
|
var repoArray []Repo
|
||||||
var repoFilesArray []RepoFiles
|
var repoFilesArray []RepoFiles
|
||||||
// get repo
|
|
||||||
repo := utils.GetRequest("https://api.github.com/repos/" + c.Params("user") + "/" + c.Params("repo"))
|
resp, statusErr := http.Get("https://github.com/" + c.Params("user") + "/" + c.Params("repo"))
|
||||||
if repo.Get("message").String() == "Not Found" {
|
if statusErr != nil {
|
||||||
|
log.Println(statusErr)
|
||||||
|
}
|
||||||
|
if resp.StatusCode == 404 {
|
||||||
|
// I need a better way to do this
|
||||||
return c.Status(404).Render("error", fiber.Map{
|
return c.Status(404).Render("error", fiber.Map{
|
||||||
"title": "Error",
|
"title": "Error",
|
||||||
"error": "Repository " + c.Params("user") + "/" + c.Params("repo") + " not found",
|
"error": "Repository " + c.Params("user") + "/" + c.Params("repo") + " not found",
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
// API
|
||||||
|
repo := utils.GetRequest("https://api.github.com/repos/" + c.Params("user") + "/" + c.Params("repo"))
|
||||||
repoFiles := utils.GetRequest("https://api.github.com/repos/" + c.Params("user") + "/" + c.Params("repo") + "/contents")
|
repoFiles := utils.GetRequest("https://api.github.com/repos/" + c.Params("user") + "/" + c.Params("repo") + "/contents")
|
||||||
bruh := repoFiles.Get("#.@pretty").Array()
|
bruh := repoFiles.Get("#.@pretty").Array()
|
||||||
for _, item := range bruh {
|
for _, item := range bruh {
|
||||||
|
@ -55,36 +62,54 @@ func HandleRepo(c *fiber.Ctx) error {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
var readmee string
|
// Scraping
|
||||||
|
Scrape := Repo{
|
||||||
|
Language: repo.Get("language").String(),
|
||||||
|
}
|
||||||
|
|
||||||
|
UserAgent, ok := os.LookupEnv("GOTHUB_USER_AGENT")
|
||||||
|
if !ok {
|
||||||
|
UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
|
||||||
|
}
|
||||||
|
|
||||||
|
sc := colly.NewCollector(colly.AllowedDomains("github.com"), colly.UserAgent(UserAgent))
|
||||||
|
sc.OnHTML("div.Layout-sidebar", func(e *colly.HTMLElement) {
|
||||||
|
Scrape.Fullname = c.Params("user") + "/" + c.Params("repo")
|
||||||
|
Scrape.Description = e.ChildText("p.f4")
|
||||||
|
Scrape.Stars = e.ChildText("a[href*='/" + c.Params("user") + "/" + c.Params("repo") + "/stargazers' i] strong")
|
||||||
|
Scrape.Watchers = e.ChildText("a[href*='/" + c.Params("user") + "/" + c.Params("repo") + "/watchers' i] strong")
|
||||||
|
Scrape.Forks = e.ChildText("a[href*='/" + c.Params("user") + "/" + c.Params("repo") + "/network/members' i] strong")
|
||||||
|
Scrape.License = e.ChildText("a[data-analytics-event*='{\"category\":\"Repository Overview\",\"action\":\"click\",\"label\":\"location:sidebar;file:license\"}']")
|
||||||
|
})
|
||||||
|
sc.OnHTML("div#readme", func(e *colly.HTMLElement) {
|
||||||
|
Scrape.Readme = e.ChildText("a[href*='#readme']")
|
||||||
|
})
|
||||||
|
sc.OnHTML("div#repository-container-header", func(e *colly.HTMLElement) {
|
||||||
|
Scrape.Parent = e.ChildText("span.text-small a")
|
||||||
|
})
|
||||||
|
sc.OnHTML("summary[title*='Switch branches or tags']", func(e *colly.HTMLElement) {
|
||||||
|
Scrape.DefaultBranch = e.ChildText("span.css-truncate-target")
|
||||||
|
})
|
||||||
|
|
||||||
|
sc.Visit("https://github.com/" + c.Params("user") + "/" + c.Params("repo") + "/")
|
||||||
|
|
||||||
|
// Add scrape-based info to repoArray
|
||||||
|
repoArray = append(repoArray, Scrape)
|
||||||
|
|
||||||
|
// README
|
||||||
|
var readmee string
|
||||||
err := requests.
|
err := requests.
|
||||||
URL("https://raw.githubusercontent.com/" + c.Params("user") + "/" + c.Params("repo") + "/" + repo.Get("default_branch").String() + "/README.md").
|
URL("https://raw.githubusercontent.com/" + c.Params("user") + "/" + c.Params("repo") + "/" + Scrape.DefaultBranch + "/" + Scrape.Readme).
|
||||||
ToString(&readmee).
|
ToString(&readmee).
|
||||||
Fetch(context.Background())
|
Fetch(context.Background())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
readmee = ""
|
readmee = ""
|
||||||
log.Println(err)
|
log.Println(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
mightBeUnsafe := markdown.ToHTML([]byte(readmee), nil, nil)
|
mightBeUnsafe := markdown.ToHTML([]byte(readmee), nil, nil)
|
||||||
|
|
||||||
// Trust Nobody
|
// Trust Nobody
|
||||||
readmeOutput := utils.UGCPolicy().SanitizeBytes(mightBeUnsafe)
|
readmeOutput := utils.UGCPolicy().SanitizeBytes(mightBeUnsafe)
|
||||||
|
|
||||||
repoArray = append(repoArray, Repo{
|
|
||||||
Fullname: repo.Get("full_name").String(),
|
|
||||||
Description: repo.Get("description").String(),
|
|
||||||
HtmlUrl: repo.Get("html_url").String(),
|
|
||||||
Fork: repo.Get("fork").Bool(),
|
|
||||||
Stars: repo.Get("stargazers_count").Int(),
|
|
||||||
Forks: repo.Get("forks_count").Int(),
|
|
||||||
Watchers: repo.Get("watchers_count").Int(),
|
|
||||||
Language: repo.Get("language").String(),
|
|
||||||
License: repo.Get("license").Get("name").String(),
|
|
||||||
Parent: repo.Get("parent").Get("full_name").String(),
|
|
||||||
DefaultBranch: repo.Get("default_branch").String(),
|
|
||||||
})
|
|
||||||
|
|
||||||
return c.Render("repo", fiber.Map{
|
return c.Render("repo", fiber.Map{
|
||||||
"title": "Repository " + c.Params("user") + "/" + c.Params("repo"),
|
"title": "Repository " + c.Params("user") + "/" + c.Params("repo"),
|
||||||
"repo": repoArray,
|
"repo": repoArray,
|
||||||
|
|
|
@ -5,7 +5,6 @@ import (
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"codeberg.org/gothub/gothub/utils"
|
"codeberg.org/gothub/gothub/utils"
|
||||||
|
@ -26,8 +25,8 @@ type User struct {
|
||||||
Location string
|
Location string
|
||||||
Email string
|
Email string
|
||||||
Timezone string
|
Timezone string
|
||||||
Following int64
|
Following string
|
||||||
Followers int64
|
Followers string
|
||||||
Link string
|
Link string
|
||||||
Social []string
|
Social []string
|
||||||
Organizations []string
|
Organizations []string
|
||||||
|
@ -108,8 +107,8 @@ func HandleUser(c *fiber.Ctx) error {
|
||||||
Scrape.Social = append(Scrape.Social, el.ChildText("a.Link--primary"))
|
Scrape.Social = append(Scrape.Social, el.ChildText("a.Link--primary"))
|
||||||
})
|
})
|
||||||
// Followers/Following
|
// Followers/Following
|
||||||
Scrape.Followers, err = strconv.ParseInt(e.ChildText("a[href*='https://github.com/"+c.Params("user")+"?tab=followers' i] span"), 10, 64)
|
Scrape.Followers = e.ChildText("a[href*='https://github.com/" + c.Params("user") + "?tab=followers' i] span")
|
||||||
Scrape.Following, err = strconv.ParseInt(e.ChildText("a[href*='https://github.com/"+c.Params("user")+"?tab=following' i] span"), 10, 64)
|
Scrape.Following = e.ChildText("a[href*='https://github.com/" + c.Params("user") + "?tab=following' i] span")
|
||||||
// Organizations
|
// Organizations
|
||||||
e.ForEach("a[data-hovercard-type*='organization']", func(i int, el *colly.HTMLElement) {
|
e.ForEach("a[data-hovercard-type*='organization']", func(i int, el *colly.HTMLElement) {
|
||||||
Scrape.Organizations = append(Scrape.Organizations, el.Attr("aria-label"))
|
Scrape.Organizations = append(Scrape.Organizations, el.Attr("aria-label"))
|
||||||
|
@ -135,7 +134,7 @@ func HandleUser(c *fiber.Ctx) error {
|
||||||
Scrape.Social = append(Scrape.Social, el.Attr("href"))
|
Scrape.Social = append(Scrape.Social, el.Attr("href"))
|
||||||
})
|
})
|
||||||
// Followers
|
// Followers
|
||||||
Scrape.Followers, err = strconv.ParseInt(e.ChildText("a[href*='/orgs/"+c.Params("user")+"/followers' i] span"), 10, 64)
|
Scrape.Followers = e.ChildText("a[href*='/orgs/" + c.Params("user") + "/followers' i] span")
|
||||||
})
|
})
|
||||||
sc.OnHTML("img[alt*='@"+c.Params("user")+"' i]", func(e *colly.HTMLElement) {
|
sc.OnHTML("img[alt*='@"+c.Params("user")+"' i]", func(e *colly.HTMLElement) {
|
||||||
Scrape.AvatarUrl = e.Attr("src")
|
Scrape.AvatarUrl = e.Attr("src")
|
||||||
|
|
|
@ -10,20 +10,17 @@
|
||||||
|
|
||||||
<div class="userProfile">
|
<div class="userProfile">
|
||||||
<h1>{{.Fullname}}</h1>
|
<h1>{{.Fullname}}</h1>
|
||||||
{{ if .Fork }}
|
{{ if .Parent }}
|
||||||
<p>This repository is a fork of <a href="/{{.Parent}}">{{.Parent}}</a>.</p>
|
<p>This repository is a fork of <a href="/{{.Parent}}">{{.Parent}}</a>.</p>
|
||||||
{{ end }}
|
{{ end }}
|
||||||
{{ if .Description }}
|
{{ if .Description }}
|
||||||
<p>{{.Description}}</p>
|
<p>{{.Description}}</p>
|
||||||
{{ end }}
|
{{ end }}
|
||||||
{{ if .Language}}
|
|
||||||
{{ if .License }}
|
{{ if .License }}
|
||||||
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ {{.License}} 🗒️ {{.Language}}</p>
|
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ {{.License}} {{ if .Language }} 🗒️ {{.Language}} {{end}} 🌿 {{.DefaultBranch}}</p>
|
||||||
{{ else }}
|
{{ else }}
|
||||||
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ No license 🗒️ {{.Language}}</p>
|
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ No license {{ if .Language }} 🗒️ {{.Language}} {{end}} 🌿 {{.DefaultBranch}}</p>
|
||||||
{{ end }}
|
{{ end }}
|
||||||
{{ else }}
|
|
||||||
{{ end }}
|
|
||||||
</div>
|
</div>
|
||||||
{{end}}
|
{{end}}
|
||||||
{{ if .files}}
|
{{ if .files}}
|
||||||
|
@ -44,7 +41,11 @@
|
||||||
{{ end }}
|
{{ end }}
|
||||||
{{ if .readme}}
|
{{ if .readme}}
|
||||||
<div class="userReadme">
|
<div class="userReadme">
|
||||||
<h3>README.md</h3>
|
{{ if .repo }}
|
||||||
|
{{ range $key, $value := .repo}}
|
||||||
|
<h3>{{.Readme}}</h3>
|
||||||
|
{{end}}
|
||||||
|
{{end}}
|
||||||
<div class="userReadmeText">
|
<div class="userReadmeText">
|
||||||
{{ unescape .readme}}
|
{{ unescape .readme}}
|
||||||
</div>
|
</div>
|
||||||
|
@ -54,4 +55,4 @@
|
||||||
<h2>Repository not found</h2>
|
<h2>Repository not found</h2>
|
||||||
<p>That repository doesn't exist.</p>
|
<p>That repository doesn't exist.</p>
|
||||||
{{ end }}
|
{{ end }}
|
||||||
</main>
|
</main>
|
||||||
|
|
読み込み中…
新しいイシューから参照