add basic metadata scraping for repos
このコミットが含まれているのは:
コミット
8aef883056
|
@ -1,27 +1,28 @@
|
|||
package pages
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
|
||||
"codeberg.org/gothub/gothub/utils"
|
||||
"context"
|
||||
"github.com/carlmjohnson/requests"
|
||||
"github.com/gocolly/colly"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gomarkdown/markdown"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
)
|
||||
|
||||
// Repo holds the metadata for a single repository that HandleRepo collects
// and passes to the "repo" template under the "repo" key.
//
// NOTE(review): this listing is a rendered diff, so Stars, Forks and Watchers
// appear twice below, once as int64 and once as string. The string
// declarations look like the post-commit version, since the scraper assigns
// them from page text via ChildText; confirm against the real file, which can
// declare each field only once.
//
// Readme is assigned from the scraped README link text and is later used as
// the file name when building the raw.githubusercontent.com URL.
type Repo struct {
|
||||
Fullname string
|
||||
Description string
|
||||
HtmlUrl string
|
||||
Fork bool
|
||||
Parent string
|
||||
Stars int64
|
||||
Forks int64
|
||||
Watchers int64
|
||||
Stars string
|
||||
Forks string
|
||||
Watchers string
|
||||
Language string
|
||||
License string
|
||||
DefaultBranch string
|
||||
Readme string
|
||||
}
|
||||
|
||||
type RepoFiles struct {
|
||||
|
@ -35,14 +36,20 @@ type RepoFiles struct {
|
|||
func HandleRepo(c *fiber.Ctx) error {
|
||||
var repoArray []Repo
|
||||
var repoFilesArray []RepoFiles
|
||||
// get repo
|
||||
repo := utils.GetRequest("https://api.github.com/repos/" + c.Params("user") + "/" + c.Params("repo"))
|
||||
if repo.Get("message").String() == "Not Found" {
|
||||
|
||||
resp, statusErr := http.Get("https://github.com/" + c.Params("user") + "/" + c.Params("repo"))
|
||||
if statusErr != nil {
|
||||
log.Println(statusErr)
|
||||
}
|
||||
if resp.StatusCode == 404 {
|
||||
// TODO: find a better way to detect a missing repository than issuing a separate GET to github.com and checking for a 404
|
||||
return c.Status(404).Render("error", fiber.Map{
|
||||
"title": "Error",
|
||||
"error": "Repository " + c.Params("user") + "/" + c.Params("repo") + " not found",
|
||||
})
|
||||
}
|
||||
// API
|
||||
repo := utils.GetRequest("https://api.github.com/repos/" + c.Params("user") + "/" + c.Params("repo"))
|
||||
repoFiles := utils.GetRequest("https://api.github.com/repos/" + c.Params("user") + "/" + c.Params("repo") + "/contents")
|
||||
bruh := repoFiles.Get("#.@pretty").Array()
|
||||
for _, item := range bruh {
|
||||
|
@ -55,36 +62,54 @@ func HandleRepo(c *fiber.Ctx) error {
|
|||
})
|
||||
}
|
||||
|
||||
var readmee string
|
||||
// Scraping
|
||||
Scrape := Repo{
|
||||
Language: repo.Get("language").String(),
|
||||
}
|
||||
|
||||
UserAgent, ok := os.LookupEnv("GOTHUB_USER_AGENT")
|
||||
if !ok {
|
||||
UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
|
||||
}
|
||||
|
||||
sc := colly.NewCollector(colly.AllowedDomains("github.com"), colly.UserAgent(UserAgent))
|
||||
sc.OnHTML("div.Layout-sidebar", func(e *colly.HTMLElement) {
|
||||
Scrape.Fullname = c.Params("user") + "/" + c.Params("repo")
|
||||
Scrape.Description = e.ChildText("p.f4")
|
||||
Scrape.Stars = e.ChildText("a[href*='/" + c.Params("user") + "/" + c.Params("repo") + "/stargazers' i] strong")
|
||||
Scrape.Watchers = e.ChildText("a[href*='/" + c.Params("user") + "/" + c.Params("repo") + "/watchers' i] strong")
|
||||
Scrape.Forks = e.ChildText("a[href*='/" + c.Params("user") + "/" + c.Params("repo") + "/network/members' i] strong")
|
||||
Scrape.License = e.ChildText("a[data-analytics-event*='{\"category\":\"Repository Overview\",\"action\":\"click\",\"label\":\"location:sidebar;file:license\"}']")
|
||||
})
|
||||
sc.OnHTML("div#readme", func(e *colly.HTMLElement) {
|
||||
Scrape.Readme = e.ChildText("a[href*='#readme']")
|
||||
})
|
||||
sc.OnHTML("div#repository-container-header", func(e *colly.HTMLElement) {
|
||||
Scrape.Parent = e.ChildText("span.text-small a")
|
||||
})
|
||||
sc.OnHTML("summary[title*='Switch branches or tags']", func(e *colly.HTMLElement) {
|
||||
Scrape.DefaultBranch = e.ChildText("span.css-truncate-target")
|
||||
})
|
||||
|
||||
sc.Visit("https://github.com/" + c.Params("user") + "/" + c.Params("repo") + "/")
|
||||
|
||||
// Add scrape-based info to repoArray
|
||||
repoArray = append(repoArray, Scrape)
|
||||
|
||||
// README
|
||||
var readmee string
|
||||
err := requests.
|
||||
URL("https://raw.githubusercontent.com/" + c.Params("user") + "/" + c.Params("repo") + "/" + repo.Get("default_branch").String() + "/README.md").
|
||||
URL("https://raw.githubusercontent.com/" + c.Params("user") + "/" + c.Params("repo") + "/" + Scrape.DefaultBranch + "/" + Scrape.Readme).
|
||||
ToString(&readmee).
|
||||
Fetch(context.Background())
|
||||
if err != nil {
|
||||
readmee = ""
|
||||
log.Println(err)
|
||||
}
|
||||
|
||||
mightBeUnsafe := markdown.ToHTML([]byte(readmee), nil, nil)
|
||||
|
||||
// Trust Nobody
|
||||
readmeOutput := utils.UGCPolicy().SanitizeBytes(mightBeUnsafe)
|
||||
|
||||
repoArray = append(repoArray, Repo{
|
||||
Fullname: repo.Get("full_name").String(),
|
||||
Description: repo.Get("description").String(),
|
||||
HtmlUrl: repo.Get("html_url").String(),
|
||||
Fork: repo.Get("fork").Bool(),
|
||||
Stars: repo.Get("stargazers_count").Int(),
|
||||
Forks: repo.Get("forks_count").Int(),
|
||||
Watchers: repo.Get("watchers_count").Int(),
|
||||
Language: repo.Get("language").String(),
|
||||
License: repo.Get("license").Get("name").String(),
|
||||
Parent: repo.Get("parent").Get("full_name").String(),
|
||||
DefaultBranch: repo.Get("default_branch").String(),
|
||||
})
|
||||
|
||||
return c.Render("repo", fiber.Map{
|
||||
"title": "Repository " + c.Params("user") + "/" + c.Params("repo"),
|
||||
"repo": repoArray,
|
||||
|
|
|
@ -5,7 +5,6 @@ import (
|
|||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"codeberg.org/gothub/gothub/utils"
|
||||
|
@ -26,8 +25,8 @@ type User struct {
|
|||
Location string
|
||||
Email string
|
||||
Timezone string
|
||||
Following int64
|
||||
Followers int64
|
||||
Following string
|
||||
Followers string
|
||||
Link string
|
||||
Social []string
|
||||
Organizations []string
|
||||
|
@ -108,8 +107,8 @@ func HandleUser(c *fiber.Ctx) error {
|
|||
Scrape.Social = append(Scrape.Social, el.ChildText("a.Link--primary"))
|
||||
})
|
||||
// Followers/Following
|
||||
Scrape.Followers, err = strconv.ParseInt(e.ChildText("a[href*='https://github.com/"+c.Params("user")+"?tab=followers' i] span"), 10, 64)
|
||||
Scrape.Following, err = strconv.ParseInt(e.ChildText("a[href*='https://github.com/"+c.Params("user")+"?tab=following' i] span"), 10, 64)
|
||||
Scrape.Followers = e.ChildText("a[href*='https://github.com/" + c.Params("user") + "?tab=followers' i] span")
|
||||
Scrape.Following = e.ChildText("a[href*='https://github.com/" + c.Params("user") + "?tab=following' i] span")
|
||||
// Organizations
|
||||
e.ForEach("a[data-hovercard-type*='organization']", func(i int, el *colly.HTMLElement) {
|
||||
Scrape.Organizations = append(Scrape.Organizations, el.Attr("aria-label"))
|
||||
|
@ -135,7 +134,7 @@ func HandleUser(c *fiber.Ctx) error {
|
|||
Scrape.Social = append(Scrape.Social, el.Attr("href"))
|
||||
})
|
||||
// Followers
|
||||
Scrape.Followers, err = strconv.ParseInt(e.ChildText("a[href*='/orgs/"+c.Params("user")+"/followers' i] span"), 10, 64)
|
||||
Scrape.Followers = e.ChildText("a[href*='/orgs/" + c.Params("user") + "/followers' i] span")
|
||||
})
|
||||
sc.OnHTML("img[alt*='@"+c.Params("user")+"' i]", func(e *colly.HTMLElement) {
|
||||
Scrape.AvatarUrl = e.Attr("src")
|
||||
|
|
|
@ -10,20 +10,17 @@
|
|||
|
||||
<div class="userProfile">
|
||||
<h1>{{.Fullname}}</h1>
|
||||
{{ if .Fork }}
|
||||
{{ if .Parent }}
|
||||
<p>This repository is a fork of <a href="/{{.Parent}}">{{.Parent}}</a>.</p>
|
||||
{{ end }}
|
||||
{{ if .Description }}
|
||||
<p>{{.Description}}</p>
|
||||
{{ end }}
|
||||
{{ if .Language}}
|
||||
{{ if .License }}
|
||||
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ {{.License}} 🗒️ {{.Language}}</p>
|
||||
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ {{.License}} {{ if .Language }} 🗒️ {{.Language}} {{end}} 🌿 {{.DefaultBranch}}</p>
|
||||
{{ else }}
|
||||
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ No license 🗒️ {{.Language}}</p>
|
||||
<p>⭐ {{.Stars}} 🍴 {{.Forks}} 👀 {{.Watchers}} ⚖️ No license {{ if .Language }} 🗒️ {{.Language}} {{end}} 🌿 {{.DefaultBranch}}</p>
|
||||
{{ end }}
|
||||
{{ else }}
|
||||
{{ end }}
|
||||
</div>
|
||||
{{end}}
|
||||
{{ if .files}}
|
||||
|
@ -44,7 +41,11 @@
|
|||
{{ end }}
|
||||
{{ if .readme}}
|
||||
<div class="userReadme">
|
||||
<h3>README.md</h3>
|
||||
{{ if .repo }}
|
||||
{{ range $key, $value := .repo}}
|
||||
<h3>{{.Readme}}</h3>
|
||||
{{end}}
|
||||
{{end}}
|
||||
<div class="userReadmeText">
|
||||
{{ unescape .readme}}
|
||||
</div>
|
||||
|
@ -54,4 +55,4 @@
|
|||
<h2>Repository not found</h2>
|
||||
<p>That repository doesn't exist.</p>
|
||||
{{ end }}
|
||||
</main>
|
||||
</main>
|
||||
|
|
読み込み中…
新しいイシューから参照