gitlin/pages/dirview.go

122 行
3.9 KiB
Go

package pages
import (
"log"
"net/http"
"os"
"strings"
"time"
"gitler.moe/suwako/gitlin/utils"
"github.com/gocolly/colly"
"github.com/gofiber/fiber/v2"
)
// Dir holds the per-directory metadata that DirView passes to the "dir"
// template (one element in the rendered "dir" array).
type Dir struct {
	Readme   string // README link text scraped from div#readme, if present
	Username string // repository owner, from the ":user" route param
	Reponame string // repository name, from the ":repo" route param
	DirName  string // directory path inside the repo, from the "+" route param
	Branch   string // branch name, from the ":branch" route param
}
// DirFiles describes one entry (file or subdirectory) in the directory
// listing scraped from the GitHub tree page.
type DirFiles struct {
	Name     string // entry name from the js-navigation-open link
	Path     string // scraped from the same element as Name — presumably the link path; TODO confirm template usage
	Commit   string // message of the last commit touching this entry
	Date     string // last-commit date, reformatted ("T" -> "、", "Z"/offset stripped)
	Type     string // "dir" when the row icon is labeled "Directory", otherwise "file"
	Branch   string // copied from the enclosing Dir scrape
	Username string // copied from the enclosing Dir scrape
	Reponame string // copied from the enclosing Dir scrape
	DirName  string // copied from the enclosing Dir scrape
}
// DirView renders a repository directory listing by scraping the
// corresponding GitHub tree page.
//
// Route params: ":user", ":repo", ":branch", and "+" (the directory path).
// On success it renders the "dir" template with the directory metadata,
// the scraped file entries, and a sanitized README fragment; on a GitHub
// 404 it renders the "error" template with status 404.
func DirView(c *fiber.Ctx) error {
	var dirArray []Dir
	var dirFilesArray []DirFiles
	var readmeOutput string

	// Build the tree URL once; it is used both for the existence probe
	// and for the colly visit below.
	treeURL := "https://github.com/" + c.Params("user") + "/" + c.Params("repo") + "/tree/" + c.Params("branch") + "/" + c.Params("+")

	resp, statusErr := http.Get(treeURL)
	if statusErr != nil {
		// Without a response there is nothing to scrape; returning here
		// avoids the nil-pointer dereference on resp.StatusCode that the
		// previous log-and-continue caused.
		log.Println(statusErr)
		return c.Status(500).Render("error", fiber.Map{
			"title": "Error",
			"ver":   utils.Ver,
			"ves":   utils.Ves,
			"error": "Could not reach GitHub",
		})
	}
	// The body is never read (only the status code matters), but it must
	// still be closed so the transport can reuse the connection.
	defer resp.Body.Close()

	if resp.StatusCode == 404 {
		// I need a better way to do this
		return c.Status(404).Render("error", fiber.Map{
			"title": "Error",
			"ver":   utils.Ves,
			"ves":   utils.Ves,
			"error": "Directory " + c.Params("+") + " not found",
		})
	}

	// Scraping
	Scrape := Dir{}
	Scrape.Username = c.Params("user")
	Scrape.Reponame = c.Params("repo")
	Scrape.DirName = c.Params("+")
	Scrape.Branch = c.Params("branch")

	// Allow the UA string to be overridden via the environment; fall back
	// to a common desktop Chrome UA.
	UserAgent, ok := os.LookupEnv("GITLIN_USER_AGENT")
	if !ok {
		UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
	}

	sc := colly.NewCollector(colly.AllowedDomains("github.com"), colly.UserAgent(UserAgent))
	// NOTE(review): this rule throttles github.githubassets.com, but the
	// collector only visits github.com — confirm the glob is intentional.
	sc.Limit(&colly.LimitRule{
		DomainGlob:  "github.githubassets.com/*",
		Delay:       15 * time.Second,
		RandomDelay: 15 * time.Second,
	})

	// README heading text (the "#readme" anchor label).
	sc.OnHTML("div#readme", func(e *colly.HTMLElement) {
		Scrape.Readme = e.ChildText("a[href='#readme']")
	})

	// README body: sanitize, then rewrite absolute GitHub URLs to local
	// routes (/camo and /raw proxies, relative repo links).
	sc.OnHTML("div#readme div.Box-body", func(e *colly.HTMLElement) {
		Content, _ := e.DOM.Html()
		readmeOutput = strings.Replace(strings.Replace(strings.Replace(strings.Replace(string(utils.UGCPolicy().SanitizeBytes([]byte(Content))), "https://github.com", "", -1), "user-content-", "", -1), "https://camo.githubusercontent.com", "/camo", -1), "https://raw.githubusercontent.com", "/raw", -1)
	})

	// One row per entry in the file table.
	sc.OnHTML("div.js-details-container div.Details-content--hidden-not-important", func(e *colly.HTMLElement) {
		e.ForEach("div.js-navigation-item", func(i int, el *colly.HTMLElement) {
			var FileType string
			if el.ChildAttr("div.flex-shrink-0 svg", "aria-label") == "Directory" {
				FileType = "dir"
			} else {
				FileType = "file"
			}
			// Reformat the ISO timestamp: "T" becomes "、", trailing "Z"
			// or "+hh:mm" offsets are stripped, keeping "YYYY-MM-DD、hh:mm:ss".
			tstring := el.ChildAttr("relative-time", "datetime")
			if tstring != "" {
				tstring = strings.ReplaceAll(tstring, "T", "、")
				tstring = strings.ReplaceAll(tstring, "Z", "")
				tstring = strings.Split(tstring, "+")[0]
				tstrings := strings.Split(tstring, "-")
				tstring = tstrings[0] + "-" + tstrings[1] + "-" + tstrings[2]
			}
			if el.ChildText("div.flex-auto span.d-block a.js-navigation-open") != "" {
				dirFilesArray = append(dirFilesArray, DirFiles{
					Name:     el.ChildText("div.flex-auto span.d-block a.js-navigation-open"),
					Path:     el.ChildText("div.flex-auto span.d-block a.js-navigation-open"),
					Commit:   el.ChildText("div.flex-auto span.d-block a.Link--secondary"),
					Date:     tstring,
					Type:     FileType,
					Username: Scrape.Username,
					Reponame: Scrape.Reponame,
					DirName:  Scrape.DirName,
					Branch:   Scrape.Branch,
				})
			}
		})
	})

	// Best effort: a visit failure leaves the arrays empty, and the page
	// still renders; surface the cause in the log instead of dropping it.
	if err := sc.Visit(treeURL); err != nil {
		log.Println(err)
	}

	// Add scrape-based info to dirArray
	dirArray = append(dirArray, Scrape)
	return c.Render("dir", fiber.Map{
		"title":  c.Params("+") + " フォルダ | " + c.Params("user") + "/" + c.Params("repo"),
		"dir":    dirArray,
		"ver":    utils.Ver,
		"ves":    utils.Ves,
		"files":  dirFilesArray,
		"readme": readmeOutput,
	})
}