feat(scraper): proper type data

このコミットが含まれているのは:
sinkaroid 2022-06-04 14:07:07 +07:00
コミット 1d52106291
この署名に対応する既知のキーがデータベースに存在しません
GPGキーID: A7DF4E245FDD8159
10個のファイルの変更、152行の追加、61行の削除

ファイルの表示

@@ -2,38 +2,45 @@ import { load } from "cheerio";
import p from "phin";
import c from "../../utils/options";
interface IHentai2readGet {
title: string;
id: string;
image: string[];
}
interface IHentai2readGetPush {
data: object;
main_url: string;
current_url: string;
next_url?: string;
previus_url?: string;
}
export async function scrapeContent(url: string) {
try {
const res = await p(url);
const $ = load(res.body as Buffer);
//get all <script>
const script = $("script").map((i, el) => $(el).text()).get();
//find 'var gData = {}' inside script
const gData = script.find(el => el.includes("var gData"));
//remove all javascript code
const gDataClean: string = gData?.replace(/[\s\S]*var gData = /, "").replace(/;/g, "").replace(/'/g, "\"") || "";
const gDataJson = JSON.parse(gDataClean);
//add "https://" before all image array
const images = gDataJson.images.map((el: any) => `https://cdn-ngocok-static.sinxdr.workers.dev/hentai${el}`);
const objectData = {
const objectData: IHentai2readGet = {
title: gDataJson.title,
id: url.replace(c.HENTAI2READ, ""),
image: images,
image: images
};
const data = {
const data: IHentai2readGetPush = {
data: objectData,
main_url: gDataJson.mainURL,
current_url: gDataJson.currentURL,
next_url: gDataJson.nextURL,
previous_url: gDataJson.previousURL
previus_url: gDataJson.previousURL
};
return data;
} catch (err: any) {

ファイルの表示

@@ -3,24 +3,26 @@ import p from "phin";
import c from "../../utils/options";
import { getId } from "../../utils/modifier";
interface IHentai2readSearch {
title: string;
cover: string;
id: string;
link: string;
message: string;
}
export async function scrapeContent(url: string) {
try {
const res = await p(url);
const $ = load(res.body as Buffer);
//get all <span class="title-text"> then map all
const title = $(".title-text").map((i, el) => $(el).text()).get();
//get all <img data-src="
const imgSrc = $("img").map((i, el) => $(el).attr("data-src")).get();
//get all href in <div class="overlay-title">
const id = $(".overlay-title").map((i, el) => $(el).children("a").attr("href")).get();
const idClean = id.map(el => getId(el));
const content = [];
for (const abc of title) {
const objectData = {
const objectData: IHentai2readSearch = {
title: title[title.indexOf(abc)],
cover: `${c.HENTAI2READ}${imgSrc[title.indexOf(abc)]}`,
id: idClean[title.indexOf(abc)],

ファイルの表示

@@ -2,11 +2,20 @@ import { load } from "cheerio";
import p from "phin";
import c from "../../utils/options";
interface IHentaiFoxGet {
title: string;
id: number;
tags: string[];
type: string;
total: number;
image: string[];
}
export async function scrapeContent(url: string) {
try {
const res = await p(url);
const $ = load(res.body as Buffer);
const id = $("a.g_button")?.attr("href")?.split("/")[2];
const id = parseInt($("a.g_button")?.attr("href")?.split("/")[2] || "");
const category = $("a.tag_btn").map((i, abc) => {
return $(abc)?.text()?.replace(/[0-9]/g, "").trim();
@@ -20,21 +29,20 @@ export async function scrapeContent(url: string) {
return $(abc).text();
}).get();
const pageCount = info[0].replace(/[^0-9]/g, "");
const pageCount = parseInt(info[0].replace(/[^0-9]/g, ""));
const image = [];
for (let i = 0; i < Number(pageCount); i++) {
image.push(`${parameterImg}/${i + 1}${extensionImg}`);
}
const titleInfo = $("div.info").children("h1").text();
const objectData = {
const objectData: IHentaiFoxGet = {
title: titleInfo,
id: id,
tags: category,
tags: category,
type: extensionImg,
total: pageCount,
image: image,
};
const data = {

ファイルの表示

@@ -2,6 +2,14 @@ import { load } from "cheerio";
import p from "phin";
import c from "../../utils/options";
interface IHentaiFoxSearch {
title: string;
cover: string;
id: number;
category: string;
link: string;
}
export async function scrapeContent(url: string) {
try {
const res = await p(url);
@@ -25,13 +33,12 @@ export async function scrapeContent(url: string) {
const content = [];
for (const abc of title) {
const objectData = {
const objectData: IHentaiFoxSearch = {
title: title[title.indexOf(abc)],
cover: imgSrcClean[title.indexOf(abc)],
id: link[title.indexOf(abc)],
id: parseInt(link[title.indexOf(abc)]),
category: category[title.indexOf(abc)],
link: `${c.HENTAIFOX}/gallery/${link[title.indexOf(abc)]}`,
};
content.push(objectData);

ファイルの表示

@@ -2,6 +2,23 @@ import p from "phin";
import c from "../../utils/options";
import { getDate, timeAgo } from "../../utils/modifier";
interface INhentaiGet {
title: string;
optional_title: object;
id: number;
language: string;
tags: string[];
total: number;
image: string[];
num_pages: number;
num_favorites: number;
artist: string[];
group: string;
parodies: string;
characters: string[];
upload_date: string;
}
export async function scrapeContent(url: string) {
try {
const res = await p({ url: url, parse: "json" });
@@ -47,7 +64,7 @@ export async function scrapeContent(url: string) {
const time = new Date(dataRaw.upload_date * 1000);
const objectData = {
const objectData: INhentaiGet = {
title: dataRaw.title.pretty,
optional_title: {
english: dataRaw.title.english,
@@ -66,7 +83,6 @@ export async function scrapeContent(url: string) {
parodies: parodies,
characters: characters,
upload_date: `${getDate(time)} (${timeAgo(time)})`,
};
const data = {

ファイルの表示

@@ -2,6 +2,14 @@ import p from "phin";
import c from "../../utils/options";
import { getDate, timeAgo } from "../../utils/modifier";
interface INhentaiRelated {
title: string;
id: number;
upload_date: string;
total: number;
tags: string[];
}
export async function scrapeContent(url: string) {
try {
const res = await p({ url: url, parse: "json" });
@@ -10,19 +18,16 @@ export async function scrapeContent(url: string) {
const content = [];
for (let i = 0; i < rawData.result.length; i++) {
const time = new Date(rawData.result[i].upload_date * 1000);
const objectData = {
const objectData: INhentaiRelated = {
title: rawData.result[i].title,
id: rawData.result[i].id,
upload_date: `${getDate(time)} (${timeAgo(time)})`,
total: rawData.result[i].num_pages,
//get all tags name
tags: rawData.result[i].tags.map((tag: any) => tag.name),
};
content.push(objectData);
}
const data = {
data: content,
source: url.replace(c.NHENTAI_IP, c.NHENTAI),

ファイルの表示

@@ -2,6 +2,16 @@ import p from "phin";
import c from "../../utils/options";
import { getDate, timeAgo } from "../../utils/modifier";
interface INhentaiSearch {
title: string;
id: number;
language: string;
upload_date: string;
total: number;
cover: string;
tags: string[];
}
export async function scrapeContent(url: string) {
try {
const res = await p({ url: url, parse: "json" });
@@ -16,7 +26,7 @@ export async function scrapeContent(url: string) {
};
for (let i = 0; i < rawData.result.length; i++) {
const time = new Date(rawData.result[i].upload_date * 1000);
const objectData = {
const objectData: INhentaiSearch = {
title: rawData.result[i].title,
id: rawData.result[i].id,
language: rawData.result[i].tags.find((tag: any) => tag.type === "language") ? rawData.result[i].tags.find((tag: any) => tag.type === "language").name : null,
@@ -24,12 +34,10 @@ export async function scrapeContent(url: string) {
total: rawData.result[i].num_pages,
cover: `${GALLERY}/${rawData.result[i].media_id}/1.${TYPE[rawData.result[i].images.cover.t]}`,
tags: rawData.result[i].tags.map((tag: any) => tag.name),
};
content.push(objectData);
}
const data = {
data: content,
page: Number(url.split("&page=")[1]),

ファイルの表示

@@ -3,14 +3,28 @@ import p from "phin";
import c from "../../utils/options";
import { getPururinInfo, getUrl } from "../../utils/modifier";
interface IGetPururin {
title: string;
id: number;
tags: string[];
extension: string;
total: number;
image: string[];
}
interface IData{
data: object;
source: string;
}
export async function scrapeContent(url: string) {
try {
const res = await p(url);
const $ = load(res.body);
const title = $("div.content-wrapper h1").html();
const title: string = $("div.content-wrapper h1").html() || "";
const tags: string[] = $("div.content-wrapper ul.list-inline li").map((i, elm) => {
return getPururinInfo($(elm).text());
const tags: string[] = $("div.content-wrapper ul.list-inline li").map((i, abc) => {
return getPururinInfo($(abc).text());
}).get();
const cover = $("meta[property='og:image']").attr("content");
@@ -23,18 +37,18 @@ export async function scrapeContent(url: string) {
image.push(`${getUrl(cover?.replace("cover", `${i + 1}`) ?? "")}`);
}
const objectData = {
title: title,
id: id,
tags: tags,
type: extension,
total: total,
image: image
const objectData: IGetPururin = {
title,
id,
tags,
extension,
total,
image
};
const data = {
const data: IData = {
data: objectData,
source: `${c.PURURIN}/gallery/${id}/janda`,
source: `${c.PURURIN}/gallery/${id}/janda`
};
return data;
} catch (err: any) {

ファイルの表示

@@ -4,6 +4,22 @@ import c from "../../utils/options";
import { isText } from "domhandler";
import { getPururinInfo, getPururinPageCount } from "../../utils/modifier";
interface ISearchPururin {
title: string;
cover: string;
id: number;
info: string;
link: string;
total: number;
}
interface IData {
data: object;
page: number;
sort: string;
source: string;
}
export async function scrapeContent(url: string) {
try {
const res = await p(url);
@@ -20,25 +36,25 @@ export async function scrapeContent(url: string) {
}
const content = [];
for (const elm of dataRaw) {
for (const abc of dataRaw) {
const objectData = {
title: elm.attribs["alt"],
cover: elm.attribs["data-src"].replace(/^\/\//, "https://"),
id: elm.attribs["data-src"].split("data/")[1].split("/cover")[0],
info: infoBook[dataRaw.index(elm)],
link: `${c.PURURIN}/gallery/${elm.attribs["data-src"].split("data/")[1].split("/cover")[0]}/janda`,
total: getPururinPageCount(infoBook[dataRaw.index(elm)])
const objectData: ISearchPururin = {
title: abc.attribs["alt"],
cover: abc.attribs["data-src"].replace(/^\/\//, "https://"),
id: parseInt(abc.attribs["data-src"].split("data/")[1].split("/cover")[0]),
info: infoBook[dataRaw.index(abc)],
link: `${c.PURURIN}/gallery/${abc.attribs["data-src"].split("data/")[1].split("/cover")[0]}/janda`,
total: getPururinPageCount(infoBook[dataRaw.index(abc)])
};
content.push(objectData);
}
const data = {
const data: IData = {
data: content,
page: Number(url.split("&page=")[1]),
page: parseInt(url.split("&page=")[1]),
sort: url.split("/search/")[1].split("?")[0],
source: c.PURURIN,
source: c.PURURIN
};
return data;
} catch (err: any) {

ファイルの表示

@@ -2,6 +2,15 @@ import { load } from "cheerio";
import p from "phin";
import c from "../../utils/options";
interface ISimplyHentaiGet {
title: string;
id: string;
tags: string[];
total: number;
image: string[];
language: string;
}
export async function scrapeContent(url: string) {
try {
const res = await p(url);
@@ -16,14 +25,13 @@ export async function scrapeContent(url: string) {
const language = json.props.pageProps.data.language;
const metaRaw= json.props.pageProps.meta;
const objectData = {
const objectData: ISimplyHentaiGet = {
title: metaRaw.title,
id: url.replace(c.SIMPLY_HENTAI_PROXIFIED, ""),
tags: tags,
total: images.length,
image: images,
language: language.slug
};
const data = {