wibu-rss/src/tirto

43 行
1.5 KiB
Bash
実行ファイル

#!/bin/sh
# tirto - add new tirto.id articles to the "tirto" feed file.
#
# Globals read:
#   tmp - path of the file holding the listing page whose links are scanned
#         (presumably filled in by validateFeed - defined elsewhere, confirm).
#   dir - base directory; the feed lives at "$dir/feeds/tirto.xml".
# Globals written:
#   id, host - kept as plain assignments, exactly like before.
# External helpers (defined elsewhere in the project):
#   validateFeed ID HOST - called first with the feed id and site URL.
#   shup SELECTOR...     - used as a stdin filter; appears to print the HTML
#                          elements matching the selectors (confirm).
# Outputs:
#   "Entry already exists." for every link already present in the feed and
#   "Entry inserted." for every article added, both on stdout.
#
# A new item replaces the "<!-- content -->" marker line of the feed; the item
# text itself starts with that marker, so the marker survives every insertion.
tirto() {
  id="tirto"
  host="https://tirto.id"
  validateFeed "$id" "$host"

  # Turn every <a href="..."> of the listing into an absolute URL without
  # query string, drop blank lines and repeated links (first occurrence wins).
  shup "body" "a" < "$tmp" \
    | grep '<a\shref="' \
    | sed -e 's/<a\shref="//' \
          -e 's/".*$//' \
          -e 's/\/?.*//' \
          -e 's/?.*$//' \
          -e "s|^/|$host/|" \
    | grep '\S' \
    | awk '!seen[$0]++' \
    | (
      # Explicit subshell: the helper variables below never leak to the
      # caller, and "exit" only leaves this pipeline stage.
      feed="$dir/feeds/$id.xml"

      # Private scratch files. "$tmp" is still being read by the head of this
      # pipeline, so it must not be overwritten from inside the loop.
      article=$(mktemp) || exit 1
      item=$(mktemp) || { rm -f -- "$article"; exit 1; }

      while read -r url; do
        # Known link: report it and move on without parsing anything.
        # -F keeps the dots of the URL from acting as regex wildcards.
        if grep -qF -- "$url" "$feed"; then
          printf '%s\n' "Entry already exists."
          continue
        fi

        # A failed download leaves nothing trustworthy to parse.
        curl -s "$url" > "$article" || continue

        # Article body flattened to a single line; empty means "not an article".
        content=$(shup "body" "div" "article" "div[content-text-editor]" < "$article" \
          | grep '\S' \
          | sed 's/^\s*//' \
          | awk '{printf ("%s", $0)}' \
          | sed 's/<script.*>.*<\/script>//')
        [ -n "$content" ] || continue

        # First "YYYY-MM-DD hh:mm:ss" on a datePublished line, taken as +0700
        # and reformatted to the RFC 5322 date used by <pubDate>.
        stamp=$(grep "datePublished" "$article" \
          | grep -o '[0-9]*-[0-9]*-[0-9]*\s[0-9]*:[0-9]*:[0-9]*' \
          | head -n 1)
        [ -n "$stamp" ] || continue
        pubdate=$(date -d "${stamp}+0700" -R) || continue
        [ -n "$pubdate" ] || continue

        # The substitution's status is that of the final grep, so a page
        # without a usable title is skipped.
        title=$(shup "title" < "$article" \
          | sed -e 's/<titl.*">//' -e 's/<\/title>//' -e 's/^\s*//' \
          | grep '\S') || continue

        printf "<!-- content -->\n<item>\n<title>%s</title>\n<pubDate>%s</pubDate>\n<guid>%s</guid>\n<link>%s</link>\n<description><![CDATA[%s]]></description>\n</item>\n" \
          "$title" "$pubdate" "$url" "$url" "$content" > "$item" \
          && sed -i -e '/<!-- content -->/{' -e "r $item" -e 'd' -e '}' "$feed" \
          && printf '%s\n' "Entry inserted."
      done

      rm -f -- "$article" "$item"
    )
}