wibu-rss/src/kanau

33 行
1.4 KiB
Bash
実行ファイル

#!/bin/sh
#######################################
# Add posts from kanau.org that are not yet present in the kanau feed file.
#
# For each post link found in the index page, the post is downloaded, its
# title, publication date and body are extracted, and a new <item> is
# inserted into the feed at the "<!-- content -->" marker line. The inserted
# text starts with a fresh marker, so later items land above earlier ones.
#
# Globals:
#   id, host  set here (intentionally not local) and passed to validateFeed.
#   tmp       read; path of a scratch file defined outside this function.
#             NOTE(review): it is expected to hold the index page once
#             validateFeed returns - inferred from usage, confirm.
#             It is overwritten for every post that gets processed.
#   dir       read; base directory holding feeds/<id>.xml, defined outside.
# Depends on:
#   validateFeed and shup (defined elsewhere), curl, and the `\s`/`\S`,
#   `sed -i` and `date -d` extensions of the grep/sed/date in use.
# Outputs:
#   stdout: "Entry already exists." or "Entry inserted." per post.
#   stderr: a diagnostic for each post that could not be added.
#######################################
kanau() {
  id="kanau"
  host="https://kanau.org"
  validateFeed "$id" "$host"

  # The link lines are produced by a command substitution, which has to
  # finish reading "$tmp" before printf writes anything. The loop reuses
  # "$tmp" as scratch space, so it must not run while the index is being read.
  printf '%s\n' "$(
    shup "article" "h2[title]" "a" < "$tmp" \
      | grep '<a\shref="' \
      | sed -e 's/\sclass="post-title\spost-url"//' \
        -e 's/\sclass="post-url\spost-title"//'
  )" | while IFS= read -r line; do
    url=$(printf '%s\n' "$line" | grep -o 'http.*">' | cut -d '"' -f 1)

    # Skip lines without an absolute URL; this also covers the single empty
    # line printf emits when the index contained no links at all.
    [ -n "$url" ] || continue

    feed="$dir/feeds/$id.xml"

    # Fixed-string match: a URL contains regex metacharacters (".", "?"),
    # which would otherwise make unrelated entries look like duplicates.
    if grep -qF -- "$url" "$feed"; then
      echo "Entry already exists."
      continue
    fi

    # -f makes curl fail on HTTP errors instead of saving the error page.
    # A literal "]]>" in the body would terminate the CDATA section early,
    # so it is split across two sections after the lines have been joined.
    if curl -fs "$url" > "$tmp" \
      && title=$(shup "title" < "$tmp" \
        | sed -e 's/<title>//' -e 's/<\/title>//' -e 's/^\s*//' \
        | grep '\S') \
      && pubdate=$(grep '<meta\sproperty="article:published_time"' "$tmp" \
        | grep -o '[0-9]*-[0-9]*-[0-9]*T.*0[0-9]:00' \
        | xargs -I {} date -d {} -R) \
      && content=$(shup "body" "div" "main" "div" "div" "div" "div" "article" "div[entry-content]" < "$tmp" \
        | grep '\S' \
        | sed 's/^\s*//' \
        | awk '{printf("%s", $0)}' \
        | sed 's/]]>/]]]]><![CDATA[>/g') \
      && printf '<!-- content -->\n<item>\n<title>%s</title>\n<pubDate>%s</pubDate>\n<guid>%s</guid>\n<link>%s</link>\n<description><![CDATA[%s]]></description>\n</item>\n' \
        "$title" "$pubdate" "$url" "$url" "$content" > "$tmp" \
      && sed -i -e '/<!-- content -->/ {' -e "r $tmp" -e 'd' -e '}' "$feed"; then
      echo "Entry inserted."
    else
      printf 'kanau: could not add entry for %s\n' "$url" >&2
    fi
  done
}