added -p option to re-process all files; fixed an issue that prevented a list from being updated when it is empty; various minor enhancements for the web site

Costa Tsaousis 2015-08-10 12:59:53 +03:00
parent 4a12797435
commit 337575577c
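
The new flag is used like the existing single-letter options; a minimal
invocation sketch (the script name and its presence on PATH are assumptions):

    update-ipsets.sh -p            # re-process all ipsets from cached sources
    update-ipsets.sh --reprocess   # long form of the same option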

@@ -264,6 +264,10 @@ CACHE_DIR="/var/lib/update-ipsets"
# where is the web url to show info about each ipset
# the ipset name is appended to it
WEB_URL="http://ktsaou.github.io/blocklist-ipsets/?ipset="
WEB_URL2="https://ktsaou.github.io/blocklist-ipsets/?ipset="
GITHUB_LOCAL_COPY_URL="https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/"
GITHUB_CHANGES_URL="https://github.com/ktsaou/blocklist-ipsets/commits/master/"
# options to be given to iprange for reducing netsets
IPSET_REDUCE_FACTOR="20"
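
For illustration, with a hypothetical ipset named dshield (file
dshield.netset) the three URLs expand to:

    http://ktsaou.github.io/blocklist-ipsets/?ipset=dshield
    https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/dshield.netset
    https://github.com/ktsaou/blocklist-ipsets/commits/master/dshield.netset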
@@ -279,6 +283,7 @@ PUSH_TO_GIT=0
ENABLE_ALL=0
IGNORE_LASTCHECKED=0
FORCE_WEB_REBUILD=0
REPROCESS_ALL=0
SILENT=0
VERBOSE=0
CONFIG_FILE="/etc/firehol/update-ipsets.conf"
@@ -286,6 +291,7 @@ while [ ! -z "${1}" ]
do
case "${1}" in
--rebuild|-r) FORCE_WEB_REBUILD=1;;
--reprocess|-p) REPROCESS_ALL=1;;
--silent|-s) SILENT=1;;
--push-git|-g) PUSH_TO_GIT=1;;
--recheck|-i) IGNORE_LASTCHECKED=1;;
@@ -491,7 +497,13 @@ commit_to_git() {
echo >&2
syslog "Committing ${to_be_pushed[@]} to git repository"
git commit "${to_be_pushed[@]}" set_file_timestamps.sh -m "`date -u` update"
local date="$(date -u)"
# we commit each file alone, to have a clear history per file in github
for d in "${to_be_pushed[@]}" set_file_timestamps.sh
do
echo "${d}..."
git commit "${d}" -m "${date} update"
done
if [ ${PUSH_TO_GIT} -ne 0 ]
then
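
With this loop each file gets its own commit, so the per-file history on
GitHub stays linear and can be inspected directly (hypothetical file name):

    git log --oneline -- dshield.netset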
@@ -841,6 +853,14 @@ ipset_json() {
info=$(echo "${info}" | sed "s/)/)\n/g" | sed "s|\[\(.*\)\](\(.*\))|<a href=\"\2\">\1</a>|g" | tr "\n" " ")
info="${info//\"/\\\"}"
local file_local=
local commit_history=
if [ -z "${DO_NOT_REDISTRIBUTE[${IPSET_FILE[${ipset}]}]}" ]
then
file_local="${GITHUB_LOCAL_COPY_URL}${IPSET_FILE[${ipset}]}"
commit_history="${GITHUB_CHANGES_URL}${IPSET_FILE[${ipset}]}"
fi
cat <<EOFJSON
{
"name": "${ipset}",
@@ -861,7 +881,9 @@ ipset_json() {
"history": "${ipset}_history.csv",
"geolite2": "${geolite2}",
"ipdeny": "${ipdeny}",
"comparison": "${comparison}"
"comparison": "${comparison}",
"file_local": "${file_local}",
"commit_history": "${commit_history}"
}
EOFJSON
}
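
When an ipset may be redistributed, the two new JSON fields come out roughly
as follows (hypothetical values):

    "file_local": "https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/dshield.netset",
    "commit_history": "https://github.com/ktsaou/blocklist-ipsets/commits/master/dshield.netset"

For ipsets marked DO_NOT_REDISTRIBUTE both fields are emitted as empty strings.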
@@ -1080,6 +1102,8 @@ params_sort() {
}
update_web() {
local sitemap_date="$(date -I)"
[ -z "${WEB_DIR}" -o ! -d "${WEB_DIR}" ] && return 1
[ "${#UPDATED_SETS[@]}" -eq 0 -a ! ${FORCE_WEB_REBUILD} -eq 1 ] && return 1
@@ -1167,14 +1191,35 @@ update_web() {
printf >>"${RUN_DIR}/all-ipsets.json" ",\n \"${x}\""
fi
cat >>"${RUN_DIR}/sitemap.xml" <<EOFSITEMAP
cat >>"${RUN_DIR}/sitemap.xml" <<EOFSITEMAP1
<url>
<loc>${WEB_URL}${x}</loc>
<lastmod>$(date -I)</lastmod>
<lastmod>${sitemap_date}</lastmod>
<changefreq>always</changefreq>
</url>
EOFSITEMAP
EOFSITEMAP1
if [ ! -z "${WEB_URL2}" ]
then
cat >>"${RUN_DIR}/sitemap.xml" <<EOFSITEMAP2
<url>
<loc>${WEB_URL2}${x}</loc>
<lastmod>${sitemap_date}</lastmod>
<changefreq>always</changefreq>
</url>
EOFSITEMAP2
fi
if [ -z "${DO_NOT_REDISTRIBUTE[${IPSET_FILE[$x]}]}" ]
then
cat >>"${RUN_DIR}/sitemap.xml" <<EOFSITEMAP3
<url>
<loc>${GITHUB_LOCAL_COPY_URL}${x}</loc>
<lastmod>${sitemap_date}</lastmod>
<changefreq>always</changefreq>
</url>
EOFSITEMAP3
fi
fi
done
printf >>"${RUN_DIR}/all-ipsets.json" "\n]\n"
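
A single redistributable ipset can now contribute up to three sitemap
entries, one per URL, each shaped like this (hypothetical name and date):

    <url>
     <loc>https://ktsaou.github.io/blocklist-ipsets/?ipset=dshield</loc>
     <lastmod>2015-08-10</lastmod>
     <changefreq>always</changefreq>
    </url>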
@@ -1436,7 +1481,7 @@ finalize() {
# compare the new and the old
diff -q "${tmp}.old" "${tmp}" >/dev/null 2>&1
if [ $? -eq 0 ]
if [ $? -eq 0 -a ${REPROCESS_ALL} -eq 0 ]
then
# they are the same
rm "${tmp}" "${tmp}.old"
@@ -1499,21 +1544,28 @@ finalize() {
#
# ${ipv} hash:${hash} ipset
#
`echo "${info}" | fold -w 60 -s | sed "s/^/# /g"`
#
# Source URL: ${url}
`echo "${info}" | sed "s|](|] (|g" | fold -w 60 -s | sed "s/^/# /g"`
#
# Maintainer : ${maintainer}
# Maintainer URL : ${maintainer_url}
# List source URL : ${url}
# Source File Date: `date -r "${src}" -u`
#
# Category : ${category}
#
# This File Date : `date -u`
# Update Frequency: `mins_to_text ${mins}`
# Aggregation : `mins_to_text ${history_mins}`
# Entries : ${quantity}
# Category : ${category}
#
# Maintainer : ${maintainer}
# Maintainer URL : ${maintainer_url}
# Full list analysis, including geolocation map, history,
# retention policy, overlaps with other lists, etc.
# available at:
#
# ${WEB_URL}${ipset}
#
# Generated by FireHOL's update-ipsets.sh
# Processed with FireHOL's iprange
#
EOFHEADER
@@ -1642,7 +1694,7 @@ update() {
if [ $? -eq ${DOWNLOAD_FAILED} -o \( $? -eq ${DOWNLOAD_NOT_UPDATED} -a -f "${install}.${hash}net" \) ]
then
if [ ! -s "${install}.source" ]; then return 1
elif [ -f "${install}.${hash}set" ]
elif [ -f "${install}.${hash}set" -a ${REPROCESS_ALL} -eq 0 ]
then
check_file_too_old "${ipset}" "${install}.${hash}set"
return 1
@@ -1665,7 +1717,7 @@ update() {
fi
# check if the source file has been updated
if [ ! "${install}.source" -nt "${install}.${hash}set" ]
if [ ${REPROCESS_ALL} -eq 0 -a ! "${install}.source" -nt "${install}.${hash}set" ]
then
echo >&2 "${ipset}: not updated - no reason to process it again."
check_file_too_old "${ipset}" "${install}.${hash}set"
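
The -nt operator compares file modification times, so an unchanged source
used to end processing here; a minimal demonstration of the test:

    touch -d '2015-08-01' source; touch -d '2015-08-02' result
    [ source -nt result ] && echo "re-process" || echo "skip"   # prints "skip"

With --reprocess the leading REPROCESS_ALL check makes the whole test false,
so the source is filtered again even when its timestamp has not advanced.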
@@ -1680,12 +1732,11 @@ update() {
${pre_filter} |\
${filter} |\
${post_filter} |\
${post_filter2} |\
sort -u >"${tmp}"
${post_filter2} >"${tmp}"
if [ $? -ne 0 ]
then
syslog "${ipset}: failed to convert file."
syslog "${ipset}: failed to convert file (processor: ${processor}, pre_filter: ${pre_filter}, filter: ${filter}, post_filter: ${post_filter}, post_filter2: ${post_filter2})."
rm "${tmp}"
check_file_too_old "${ipset}" "${install}.${hash}set"
return 1
@@ -1843,6 +1894,10 @@ rename_ipset clean_mx_viruses cleanmx_viruses
# INTERNAL FILTERS
# all these should be used with pipes
# grep and egrep return 1 if they match nothing
# this will break the filters if the source is empty
# so we make them return 0 always
# match a single IPv4 IP
# zero prefix is not permitted: 0 - 255, not 000, 010, etc
IP4_MATCH="(((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))"
@@ -1852,19 +1907,19 @@ MK4_MATCH="(3[12]|[12][0-9]|[1-9])"
# strict checking of IPv4 IPs - all subnets excluded
# we remove /32 before matching
filter_ip4() { remove_slash32 | egrep "^${IP4_MATCH}$"; }
filter_ip4() { remove_slash32 | egrep "^${IP4_MATCH}$"; return 0; }
# strict checking of IPv4 CIDRs, except /32
# this is to support older ipsets that do not accept /32 in hash:net ipsets
filter_net4() { remove_slash32 | egrep "^${IP4_MATCH}/${MK4_MATCH}$"; }
filter_net4() { remove_slash32 | egrep "^${IP4_MATCH}/${MK4_MATCH}$"; return 0; }
# strict checking of IPv4 IPs or CIDRs
# hosts may or may not have /32
filter_all4() { egrep "^${IP4_MATCH}(/${MK4_MATCH})?$"; }
filter_all4() { egrep "^${IP4_MATCH}(/${MK4_MATCH})?$"; return 0; }
filter_ip6() { remove_slash128 | egrep "^([0-9a-fA-F:]+)$"; }
filter_net6() { remove_slash128 | egrep "^([0-9a-fA-F:]+/[0-9]+)$"; }
filter_all6() { egrep "^([0-9a-fA-F:]+(/[0-9]+)?)$"; }
filter_ip6() { remove_slash128 | egrep "^([0-9a-fA-F:]+)$"; return 0; }
filter_net6() { remove_slash128 | egrep "^([0-9a-fA-F:]+/[0-9]+)$"; return 0; }
filter_all6() { egrep "^([0-9a-fA-F:]+(/[0-9]+)?)$"; return 0; }
remove_slash32() { sed "s|/32$||g"; }
remove_slash128() { sed "s|/128$||g"; }
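
The appended "return 0" is the empty-list fix from the commit message: egrep
exits 1 when it matches nothing, and update() treats a non-zero exit from the
end of its filter pipeline as a failed conversion. A sketch of the difference:

    printf '' | filter_ip4; echo $?   # 1 before this change, 0 after

An empty source now produces a valid, empty set instead of an error.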
@@ -1883,6 +1938,7 @@ append_slash128() {
filter_invalid4() {
egrep -v "^(0\.0\.0\.0|.*/0)$"
return 0
}
@@ -2141,7 +2197,8 @@ geolite2_country() {
download_manager "${ipset}" "${mins}" "${url}"
if [ $? -eq ${DOWNLOAD_FAILED} -o $? -eq ${DOWNLOAD_NOT_UPDATED} ]
then
[ -d ${ipset} -o ! -s "${ipset}.source" ] && return 1
[ ! -s "${ipset}.source" ] && return 1
[ -d ${ipset} -a ${REPROCESS_ALL} -eq 0 ] && return 1
fi
# create a temp dir
@@ -2282,7 +2339,8 @@ ipdeny_country() {
download_manager "${ipset}" "${mins}" "${url}"
if [ $? -eq ${DOWNLOAD_FAILED} -o $? -eq ${DOWNLOAD_NOT_UPDATED} ]
then
[ -d ${ipset} -o ! -s "${ipset}.source" ] && return 1
[ ! -s "${ipset}.source" ] && return 1
[ -d ${ipset} -a ${REPROCESS_ALL} -eq 0 ] && return 1
fi
# create a temp dir
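
Both country functions get the same adjustment: a missing or empty source
still aborts, but an existing extraction directory only stops the run when
--reprocess is off. The logic, condensed (hypothetical values):

    REPROCESS_ALL=1
    [ ! -s "${ipset}.source" ] && return 1                  # no data at all
    [ -d ${ipset} -a ${REPROCESS_ALL} -eq 0 ] && return 1   # already done, not forced
    # falls through: re-extract and rebuild the country ipsets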