firehol/sbin/iprange.c

/* iprange
 *
 * FireHOL - A firewall for humans...
 *
 * FireHOL Copyright
 *
 *      Copyright (C) 2003-2015 Costa Tsaousis <costa@tsaousis.gr>
 *      Copyright (C) 2012-2015 Phil Whineray <phil@sanewall.org>
 *
 * Original iprange.c Copyright:
 *
 *      Copyright (C) 2003 Gabriel L. Somlo
 *
 *      comment by Costa Tsaousis:
 *      An excellent work by Gabriel Somlo for loading and merging CIDRs.
 *      I have built all the features this tool provides on top of the
 *      (still) almost untouched original source.
 *
 *  License
 *
 *      This program is free software; you can redistribute it and/or modify
 *      it under the terms of the GNU General Public License as published by
 *      the Free Software Foundation; either version 2 of the License, or
 *      (at your option) any later version.
 *
 *      This program is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *      GNU General Public License for more details.
 *
 *      You should have received a copy of the GNU General Public License
 *      along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 *      See the file COPYING for details.
 *
 * To compile:
 *  on Linux:
 *   gcc -o iprange iprange.c -O2 -Wall
 *  on Solaris 8, Studio 8 CC:
 *   cc -xO5 -xarch=v8plusa -xdepend iprange.c -o iprange -lnsl -lresolv
 *
 * CHANGELOG:
 *  2003 Gabriel L. Somlo, the original author of iprange.c core
 *   - found at http://www.cs.colostate.edu/~somlo/iprange.c
 *  2004-10-16 Paul Townsend (alpha alpha beta at purdue dot edu)
 *   - more general input/output formatting
 *  2015-05-31 Costa Tsaousis (costa@tsaousis.gr)
 *   - added -C option to report count of unique IPs
 *   - some optimizations to speed it up by 10% - 20%
 *  2015-06-06 Costa Tsaousis (costa@tsaousis.gr)
 *   - added support for loading multiple sets
 *   - added support for merging multiple files
 *   - added support for comparing ipsets (all-to-all, one-to-all)
 *   - added support for parsing IP ranges from the input file
 *     (much like -s did for a single range)
 *   - added support for parsing netmasks
 *   - added support for min prefix generated
 *   - added support for generated only specific prefixes
 *   - added support for reducing the prefixes for iptables ipsets
 *   - the output is now always optimized (reduced / merged)
 *   - removed option -s (convert a single IP range to CIDR)
 *   - added support for finding the common IPs in multiple files
 *   - added timings
 *   - added verbose output
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/time.h>

#ifdef __GNUC__
// Branch prediction hints: likely(x) / unlikely(x) evaluate to the truth
// value of x while telling GCC which outcome to expect.
// #warning "Using GCC branch optimizations"
#define likely(x)       __builtin_expect(!!(x), 1)
#define unlikely(x)     __builtin_expect(!!(x), 0)
#else
// Other compilers: the hints collapse to the plain expression.
#define likely(x)       (x)
#define unlikely(x)     (x)
#endif

// if set, use MODE_COMMON to compare files
// this is 20 times faster than MODE COMBINE
#define COMPARE_WITH_COMMON 1

// first line of a file written by ipset_save_binary_v10()
#define BINARY_HEADER_V10 "iprange binary format v1.0\n"
// marker written right after the text header of a binary file and compared
// on load, so that files written with a different byte order are rejected
uint32_t endianess = 0x1A2B3C4D;

// program name used as the prefix of every diagnostic message
static char *PROG;
// when non-zero, progress / diagnostic messages are printed on stderr
int debug = 0;
// when non-zero, str_to_netaddr() masks the parsed address down to its
// network address; otherwise the address is used as given
int cidr_use_network = 1;
// prefix assumed by str_to_netaddr() for entries that have no "/..." part
int default_prefix = 32;

// strings printed before / after each output line by print_addr() and
// print_addr_range(): the "ips" pair is used for single addresses,
// the "nets" pair for networks and ranges
char *print_prefix_ips  = "";
char *print_prefix_nets = "";
char *print_suffix_ips  = "";
char *print_suffix_nets = "";

/*---------------------------------------------------------------------*/
/* network address type: the first and the last address of the range   */
/*---------------------------------------------------------------------*/
/* One contiguous range of IPv4 addresses, both ends inclusive, host byte order. */
typedef struct network_addr {
	in_addr_t addr;		/* first address of the range (network address for a CIDR) */
	in_addr_t broadcast;	/* last address of the range (broadcast address for a CIDR) */
} network_addr_t;

/*------------------------------------------------------------------*/
/* Set a bit to a given value (0 or 1); MSB is bit 1, LSB is bit 32 */
/*------------------------------------------------------------------*/
static inline in_addr_t set_bit(in_addr_t addr, int bitno, int val)
{
	/*
	 * addr  - the 32-bit value to modify (host byte order)
	 * bitno - bit to change, 1 = most significant, 32 = least significant
	 * val   - zero clears the bit, anything else sets it
	 *
	 * Returns the modified value. A bitno outside 1..32 has no bit to
	 * address (and would need an invalid shift count), so addr is returned
	 * unchanged in that case.
	 */
	in_addr_t mask;

	if (bitno < 1 || bitno > 32)
		return addr;

	/*
	 * The mask is built in the unsigned 32-bit type: with a plain int,
	 * "1 << 31" (bitno == 1) shifts into the sign bit, which is undefined.
	 */
	mask = (in_addr_t)1 << (32 - bitno);

	return val ? (addr | mask) : (addr & ~mask);

}				/* set_bit() */

/*--------------------------------------*/
/* Compute netmask address given prefix */
/*--------------------------------------*/
static inline in_addr_t netmask(int prefix)
{
	/*
	 * Returns the netmask (host byte order) that has its `prefix` most
	 * significant bits set: 0 -> 0.0.0.0, 24 -> 255.255.255.0,
	 * 32 -> 255.255.255.255.
	 *
	 * Prefixes below 0 / above 32 are clamped, because they would require
	 * shift counts that are undefined for a 32-bit value.
	 */
	if (prefix <= 0)
		return 0;

	if (prefix >= 32)
		return ~(in_addr_t)0;

	/* all arithmetic is unsigned: "1 << 31" on a signed int is undefined */
	return ~(((in_addr_t)1 << (32 - prefix)) - 1);

}				/* netmask() */

/*----------------------------------------------------*/
/* Compute broadcast address given address and prefix */
/*----------------------------------------------------*/
static inline in_addr_t broadcast(in_addr_t addr, int prefix)
{
	/* the host part of the address is everything the netmask does not cover */
	in_addr_t host_bits = ~netmask(prefix);

	/* the last address of the network has every host bit set */
	return addr | host_bits;

}				/* broadcast() */

/*--------------------------------------------------*/
/* Compute network address given address and prefix */
/*--------------------------------------------------*/
static inline in_addr_t network(in_addr_t addr, int prefix)
{
	/* keep only the bits covered by the netmask of the given prefix */
	in_addr_t mask = netmask(prefix);

	return addr & mask;

}				/* network() */

/*------------------------------------------------*/
/* Print out a 32-bit address in A.B.C.D/M format */
/*------------------------------------------------*/

/* prefix_counters[p]: how many times print_addr() was called with prefix p (0..32) */
int prefix_counters[33] = { 0 };
/* prefix_enabled[p]: non-zero when split_range() may emit a block with prefix p */
int prefix_enabled[33] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
/* when non-zero, print_addr() only updates prefix_counters[] and prints nothing */
int split_range_disable_printing = 0;

static inline void print_addr(in_addr_t addr, int prefix)
{
	/*
	 * Accounts for one generated CIDR block and, unless printing is
	 * disabled, writes it to stdout. Blocks with a prefix below 32 are
	 * printed as A.B.C.D/M, everything else as a plain address.
	 */
	struct in_addr in;

	/* count the block even when it is not going to be printed */
	if(likely(prefix >= 0 && prefix <= 32))
		prefix_counters[prefix] += 1;

	/* counting-only mode */
	if(unlikely(split_range_disable_printing))
		return;

	in.s_addr = htonl(addr);

	if (prefix >= 32) {
		printf("%s%s%s\n", print_prefix_ips, inet_ntoa(in), print_suffix_ips);
		return;
	}

	printf("%s%s/%d%s\n", print_prefix_nets, inet_ntoa(in), prefix, print_suffix_nets);

}				/* print_addr() */

/*------------------------------------------------------------*/
/* Recursively compute network addresses to cover range lo-hi */
/*------------------------------------------------------------*/
/* Note: The worst case scenario is when lo=0.0.0.1 and hi=255.255.255.254.
 *       We then need 62 CIDR blocks to cover this interval, and 125
 *       calls to split_range().
 *       The maximum possible recursion depth is 32.
 */

static inline void split_range(in_addr_t addr, int prefix, in_addr_t lo, in_addr_t hi)
{
	/*
	 * addr/prefix - the network currently being examined
	 * lo, hi      - the part of that network that has to be covered
	 *
	 * If lo-hi is exactly the whole network (and its prefix is enabled)
	 * the network is emitted via print_addr(); otherwise the network is
	 * cut in two halves and each half that intersects lo-hi is examined
	 * recursively.
	 */
	in_addr_t last, low_start, high_start;
	int sub;

	if (unlikely(prefix < 0 || prefix > 32)) {
		fprintf(stderr, "%s: Invalid mask size %d!\n", PROG, prefix);
		return;
	}

	last = broadcast(addr, prefix);

	if (unlikely(lo < addr || hi > last)) {
		fprintf(stderr, "%s: Out of range limits: %x, %x for "
			"network %x/%d, broadcast: %x!\n", PROG, lo, hi, addr, prefix, last);
		return;
	}

	/* exact match on an enabled prefix: this network is part of the answer */
	if (lo == addr && hi == last && prefix_enabled[prefix]) {
		print_addr(addr, prefix);
		return;
	}

	/* the two halves have a prefix one bit longer */
	sub = prefix + 1;
	low_start = addr;
	high_start = set_bit(addr, sub, 1);

	if (hi < high_start) {
		/* the range lives entirely in the lower half */
		split_range(low_start, sub, lo, hi);
		return;
	}

	if (lo >= high_start) {
		/* the range lives entirely in the upper half */
		split_range(high_start, sub, lo, hi);
		return;
	}

	/* the range spans both halves: cut it at the boundary */
	split_range(low_start, sub, lo, broadcast(low_start, sub));
	split_range(high_start, sub, high_start, hi);

}				/* split_range() */

/*-----------------------------------------------------------*/
/* Convert an A.B.C.D address into a 32-bit host-order value */
/*-----------------------------------------------------------*/
static inline in_addr_t a_to_hl(char *ipstr) {
	/*
	 * Parses ipstr with inet_aton() and returns the address in host byte
	 * order. When the string cannot be parsed a message is printed on
	 * stderr and 0 (0.0.0.0) is returned.
	 *
	 * NOTE(review): the message prints strerror(errno); inet_aton() is
	 * presumably not setting errno, so the reason shown may be unrelated
	 * to the parse failure - confirm against the platform documentation.
	 */
	struct in_addr parsed;

	if (likely(inet_aton(ipstr, &parsed)))
		return (ntohl(parsed.s_addr));

	fprintf(stderr, "%s: Invalid address %s. Reason: %s\n", PROG, ipstr, strerror(errno));
	parsed.s_addr = 0;

	return (ntohl(parsed.s_addr));

}				/* a_to_hl() */

/*-----------------------------------------------------------------*/
/* convert a network address char string into a host-order network */
/* address and an integer prefix value                             */
/*-----------------------------------------------------------------*/
static inline network_addr_t str_to_netaddr(char *ipstr) {
	/*
	 * ipstr - "A.B.C.D", "A.B.C.D/prefix" or "A.B.C.D/netmask".
	 *         The string is modified in place: the '/' is overwritten
	 *         with a NUL so that the address part can be parsed alone.
	 *
	 * Returns the first and last address of the network (host order).
	 * Entries without a "/..." part use default_prefix. When the part
	 * after the '/' is neither a prefix in 0..32 nor a valid netmask, a
	 * message is printed and the range 0.0.0.0 - 0.0.0.0 is returned.
	 */

	long int prefix = default_prefix;
	char *prefixstr;
	network_addr_t netaddr;

	if ((prefixstr = strchr(ipstr, '/'))) {
		*prefixstr = '\0';
		prefixstr++;
		errno = 0;
		prefix = strtol(prefixstr, (char **)NULL, 10);
		if (unlikely(errno || (*prefixstr == '\0') || (prefix < 0) || (prefix > 32))) {
			// try the netmask format
			struct in_addr maskaddr;
			in_addr_t mask;

			// Parse the netmask here instead of going through a_to_hl():
			// a_to_hl() returns 0 for unparsable text, which is
			// indistinguishable from the netmask 0.0.0.0 and used to turn
			// entries like "1.2.3.4/" into 0.0.0.0/0.
			if (unlikely(!inet_aton(prefixstr, &maskaddr))) {
				fprintf(stderr, "%s: Invalid netmask %s\n", PROG, prefixstr);
				netaddr.addr = 0;
				netaddr.broadcast = 0;
				return (netaddr);
			}

			// count the host bits: the trailing 1s of the inverted netmask
			mask = ~ntohl(maskaddr.s_addr);
			prefix = 32;
			while((likely(mask & 0x00000001))) {
				mask >>= 1;
				prefix--;
			}

			// anything left means the netmask was not contiguous
			if(unlikely(mask)) {
				fprintf(stderr, "%s: Invalid netmask %s (calculated prefix = %ld, remaining = 0x%08x)\n", PROG, prefixstr, prefix, mask << (32 - prefix));
				netaddr.addr = 0;
				netaddr.broadcast = 0;
				return (netaddr);
			}
		}
	}

	if(likely(cidr_use_network))
		netaddr.addr = network(a_to_hl(ipstr), prefix);
	else
		netaddr.addr = a_to_hl(ipstr);

	netaddr.broadcast = broadcast(netaddr.addr, prefix);

	return (netaddr);

}				/* str_to_netaddr() */

/*----------------------------------------------------------*/
/* compare two network_addr_t structures; used with qsort() */
/* sort in increasing order by address, then by prefix.     */
/*----------------------------------------------------------*/
int compar_netaddr(const void *p1, const void *p2)
{
	/*
	 * qsort() comparator for network_addr_t: lower start address first;
	 * for equal start addresses, the entry with the higher end address
	 * (the larger range) comes first.
	 */
	const network_addr_t *left = p1;
	const network_addr_t *right = p2;

	if (left->addr != right->addr)
		return (left->addr < right->addr) ? -1 : 1;

	if (left->broadcast != right->broadcast)
		return (left->broadcast > right->broadcast) ? -1 : 1;

	return 0;

}				/* compar_netaddr() */

/*------------------------------------------------------*/
/* Print out an address range in a.b.c.d-A.B.C.D format */
/*------------------------------------------------------*/
static inline void print_addr_range(in_addr_t lo, in_addr_t hi)
{
	/*
	 * Writes one line to stdout: "lo-hi" wrapped in the nets prefix and
	 * suffix, or just the address wrapped in the ips prefix and suffix
	 * when both ends are the same.
	 */
	struct in_addr in;

	if (unlikely(lo == hi)) {
		in.s_addr = htonl(hi);
		printf("%s%s%s\n", print_prefix_ips, inet_ntoa(in), print_suffix_ips);
		return;
	}

	/* inet_ntoa() is called once per printf(), so each call sees its own text */
	in.s_addr = htonl(lo);
	printf("%s%s-", print_prefix_nets, inet_ntoa(in));

	in.s_addr = htonl(hi);
	printf("%s%s\n", inet_ntoa(in), print_suffix_nets);
}				/* print_addr_range() */


// ----------------------------------------------------------------------------

// minimum initial capacity, and minimum growth step, of an ipset's entries array
#define NETADDR_INC 1024
// size used for the buffers that hold one line of input
#define MAX_LINE 1024

// ipset.flags bit: the entries are sorted and do not overlap
#define IPSET_FLAG_OPTIMIZED 	0x00000001

// A set of IP ranges loaded from one source, optionally linked to sibling sets.
typedef struct ipset {
	char filename[FILENAME_MAX+1];		// name of the source ("stdin" when no file name was given)
	// char name[FILENAME_MAX+1];

	unsigned long int lines;		// entries added so far (incremented once per added entry)
	unsigned long int entries;		// used slots in netaddrs[]
	unsigned long int entries_max;		// slots the set believes are allocated in netaddrs[]
	unsigned long int unique_ips;		// this is updated only after calling ipset_optimize()

	uint32_t flags;				// IPSET_FLAG_* bits

	struct ipset *next;			// doubly linked list of sibling sets
	struct ipset *prev;

	network_addr_t *netaddrs;		// heap array of ranges, owned by the set
} ipset;


/* ----------------------------------------------------------------------------
 * ipset_create()
 *
 * create an empty ipset with the given name and free entries in its array
 *
 */

static inline ipset *ipset_create(const char *filename, int entries) {
	if(entries < NETADDR_INC) entries = NETADDR_INC;

	ipset *ips = malloc(sizeof(ipset));
	if(unlikely(!ips)) return NULL;

	ips->netaddrs = malloc(entries * sizeof(network_addr_t));
	if(unlikely(!ips->netaddrs)) {
		free(ips);
		return NULL;
	}

	ips->lines = 0;
	ips->entries = 0;
	ips->entries_max = entries;
	ips->unique_ips = 0;
	ips->next = NULL;
	ips->prev = NULL;
	ips->flags = 0;

	strncpy(ips->filename, (filename && *filename)?filename:"stdin", FILENAME_MAX);
	ips->filename[FILENAME_MAX] = '\0';

	//strcpy(ips->name, ips->filename);

	return ips;
}


/* ----------------------------------------------------------------------------
 * ipset_free()
 *
 * release the memory of an ipset and re-link its siblings so that the linkage
 * will be consistent
 *
 */

static inline void ipset_free(ipset *ips) {
	// unlink the set: its neighbours are made to point at each other
	ipset *before = ips->prev;
	ipset *after = ips->next;

	if(after) after->prev = before;
	if(before) before->next = after;

	// release the entries array first, then the set itself
	free(ips->netaddrs);
	free(ips);
}


/* ----------------------------------------------------------------------------
 * ipset_free_all()
 *
 * release all the memory occupied by all ipsets linked together (prev, next)
 *
 */

static inline void ipset_free_all(ipset *ips) {
	// Releases the given set and every set reachable from it through
	// the prev and next links. A NULL pointer is ignored.
	//
	// Each set is fully detached before it is freed, so that ipset_free()
	// never follows a link to a set that has already been released (the
	// previous recursive version left ips->prev / ips->next pointing to
	// freed memory and then wrote through them).
	ipset *cur, *following;

	if(unlikely(!ips)) return;

	// everything before the given set
	cur = ips->prev;
	ips->prev = NULL;
	while(cur) {
		following = cur->prev;
		cur->prev = NULL;
		cur->next = NULL;
		ipset_free(cur);
		cur = following;
	}

	// everything after the given set
	cur = ips->next;
	ips->next = NULL;
	while(cur) {
		following = cur->next;
		cur->prev = NULL;
		cur->next = NULL;
		ipset_free(cur);
		cur = following;
	}

	// finally the given set, which has no neighbours left
	ipset_free(ips);
}


/* ----------------------------------------------------------------------------
 * ipset_expand()
 *
 * expand the ipset so that it will have at least the given number of free
 * entries in its internal array
 *
 */

static inline void ipset_expand(ipset *ips, unsigned long int free_entries_needed) {
	// ips:                 the set to grow; NULL is ignored
	// free_entries_needed: free slots required after the call (0 is treated as 1)
	//
	// The array grows by the number of slots requested, but never by less
	// than NETADDR_INC. The program exits if the memory cannot be obtained.
	if(unlikely(!ips)) return;
	if(unlikely(!free_entries_needed)) free_entries_needed = 1;

	// enough room already
	if(likely((ips->entries_max - ips->entries) >= free_entries_needed)) return;

	unsigned long int step = (free_entries_needed < NETADDR_INC)?NETADDR_INC:free_entries_needed;
	unsigned long int new_max = ips->entries_max + step;

	// refuse sizes that wrap around, instead of allocating a too small array
	if(unlikely(new_max < ips->entries_max || new_max > ((size_t)-1) / sizeof(network_addr_t))) {
		fprintf(stderr, "%s: Cannot re-allocate memory (%lu entries are too many)\n", PROG, new_max);
		exit(1);
	}

	network_addr_t *n = realloc(ips->netaddrs, new_max * sizeof(network_addr_t));
	if(unlikely(!n)) {
		fprintf(stderr, "%s: Cannot re-allocate memory (%lu bytes)\n", PROG, (unsigned long)(new_max * sizeof(network_addr_t)));
		exit(1);
	}

	// commit the new capacity only after the allocation succeeded
	ips->netaddrs = n;
	ips->entries_max = new_max;
}

/* ----------------------------------------------------------------------------
 * ipset_added_entry()
 *
 * book-keeping for the entry the caller has just written at
 * netaddrs[entries]: it updates the counters and either accepts the entry
 * (entries is incremented) or, while the set is still optimized, merges it
 * into the previous entry when the two are adjacent
 *
 * if the entry does not come after the previous one, the set loses its
 * optimized flag
 *
 */

static inline void ipset_added_entry(ipset *ips) {
	register unsigned long entries = ips->entries;

	ips->lines++;

	// The difference is computed in 32 bits and widened before adding 1,
	// so that a range covering all addresses (0.0.0.0 - 255.255.255.255)
	// does not wrap around to 0 where unsigned long is wider than 32 bits.
	ips->unique_ips += (unsigned long int)(ips->netaddrs[entries].broadcast - ips->netaddrs[entries].addr) + 1;

	if(likely(ips->flags & IPSET_FLAG_OPTIMIZED && entries > 0)) {
		in_addr_t last_end = ips->netaddrs[entries - 1].broadcast;

		// the new is just next to the last
		// (nothing can follow 255.255.255.255: last_end + 1 would wrap to 0)
		if(unlikely(last_end != ~(in_addr_t)0 && ips->netaddrs[entries].addr == (last_end + 1))) {
			ips->netaddrs[entries - 1].broadcast = ips->netaddrs[entries].broadcast;
			return;
		}

		// the new is after the end of the last
		if(likely(ips->netaddrs[entries].addr > last_end)) {
			ips->entries++;
			return;
		}

		// the new is before the beginning of the last
		ips->flags &= ~IPSET_FLAG_OPTIMIZED;

		if(unlikely(debug)) {
			in_addr_t new_from = ips->netaddrs[ips->entries].addr;
			in_addr_t new_to = ips->netaddrs[ips->entries].broadcast;

			in_addr_t last_from = ips->netaddrs[ips->entries - 1].addr;
			in_addr_t last_to = ips->netaddrs[ips->entries - 1].broadcast;

			struct in_addr nf, nt, lf, lt;
			nf.s_addr = htonl(new_from);
			nt.s_addr = htonl(new_to);
			lf.s_addr = htonl(last_from);
			lt.s_addr = htonl(last_to);

			// one inet_ntoa() per fprintf(): it reuses a single static buffer
			fprintf(stderr, "%s: NON-OPTIMIZED %s at line %lu, entry %lu, last was %s (%u) - ", PROG, ips->filename, ips->lines, ips->entries, inet_ntoa(lf), last_from);
			fprintf(stderr, "%s (%u), new is ", inet_ntoa(lt), last_to);
			fprintf(stderr, "%s (%u) - ", inet_ntoa(nf), new_from);
			fprintf(stderr, "%s (%u)\n", inet_ntoa(nt), new_to);
		}
	}

	ips->entries++;
}

/* ----------------------------------------------------------------------------
 * ipset_add_ipstr()
 *
 * add a single IP entry to an ipset, by parsing the given IP string
 *
 */

static inline void ipset_add_ipstr(ipset *ips, char *ipstr) {
	network_addr_t parsed;

	// make sure there is a free slot before parsing
	ipset_expand(ips, 1);

	// note: str_to_netaddr() modifies ipstr when it contains a '/'
	parsed = str_to_netaddr(ipstr);

	ips->netaddrs[ips->entries] = parsed;
	ipset_added_entry(ips);
}


/* ----------------------------------------------------------------------------
 * ipset_add()
 *
 * add an IP entry (from - to) to the ipset given
 *
 */

static inline void ipset_add(ipset *ips, in_addr_t from, in_addr_t to) {
	network_addr_t *slot;

	// make sure there is a free slot, then fill the first unused one
	ipset_expand(ips, 1);

	slot = &ips->netaddrs[ips->entries];
	slot->addr = from;
	slot->broadcast = to;

	// let the set account for (or merge) the new entry
	ipset_added_entry(ips);
}


/* ----------------------------------------------------------------------------
 * ipset_optimize()
 *
 * takes an ipset with any number of entries (lo-hi pairs) in any order and
 * it optimizes it in place
 * after this optimization, all entries in the ipset are sorted (ascending)
 * and non-overlapping (it returns less or equal number of entries)
 *
 */

static inline void ipset_optimize(ipset *ips) {
	// Sorts the entries of the set and merges the overlapping or adjacent
	// ones, rebuilding the entries array and unique_ips. The lines counter
	// is preserved. On return the set is flagged as optimized.
	// Calling it on an already optimized set only prints a message.
	// The program exits if the new array cannot be allocated.
	if(unlikely(ips->flags & IPSET_FLAG_OPTIMIZED)) {
		fprintf(stderr, "%s: Is already optimized %s\n", PROG, ips->filename);
		return;
	}

	if(unlikely(debug)) fprintf(stderr, "%s: Optimizing %s\n", PROG, ips->filename);

	// same width as the counters of the set, so large sets are not truncated
	unsigned long int i, n = ips->entries, lines = ips->lines;

	// an empty set is trivially sorted and merged: keep its array as it is
	if(unlikely(!n)) {
		ips->unique_ips = 0;
		ips->flags |= IPSET_FLAG_OPTIMIZED;
		return;
	}

	// sort it
	qsort((void *)ips->netaddrs, n, sizeof(network_addr_t), compar_netaddr);

	// optimize it in a new space
	network_addr_t *naddrs = malloc(n * sizeof(network_addr_t));
	if(unlikely(!naddrs)) {
		// report before anything is released; the process exits right after
		fprintf(stderr, "%s: Cannot allocate memory (%lu bytes)\n", PROG, (unsigned long)(n * sizeof(network_addr_t)));
		exit(1);
	}

	network_addr_t *oaddrs = ips->netaddrs;
	ips->netaddrs = naddrs;
	// the new array holds exactly n entries; keeping the old, larger
	// capacity would let later additions write past its end
	ips->entries_max = n;
	ips->entries = 0;
	ips->unique_ips = 0;
	ips->lines = 0;

	in_addr_t lo = oaddrs[0].addr, hi = oaddrs[0].broadcast;
	for (i = 1; i < n; i++) {
		// if the broadcast of this
		// is before the broadcast of the last
		// then skip it = it fits entirely inside the current
		// (this also guarantees hi + 1 below cannot wrap around)
		if (oaddrs[i].broadcast <= hi)
			continue;

		// if the network addr of this
		// overlaps or is adjacent to the last
		// then merge it = extend the broadcast of the last
		if (oaddrs[i].addr <= hi + 1) {
			hi = oaddrs[i].broadcast;
			continue;
		}

		// at this point we are sure the old lo, hi
		// do not overlap and are not adjacent to the current
		// so, add the last to the new set
		ipset_add(ips, lo, hi);

		// prepare for the next loop
		lo = oaddrs[i].addr;
		hi = oaddrs[i].broadcast;
	}
	ipset_add(ips, lo, hi);

	// ipset_add() counted the merged entries as lines; restore the real count
	ips->lines = lines;

	ips->flags |= IPSET_FLAG_OPTIMIZED;

	free(oaddrs);
}

/* ----------------------------------------------------------------------------
 * ipset_unique_ips()
 *
 * returns the unique_ips counter of the set, optimizing the set first if it
 * is not flagged as optimized
 *
 */

unsigned long int ipset_unique_ips(ipset *ips) {
	int optimized = (ips->flags & IPSET_FLAG_OPTIMIZED) != 0;

	if(unlikely(!optimized))
		ipset_optimize(ips);

	return ips->unique_ips;
}

/* ----------------------------------------------------------------------------
 * ipset_optimize_all()
 *
 * it calls ipset_optimize() for all ipsets linked to 'next' to the given
 *
 */

static inline void ipset_optimize_all(ipset *root) {
	ipset *cur = root;

	// walk the chain forward, starting from (and including) root
	while(cur) {
		ipset_optimize(cur);
		cur = cur->next;
	}
}


/* ----------------------------------------------------------------------------
 * ipset_common()
 *
 * it takes 2 ipsets - THEY MUST BE OPTIMIZED ALREADY (ipset_optimize())
 * it returns 1 new ipset having all the IPs common to both ipset given
 *
 * the result is optimized
 */

static inline ipset *ipset_common(ipset *ips1, ipset *ips2) {
	// Returns a newly created set (named "common") holding the ranges
	// that exist in both ips1 and ips2, or NULL if the set cannot be
	// created. Inputs that are not optimized are optimized first.
	if(unlikely(!(ips1->flags & IPSET_FLAG_OPTIMIZED)))
		ipset_optimize(ips1);

	if(unlikely(!(ips2->flags & IPSET_FLAG_OPTIMIZED)))
		ipset_optimize(ips2);

	if(unlikely(debug)) fprintf(stderr, "%s: Finding common IPs in %s and %s\n", PROG, ips1->filename, ips2->filename);

	ipset *ips = ipset_create("common", 0);
	if(unlikely(!ips)) return NULL;

	unsigned long int
		n1 = ips1->entries,
		n2 = ips2->entries,
		i1 = 0,
		i2 = 0;

	in_addr_t
		lo1 = 0,
		lo2 = 0,
		hi1 = 0,
		hi2 = 0,
		lo, hi;

	// the first entry is read only when it exists:
	// an empty set has nothing valid at netaddrs[0]
	if(n1) {
		lo1 = ips1->netaddrs[0].addr;
		hi1 = ips1->netaddrs[0].broadcast;
	}

	if(n2) {
		lo2 = ips2->netaddrs[0].addr;
		hi2 = ips2->netaddrs[0].broadcast;
	}

	// walk both sorted sets in parallel
	while(i1 < n1 && i2 < n2) {
		// the current of ips2 ends before the current of ips1 begins
		if(lo1 > hi2) {
			i2++;
			if(i2 < n2) {
				lo2 = ips2->netaddrs[i2].addr;
				hi2 = ips2->netaddrs[i2].broadcast;
			}
			continue;
		}

		// the current of ips1 ends before the current of ips2 begins
		if(lo2 > hi1) {
			i1++;
			if(i1 < n1) {
				lo1 = ips1->netaddrs[i1].addr;
				hi1 = ips1->netaddrs[i1].broadcast;
			}
			continue;
		}

		// they overlap

		// the common part starts at the higher of the two beginnings
		if(lo1 > lo2) lo = lo1;
		else lo = lo2;

		// and ends at the lower of the two ends;
		// the range that ended first is the one to advance
		if(hi1 < hi2) {
			hi = hi1;
			i1++;
			if(i1 < n1) {
				lo1 = ips1->netaddrs[i1].addr;
				hi1 = ips1->netaddrs[i1].broadcast;
			}
		}
		else {
			hi = hi2;
			i2++;
			if(i2 < n2) {
				lo2 = ips2->netaddrs[i2].addr;
				hi2 = ips2->netaddrs[i2].broadcast;
			}
		}

		ipset_add(ips, lo, hi);
	}

	ips->lines = ips1->lines + ips2->lines;
	ips->flags |= IPSET_FLAG_OPTIMIZED;

	return ips;
}


/* ----------------------------------------------------------------------------
 * ipset_exclude()
 *
 * it takes 2 ipsets (ips1, ips2) - THEY MUST BE OPTIMIZED ALREADY (ipset_optimize())
 * it returns 1 new ipset having all the IPs of ips1, excluding the IPs of ips2
 *
 * the result is optimized
 */

static inline ipset *ipset_exclude(ipset *ips1, ipset *ips2) {
	// Returns a newly created set (named after ips1) holding the ranges
	// of ips1 that are not covered by ips2, or NULL if the set cannot be
	// created. Inputs that are not optimized are optimized first.
	if(unlikely(!(ips1->flags & IPSET_FLAG_OPTIMIZED)))
		ipset_optimize(ips1);

	if(unlikely(!(ips2->flags & IPSET_FLAG_OPTIMIZED)))
		ipset_optimize(ips2);

	if(unlikely(debug)) fprintf(stderr, "%s: Removing IPs in %s from %s\n", PROG, ips2->filename, ips1->filename);

	ipset *ips = ipset_create(ips1->filename, 0);
	if(unlikely(!ips)) return NULL;

	unsigned long int
		n1 = ips1->entries,
		n2 = ips2->entries,
		i1 = 0,
		i2 = 0;

	in_addr_t
		lo1 = 0,
		lo2 = 0,
		hi1 = 0,
		hi2 = 0;

	// the first entry is read only when it exists:
	// an empty set has nothing valid at netaddrs[0]
	if(n1) {
		lo1 = ips1->netaddrs[0].addr;
		hi1 = ips1->netaddrs[0].broadcast;
	}

	if(n2) {
		lo2 = ips2->netaddrs[0].addr;
		hi2 = ips2->netaddrs[0].broadcast;
	}

	// lo1 - hi1 is what is still left of the current range of ips1
	while(i1 < n1 && i2 < n2) {
		// the current of ips2 ends before what is left: it cannot remove anything
		if(lo1 > hi2) {
			i2++;
			if(i2 < n2) {
				lo2 = ips2->netaddrs[i2].addr;
				hi2 = ips2->netaddrs[i2].broadcast;
			}
			continue;
		}

		// the current of ips2 begins after what is left: keep it all
		if(lo2 > hi1) {
			ipset_add(ips, lo1, hi1);

			i1++;
			if(i1 < n1) {
				lo1 = ips1->netaddrs[i1].addr;
				hi1 = ips1->netaddrs[i1].broadcast;
			}
			continue;
		}

		// they overlap

		// keep the part before the beginning of the excluded range
		// (lo2 > lo1 here, so lo2 - 1 cannot wrap around)
		if(lo1 < lo2) {
			ipset_add(ips, lo1, lo2-1);
			lo1 = lo2;
		}

		if(hi1 == hi2) {
			// both end together: advance both
			i1++;
			if(i1 < n1) {
				lo1 = ips1->netaddrs[i1].addr;
				hi1 = ips1->netaddrs[i1].broadcast;
			}

			i2++;
			if(i2 < n2) {
				lo2 = ips2->netaddrs[i2].addr;
				hi2 = ips2->netaddrs[i2].broadcast;
			}
		}
		else if(hi1 < hi2) {
			// the rest of ips1's range is excluded entirely
			i1++;
			if(i1 < n1) {
				lo1 = ips1->netaddrs[i1].addr;
				hi1 = ips1->netaddrs[i1].broadcast;
			}
		}
		else if(hi1 > hi2) {
			// something is left after the excluded range
			// (hi2 < hi1 here, so hi2 + 1 cannot wrap around)
			lo1 = hi2 + 1;
			i2++;
			if(i2 < n2) {
				lo2 = ips2->netaddrs[i2].addr;
				hi2 = ips2->netaddrs[i2].broadcast;
			}
		}
	}

	if(i1 < n1) {
		// what is left of the current range of ips1
		ipset_add(ips, lo1, hi1);
		i1++;

		// if there are entries left in ips1, copy them
		while(i1 < n1) {
			ipset_add(ips, ips1->netaddrs[i1].addr, ips1->netaddrs[i1].broadcast);
			i1++;
		}
	}

	ips->lines = ips1->lines + ips2->lines;
	ips->flags |= IPSET_FLAG_OPTIMIZED;
	return ips;
}


/* ----------------------------------------------------------------------------
 * parse_line()
 *
 * it parses a single line of input
 * returns
 * 		-1 = cannot parse line
 * 		 0 = skip line - nothing useful here
 * 		 1 = parsed 1 ip address
 * 		 2 = parsed 2 ip addresses
 *
 */

static inline int parse_line(char *line, int lineid, char *ipstr, char *ipstr2, int len) {
	char *s = line;

	// skip all spaces
	while(unlikely(*s == ' ' || *s == '\t')) s++;

	// skip a line of comment
	if(unlikely(*s == '#' || *s == ';')) return 0;

	// if we reached the end of line
	if(unlikely(*s == '\r' || *s == '\n' || *s == '\0')) return 0;

	// get the ip address
	int i = 0;
	while(likely(i < len && ((*s >= '0' && *s <= '9') || *s == '.' || *s == '/')))
		ipstr[i++] = *s++;

	if(unlikely(!i)) return -1;

	// terminate ipstr
	ipstr[i] = '\0';

	// skip all spaces
	while(unlikely(*s == ' ' || *s == '\t')) s++;

	// the rest is comment
	if(unlikely(*s == '#' || *s == ';')) return 1;

	// if we reached the end of line
	if(likely(*s == '\r' || *s == '\n' || *s == '\0')) return 1;

	if(unlikely(*s != '-')) {
		fprintf(stderr, "%s: Ignoring text on line %d, expected a - after %s, but found '%s'\n", PROG, lineid, ipstr, s);
		return 1;
	}

	// skip the -
	s++;

	// skip all spaces
	while(unlikely(*s == ' ' || *s == '\t')) s++;

	// the rest is comment
	if(unlikely(*s == '#' || *s == ';')) {
		fprintf(stderr, "%s: Ignoring text on line %d, expected an ip address after -, but found '%s'\n", PROG, lineid, s);
		return 1;
	}

	// if we reached the end of line
	if(unlikely(*s == '\r' || *s == '\n' || *s == '\0')) {
		fprintf(stderr, "%s: Incomplete range on line %d, expected an ip address after -, but line ended\n", PROG, lineid);
		return 1;
	}

	// get the ip 2nd address
	i = 0;
	while(likely(i < len && ((*s >= '0' && *s <= '9') || *s == '.' || *s == '/')))
		ipstr2[i++] = *s++;

	if(unlikely(!i)) {
		fprintf(stderr, "%s: Incomplete range on line %d, expected an ip address after -, but line ended\n", PROG, lineid);
		return 1;
	}

	// terminate ipstr
	ipstr2[i] = '\0';

	// skip all spaces
	while(unlikely(*s == ' ' || *s == '\t')) s++;

	// the rest is comment
	if(unlikely(*s == '#' || *s == ';')) return 2;

	// if we reached the end of line
	if(likely(*s == '\r' || *s == '\n' || *s == '\0')) return 2;

	fprintf(stderr, "%s: Ignoring text on line %d, after the second ip address: '%s'\n", PROG, lineid, s);
	return 2;
}


/* ----------------------------------------------------------------------------
 * binary files v1.0
 *
 */

/*
 * Appends the records of a binary v1.0 stream to the given ipset.
 *
 * fp:                 stream positioned at the header line, or right after it
 * ips:                set that receives the records; its optimized flag is
 *                     replaced by the one stored in the file
 * first_line_missing: non-zero when the caller has already consumed the
 *                     BINARY_HEADER_V10 line
 *
 * Returns 0 on success, 1 on any format or read error (a message is printed
 * on stderr).
 */
int ipset_load_binary_v10(FILE *fp, ipset *ips, int first_line_missing) {
	char buffer[MAX_LINE + 1], *s;

	if(!first_line_missing) {
		s = fgets(buffer, MAX_LINE, fp);
		buffer[MAX_LINE] = '\0';
		if(!s || strcmp(s, BINARY_HEADER_V10)) {
			fprintf(stderr, "%s: %s expecting binary header but found '%s'.\n", PROG, ips->filename, s?s:"");
			return 1;
		}
	}

	s = fgets(buffer, MAX_LINE, fp);
	buffer[MAX_LINE] = '\0';
	if(!s || ( strcmp(s, "optimized\n") && strcmp(s, "non-optimized\n") )) {
		fprintf(stderr, "%s: %s 2nd line should be the optimized flag, but found '%s'.\n", PROG, ips->filename, s?s:"");
		return 1;
	}
	if(!strcmp(s, "optimized\n")) ips->flags |= IPSET_FLAG_OPTIMIZED;
	else ips->flags &= ~IPSET_FLAG_OPTIMIZED;

	s = fgets(buffer, MAX_LINE, fp);
	buffer[MAX_LINE] = '\0';
	if(!s || strncmp(s, "record size ", 12)) {
		fprintf(stderr, "%s: %s 3rd line should be the record size, but found '%s'.\n", PROG, ips->filename, s?s:"");
		return 1;
	}
	// parsed as unsigned, so it is compared with sizeof without sign conversion
	unsigned long record_size = strtoul(&s[12], NULL, 10);
	if(record_size != sizeof(network_addr_t)) {
		fprintf(stderr, "%s: %s: invalid record size %lu (expected %lu)\n", PROG, ips->filename, record_size, (unsigned long)sizeof(network_addr_t));
		return 1;
	}

	s = fgets(buffer, MAX_LINE, fp);
	buffer[MAX_LINE] = '\0';
	if(!s || strncmp(s, "records ", 8)) {
		fprintf(stderr, "%s: %s 4th line should be the number of records, but found '%s'.\n", PROG, ips->filename, s?s:"");
		return 1;
	}
	unsigned long entries = strtoul(&s[8], NULL, 10);

	s = fgets(buffer, MAX_LINE, fp);
	buffer[MAX_LINE] = '\0';
	if(!s || strncmp(s, "bytes ", 6)) {
		fprintf(stderr, "%s: %s 5th line should be the number of bytes, but found '%s'.\n", PROG, ips->filename, s?s:"");
		return 1;
	}
	unsigned long bytes = strtoul(&s[6], NULL, 10);

	s = fgets(buffer, MAX_LINE, fp);
	buffer[MAX_LINE] = '\0';
	if(!s || strncmp(s, "lines ", 6)) {
		fprintf(stderr, "%s: %s 6th line should be the number of lines read, but found '%s'.\n", PROG, ips->filename, s?s:"");
		return 1;
	}
	unsigned long lines = strtoul(&s[6], NULL, 10);

	s = fgets(buffer, MAX_LINE, fp);
	buffer[MAX_LINE] = '\0';
	if(!s || strncmp(s, "unique ips ", 11)) {
		fprintf(stderr, "%s: %s 7th line should be the number of unique IPs, but found '%s'.\n", PROG, ips->filename, s?s:"");
		return 1;
	}
	unsigned long unique_ips = strtoul(&s[11], NULL, 10);

	// the record count comes from the file: make sure the size computations
	// below cannot wrap around
	if(entries > (((size_t)-1) - sizeof(uint32_t)) / sizeof(network_addr_t)) {
		fprintf(stderr, "%s: %s: too many records (%lu)\n", PROG, ips->filename, entries);
		return 1;
	}

	unsigned long expected_bytes = (unsigned long)((sizeof(network_addr_t) * entries) + sizeof(uint32_t));
	if(bytes != expected_bytes) {
		fprintf(stderr, "%s: %s invalid number of bytes, found %lu, expected %lu.\n", PROG, ips->filename, bytes, expected_bytes);
		return 1;
	}

	uint32_t endian = 0;

	// a short read would otherwise leave the marker unset
	size_t loaded = fread(&endian, sizeof(uint32_t), 1, fp);
	if(loaded != 1) {
		fprintf(stderr, "%s: %s: cannot read the endianess marker\n", PROG, ips->filename);
		return 1;
	}

	if(endian != endianess) {
		fprintf(stderr, "%s: %s: incompatible endianess\n", PROG, ips->filename);
		return 1;
	}

	// every record holds at least one IP
	if(unique_ips < entries) {
		fprintf(stderr, "%s: %s: unique IPs (%lu) cannot be less than entries (%lu)\n", PROG, ips->filename, unique_ips, entries);
		return 1;
	}

	if(lines < entries) {
		fprintf(stderr, "%s: %s: lines (%lu) cannot be less than entries (%lu)\n", PROG, ips->filename, lines, entries);
		return 1;
	}

	ipset_expand(ips, entries);

	// the records are appended after the entries the set already has
	loaded = fread(&ips->netaddrs[ips->entries], sizeof(network_addr_t), entries, fp);

	if(loaded != entries) {
		fprintf(stderr, "%s: %s: expected to load %lu entries, loaded %zu\n", PROG, ips->filename, entries, loaded);
		return 1;
	}

	ips->entries += loaded;
	ips->lines += lines;
	ips->unique_ips += unique_ips;

	return 0;
}

/*
 * Writes the set to stdout in the binary v1.0 format: seven text header
 * lines, the endianess marker and then the raw entries array.
 * Nothing is written for a set without entries.
 */
void ipset_save_binary_v10(ipset *ips) {
	const char *state;

	if(ips->entries == 0) return;

	state = (ips->flags & IPSET_FLAG_OPTIMIZED) ? "optimized\n" : "non-optimized\n";

	// text header
	fputs(BINARY_HEADER_V10, stdout);
	fputs(state, stdout);
	printf("record size %lu\n", sizeof(network_addr_t));
	printf("records %lu\n", ips->entries);
	printf("bytes %lu\n", (sizeof(network_addr_t) * ips->entries) + sizeof(uint32_t));
	printf("lines %lu\n", ips->lines);
	printf("unique ips %lu\n", ips->unique_ips);

	// binary payload: the marker first, then all the records
	fwrite(&endianess, sizeof(uint32_t), 1, stdout);
	fwrite(ips->netaddrs, sizeof(network_addr_t), ips->entries, stdout);
}

/* ----------------------------------------------------------------------------
 * ipset_load()
 *
 * loads a file and stores all entries it finds to a new ipset it creates
 * if the filename is NULL, stdin is used
 *
 * the result is not optimized. To optimize it call ipset_optimize().
 *
 */

ipset *ipset_load(const char *filename) {
	// filename: file to read; NULL or "" reads stdin
	// returns:  a new ipset with everything that could be parsed, or NULL
	//           when the set cannot be created, the file cannot be opened
	//           or a binary file is invalid
	//
	// Text and binary (BINARY_HEADER_V10) input are both accepted; the
	// format is detected from the first line.
	ipset *ips = ipset_create((filename && *filename)?filename:"stdin", 0);
	if(unlikely(!ips)) return NULL;

	FILE *fp = stdin;
	if (likely(filename && *filename)) {
		fp = fopen(filename, "r");
		if (unlikely(!fp)) {
			fprintf(stderr, "%s: %s - %s\n", PROG, filename, strerror(errno));
			// do not leak the set that was just created
			ipset_free(ips);
			return NULL;
		}
	}

	// load it
	if(unlikely(debug)) fprintf(stderr, "%s: Loading from %s\n", PROG, ips->filename);

	// it will be removed, if the loaded ipset is not optimized on disk
	ips->flags |= IPSET_FLAG_OPTIMIZED;

	int lineid = 0;
	char line[MAX_LINE + 1], ipstr[101], ipstr2[101];

	// empty input: return the empty set, but do not leak the stream
	if(!fgets(line, MAX_LINE, fp)) {
		if(likely(fp != stdin)) fclose(fp);
		return ips;
	}

	if(unlikely(!strcmp(line, BINARY_HEADER_V10))) {
		if(ipset_load_binary_v10(fp, ips, 1)) {
			// ips->filename is never NULL, unlike filename when reading stdin
			fprintf(stderr, "%s: Cannot fast load %s\n", PROG, ips->filename);
			ipset_free(ips);
			ips = NULL;
		}

		if(likely(fp != stdin)) fclose(fp);
		if(unlikely(debug)) if(ips) fprintf(stderr, "%s: Binary loaded %s %s\n", PROG, (ips->flags & IPSET_FLAG_OPTIMIZED)?"optimized":"non-optimized", ips->filename);

		return ips;
	}

	do {
		lineid++;

		// ipstr / ipstr2 hold 101 bytes: 100 characters plus the terminator
		switch(parse_line(line, lineid, ipstr, ipstr2, 100)) {
			case -1:
				// cannot read line
				fprintf(stderr, "%s: Cannot understand line No %d from %s: %s\n", PROG, lineid, ips->filename, line);
				break;

			case 0:
				// nothing on this line
				break;

			case 1:
				// 1 IP on this line
				ipset_add_ipstr(ips, ipstr);
				break;

			case 2:
				// 2 IPs in range on this line
				{
					in_addr_t lo, hi;
					network_addr_t netaddr1, netaddr2;
					netaddr1 = str_to_netaddr(ipstr);
					netaddr2 = str_to_netaddr(ipstr2);

					// the range covers both ends, whatever order they were given in
					lo = (netaddr1.addr < netaddr2.addr)?netaddr1.addr:netaddr2.addr;
					hi = (netaddr1.broadcast > netaddr2.broadcast)?netaddr1.broadcast:netaddr2.broadcast;
					ipset_add(ips, lo, hi);
				}
				break;

			default:
				fprintf(stderr, "%s: Cannot understand result code. This is an internal error.\n", PROG);
				exit(1);
				break;
		}
	} while(likely(fgets(line, MAX_LINE, fp)));

	if(likely(fp != stdin)) fclose(fp);

	if(unlikely(debug)) fprintf(stderr, "%s: Loaded %s %s\n", PROG, (ips->flags & IPSET_FLAG_OPTIMIZED)?"optimized":"non-optimized", ips->filename);

	//if(unlikely(!ips->entries)) {
	//	free(ips);
	//	return NULL;
	//}

	return ips;
}


/* ----------------------------------------------------------------------------
 * ipset_reduce()
 *
 * takes an ipset, an acceptable increase % and a minimum accepted entries
 * and disables entries in the global prefix_enabled[] array, so that once
 * the ipset is printed, only the enabled prefixes will be used
 *
 * prefix_enable[] is not reset before use, so that it can be initialized with
 * some of the prefixes enabled and others disabled already (user driven)
 *
 * the ipset given MUST BE OPTIMIZED for this function to work
 *
 * this function does not alter the given ipset and it does not print it
 */

void ipset_reduce(ipset *ips, int acceptable_increase, int min_accepted) {
	// ips                 - the set to analyze; it is optimized first if needed
	// acceptable_increase - percentage of the current number of entries that
	//                       the reduced set is allowed to reach (i.e. 120 allows
	//                       the entries to grow by 20%)
	// min_accepted        - a number of entries that is always acceptable,
	//                       even if it is above the percentage
	//
	// The outcome is left in the global prefix_enabled[] array: prefixes that
	// are not used, or that were merged into a longer prefix, are disabled.
	//
	// All size arithmetic is done in long long, because a prefix counter
	// multiplied by a power of two (up to 2^32) does not fit in an int.

	if(unlikely(!(ips->flags & IPSET_FLAG_OPTIMIZED)))
		ipset_optimize(ips);

	int i, n = ips->entries, total = 0, acceptable, initial = 0, eliminated = 0;

	// the largest value an int can hold (assumes int has one value bit less
	// than unsigned int, which holds on the platforms this tool targets)
	const long long int_max = (long long)(~0u >> 1);

	// reset the prefix counters
	for(i = 0; i <= 32; i++)
		prefix_counters[i] = 0;

	// disable printing
	split_range_disable_printing = 1;

	// find how many prefixes are there
	if(unlikely(debug)) fprintf(stderr, "\nCounting prefixes in %s\n", ips->filename);
	for(i = 0; i < n ;i++)
		split_range(0, 0, ips->netaddrs[i].addr, ips->netaddrs[i].broadcast);

	// count them
	if(unlikely(debug)) fprintf(stderr, "Break down by prefix:\n");
	total = 0;
	for(i = 0; i <= 32 ;i++) {
		if(prefix_counters[i]) {
			if(unlikely(debug)) fprintf(stderr, "	- prefix /%d counts %d entries\n", i, prefix_counters[i]);
			total += prefix_counters[i];
			initial++;
		}
		else prefix_enabled[i] = 0;
	}
	if(unlikely(debug)) fprintf(stderr, "Total %d entries generated\n", total);

	// find the upper limit
	// (computed wide, then clamped, so that total and the counters,
	// which are bounded by it, always stay inside the range of int)
	long long limit = (long long)total * acceptable_increase / 100;
	if(limit < min_accepted) limit = min_accepted;
	if(limit > int_max) limit = int_max;
	acceptable = (int)limit;
	if(unlikely(debug)) fprintf(stderr, "Acceptable is to reach %d entries by reducing prefixes\n", acceptable);

	// reduce the possible prefixes
	while(total < acceptable) {
		// find the prefix with the least increase
		int min = -1, to = -1, j;
		long long min_increase = (long long)acceptable * 10, multiplier, increase;

		for(i = 0; i <= 31 ;i++) {
			if(!prefix_counters[i] || !prefix_enabled[i]) continue;

			// look for the next longer prefix that is in use;
			// every step down doubles the entries needed
			for(j = i + 1, multiplier = 2; j <= 32 ; j++, multiplier *= 2) {
				if(!prefix_counters[j]) continue;

				increase = prefix_counters[i] * (multiplier - 1);
				if(unlikely(debug)) fprintf(stderr, "		> Examining merging prefix %d to %d (increase by %lld)\n", i, j, increase);

				if(increase < min_increase) {
					min_increase = increase;
					min = i;
					to = j;
				}
				break;
			}
		}

		if(min == -1 || to == -1 || min == to) {
			if(unlikely(debug)) fprintf(stderr, "	Nothing more to reduce\n");
			break;
		}

		// each entry of prefix min becomes 2^(to - min) entries of prefix to
		multiplier = 1LL << (to - min);

		increase = prefix_counters[min] * multiplier - prefix_counters[min];
		if(unlikely(debug)) fprintf(stderr, "		> Selected prefix %d (%d entries) to be merged in %d (total increase by %lld)\n", min, prefix_counters[min], to, increase);

		if(total + increase > acceptable) {
			if(unlikely(debug)) fprintf(stderr, "	Cannot proceed to increase total %d by %lld, above acceptable %d.\n", total, increase, acceptable);
			break;
		}

		int old_to_counters = prefix_counters[to];

		// from here on increase <= acceptable - total, so it fits in an int
		total += (int)increase;
		prefix_counters[to] += (int)increase + prefix_counters[min];
		prefix_counters[min] = 0;
		prefix_enabled[min] = 0;
		eliminated++;
		if(unlikely(debug)) fprintf(stderr, "		Eliminating prefix %d in %d (had %d, now has %d entries), total is now %d (increased by %d)\n", min, to, old_to_counters, prefix_counters[to], total, (int)increase);
	}

	if(unlikely(debug)) fprintf(stderr, "\nEliminated %d out of %d prefixes (%d remain in the final set).\n\n", eliminated, initial, initial - eliminated);

	// reset the prefix counters
	for(i = 0; i <= 32; i++)
		prefix_counters[i] = 0;

	// enable printing
	split_range_disable_printing = 0;
}


/* ----------------------------------------------------------------------------
 * ipset_print()
 *
 * print the ipset given to stdout
 *
 */

/* output formats accepted by ipset_print() */
enum {
	PRINT_RANGE      = 1,	/* one address range per entry */
	PRINT_CIDR       = 2,	/* entries split into CIDRs */
	PRINT_SINGLE_IPS = 3,	/* every IP of every entry on its own */
	PRINT_BINARY     = 4	/* binary dump of the set */
};

void ipset_print(ipset *ips, int print) {
	// ips   - the set to print; it is optimized first if it is not already
	// print - one of the PRINT_* output formats; any value other than
	//         PRINT_BINARY, PRINT_CIDR and PRINT_SINGLE_IPS prints ranges

	if(unlikely(!(ips->flags & IPSET_FLAG_OPTIMIZED)))
		ipset_optimize(ips);

	// binary output is handled entirely by the binary writer
	if(print == PRINT_BINARY) {
		ipset_save_binary_v10(ips);
		return;
	}

	int idx, count = ips->entries;
	unsigned long int printed = 0;

	// start counting prefixes from scratch
	for(idx = 0; idx <= 32; idx++)
		prefix_counters[idx] = 0;

	if(unlikely(debug)) fprintf(stderr, "%s: Printing %s\n", PROG, ips->filename);

	switch(print) {
		case PRINT_CIDR:
			// split_range() handles each range; the number of entries
			// it produced is read back from prefix_counters[] below
			for(idx = 0; idx < count; idx++)
				split_range(0, 0, ips->netaddrs[idx].addr, ips->netaddrs[idx].broadcast);
			break;

		case PRINT_SINGLE_IPS:
			for(idx = 0; idx < count; idx++) {
				in_addr_t ip = ips->netaddrs[idx].addr;
				in_addr_t last = ips->netaddrs[idx].broadcast;

				// the test runs before the increment takes effect,
				// so the loop stops right after printing 'last'
				do {
					print_addr_range(ip, ip);
					printed++;
				} while(ip++ != last);
			}
			break;

		default:
			for(idx = 0; idx < count; idx++) {
				print_addr_range(ips->netaddrs[idx].addr, ips->netaddrs[idx].broadcast);
				printed++;
			}
			break;
	}

	// the summary is only shown in verbose mode
	if(unlikely(debug)) {
		int prefixes = 0;
		char *units;

		if(print == PRINT_CIDR) {
			fprintf(stderr, "\nBreak down by prefix:\n");
			for(idx = 0; idx <= 32; idx++) {
				if(!prefix_counters[idx]) continue;

				fprintf(stderr, "	- prefix /%d counts %d entries\n", idx, prefix_counters[idx]);
				printed += prefix_counters[idx];
				prefixes++;
			}
			units = "CIDRs";
		}
		else if(print == PRINT_SINGLE_IPS) {
			prefixes = 1;
			units = "IPs";
		}
		else {
			units = "ranges";
		}

		fprintf(stderr, "\ntotals: %lu lines read, %lu distinct IP ranges found, %d CIDR prefixes, %lu %s printed, %lu unique IPs\n", ips->lines, ips->entries, prefixes, printed, units, ips->unique_ips);
	}
}


/* ----------------------------------------------------------------------------
 * ipset_merge()
 *
 * merges the second ipset (add) to the first ipset (to)
 * neither of them needs to be optimized
 * the result is never optimized (even if the sources are)
 * to optimize it call ipset_optimize()
 *
 */

static inline void ipset_merge(ipset *to, ipset *add) {
	// to  - the destination set, it receives the entries
	// add - the source set, it is only read

	if(unlikely(debug))
		fprintf(stderr, "%s: Merging %s to %s\n", PROG, add->filename, to->filename);

	// presumably makes room for add->entries more entries in 'to'
	ipset_expand(to, add->entries);

	// append the source entries right after the existing ones
	network_addr_t *tail = &to->netaddrs[to->entries];
	memcpy(tail, &add->netaddrs[0], add->entries * sizeof(network_addr_t));

	to->entries += add->entries;
	to->lines += add->lines;

	// the destination is not optimized any more
	if(unlikely(to->flags & IPSET_FLAG_OPTIMIZED)) {
		to->flags ^= IPSET_FLAG_OPTIMIZED;
	}
}


/* ----------------------------------------------------------------------------
 * ipset_copy()
 *
 * it returns a new ipset that is an exact copy of the ipset given
 *
 */

static inline ipset *ipset_copy(ipset *ips1) {
	// ips1 - the set to duplicate
	// returns the new set, or NULL when it cannot be created

	if(unlikely(debug))
		fprintf(stderr, "%s: Copying %s\n", PROG, ips1->filename);

	ipset *dup = ipset_create(ips1->filename, ips1->entries);

	if(likely(dup)) {
		// copy the entries, then the counters and flags
		memcpy(&dup->netaddrs[0], &ips1->netaddrs[0], ips1->entries * sizeof(network_addr_t));

		dup->entries    = ips1->entries;
		dup->unique_ips = ips1->unique_ips;
		dup->lines      = ips1->lines;
		dup->flags      = ips1->flags;
	}

	return dup;
}


/* ----------------------------------------------------------------------------
 * ipset_combine()
 *
 * it returns a new ipset that has all the entries of both ipsets given
 * the result is never optimized, even when the source ipsets are
 *
 */

static inline ipset *ipset_combine(ipset *ips1, ipset *ips2) {
	// ips1, ips2 - the two sets to concatenate; both are only read
	// returns a new set named "combined", or NULL when it cannot be created

	if(unlikely(debug))
		fprintf(stderr, "%s: Combining %s and %s\n", PROG, ips1->filename, ips2->filename);

	ipset *both = ipset_create("combined", ips1->entries + ips2->entries);

	if(likely(both)) {
		// entries of the first set, followed by the entries of the second
		memcpy(&both->netaddrs[0], &ips1->netaddrs[0], ips1->entries * sizeof(network_addr_t));
		memcpy(&both->netaddrs[ips1->entries], &ips2->netaddrs[0], ips2->entries * sizeof(network_addr_t));

		both->entries = ips1->entries + ips2->entries;
		both->lines   = ips1->lines + ips2->lines;
	}

	return both;
}

/* ----------------------------------------------------------------------------
 * ipset_histogram()
 *
 * generate histogram for ipset
 *
 */

//int ipset_histogram(ipset *ips, const char *path) {
	// make sure the path exists
	// if this is the first time:
	//  - create a directory for this ipset, in path
	//  - create the 'new' directory inside this ipset path
	//  - assume the 'latest' is empty
	//  - keep the starting date
	//  - print an empty histogram
	// save in 'new' the IPs of current excluding the 'latest'
	// save 'current' as 'latest'
	// assume the histogram is complete
	// for each file in 'new'
	//  - if the file is <= to histogram start date, the histogram is incomplete
	//  - calculate the hours passed to the 'current'
	//  - find the IPs in this file common to 'current' = 'stillthere'
	//  - find the IPs in this file not in 'stillthere' = 'removed'
	//  - if there are IPs in 'removed', add an entry to the retention histogram
	//  - if there are no IPs in 'stillthere', delete the file
	//  - else replace the file with the contents of 'stillthere'
//
//	return 0;
//}


/* ----------------------------------------------------------------------------
 * usage()
 *
 * print help for the user
 *
 */

void usage(const char *me) {
	// me - the program name, shown on the "Usage:" line
	//
	// prints the complete help text to stderr and
	// terminates the program with exit code 1
	fprintf(stderr, "\n"
		"iprange\n"
		"manage IP ranges\n"
#ifdef VERSION
		"version: " VERSION " ($Id$)\n"
#else
		"version: $Id$\n"
#endif
		"\n"
		"Original,   Copyright (C) 2003 Gabriel L. Somlo\n"
		"Adapted,    Copyright (C) 2004 Paul Townsend\n"
		"Refactored, Copyright (C) 2015 Costa Tsaousis for FireHOL\n"
		"License: GPL\n"
		"\n"
		"Usage: %s [options] file1 file2 file3 ...\n"
		"\n"
		"options (multiple options are aliases):\n"
		"\n"
		"	--------------------------------------------------------------\n"
		"	CIDR OUTPUT MODES\n"
		"\n"
		"	--optimize\n"
		"	--combine\n"
		"	--merge\n"
		"	--union\n"
		"	--union-all\n"
		"	-J\n"
		"		> UNION mode (the default)\n"
		"		returns all IPs found on all files\n"
		"		the resulting set is sorted\n"
		"\n"
		"	--common\n"
		"	--intersect\n"
		"	--intersect-all\n"
		"		> INTERSECT mode\n"
		"		intersect all files to find their common IPs\n"
		"		the resulting set is sorted\n"
		"\n"
		"	--exclude-next\n"
		"	--complement\n"
		"	--complement-next\n"
		"		> COMPLEMENT mode\n"
		"		1. union all files before this parameter (A set)\n"
		"		2. remove all IPs found in the files after this\n"
		"		   parameter, from the set A\n"
		"		the resulting set is sorted\n"
		"\n"
		"	--ipset-reduce PERCENT\n"
		"	--reduce-factor PERCENT\n"
		"		> IPSET REDUCE mode\n"
		"		union all files and print the merged set\n"
		"		but try to reduce the number of prefixes (subnets)\n"
		"		found, while allowing some increase in entries\n"
		"		the PERCENT is how much percent to allow\n"
		"		increase on the number of entries in order to reduce\n"
		"		the prefixes (subnets)\n"
		"		(the internal default PERCENT is 20)\n"
		"		(use -v to see exactly what it does)\n"
		"		the resulting set is sorted\n"
		"\n"
		"	--ipset-reduce-entries ENTRIES\n"
		"	--reduce-entries ENTRIES\n"
		"		> IPSET REDUCE mode\n"
		"		allow increasing the entries above PERCENT, if\n"
		"		they are below ENTRIES\n"
		"		(the internal default ENTRIES is 16384)\n"
//		"\n"
//		"	--histogram\n"
//		"		> IPSET HISTOGRAM mode\n"
//		"		maintain histogram data for ipset and dump current\n"
//		"		status\n"
//		"\n"
//		"	--histogram-dir PATH\n"
//		"		> IPSET HISTOGRAM mode\n"
//		"		the directory to keep histogram data\n"
		"\n"
		"\n"
		"	--------------------------------------------------------------\n"
		"	CSV OUTPUT MODES\n"
		"\n"
		"	--compare\n"
		"		> COMPARE ALL mode (CSV output)\n"
		"		compare all files with all other files\n"
		"		add --header to get the CSV header too\n"
		"\n"
		"	--compare-first\n"
		"		> COMPARE FIRST mode (CSV output)\n"
		"		compare the first file with all other files\n"
		"		add --header to get the CSV header too\n"
		"\n"
		"	--compare-next\n"
		"		> COMPARE NEXT mode (CSV output)\n"
		"		compare all the files that appear before this\n"
		"		parameter, to all files that appear after this\n"
		"		parameter\n"
		"		add --header to get the CSV header too\n"
		"\n"
		"	--count-unique\n"
		"	-C\n"
		"		> COUNT UNIQUE mode (CSV output)\n"
		"		merge all files and print its counts\n"
		"		add --header to get the CSV header too\n"
		"\n"
		"	--count-unique-all\n"
		"		> COUNT UNIQUE ALL mode (CSV output)\n"
		"		print counts for each file\n"
		"		add --header to get the CSV header too\n"
		"\n"
		"\n"
		"	--------------------------------------------------------------\n"
		"	OPTIONS THAT AFFECT INPUT\n"
		"\n"
		"	--dont-fix-network\n"
		"		by default, the network address of all CIDRs\n"
		"		is used (i.e. 1.1.1.17/24 is read as 1.1.1.0/24)\n"
		"		this option disables this feature\n"
		"		(i.e. 1.1.1.17/24 is read as 1.1.1.17-1.1.1.255)\n"
		"\n"
		"	--default-prefix PREFIX\n"
		"	-p PREFIX\n"
		"		Set the default prefix for all IPs without mask\n"
		"		the default is 32\n"
		"\n"
		"\n"
		"	--------------------------------------------------------------\n"
		"	OPTIONS THAT AFFECT CIDR OUTPUT\n"
		"\n"
		"	--min-prefix N\n"
		"		do not generate prefixes larger than N\n"
		"		i.e. if N is 24 then /24 to /32 entries will be\n"
		"		     generated (a /16 network will be generated\n"
		"		     using multiple /24 networks)\n"
		"		this is useful to optimize netfilter/iptables\n"
		"		ipsets, where each different prefix increases the\n"
		"		lookup time for each packet, but the number of\n"
		"		entries in the ipset do not affect its performance\n"
		"		with this setting more entries will be produced\n"
		"		to accomplish the same match\n"
		"		warning: misuse of this parameter can create a large\n"
		"		         number of entries in the generated set\n"
		"\n"
		"	--prefixes N,N,N, ...\n"
		"		enable only the given prefixes to express all CIDRs\n"
		"		prefix 32 is always enabled\n"
		"		warning: misuse of this parameter can create a large\n"
		"		         number of entries in the generated set\n"
		"\n"
		"	--print-ranges\n"
		"	-j\n"
		"		print IP ranges (A.A.A.A-B.B.B.B)\n"
		"		the default is to print CIDRs (A.A.A.A/B)\n"
		"		it only applies when the output is not CSV\n"
		"\n"
		"	--print-single-ips\n"
		"	-1\n"
		"		print single IPs\n"
		"		this can produce large output\n"
		"		the default is to print CIDRs (A.A.A.A/B)\n"
		"		it only applies when the output is not CSV\n"
		"\n"
		"	--print-binary\n"
		"		print binary data\n"
		"\n"
		"	--print-prefix STRING\n"
		"		print STRING before each IP, range or CIDR\n"
		"		this sets both --print-prefix-ips and\n"
		"		--print-prefix-nets\n"
		"\n"
		"	--print-prefix-ips STRING\n"
		"		print STRING before each single IP\n"
		"		useful for entering single IPs to a different\n"
		"		ipset than the networks\n"
		"\n"
		"	--print-prefix-nets STRING\n"
		"		print STRING before each range or CIDR\n"
		"		useful for entering subnets to a different\n"
		"		ipset than single IPs\n"
		"\n"
		"	--print-suffix STRING\n"
		"		print STRING after each IP, range or CIDR\n"
		"		this sets both --print-suffix-ips and\n"
		"		--print-suffix-nets\n"
		"\n"
		"	--print-suffix-ips STRING\n"
		"		print STRING after each single IP\n"
		"		useful for giving single IPs different\n"
		"		ipset options\n"
		"\n"
		"	--print-suffix-nets STRING\n"
		"		print STRING after each range or CIDR\n"
		"		useful for giving subnets different\n"
		"		ipset options\n"
		"\n"
		"\n"
		"	--------------------------------------------------------------\n"
		"	OPTIONS THAT AFFECT CSV OUTPUT\n"
		"\n"
		"	--header\n"
		"		when the output is CSV, print the header line\n"
		"		the default is to not print the header line\n"
		"\n"
		"\n"
		"	--------------------------------------------------------------\n"
		"	OTHER OPTIONS\n"
		"\n"
		"	--has-compare\n"
		"	--has-reduce\n"
		"		exits with 0\n"
		"		other versions of iprange will exit with 1\n"
		"		use this option in scripts to find if this\n"
		"		version of iprange is present in a system\n"
		"\n"
		"	-v\n"
		"		be verbose on stderr\n"
		"\n"
		"	--help\n"
		"	-h\n"
		"		print this message\n"
		"\n"
		"\n"
		"	--------------------------------------------------------------\n"
		"	INPUT FILES\n"
		"\n"
		"	fileN\n"
		"		a filename or - for stdin\n"
		"		each filename can be followed by [as NAME]\n"
		"		to change its name in the CSV output\n"
		"\n"
		"		if no filename is given, stdin is assumed\n"
		"\n"
		"		files may contain:\n"
		"		- comments starting with # or ;\n"
		"		- one IP per line (without mask)\n"
		"		- a CIDR per line (A.A.A.A/B)\n"
		"		- an IP range per line (A.A.A.A - B.B.B.B)\n"
		"		- a CIDR range per line (A.A.A.A/B - C.C.C.C/D)\n"
		"		  the range is calculated as the network address of\n"
		"		  A.A.A.A/B to the broadcast address of C.C.C.C/D\n"
		"		  (this is affected by --dont-fix-network)\n"
		"		- CIDRs can be given in either prefix or netmask\n"
		"		  format in all cases (including ranges)\n"
		"		- spaces and empty lines are ignored\n"
		"\n"
		"		any number of files can be given\n"
		"\n"
		, me);
	exit(1);
}

/* operating modes selected on the command line and dispatched by main() */
enum {
	MODE_COMBINE             = 1,
	MODE_COMPARE             = 2,
	MODE_COMPARE_FIRST       = 3,
	MODE_COMPARE_NEXT        = 4,
	MODE_COUNT_UNIQUE_MERGED = 5,
	MODE_COUNT_UNIQUE_ALL    = 6,
	MODE_REDUCE              = 7,
	MODE_COMMON              = 8,
	MODE_EXCLUDE_NEXT        = 9
	/* 10 is reserved for the (disabled) histogram mode */
};

int main(int argc, char **argv) {
//	char histogram_dir[FILENAME_MAX + 1] = "/var/lib/iprange";

	struct timeval start_dt, load_dt, print_dt, stop_dt;
	gettimeofday(&start_dt, NULL);

	int ipset_reduce_factor = 120;
	int ipset_reduce_min_accepted = 16384;

	if ((PROG = strrchr(argv[0], '/')))
		PROG++;
	else
		PROG = argv[0];

	ipset *root = NULL, *ips = NULL, *first = NULL, *second = NULL;
	int i, mode = MODE_COMBINE, print = PRINT_CIDR, header = 0, read_second = 0;

	for(i = 1; i < argc ; i++) {
		if(i+1 < argc && !strcmp(argv[i], "as")) {
			if(!read_second) {
				if(root) {
					strncpy(root->filename, argv[++i], FILENAME_MAX);
					root->filename[FILENAME_MAX] = '\0';
				}
			}
			else {
				if(second) {
					strncpy(second->filename, argv[++i], FILENAME_MAX);
					second->filename[FILENAME_MAX] = '\0';
				}
			}
		}
		else if(i+1 < argc && !strcmp(argv[i], "--min-prefix")) {
			int j, min_prefix = atoi(argv[++i]);
			if(min_prefix < 1 || min_prefix > 32) {
				fprintf(stderr, "Only prefixes 1 to 31 can be disabled. %d is invalid.\n", min_prefix);
				exit(1);
			}
			for(j = 0; j < min_prefix; j++)
				prefix_enabled[j] = 0;
		}
		else if(i+1 < argc && !strcmp(argv[i], "--prefixes")) {
			char *s = NULL, *e = argv[++i];
			int j;

			for(j = 0; j < 32; j++)
				prefix_enabled[j] = 0;

			while(e && *e && e != s) {
				s = e;
				j = strtol(s, &e, 10);
				if(j <= 0 || j > 32) {
					fprintf(stderr, "%s: Only prefixes from 1 to 32 can be set (32 is always enabled). %d is invalid.\n", PROG, j);
					exit(1);
				}
				if(debug) fprintf(stderr, "Enabling prefix %d\n", j);
				prefix_enabled[j] = 1;
				if(*e == ',' || *e == ' ') e++;
			}

			if(e && *e) {
				fprintf(stderr, "%s: Invalid prefix '%s'\n", PROG, e);
				exit(1);
			}
		}
		else if(i+1 < argc && (
			   !strcmp(argv[i], "--default-prefix")
			|| !strcmp(argv[i], "-p")
			)) {
			default_prefix = atoi(argv[++i]);
		}
		else if(i+1 < argc && (
			   !strcmp(argv[i], "--ipset-reduce")
			|| !strcmp(argv[i], "--reduce-factor")
			)) {
			ipset_reduce_factor = 100 + atoi(argv[++i]);
			mode = MODE_REDUCE;
		}
		else if(i+1 < argc && (
			   !strcmp(argv[i], "--ipset-reduce-entries")
			|| !strcmp(argv[i], "--reduce-entries")
			)) {
			ipset_reduce_min_accepted = atoi(argv[++i]);
			mode = MODE_REDUCE;
		}
		else if(!strcmp(argv[i], "--optimize")
			|| !strcmp(argv[i], "--combine")
			|| !strcmp(argv[i], "--merge")
			|| !strcmp(argv[i], "--union")
			|| !strcmp(argv[i], "--union-all")
			|| !strcmp(argv[i], "-J")
			) {
			mode = MODE_COMBINE;
		}
		else if(!strcmp(argv[i], "--common")
			|| !strcmp(argv[i], "--intersect")
			|| !strcmp(argv[i], "--intersect-all")) {
			mode = MODE_COMMON;
		}
		else if(!strcmp(argv[i], "--exclude-next")
			|| !strcmp(argv[i], "--complement-next")
			|| !strcmp(argv[i], "--complement")) {
			mode = MODE_EXCLUDE_NEXT;
			read_second = 1;
		}
		else if(!strcmp(argv[i], "--compare")) {
			mode = MODE_COMPARE;
		}
		else if(!strcmp(argv[i], "--compare-first")) {
			mode = MODE_COMPARE_FIRST;
		}
		else if(!strcmp(argv[i], "--compare-next")) {
			mode = MODE_COMPARE_NEXT;
			read_second = 1;
		}
		else if(!strcmp(argv[i], "--count-unique")
			|| !strcmp(argv[i], "-C")) {
			mode = MODE_COUNT_UNIQUE_MERGED;
		}
		else if(!strcmp(argv[i], "--count-unique-all")) {
			mode = MODE_COUNT_UNIQUE_ALL;
		}
//		else if(!strcmp(argv[i], "--histogram")) {
//			mode = MODE_HISTOGRAM;
//		}
//		else if(i+1 < argc && !strcmp(argv[i], "--histogram-dir")) {
//			mode = MODE_HISTOGRAM;
//			strncpy(histogram_dir, argv[++i], FILENAME_MAX);
//		}
		else if(!strcmp(argv[i], "--help")
			|| !strcmp(argv[i], "-h")) {
			usage(argv[0]);
		}
		else if(!strcmp(argv[i], "-v")) {
			debug = 1;
		}
		else if(!strcmp(argv[i], "--print-ranges")
			|| !strcmp(argv[i], "-j")) {
			print = PRINT_RANGE;
		}
		else if(!strcmp(argv[i], "--print-binary")) {
			print = PRINT_BINARY;
		}
		else if(!strcmp(argv[i], "--print-single-ips")
			|| !strcmp(argv[i], "-1")) {
			print = PRINT_SINGLE_IPS;
		}
		else if(i+1 < argc && !strcmp(argv[i], "--print-prefix")) {
			print_prefix_ips  = argv[++i];
			print_prefix_nets = print_prefix_ips;
		}
		else if(i+1 < argc && !strcmp(argv[i], "--print-prefix-ips")) {
			print_prefix_ips = argv[++i];
		}
		else if(i+1 < argc && !strcmp(argv[i], "--print-prefix-nets")) {
			print_prefix_nets = argv[++i];
		}
		else if(i+1 < argc && !strcmp(argv[i], "--print-suffix")) {
			print_suffix_ips = argv[++i];
			print_suffix_nets = print_suffix_ips;
		}
		else if(i+1 < argc && !strcmp(argv[i], "--print-suffix-ips")) {
			print_suffix_ips = argv[++i];
		}
		else if(i+1 < argc && !strcmp(argv[i], "--print-suffix-nets")) {
			print_suffix_nets = argv[++i];
		}
		else if(!strcmp(argv[i], "--header")) {
			header = 1;
		}
		else if(!strcmp(argv[i], "--dont-fix-network")) {
			cidr_use_network = 0;
		}
		else if(!strcmp(argv[i], "--has-compare")
			|| !strcmp(argv[i], "--has-reduce")) {
			fprintf(stderr, "yes, compare and reduce is present.\n");
			exit(0);
		}
		else {
			if(!strcmp(argv[i], "-"))
				ips = ipset_load(NULL);
			else
				ips = ipset_load(argv[i]);

			if(!ips) {
				fprintf(stderr, "%s: Cannot load ipset: %s\n", PROG, argv[i]);
				exit(1);
			}

			if(read_second) {
				ips->next = second;
				second = ips;
				if(ips->next) ips->next->prev = ips;
			}
			else {
				if(!first) first = ips;
				ips->next = root;
				root = ips;
				if(ips->next) ips->next->prev = ips;
			}
		}
	}

	// if no ipset was given on the command line
	// assume stdin

	if(!root) {
		first = root = ipset_load(NULL);
		if(!root) {
			fprintf(stderr, "%s: No ipsets to merge.\n", PROG);
			exit(1);
		}
	}

	gettimeofday(&load_dt, NULL);

	if(mode == MODE_COMBINE || mode == MODE_REDUCE || mode == MODE_COUNT_UNIQUE_MERGED) {
		// for debug mode to show something meaningful
		strcpy(root->filename, "combined ipset");

		for(ips = root->next; ips ;ips = ips->next)
			ipset_merge(root, ips);

		// ipset_optimize(root);
		if(mode == MODE_REDUCE) ipset_reduce(root, ipset_reduce_factor, ipset_reduce_min_accepted);

		gettimeofday(&print_dt, NULL);

		if(mode == MODE_COMBINE || mode == MODE_REDUCE)
			ipset_print(root, print);

		else if(mode == MODE_COUNT_UNIQUE_MERGED) {
			if(unlikely(header)) printf("entries,unique_ips\n");
			printf("%lu,%lu\n", root->lines, ipset_unique_ips(root));
		}
	}
	else if(mode == MODE_COMMON) {
		if(!root->next) {
			fprintf(stderr, "%s: two ipsets at least are needed to be compared to find their common IPs.\n", PROG);
			exit(1);
		}

		// ipset_optimize_all(root);

		ipset *common = NULL, *ips2 = NULL;

		common = ipset_common(root, root->next);
		for(ips = root->next->next; ips ;ips = ips->next) {
			ips2 = ipset_common(common, ips);
			ipset_free(common);
			common = ips2;
		}

		gettimeofday(&print_dt, NULL);
		ipset_print(common, print);
	}
	else if(mode == MODE_COMPARE) {
		if(!root->next) {
			fprintf(stderr, "%s: two ipsets at least are needed to be compared.\n", PROG);
			exit(1);
		}

		if(unlikely(header)) printf("name1,name2,entries1,entries2,ips1,ips2,combined_ips,common_ips\n");

		// ipset_optimize_all(root);

		ipset *ips2;
		for(ips = root; ips ;ips = ips->next) {
			for(ips2 = ips; ips2 ;ips2 = ips2->next) {
				if(ips == ips2) continue;

#ifdef COMPARE_WITH_COMMON
				ipset *common = ipset_common(ips, ips2);
				if(!common) {
					fprintf(stderr, "%s: Cannot find the common IPs of ipset %s and %s\n", PROG, ips->filename, ips2->filename);
					exit(1);
				}
				fprintf(stdout, "%s,%s,%lu,%lu,%lu,%lu,%lu,%lu\n", ips->filename, ips2->filename, ips->lines, ips2->lines, ips->unique_ips, ips2->unique_ips, ips->unique_ips + ips2->unique_ips - common->unique_ips, common->unique_ips);
				ipset_free(common);
#else
				ipset *combined = ipset_combine(ips, ips2);
				if(!combined) {
					fprintf(stderr, "%s: Cannot merge ipset %s and %s\n", PROG, ips->filename, ips2->filename);
					exit(1);
				}

				ipset_optimize(combined);
				fprintf(stdout, "%s,%s,%lu,%lu,%lu,%lu,%lu,%lu\n", ips->filename, ips2->filename, ips->lines, ips2->lines, ips->unique_ips, ips2->unique_ips, combined->unique_ips, ips->unique_ips + ips2->unique_ips - combined->unique_ips);
				ipset_free(combined);
#endif
			}
		}
		gettimeofday(&print_dt, NULL);
	}
	else if(mode == MODE_COMPARE_NEXT) {
		if(!second) {
			fprintf(stderr, "%s: no files given after the --compare-next parameter.\n", PROG);
			exit(1);
		}

		if(unlikely(header)) printf("name1,name2,entries1,entries2,ips1,ips2,combined_ips,common_ips\n");

		// ipset_optimize_all(root);
		// ipset_optimize_all(second);

		ipset *ips2;
		for(ips = root; ips ;ips = ips->next) {
			for(ips2 = second; ips2 ;ips2 = ips2->next) {
#ifdef COMPARE_WITH_COMMON
				ipset *common = ipset_common(ips, ips2);
				if(!common) {
					fprintf(stderr, "%s: Cannot find the common IPs of ipset %s and %s\n", PROG, ips->filename, ips2->filename);
					exit(1);
				}
				fprintf(stdout, "%s,%s,%lu,%lu,%lu,%lu,%lu,%lu\n", ips->filename, ips2->filename, ips->lines, ips2->lines, ips->unique_ips, ips2->unique_ips, ips->unique_ips + ips2->unique_ips - common->unique_ips, common->unique_ips);
				ipset_free(common);
#else
				ipset *combined = ipset_combine(ips, ips2);
				if(!combined) {
					fprintf(stderr, "%s: Cannot merge ipset %s and %s\n", PROG, ips->filename, ips2->filename);
					exit(1);
				}

				ipset_optimize(combined);
				fprintf(stdout, "%s,%s,%lu,%lu,%lu,%lu,%lu,%lu\n", ips->filename, ips2->filename, ips->lines, ips2->lines, ips->unique_ips, ips2->unique_ips, combined->unique_ips, ips->unique_ips + ips2->unique_ips - combined->unique_ips);
				ipset_free(combined);
#endif
			}
		}
		gettimeofday(&print_dt, NULL);
	}
	else if(mode == MODE_COMPARE_FIRST) {
		if(!root->next) {
			fprintf(stderr, "%s: two ipsets at least are needed to be compared.\n", PROG);
			exit(1);
		}

		if(unlikely(header)) printf("name,entries,unique_ips,common_ips\n");

		// ipset_optimize_all(root);

		for(ips = root; ips ;ips = ips->next) {
			if(ips == first) continue;

#ifdef COMPARE_WITH_COMMON
			ipset *common = ipset_common(ips, first);
			if(!common) {
				fprintf(stderr, "%s: Cannot find the common IPs of ipset %s and %s\n", PROG, ips->filename, first->filename);
				exit(1);
			}
			printf("%s,%lu,%lu,%lu\n", ips->filename, ips->lines, ips->unique_ips, common->unique_ips);
			ipset_free(common);
#else
			ipset *combined = ipset_combine(ips, first);
			if(!combined) {
				fprintf(stderr, "%s: Cannot merge ipset %s and %s\n", PROG, ips->filename, first->filename);
				exit(1);
			}

			ipset_optimize(combined);
			printf("%s,%lu,%lu,%lu\n", ips->filename, ips->lines, ips->unique_ips, ips->unique_ips + first->unique_ips - combined->unique_ips);
			ipset_free(combined);
#endif
		}
		gettimeofday(&print_dt, NULL);
	}
	else if(mode == MODE_EXCLUDE_NEXT) {
		if(!second) {
			fprintf(stderr, "%s: no files given after the --exclude-next parameter.\n", PROG);
			exit(1);
		}

		// merge them
		for(ips = root->next; ips ;ips = ips->next)
			ipset_merge(root, ips);

		// ipset_optimize(root);
		// ipset_optimize_all(second);

		ipset *excluded = root;
		root = root->next;
		for(ips = second; ips ;ips = ips->next) {
			ipset *tmp = ipset_exclude(excluded, ips);
			if(!tmp) {
				fprintf(stderr, "%s: Cannot exclude the IPs of ipset %s from %s\n", PROG, ips->filename, excluded->filename);
				exit(1);
			}

			ipset_free(excluded);
			excluded = tmp;
		}
		gettimeofday(&print_dt, NULL);
		ipset_print(excluded, print);
	}
	else if(mode == MODE_COUNT_UNIQUE_ALL) {
		if(unlikely(header)) printf("name,entries,unique_ips\n");

		ipset_optimize_all(root);

		for(ips = root; ips ;ips = ips->next) {
			printf("%s,%lu,%lu\n", ips->filename, ips->lines, ips->unique_ips);
		}
		gettimeofday(&print_dt, NULL);
	}
//	else if(mode == MODE_HISTOGRAM) {
//		for(ips = root; ips ;ips = ips->next) {
//			ipset_histogram(ips, histogram_dir);
//		}
//	}
	else {
		fprintf(stderr, "%s: Unknown mode.\n", PROG);
		exit(1);
	}

	gettimeofday(&stop_dt, NULL);
	if(debug)
		fprintf(stderr, "completed in %0.5f seconds (read %0.5f + think %0.5f + speak %0.5f)\n"
			, ((double)(stop_dt.tv_sec  * 1000000 + stop_dt.tv_usec) - (double)(start_dt.tv_sec * 1000000 + start_dt.tv_usec)) / (double)1000000
			, ((double)(load_dt.tv_sec  * 1000000 + load_dt.tv_usec) - (double)(start_dt.tv_sec * 1000000 + start_dt.tv_usec)) / (double)1000000
			, ((double)(print_dt.tv_sec  * 1000000 + print_dt.tv_usec) - (double)(load_dt.tv_sec * 1000000 + load_dt.tv_usec)) / (double)1000000
			, ((double)(stop_dt.tv_sec  * 1000000 + stop_dt.tv_usec) - (double)(print_dt.tv_sec * 1000000 + print_dt.tv_usec)) / (double)1000000
		);

	exit(0);
}