/*************************************************************************** * Copyright (C) 2019 by John D. Robertson * * john@rrci.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #include #include #include #include "str.h" #include "util.h" int STR_sinit(STR *self, size_t sz_hint) /********************************************************************************** * Initialization to be called for static instances of STR each use, but * actual initialization only occurs once. */ { int rtn= 1; if(!self->buf) { if(!STR_constructor(self, sz_hint)) goto abort; } else { STR_reset(self); } rtn= 0; abort: return rtn; } STR* STR_constructor(STR *self, size_t sz_hint) /********************************************************************************** * Prepare a STR for use with initial size of sz_hint. */ { STR *rtn= NULL; assert(sz_hint); self->sz= sz_hint; if(!(self->buf= malloc(self->sz))) goto abort; STR_reset(self); rtn= self; abort: return rtn; } void* STR_destructor(STR *self) /********************************************************************************** * Free resources associated with STR. */ { if(self->buf) free(self->buf); return self; } static int growbuf(STR *self) /********************************************************************************** * Attempt to increase the size buffer initially trying to double it, then * backing off 10% at a time. * Returns non-zero for error. */ { int rtn= 1; size_t i; /* Initially try do double memory. If that fails, back off 10% at a time */ for(i= 20; i > 10; i--) { char *p; size_t new_sz= self->sz * i / 10; /* Try to reallocate the memory */ if(!(p= realloc(self->buf, new_sz))) continue; /* Try vsnprintf() again */ self->buf= p; self->sz = new_sz; break; } if(i != 10) rtn= 0; /* Not success if we grew the buffer */ abort: return rtn; } int STR_sprintf(STR *self, const char *fmt, ...) /********************************************************************************** * Same as sprintf, except you don't have to worry about buffer overflows. * Returns non-zero for error. */ { int is_done, rtn= -1; va_list arglist; /* Catch empty strings */ if(!strlen(fmt)) return 0; /* vsprintf the fmt string */ for(is_done= 0; !is_done;) { int rc; va_start (arglist, fmt); rc= vsnprintf(self->buf+self->len, self->sz - self->len, fmt, arglist); if(rc >= (self->sz - self->len)) { /* Buffer isn't large enough */ if(growbuf(self)) is_done= 1; } else { if(rc != -1) { /* Successful return */ rtn= rc; self->len += rc; } is_done= 1; } va_end (arglist); } return rtn; } int STR_vsprintf(STR *self, const char *fmt, va_list ap) /********************************************************************************** * Same as vsprintf, except you don't have to worry about buffer overflows. * Returns non-zero for error. */ { int is_done, rtn= -1; /* Catch empty strings */ if(!strlen(fmt)) return 0; /* vsprintf the fmt string */ for(is_done= 0; !is_done;) { int rc; va_list arglist; #if __GNUC__ == 2 arglist= ap; #else va_copy(arglist, ap); #endif rc= vsnprintf(self->buf+self->len, self->sz - self->len, fmt, arglist); if(rc >= (self->sz - self->len)) { /* Buffer isn't large enough */ if(growbuf(self)) is_done= 1; } else { if(rc != -1) { /* Successful return */ rtn= rc; self->len += rc; } is_done= 1; } va_end (arglist); } return rtn; } int STR_append(STR *self, const char *str, size_t str_len) /********************************************************************************** * Append string str to the end of the current buffer. * Returns -1 for error. */ { if(str_len == -1) str_len= strlen(str); /* Make sure there is enough space to store the string */ while(self->len + str_len + 1 >= self->sz) { if(growbuf(self)) return 1; } /* Copy the string into place */ memcpy(self->buf + self->len, str, str_len); /* Update length */ self->len += str_len; /* Null terminate string */ self->buf[self->len] = '\0'; return 0; } int STR_putc(STR *self, int c) /********************************************************************************** * Append a single character to the end of the current buffer. * Returns -1 for error. */ { /* Make sure there is enough space to store the string */ while(self->len + 1 >= self->sz) { if(growbuf(self)) return 1; } /* Copy the character into place */ self->buf[self->len]= c; /* Update length */ ++self->len; /* Null terminate string */ self->buf[self->len] = '\0'; return 0; } const char* STR_tolower(STR *self) /********************************************************************************** * Convert all characters in buffer to lower case. */ { char *c; for(c= self->buf; *c; ++c) { *c= tolower(*c); } return self->buf; } const char* STR_XMLencode(STR *self, const char *src) /************************************************************** * encode the src string for XML into self. Return self's buffer. * NOTE: self does not get reset! */ { const char *c; if(!src) return ""; for(c= src; *c; ++c) { const char *str= NULL; unsigned len= 0; switch(*c) { #define doit(litstr) \ len= strlen(litstr); str= litstr case '"': doit("""); break; case '\'': doit("'"); break; case '<': doit("<"); break; case '>': doit(">"); break; case '&': doit("&"); break; #undef doit } if(str) { // Special string was assigned STR_append(self, str, len); } else { STR_putc(self, *c); } } return STR_str(self); } const char* STR_URLencode(STR *self, const char *src) /************************************************************** * encode the src string for URL into self. Return self's buffer. * NOTE: self does not get reset! */ { const char *c; if(!src) return ""; for(c= src; *c; ++c) { const char *str= NULL; switch(*c) { case '!': str= "%21"; break; case '#': str= "%23"; break; case '$': str= "%24"; break; case '&': str= "%26"; break; case '\'': str= "%27"; break; case '(': str= "%28"; break; case ')': str= "%29"; break; case '*': str= "%2A"; break; case '+': str= "%2B"; break; case ',': str= "%2C"; break; // case '/': str= "%2F"; break; case ':': str= "%3A"; break; case ';': str= "%3B"; break; case '=': str= "%3D"; break; case '?': str= "%3F"; break; case '@': str= "%40"; break; case '[': str= "%5B"; break; case ']': str= "%5D"; break; } if(str) { // Special string was assigned STR_append(self, str, 3); } else { STR_putc(self, *c); } } return STR_str(self); } const char* STR_utf8toHTML(STR *self, const char *src) /************************************************************** * place the HTML representation of the utf-8 src string into self. * Return self's buffer. * NOTE: self does not get reset! */ { const unsigned char *c; unsigned code= '?'; if(!src) return ""; for(c= (const unsigned char*)src; *c; ++c) { /******* Map UTF-8 to a code point **********/ if(0xF0 == (*c & 0xF8)) { // first of four bytes /* Make sure the string doesn't end too soon */ assert(c[1] && c[2] && c[3]); code= (*c & ~0xF8) << 18; ++c; code |= (*c & ~0xC0) << 12; ++c; code |= (*c & ~0xC0) << 6; ++c; code |= (*c & ~0xC0); } else if(0xE0 == (*c & 0xF0)) { // first of three bytes /* Make sure the string doesn't end too soon */ assert(c[1] && c[2]); code= (*c & ~0xF0) << 12; ++c; code |= (*c & ~0xC0) << 6; ++c; code |= (*c & ~0xC0); } else if(0xC0 == (*c & 0xE0)) { // first of two bytes /* Make sure the string doesn't end too soon */ assert(c[1]); code= (*c & ~0xE0) << 6; ++c; code |= (*c & ~0xC0); } else if(0 == (*c & 0x80)) {// first of one byte code= *c; } /************** Assign HTML special string if one is defined **********/ const char *str= NULL; unsigned len= 0; switch(code) { #define doit(litstr) \ len= strlen(litstr); str= litstr case '"': doit("""); break; case '<': doit("<"); break; case '>': doit(">"); break; case '&': doit("&"); break; case 160: doit(" "); break; case 161: doit("¡"); break; case 162: doit("¢"); break; case 163: doit("£"); break; case 164: doit("¤"); break; case 165: doit("¥"); break; case 166: doit("¦"); break; case 167: doit("§"); break; case 168: doit("¨"); break; case 169: doit("©"); break; case 170: doit("ª"); break; case 171: doit("«"); break; case 172: doit("¬"); break; case 173: doit("­"); break; case 174: doit("®"); break; case 175: doit("¯"); break; case 176: doit("°"); break; case 177: doit("±"); break; case 178: doit("²"); break; case 179: doit("³"); break; case 180: doit("´"); break; case 181: doit("µ"); break; case 182: doit("¶"); break; case 183: doit("·"); break; case 184: doit("¸"); break; case 185: doit("¹"); break; case 186: doit("º"); break; case 187: doit("»"); break; case 188: doit("¼"); break; case 189: doit("½"); break; case 190: doit("¾"); break; case 191: doit("¿"); break; case 192: doit("À"); break; case 193: doit("Á"); break; case 194: doit("Â"); break; case 195: doit("Ã"); break; case 196: doit("Ä"); break; case 197: doit("Å"); break; case 198: doit("Æ"); break; case 199: doit("Ç"); break; case 200: doit("È"); break; case 201: doit("É"); break; case 202: doit("Ê"); break; case 203: doit("Ë"); break; case 204: doit("Ì"); break; case 205: doit("Í"); break; case 206: doit("Î"); break; case 207: doit("Ï"); break; case 208: doit("Ð"); break; case 209: doit("Ñ"); break; case 210: doit("Ò"); break; case 211: doit("Ó"); break; case 212: doit("Ô"); break; case 213: doit("Õ"); break; case 214: doit("Ö"); break; case 215: doit("×"); break; case 216: doit("Ø"); break; case 217: doit("Ù"); break; case 218: doit("Ú"); break; case 219: doit("Û"); break; case 220: doit("Ü"); break; case 221: doit("Ý"); break; case 222: doit("Þ"); break; case 223: doit("ß"); break; case 224: doit("à"); break; case 225: doit("á"); break; case 226: doit("â"); break; case 227: doit("ã"); break; case 228: doit("ä"); break; case 229: doit("å"); break; case 230: doit("æ"); break; case 231: doit("ç"); break; case 232: doit("è"); break; case 233: doit("é"); break; case 234: doit("ê"); break; case 235: doit("ë"); break; case 236: doit("ì"); break; case 237: doit("í"); break; case 238: doit("î"); break; case 239: doit("ï"); break; case 240: doit("ð"); break; case 241: doit("ñ"); break; case 242: doit("ò"); break; case 243: doit("ó"); break; case 244: doit("ô"); break; case 245: doit("õ"); break; case 246: doit("ö"); break; case 247: doit("÷"); break; case 248: doit("ø"); break; case 249: doit("ù"); break; case 250: doit("ú"); break; case 251: doit("û"); break; case 252: doit("ü"); break; case 253: doit("ý"); break; case 254: doit("þ"); break; case 255: doit("ÿ"); break; case 8364: doit("€"); break; #undef doit } /****** Place final representation into our string buffer ******/ if(str) { // Special string was assigned STR_append(self, str, len); } else if(code < 128 && isprint(code)) { // Normal ASCII character STR_putc(self, code); } else if(65533 == code) { // This is the placeholder for unrecognized characters } else { // All others STR_sprintf(self, "&#%u;", code); } } return STR_str(self); } const char* STR_escapeJSONstr(STR *self, const char *src) /************************************************************** * Escape any characters such the src can be used in a JSON * string. * Return self's buffer. * NOTE: self does not get reset! */ { const char *pc; for(pc= src; *pc; ++pc) { switch(*pc) { case '\\': STR_sprintf(self, "\\\\"); break; case '"': STR_sprintf(self, "\\\""); break; default: if(iscntrl(*pc)) { STR_sprintf(self, "\\u%4X", (int)(*pc)); } else { STR_putc(self, *pc); } } } return STR_str(self); }