/*- * Copyright (c) 2013 James K. Lowden * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include "parser.h" static errfunc_t errfunc = NULL; errfunc_t set_error_handler( errfunc_t e ) { errfunc_t old = errfunc; errfunc = e; return old; } typedef enum { start, end, in_field, end_field, last_field, parse_error } state_t; static const char *state_str[] = { "start", "end", "in_field", "end_field", "last_field", "parse_error" }; struct fragment_t { const char *start, *end;}; struct field_t { char *data; size_t len, capacity; }; const char * read_fragment( struct fragment_t *pfrag, state_t *pstate ) { const char *p = ++pfrag->start, *pend = pfrag->end; for( ; p < pend; p++ ) { switch(*p) { case '"': pfrag->end = p; if( p == pfrag->start ) { /* escaped quote is data */ pfrag->end++; } *pstate = in_field; return ++p; } } *pstate = parse_error; return p; } void append_to_field( const char input[], const char pend[], struct field_t *pout ) { size_t len = pend - input; if( pout->capacity < pout->len + len ) { pout->capacity = pout->len + len; if( (pout->data = realloc(pout->data, pout->capacity)) == NULL ) { err(EXIT_FAILURE, __FUNCTION__); } } memcpy( pout->data + pout->len, input, len); pout->len += len; } const char * read_field( const char input[], const char pend[], struct field_t *pout, state_t *pstate ) { const char *p; struct fragment_t frag = { input, input }; #if 0 printf("%s: '", __FUNCTION__); fwrite(input, pend-input, 1, stdout); printf("'\n"); #endif if( frag.start == pend ) { *pstate = end; return input; } for( ; frag.end < pend; frag.end++ ) { switch(*frag.end) { case '\\': case '"': append_to_field( frag.start, frag.end, pout ); frag.start = frag.end; frag.end = pend; p = read_fragment(&frag, pstate); /* printf("fragment is %lu bytes\n", frag.end - frag.start); */ append_to_field( frag.start, frag.end, pout ); if( *pstate != in_field ) { return p; } frag.start = frag.end; continue; case ',': *pstate = end_field; append_to_field( frag.start, frag.end, pout ); return frag.end + 1; case '\n': *pstate = last_field; append_to_field( frag.start, frag.end, pout ); return frag.end + 1; } } *pstate = last_field; return frag.end; } const char * parse( const char input[], size_t len, colfunc_t colfunc, void * vprow ) { const char *p = input, *pend = input+len; char msg[1024] = "parse error"; state_t state = start; struct field_t field = { NULL, 0, 0 }; while( p <= pend) { #if 0 printf("at %3lu (%c) state is %s\n", p - input, *p, state_str[state]); #endif switch( state ) { case start: case in_field: p = read_field(p, pend, &field, &state); break; case end_field: if( colfunc ) colfunc(field.data, field.len, vprow); field.len = 0; p = read_field(p, pend, &field, &state); break; case last_field: if( colfunc ) colfunc(field.data, field.len, vprow); return p; break; case end: return p; case parse_error: if( errfunc ) { errfunc(p, msg); return NULL; } err(EXIT_FAILURE, msg); } } fprintf(stderr, "p - input is %lu, len is %lu\n", p - input, len); return p; }