/*- * Copyright (c) 2013 James K. Lowden * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "csvtab.h" static void splat( const char s[], size_t len ) { char *buf = alloca(len + 1); strncpy(buf, s, len); buf[len] = '\0'; fprintf(stderr, "data[%lu]: '%s'\n", len, buf); } static int log_level() { int level; const char *var = getenv("CSVTAB"); if( var == NULL ) { return -1; } sscanf(var, "%d", &level); return level; } enum {FUNCTION_LOG = 2}; static void logger( int threshold, const char *fmt, ... ) { va_list ap; if( threshold <= log_level() ) { va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); } } /* * Accumulate column data. * Callback function provided to CSV parser. * Do not use strdup because data may include embedded NUL. */ void col_func( const char s[], size_t len, void *vprow ) { struct row_t *prow = vprow; struct col_t *pcol; if( log_level() > 0 ) splat(s, len); if( prow->ncols <= prow->icol ) { prow->ncols = 1 + prow->icol; struct col_t *p = realloc(prow->cols, prow->ncols * sizeof(prow->cols[0])); if( !p ) { err(EXIT_FAILURE, __FUNCTION__); } prow->cols = p; } assert(prow->cols); pcol = prow->cols + prow->icol++; if( (pcol->data = malloc(len)) == NULL ) { err(EXIT_FAILURE, __FUNCTION__); } memcpy( pcol->data, s, len ); pcol->len = len; } /* * Utility functions to generate DDL from xCreate arguments */ static const char * map_mem( const char *path, struct col_t *pmem ) { int fd = open(path, O_RDONLY, 0); if( fd < 0 ) { err(errno, "%s: '%s'", __FUNCTION__, path); } if( (pmem->len = lseek(fd, 0, SEEK_END)) < 0 ) { err(errno, __FUNCTION__); } if( (pmem->data = mmap(NULL, pmem->len, PROT_READ, MAP_FILE, fd, 0)) == MAP_FAILED ) { err(errno, __FUNCTION__); } return pmem->data; } /* * Read the first line of the datafile and parse it for column names. * Parsing stops at the first newline (quoted or not), which must appear * within the first 1K of the file. */ const char * const * generate_column_names( const struct col_t *pmem, const char **ppbegin ) { /* read & parse the first line, hope there are no quoted newlines */ char **output; int i; struct row_t row = { 0, 0, NULL }; assert(ppbegin != NULL); if( (*ppbegin = parse(pmem->data, pmem->len, col_func, &row)) == NULL ) { assert(*ppbegin != NULL); /* can't happen */ err(EXIT_FAILURE, "parser failed"); } assert(row.ncols > 0); assert(row.cols); /* add NULL to terminate the list */ if( (output = calloc(row.ncols+1, sizeof(*output))) == NULL ) { err(EXIT_FAILURE, "%s", __FUNCTION__); } for( i=0; i < row.ncols; i++ ) { if( (output[i] = calloc(1, 1+row.cols[i].len)) == NULL ) { err(EXIT_FAILURE, "%s", __FUNCTION__); } memcpy(output[i], row.cols[i].data, row.cols[i].len); } return (const char * const *)output; } /* * Formulate the CREATE TABLE statement either from the column names * provided by the user or from the first line of the data file. */ const char * generate_ddl( const char tablename[], const struct col_t *pcsvmem, const char **ppbegin, int argc, const char * const *argv ) { int i; long len; FILE *DDL = tmpfile(); char comma = '('; char *ddl; if( argc == 0 ) { argv = generate_column_names( pcsvmem, ppbegin ); while(argv[argc] != NULL) { argc++; } } assert(DDL); fprintf(DDL, "create table %s\n", tablename); for( i=0; i < argc; i++ ) { fprintf(DDL, "\t%c \"%s\" TEXT\n", comma, argv[i]); comma = ','; } fprintf(DDL, "\t);\n%c", '\0'); fflush(DDL); len = ftell(DDL); assert(len); if( (ddl = mmap(NULL, len, PROT_READ, MAP_FILE, fileno(DDL), 0)) == MAP_FAILED ) { err(errno, __FUNCTION__); } return ddl; } /* * SQLite Virtual Table functions */ SQLITE_EXTENSION_INIT1 /* * argv[0] -> module name ("cvs") * argv[1] -> database name * argv[2] -> tablename * argv[3] -> filename * argv[4..N]-> column names, or 0 to use first line as column names */ int xCreate( sqlite3 *db, void *pAux, int argc, const char * const *argv, sqlite3_vtab **ppVTab, char **pzErr ) { const char *dbname = argv[1]; const char *tablename = argv[2]; const char *filename = argv[3]; struct vtab *ptab; const char *ddl; logger(FUNCTION_LOG,__FUNCTION__); if( argc < 4 ) return SQLITE_ERROR; /* * Allocate a vtable with room at the end for the dbname. * Initialize dbname pointer to the address of the additional storage. */ if( (ptab = sqlite3_malloc(sizeof(*ptab) + 3 + strlen(dbname) + strlen(tablename) + strlen(filename))) == NULL ) { return SQLITE_NOMEM; } memset(ptab, '\0', sizeof(*ptab)); ptab->db = db; ptab->dbname = ptab->metadata; ptab->tablename = ptab->dbname + strlen(dbname) + 1; ptab->filename = ptab->tablename + strlen(tablename) + 1; strcpy( ptab->dbname, dbname ); strcpy( ptab->tablename, tablename ); strcpy( ptab->filename, filename ); ptab->colfunc = col_func; ptab->pbegin = map_mem(filename, &ptab->csvmem); ddl = generate_ddl(tablename, &ptab->csvmem, &ptab->pbegin, argc-4, argv + 4); logger(FUNCTION_LOG, ddl); #if 0 fprintf(stderr, "%s %s %s\n", argv[1], argv[2], argv[3]); fprintf(stderr, "%s %s %s\n", ptab->dbname, ptab->tablename, ptab->filename); #endif if( sqlite3_declare_vtab(db, ddl) != SQLITE_OK ) { sqlite3_free(ptab); return SQLITE_ERROR; } *ppVTab = (sqlite3_vtab *)ptab; return SQLITE_OK; } static int xConnect( sqlite3 *db, void *pAux, int argc, const char* const *argv, sqlite3_vtab **ppVTab, char **pzErr ) { logger(FUNCTION_LOG,__FUNCTION__); return xCreate(db, pAux, argc, argv, ppVTab, pzErr); } static int xBestIndex(sqlite3_vtab *pvtab, sqlite3_index_info* pi) { /* we're only going to iterate over the file each time */ pi->idxNum = 0; pi->idxStr = "any"; pi->needToFreeIdxStr = 0; pi->orderByConsumed = 0; pi->estimatedCost = 1; return SQLITE_OK; } static int xDisconnect(sqlite3_vtab *pVTab) { logger(FUNCTION_LOG,__FUNCTION__); return SQLITE_OK; } static int xDestroy(sqlite3_vtab *pVTab) { struct vtab *pvtab = (struct vtab *)pVTab; logger(FUNCTION_LOG,__FUNCTION__); munmap(pvtab->csvmem.data, pvtab->csvmem.len); sqlite3_free(pVTab); return SQLITE_OK; } static int xNext(sqlite3_vtab_cursor* pCursor); static int xOpen(sqlite3_vtab *pVtab, sqlite3_vtab_cursor **ppCursor) { struct vtab_cursor *pcursor = sqlite3_malloc( sizeof(struct vtab_cursor) ); logger(FUNCTION_LOG,__FUNCTION__); if( pcursor == NULL ) return SQLITE_NOMEM; pcursor->base.pVtab = pVtab; pcursor->row.icol = 0; pcursor->row.ncols = 0; pcursor->row.cols = NULL; pcursor->p = ((struct vtab *)pVtab)->pbegin; *ppCursor = &pcursor->base; return xNext(*ppCursor); } static int xClose(sqlite3_vtab_cursor *pCursor) { logger(FUNCTION_LOG,__FUNCTION__); sqlite3_free(pCursor); return SQLITE_OK; } static int xEof(sqlite3_vtab_cursor *pCursor) { struct vtab_cursor *pcur = (struct vtab_cursor *)pCursor; struct vtab *ptab = (struct vtab*) pcur->base.pVtab; logger(FUNCTION_LOG,__FUNCTION__); return pcur->p == ptab->csvmem.data + ptab->csvmem.len; } static int xFilter(sqlite3_vtab_cursor *pCursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv) { logger(FUNCTION_LOG,__FUNCTION__); return SQLITE_OK; } static int xNext(sqlite3_vtab_cursor* pCursor) { struct vtab_cursor *pcur = (struct vtab_cursor *)pCursor; struct vtab *ptab = (struct vtab*) pcur->base.pVtab; size_t len = ptab->csvmem.len - (pcur->p - ptab->csvmem.data); ssize_t nbytes; logger(FUNCTION_LOG,__FUNCTION__); assert(pcur->p + len == ptab->csvmem.data + ptab->csvmem.len); pcur->row.icol = 0; pcur->p = parse(pcur->p, len, ptab->colfunc, &pcur->row); return SQLITE_OK; } static int xColumn(sqlite3_vtab_cursor *pCursor, sqlite3_context *pcontext, int N) { struct vtab_cursor *pcur = (struct vtab_cursor *)pCursor; struct col_t *pcol; const char *value; assert(pCursor); assert(pcur); assert(pcur->base.pVtab); if( pcur->row.cols == NULL ) { sqlite3_result_error_code(pcontext, SQLITE_ERROR); return SQLITE_ERROR; } logger(FUNCTION_LOG, "%s: fetching column %d of %d\n", __FUNCTION__, N, pcur->row.ncols); assert(N < pcur->row.ncols); pcol = pcur->row.cols + N; logger(FUNCTION_LOG, "%s: fetching %lu bytes for column %d\n", __FUNCTION__, pcol->len, N); sqlite3_result_text(pcontext, pcol->data, pcol->len, SQLITE_TRANSIENT); return SQLITE_OK; } static int xRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid) { struct vtab_cursor *pcur = (struct vtab_cursor *)pCursor; logger(FUNCTION_LOG,__FUNCTION__); *pRowid = pcur->p - (char *)0; return SQLITE_OK; } /* * Rename the table. */ static int xRename(sqlite3_vtab *pVTab, const char *zNew) { struct vtab *pvtab = (struct vtab *)pVTab; char *newname = sqlite3_mprintf("%s", zNew); int erc; logger(FUNCTION_LOG,__FUNCTION__); if( newname == NULL ){ return SQLITE_NOMEM; } char *sql = sqlite3_mprintf("ALTER TABLE \"%w\".\"%w\" RENAME TO \"%w\"", pvtab->dbname, pvtab->tablename, newname); if( sql == NULL ) { return SQLITE_NOMEM; } if( (erc = sqlite3_exec(pvtab->db, sql, 0, 0, 0)) != SQLITE_OK ) { sqlite3_free(sql); return erc; } sqlite3_free(pvtab->tablename); pvtab->tablename = newname; return SQLITE_OK; } /* * A virtual table module for a CSV file */ static sqlite3_module csvModule = { 0, /* iVersion */ xCreate, /* xCreate - handle CREATE VIRTUAL TABLE */ xConnect, /* xConnect - reconnected to an existing table */ xBestIndex, /* xBestIndex - figure out how to do a query */ xDisconnect, /* xDisconnect - close a connection */ xDestroy, /* xDestroy - handle DROP TABLE */ xOpen, /* xOpen - open a cursor */ xClose, /* xClose - close a cursor */ xFilter, /* xFilter - configure scan constraints */ xNext, /* xNext - advance a cursor */ xEof, /* xEof - check for end of scan */ xColumn, /* xColumn - read data */ xRowid, /* xRowid - read data */ 0, /* xUpdate */ 0, /* xBegin */ 0, /* xSync */ 0, /* xCommit */ 0, /* xRollback */ 0, /* xFindMethod */ xRename, /* xRename */ }; /* * Register functions and the virtual table. */ static int register_module(sqlite3 *db, char **pzErrMsg) { int erc = sqlite3_create_module(db, "csv", &csvModule, 0); if( erc != SQLITE_OK ) { *pzErrMsg = sqlite3_mprintf("register_module: %d", erc); return erc; } return SQLITE_OK; } /* * Extension load function called by shell.c. */ int sqlite3_extension_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi ) { SQLITE_EXTENSION_INIT2(pApi); return register_module(db, pzErrMsg); }