csvtable.c
Go to the documentation of this file.
1 
19 #ifdef STANDALONE
20 #include <sqlite3.h>
21 #else
22 #include <sqlite3ext.h>
23 static SQLITE_EXTENSION_INIT1
24 #endif
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <limits.h>
30 
31 #ifdef _WIN32
32 #include <windows.h>
33 #define strcasecmp _stricmp
34 #define strncasecmp _strnicmp
35 #endif
36 
43 typedef struct csv_file {
44  FILE *f;
45  char *sep;
46  char *quot;
47  int isdos;
48  int maxl;
49  char *line;
50  long pos0;
51  int maxc;
52  int ncols;
53  char **cols;
55 
62 typedef struct csv_guess_fmt {
63  int nlines;
64  int hist[256];
66 
73 typedef struct csv_vtab {
74  sqlite3_vtab vtab;
76  int convert;
77  char coltypes[1];
79 
86 typedef struct {
87  sqlite3_vtab_cursor cursor;
88  long pos;
89 } csv_cursor;
90 
96 static void
97 append_free(char **in)
98 {
99  long *p = (long *) *in;
100 
101  if (p) {
102  p -= 2;
103  sqlite3_free(p);
104  *in = 0;
105  }
106 }
107 
117 static char *
118 append(char **in, char const *append, char quote)
119 {
120  long *p = (long *) *in;
121  long len, maxlen, actlen;
122  int i;
123  char *pp;
124  int nappend = append ? strlen(append) : 0;
125 
126  if (p) {
127  p -= 2;
128  maxlen = p[0];
129  actlen = p[1];
130  } else {
131  maxlen = actlen = 0;
132  }
133  len = nappend + actlen;
134  if (quote) {
135  len += 2;
136  for (i = 0; i < nappend; i++) {
137  if (append[i] == quote) {
138  len++;
139  }
140  }
141  } else if (!nappend) {
142  return *in;
143  }
144  if (len >= maxlen - 1) {
145  long *q;
146 
147  maxlen = (len + 0x03ff) & (~0x3ff);
148  q = (long *) sqlite3_realloc(p, maxlen + 1 + 2 * sizeof (long));
149  if (!q) {
150  return 0;
151  }
152  if (!p) {
153  q[1] = 0;
154  }
155  p = q;
156  p[0] = maxlen;
157  *in = (char *) (p + 2);
158  }
159  pp = *in + actlen;
160  if (quote) {
161  *pp++ = quote;
162  for (i = 0; i < nappend; i++) {
163  *pp++ = append[i];
164  if (append[i] == quote) {
165  *pp++ = quote;
166  }
167  }
168  *pp++ = quote;
169  *pp = '\0';
170  } else {
171  if (nappend) {
172  memcpy(pp, append, nappend);
173  pp += nappend;
174  *pp = '\0';
175  }
176  }
177  p[1] = pp - *in;
178  return *in;
179 }
180 
187 static char *
188 unquote(char const *in)
189 {
190  char c, *ret;
191  int i;
192 
193  ret = sqlite3_malloc(strlen(in) + 1);
194  if (ret) {
195  c = in[0];
196  if ((c == '"') || (c == '\'')) {
197  i = strlen(in + 1);
198  if ((i > 0) && (in[i] == c)) {
199  strcpy(ret, in + 1);
200  ret[i - 1] = '\0';
201  return ret;
202  }
203  }
204  strcpy(ret, in);
205  }
206  return ret;
207 }
208 
215 static int
216 maptype(char const *type)
217 {
218  int typelen = type ? strlen(type) : 0;
219 
220  if ((typelen >= 3) &&
221  (strncasecmp(type, "integer", 7) == 0)) {
222  return SQLITE_INTEGER;
223  }
224  if ((typelen >= 6) &&
225  (strncasecmp(type, "double", 6) == 0)) {
226  return SQLITE_FLOAT;
227  }
228  if ((typelen >= 5) &&
229  (strncasecmp(type, "float", 5) == 0)) {
230  return SQLITE_FLOAT;
231  }
232  if ((typelen >= 4) &&
233  (strncasecmp(type, "real", 4) == 0)) {
234  return SQLITE_FLOAT;
235  }
236  return SQLITE_TEXT;
237 }
238 
245 static void
246 conv_names(char **names, int ncols)
247 {
248  int i;
249  char *p, *q;
250  static const char ws[] = "\n\t\r\b\v ";
251 
252  if (!names || ncols <= 0) {
253  return;
254  }
255  for (i = 0; i < ncols; i++) {
256  p = names[i];
257 
258  while (*p) {
259  if (strchr(ws, *p)) {
260  *p++ = '_';
261  q = p;
262  while (*q && strchr(ws, *q)) {
263  ++q;
264  }
265  if (*q && q > p) {
266  strcpy(p, q);
267  }
268  continue;
269  }
270  ++p;
271  }
272  }
273 }
274 
285 static void
286 result_or_bind(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
287  char *data, int len, int type)
288 {
289  char *endp;
290 
291  if (!data) {
292  if (ctx) {
293  sqlite3_result_null(ctx);
294  } else {
295  sqlite3_bind_null(stmt, idx);
296  }
297  return;
298  }
299  if (type == SQLITE_INTEGER) {
300  sqlite_int64 val;
301 #if defined(_WIN32) || defined(_WIN64)
302  char endc;
303 
304  if (sscanf(data, "%I64d%c", &val, &endc) == 1) {
305  if (ctx) {
306  sqlite3_result_int64(ctx, val);
307  } else {
308  sqlite3_bind_int64(stmt, idx, val);
309  }
310  return;
311  }
312 #else
313  endp = 0;
314 #ifdef __osf__
315  val = strtol(data, &endp, 0);
316 #else
317  val = strtoll(data, &endp, 0);
318 #endif
319  if (endp && (endp != data) && !*endp) {
320  if (ctx) {
321  sqlite3_result_int64(ctx, val);
322  } else {
323  sqlite3_bind_int64(stmt, idx, val);
324  }
325  return;
326  }
327 #endif
328  } else if (type == SQLITE_FLOAT) {
329  double val;
330 
331  endp = 0;
332  val = strtod(data, &endp);
333  if (endp && (endp != data) && !*endp) {
334  if (ctx) {
335  sqlite3_result_double(ctx, val);
336  } else {
337  sqlite3_bind_double(stmt, idx, val);
338  }
339  return;
340  }
341  }
342  if (ctx) {
343  sqlite3_result_text(ctx, data, len, SQLITE_TRANSIENT);
344  } else {
345  sqlite3_bind_text(stmt, idx, data, len, SQLITE_TRANSIENT);
346  }
347 }
348 
359 static int
360 process_col(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
361  char *data, int type, int conv)
362 {
363  char c, *p;
364  const char flchars[] = "Ee+-.,0123456789";
365 
366  if (!data) {
367  goto putdata;
368  }
369 
370  /*
371  * Floating point number test,
372  * converts single comma to dot.
373  */
374  c = data[0];
375  if ((c != '\0') && strchr(flchars + 2, c)) {
376  p = data + 1;
377  while (*p && strchr(flchars, *p)) {
378  ++p;
379  }
380  if (*p == '\0') {
381  char *first = 0;
382  int n = 0;
383 
384  p = data;
385  while (p) {
386  p = strchr(p, ',');
387  if (!p) {
388  break;
389  }
390  if (++n == 1) {
391  first = p;
392  }
393  ++p;
394  }
395  if (first) {
396  *first = '.';
397  goto putdata;
398  }
399  }
400  }
401  if (conv) {
402  char *utf = sqlite3_malloc(strlen(data) * 2 + 2);
403 
404  if (utf) {
405  p = utf;
406  while ((c = *data) != '\0') {
407  if (((conv & 10) == 10) && (c == '\\')) {
408  if (data[1] == 'q') {
409  *p++ = '\'';
410  data += 2;
411  continue;
412  }
413  }
414  if ((conv & 2) && (c == '\\')) {
415  char c2 = data[1];
416 
417  switch (c2) {
418  case '\0':
419  goto convdone;
420  case 'n':
421  *p = '\n';
422  break;
423  case 't':
424  *p = '\t';
425  break;
426  case 'r':
427  *p = '\r';
428  break;
429  case 'f':
430  *p = '\f';
431  break;
432  case 'v':
433  *p = '\v';
434  break;
435  case 'b':
436  *p = '\b';
437  break;
438  case 'a':
439  *p = '\a';
440  break;
441  case '?':
442  *p = '\?';
443  break;
444  case '\'':
445  *p = '\'';
446  break;
447  case '"':
448  *p = '\"';
449  break;
450  case '\\':
451  *p = '\\';
452  break;
453  default:
454  *p++ = c;
455  *p = c2;
456  break;
457  }
458  p++;
459  data += 2;
460  continue;
461  }
462  if ((conv & 1) && (c & 0x80)) {
463  *p++ = 0xc0 | ((c >> 6) & 0x1f);
464  *p++ = 0x80 | (c & 0x3f);
465  } else {
466  *p++ = c;
467  }
468  data++;
469  }
470 convdone:
471  *p = '\0';
472  result_or_bind(ctx, stmt, idx, utf, p - utf, type);
473  sqlite3_free(utf);
474  return SQLITE_OK;
475  } else {
476  if (ctx) {
477  sqlite3_result_error(ctx, "out of memory", -1);
478  }
479  return SQLITE_NOMEM;
480  }
481  }
482 putdata:
483  result_or_bind(ctx, stmt, idx, data, -1, type);
484  return SQLITE_OK;
485 }
486 
495 static csv_file *
496 csv_open(const char *filename, const char *sep, const char *quot)
497 {
498  FILE *f;
499  csv_file *csv;
500 
501 #ifdef _WIN32
502  f = fopen(filename, "rb");
503 #else
504  f = fopen(filename, "r");
505 #endif
506  if (!f) {
507  return 0;
508  }
509  csv = sqlite3_malloc(sizeof (csv_file));
510  if (!csv) {
511 error0:
512  fclose(f);
513  return 0;
514  }
515  csv->f = f;
516  if (sep && sep[0]) {
517  csv->sep = sqlite3_malloc(strlen(sep) + 1);
518  if (!csv->sep) {
519 error1:
520  sqlite3_free(csv);
521  goto error0;
522  }
523  strcpy(csv->sep, sep);
524  } else {
525  csv->sep = 0;
526  }
527  if (quot && quot[0]) {
528  csv->quot = sqlite3_malloc(strlen(quot) + 1);
529  if (!csv->quot) {
530  if (csv->sep) {
531  sqlite3_free(csv->sep);
532  }
533  goto error1;
534  }
535  strcpy(csv->quot, quot);
536  } else {
537  csv->quot = 0;
538  }
539  csv->isdos = 0;
540  csv->maxl = 0;
541  csv->line = 0;
542  csv->pos0 = 0;
543  csv->maxc = 0;
544  csv->ncols = 0;
545  csv->cols = 0;
546  return csv;
547 }
548 
554 static void
556 {
557  if (csv) {
558  if (csv->sep) {
559  sqlite3_free(csv->sep);
560  }
561  if (csv->quot) {
562  sqlite3_free(csv->quot);
563  }
564  if (csv->line) {
565  sqlite3_free(csv->line);
566  }
567  if (csv->cols) {
568  sqlite3_free(csv->cols);
569  }
570  if (csv->f) {
571  fclose(csv->f);
572  }
573  sqlite3_free(csv);
574  }
575 }
576 
583 static int
585 {
586  if (csv && csv->f) {
587  return feof(csv->f);
588  }
589  return 1;
590 }
591 
599 static long
600 csv_seek(csv_file *csv, long pos)
601 {
602  if (csv && csv->f) {
603  return fseek(csv->f, pos, SEEK_SET);
604  }
605  return EOF;
606 }
607 
613 static void
615 {
616  if (csv && csv->f) {
617  csv_seek(csv, csv->pos0);
618  }
619 }
620 
627 static long
629 {
630  if (csv && csv->f) {
631  return ftell(csv->f);
632  }
633  return EOF;
634 }
635 
643 static int
645 {
646  int i, index = 0, inq = 0, c, col;
647  char *p, *sep;
648 
649  if (!csv || !csv->f) {
650  return EOF;
651  }
652  while (1) {
653  c = fgetc(csv->f);
654  if (c == EOF) {
655  if (index > 0) {
656  break;
657  }
658  return EOF;
659  }
660  if (c == '\0') {
661  continue;
662  }
663  if (c == '\r') {
664  int c2 = fgetc(csv->f);
665  c = '\n';
666 
667  if (c2 == '\n') {
668  csv->isdos = 1;
669  } else if (c2 != EOF) {
670  ungetc(c2, csv->f);
671  }
672  }
673  /* check for DOS EOF (Ctrl-Z) */
674  if (csv->isdos && (c == '\032')) {
675  int c2 = fgetc(csv->f);
676 
677  if (c2 == EOF) {
678  if (index > 0) {
679  break;
680  }
681  return EOF;
682  }
683  ungetc(c2, csv->f);
684  }
685  if (index >= csv->maxl - 1) {
686  int n = csv->maxl * 2;
687  char *line;
688 
689  if (n <= 0) {
690  n = 4096;
691  }
692  line = sqlite3_malloc(n);
693  if (!line) {
694  return EOF;
695  }
696  if (csv->line) {
697  memcpy(line, csv->line, index);
698  sqlite3_free(csv->line);
699  }
700  csv->maxl = n;
701  csv->line = line;
702  }
703  csv->line[index++] = c;
704  if (csv->quot && (p = strchr(csv->quot, c))) {
705  if (inq) {
706  if (*p == inq) {
707  inq = 0;
708  }
709  } else {
710  inq = *p;
711  }
712  }
713  if (!inq && (c == '\n')) {
714  break;
715  }
716  }
717  if (guess) {
718  for (i = 0; i < index; i++) {
719  guess->hist[csv->line[i] & 0xFF] += 1;
720  }
721  guess->nlines += 1;
722  csv->ncols = 0;
723  return 0;
724  }
725 
726  for (i = index - 1; i >= 0; i--) {
727  if (csv->line[i] != '\n') {
728  break;
729  }
730  }
731  index = i + 1;
732  csv->line[index] = '\0';
733  i = inq = col = 0;
734  sep = csv->sep ? csv->sep : ";";
735  if (!csv->cols) {
736  int n = 128;
737 
738  csv->cols = sqlite3_malloc(sizeof (char *) * n);
739  if (!csv->cols) {
740  return EOF;
741  }
742  csv->maxc = n;
743  }
744  csv->cols[col++] = csv->line;
745  while (i < index) {
746  if (csv->quot && (p = strchr(csv->quot, csv->line[i]))) {
747  if (inq) {
748  if (*p == inq) {
749  inq = 0;
750  }
751  } else {
752  inq = *p;
753  }
754  }
755  if (!inq && (p = strchr(sep, csv->line[i]))) {
756  p = csv->line + i;
757  *p = '\0';
758  if (col >= csv->maxc) {
759  int n = csv->maxc * 2;
760  char **cols;
761 
762  cols = sqlite3_realloc(csv->cols, sizeof (char *) * n);
763  if (!cols) {
764  return EOF;
765  }
766  csv->cols = cols;
767  csv->maxc = n;
768  }
769  csv->cols[col++] = p + 1;
770  }
771  ++i;
772  }
773  csv->ncols = col;
774 
775  /* strip off quotes */
776  if (csv->quot) {
777  for (i = 0; i < col; i++) {
778  if (*csv->cols[i]) {
779  p = strchr(csv->quot, *csv->cols[i]);
780  if (p) {
781  char *src, *dst;
782 
783  c = *p;
784  csv->cols[i] += 1;
785  sep = csv->cols[i] + strlen(csv->cols[i]) - 1;
786  if ((sep >= csv->cols[i]) && (*sep == c)) {
787  *sep = '\0';
788  }
789  /* collapse quote escape sequences */
790  src = csv->cols[i];
791  dst = 0;
792  while (*src) {
793  if ((*src == c) && (src[1] == c)) {
794  if (!dst) {
795  dst = src;
796  }
797  src++;
798  while (*src) {
799  *dst++ = *src++;
800  if (*src == c) {
801  --src;
802  break;
803  }
804  }
805  }
806  ++src;
807  }
808  if (dst) {
809  *dst++ = '\0';
810  }
811  }
812  }
813  }
814  }
815  return col;
816 }
817 
824 static int
826 {
827  if (csv && csv->cols) {
828  return csv->ncols;
829  }
830  return 0;
831 }
832 
840 static char *
841 csv_coldata(csv_file *csv, int n)
842 {
843  if (csv && csv->cols && (n >= 0) && (n < csv->ncols)) {
844  return csv->cols[n];
845  }
846  return 0;
847 }
848 
855 static int
857 {
858  csv_guess_fmt guess;
859  int i, n;
860  char *p, sep[32], quot[4];
861  const struct {
862  int c;
863  int min;
864  } sep_test[] = {
865  { ',', 2 },
866  { ';', 2 },
867  { '\t', 2 },
868  { ' ', 4 },
869  { '|', 2 }
870  };
871 
872  if (!csv) {
873  return EOF;
874  }
875  memset(&guess, 0, sizeof (guess));
876  csv->pos0 = 0;
877  csv_rewind(csv);
878  for (i = n = 0; i < 10; i++) {
879  n = csv_getline(csv, &guess);
880  if (n == EOF) {
881  break;
882  }
883  }
884  csv_rewind(csv);
885  if (n && !i) {
886  return EOF;
887  }
888  p = quot;
889  n = '"';
890  if (guess.hist[n] > 1) {
891  *p++ = n;
892  }
893  n = '\'';
894  if (guess.hist[n] > 1) {
895  *p++ = n;
896  }
897  *p = '\0';
898  p = sep;
899  for (i = 0; i < sizeof (sep_test) / sizeof (sep_test[0]); i++) {
900  if (guess.hist[sep_test[i].c] > sep_test[i].min * guess.nlines) {
901  *p++ = sep_test[i].c;
902  }
903  }
904  *p = '\0';
905  if (quot[0]) {
906  p = sqlite3_malloc(strlen(quot) + 1);
907  if (p) {
908  strcpy(p, quot);
909  if (csv->quot) {
910  sqlite3_free(csv->quot);
911  }
912  csv->quot = p;
913  } else {
914  return EOF;
915  }
916  }
917  if (sep[0]) {
918  p = sqlite3_malloc(strlen(sep) + 1);
919  if (p) {
920  strcpy(p, sep);
921  if (csv->sep) {
922  sqlite3_free(csv->sep);
923  }
924  csv->sep = p;
925  } else {
926  return EOF;
927  }
928  }
929  return 0;
930 }
931 
965 static int
966 csv_vtab_connect(sqlite3* db, void *aux, int argc, const char * const *argv,
967  sqlite3_vtab **vtabp, char **errp)
968 {
969  csv_file *csv;
970  int rc = SQLITE_ERROR, i, k, ncnames, row1, *colmap = 0;
971  char **cnames, *schema = 0, **nargv;
972  csv_vtab *vtab = 0;
973 
974  if (argc < 4) {
975  *errp = sqlite3_mprintf("input file name missing");
976  return SQLITE_ERROR;
977  }
978  nargv = sqlite3_malloc(sizeof (char *) * argc);
979  memset(nargv, 0, sizeof (char *) * argc);
980  for (i = 3; i < argc; i++) {
981  nargv[i] = unquote(argv[i]);
982  }
983  csv = csv_open(nargv[3], (argc > 6) ? nargv[6] : 0,
984  (argc > 7) ? nargv[7] : 0);
985  if (!csv) {
986  *errp = sqlite3_mprintf("unable to open input file");
987 cleanup:
988  append_free(&schema);
989  for (i = 3; i < argc; i++) {
990  if (nargv[i]) {
991  sqlite3_free(nargv[i]);
992  }
993  }
994  if (vtab) {
995  sqlite3_free(vtab);
996  }
997  if (colmap) {
998  sqlite3_free(colmap);
999  }
1000  return rc;
1001  }
1002  if (!csv->sep && !csv->quot) {
1003  csv_guess(csv);
1004  }
1005  csv->pos0 = 0;
1006  row1 = 0;
1007  if (argc > 4) {
1008  row1 = strtol(nargv[4], 0, 10);
1009  }
1010  if (row1) {
1011  /* use column names from 1st row */
1012  csv_getline(csv, 0);
1013  if (csv->ncols < 1) {
1014  csv_close(csv);
1015  *errp = sqlite3_mprintf("unable to get column names");
1016  goto cleanup;
1017  }
1018  csv->pos0 = csv_tell(csv);
1019  csv_rewind(csv);
1020  ncnames = csv_ncols(csv);
1021  cnames = csv->cols;
1022  } else if (argc > 8) {
1023  ncnames = argc - 8;
1024  cnames = (char **) nargv + 8;
1025  } else {
1026  /* use number of columns from 1st row */
1027  csv_getline(csv, 0);
1028  if (csv->ncols < 1) {
1029  csv_close(csv);
1030  *errp = sqlite3_mprintf("unable to get column names");
1031  goto cleanup;
1032  }
1033  csv_rewind(csv);
1034  ncnames = csv_ncols(csv);
1035  cnames = 0;
1036  }
1037  colmap = sqlite3_malloc(sizeof (int) * ncnames);
1038  if (!colmap) {
1039  csv_close(csv);
1040  *errp = sqlite3_mprintf("out of memory");
1041  goto cleanup;
1042  }
1043  memset(colmap, 0, sizeof (int) * ncnames);
1044  vtab = sqlite3_malloc(sizeof (csv_vtab) + ncnames);
1045  if (!vtab) {
1046  csv_close(csv);
1047  *errp = sqlite3_mprintf("out of memory");
1048  goto cleanup;
1049  }
1050  memset(vtab, 0, sizeof (*vtab));
1051  vtab->convert = 0;
1052  if (argc > 5) {
1053  vtab->convert = strtol(nargv[5], 0, 10);
1054  if (row1 && (vtab->convert & 4)) {
1055  conv_names(cnames, ncnames);
1056  }
1057  }
1058  vtab->csv = csv;
1059  append(&schema, "CREATE TABLE x(", 0);
1060  for (i = 0; cnames && (i < ncnames); i++) {
1061  if (!cnames[i] || (cnames[i][0] == '\0')) {
1062  continue;
1063  }
1064  k = strlen(cnames[i]);
1065  if ((k > 7) && (strncasecmp("column_", cnames[i], 7) == 0)) {
1066  char c;
1067 
1068  if (sscanf(cnames[i] + 7, "%d%c", &k, &c) == 1) {
1069  colmap[i] = k;
1070  }
1071  }
1072  }
1073  for (i = 0; i < ncnames; i++) {
1074  vtab->coltypes[i] = SQLITE_TEXT;
1075  if (!cnames || !cnames[i] || (cnames[i][0] == '\0')) {
1076  int want = i + 1;
1077  char colname[64];
1078 
1079  while (1) {
1080  for (k = 0; k < ncnames; k++) {
1081  if ((k != i) && (colmap[k] == want)) {
1082  want++;
1083  break;
1084  }
1085  }
1086  if (k >= ncnames) {
1087  colmap[i] = want;
1088  break;
1089  }
1090  }
1091  sprintf(colname, "column_%d", colmap[i]);
1092  append(&schema, colname, '"');
1093  } else if (row1 > 0) {
1094  append(&schema, cnames[i], '"');
1095  } else if (row1 < 0) {
1096  append(&schema, cnames[i], '"');
1097  if (i + 8 < argc) {
1098  char *type = nargv[i + 8];
1099 
1100  append(&schema, " ", 0);
1101  append(&schema, type, 0);
1102  vtab->coltypes[i] = maptype(type);
1103  }
1104  } else {
1105  char *type = cnames[i];
1106 
1107  append(&schema, cnames[i], 0);
1108  while (*type && !strchr(" \t", *type)) {
1109  type++;
1110  }
1111  while (*type && strchr(" \t", *type)) {
1112  type++;
1113  }
1114  vtab->coltypes[i] = maptype(type);
1115  }
1116  if (i < ncnames - 1) {
1117  append(&schema, ",", 0);
1118  }
1119  }
1120  append(&schema, ")", 0);
1121  rc = sqlite3_declare_vtab(db, schema);
1122  if (rc != SQLITE_OK) {
1123  csv_close(csv);
1124  *errp = sqlite3_mprintf("table definition failed, error %d, "
1125  "schema '%s'", rc, schema);
1126  goto cleanup;
1127  }
1128  *vtabp = &vtab->vtab;
1129  *errp = 0;
1130  vtab = 0;
1131  goto cleanup;
1132 }
1133 
1145 static int
1146 csv_vtab_create(sqlite3* db, void *aux, int argc,
1147  const char *const *argv,
1148  sqlite3_vtab **vtabp, char **errp)
1149 {
1150  return csv_vtab_connect(db, aux, argc, argv, vtabp, errp);
1151 }
1152 
1159 static int
1160 csv_vtab_disconnect(sqlite3_vtab *vtab)
1161 {
1162  csv_vtab *tab = (csv_vtab *) vtab;
1163 
1164  csv_close(tab->csv);
1165  sqlite3_free(tab);
1166  return SQLITE_OK;
1167 }
1168 
1175 static int
1176 csv_vtab_destroy(sqlite3_vtab *vtab)
1177 {
1178  return csv_vtab_disconnect(vtab);
1179 }
1180 
1188 static int
1189 csv_vtab_bestindex(sqlite3_vtab *vtab, sqlite3_index_info *info)
1190 {
1191  return SQLITE_OK;
1192 }
1193 
1201 static int
1202 csv_vtab_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursorp)
1203 {
1204  csv_cursor *cur = sqlite3_malloc(sizeof (*cur));
1205  csv_vtab *tab = (csv_vtab *) vtab;
1206 
1207  if (!cur) {
1208  return SQLITE_ERROR;
1209  }
1210  cur->cursor.pVtab = vtab;
1211  csv_rewind(tab->csv);
1212  cur->pos = csv_tell(tab->csv);
1213  *cursorp = &cur->cursor;
1214  return SQLITE_OK;
1215 }
1216 
1223 static int
1224 csv_vtab_close(sqlite3_vtab_cursor *cursor)
1225 {
1226  sqlite3_free(cursor);
1227  return SQLITE_OK;
1228 }
1229 
1236 static int
1237 csv_vtab_next(sqlite3_vtab_cursor *cursor)
1238 {
1239  csv_cursor *cur = (csv_cursor *) cursor;
1240  csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1241 
1242  cur->pos = csv_tell(tab->csv);
1243  csv_getline(tab->csv, 0);
1244  return SQLITE_OK;
1245 }
1246 
1257 static int
1258 csv_vtab_filter(sqlite3_vtab_cursor *cursor, int idxNum,
1259  const char *idxStr, int argc, sqlite3_value **argv)
1260 {
1261  csv_cursor *cur = (csv_cursor *) cursor;
1262  csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1263 
1264  csv_rewind(tab->csv);
1265  return csv_vtab_next(cursor);
1266 }
1267 
1274 static int
1275 csv_vtab_eof(sqlite3_vtab_cursor *cursor)
1276 {
1277  csv_cursor *cur = (csv_cursor *) cursor;
1278  csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1279 
1280  return csv_eof(tab->csv);
1281 }
1282 
1291 static int
1292 csv_vtab_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int n)
1293 {
1294  csv_cursor *cur = (csv_cursor *) cursor;
1295  csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1296  char *data = csv_coldata(tab->csv, n);
1297 
1298  return process_col(ctx, 0, 0, data, tab->coltypes[n], tab->convert);
1299 }
1300 
1308 static int
1309 csv_vtab_rowid(sqlite3_vtab_cursor *cursor, sqlite_int64 *rowidp)
1310 {
1311  csv_cursor *cur = (csv_cursor *) cursor;
1312 
1313  *rowidp = cur->pos;
1314  return SQLITE_OK;
1315 }
1316 
1317 #if (SQLITE_VERSION_NUMBER > 3004000)
1318 
1325 static int
1326 csv_vtab_rename(sqlite3_vtab *vtab, const char *newname)
1327 {
1328  return SQLITE_OK;
1329 }
1330 
1331 #endif
1332 
1337 static const sqlite3_module csv_vtab_mod = {
1338  1, /* iVersion */
1339  csv_vtab_create, /* xCreate */
1340  csv_vtab_connect, /* xConnect */
1341  csv_vtab_bestindex, /* xBestIndex */
1342  csv_vtab_disconnect, /* xDisconnect */
1343  csv_vtab_destroy, /* xDestroy */
1344  csv_vtab_open, /* xOpen */
1345  csv_vtab_close, /* xClose */
1346  csv_vtab_filter, /* xFilter */
1347  csv_vtab_next, /* xNext */
1348  csv_vtab_eof, /* xEof */
1349  csv_vtab_column, /* xColumn */
1350  csv_vtab_rowid, /* xRowid */
1351  0, /* xUpdate */
1352  0, /* xBegin */
1353  0, /* xSync */
1354  0, /* xCommit */
1355  0, /* xRollback */
1356  0, /* xFindFunction */
1357 #if (SQLITE_VERSION_NUMBER > 3004000)
1358  csv_vtab_rename, /* xRename */
1359 #endif
1360 };
1361 
1389 static void
1390 csv_import_func(sqlite3_context *ctx, int argc, sqlite3_value **argv)
1391 {
1392  csv_file *csv;
1393  int rc, i, k, ncnames, row1, convert = 0, useargs = 0, *colmap = 0;
1394  char *tname, *fname, *sql = 0, **cnames, *coltypes = 0;
1395  sqlite3 *db = (sqlite3 *) sqlite3_user_data(ctx);
1396  sqlite3_stmt *stmt = 0;
1397 
1398  if (argc < 2) {
1399  sqlite3_result_error(ctx, "need at least 2 arguments", -1);
1400  return;
1401  }
1402  tname = (char *) sqlite3_value_text(argv[0]);
1403  if (!tname) {
1404  sqlite3_result_error(ctx, "table name is NULL", -1);
1405  return;
1406  }
1407  fname = (char *) sqlite3_value_text(argv[1]);
1408  if (!fname) {
1409  sqlite3_result_error(ctx, "file name is NULL", -1);
1410  return;
1411  }
1412  csv = csv_open(fname,
1413  (argc > 4) ? (char *) sqlite3_value_text(argv[4]) : 0,
1414  (argc > 5) ? (char *) sqlite3_value_text(argv[5]) : 0);
1415  if (!csv) {
1416  sqlite3_result_error(ctx, "unable to open input file", -1);
1417 cleanup:
1418  if (stmt) {
1419  sqlite3_finalize(stmt);
1420  }
1421  append_free(&sql);
1422  if (colmap) {
1423  sqlite3_free(colmap);
1424  }
1425  if (csv) {
1426  csv_close(csv);
1427  }
1428  return;
1429  }
1430  if (!csv->sep && !csv->quot) {
1431  csv_guess(csv);
1432  }
1433  csv->pos0 = 0;
1434  row1 = 0;
1435  if (argc > 2) {
1436  row1 = sqlite3_value_int(argv[2]);
1437  }
1438  if (row1) {
1439  /* use column names from 1st row */
1440  csv_getline(csv, 0);
1441  if (csv->ncols < 1) {
1442  sqlite3_result_error(ctx, "unable to get column names", -1);
1443  goto cleanup;
1444  }
1445  csv->pos0 = csv_tell(csv);
1446  csv_rewind(csv);
1447  ncnames = csv_ncols(csv);
1448  cnames = csv->cols;
1449  } else if (argc > 6) {
1450  ncnames = argc - 6;
1451  cnames = 0;
1452  useargs = 1;
1453  } else {
1454  /* use number of columns from 1st row */
1455  csv_getline(csv, 0);
1456  if (csv->ncols < 1) {
1457  sqlite3_result_error(ctx, "unable to get column names", -1);
1458  goto cleanup;
1459  }
1460  csv_rewind(csv);
1461  ncnames = csv_ncols(csv);
1462  cnames = 0;
1463  }
1464  convert = 0;
1465  if (argc > 3) {
1466  convert = sqlite3_value_int(argv[3]);
1467  if (row1 && (convert & 4)) {
1468  conv_names(cnames, ncnames);
1469  }
1470  }
1471  /* test if table exists */
1472  append(&sql, "PRAGMA table_info(", 0);
1473  append(&sql, tname, '"');
1474  append(&sql, ")", 0);
1475  if (!sql) {
1476 oom:
1477  sqlite3_result_error(ctx, "out of memory", -1);
1478  goto cleanup;
1479  }
1480  rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
1481  append_free(&sql);
1482  if (rc != SQLITE_OK) {
1483 prepfail:
1484  sqlite3_result_error(ctx, "prepare failed", -1);
1485  goto cleanup;
1486  }
1487  /* find number of colums */
1488  i = 0;
1489  rc = sqlite3_step(stmt);
1490  while (rc == SQLITE_ROW) {
1491  i++;
1492  rc = sqlite3_step(stmt);
1493  }
1494  if (rc != SQLITE_DONE) {
1495 selfail:
1496  sqlite3_result_error(ctx, "select failed", -1);
1497  goto cleanup;
1498  }
1499  if (i > 0) {
1500  /* get column types */
1501  sqlite3_reset(stmt);
1502  ncnames = i;
1503  coltypes = sqlite3_malloc(ncnames);
1504  if (!coltypes) {
1505  goto oom;
1506  }
1507  rc = sqlite3_step(stmt);
1508  i = 0;
1509  while (rc == SQLITE_ROW) {
1510  coltypes[i++] = maptype((char *) sqlite3_column_text(stmt, 2));
1511  rc = sqlite3_step(stmt);
1512  }
1513  if (rc != SQLITE_DONE) {
1514  goto selfail;
1515  }
1516  } else {
1517  /* create new table */
1518  sqlite3_finalize(stmt);
1519  stmt = 0;
1520  colmap = (int *) sqlite3_malloc(ncnames + sizeof (int) * ncnames);
1521  if (!colmap) {
1522  goto oom;
1523  }
1524  memset(colmap, 0, sizeof (int) * ncnames);
1525  coltypes = (char *) (colmap + ncnames);
1526  append(&sql, "CREATE TABLE ", 0);
1527  append(&sql, tname, '"');
1528  append(&sql, "(", 0);
1529  for (i = 0; cnames && (i < ncnames); i++) {
1530  if (!cnames[i] || (cnames[i][0] == '\0')) {
1531  continue;
1532  }
1533  k = strlen(cnames[i]);
1534  if ((k > 7) && (strncasecmp("column_", cnames[i], 7) == 0)) {
1535  char c;
1536 
1537  if (sscanf(cnames[i] + 7, "%d%c", &k, &c) == 1) {
1538  colmap[i] = k;
1539  }
1540  }
1541  }
1542  for (i = 0; i < ncnames; i++) {
1543  int want = i + 1;
1544  char colname[64];
1545 
1546  coltypes[i] = SQLITE_TEXT;
1547  if (useargs) {
1548  char *type = (char *) sqlite3_value_text(argv[i + 6]);
1549 
1550  if (!type) {
1551  goto defcol;
1552  }
1553  append(&sql, type, 0);
1554  while (*type && !strchr(" \t", *type)) {
1555  type++;
1556  }
1557  while (*type && strchr(" \t", *type)) {
1558  type++;
1559  }
1560  coltypes[i] = maptype(type);
1561  } else if (!cnames || !cnames[i]) {
1562 defcol:
1563  while (1) {
1564  for (k = 0; k < ncnames; k++) {
1565  if ((k != i) && (colmap[k] == want)) {
1566  want++;
1567  break;
1568  }
1569  }
1570  if (k >= ncnames) {
1571  colmap[i] = want;
1572  break;
1573  }
1574  }
1575  sprintf(colname, "column_%d", colmap[i]);
1576  append(&sql, colname, '"');
1577  } else if (row1 > 0) {
1578  append(&sql, cnames[i], '"');
1579  } else if (row1 < 0) {
1580  append(&sql, cnames[i], '"');
1581  if (i + 6 < argc) {
1582  char *type = (char *) sqlite3_value_text(argv[i + 6]);
1583 
1584  if (type) {
1585  append(&sql, " ", 0);
1586  append(&sql, type, 0);
1587  coltypes[i] = maptype(type);
1588  }
1589  }
1590  }
1591  if (i < ncnames - 1) {
1592  append(&sql, ",", 0);
1593  }
1594  }
1595  append(&sql, ")", 0);
1596  rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
1597  if (rc != SQLITE_OK) {
1598  goto prepfail;
1599  }
1600  rc = sqlite3_step(stmt);
1601  if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
1602  sqlite3_result_error(ctx, "create table failed", -1);
1603  goto cleanup;
1604  }
1605  append_free(&sql);
1606  }
1607  sqlite3_finalize(stmt);
1608  stmt = 0;
1609  /* make INSERT statement */
1610  append(&sql, "INSERT INTO ", 0);
1611  append(&sql, tname, '"');
1612  append(&sql, " VALUES(", 0);
1613  for (i = 0; i < ncnames; i++) {
1614  append(&sql, (i < ncnames - 1) ? "?," : "?)", 0);
1615  }
1616  rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
1617  if (rc != SQLITE_OK) {
1618  goto prepfail;
1619  }
1620  append_free(&sql);
1621  /* import the CSV file */
1622  row1 = 0;
1623  while (csv_getline(csv, 0) != EOF) {
1624  for (i = 0; i < ncnames; i++) {
1625  char *data = csv_coldata(csv, i);
1626 
1627  rc = process_col(0, stmt, i + 1, data, coltypes[i], convert);
1628  if (rc != SQLITE_OK) {
1629  goto inserr;
1630  }
1631  }
1632  rc = sqlite3_step(stmt);
1633  if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
1634  if ((rc != SQLITE_MISMATCH) && (rc != SQLITE_CONSTRAINT)) {
1635 inserr:
1636  sqlite3_result_error(ctx, "insert failed", -1);
1637  goto cleanup;
1638  }
1639  } else {
1640  row1++;
1641  }
1642  sqlite3_reset(stmt);
1643  }
1644  sqlite3_result_int(ctx, row1);
1645  goto cleanup;
1646 }
1647 
1654 #ifndef STANDALONE
1655 static
1656 #endif
1657 int
1658 csv_vtab_init(sqlite3 *db)
1659 {
1660  sqlite3_create_function(db, "import_csv", -1, SQLITE_UTF8,
1661  (void *) db, csv_import_func, 0, 0);
1662  return sqlite3_create_module(db, "csvtable", &csv_vtab_mod, 0);
1663 }
1664 
1665 #ifndef STANDALONE
1666 
1675 int
1676 sqlite3_extension_init(sqlite3 *db, char **errmsg,
1677  const sqlite3_api_routines *api)
1678 {
1679  SQLITE_EXTENSION_INIT2(api);
1680  return csv_vtab_init(db);
1681 }
1682 
1683 #endif
csv_cursor::pos
long pos
CSV file position.
Definition: csvtable.c:88
csv_file::cols
char ** cols
column buffer
Definition: csvtable.c:53
csv_file
struct csv_file csv_file
csv_vtab_create
static int csv_vtab_create(sqlite3 *db, void *aux, int argc, const char *const *argv, sqlite3_vtab **vtabp, char **errp)
Create virtual table.
Definition: csvtable.c:1146
csv_file::maxl
int maxl
max.
Definition: csvtable.c:48
csv_file::sep
char * sep
column separator characters
Definition: csvtable.c:45
csv_ncols
static int csv_ncols(csv_file *csv)
Return number of columns of current row in CSV file.
Definition: csvtable.c:825
csv_coldata
static char * csv_coldata(csv_file *csv, int n)
Return nth column of current row in CSV file.
Definition: csvtable.c:841
csv_seek
static long csv_seek(csv_file *csv, long pos)
Position CSV file handle.
Definition: csvtable.c:600
result_or_bind
static void result_or_bind(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx, char *data, int len, int type)
Make result data or parameter binding accoring to type.
Definition: csvtable.c:286
conv_names
static void conv_names(char **names, int ncols)
Convert and collapse white space in column names to underscore.
Definition: csvtable.c:246
csv_vtab::csv
csv_file * csv
CSV file handle.
Definition: csvtable.c:75
append_free
static void append_free(char **in)
Free dynamically allocated string buffer.
Definition: csvtable.c:97
csv_vtab::convert
int convert
convert flags
Definition: csvtable.c:76
csv_cursor::cursor
sqlite3_vtab_cursor cursor
SQLite virtual table cursor.
Definition: csvtable.c:87
process_col
static int process_col(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx, char *data, int type, int conv)
Process one column of the current row.
Definition: csvtable.c:360
csv_guess_fmt
Definition: csvtable.c:62
csv_vtab_destroy
static int csv_vtab_destroy(sqlite3_vtab *vtab)
Destroy virtual table.
Definition: csvtable.c:1176
unquote
static char * unquote(char const *in)
Strip off quotes given string.
Definition: csvtable.c:188
csv_open
static csv_file * csv_open(const char *filename, const char *sep, const char *quot)
Open CSV file for reading and return handle to it.
Definition: csvtable.c:496
csv_vtab::coltypes
char coltypes[1]
column types
Definition: csvtable.c:77
csv_vtab
Definition: csvtable.c:73
append
static char * append(char **in, char const *append, char quote)
Append a string to dynamically allocated string buffer with optional quoting.
Definition: csvtable.c:118
csv_vtab_filter
static int csv_vtab_filter(sqlite3_vtab_cursor *cursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv)
Filter function for virtual table.
Definition: csvtable.c:1258
csv_close
static void csv_close(csv_file *csv)
Close CSV file handle.
Definition: csvtable.c:555
csv_file::f
FILE * f
CSV file.
Definition: csvtable.c:44
maptype
static int maptype(char const *type)
Map string to SQLite data type.
Definition: csvtable.c:216
csv_getline
static int csv_getline(csv_file *csv, csv_guess_fmt *guess)
Read and process one line of CSV file handle.
Definition: csvtable.c:644
csv_vtab_column
static int csv_vtab_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int n)
Return column data of virtual table.
Definition: csvtable.c:1292
csv_guess_fmt::hist
int hist[256]
Definition: csvtable.c:64
csv_vtab_close
static int csv_vtab_close(sqlite3_vtab_cursor *cursor)
Close virtual table cursor.
Definition: csvtable.c:1224
csv_vtab_eof
static int csv_vtab_eof(sqlite3_vtab_cursor *cursor)
Return end of table state of virtual table cursor.
Definition: csvtable.c:1275
csv_cursor
Definition: csvtable.c:86
csv_vtab_init
static int csv_vtab_init(sqlite3 *db)
Module initializer creating SQLite functions and modules.
Definition: csvtable.c:1658
csv_vtab_bestindex
static int csv_vtab_bestindex(sqlite3_vtab *vtab, sqlite3_index_info *info)
Determines information for filter function according to constraints.
Definition: csvtable.c:1189
csv_file::pos0
long pos0
file position for rewind
Definition: csvtable.c:50
sqlite3_extension_init
int sqlite3_extension_init(sqlite3 *db, char **errmsg, const sqlite3_api_routines *api)
Initializer for SQLite extension load mechanism.
Definition: csvtable.c:1676
csv_tell
static long csv_tell(csv_file *csv)
Return current position of CSV file handle.
Definition: csvtable.c:628
min
#define min(a, b)
Definition: sqlite3odbc.c:225
csv_guess
static int csv_guess(csv_file *csv)
Guess CSV layout of CSV file handle.
Definition: csvtable.c:856
stmt
Definition: sqlite3odbc.h:231
csv_vtab_mod
static const sqlite3_module csv_vtab_mod
SQLite module descriptor.
Definition: csvtable.c:1337
csv_guess_fmt
struct csv_guess_fmt csv_guess_fmt
csv_file::maxc
int maxc
max.
Definition: csvtable.c:51
csv_file::quot
char * quot
text quoting characters
Definition: csvtable.c:46
csv_import_func
static void csv_import_func(sqlite3_context *ctx, int argc, sqlite3_value **argv)
Import CSV file as table into database.
Definition: csvtable.c:1390
csv_rewind
static void csv_rewind(csv_file *csv)
Rewind CSV file handle.
Definition: csvtable.c:614
csv_file
Definition: csvtable.c:43
csv_file::isdos
int isdos
true, when DOS format detected
Definition: csvtable.c:47
csv_vtab_disconnect
static int csv_vtab_disconnect(sqlite3_vtab *vtab)
Disconnect virtual table.
Definition: csvtable.c:1160
csv_guess_fmt::nlines
int nlines
Definition: csvtable.c:63
csv_vtab_connect
static int csv_vtab_connect(sqlite3 *db, void *aux, int argc, const char *const *argv, sqlite3_vtab **vtabp, char **errp)
Connect to virtual table.
Definition: csvtable.c:966
csv_vtab_next
static int csv_vtab_next(sqlite3_vtab_cursor *cursor)
Retrieve next row from virtual table cursor.
Definition: csvtable.c:1237
csv_file::line
char * line
line buffer
Definition: csvtable.c:49
csv_eof
static int csv_eof(csv_file *csv)
Test EOF on CSV file handle.
Definition: csvtable.c:584
csv_vtab_rowid
static int csv_vtab_rowid(sqlite3_vtab_cursor *cursor, sqlite_int64 *rowidp)
Return current rowid of virtual table cursor.
Definition: csvtable.c:1309
csv_vtab::vtab
sqlite3_vtab vtab
SQLite virtual table.
Definition: csvtable.c:74
csv_file::ncols
int ncols
number of columns
Definition: csvtable.c:52
csv_vtab_open
static int csv_vtab_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursorp)
Open virtual table and return cursor.
Definition: csvtable.c:1202
csv_vtab
struct csv_vtab csv_vtab

Generated on Mon Aug 17 2020 by doxygen.
Contact: chw@ch-werner.de