Logo Search packages:      
Sourcecode: gretl version File versions  Download package

dbwrite.c

/* 
 *  gretl -- Gnu Regression, Econometrics and Time-series Library
 *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
 * 
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 */

#include "libgretl.h"
#include "dbwrite.h"

/**
 * SECTION:dbwrite
 * @short_description: writing to a gretl database
 * @title: DB write
 * @include: gretl/libgretl.h, gretl/dbwrite.h
 *
 * Functionality for writing series to a native-format gretl database.
 */

/* Set to 1 to compile in the "#if DB_DEBUG" diagnostic blocks below
   (mostly trace messages written to stderr). */
#define DB_DEBUG 0

/* Replace, in place, every ':' in the NUL-terminated string @s
   with '.'; all other characters are left alone. */
static void dotify (char *s)
{
    char *p;

    for (p = s; *p != '\0'; p++) {
        if (*p == ':') {
            *p = '.';
        }
    }
}

/* Return the one-letter frequency code that is written at the start
   of a series' second index line: 'Q' when pd is 4, 'M' when pd is
   12, otherwise 'A' for other time-series data and 'U' for anything
   that is not time series. */
static char pd_char (const DATAINFO *pdinfo)
{
    switch (pdinfo->pd) {
    case 4:
        return 'Q';
    case 12:
        return 'M';
    default:
        break;
    }

    return dataset_is_time_series(pdinfo) ? 'A' : 'U';
}

/* Collect the series names recorded in the database index file
   @idxname.  Lines that start with '#' or are blank are ignored; of
   the remaining lines, the 1st, 3rd, 5th, ... are taken to hold a
   series name as their first whitespace-delimited token.

   On success the newly allocated array of names is written to
   @pnames and its length to @pnv (the caller frees it with
   free_strings_array()), and 0 is returned.  Otherwise @pnames and
   @pnv are not touched and the return value is E_FOPEN (cannot open
   the file), E_DATA (no names found, or a name line could not be
   parsed) or E_ALLOC.
*/
static int get_db_series_names (const char *idxname, char ***pnames,
                                int *pnv)
{
    char buf[256];
    char **names = NULL;
    FILE *fp;
    int nlines, k, count;
    int err = 0;

    fp = gretl_fopen(idxname, "r");
    if (fp == NULL) {
        return E_FOPEN;
    }

#if DB_DEBUG
    fprintf(stderr, "get_db_series_names: opened %s\n", idxname);
#endif

    /* pass 1: count the content lines; every odd-numbered one
       carries a series name */
    nlines = 0;
    while (fgets(buf, sizeof buf, fp) != NULL) {
        if (*buf != '#' && !string_is_blank(buf)) {
            nlines++;
        }
    }
    count = (nlines + 1) / 2;

#if DB_DEBUG
    fprintf(stderr, " found %d varnames\n", count);
#endif

    if (count == 0) {
        fclose(fp);
        return E_DATA;
    }

    names = strings_array_new_with_length(count, VNAMELEN);
    if (names == NULL) {
        fclose(fp);
        return E_ALLOC;
    }

    rewind(fp);

    /* pass 2: extract the name from each odd-numbered content line */
    nlines = k = 0;
    while (!err && fgets(buf, sizeof buf, fp) != NULL) {
        if (*buf == '#' || string_is_blank(buf)) {
            continue;
        }
        nlines++;
        if (nlines % 2 == 0) {
            /* even-numbered content line: not a name line */
            continue;
        }
        if (sscanf(buf, "%15s", names[k]) != 1) {
            err = E_DATA;
        }
        k++;
    }

    fclose(fp);

    if (err) {
        free_strings_array(names, count);
        return err;
    }

    *pnames = names;
    *pnv = count;

    return 0;
}

/* Count how many of the series in @list (ID numbers, with the count
   in list[0]) have a name that already appears in the database
   index file @idxname.

   Returns the number of such duplicates (0 if there are none).  If
   the index cannot be read, the error code is written to @err and
   -1 is returned; otherwise @err is set to 0.
*/

static int 
check_for_db_duplicates (const int *list, const DATAINFO *pdinfo,
                         const char *idxname, int *err)
{
    char **dbnames = NULL;
    int ndb = 0;
    int ndups = 0;
    int i, j;

#if DB_DEBUG   
    printlist(list, "check_for_db_duplicates: input save list");
#endif

    *err = get_db_series_names(idxname, &dbnames, &ndb);
    if (*err) {
        return -1;
    }

    for (i=1; i<=list[0]; i++) {
        const char *vname = pdinfo->varname[list[i]];
        int found = 0;

        /* stop scanning the db names at the first match */
        for (j=0; j<ndb && !found; j++) {
            found = (strcmp(vname, dbnames[j]) == 0);
        }
        ndups += found;
    }

    free_strings_array(dbnames, ndb);

    return ndups;
}

/* Write series @v from @Z to a database: two text lines describing
   the series go to the index stream @fidx and the observations, each
   converted to float, go to the binary stream @fbin.

   For time-series data, leading and trailing missing values are
   trimmed off before writing; missing values inside the retained
   range are written as DBNA.  If no observations remain nothing is
   written and 0 is returned.

   Returns 0 on success, 1 if writing to either stream fails.
*/
static int output_db_var (int v, const double **Z, const DATAINFO *pdinfo,
                          FILE *fidx, FILE *fbin) 
{
    char stobs[OBSLEN], endobs[OBSLEN];
    int t, t1, t2;
    int nobs;
    float val;

    t1 = 0;
    t2 = pdinfo->n - 1;

    if (dataset_is_time_series(pdinfo)) {
        /* trim missing values off both ends of the series */
        for (t=0; t<pdinfo->n; t++) {
            if (na(Z[v][t])) t1++;
            else break;
        }
        for (t=pdinfo->n - 1; t>=t1; t--) {
            if (na(Z[v][t])) t2--;
            else break;
        }
    }

    nobs = t2 - t1 + 1;
    if (nobs <= 0) {
        return 0;
    }

    ntodate(stobs, t1, pdinfo);
    ntodate(endobs, t2, pdinfo);
    /* the index uses '.' rather than ':' in observation strings */
    dotify(stobs);
    dotify(endobs);     

    if (fprintf(fidx, "%s  %s\n", pdinfo->varname[v], 
                VARLABEL(pdinfo, v)) < 0) {
        return 1;
    }
    if (fprintf(fidx, "%c  %s - %s  n = %d\n", pd_char(pdinfo),
                stobs, endobs, nobs) < 0) {
        return 1;
    }

    for (t=t1; t<=t2; t++) {
        if (na(Z[v][t])) {
            val = DBNA;
        } else {
            val = Z[v][t];
        }
        if (fwrite(&val, sizeof val, 1, fbin) != 1) {
            /* report the failure rather than leaving a short
               binary record behind unnoticed */
            return 1;
        }
    }

    return 0;
}

/* Copy @nvals float values from @fin, starting at byte position
   @offset from the start of that stream, to the current position
   of @fout.

   Returns 0 on success, 1 if the seek fails or if any value cannot
   be read or written (values copied before the failure remain in
   @fout).
*/
static int write_old_bin_chunk (long offset, int nvals, FILE *fin, FILE *fout)
{
    float val;
    int i;

    if (fseek(fin, offset, SEEK_SET) != 0) {
        /* without a successful seek we would copy the wrong data */
        return 1;
    }

    for (i=0; i<nvals; i++) {
        if (fread(&val, sizeof val, 1, fin) != 1) {
            return 1;
        }
        if (fwrite(&val, sizeof val, 1, fout) != 1) {
            return 1;
        }
    }

    return 0;
}

/* Remove the first occurrence of the value @m from @list (whose
   element count is held in list[0]); do nothing if @m is not a
   member. */
static void list_delete_element (int *list, int m)
{
    int pos = 1;

    while (pos <= list[0] && list[pos] != m) {
        pos++;
    }

    if (pos <= list[0]) {
        gretl_list_delete_at_pos(list, pos);
    }
}

/* writing to a previously existing database, replacing any existing
   variables with the same name as "new" ones, but otherwise
   preserving the existing content 
*/

static int 
append_db_data_with_replacement (const char *idxname, const char *binname,
                         int *list, const double **Z, 
                         const DATAINFO *pdinfo) 
{
    FILE *fidx = NULL, *fbin = NULL;
    char **oldnames = NULL;
    char *mask = NULL;
    int *newlist = NULL;
    int i, j, v, oldv;
    int nrep, err = 0;

    err = get_db_series_names(idxname, &oldnames, &oldv);
    if (err) {
      return err;
    }

    mask = calloc(oldv, 1);
    if (mask == NULL) {
      err = E_ALLOC;
      goto bailout;
    } 

    newlist = gretl_list_copy(list);
    if (newlist == NULL) {
      err = E_ALLOC;
      goto bailout;
    }

    nrep = 0;
    for (i=1; i<=list[0]; i++) {
      v = list[i];
      for (j=0; j<oldv; j++) {
          if (!strcmp(oldnames[j], pdinfo->varname[v])) {
            /* match: remove var v from "newlist" and flag that it
               is a replacement in "mask" */
            list_delete_element(newlist, v);
#if DB_DEBUG
            fprintf(stderr, "match: var %d and old db var %d\n", v, j);
#endif    
            mask[j] = 1;
            nrep++;
            break;
          }
      }
    }

#if DB_DEBUG
    fprintf(stderr, "write_db_data_with_replacement: replicated vars = %d\n", 
          nrep);
    printlist(list, "full var list");
    printlist(newlist, "new var list");
#endif    

    /* handle replacement variables first */
    if (nrep > 0) {
      char idxcpy[FILENAME_MAX];
      char bincpy[FILENAME_MAX];

      strcpy(idxcpy, idxname);
      strcat(idxcpy, ".cpy");

      strcpy(bincpy, binname);
      strcat(bincpy, ".cpy");

      err = gretl_copy_file(idxname, idxcpy);

      if (!err) {
          err = gretl_copy_file(binname, bincpy);
      }

      if (!err) {
          FILE *fp = NULL, *fq = NULL;
          char line1[256], line2[256];
          long offset = 0L;
          int nobs;

          fp = fq = NULL;
          
          fp = gretl_fopen(idxcpy, "r");
          if (fp == NULL) {
            err = E_FOPEN;
          }

          if (!err) {
            fidx = gretl_fopen(idxname, "w");
            if (fidx == NULL) {
                err = E_FOPEN;
            }
          }

          if (!err) {
            fq = gretl_fopen(bincpy, "rb");
            if (fq == NULL) {
                err = E_FOPEN;
            }
          }

          if (!err) {
            fbin = gretl_fopen(binname, "wb");
            if (fbin == NULL) {
                err = E_FOPEN;
            }
          }     

          i = 0;
          while (fgets(line1, sizeof line1, fp) && !err) {
            if (*line1 == '#' || string_is_blank(line1)) {
                if (*line1 == '#') {
                  fputs(line1, fidx);
                } 
                continue;
            }
            if (fgets(line2, sizeof line2, fp) == NULL) {
                /* db index lines must be in pairs */
                err = 1;
                break;
            }
            if (sscanf(line2, "%*s  %*s - %*s  n = %d\n", &nobs) != 1) {
                err = 1;
                break;
            }

#if DB_DEBUG
            fprintf(stderr, "old db, var %d, nobs = %d\n", i, nobs);
#endif
            if (mask[i]) {
                v = series_index(pdinfo, oldnames[i]);
#if DB_DEBUG
                fprintf(stderr, "replacing this with var %d\n", v);
#endif
                output_db_var(v, Z, pdinfo, fidx, fbin);
            } else {
#if DB_DEBUG
                fprintf(stderr, "passing through old var\n");
#endif
                fputs(line1, fidx);
                fputs(line2, fidx);
                write_old_bin_chunk(offset, nobs, fq, fbin);
            }
            i++;
            offset += nobs * sizeof(float);
          }

          if (fp != NULL) fclose(fp);
          if (fq != NULL) fclose(fq);
      }

      gretl_remove(idxcpy);
      gretl_remove(bincpy);
    } else {
      /* no variables to be replaced */
      fidx = gretl_fopen(idxname, "a");
      if (fidx == NULL) {
          err = E_FOPEN;
      }
      if (!err) {
          fbin = gretl_fopen(binname, "ab");
          if (fbin == NULL) {
            err = E_FOPEN;
          }
      }
    }

    if (!err) {
      /* do any newly added variables */
      for (i=1; i<=newlist[0]; i++) {
#if DB_DEBUG
          fprintf(stderr, "adding new var, %d\n", newlist[i]);
#endif
          output_db_var(newlist[i], Z, pdinfo, fidx, fbin);
      }
    }

 bailout:

    if (fidx != NULL) fclose(fidx);
    if (fbin != NULL) fclose(fbin);

    free_strings_array(oldnames, oldv);
    free(mask);
    free(newlist);

    return err;
}

/* Derive the index and binary file names for the database named by
   @fname and open both files for writing.

   The base name is @fname with everything from the first '.' of its
   final path component removed; dots in directory components (as in
   "./foo.bin") are left alone.  "<base>.idx" is written to @idxname
   and "<base>.bin" to @binname.  NOTE(review): both buffers are
   assumed to hold at least FILENAME_MAX bytes, as they do in the
   caller visible in this file.

   If the index file already exists, *@append is set to 1 and both
   files are opened in append mode; otherwise *@append is left as the
   caller set it and the files are created.  On success the open
   streams are stored in @fidx and @fbin.

   Returns 0 on success, 1 on failure (with an error message set).
*/
static int 
open_db_files (const char *fname, char *idxname, char *binname,
               FILE **fidx, FILE **fbin, int *append)
{
    FILE *fp;
    char base[FILENAME_MAX];
    const char *imode = "w";
    const char *bmode = "wb";
    char *p, *s;

    if (strlen(fname) >= sizeof base) {
        gretl_errmsg_sprintf(_("Couldn't open %s for writing"), fname);
        return 1;
    }

    strcpy(base, fname);

    /* find the start of the last path component */
    s = base;
    for (p = base; *p != '\0'; p++) {
#if defined(_WIN32) || defined(WIN32)
        if (*p == '/' || *p == '\\') {
#else
        if (*p == '/') {
#endif
            s = p + 1;
        }
    }

    /* strip the suffix, looking only within that component */
    p = strchr(s, '.');
    if (p != NULL) {
        *p = 0;
    }

    /* base plus a four-character suffix and NUL must fit */
    if (strlen(base) + sizeof ".idx" > FILENAME_MAX) {
        gretl_errmsg_sprintf(_("Couldn't open %s for writing"), fname);
        return 1;
    }

    strcpy(idxname, base);
    strcat(idxname, ".idx");

    fprintf(stderr, "open_db_files: doing test open on '%s'\n", idxname);

    /* an index that can be read means the database already exists */
    fp = gretl_fopen(idxname, "r");
    if (fp != NULL) {
        *append = 1;
        imode = "a";
        bmode = "ab";
        fclose(fp);
    }

    *fidx = gretl_fopen(idxname, imode);
    if (*fidx == NULL) {
        gretl_errmsg_sprintf(_("Couldn't open %s for writing"), idxname);
        return 1;
    }

    strcpy(binname, base);
    strcat(binname, ".bin");
    
    *fbin = gretl_fopen(binname, bmode);
    if (*fbin == NULL) {
        gretl_errmsg_sprintf(_("Couldn't open %s for writing"), binname);
        fclose(*fidx);
        if (*append == 0) {
            /* don't leave behind an index we have just created */
            gretl_remove(idxname);
        }
        return 1;
    }

    fprintf(stderr, "Opened database index '%s' in mode '%s'\n", 
            idxname, imode);
    fprintf(stderr, "Opened database binary '%s' in mode '%s'\n", 
            binname, bmode);

    return 0;
}

/* Build a copy of @list from which every series that has no
   non-missing observation in @Z has been dropped.  Returns the
   newly allocated list (to be freed by the caller), or NULL if the
   allocation fails.
*/

static int *make_db_save_list (const int *list, const double **Z, 
                               const DATAINFO *pdinfo)
{
    int *dlist;
    int i, t;
    int nkept = 0;

    dlist = gretl_list_new(list[0]);
    if (dlist == NULL) {
        return NULL;
    }

    for (i=1; i<=list[0]; i++) {
        const double *x = Z[list[i]];

        /* advance to the first valid observation, if there is one */
        t = 0;
        while (t < pdinfo->n && na(x[t])) {
            t++;
        }

        if (t < pdinfo->n) {
            nkept++;
            dlist[nkept] = list[i];
        }
    }

    dlist[0] = nkept;

    return dlist;
}

/**
 * write_db_data:
 * @fname: name of target database file (e.g. "foo.bin").
 * @list: list of series ID numbers.
 * @opt: option flag.
 * @Z: data array.
 * @pdinfo: dataset information.
 *
 * Writes the listed series from @Z to a gretl database. If the
 * database already exists the series are added to it, after
 * dropping any series that have no valid observations. If @opt
 * includes OPT_F (force, overwrite), then in case any variables
 * in the database have the same names as some of those in @list,
 * replace the ones in the database.  Otherwise, in case of replicated
 * variables, set an error message and return E_DB_DUP.
 *
 * Only undated data, and time series with frequency 1, 4 or 12,
 * are accepted.
 *
 * Returns: 0 on success, non-zero code on error.
 */

int write_db_data (const char *fname, const int *list, gretlopt opt,
                   const double **Z, const DATAINFO *pdinfo) 
{
    char idxname[FILENAME_MAX];
    char binname[FILENAME_MAX];
    FILE *fbin = NULL, *fidx = NULL;
    const int *mylist = list;
    int *dlist = NULL;
    int append = 0;
    int force = (opt & OPT_F);
    int i, err = 0;

    if (dataset_is_time_series(pdinfo)) {
        if (pdinfo->pd != 1 && pdinfo->pd != 4 && pdinfo->pd != 12) {
            return 1;
        }
    } else if (pdinfo->pd != 1) {
        return 1;
    }

    if (open_db_files(fname, idxname, binname, 
                      &fidx, &fbin, &append)) {
        return 1;
    }

    if (append) {
#if DB_DEBUG
        fprintf(stderr, "Appending to existing db\n");
#endif
        dlist = make_db_save_list(list, Z, pdinfo);
        if (dlist == NULL) {
            err = E_ALLOC;
            goto bailout;
        }

        if (force) {
#if DB_DEBUG
            fprintf(stderr, "Got force flag, overwriting\n");
#endif
            /* the replacement routine reopens the files itself */
            fclose(fidx);
            fclose(fbin);
            fidx = fbin = NULL;
            err = append_db_data_with_replacement(idxname, binname, dlist,
                                                  Z, pdinfo);
            /* leave via the common exit so that dlist is freed */
            goto bailout;
        } else {
            int dups = check_for_db_duplicates(dlist, pdinfo, idxname, &err);

#if DB_DEBUG
            fprintf(stderr, "No force flag, checking for dups\n");
#endif
            if (err) {
                fputs("check_for_db_duplicates failed\n", stderr);
            } else if (dups > 0) {
                gretl_errmsg_sprintf(_("Of the variables to be saved, %d were already "
                                       "present in the database."), dups);
                /* FIXME add message for command line use, about the
                   --overwrite option */
                err = E_DB_DUP;
            }
            if (err) {
                goto bailout;
            }
            mylist = dlist;
        } 
    } 

    if (!append) {
        fprintf(fidx, "# Description goes here\n");
    }

    for (i=1; i<=mylist[0] && !err; i++) {
        int v = mylist[i];

        err = output_db_var(v, Z, pdinfo, fidx, fbin);
    }

 bailout:

    if (fidx != NULL) fclose(fidx);
    if (fbin != NULL) fclose(fbin);

    free(dlist);

    return err;
}

#ifdef notyet

/* apparatus for writing XML databases */

/* Write "<tag>", the string @s and "</tag>" to the compressed
   stream @fz; a newline follows each of the two tags.  @s is
   written as is, with no escaping. */
static void
xml_write_tagged (const char *tag, const char *s, gzFile fz)
{
    gzprintf(fz, "<%s>\n", tag);
    /* zlib's gzputs() takes the stream first, then the string */
    gzputs(fz, s);
    gzprintf(fz, "</%s>\n", tag);
}

/* Write the attribute attr="s" to the compressed stream @fz.  @s is
   written as is, with no escaping, and nothing is written after the
   closing quote. */
static void 
xml_write_attrib (const char *attr, const char *s, gzFile fz)
{
    gzprintf(fz, "%s=\"", attr);
    /* zlib's gzputs() takes the stream first, then the string */
    gzputs(fz, s);
    gzputs(fz, "\"");
}

/* Write the integer-valued attribute attr="val", followed by a
   single space, to the compressed stream @fz. */
static void 
xml_write_int_attrib (const char *attr, int val, gzFile fz)
{
    gzprintf(fz, "%s=\"%d\" ", attr, val);
}

/* Write the opening of an XML database to @fz: the XML declaration,
   the gretldb DOCTYPE line and the opening <gretldb> element, which
   carries @name and the format version ("1.0") as attributes. */
static void xml_write_db_header (const char *name, gzFile fz)
{
    double gretl_db_version = 1.0;

    /* zlib's gzputs() takes the stream first, then the string */
    gzputs(fz, "<?xml version=\"1.0\"?>\n"
           "<!DOCTYPE gretldb SYSTEM \"gretldb.dtd\">\n\n");
    gzprintf(fz, "<gretldb name=\"%s\" version=\"%.1f\">\n", 
             name, gretl_db_version);
}

/* Begin an element: write "<tag" followed by a space to @fz.  The
   closing '>' of the start tag is not written here. */
static void xml_open_tag (const char *tag, gzFile fz)
{
    gzprintf(fz, "<%s ", tag);
}

/* End an element: write a newline, then "</tag>" and another
   newline, to @fz. */
static void xml_close_tag (const char *tag, gzFile fz)
{
    gzprintf(fz, "\n</%s>\n", tag);
}

/* Write the closing </gretldb> tag, on a line of its own, to @fz. */
static void xml_write_db_footer (gzFile fz)
{
    /* zlib's gzputs() takes the stream first, then the string */
    gzputs(fz, "\n</gretldb>\n");
}

/* Write the database @db to the gzipped XML file @fname: the header,
   then the optional source, description and codebook elements, then
   one element per chapter and one per series (attributes followed by
   the observations), then the footer.

   Returns E_FOPEN if the file cannot be opened, otherwise 0.
   NOTE(review): write errors from zlib are not checked here.
*/
int write_gretl_xml_db (gretl_db *db, const char *fname)
{
    /* gzFile is itself the stream handle type that the xml_* helpers
       accept, so no extra level of indirection is wanted */
    gzFile fz = Z_NULL;
    int i, t, err = 0;

    fz = gretl_gzopen(fname, "wb");
    if (fz == Z_NULL) {
        return E_FOPEN;
    }

    xml_write_db_header(db->name, fz);

    if (db->source != NULL) {
        xml_write_tagged("source", db->source, fz);
    }
    
    if (db->descrip != NULL) {
        xml_write_tagged("description", db->descrip, fz);
    }

    if (db->codebook != NULL) {
        xml_write_tagged("codebook", db->codebook, fz);
    }

    for (i=0; i<db->nchaps; i++) {
        xml_open_tag("chapter", fz);
        xml_write_attrib("title", db->chapters[i]->title, fz);
        if (db->chapters[i]->descrip != NULL) {
            xml_write_tagged("description", db->chapters[i]->descrip, fz);
        }
        xml_close_tag("chapter", fz);
    }

    for (i=0; i<db->nseries; i++) {
        xml_open_tag("series", fz);
        xml_write_attrib("name", db->series[i]->name, fz);
        if (db->series[i]->chapter > 0) {
            xml_write_int_attrib("chapter", db->series[i]->chapter, fz);
        }
        if (db->series[i]->label != NULL) {
            xml_write_attrib("label", db->series[i]->label, fz);
        }
        if (db->series[i]->displayname != NULL) {
            xml_write_attrib("displayname", db->series[i]->displayname, fz);
        }
        if (db->series[i]->frequency > 0) {
            xml_write_int_attrib("frequency", db->series[i]->frequency, fz);
        }
        xml_write_attrib("startobs", db->series[i]->startobs, fz);
        xml_write_attrib("endobs", db->series[i]->endobs, fz);
        /* FIXME type, compact-method */

        xml_open_tag("observations", fz);
        xml_write_int_attrib("count", db->series[i]->nobs, fz);

        /* the observation loops run over t, bounded by the number
           of observations in series i */
        if (db->series[i]->markers) {
            xml_write_attrib("labels", "true", fz);
            for (t=0; t<db->series[i]->nobs; t++) {
                gzprintf(fz, "<obs label=\"%s\">%.8g</obs>\n", 
                         db->series[i]->S[t], db->series[i]->x[t]);
            }
        } else {
            for (t=0; t<db->series[i]->nobs; t++) {
                gzprintf(fz, "<obs>%.8g</obs>\n", db->series[i]->x[t]);
            }
        }         
        
        xml_close_tag("observations", fz);
        xml_close_tag("series", fz);
    }    
        
    xml_write_db_footer(fz);

    gzclose(fz);

    return err;
}

#endif



Generated by  Doxygen 1.6.0   Back to index