Logo Search packages:      
Sourcecode: gretl version File versions  Download package

dataio.c

/* 
 *  gretl -- Gnu Regression, Econometrics and Time-series Library
 *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
 * 
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 */

#include "libgretl.h"
#include "gretl_string_table.h"
#include "dbwrite.h"
#include "libset.h"
#include "gretl_xml.h"
#include "gretl_panel.h"
#include "csvdata.h"
#include "usermat.h"

#include <ctype.h>
#include <time.h>
#include <errno.h>

#include <glib.h>

/**
 * SECTION:dataio
 * @short_description: data handling (internal)
 * @title: Data support
 * @include: gretl/libgretl.h
 *
 * The following data handling functions are basically internal to
 * gretl and not in a state where they can be readily
 * documented as public APIs.
 * 
 */

typedef enum {
    GRETL_FMT_FLOAT = 1, /* single-precision binary data */
    GRETL_FMT_DOUBLE,    /* double-precision binary data */
    GRETL_FMT_OCTAVE,    /* data in Gnu Octave format */
    GRETL_FMT_CSV,       /* data in Comma Separated Values format */
    GRETL_FMT_R,         /* data in Gnu R format */
    GRETL_FMT_GZIPPED,   /* gzipped data */
    GRETL_FMT_TRAD,      /* traditional (ESL-style) data */
    GRETL_FMT_DAT,       /* data in PcGive format */
    GRETL_FMT_DB,        /* gretl native database format */
    GRETL_FMT_JM         /* JMulti ascii data */
} GretlDataFormat;

#define IS_DATE_SEP(c) (c == '.' || c == ':' || c == ',')

static int writelbl (const char *lblfile, const int *list, 
                 const DATASET *dset);
static int writehdr (const char *hdrfile, const int *list, 
                 const DATASET *dset, int opt);

static char STARTCOMMENT[3] = "(*";
static char ENDCOMMENT[3] = "*)";

#define PROGRESS_BAR "progress_bar"

/**
 * get_date_x:
 * @pd: frequency of data.
 * @obs: observation string.
 * 
 * Returns: the floating-point representation of @obs.
 */

double get_date_x (int pd, const char *obs)
{
    double x = 1.0;

    if ((pd == 5 || pd == 6 || pd == 7 || pd == 52) && strlen(obs) > 4) { 
      /* calendar data */
      long ed = get_epoch_day(obs);

      if (ed >= 0) {
          x = ed;
      }
    } else {
      x = obs_str_to_double(obs); 
    }

    return x;
}

/* Skip past comments in .hdr file.  Return 0 if comments found,
   otherwise 1.
*/

static int skipcomments (FILE *fp, const char *str)
{
    char word[MAXLEN];  /* should be big enough to accommodate
                     strings among the comments? */

    *word = '\0';

    if (strncmp(str, STARTCOMMENT, 2) == 0) {
        while (strcmp(word, ENDCOMMENT)) {
            fscanf(fp, "%s", word);
        }
        return 0;
    } 

    return 1;
}

static int comment_lines (FILE *fp, char **pbuf)
{
    char s[MAXLEN], *mybuf = NULL;
    int count = 0, bigger = 1;

    if (fgets(s, sizeof s, fp) == NULL) {
      return 0;
    }

    if (!strncmp(s, STARTCOMMENT, 2)) {
      *pbuf = malloc(20 * MAXLEN);

      if (*pbuf == NULL) {
          return -1;
      }

      **pbuf = '\0';

      while (fgets(s, sizeof s, fp)) {
          if (!strncmp(s, ENDCOMMENT, 2)) {
            break;
          }
          if (++count > 20 * bigger) {
            size_t bufsize = 20 * MAXLEN * ++bigger;

            mybuf = realloc(*pbuf, bufsize);
            if (mybuf == NULL) {
                return -1;
            } else {
                *pbuf = mybuf;
            }
          }
          strcat(*pbuf, s);
      } 
    }

    return count;
}

static void eatspace (FILE *fp)
{
    char c;

    while (1) {
      c = fgetc(fp);
      if (!isspace((unsigned char) c)) {
          ungetc(c, fp);
          return;
      }
    }
}

static int readdata (FILE *fp, const DATASET *dset,
                 int binary, int old_byvar)
{
    int i, t, n = dset->n;
    char c, marker[OBSLEN];
    int err = 0;

    gretl_error_clear();

    if (binary == 1) { 
      /* single-precision binary data */
      float x;

      for (i=1; i<dset->v; i++) {
          for (t=0; t<n; t++) {
            if (!fread(&x, sizeof x, 1, fp)) {
                gretl_errmsg_sprintf(_("WARNING: binary data read error at "
                                 "var %d"), i);
                return 1;
            }
            if (x == -999.0) {
                dset->Z[i][t] = NADBL;
            } else {
                dset->Z[i][t] = (double) x;
            }
          }
      }
    } else if (binary == 2) { 
      /* double-precision binary data */
      double x;

      for (i=1; i<dset->v; i++) {
          for (t=0; t<n; t++) {
            if (!fread(&x, sizeof x, 1, fp)) {
                gretl_errmsg_sprintf(_("WARNING: binary data read error at var %d"), i);
                return 1;
            }
            if (x == -999.0) {
                dset->Z[i][t] = NADBL;
            } else {
                dset->Z[i][t] = x;
            }
          }
      }
    } else if (old_byvar) {
      /* ascii data by variable */
      for (i=1; i<dset->v; i++) {
          for (t=0; t<n && !err; t++) {
            if ((fscanf(fp, "%lf", &dset->Z[i][t])) != 1) {
                gretl_errmsg_sprintf(_("WARNING: ascii data read error at var %d, "
                                 "obs %d"), i, t + 1);
                err = 1;
                break;
            }
            if (dset->Z[i][t] == -999.0) {
                dset->Z[i][t] = NADBL;
            } 
          }
      }            
    } else { 
      /* ascii data by observation */
      char sformat[8];

      sprintf(sformat, "%%%ds", OBSLEN - 1);

      gretl_push_c_numeric_locale();

      for (t=0; t<n && !err; t++) {
          eatspace(fp);
          c = fgetc(fp);  /* test for a #-opened comment line */
          if (c == '#') {
            while (c != '\n') {
                c = fgetc(fp);
            }
          } else {
            ungetc(c, fp);
          }
          if (dset->markers) {
            *marker = '\0';
            fscanf(fp, sformat, marker);
            if (*marker == '"' || *marker == '\'') {
                strcpy(dset->S[t], marker + 1);
            } else {
                strcpy(dset->S[t], marker);
            }
          }
          for (i=1; i<dset->v; i++) {
            if ((fscanf(fp, "%lf", &dset->Z[i][t])) != 1) {
                gretl_errmsg_sprintf(_("WARNING: ascii data read error at var %d, "
                                 "obs %d"), i, t + 1);
                err = 1;
                break;
            }
            if (dset->Z[i][t] == -999.0) {
                dset->Z[i][t] = NADBL;
            } 
          }
      }

      gretl_pop_c_numeric_locale();
    }

    return err;
}

static int gz_readdata (gzFile fz, DATASET *dset, int binary)
{
    int i, t, n = dset->n;
    int err = 0;
    
    gretl_error_clear();

    if (binary == 1) { 
      /* single-precision binary data */
      float xx;

      for (i=1; i<dset->v; i++) {
          for (t=0; t<n; t++) {
            if (!gzread(fz, &xx, sizeof xx)) {
                gretl_errmsg_sprintf(_("WARNING: binary data read error at "
                                 "var %d"), i);
                return 1;
            }
            dset->Z[i][t] = (double) xx;
          }
      }
    } else if (binary == 2) { 
      /* double-precision binary data */
      for (i=1; i<dset->v; i++) {
          if (!gzread(fz, &dset->Z[i][0], n * sizeof(double))) {
            gretl_errmsg_sprintf(_("WARNING: binary data read error at var %d"), i);
            return 1;
          }
      }
    } else { 
      /* ascii data */
      char *line, numstr[24], sformat[8];
      int llen = dset->v * 32;
      size_t offset;

      line = malloc(llen);
      if (line == NULL) {
          return E_ALLOC;
      }

      sprintf(sformat, "%%%ds", OBSLEN - 1);

      gretl_push_c_numeric_locale();

      for (t=0; t<n; t++) {
          offset = 0L;
          if (!gzgets(fz, line, llen - 1)) {
            gretl_errmsg_sprintf(_("WARNING: ascii data read error at "
                               "obs %d"), t + 1);
            err = 1;
            break;
          }

          chopstr(line);
          compress_spaces(line);
          if (line[0] == '#') {
            t--;
            continue;
          }

          if (dset->markers) {
            if (sscanf(line, sformat, dset->S[t]) != 1) {
                gretl_errmsg_sprintf(_("WARNING: failed to read case marker for "
                                 "obs %d"), t + 1);
                err = 1;
                break;
            }
            dset->S[t][OBSLEN-1] = 0;
            offset += strlen(dset->S[t]) + 1;
          }

          for (i=1; i<dset->v; i++) {
            if (sscanf(line + offset, "%23s", numstr) != 1) {
                gretl_errmsg_sprintf(_("WARNING: ascii data read error at var %d, "
                                 "obs %d"), i, t + 1);
                err = 1;
                break;
            }
            numstr[23] = 0;
            dset->Z[i][t] = atof(numstr);
            if (i < dset->v - 1) {
                offset += strlen(numstr) + 1;
            }
          }

          if (err) break;
      }

      free(line);

      gretl_pop_c_numeric_locale();
    }

    return err;
}

/**
 * check_varname:
 * @varname: putative name for variable (or object).
 * 
 * Check a variable/object name for legality: the name
 * must start with a letter, and be composed of letters,
 * numbers or the underscore character, and nothing else.
 * 
 * Returns: 0 if name is OK, non-zero if not.
 */

int check_varname (const char *varname)
{
    int testchar = 'a';
    int ret = 0;

    gretl_error_clear();

    if (gretl_reserved_word(varname)) {
      ret = VARNAME_RESERVED;
    } else if (!(isalpha((unsigned char) *varname))) {
      testchar = *varname;
        ret = VARNAME_FIRSTCHAR;
    } else {
      const char *p = varname;

      while (*p && testchar == 'a') {
          if (!(isalpha((unsigned char) *p))  
            && !(isdigit((unsigned char) *p))
            && *p != '_') {
            testchar = *p;
            ret = VARNAME_BADCHAR;
          }
          p++;
      }
    }

    if (testchar != 'a') {
      if (isprint((unsigned char) testchar)) {
          if (ret == VARNAME_FIRSTCHAR) {
            gretl_errmsg_sprintf(_("First char of varname ('%c') is bad\n"
                               "(first must be alphabetical)"), 
                             (unsigned char) testchar);
          } else {
            gretl_errmsg_sprintf(_("Varname contains illegal character '%c'\n"
                               "Use only letters, digits and underscore"), 
                             (unsigned char) testchar);
          }
      } else {
          if (ret == VARNAME_FIRSTCHAR) {
            gretl_errmsg_sprintf(_("First char of varname (0x%x) is bad\n"
                               "(first must be alphabetical)"), 
                             (unsigned) testchar);
          } else {
            gretl_errmsg_sprintf(_("Varname contains illegal character 0x%x\n"
                               "Use only letters, digits and underscore"), 
                             (unsigned) testchar);
          }
      }
    }

    return ret;
}   

static int readhdr (const char *hdrfile, DATASET *dset, 
                int *binary, int *old_byvar)
{
    FILE *fp;
    int n, i = 0, panel = 0, descrip = 0;
    char str[MAXLEN], byobs[6], option[8];

    gretl_error_clear();

    fp = gretl_fopen(hdrfile, "r");
    if (fp == NULL) {
      gretl_errmsg_sprintf(_("Couldn't open file %s"),  hdrfile);
      return E_FOPEN;
    }

    fscanf(fp, "%s", str);
    i += skipcomments(fp, str); 

    /* find number of variables */

    while (1) {
        if (fscanf(fp, "%s", str) != 1) {
          fclose(fp);
          gretl_errmsg_sprintf(_("Opened header file %s\n"
                           "Couldn't find list of variables (must "
                           "be terminated with a semicolon)"), 
                         hdrfile);
          return 1;
      }
      n = strlen(str);
      if (str[n-1] == ';') {
          if (n > 1) i++;
          break;
      } else i++;
    }

    dset->v = i + 1;
    fclose(fp);

    if (dataset_allocate_varnames(dset)) {
      return E_ALLOC;
    }

    i = 1;
    fp = gretl_fopen(hdrfile, "r");

    str[0] = 0;
    fscanf(fp, "%s", str);
    if (skipcomments(fp, str)) {
        safecpy(dset->varname[i], str, VNAMELEN - 1);
      if (check_varname(dset->varname[i++])) {
          goto varname_error;
      }
    } else {
      descrip = 1; /* comments were found */
    }

    while (1) {
        fscanf(fp, "%s", str);
      n = strlen(str);
      if (str[n-1] != ';') {
            safecpy(dset->varname[i], str, VNAMELEN - 1);
          if (check_varname(dset->varname[i++])) {
            goto varname_error;
          }
        } else {
          if (n > 1) {
            safecpy(dset->varname[i], str, n-1);
            dset->varname[i][n] = '\0';
            if (check_varname(dset->varname[i])) {
                goto varname_error; 
            }
          }
          break;
      }
    }

    fscanf(fp, "%d", &dset->pd);
    fscanf(fp, "%s", dset->stobs);
    fscanf(fp, "%s", dset->endobs);

    colonize_obs(dset->stobs);
    colonize_obs(dset->endobs);

    dset->sd0 = get_date_x(dset->pd, dset->stobs);

    if (dset->sd0 >= 2.0) {
        dset->structure = TIME_SERIES; /* actual time series? */
    } else if (dset->sd0 > 1.0) {
      dset->structure = STACKED_TIME_SERIES; /* panel data? */
    } else {
      dset->structure = CROSS_SECTION;
    }

    dset->n = -1;
    dset->n = dateton(dset->endobs, dset) + 1;

    *binary = 0;
    dset->markers = NO_MARKERS;

    n = fscanf(fp, "%5s %7s", byobs, option);

    if (n == 1 && strcmp(byobs, "BYVAR") == 0) {
      *old_byvar = 1;
    } else if (n == 2) {
      if (strcmp(option, "SINGLE") == 0) {
          *binary = 1;
      } else if (strcmp(option, "BINARY") == 0) {
          *binary = 2;
      } else if (strcmp(option, "MARKERS") == 0) {
          dset->markers = 1;
      } else if (strcmp(option, "PANEL2") == 0) {
          panel = 1;
          dset->structure = STACKED_TIME_SERIES;
      } else if (strcmp(option, "PANEL3") == 0) {
          panel = 1;
          dset->structure = STACKED_CROSS_SECTION;
      }
    } 

    if (!panel && fscanf(fp, "%6s", option) == 1) {
      if (strcmp(option, "PANEL2") == 0) {
          dset->structure = STACKED_TIME_SERIES;
      } else if (strcmp(option, "PANEL3") == 0) {
          dset->structure = STACKED_CROSS_SECTION;
      }
    }

    if (fp != NULL) {
      fclose(fp);
    }

    /* last pass, to pick up data description */
    dset->descrip = NULL;
    if (descrip) {
      char *dbuf = NULL;
      int lines;

      fp = gretl_fopen(hdrfile, "r");
      if (fp == NULL) return 0;
      if ((lines = comment_lines(fp, &dbuf)) > 0) {
          delchar('\r', dbuf);
          dset->descrip = malloc(strlen(dbuf) + 1);
          if (dset->descrip != NULL) {
            strcpy(dset->descrip, dbuf);
          }
          free(dbuf);
      } else if (lines < 0) {
          fputs(I_("Failed to store data comments\n"), stderr);
      }
      fclose(fp);
    } 

    return 0;

    varname_error:

    fclose(fp);
    clear_datainfo(dset, CLEAR_FULL);

    return E_DATA;
}

static int bad_date_string (const char *s)
{
    int err = 0;

    gretl_error_clear();

    while (*s && !err) {
      if (!isdigit((unsigned char) *s) && !IS_DATE_SEP(*s)) {
          if (isprint((unsigned char) *s)) {
            gretl_errmsg_sprintf(_("Bad character '%c' in date string"), *s);
          } else {
            gretl_errmsg_sprintf(_("Bad character %d in date string"), *s);
          }
          err = 1;
      }
      s++;
    }

    return err;
}

static void maybe_unquote_label (char *targ, const char *src)
{
    if (*src == '"' || *src == '\'') {
      int n;

      strcpy(targ, src + 1);
      n = strlen(targ);
      if (n > 0 && (targ[n-1] == '"' || targ[n-1] == '\'')) {
          targ[n-1] = '\0';
      }
    } else {
      strcpy(targ, src);
    }
}

static int get_dot_pos (const char *s)
{
    int i, pos = 0;

    for (i=0; *s != '\0'; i++, s++) {
      if (IS_DATE_SEP(*s)) {
          pos = i;
          break;
      }
    }

    return pos;
}

#define DATES_DEBUG 0

static int match_obs_marker (const char *s, const DATASET *dset)
{
    char test[OBSLEN];
    int t;

#if DATES_DEBUG
    fprintf(stderr, "dateton: checking marker strings\n");
#endif

    maybe_unquote_label(test, s);

    for (t=0; t<dset->n; t++) {
      if (!strcmp(test, dset->S[t])) {
          /* handled */
          return t;
      }
    }

    if (isalpha(*s)) {
      /* try harder */
      int k = strlen(test);

      for (t=0; t<dset->n; t++) {
          if (!strncmp(test, dset->S[t], k)) {
            return t;
          }
      }
    }

    return -1;
}

static int 
real_dateton (const char *date, const DATASET *dset, int nolimit)
{
    int handled = 0;
    int t, n = -1;

    /* first check if this is calendar data and if so,
       treat accordingly */

    if (calendar_data(dset)) {
#if DATES_DEBUG
      fprintf(stderr, "dateton: treating as calendar data\n");
#endif
      if (dataset_has_markers(dset)) {
          /* "hard-wired" calendar dates as strings */
          for (t=0; t<dset->n; t++) {
            if (!strcmp(date, dset->S[t])) {
                /* handled */
                return t;
            }
          }
          /* try allowing for 2- versus 4-digit years? */
          if (strlen(dset->S[0]) == 10 &&
            (!strncmp(dset->S[0], "19", 2) || 
             !strncmp(dset->S[0], "20", 2))) {
            for (t=0; t<dset->n; t++) {
                if (!strcmp(date, dset->S[t] + 2)) {
                  /* handled */
                  return t;
                }
            }           
          }
          /* out of options: abort */
          return -1;
      } else {
          /* automatic calendar dates */
          n = calendar_obs_number(date, dset);
          handled = 1;
      } 
    } else if (dataset_is_daily(dset) ||
             dataset_is_weekly(dset)) {
#if DATES_DEBUG
      fprintf(stderr, "dateton: trying undated time series\n");
#endif
      t = positive_int_from_string(date);
      if (t > 0) {
          n = t - 1;
          handled = 1;
      }
    } else if (dataset_is_decennial(dset)) {
      t = positive_int_from_string(date);
      if (t > 0) {
          n = (t - dset->sd0) / 10;
          handled = 1;
      }     
    } else if (dataset_has_markers(dset)) {
      t = match_obs_marker(date, dset);
      if (t >= 0) {
          return t;
      }
      /* else maybe just a straight obs number */
      t = positive_int_from_string(date);
      if (t > 0) {
          n = t - 1;
          handled = 1;
      }
    }

    if (!handled) {
      int dotpos1, dotpos2;

#if DATES_DEBUG
      fprintf(stderr, "dateton: treating as regular numeric obs\n");
#endif
      if (bad_date_string(date)) {
          return -1;
      }

      dotpos1 = get_dot_pos(date);
      dotpos2 = get_dot_pos(dset->stobs);

      if ((dotpos1 && !dotpos2) || (dotpos2 && !dotpos1)) {
          gretl_errmsg_set(_("Date strings inconsistent"));
      } else if (!dotpos1 && !dotpos2) {
          n = atoi(date) - atoi(dset->stobs);
      } else {
          char majstr[5] = {0};
          char minstr[3] = {0};
          char majstr0[5] = {0};
          char minstr0[3] = {0};

          int maj, min;
          int maj0, min0;

          strncat(majstr, date, dotpos1);
          maj = atoi(majstr);
          strncat(minstr, date + dotpos1 + 1, 2);
          min = atoi(minstr);     

          strncat(majstr0, dset->stobs, dotpos2);
          maj0 = atoi(majstr0);
          strncat(minstr0, dset->stobs + dotpos2 + 1, 2);
          min0 = atoi(minstr0);
    
          n = dset->pd * (maj - maj0) + (min - min0);
      }
    }

    if (!nolimit && dset->n > 0 && n >= dset->n) {
      fprintf(stderr, "n = %d, dset->n = %d: out of bounds\n", n, dset->n);
      gretl_errmsg_set(_("Observation number out of bounds"));
      n = -1; 
    }

    return n;
}

/**
 * dateton:
 * @date: string representation of date for processing.
 * @dset: pointer to data information struct.
 * 
 * Determines the observation number corresponding to @date,
 * relative to @dset. It is an error if @date represents an 
 * observation that lies outside of the full data range 
 * specified in @dset.
 * 
 * Returns: zero-based observation number, or -1 on error.
 */

int dateton (const char *date, const DATASET *dset)
{
    return real_dateton(date, dset, 0);
}

/**
 * merge_dateton:
 * @date: string representation of date for processing.
 * @dset: pointer to data information struct.
 * 
 * Works just as dateton(), except that for this function it
 * is not an error if @date represents an observation that
 * lies beyond the data range specified in @dset. This is 
 * inended for use when merging data, or when creating a new
 * dataset.
 * 
 * Returns: zero-based observation number, or -1 on error.
 */

int merge_dateton (const char *date, const DATASET *dset)
{
    return real_dateton(date, dset, 1);
}

static char *panel_obs (char *s, int t, const DATASET *dset)
{
    int i = t / dset->pd + 1;
    int j = (t + 1) % dset->pd;
    int d = 1 + floor(log10(dset->pd));

    if (j == 0) {
      j = dset->pd;
    }

    sprintf(s, "%d:%0*d", i, d, j);

    return s;
}

/**
 * ntodate:
 * @datestr: char array to which date is to be printed.
 * @t: zero-based observation number.
 * @dset: data information struct.
 * 
 * Prints to @datestr (which must be at least #OBSLEN bytes)
 * the calendar representation of observation number @t.
 * 
 * Returns: the observation string.
 */

char *ntodate (char *datestr, int t, const DATASET *dset)
{
    double x;

#if 0
    fprintf(stderr, "real_ntodate: t=%d, pd=%d, sd0=%g\n",
          t, dset->pd, dset->sd0);
#endif

    if (calendar_data(dset)) {
      /* handles both daily and dated weekly data */
      if (dataset_has_markers(dset)) {
          strcpy(datestr, dset->S[t]);
      } else {
          calendar_date_string(datestr, t, dset);
      }
      return datestr;
    } else if (dataset_is_daily(dset) || 
             dataset_is_weekly(dset)) {
      /* undated time series */
      x = date(t, 1, dset->sd0);
      sprintf(datestr, "%d", (int) x);
      return datestr;
    } else if (dataset_is_decennial(dset)) {
      x = dset->sd0 + 10 * t;
      sprintf(datestr, "%d", (int) x);
      return datestr;
    } else if (dataset_is_panel(dset)) {
      panel_obs(datestr, t, dset);
      return datestr;
    }

    x = date(t, dset->pd, dset->sd0);

    if (dset->pd == 1) {
        sprintf(datestr, "%d", (int) x);
    } else {
      int pdp = dset->pd, len = 1;
      char fmt[8];

      while ((pdp = pdp / 10)) len++;
      sprintf(fmt, "%%.%df", len);
      sprintf(datestr, fmt, x);
      colonize_obs(datestr);
    }
    
    return datestr;
}

#define xround(x) (((x-floor(x))>.5)? ceil(x) : floor(x))

/**
 * get_subperiod:
 * @t: zero-based observation number.
 * @dset: data information struct.
 * @err: location to receive error code, or NULL.
 * 
 * For "seasonal" time series data (in a broad sense), 
 * determines the sub-period at observation @t. The "sub-period" 
 * might be a quarter, month, hour or whatever.  The value
 * returned is zero-based (e.g. first quarter = 0).
 * If the data are not "seasonal", 0 is returned and if
 * @err is non-NULL it receives a non-zero error code.
 * 
 * Returns: the sub-period.
 */

int get_subperiod (int t, const DATASET *dset, int *err)
{
    int ret = 0;

    if (!dataset_is_seasonal(dset)) {
      if (err != NULL) {
          *err = E_PDWRONG;
      }
      return 0;
    }

    if (dataset_is_weekly(dset)) {
      /* bodge -- what else to do? */
      ret = t % dset->pd;
    } else if (calendar_data(dset)) {
      /* dated daily data */
      char datestr[12];

      calendar_date_string(datestr, t, dset);
      ret = get_day_of_week(datestr); 
    } else if (dataset_is_daily(dset)) {
      /* bodge, again */
      ret = t % dset->pd;
    } else {
      /* quarterly, monthly, hourly... */
      double x = date(t, dset->pd, dset->sd0);
      int i, d = ceil(log10(dset->pd));

      x -= floor(x);
      for (i=0; i<d; i++) {
          x *= 10;
      }
      ret = xround(x) - 1;
    }
    
    return ret;    
}

static int blank_check (FILE *fp)
{
    int i, deflt = 1;
    char s[MAXLEN];

    for (i=0; i<3 && deflt && fgets(s, MAXLEN-1, fp); i++) {
      if (i == 0 && strncmp(s, "(*", 2)) {
          deflt = 0;
      } else if (i == 1 && strncmp(s, _("space for comments"), 18)) {
          deflt = 0;
      } else if (i == 2 && strncmp(s, "*)", 2)) {
          deflt = 0;
      }
    }

    fclose(fp);

    return deflt;
}

/**
 * get_info:
 * @hdrfile: name of data header file.
 * @prn: gretl printing struct.
 * 
 * print to @prn the informative comments contained in the given
 * data file (if any).
 * 
 * Returns: 0 on successful completion, non-zero on error or if there
 * are no informative comments.
 */

int get_info (const char *hdrfile, PRN *prn)
{      
    char s[MAXLEN];
    int i = 0;
    FILE *hdr;

    if ((hdr = gretl_fopen(hdrfile, "r")) == NULL) {
      pprintf(prn, _("Couldn't open %s\n"), hdrfile); 
      return 1;
    }

    /* see if it's just the default "space for comments" */
    if (blank_check(hdr)) { /* yes */
      pprintf(prn, _("No info in %s\n"), hdrfile);
      return 2;
    } 

    /* no, so restart the read */
    if ((hdr = gretl_fopen(hdrfile, "r")) == NULL) {
      pprintf(prn, _("Couldn't open %s\n"), hdrfile); 
      return 1;
    }    

    pprintf(prn, _("Data info in file %s:\n\n"), hdrfile);

    if (fgets(s, MAXLEN-1, hdr) != NULL && !strncmp(s, STARTCOMMENT, 2)) {
      do {
          if (fgets(s, MAXLEN-1, hdr) != NULL && strncmp(s, "*)", 2)) {
#ifndef WIN32
            delchar('\r', s);
#endif
            pputs(prn, s);
            i++;
          }
      } while (s != NULL && strncmp(s, ENDCOMMENT, 2));
    }

    if (i == 0) {
      pputs(prn, _(" (none)\n"));
    }

    pputc(prn, '\n');

    if (hdr != NULL) {
      fclose(hdr);
    }

    return 0;
}

static int writehdr (const char *hdrfile, const int *list, 
                 const DATASET *dset, int opt)
{
    FILE *fp;
    char startdate[OBSLEN], enddate[OBSLEN];
    int i, binary = 0;

    if (opt == GRETL_FMT_FLOAT) {
      binary = 1;
    } else if (opt == GRETL_FMT_DOUBLE) {
      binary = 2;
    }

    ntodate(startdate, dset->t1, dset);
    ntodate(enddate, dset->t2, dset);

    fp = gretl_fopen(hdrfile, "w");
    if (fp == NULL) {
      return 1;
    }

    /* write description of data set, if any */
    if (dset->descrip != NULL) {
      size_t len = strlen(dset->descrip);

      if (len > 2) {
          fprintf(fp, "(*\n%s%s*)\n", dset->descrip,
                (dset->descrip[len-1] == '\n')? "" : "\n");
      }
    }

    /* then list of variables */
    for (i=1; i<=list[0]; i++) {
      if (list[i] == 0) {
          continue;
      }
      fprintf(fp, "%s ", dset->varname[list[i]]);
      if (i && i <list[0] && (i+1) % 8 == 0) {
          fputc('\n', fp);
      }
    }  
  
    fputs(";\n", fp);

    /* then obs line */
    fprintf(fp, "%d %s %s\n", dset->pd, startdate, enddate);
    
    /* and flags as required */
    if (binary == 1) {
      fputs("BYVAR\nSINGLE\n", fp);
    } else if (binary == 2) {
      fputs("BYVAR\nBINARY\n", fp);
    } else { 
      fputs("BYOBS\n", fp);
      if (dset->markers) {
          fputs("MARKERS\n", fp);
      }
    }

    if (dset->structure == STACKED_TIME_SERIES) {
      fputs("PANEL2\n", fp);
    } else if (dset->structure == STACKED_CROSS_SECTION) {
      fputs("PANEL3\n", fp);
    }
    
    fclose(fp);

    return 0;
}

/**
 * get_precision:
 * @x: data vector.
 * @n: length of @x.
 * @placemax: the maximum number of decimal places to try.
 *
 * Find the number of decimal places required to represent a given
 * data series uniformly and accurately, if possible.
 * 
 * Returns: the required number of decimal places or
 * #PMAX_NOT_AVAILABLE if it can't be done.
 */

int get_precision (const double *x, int n, int placemax)
{
    int t, p, pmax = 0;
    char *s, numstr[64];
    int n_ok = 0;
    double z;

    for (t=0; t<n; t++) {
      if (na(x[t])) {
          continue;
      }

      n_ok++;
      z = fabs(x[t]);

      /* escape clause: numbers are too big or too small for
         this treatment */
      if (z > 0 && (z < 1.0e-6 || z > 1.0e+8)) {
          return PMAX_NOT_AVAILABLE;
      }

      p = placemax;
      sprintf(numstr, "%.*f", p, z);
      /* go to the end and drop trailing zeros */
      s = numstr + strlen(numstr) - 1;
      while (*s-- == '0') {
          p--;
      }
      if (p > pmax) {
          pmax = p;
      }
    }

    if (n_ok == 0) {
      pmax = PMAX_NOT_AVAILABLE;
    }

    return pmax;
}

gretlopt data_save_opt_from_suffix (const char *fname)
{
    gretlopt opt = OPT_NONE;

    if (has_suffix(fname, ".R")) {
      opt = OPT_R;
    } else if (has_suffix(fname, ".m")) {
      opt = OPT_M;
    } else if (has_suffix(fname, ".csv") ||
             has_suffix(fname, ".txt") ||
             has_suffix(fname, ".asc")) {
      opt = OPT_C;
    } 

    return opt;
}

static GretlDataFormat 
format_from_opt_or_name (gretlopt opt, const char *fname,
                   char *delim)
{
    GretlDataFormat fmt = 0;
    
    if (opt & OPT_T) {
      fmt = GRETL_FMT_TRAD;
    } else if (opt & OPT_M) {
      fmt = GRETL_FMT_OCTAVE;
    } else if (opt & OPT_R) {
      fmt = GRETL_FMT_R;
    } else if (opt & OPT_C) {
      fmt = GRETL_FMT_CSV;
    } else if (opt & OPT_Z) {
      fmt = GRETL_FMT_GZIPPED;
    } else if (opt & OPT_D) {
      fmt = GRETL_FMT_DB;
    } else if (opt & OPT_G) {
      fmt = GRETL_FMT_DAT;
    } else if (opt & OPT_J) {
      fmt = GRETL_FMT_JM;
    }

    if (fmt == 0) {
      if (has_suffix(fname, ".R")) {
          fmt = GRETL_FMT_R;
      } else if (has_suffix(fname, ".csv")) {
          fmt = GRETL_FMT_CSV;
      } else if (has_suffix(fname, ".m")) {
          fmt = GRETL_FMT_OCTAVE;
      } else if (has_suffix(fname, ".txt") ||
               has_suffix(fname, ".asc")) {
          fmt = GRETL_FMT_CSV;
          *delim = ' ';
      } 
    }

    return fmt;
}

static void date_maj_min (int t, const DATASET *dset, int *maj, int *min)
{
    char obs[OBSLEN];
    char *s;

    ntodate(obs, t, dset);

    *maj = atoi(obs);
    s = strchr(obs, ':');
    if (s != NULL && strlen(s) > 1) {
      *min = atoi(s + 1);
    } else {
      *min = 1;
    }
}

#define DEFAULT_CSV_DIGITS 12

#define annual_data(p) (p->structure == TIME_SERIES && p->pd == 1)

/**
 * write_data:
 * @fname: name of file to write.
 * @list: list of variables to write (or %NULL to write all series).
 * @dset: dataset struct.
 * @opt: option flag indicating format in which to write the data.
 * @progress: may be 1 when called from gui to display progress
 * bar in case of a large data write; generally should be 0.
 * 
 * Write out a data file containing the values of the given set
 * of variables.
 * 
 * Returns: 0 on successful completion, non-zero on error.
 */

int write_data (const char *fname, int *list, const DATASET *dset, 
            gretlopt opt, int progress)
{
    int i, t, v, l0;
    GretlDataFormat fmt;
    char datfile[MAXLEN], hdrfile[MAXLEN], lblfile[MAXLEN];
    int tsamp = sample_size(dset);
    int n = dset->n;
    int pop_locale = 0;
    char delim = 0;
    FILE *fp = NULL;
    int *pmax = NULL;
    int freelist = 0;
    int csv_digits = 0;
    double xx;
    int err = 0;

    gretl_error_clear();

    if (list != NULL && list[0] == 0) {
      return E_ARGS;
    }

    if (list == NULL) {
      list = full_var_list(dset, &l0);
      if (l0 == 0) {
          return E_ARGS;
      } else if (list == NULL) {
          return E_ALLOC;
      } else {
          freelist = 1;
      }
    }

    l0 = list[0];
    fmt = format_from_opt_or_name(opt, fname, &delim);
    fname = gretl_maybe_switch_dir(fname);

    if (fmt == 0 || fmt == GRETL_FMT_GZIPPED) {
      err = gretl_write_gdt(fname, list, dset, 
                        (fmt == GRETL_FMT_GZIPPED)? OPT_Z : OPT_NONE,
                        progress);
      goto write_exit;
    }

    if (fmt == GRETL_FMT_DB) {
      err = write_db_data(fname, list, opt, dset);
      goto write_exit;
    }

    if (fmt == GRETL_FMT_CSV && get_csv_delim(dset) == ',' && 
      ',' == dset->decpoint) {
      gretl_errmsg_set(_("You can't use the same character for "
                     "the column delimiter and the decimal point"));
      err = E_DATA;
      goto write_exit;
    }

    strcpy(datfile, fname);

    /* write header and label files if not exporting to other formats */
    if (fmt != GRETL_FMT_R && fmt != GRETL_FMT_CSV && 
      fmt != GRETL_FMT_OCTAVE && fmt != GRETL_FMT_DAT && 
      fmt != GRETL_FMT_JM) {
      if (!has_suffix(datfile, ".gz")) {
          switch_ext(hdrfile, datfile, "hdr");
          switch_ext(lblfile, datfile, "lbl");
      } else {
          gz_switch_ext(hdrfile, datfile, "hdr");
          gz_switch_ext(lblfile, datfile, "lbl");
      }
      if (writehdr(hdrfile, list, dset, fmt)) {
          fputs(I_("Write of header file failed"), stderr);
          err = E_FOPEN;
          goto write_exit;
      }
      if (writelbl(lblfile, list, dset)) {
          fputs(I_("Write of labels file failed"), stderr);
          err = E_FOPEN;
          goto write_exit;
      }
    }

    /* open file for output */
    fp = gretl_fopen(datfile, "w");
    if (fp == NULL) {
      err = E_FOPEN;
      goto write_exit;
    }

    if (fmt == GRETL_FMT_CSV || fmt == GRETL_FMT_OCTAVE || 
      GRETL_FMT_R || fmt == GRETL_FMT_TRAD || 
      fmt == GRETL_FMT_DAT || fmt == GRETL_FMT_JM) { 
      /* an ASCII variant of some sort */
      csv_digits = libset_get_int(CSV_DIGITS);

      pmax = malloc(l0 * sizeof *pmax);
      if (pmax == NULL) {
          fclose(fp);
          err = E_ALLOC;
          goto write_exit;
      }     

      if (csv_digits == 0) {
          /* the user has not over-riden the default */
          for (i=1; i<=l0; i++) {
            v = list[i];
            pmax[i-1] = get_precision(&dset->Z[v][dset->t1], tsamp, 
                                DEFAULT_CSV_DIGITS);
          }
          csv_digits = DEFAULT_CSV_DIGITS;
      } else {
          for (i=0; i<l0; i++) {
            pmax[i] = PMAX_NOT_AVAILABLE;
          }
      }
    }

    if (fmt != GRETL_FMT_CSV || dset->decpoint != ',') {
      gretl_push_c_numeric_locale();
      pop_locale = 1;
    }

    if (fmt == GRETL_FMT_TRAD) { 
      /* plain ASCII */
      for (t=dset->t1; t<=dset->t2; t++) {
          if (dataset_has_markers(dset)) {
            fprintf(fp, "%s ", dset->S[t]);
          }
          for (i=1; i<=l0; i++) {
            v = list[i];
            if (na(dset->Z[v][t])) {
                fprintf(fp, "-999 ");
            } else if (pmax[i-1] == PMAX_NOT_AVAILABLE) {
                fprintf(fp, "%.*g ", csv_digits, dset->Z[v][t]);
            } else {
                fprintf(fp, "%.*f ", pmax[i-1], dset->Z[v][t]);
            }
          }
          fputc('\n', fp);
      }
    } else if (fmt == GRETL_FMT_CSV || fmt == GRETL_FMT_R) { 
      /* export CSV or GNU R (dataframe) */
      char na_string[8] = "NA";
      int print_obs = 0;

      if (fmt == GRETL_FMT_CSV) {
          if ((dset->structure == TIME_SERIES || dset->S != NULL)
            && !(opt & OPT_X)) {
            print_obs = 1;
          }
          if (!delim) {
            delim = get_csv_delim(dset);
          }
          strcpy(na_string, get_csv_na_string());
      } else {
          print_obs = (dset->S != NULL);
          delim = ' ';
      }

      if (fmt == GRETL_FMT_R && dataset_is_time_series(dset)) {
          char datestr[OBSLEN];

          ntodate(datestr, dset->t1, dset);
          fprintf(fp, "# time-series data: start = %s, frequency = %d\n",
                datestr, dset->pd);
      }

      if (fmt == GRETL_FMT_CSV) {
          /* optional comment */
          const char *msg = get_optval_string(STORE, OPT_E);

          if (msg != NULL && *msg != '\0') {
            fprintf(fp, "# %s\n", msg);
          }
      }         

      if (fmt == GRETL_FMT_CSV && (opt & OPT_N)) {
          ; /* no header */
      } else {
          /* header: variable names */
          if (fmt == GRETL_FMT_CSV && print_obs && 
            (dset->S != NULL || dset->structure != CROSS_SECTION)) {
            fprintf(fp, "obs%c", delim);
          }
          for (i=1; i<l0; i++) {
            fprintf(fp, "%s%c", dset->varname[list[i]], delim);
          }
          fprintf(fp, "%s\n", dset->varname[list[l0]]);
      }
      
      for (t=dset->t1; t<=dset->t2; t++) {
          if (print_obs) {
            if (dset->S != NULL) {
                fprintf(fp, "\"%s\"%c", dset->S[t], delim);
            } else {
                char tmp[OBSLEN];

                ntodate(tmp, t, dset);
                if (quarterly_or_monthly(dset)) {
                  modify_date_for_csv(tmp, dset->pd);
                }
                fprintf(fp, "%s%c", tmp, delim);
            }
          }
          for (i=1; i<=l0; i++) { 
            v = list[i];
            xx = dset->Z[v][t];
            if (na(xx)) {
                fputs(na_string, fp);
            } else if (pmax[i-1] == PMAX_NOT_AVAILABLE) {
                fprintf(fp, "%.*g", csv_digits, xx);
            } else {
                fprintf(fp, "%.*f", pmax[i-1], xx);
            }
            if (i < l0) {
                fputc(delim, fp);
            } else {
                fputc('\n', fp);
            }
          }
      }
    } else if (fmt == GRETL_FMT_OCTAVE) { 
      /* GNU Octave: write out data as several matrices (one per
         series) in the same file */

      for (i=1; i<=list[0]; i++) {
          v = list[i];
          fprintf(fp, "# name: %s\n# type: matrix\n# rows: %d\n# columns: 1\n", 
                dset->varname[v], n);
          for (t=dset->t1; t<=dset->t2; t++) {
            xx = dset->Z[v][t];
            if (na(xx)) {
                fputs("NaN ", fp);
            } else      if (pmax[i-1] == PMAX_NOT_AVAILABLE) {
                fprintf(fp, "%.15g ", xx);
            } else {
                fprintf(fp, "%.*f ", pmax[i-1], xx); 
            }
            if (t == dset->t2 || t % 4 == 0) {
                fputc('\n', fp);
            }
          }
      }
    } else if (fmt == GRETL_FMT_DAT) { 
      /* PcGive: data file with load info */
      int pd = dset->pd;

      for (i=1; i<=list[0]; i++) {
          fprintf(fp, ">%s ", dset->varname[list[i]]);
          if (dset->structure == TIME_SERIES &&
            (pd == 1 || pd == 4 || pd == 12)) {
            int maj, min;

            date_maj_min(dset->t1, dset, &maj, &min);
            fprintf(fp, "%d %d ", maj, min);
            date_maj_min(dset->t2, dset, &maj, &min);
            fprintf(fp, "%d %d %d", maj, min, pd);
          } else {
            fprintf(fp, "%d 1 %d 1 1", dset->t1, dset->t2);
          }
                     
          fputc('\n', fp);

          for (t=dset->t1; t<=dset->t2; t++) {
            v = list[i];
            xx = dset->Z[v][t];
            if (na(xx)) {
                fprintf(fp, "-9999.99");
            } else if (pmax[i-1] == PMAX_NOT_AVAILABLE) {
                fprintf(fp, "%.*g", csv_digits, xx);
            } else {
                fprintf(fp, "%.*f", pmax[i-1], xx);
            }
            fputc('\n', fp);
          }
          fputc('\n', fp);
      }
    } else if (fmt == GRETL_FMT_JM) { 
      /* JMulti: ascii with comments and date info */
      int maj, min;

      fputs("/*\n", fp);
      for (i=1; i<=list[0]; i++) {
          v = list[i];
          fprintf(fp, " %s: %s\n", dset->varname[v], VARLABEL(dset, v));
      }
      fputs("*/\n", fp);
      date_maj_min(dset->t1, dset, &maj, &min);
      if (dset->pd == 4 || dset->pd == 12) {
          fprintf(fp, "<%d %c%d>\n", maj, (dset->pd == 4)? 'Q' : 'M', min);
      } else if (dset->pd == 1) {
          fprintf(fp, "<%d>\n", maj);
      } else {
          fputs("<1>\n", fp);
      }
      for (i=1; i<=list[0]; i++) {
          v = list[i];
          fprintf(fp, " %s", dset->varname[v]);
      }
      fputc('\n', fp);
      for (t=dset->t1; t<=dset->t2; t++) {
          for (i=1; i<=list[0]; i++) {
            v = list[i];
            if (na(dset->Z[v][t])) {
                fputs("NaN ", fp);
            } else if (pmax[i-1] == PMAX_NOT_AVAILABLE) {
                fprintf(fp, "%.*g ", csv_digits, dset->Z[v][t]);
            } else {
                fprintf(fp, "%.*f ", pmax[i-1], dset->Z[v][t]);
            }
          }
          fputc('\n', fp);
      }
    }

    if (pop_locale) {
      gretl_pop_c_numeric_locale();
    }

    if (pmax != NULL) {
      free(pmax);
    }

    if (fp != NULL) {
      fclose(fp);
    }

 write_exit:

    if (freelist) {
      free(list);
    }

    return err;
}

static int no_case_series_index (const DATASET *dset,
                         const char *vname)
{
    char s1[VNAMELEN], s2[VNAMELEN];
    int i;

    *s1 = '\0';
    strncat(s1, vname, VNAMELEN - 1);
    lower(s1);

    for (i=1; i<dset->v; i++) {
      strcpy(s2, dset->varname[i]);
      lower(s2);
      if (strcmp(s1, s2) == 0) {
          return i;
      }
    }

    return -1;
}

/* read data "labels" from file */

static int readlbl (const char *lblfile, DATASET *dset)
{
    FILE * fp;
    char line[MAXLEN], varname[VNAMELEN];
    char *p;
    int v;
    
    gretl_error_clear();

    fp = gretl_fopen(lblfile, "r");
    if (fp == NULL) {
      return 0;
    }

    while (fgets(line, MAXLEN, fp)) {
      tailstrip(line);
        if (sscanf(line, "%s", varname) != 1) {
          gretl_errmsg_sprintf(_("Bad data label in %s"), lblfile); 
            break;
        }
      v = series_index(dset, varname);
      if (v == dset->v) {
          v = no_case_series_index(dset, varname);
      }
      if (v > 0 && v < dset->v) {
          p = line + strlen(varname);
          p += strspn(p, " \t");
          VARLABEL(dset, v)[0] = '\0';
          strncat(VARLABEL(dset, v), p, MAXLABEL - 1);
      } else {
          fprintf(stderr, I_("extraneous label for var '%s'\n"), varname);
      }
    }

    fclose(fp);

    return 0;
}

static int writelbl (const char *lblfile, const int *list, 
                 const DATASET *dset)
{
    FILE *fp;
    int i, lblcount = 0;

    for (i=1; i<=list[0]; i++) {
      if (list[i] == 0) {
          continue;
      }
      if (strlen(VARLABEL(dset, list[i])) > 2) {
          lblcount++;
          break;
      }
    }

    if (lblcount == 0) return 0;

    fp = gretl_fopen(lblfile, "w");
    if (fp == NULL) return 1;

    /* spit out varnames and labels (if filled out) */
    for (i=1; i<=list[0]; i++) {
      if (list[i] == 0) {
          continue;
      }
      if (strlen(VARLABEL(dset, list[i])) > 2) {
          fprintf(fp, "%s %s\n", dset->varname[list[i]],
                VARLABEL(dset, list[i]));
      }
    }
    
    if (fp != NULL) fclose(fp);

    return 0;
}

/**
 * is_gzipped:
 * @fname: filename to examine.
 * 
 * Determine if the given file is gzipped.
 * 
 * Returns: 1 in case of a gzipped file, 0 if not gzipped or
 * inaccessible.
 * 
 */

int is_gzipped (const char *fname)
{
    FILE *fp;
    int gz = 0;

    if (fname == NULL || *fname == '\0') {
      return 0;
    }

    fp = gretl_fopen(fname, "rb");
    if (fp == NULL) {
      return 0;
    }

    if (fgetc(fp) == 037 && fgetc(fp) == 0213) {
      gz = 1;
    }

    fclose(fp);

    return gz;
}

/**
 * gz_switch_ext:
 * @targ: target or "output" filename (must be pre-allocated).
 * @src: source or "input" filename.
 * @ext: suffix to add to filename.
 * 
 * Copy @src filename to @targ, without the existing suffix (if any),
 * and adding the supplied extension or suffix.
 * 
 */

void gz_switch_ext (char *targ, char *src, char *ext)
{
    size_t i = dotpos(src), j = slashpos(src), k;

    strcpy(targ, src);
    targ[i] = '\0';

    k = dotpos(targ);
    if (j > 0 && k < strlen(targ) && k > j) {
      i = k;
    }

    targ[i] = '.';
    targ[i + 1] = '\0';
    strcat(targ, ext);
}

static void try_gdt (char *fname)
{
    char *suff;

    if (fname != NULL) {
      suff = strrchr(fname, '.');
      if (suff != NULL && !strcmp(suff, ".dat")) {
          strcpy(suff, ".gdt");
      } else {
          strcat(fname, ".gdt");
      }
    }
}

/**
 * gretl_get_data:
 * @fname: name of file to try.
 * @dset: dataset struct.
 * @opt: option flags.
 * @prn: where messages should be written.
 * 
 * Read "native" data from file into gretl's work space, 
 * allocating space as required. This function handles
 * both the current gretl XML data format and the
 * traditional data format of gretl's precursor, ESL.
 * It also handles incomplete information: it can perform 
 * path-searching on @fname, and will try adding the .gdt
 * extension to @fname if this is not given.
 *
 * A more straightforward function for reading a current
 * gretl XML data file (.gdt), given the correct path,
 * is gretl_read_gdt().
 *
 * The only applicable option is that @opt may contain
 * OPT_T when appending data to a panel dataset: in
 * that case we try to interpret the new data as time
 * series, in common across all panel units. In most
 * cases, just give OPT_NONE.
 * 
 * Returns: 0 on successful completion, non-zero otherwise.
 */

int gretl_get_data (char *fname, DATASET *dset, 
                gretlopt opt, PRN *prn) 
{
    DATASET *tmpset = NULL;
    FILE *dat = NULL;
    gzFile fz = NULL;
    char hdrfile[MAXLEN], lblfile[MAXLEN];
    int gdtsuff, gzsuff = 0;
    int binary = 0, old_byvar = 0;
    int err = 0;

    gretl_error_clear();

    *hdrfile = '\0';

    gdtsuff = has_suffix(fname, ".gdt");
    if (!gdtsuff) {
      gzsuff = has_suffix(fname, ".gz");
    }

    if (gretl_addpath(fname, 0) == NULL) { 
      /* not found yet */
      char tryfile[MAXLEN];
      int found = 0;

      if (!gdtsuff) {
          /* try using the .gdt suffix? */
          *tryfile = '\0';
          strncat(tryfile, fname, MAXLEN-1);
          try_gdt(tryfile); 
          found = (gretl_addpath(tryfile, 0) != NULL);
          if (found) {
            gdtsuff = 1;
          }
      }

      /* or maybe the file is gzipped but lacks a .gz extension?
         (backward compatibility) 
      */
      if (!found && !gzsuff) { 
          sprintf(tryfile, "%s.gz", fname);
          if (gretl_addpath(tryfile, 0) != NULL) {
            gzsuff = 1;
            found = 1;
          }
      }

      if (!found) {
          gretl_errmsg_sprintf(_("Couldn't open file %s"), fname);
          return E_FOPEN;
      } else {
          strcpy(fname, tryfile);
      }
    }

    /* catch XML files that have strayed in here? */
    if (gdtsuff && gretl_is_xml_file(fname)) {
      return gretl_read_gdt(fname, dset, OPT_NONE, prn);
    }

    tmpset = datainfo_new();
    if (tmpset == NULL) {
      return E_ALLOC;
    }
      
    if (!gzsuff) {
      switch_ext(hdrfile, fname, "hdr");
      switch_ext(lblfile, fname, "lbl");
    } else {
      gz_switch_ext(hdrfile, fname, "hdr");
      gz_switch_ext(lblfile, fname, "lbl");
    }

    /* try reading data header file */
    err = readhdr(hdrfile, tmpset, &binary, &old_byvar);
    if (err) {
      free(tmpset);
    }

    if (err == E_FOPEN) {
      /* no header file, so maybe it's just an ascii datafile */
      return import_csv(fname, dset, OPT_NONE, prn);
    } else if (err) {
      return err;
    } else { 
      pprintf(prn, I_("\nReading header file %s\n"), hdrfile);
    }

    /* deal with case where first col. of data file contains
       "marker" strings */
    tmpset->S = NULL;
    if (tmpset->markers && dataset_allocate_obs_markers(tmpset)) {
      return E_ALLOC; 
    }
    
    /* allocate dataset */
    if (allocate_Z(tmpset)) {
      err = E_ALLOC;
      goto bailout;
    }

    /* Invoke data (Z) reading function */
    if (gzsuff) {
      fz = gretl_gzopen(fname, "rb");
      if (fz == NULL) {
          err = E_FOPEN;
          goto bailout;
      }
    } else {
      if (binary) {
          dat = gretl_fopen(fname, "rb");
      } else {
          dat = gretl_fopen(fname, "r");
      }
      if (dat == NULL) {
          err = E_FOPEN;
          goto bailout;
      }
    }

    if (gzsuff) {
      err = gz_readdata(fz, tmpset, binary); 
      gzclose(fz);
    } else {
      err = readdata(dat, tmpset, binary, old_byvar); 
      fclose(dat);
    }

    if (err) goto bailout;

    if (tmpset->structure == STACKED_CROSS_SECTION) {
      err = switch_panel_orientation(tmpset);
    }

    if (err) goto bailout;

    /* print out basic info from the files read */
    pprintf(prn, I_("periodicity: %d, maxobs: %d\n"
         "observations range: %s-%s\n"), tmpset->pd, tmpset->n,
         tmpset->stobs, tmpset->endobs);

    pputs(prn, I_("\nReading "));
    pputs(prn, (tmpset->structure == TIME_SERIES) ? 
          I_("time-series") : _("cross-sectional"));
    pputs(prn, I_(" datafile"));
    if (strlen(fname) > 40) {
      pputc(prn, '\n');
    }
    pprintf(prn, " %s\n\n", fname);

    /* Set sample range to entire length of dataset by default */
    tmpset->t1 = 0; 
    tmpset->t2 = tmpset->n - 1;

    err = readlbl(lblfile, tmpset);
    if (err) goto bailout;

    err = merge_or_replace_data(dset, &tmpset, opt, prn);

 bailout:

    if (err && tmpset != NULL) {
      destroy_dataset(tmpset);
    }

    return err;
}

/**
 * open_nulldata:
 * @dset: dataset struct.
 * @data_status: indicator for whether a data file is currently open
 * in gretl's work space (1) or not (0).
 * @length: desired length of data series.
 * @prn: gretl printing struct.
 * 
 * Create an empty "dummy" data set, suitable for simulations.
 * 
 * Returns: 0 on successful completion, non-zero otherwise.
 *
 */

int open_nulldata (DATASET *dset, int data_status, int length,
               PRN *prn) 
{
    int t;

    /* clear any existing data info */
    if (data_status) {
      clear_datainfo(dset, CLEAR_FULL);
    }

    /* dummy up the data info */
    dset->n = length;
    dset->v = 2;
    dataset_obs_info_default(dset);

    if (dataset_allocate_varnames(dset)) {
      return E_ALLOC;
    }

    /* allocate dataset */
    if (allocate_Z(dset)) {
      return E_ALLOC;
    }

    /* add an index var */
    strcpy(dset->varname[1], "index");
    strcpy(VARLABEL(dset, 1), _("index variable"));
    for (t=0; t<dset->n; t++) {
      dset->Z[1][t] = (double) (t + 1);
    }

    /* print out basic info */
    pprintf(prn, M_("periodicity: %d, maxobs: %d\n"
         "observations range: %s-%s\n"), dset->pd, dset->n,
         dset->stobs, dset->endobs);

    /* Set sample range to entire length of data-set by default */
    dset->t1 = 0; 
    dset->t2 = dset->n - 1;

    return 0;
}

static int extend_markers (DATASET *dset, int old_n, int new_n)
{
    char **S = realloc(dset->S, new_n * sizeof *S);
    int t, err = 0;
         
    if (S == NULL) {
      err = 1;
    } else {
      dset->S = S;
      for (t=old_n; t<new_n && !err; t++) {
          S[t] = malloc(OBSLEN);
          if (S[t] == NULL) {
            err = 1;
          } 
      }
    }

    return err;
}

static void merge_error (char *msg, PRN *prn)
{
    pputs(prn, msg);
    gretl_errmsg_set(msg);
}

static int count_new_vars (const DATASET *dset, const DATASET *addinfo,
                     PRN *prn)
{
    const char *newname;
    /* default to all new, and subtract */
    int addvars = addinfo->v - 1;
    int i, j;

    for (i=1; i<addinfo->v && addvars >= 0; i++) {
      newname = addinfo->varname[i];
      if (get_matrix_by_name(newname)) {
          merge_error("can't replace matrix with series\n", prn);
          addvars = -1;
      } else if (get_string_by_name(newname)) {
          merge_error("can't replace string with series\n", prn);
          addvars = -1;
      } else {
          for (j=1; j<dset->v; j++) {
            /* FIXME collision with scalar, matrix names */
            if (!strcmp(newname, dset->varname[j])) {
                addvars--;
            }
          }
      }
    }

    return addvars;
}

static int compare_ranges (const DATASET *targ,
                     const DATASET *src,
                     int *offset)
{
    int ed0 = dateton(targ->endobs, targ);
    int sd1 = merge_dateton(src->stobs, targ);
    int ed1 = merge_dateton(src->endobs, targ);
    int addobs = -1;

#if 0
    fprintf(stderr, "compare_ranges:\n"
          " targ->n = %d, src->n = %d\n"
          " targ->stobs = '%s', src->stobs = '%s'\n" 
          " sd1 = %d, ed1 = %d\n",
          targ->n, src->n, targ->stobs, src->stobs,
          sd1, ed1);
#endif

    if (sd1 < 0) {
      /* case: new data start earlier than old */
      if (ed1 < 0) {
          fprintf(stderr, "no overlap in ranges, can't merge\n");
      } else if (ed1 > ed0) {
          fprintf(stderr, "new data start earlier, end later, can't handle\n");
      } else {
          *offset = sd1;
          addobs = 0;
      }
    } else if (sd1 == 0 && ed1 == ed0) {
      /* case: exact match of ranges */
      *offset = 0;
      addobs = 0;
    } else if (sd1 == 0) {
      /* case: starting obs the same */
      *offset = 0;
      if (ed1 > ed0) {
          addobs = ed1 - ed0;
      } else {
          addobs = 0;
      }
    } else if (sd1 == ed0 + 1) {
      /* case: new data start right after end of old */
      *offset = sd1;
      addobs = src->n;
    } else if (sd1 > 0) {
      /* case: new data start later than old */
      if (sd1 <= ed0) {
          /* but there's some overlap */
          *offset = sd1;
          if (ed1 > ed0) {
            addobs = ed1 - ed0;
          } else {
            addobs = 0;
          }
      }
    }

    if (addobs < 0) {
      fputs("compare_ranges: returning error\n", stderr);
    }

    return addobs;
}

/* When appending data to a current panel dataset, and the length of
   the series in the new data is less than the full panel size
   (n * T), try to determine if it's OK to expand the incoming data to
   match.

   We'll say it's OK if the new series length equals the panel T: in
   that case we'll take the new data to be time-series, which should
   be replicated for each panel unit.

   A second possibility arises if the length of the new series 
   equals the panel n: in that case we could treat it as a time-
   invariant characteristic of the panel unit, which should be
   replicated for each time period.  But note that if OPT_T is
   given, this second expansion is forbidden: the user has
   stipulated that the new data are time-varying.
*/

static int panel_expand_ok (DATASET *dset, DATASET *addinfo,
                      gretlopt opt)
{
    int n = dset->n / dset->pd;
    int T = dset->pd;
    int ok = 0;

    if (addinfo->n == T) {
      ok = 1;
    } else if (!(opt & OPT_T) &&
             addinfo->n == n && 
             addinfo->pd == 1) {
      ok = 1;
    }

    return ok;
}

static int panel_append_special (int addvars, 
                         DATASET *dset, 
                         DATASET *addset,
                         gretlopt opt,
                         PRN *prn)
{
    int n = dset->n / dset->pd;
    int T = dset->pd;
    int k = dset->v;
    int tsdata;
    int i, j, s, p, t;
    int err = 0;

    if (addvars > 0 && dataset_add_series(addvars, dset)) {
      merge_error(_("Out of memory!\n"), prn);
      err = E_ALLOC;
    }

    tsdata = ((opt & OPT_T) || addset->n != n);

    for (i=1; i<addset->v && !err; i++) {
      int v = series_index(dset, addset->varname[i]);

      if (v >= k) {
          /* a new variable */
          v = k++;
          strcpy(dset->varname[v], addset->varname[i]);
          copy_varinfo(dset->varinfo[v], addset->varinfo[i]);
      } 

      s = 0;
      for (j=0; j<n; j++) {
          /* loop across units */
          for (t=0; t<T; t++) {
            /* loop across periods */
            p = (tsdata)? t : j;
            dset->Z[v][s++] = addset->Z[i][p]; 
          }
      }
    }

    return err;
}

static int 
just_append_rows (const DATASET *targ, const DATASET *src,
              int *offset)
{
    if (targ->structure == CROSS_SECTION &&
      src->structure == CROSS_SECTION &&
      targ->markers == 0 && src->markers == 0 &&
      targ->sd0 == 1 && src->sd0 == 1) {
      *offset = targ->n;
      return src->n;
    } else {
      return 0;
    }
}

static int simple_range_match (const DATASET *targ, const DATASET *src,
                         int *offset)
{
    int ret = 0;

    if (src->pd == 1 && src->structure == CROSS_SECTION) {
      if (src->n == targ->n) {
          ret = 1;
      } else if (src->n == targ->t2 - targ->t1 + 1) {
          ret = 1;
          *offset = targ->t1;
      }
    }

    return ret;
}

#if 0
static int markers_are_ints (const DATASET *dset)
{
    char *test;
    int i;

    errno = 0;

    for (i=0; i<dset->n; i++) {
      strtol(dset->S[i], &test, 10);
      if (*test || errno) {
          errno = 0;
          return 0;
      }
    }

    return 1;
}
#endif

#define simple_structure(p) (p->structure == TIME_SERIES ||       \
                       p->structure == SPECIAL_TIME_SERIES ||     \
                       (p->structure == CROSS_SECTION &&          \
                        p->S == NULL))

/**
 * merge_data:
 * @dset: dataset struct.
 * @addset: dataset to be merged in.
 * @opt: may include OPT_T to force a time-series interpretation
 * when appending to a panel dataset.
 * @prn: print struct to accept messages.
 * 
 * Attempt to merge the content of a newly opened data file into
 * gretl's current working data set.  
 * 
 * Returns: 0 on successful completion, non-zero otherwise.
 */

static int merge_data (DATASET *dset, DATASET *addset,
                   gretlopt opt, PRN *prn)
{
    int dayspecial = 0;
    int addsimple = 0;
    int addpanel = 0;
    int addvars = 0;
    int addobs = 0;
    int offset = 0;
    int err = 0;

    /* first see how many new vars we have */
    addvars = count_new_vars(dset, addset, prn);
    if (addvars < 0) {
      return 1;
    }

    if (dated_daily_data(dset) && dated_daily_data(addset)) {
      fprintf(stderr, "special: merging daily data\n");
      dayspecial = 1;
    }

    /* below: had additional condition: simple_structure(dset)
       relaxed this on 2009-05-15 */

    if (simple_range_match(dset, addset, &offset)) {
      /* we'll allow undated data to be merged with the existing
         dateset, sideways, provided the number of observations
         matches OK */
      addsimple = 1;
    } else if (dataset_is_panel(dset) && 
             panel_expand_ok(dset, addset, opt)) {
      /* allow appending to panel when the number of obs matches
         either the cross-section size or the time-series length */
      addpanel = 1;
    } else if (dset->pd != addset->pd) {
      merge_error(_("Data frequency does not match\n"), prn);
      err = 1;
    }

    if (!err) {
      if (!addsimple && !addpanel) {
          addobs = compare_ranges(dset, addset, &offset);
      }
      if (addobs <= 0 && addvars == 0) {
          addobs = just_append_rows(dset, addset, &offset);
      }
    }

    if (!err && (addobs < 0 || addvars < 0)) {
      merge_error(_("New data not conformable for appending\n"), prn);
      err = 1;
    }

    if (!err && !addpanel && dset->markers != addset->markers) {
      if (addset->n != dset->n) {
          merge_error(_("Inconsistency in observation markers\n"), prn);
          err = 1;
      } else if (addset->markers && !dset->markers) {
          dataset_destroy_obs_markers(addset);
      }
    }

#if 0
    fprintf(stderr, "merge_data: addvars = %d, addobs = %d\n",
          addvars, addobs);
#endif

    /* if checks are passed, try merging the data */

    if (!err && addobs > 0) { 
      int i, t, new_n = dset->n + addobs;

      if (dset->markers) {
          err = extend_markers(dset, dset->n, new_n);
          if (!err) {
            for (t=dset->n; t<new_n; t++) {
                strcpy(dset->S[t], addset->S[t - offset]);
            }
          }
      }

      for (i=0; i<dset->v && !err; i++) {
          double *x;

          x = realloc(dset->Z[i], new_n * sizeof *x);
          if (x == NULL) {
            err = 1;
            break;
          }

          for (t=dset->n; t<new_n; t++) {
            if (i == 0) {
                x[t] = 1.0;
            } else {
                x[t] = NADBL;
            }
          }
          dset->Z[i] = x;
      }

      if (err) { 
          merge_error(_("Out of memory!\n"), prn);
      } else {
          dset->n = new_n;
          ntodate(dset->endobs, new_n - 1, dset);
          dset->t2 = dset->n - 1;
      }
    }

    if (!err && addpanel) {
      err = panel_append_special(addvars, dset, addset, 
                           opt, prn);
    } else if (!err) { 
      int k = dset->v;
      int i, t;

      if (addvars > 0 && dataset_add_series(addvars, dset)) {
          merge_error(_("Out of memory!\n"), prn);
          err = E_ALLOC;
      }

      for (i=1; i<addset->v && !err; i++) {
          int v = series_index(dset, addset->varname[i]);
          int newvar = 0;

          if (v >= k) {
            /* a new variable */
            v = k++;
            newvar = 1;
            strcpy(dset->varname[v], addset->varname[i]);
            copy_varinfo(dset->varinfo[v], addset->varinfo[i]);
          } 

          if (dayspecial) {
            char obs[OBSLEN];
            int s;

            for (t=0; t<dset->n; t++) {
                ntodate(obs, t, dset);
                s = dateton(obs, addset);
                if (s >= 0 && s < addset->n) {
                  dset->Z[v][t] = addset->Z[i][s];
                } else {
                  dset->Z[v][t] = NADBL;
                }
            }
          } else {
            for (t=0; t<dset->n; t++) {
                if (t >= offset && t - offset < addset->n) {
                  dset->Z[v][t] = addset->Z[i][t - offset];
                } else if (newvar) {
                  dset->Z[v][t] = NADBL;
                }
            }
          }
      }
    }

    if (!err && (addvars || addobs) && gretl_messages_on()) {
      pputs(prn, _("Data appended OK\n"));
    }

    return err;
}

/**
 * merge_or_replace_data:
 * @dset0: original dataset struct.
 * @pdset1: new dataset struct.
 * @opt: may include OPT_T when appending to a panel dataset,
 * to force a time-series interpretation of the added data.
 * @prn: print struct to accept messages.
 *
 * Given a newly-created dataset, pointed to by @pdset1, either 
 * attempt to merge it with @dset0, if the original data array 
 * is non-NULL, or replace the content of the original pointer
 * with the new dataset.
 *
 * In case merging is not successful, the new dataset is
 * destroyed.
 * 
 * Returns: 0 on successful completion, non-zero otherwise.
 */

int merge_or_replace_data (DATASET *dset0, DATASET **pdset1,
                     gretlopt opt, PRN *prn)
{
    int err = 0;

    if (dset0->Z != NULL) {
      err = merge_data(dset0, *pdset1, opt, prn);
      destroy_dataset(*pdset1);
    } else {
      *dset0 = **pdset1;
      free(*pdset1);
    }

    *pdset1 = NULL;

    return err;
}

static int check_imported_string (char *src, int i, size_t len)
{
    int err = 0;

    if (!g_utf8_validate(src, -1, NULL)) {
      gchar *trstr = NULL;
      gsize bytes;

      trstr = g_locale_to_utf8(src, -1, NULL, &bytes, NULL);

      if (trstr == NULL) {
          gretl_errmsg_sprintf("Invalid characters in imported string, line %d", i);
          err = E_DATA;
      } else {
          *src = '\0';
          strncat(src, trstr, len - 1);
          g_free(trstr);
      }
    }

    return err;
}

/**
 * add_obs_markers_from_file:
 * @dset: data information struct.
 * @fname: name of file containing case markers.
 * 
 * Read case markers (strings of %OBSLEN - 1 characters or less that identify
 * the observations) from a file, and associate them with the 
 * current data set.  The file should contain one marker per line,
 * with a number of lines equal to the number of observations in
 * the current data set.
 * 
 * Returns: 0 on successful completion, non-zero otherwise.
 */

int add_obs_markers_from_file (DATASET *dset, const char *fname)
{
    char **S = NULL;
    FILE *fp;
    char line[128], marker[32];
    int t, err = 0;

    fp = gretl_fopen(fname, "r");
    if (fp == NULL) {
      return E_FOPEN;
    }

    S = strings_array_new_with_length(dset->n, OBSLEN);
    if (S == NULL) {
      fclose(fp);
      return E_ALLOC;
    }
    
    for (t=0; t<dset->n && !err; t++) {
      if (fgets(line, sizeof line, fp) == NULL) {
          gretl_errmsg_sprintf("Expected %d markers; found %d\n", 
                         dset->n, t);
          err = E_DATA;
      } else if (sscanf(line, "%31[^\n\r]", marker) != 1) {
          gretl_errmsg_sprintf("Couldn't read marker on line %d", t+1);
          err = E_DATA;
      } else {
          g_strstrip(marker);
          strncat(S[t], marker, OBSLEN - 1);
          err = check_imported_string(S[t], t+1, OBSLEN);
      }
    }

    if (err) {
      free_strings_array(S, dset->n);
    } else {
      if (dset->S != NULL) {
          free_strings_array(dset->S, dset->n);
      } 
      dset->markers = REGULAR_MARKERS;
      dset->S = S;
    }

    return err;
}

/**
 * dataset_has_var_labels:
 * @dset: data information struct.
 * 
 * Returns: 1 if at least one variable in the current dataset
 * has a descriptive label, otherwise 0.
 */

int dataset_has_var_labels (const DATASET *dset)
{
    const char *label;
    int i, imin = 1;

    if (dset->v > 1) {
      if (!strcmp(dset->varname[1], "index") &&
          !strcmp(VARLABEL(dset, 1), _("index variable"))) {
          imin = 2;
      }
    }

    for (i=imin; i<dset->v; i++) {
      label = VARLABEL(dset, i);
      if (*label != '\0') {
          return 1;
      }
    }

    return 0;
}

/**
 * save_var_labels_to_file:
 * @dset: data information struct.
 * @fname: name of file containing labels.
 * 
 * Writes to @fname the descriptive labels for the series in
 * the current dataset.
 * 
 * Returns: 0 on successful completion, non-zero otherwise.
 */

int save_var_labels_to_file (const DATASET *dset, const char *fname)
{
    FILE *fp;
    int i, err = 0;

    fp = gretl_fopen(fname, "w");

    if (fp == NULL) {
      err = E_FOPEN;
    } else {
      for (i=1; i<dset->v; i++) {
          fprintf(fp, "%s\n", VARLABEL(dset, i));
      }
      fclose(fp);
    }

    return err;
}

/**
 * add_var_labels_from_file:
 * @dset: data information struct.
 * @fname: name of file containing labels.
 * 
 * Read descriptive variables for labels (strings of %MAXLABEL - 1 
 * characters or less) from a file, and associate them with the 
 * current data set.  The file should contain one label per line,
 * with a number of lines equal to the number of variables in
 * the current data set, excluding the constant.
 * 
 * Returns: 0 on successful completion, non-zero otherwise.
 */

int add_var_labels_from_file (DATASET *dset, const char *fname)
{
    FILE *fp;
    char line[256], label[MAXLABEL];
    char *targ;
    int nlabels = 0;
    int i, err = 0;

    fp = gretl_fopen(fname, "r");
    if (fp == NULL) {
      return E_FOPEN;
    }

    for (i=1; i<dset->v && !err; i++) {
      if (fgets(line, sizeof line, fp) == NULL) {
          break;
      } else if (sscanf(line, "%127[^\n\r]", label) != 1) {
          continue;
      } else {
          targ = VARLABEL(dset, i);
          g_strstrip(label);
          *targ = '\0';
          strncat(targ, label, MAXLABEL - 1);
          err = check_imported_string(targ, i+1, MAXLABEL);
          if (err) {
            *targ = '\0';
          } else {
            nlabels++;
          }
      }
    }

    if (!err && nlabels == 0) {
      gretl_errmsg_set("No labels found");
      err = E_DATA;
    }

    return err;
}

int read_or_write_var_labels (gretlopt opt, DATASET *dset, PRN *prn)
{
    const char *fname;
    int err;

    err = incompatible_options(opt, OPT_T | OPT_F); 
    if (err) {
      return err;
    }

    fname = get_optval_string(LABELS, opt);
    if (fname == NULL) {
      return E_BADOPT;
    }

    if (opt & OPT_T) {
      /* to-file */
      if (!dataset_has_var_labels(dset)) {
          pprintf(prn, "No labels are available for writing\n");
          err = E_DATA;
      } else {
          err = save_var_labels_to_file(dset, fname);
          if (!err && gretl_messages_on() && !gretl_looping_quietly()) {
            pprintf(prn, "Labels written OK\n");
          }
      }
    } else if (opt & OPT_F) {
      /* from-file */
      err = add_var_labels_from_file(dset, fname);
      if (!err && gretl_messages_on() && !gretl_looping_quietly()) {
          pprintf(prn, "Labels loaded OK\n");
      }     
    }

    return err;
}

static void 
octave_varname (char *name, const char *s, int nnum, int v)
{
    char nstr[8];
    int len, tr;

    if (nnum == 0) {
      strcpy(name, s);
    } else {
      sprintf(nstr, "%d", nnum);
      len = strlen(nstr);
      tr = VNAMELEN - len;

      if (tr > 0) {
          strncat(name, s, tr);
          strcat(name, nstr);
      } else {
          sprintf(name, "v%d", v);
      }
    }
}

static int get_max_line_length (FILE *fp, PRN *prn)
{
    int c, c1, cc = 0;
    int maxlen = 0;

    while ((c = fgetc(fp)) != EOF) {
      if (c == 0x0d) {
          /* CR */
          c1 = fgetc(fp);
          if (c1 == EOF) {
            break;
          } else if (c1 == 0x0a) {
            /* CR + LF -> LF */
            c = c1;
          } else {
            /* Mac-style: CR not followed by LF */
            c = 0x0a;
            ungetc(c1, fp);
          }
      }
      if (c == 0x0a) {
          if (cc > maxlen) {
            maxlen = cc;
          }
          cc = 0;
          continue;
      }
      if (!isspace((unsigned char) c) && !isprint((unsigned char) c) &&
          !(c == CTRLZ)) {
          pprintf(prn, M_("Binary data (%d) encountered: this is not a valid "
                     "text file\n"), c);
          return -1;
      }
      cc++;
    }

    if (maxlen == 0) {
      pprintf(prn, M_("Data file is empty\n"));
    } 

    if (maxlen > 0) {
      /* allow for newline and null terminator */
      maxlen += 3;
    }

    return maxlen;
}

static int import_octave (const char *fname, DATASET *dset, 
                    gretlopt opt, PRN *prn)
{
    DATASET *octset = NULL;
    FILE *fp = NULL;
    char *line = NULL;
    char tmp[8], name[32];
    int nrows = 0, ncols = 0, nblocks = 0;
    int brows = 0, bcols = 0, oldbcols = 0;
    int maxlen, got_type = 0, got_name = 0;
    int i, t, err = 0;

    pprintf(prn, "%s %s...\n", M_("parsing"), fname);

    maxlen = get_max_line_length(fp, prn);
    if (maxlen <= 0) {
      err = E_DATA;
      goto oct_bailout;
    }
 
    line = malloc(maxlen);
    if (line == NULL) {
      err = E_ALLOC;
      goto oct_bailout;
    }

    pprintf(prn, M_("   longest line: %d characters\n"), maxlen - 1);

    rewind(fp);

    while (fgets(line, maxlen, fp) && !err) {
      if (*line == '#') {
          if (!got_name) {
            if (sscanf(line, "# name: %31s", name) == 1) {
                got_name = 1;
                nblocks++;
                continue;
            }
          }
          if (!got_type) {
            if (sscanf(line, "# type: %7s", tmp) == 1) {
                if (!got_name || strcmp(tmp, "matrix")) {
                  err = 1;
                } else {
                  got_type = 1;
                }
                continue;
            }
          }
          if (brows == 0) {
            if (sscanf(line, "# rows: %d", &brows) == 1) {
                if (!got_name || !got_type || brows <= 0) {
                  err = 1;
                } else if (nrows > 0 && brows != nrows) {
                  err = 1;
                } else {
                  nrows = brows;
                }
                continue;
            }         
          } 
          if (bcols == 0) {
            if (sscanf(line, "# columns: %d", &bcols) == 1) {
                if (!got_name || !got_type || bcols <= 0) {
                  err = 1;
                } else {
                  ncols += bcols;
                  pprintf(prn, M_("   Found matrix '%s' with "
                              "%d rows, %d columns\n"), name, brows, bcols);
                }
                continue;
            }
          }
      } else if (string_is_blank(line)) {
          continue;
      } else {
          got_name = 0;
          got_type = 0;
          brows = 0;
          bcols = 0;
      }
    }

    if (err || nrows == 0 || ncols == 0) {
      pputs(prn, M_("Invalid data file\n"));
      err = E_DATA;
      goto oct_bailout;
    } 

    /* initialize datainfo and Z */

    octset = datainfo_new();
    if (octset == NULL) {
      pputs(prn, M_("Out of memory!\n"));
      err = E_ALLOC;
      goto oct_bailout;
    }

    octset->n = nrows;
    octset->v = ncols + 1;

    if (start_new_Z(octset, 0)) {
      pputs(prn, M_("Out of memory!\n"));
      err = E_ALLOC;
      goto oct_bailout;
    }  

    rewind(fp);

    pprintf(prn, M_("   number of variables: %d\n"), ncols);
    pprintf(prn, M_("   number of observations: %d\n"), nrows);
    pprintf(prn, M_("   number of data blocks: %d\n"), nblocks); 

    i = 1;
    t = 0;

    while (fgets(line, maxlen, fp) && !err) {
      char *s = line;
      int j;

      if (*s == '#') {
          if (sscanf(line, "# name: %15s", name) == 1) {
            ;
          } else if (sscanf(line, "# rows: %d", &brows) == 1) {
            t = 0;
          } else if (sscanf(line, "# columns: %d", &bcols) == 1) {
            i += oldbcols;
            oldbcols = bcols;
          }
      } 

      if (*s == '#' || string_is_blank(s)) {
          continue;
      }

      if (t >= octset->n) {
          err = 1;
      }

      for (j=0; j<bcols && !err; j++) {
          double x;
          int v = i + j;

          if (t == 0) {
            int nnum = (bcols > 1)? j + 1 : 0;

            octave_varname(octset->varname[i+j], name, nnum, v);
          }

          while (isspace(*s)) s++;
          if (sscanf(s, "%lf", &x) != 1) {
            fprintf(stderr, "error: '%s', didn't get double\n", s);
            err = 1;
          } else {
            octset->Z[v][t] = x;
            while (!isspace(*s)) s++;
          } 
      }
      t++;
    }

    if (err) {
      pputs(prn, M_("Invalid data file\n"));
      err = E_DATA;
      goto oct_bailout;
    } 

    err = merge_or_replace_data(dset, &octset, opt, prn);

 oct_bailout:

    if (fp != NULL) {
      fclose(fp);
    }

    if (line != NULL) {
      free(line);
    }

    if (octset != NULL) {
      clear_datainfo(octset, CLEAR_FULL);
    }

    console_off();

    return err;
}

/**
 * import_other:
 * @fname: name of file.
 * @ftype: type of data file.
 * @dset: pointer to dataset struct.
 * @opt: option flag; see gretl_get_data().
 * @prn: gretl printing struct.
 * 
 * Open a data file of a type that requires a special plugin.
 * 
 * Returns: 0 on successful completion, non-zero otherwise.
 */

int import_other (const char *fname, GretlFileType ftype,
              DATASET *dset, gretlopt opt, PRN *prn)
{
    void *handle;
    FILE *fp;
    int (*importer) (const char *, DATASET *, 
                 gretlopt, PRN *);
    int err = 0;

    check_for_console(prn);

    fp = gretl_fopen(fname, "r");
    if (fp == NULL) {
      pprintf(prn, M_("Couldn't open %s\n"), fname);
      err = E_FOPEN;
      goto bailout;
    }

    fclose(fp);

    if (ftype == GRETL_OCTAVE) {
      /* plugin not needed */
      return import_octave(fname, dset, opt, prn);
    }

    if (ftype == GRETL_WF1) {
      importer = get_plugin_function("wf1_get_data", &handle);
    } else if (ftype == GRETL_DTA) {
      importer = get_plugin_function("dta_get_data", &handle);
    } else if (ftype == GRETL_SAV) {
      importer = get_plugin_function("sav_get_data", &handle);
    } else if (ftype == GRETL_SAS) {
      importer = get_plugin_function("xport_get_data", &handle);
    } else if (ftype == GRETL_JMULTI) {
      importer = get_plugin_function("jmulti_get_data", &handle);
    } else {
      pprintf(prn, M_("Unrecognized data type"));
      pputc(prn, '\n');
      return E_DATA;
    }

    if (importer == NULL) {
        err = 1;
    } else {
      err = (*importer)(fname, dset, opt, prn);
      close_plugin(handle);
    }

 bailout:

    console_off();

    return err;
}

/**
 * import_spreadsheet:
 * @fname: name of file.
 * @ftype: type of data file.
 * @list: list of parameters for spreadsheet import, or NULL.
 * @sheetname: name of specific worksheet, or NULL.
 * @dset: dataset struct.
 * @opt: option flag; see gretl_get_data().
 * @prn: gretl printing struct.
 * 
 * Open a data file of a type that requires a special plugin.
 * Acceptable values for @ftype are %GRETL_GNUMERIC,
 * %GRETL_XLS, %GRETL_XLSX and %GRETL_ODS.
 * 
 * Returns: 0 on successful completion, non-zero otherwise.
 */

int import_spreadsheet (const char *fname, GretlFileType ftype, 
                  int *list, char *sheetname,
                  DATASET *dset, gretlopt opt, PRN *prn)
{
    void *handle;
    FILE *fp;
    int (*importer) (const char*, int *, char *,
                 DATASET *, gretlopt, PRN *);
    int err = 0;

    check_for_console(prn);

    fp = gretl_fopen(fname, "r");
    if (fp == NULL) {
      pprintf(prn, M_("Couldn't open %s\n"), fname);
      err = E_FOPEN;
      goto bailout;
    }

    fclose(fp);

    if (ftype == GRETL_GNUMERIC) {
      importer = get_plugin_function("gnumeric_get_data", &handle);
    } else if (ftype == GRETL_XLS) {
      importer = get_plugin_function("xls_get_data", &handle);
    } else if (ftype == GRETL_XLSX) {
      importer = get_plugin_function("xlsx_get_data", &handle);
    } else if (ftype == GRETL_ODS) {
      importer = get_plugin_function("ods_get_data", &handle);
    } else {
      pprintf(prn, M_("Unrecognized data type"));
      pputc(prn, '\n');
      return E_DATA;
    }

    if (importer == NULL) {
        err = 1;
    } else {
      err = (*importer)(fname, list, sheetname, dset, opt, prn);
      close_plugin(handle);
    }

 bailout:

    console_off();

    return err;
}

static int is_jmulti_datafile (const char *fname)
{
    FILE *fp;
    int ret = 0;

    fp = gretl_fopen(fname, "r");

    if (fp != NULL) {
      char test[128] = {0};
      int gotobs = 0;
      int gotcomm = 0;
      int incomm = 0;

      /* look for characteristic C-style comment and
         <obs stuff> field, outside of comment */

      while (fgets(test, sizeof test, fp)) {
          if (!incomm && strstr(test, "/*")) {
            gotcomm = 1;
            incomm = 1;
          }
          if (incomm && strstr(test, "*/")) {
            incomm = 0;
          }
          if (!incomm && *test == '<' && strchr(test, '>')) {
            gotobs = 1;
          }
          if (gotcomm && gotobs) {
            ret = 1;
            break;
          }
      } 
      fclose(fp);
    } 

    return ret;
}

/**
 * gretl_is_pkzip_file:
 * @fname: name of file to examine.
 * 
 * Returns: 1 if @fname is readable and is a PKZIP file,
 * else 0.
 */

int gretl_is_pkzip_file (const char *fname)
{
    FILE *fp;
    char test[3] = {0};
    int ret = 0;

    fp = gretl_fopen(fname, "rb");
    if (fp != NULL) {
      if (fread(test, 1, 2, fp) == 2) {
          if (!strcmp(test, "PK")) ret = 1;
      } 
      fclose(fp);
    } 

    return ret;
}

/**
 * detect_filetype:
 * @fname: name of file to examine.
 * @opt: include OPT_P to permit path-searching if @fname
 * is not an absolute path; in that case the @fname argument
 * may be modified, otherwise it will be left unchanged.
 * 
 * Attempt to determine the type of a file to be opened in gretl:
 * data file (of various formats), or command script. If OPT_P
 * is given, the @fname argument must be an array of length 
 * at least %MAXLEN.
 * 
 * Returns: integer code indicating the type of file.
 */

GretlFileType detect_filetype (char *fname, gretlopt opt)
{
    int i, c, ftype = GRETL_NATIVE_DATA;
    FILE *fp;

    /* might be a script file? (watch out for DOS-mangled names) */
    if (has_suffix(fname, ".inp")) { 
      return GRETL_SCRIPT;
    }

    if (has_suffix(fname, ".gretl")) {
      if (gretl_is_pkzip_file(fname)) {
          return GRETL_SESSION;
      } else {
          return GRETL_SCRIPT;
      }
    }

    if (has_suffix(fname, ".gnumeric"))
      return GRETL_GNUMERIC;
    if (has_suffix(fname, ".xlsx"))
      return GRETL_XLSX;
    if (has_suffix(fname, ".xls"))
      return GRETL_XLS;
    if (has_suffix(fname, ".ods"))
      return GRETL_ODS;
    if (has_suffix(fname, ".wf1"))
      return GRETL_WF1;
    if (has_suffix(fname, ".dta"))
      return GRETL_DTA;
    if (has_suffix(fname, ".sav"))
      return GRETL_SAV;
    if (has_suffix(fname, ".xpt"))
      return GRETL_SAS;
    if (has_suffix(fname, ".bin"))
      return GRETL_NATIVE_DB;
    if (has_suffix(fname, ".rat"))
      return GRETL_RATS_DB;
    if (has_suffix(fname, ".csv"))
      return GRETL_CSV;
    if (has_suffix(fname, ".txt"))
      return GRETL_CSV;
    if (has_suffix(fname, ".asc"))
      return GRETL_CSV;
    if (has_suffix(fname, ".m"))
      return GRETL_OCTAVE;
    if (has_suffix(fname, ".bn7"))
      return GRETL_PCGIVE_DB;

    if (opt & OPT_P) {
      gretl_addpath(fname, 0); 
    }

    if (gretl_is_xml_file(fname)) {
      return GRETL_XML_DATA;  
    } 

    if (has_suffix(fname, ".dat") && is_jmulti_datafile(fname)) {
      return GRETL_JMULTI; 
    }

    fp = gretl_fopen(fname, "r");
    if (fp == NULL) { 
      /* may be native file in different location */
      return GRETL_NATIVE_DATA; 
    }

    /* take a peek at content */
    for (i=0; i<80; i++) {
      c = getc(fp);
      if (c == EOF || c == '\n') {
          break;
      }
      if (!isprint(c) && c != '\r' && c != '\t') {
          ftype = GRETL_NATIVE_DATA; /* native binary data? */
          break;
      }
    }

    fclose(fp);

    return ftype;
}

/**
 * check_atof:
 * @numstr: string to check.
 *
 * Returns: 0 if @numstr is blank, or is a valid string representation
 * of a floating point number, else 1.
 */

int check_atof (const char *numstr)
{
    char *test;

    /* accept blank entries */
    if (*numstr == '\0') return 0;

    errno = 0;

    strtod(numstr, &test);

    if (*test == '\0' && errno != ERANGE) return 0;

    if (!strcmp(numstr, test)) {
      gretl_errmsg_sprintf(M_("'%s' -- no numeric conversion performed!"), numstr);
      return 1;
    }

    if (*test != '\0') {
      if (isprint(*test)) {
          gretl_errmsg_sprintf(M_("Extraneous character '%c' in data"), *test);
      } else {
          gretl_errmsg_sprintf(M_("Extraneous character (0x%x) in data"), *test);
      }
      return 1;
    }

    if (errno == ERANGE) {
      gretl_errmsg_sprintf(M_("'%s' -- number out of range!"), numstr);
    }

    return 1;
}

/**
 * check_atoi:
 * @numstr: string to check.
 *
 * Returns: 0 if @numstr is blank, or is a valid string representation
 * of an int, else 1.
 */

int check_atoi (const char *numstr)
{
    long int val;
    char *test;

    /* accept blank entries */
    if (*numstr == '\0') return 0;

    errno = 0;

    val = strtol(numstr, &test, 10);

    if (*test == '\0' && errno != ERANGE) return 0;

    if (!strcmp(numstr, test)) {
      gretl_errmsg_sprintf(M_("'%s' -- no numeric conversion performed!"), numstr);
      return 1;
    }

    if (*test != '\0') {
      if (isprint(*test)) {
          gretl_errmsg_sprintf(M_("Extraneous character '%c' in data"), *test);
      } else {
          gretl_errmsg_sprintf(M_("Extraneous character (0x%x) in data"), *test);
      }
      return 1;
    }

    if (errno == ERANGE || val <= INT_MIN || val >= INT_MAX) {
      gretl_errmsg_sprintf(M_("'%s' -- number out of range!"), numstr);
    }

    return 1;
}

static int transpose_varname_used (const char *vname, 
                           DATASET *dinfo,
                           int imax)
{
    int i;

    for (i=0; i<imax; i++) {
      if (!strcmp(vname, dinfo->varname[i])) {
          return 1;
      }
    }

    return 0;
}

/**
 * transpose_data:
 * @dset: pointer to dataset information struct.
 *
 * Attempts to transpose the current dataset, so that each
 * variable becomes interpreted as an observation and each
 * observation as a variable.
 *
 * Returns: 0 on success, non-zero error code on error.
 */

int transpose_data (DATASET *dset)
{
    DATASET *tset;
    int k = dset->n + 1;
    int T = dset->v - 1;
    int i, t;

    tset = create_new_dataset(k, T, 0);
    if (tset == NULL) {
      return E_ALLOC;
    }

    for (i=1; i<dset->v; i++) {
      for (t=0; t<dset->n; t++) {
          tset->Z[t+1][i-1] = dset->Z[i][t];
      }
    }

    for (t=0; t<dset->n; t++) {
      int k = t + 1;
      char *targ = tset->varname[k];

      if (dset->S != NULL && dset->S[t][0] != '\0') {
          int err;

          *targ = '\0';
          strncat(targ, dset->S[t], VNAMELEN - 1);
          charsub(targ, ' ', '_');
          err = check_varname(targ);
          if (err) {
            sprintf(targ, "v%d", k);
            gretl_error_clear();
          } else if (transpose_varname_used(targ, tset, k)) {
            sprintf(targ, "v%d", k);
          }
      } else {
          sprintf(targ, "v%d", k);
      }
    }

    free_Z(dset);
    dset->Z = tset->Z;

    clear_datainfo(dset, CLEAR_FULL);

    dset->v = k;
    dset->n = T;
    dset->t1 = 0;
    dset->t2 = dset->n - 1;

    dset->varname = tset->varname;
    dset->varinfo = tset->varinfo;

    dataset_obs_info_default(dset);

    free(tset);

    return 0;
}

void dataset_set_regular_markers (DATASET *dset)
{
    dset->markers = REGULAR_MARKERS;
}

03468 struct filetype_info {
    GretlFileType type;
    const char *src;
};

/**
 * dataset_add_import_info:
 * @dset: pointer to dataset information struct.
 * @fname: the name of a file from which data have been imported.
 * @type: code representing the type of the file identified by
 * @fname.
 *
 * On successful import of data from some "foreign" format,
 * add a note to the "descrip" member of the new dataset
 * saying where it came from and when.
 */

void dataset_add_import_info (DATASET *dset, const char *fname,
                        GretlFileType type)
{
    struct filetype_info ftypes[] = {
      { GRETL_CSV,      "CSV" },
      { GRETL_GNUMERIC, "Gnumeric" },
      { GRETL_XLS,      "Excel" },
      { GRETL_XLSX,     "Excel" },
      { GRETL_ODS,      "Open Document" },
      { GRETL_WF1,      "Eviews" },
      { GRETL_DTA,      "Stata" },
      { GRETL_SAV,      "SPSS" },
      { GRETL_SAS,      "SAS" },
      { GRETL_JMULTI,   "JMulTi" }
    };
    int i, nt = sizeof ftypes / sizeof ftypes[0];
    const char *src = NULL;
    gchar *note = NULL;
    char tstr[48];

    for (i=0; i<nt; i++) {
      if (type == ftypes[i].type) {
          src = ftypes[i].src;
          break;
      }
    }

    if (src == NULL) {
      return;
    }

    print_time(tstr);

    if (g_utf8_validate(fname, -1, NULL)) {
      const char *p = strrchr(fname, SLASH);

      if (p != NULL) {
          fname = p + 1;
      }
      note = g_strdup_printf("Data imported from %s file '%s', %s\n",
                         src, fname, tstr);
    } else {
      note = g_strdup_printf("Data imported from %s, %s\n",
                         src, tstr);
    }

    if (note != NULL) {
      if (dset->descrip == NULL) {
          dset->descrip = gretl_strdup(note);
      } else {
          int dlen = strlen(dset->descrip);
          int nlen = strlen(note);
          char *tmp = realloc(dset->descrip, dlen + nlen + 3);

          if (tmp != NULL) {
            dset->descrip = tmp;
            strcat(dset->descrip, "\n\n");
            strncat(dset->descrip, note, nlen);
          }
      }
      g_free(note);
    }
}

Generated by  Doxygen 1.6.0   Back to index