/*$Id: io.c,v 1.13 2007/01/25 10:02:10 mcv21 Exp $*/
/*
 * This file is part of the library of graph analysis and disease
 * simulation functions submitted along with the thesis "Spacial Spread
 * of Farm Animal Diseases" for the degree of Doctor of Philosophy at the
 * University of Cambridge. 
 *
 * The library is Copyright (C) 2007 Matthew Vernon <matthew@debian.org>
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (as gpl.txt); if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */

/*This file contains code for reading in networks, or outputting them
 */

#include "gens.h"
#include "g_al.h"
#include "ferror.h"
#include "gsalgs.h"

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <ctype.h>
#include <errno.h>
#include <string.h>

/*Inverts a node-id lookup table; the definition is at the end of this file*/
static int *reverse_map(const int *map,const int maplen,const int len);

/*Load a network from an edge list: each line of f holds two integer
 *node ids ("from to") with a whitespace character between them.
 *
 *if bi is non-zero, bidirectionalise the graph, equivalent to making
 *it undirected; note that in this case the edge a->b will be counted as
 *a duplicate of the edge b->a, should both exist in the input file.
 *specifying G_ANY will get you the adjacency-lists style of network.
 *If sort is non-zero, then the adjacency lists are sorted.
 *
 *Self-loops and edges touching the "unknown" node -1 are skipped
 *silently.  Any other negative id, or an id of INT_MAX, is a fatal
 *error, since ids are used directly as array indices.
 *
 *Outputs:
 *  size   - (required) number of distinct nodes seen
 *  map    - (optional) receives the input-id -> internal-id table, with
 *           -1 marking ids that never appeared; this function does not
 *           free it once handed over
 *  maplen - (optional) number of entries in that table
 *  revmap - (optional) receives the internal-id -> input-id table built
 *           by reverse_map(); may be requested without map or maplen
 *  edges  - (optional) number of edges read, duplicates included
 *  dup    - (optional) number of those edges counted as duplicates
 */
struct gennet *edges_load(FILE *f, int *size, int **map, int *maplen,
			  int **revmap, int *edges, int *dup, 
			  const int bi, const int sort,
			  const net_desired_t type)
{
  struct gennet *g=NULL;
  struct adjlnet *h;
  struct adjl *l=NULL;
  char line[1024], *mid;
  long lt;
  int sfrom, sto,sbig,ssmall; /*for each end of the edge*/
  int *sql=NULL,sqlmax=-1, sqlmin=0; 
  int n=0,e=0,d=0; /*how many nodes assigned,edges,duplicate edges*/
  int a,b; /*loop variable*/

  /*loop through the lines of the input file
   *reading in two numbers, which are beginning and end of edge
   *Both are checked in the lookup table (and added if needed)
   *sqlid==number in input file
   *gaga id==number in our internal representation
   *sql[x]==g where x is the sql number and g is the gaga number
   *finally an appropriate adjacency-list entry is created.
   *
   *There are a couple of complications. We keep track of the smallest-numbered
   *node we've encountered - this saves having to initialise great tracts of
   *memory (important when dealing with few high-numbered sql nodes, as often
   *seen in RADAR output).
   *
   *This strategy wins over the approach in hacky_edges_load by
   *being order n rather than order n^2 (looking up each sql->gaga
   *mapping is order 1), but if the sql numbers are very high, will
   *lead to the sql array becoming impractically large.
   */
  for(;;){
    if(NULL==fgets(line,(int)sizeof(line),f)){
      if(ferror(f))
        fatal_error("Error reading data file",NULL,1);
      else break;
    }
    errno=0;
    lt=strtol(line,&mid,10);
    if(errno) fatal_error("Error parsing input line",line,1);
    if((lt>INT_MAX)||(lt<INT_MIN))
      fatal_error("Value without integer range",line,0);
    sfrom=(int)lt;
    /*<ctype.h> functions need an unsigned char value, not a plain char*/
    if(isspace((unsigned char)*mid)) mid++;
    else fatal_error("No space in middle of line",line,0);
    errno=0;
    lt=strtol(mid,NULL,10);
    if(errno) fatal_error("Error parsing input line",line,1);
    if((lt>INT_MAX)||(lt<INT_MIN))
      fatal_error("Value without integer range",line,0);
    sto=(int)lt;
    /*Ignore self-loops*/
    if(sto==sfrom) continue;
    /*Ignore the unknown node, -1*/
    if((-1==sto)||(-1==sfrom)) continue;
    /*Ids index the sql array directly, so anything else below zero
     *would write outside it
     */
    if((sto<0)||(sfrom<0))
      fatal_error("Negative node id in input",line,0);
    sbig=sto>=sfrom?sto:sfrom;
    ssmall=sto>=sfrom?sfrom:sto;
    /*sbig+1 entries are needed, and that count must still fit in an int*/
    if(INT_MAX==sbig)
      fatal_error("Node id too large",line,0);

    /*extend the initialised region downwards if need be*/
    if(ssmall<sqlmin){
      for(a=ssmall;a<sqlmin;a++)
        sql[a]=-1;
      sqlmin=ssmall;
    }

    if(sbig>sqlmax){ /*grow the sql array as necessary*/
      sql=xrealloc(sql,((size_t)sbig+1)*sizeof(*sql));
      if(sqlmin>sqlmax){ /*initial state*/
        for(a=ssmall;a<=sbig;a++)
          sql[a]=-1;
        sqlmin=ssmall;
      }
      else
        for(a=(sqlmax+1);a<=sbig;a++)
          sql[a]=-1;
      sqlmax=sbig;
    }
    /*allocate internal ids (and empty adjacency lists) for new nodes*/
    if(sql[sto]==-1){
      sql[sto]=n;
      l=xrealloc(l,((size_t)n+1)*sizeof(*l));
      l[n].id=n; l[n].length=0; l[n].n=0; l[n].neighb=NULL;
      n++;
    }
    if(sql[sfrom]==-1){
      sql[sfrom]=n;
      l=xrealloc(l,((size_t)n+1)*sizeof(*l));
      l[n].id=n; l[n].length=0; l[n].n=0; l[n].neighb=NULL;
      n++;
    }
    /*every edge is counted; a zero return from adjl_add additionally
     *counts it as a duplicate
     */
    e++;
    if(!adjl_add(&l[sql[sfrom]],sql[sto])) d++;
    if(bi)adjl_add(&l[sql[sto]],sql[sfrom]);
  }
  
  *size=n;
  /*Elements 0->sqlmin of sql were never initialised, so set them to -1
   *now: both the caller (via map) and reverse_map() read the table
   *from index 0, and junk there would later confuse matters.
   *If no edge was read sql is NULL and sqlmin is 0, so nothing happens.
   */
  for(a=0;a<sqlmin;a++) sql[a]=-1;
  if(maplen) *maplen=sqlmax+1;
  /*Build the reverse map while sql is still allocated, and without
   *relying on the optional maplen pointer
   */
  if(revmap) *revmap=reverse_map(sql,sqlmax+1,n);
  if(map) *map=sql;
  else free(sql); /*avoid a leak*/
  if(edges) *edges=e;
  if(dup) *dup=d;

  if(G_ADJL==type || G_ANY==type){
    /*hand the adjacency lists built above straight to the new network*/
    g=adjlist_create(0);
    h=(struct adjlnet *)g;
    h->n=n;
    h->net=l;
    h->sorted=0;
    if(sort) g->vtable->sort(g);
    return(g);
  }
  else if(G_BITM==type)
    g=bitmatrix_create(n);
  else if(G_INTM==type)
    g=intmatrix_create(n);
  else fatal_error("Unknown network type",NULL,0);
  /*copy every edge from the temporary lists into the matrix network*/
  for(a=0;a<n;a++)
    for(b=0;b<l[a].n;b++)
      g->vtable->set(g,a,l[a].neighb[b]);

  /*Avoid a memory leak*/
  adjlstar_free(l,n);

  return(g);
}

/*This is a very simplistic parser of UCINET's "DL" format; specifically
 *you must get the whole network output, without labels.
 *
 *Expected input: a first line starting "DL" and containing "n=<count>",
 *a second line starting "data:", then n rows of n entries each.
 *If binary is non-zero each entry is a single '0' or '1' (spaces are
 *skipped) and a '1' sets the edge row->column; otherwise each entry is
 *an integer that is stored with the network's setval method.
 *
 *size receives n.  type selects the representation; G_ANY yields an
 *integer matrix.  Any malformed input is reported via fatal_error().
 */
struct gennet *dl_load(FILE *f, int *size, const int binary, 
		       const net_desired_t type)
{
  char header[200];
  char *s, *line, *t;
  long n,l,a,b,tmpv;
  struct gennet *g;

  if(NULL==fgets(header,(int)sizeof(header),f)){
    if(ferror(f))
      fatal_error("Error reading dl file",NULL,1);
    else fatal_error("No data in dl file",NULL,1);
  }  

  if(strncasecmp(header,"DL",2))
    fatal_error("dl file lacks appropriate header",header,0);
  if(NULL==(s=(strstr(header,"n="))))
    fatal_error("syntax error: no \"n=\" found", header,0);
  /*advance two characters, so we're at the beginning of the number*/
  s+=2;
  errno=0;
  n=strtol(s,&t,10);
  if(0!=errno)
    fatal_error("failed to read number of nodes",s,1);
  /*strtol consumes nothing (and returns 0) when there are no digits*/
  if(t==s)
    fatal_error("failed to read number of nodes",s,0);

  if(NULL==fgets(header,(int)sizeof(header),f)){
    if(ferror(f))
      fatal_error("Error reading dl file",NULL,1);
    else fatal_error("dl file lacks second line!",NULL,1);
  }
  if(strncasecmp("data:",header,5))
    fatal_error("Unexpected contents of second dl line",header,0);

  if(n>INT_MAX)
    fatal_error("n is greater than INT_MAX",NULL,0);
  /*a negative count would turn into a huge allocation request below*/
  if(n<0)
    fatal_error("n is negative",NULL,0);
  /*the row buffer length is handed to fgets, which takes an int*/
  if(n>(INT_MAX-3)/(binary?2:5))
    fatal_error("n is too large for the line buffer",NULL,0);
  *size=(int)n;

  if(G_ANY==type)
    g=intmatrix_create(n);
  else
    g=type_create(n,type);

  /*one extra byte each so that CR, LF and the terminating NUL all fit*/
  if(binary)
    l=(n*2)+3; /*digit+space for each node in each line*/
  else l=(n*5)+3; /*Allow space for 4-digit strengths...*/ 
  line=xmalloc((size_t)l*sizeof(*line));

  for(a=0L;a<n;a++){
    if(NULL==(fgets(line,(int)l,f))){
      if(ferror(f))
        fatal_error("Error reading dl file",NULL,1);
      else 
        fatal_error("EOF reached too early",NULL,0);
    }
    s=line;
    b=0L;
    if(binary){
      /*Stop as soon as n entries have been read, so the column index
       *never leaves the matrix.  Extra data held in the buffer is
       *ignored; anything that did not fit in the buffer is left in
       *the stream for the next read.
       */
      while(b<n){
        switch(*s){
        case '1': g->vtable->set(g,a,b); /*note fall-through*/
        case '0': b++;
          break;
        case ' ': break; /*do nothing with a space*/
        case '\n': case '\r': case '\0': /*row ended before n entries*/
          fatal_error("not enough data on line",line,0);
          break;
        default: fatal_error("Unexpected character in dl file",line,0);
          break;
        }
        s++;
      }
    }else{
      for(b=0;b<n;b++){
        errno=0;
        tmpv=(strtol(s,&t,10));
        if(ERANGE==errno) fatal_error("Number too large",s,1);
        /*no digits consumed: the row ran out of data or holds junk*/
        if(t==s) fatal_error("Unable to parse line",s,0);
        if((tmpv>INT_MAX)||(tmpv<INT_MIN))
          fatal_error("Value out of range",s,0);
        g->vtable->setval(g,a,b,((int)tmpv));
        s=t;
      }
    }
  }
  /*the row buffer is purely local; avoid a leak*/
  free(line);
  return(g);
}

/*Write the network g, taken to have n nodes, to f in "DL" format: a
 *"DL n=<n>" line, a "data:" line, then n rows of n entries, each entry
 *followed by a space.  For a G_INTM network an entry is the value the
 *network's test method returns; for any other type it is 1 when that
 *value is non-zero and 0 otherwise.
 *Returns 0 on success, or -1 as soon as an fprintf fails, in which case
 *the caller must check errno for the reason.
 */
int dl_output(const struct gennet *g, const int n, FILE *f)
{
  int row=0,col,rc;

  if(fprintf(f,"DL n=%d\ndata:\n",n)<0) return(-1);

  while(row<n){
    for(col=0;col<n;col++){
      if(G_INTM!=g->vtable->type)
        rc=fprintf(f,"%d ",g->vtable->test(g,row,col)?1:0);
      else
        rc=fprintf(f,"%d ",g->vtable->test(g,row,col));
      if(rc<0) return(-1);
    }
    /*end of this row*/
    if(fprintf(f,"\n")<0) return(-1);
    row++;
  }

  return(0);
}

/*Write the degree distribution d to f, one line per entry, as the
 *in[] value, a space, then the out[] value.  d->n gives the number of
 *entries.
 *Returns 0 on success, or -1 as soon as an fprintf fails, in which case
 *the caller must check errno for the reason.
 */
int dd_output(const struct degdist *d, FILE *f)
{
  int idx=0;

  while(idx<d->n){
    if(fprintf(f,"%d %d\n",d->in[idx],d->out[idx])<0)
      return(-1);
    idx++;
  }
  return(0);
}

/*Invert a lookup table: for every index x below maplen whose entry
 *map[x] is not -1, record x at position map[x] of a newly allocated
 *table of len ints, which is returned.
 *Positions that no entry of map points at keep whatever value xcalloc
 *gave them (presumably zero - confirm against xcalloc).
 *Every entry of map other than -1 is assumed to lie in 0..len-1; this
 *is not checked here.
 */
static int *reverse_map(const int *map,const int maplen,const int len)
{
  int *inverse=xcalloc(len,sizeof(int));
  int from;

  for(from=0;from<maplen;from++){
    const int to=map[from];

    if(-1==to) continue; /*unused slot in the forward table*/
    inverse[to]=from;
  }
  return inverse;
}
