/*$Id: genalg.c,v 1.23 2007/01/25 10:02:10 mcv21 Exp $*/
/*
 * This file is part of the library of graph analysis and disease
 * simulation functions submitted along with the thesis "Spacial Spread
 * of Farm Animal Diseases" for the degree of Doctor of Philosophy at the
 * University of Cambridge. 
 *
 * The library is Copyright (C) 2007 Matthew Vernon <matthew@debian.org>
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (as gpl.txt); if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */

/*This file contains graph algorithms that only use gens.h
 *structures and don't need to see inside the various 
 *representations
 */
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <math.h>

#include "ferror.h"
#include "gens.h"
#include "gsalgs.h"
#include "gads.h"

/*State shared between strong_component_count() and the recursive
 *calls of scc_visit(). All the per-node arrays are indexed by node
 *number.
 */
struct sccs{ /*strong component count state*/
  const struct gennet *g; /*the graph being analysed*/
  int *visited; /*visit number of each node, -1 if not visited yet*/
  int *root; /*current candidate root of each node's component*/
  short int *incomponent; /*non-zero once a node is in a finished component*/
  struct sll *stack; /*visited nodes not yet assigned to a component*/
  FILE *out; /*component sizes are printed here, one per line*/
}; 

/*Forward declarations of the file-local helpers defined further down*/
static int rlinks(int *nodes,int n,const struct gennet *g,int cur);
static int scc_visit(struct sccs *s,int node,int vcount);

/*Populate the graph g (which has n nodes) with e randomly chosen edges
 *and return g.
 *if sym is non-zero, generate a symmetric graph: every edge is also
 *set in the reverse direction (the reverse edge is not counted
 *towards e).
 *Self-loops and edges that already exist are rejected and redrawn, so
 *the caller must not ask for more edges than the graph can hold (and
 *needs n>=2 when e>0), otherwise this never terminates.
 *Random numbers come from random().
 *This naive algorithm is apparently better
 *than a more complex one except in pathological cases
 *(see Batagelj:2005 for details of that)
 */
struct gennet *rand_graph(struct gennet *g, const int n,
			  const int e,const int sym)
{
  int a,u,v;

  for(a=0;a<e;a++){
    u=(int)(random()%n);
    v=(int)(random()%n);
    /*Test for a self-loop BEFORE calling set: the other way round
     *the loop edge is stored in the graph even though it is then
     *not counted.
     *set returns 0 if the edge already exists
     */
    if((u==v)||(!(g->vtable->set(g,u,v)))) a--;
    else if(sym) g->vtable->set(g,v,u);
  }
  return(g);
}

/*Single-source shortest paths on an unweighted graph of n nodes.
 *Returns an array of n ints obtained from xmalloc(), which the caller
 *owns: entry i is the number of edges on the shortest path from start
 *to node i, or INT_MAX if no path was found.
 *NOTE(review): the heap obtained from fh_new_heap() is not released
 *here - confirm how it should be freed.
 */
int *dijkstra(const struct gennet *g, const int n, const int start)
{
  struct fh_node *node_of[n]; /*variable length array (C99)*/
  struct fh_node *cur, *nb;
  struct fh_heap *heap;
  int i, *dist;

  heap=fh_new_heap();

  /*every node goes into the heap: the start at distance 0,
   *everything else "infinitely" far away
   */
  for(i=0;i<n;i++){
    node_of[i]=fh_new_node();
    node_of[i]->id=i;
    if(i==start) node_of[i]->key=0;
    else node_of[i]->key=INT_MAX;
    fh_insert_node(node_of[i],heap);
  }

  /*Dijkstra's algorithm (eg p527 of Introduction to Algorithms)
   *with every edge of unit weight: repeatedly take the closest
   *remaining node and relax each edge leaving it
   */
  while(NULL!=(cur=fh_extract_min(heap))){
    for(i=0;i<n;i++){
      if(!(g->vtable->test(g,cur->id,i))) continue;
      nb=node_of[i];
      /*written as key-1 so that an INT_MAX key on the left
       *never has 1 added to it
       */
      if(cur->key < ((nb->key)-1))
	fh_decrease_key(nb,heap,(cur->key +1));
    }
  }

  /*copy the final keys out, releasing each node once it is read*/
  dist=xmalloc(n * sizeof(*dist));
  for(i=0;i<n;i++){
    dist[i]=node_of[i]->key;
    free(node_of[i]);
  }
  return(dist);
}

/*calculates betweenness centrality on an unweighted digraph of n nodes
 *algorithm from Brandes(2001)
 *Returns an array of n doubles obtained from xmalloc(), one value per
 *node, which the caller owns.
 *This is then scaled according to Freeman(1979) if normalise is
 *non-zero, i.e. every value is divided by (n^2-3n+2)/2. That factor is
 *zero when n is 1 or 2; the values (all zero in that case) are then
 *left unscaled rather than being turned into NaN.
 *Note that if this is an undirected graph, you must divide by 2 again
 */
double *between(const struct gennet *g, const int n, const int normalise)
{
  /*C99 syntax*/
  struct sll p[n]; /*predecessor lists*/
  int sp[n]; /*number of shortest paths to node [x]*/
  int d[n]; /*distance to node [x]*/
  int a,b,v,w;
  double depend[n]; /*array of pair-dependencies*/
  double scale; /*(n^2-3n+2)/2*/
  struct stack *s;
  struct fifo_q *q;
  double *ans;

  ans=xmalloc(n * sizeof(*ans));

  for(a=0;a<n;a++)ans[a]=0.0;

  /*one breadth-first search per source node a*/
  for(a=0;a<n;a++){
    /*initialize things*/
    s=new_stack(n); 
    for(b=0;b<n;b++){ 
      p[b].top=NULL;
      p[b].bottom=NULL;
      sp[b]=0;
      d[b]=-1; /*-1 === not reachable*/
      depend[b]=0.0;
    }
    sp[a]=1; /*there exists a shortest path to our starting point*/
    d[a]=0; /*And it is zero distance away*/
    q=fq_new_queue(n);
    
    fq_insert_node(q,a);
    while(-1!=(v=fq_remove_node(q))){ /*while q not empty*/
      push(s,v);
      for(w=g->vtable->next_neighb(g,v,-1);w!=-1;
	  w=g->vtable->next_neighb(g,v,w)){
	if(d[w]<0){ /*w found for first time?*/
	  fq_insert_node(q,w);
	  d[w]=d[v]+1;
	}
	if(d[w]==d[v]+1){ /*SP to w is via v?*/
	  /*NOTE(review): sp is an int, so a graph with a huge number
	   *of equally short paths could overflow it*/
	  sp[w]+=sp[v]; /*since there are now sp[v] more routes to w*/
	  sll_append(&p[w],v); /*v is a predecessor of w*/
	}
      }
    }
    /*s will provide vertices in order of non-increasing
     *distance from a
     */
    while(-1!=(w=pop(s))){
      /*hand w's dependency back to each of its predecessors, in
       *proportion to the share of shortest paths that run through
       *them; this also empties p[w] ready for the next source
       */
      while(-1!=(v=sll_remove(&p[w]))){
	depend[v]+=(((1.0+depend[w]) * sp[v])/sp[w]);
      }
      /*apart from the source, increase the betweenness value
       *of every node according to its betweenness between
       *the source and everything else*/
      if(w!=a) ans[w]+=depend[w];      
    }
    /*free memory, to prevent leaks*/
    free(s->s);
    free(s);
    free(q->queue);
    free(q);
  }
  /*Now scale the centrality values*/
  if(normalise){
    /*Work in floating point, so that n*n cannot overflow an int.
     *The factor is 0 for n==1 and n==2 (no node can lie between two
     *others); dividing by it would give NaN, so skip the scaling.
     */
    scale=((((double)n)*n) - (3.0*n) + 2.0)/2.0;
    if(scale>0.0)
      for(a=0;a<n;a++) ans[a]/=scale;
  }
  return(ans);
}

/*Mean shortest path length of the graph g, which has n nodes.
 *Runs dijkstra() from every node and averages the path lengths over
 *all ordered pairs of distinct nodes that are joined by a path
 *(pairs with no path, marked INT_MAX by dijkstra(), are left out).
 *If there is no such pair at all the final division is by zero.
 *Exits with an error message if either running total would overflow.
 */
long double msp (const struct gennet *g, const int n)
{
  long tot; /*total path length from the current starting vertex*/
  int howmany; /*how many pairings from the current starting vertex*/
  long gt=0L; /*grand total path length*/
  int tp=0; /*total number of pairs of vertices*/
  int *lengths;
  int a,b;
  long double dt; /*grand total as a double*/

  /*calculate all-pairs shortest paths, and sum each*/
  for(a=0;a<n;a++){
    tot=0;
    lengths=dijkstra(g,n,a);
    howmany=n-1; /*discount the a-a pair of nodes*/
    for(b=0;b<n;b++){
      if(INT_MAX==lengths[b]) howmany--; /*no link exists*/
      else tot+=lengths[b];
    }
    /*dijkstra() hands over a freshly allocated array on every call;
     *release it, or n arrays of n ints are leaked
     */
    free(lengths);
    if((LONG_MAX-gt)<tot){
      fprintf(stderr,"Error: LONG_MAX about to be exceeded, giving up\n");
      exit(1);
    }
    gt+=tot;
    if((INT_MAX-tp)<howmany){
      fprintf(stderr,"Error: INT_MAX about to be exceeded, giving up\n");
      exit(1);
    }
    tp+=howmany;
  }
  dt=gt;
  /*warn if the conversion to floating point lost precision*/
  if((long)dt != gt) fprintf(stderr,"Warning: %ld rounded to %ld\n",
			     gt,(long)dt);
  return(dt/tp);
}

/*Clustering coefficient of the network g, which has n nodes.
 *For each node, the neighbours reported by next_neighb are collected
 *and the links among them are counted in both directions, so a node
 *with k>1 neighbours contributes links/(k(k-1)); nodes with fewer than
 *two neighbours contribute nothing.
 *If exclude is non-zero the sum is averaged over only the nodes with
 *more than one neighbour, otherwise over all n nodes.
 */
double cluster(const struct gennet *g, const int n, const int exclude)
{
  int node,nb;
  int k; /*neighbours of the current node found so far*/
  int counted=n; /*nodes that have more than one neighbour*/
  int links; /*links found among the current node's neighbours*/
  int members[n]; /*the current node's neighbours (C99 VLA)*/
  double total=0L;

  for(node=0;node<n;node++){
    k=0;
    links=0;
    memset(&(members[0]),0,n*sizeof(*members));
    nb=g->vtable->next_neighb(g,node,-1);
    while(nb!=-1){
      /*store the new neighbour, then count its links to and from
       *all the neighbours collected before it
       */
      members[k]=nb;
      links+=rlinks(&(members[0]),k,g,k);
      k++;
      nb=g->vtable->next_neighb(g,node,nb);
    }
    if(k>1) total+=(links/(k*(k-1.0)));
    else counted--;
  }
  if(exclude) return(total/counted);
  return(total/n);
}

/*Count the links between nodes[cur] and each of nodes[0]..nodes[n-1],
 *looking in both directions, so a pair linked both ways counts twice.
 *Returns 0 when n is 0.
 */
static int rlinks(int *nodes,int n,const struct gennet *g,int cur)
{
  /*XXX should 1 node be clustering of 1 or 0???*/
  int i,found=0;

  for(i=n-1;i>=0;i--){
    if(g->vtable->test(g,nodes[cur],nodes[i])) found++;
    if(g->vtable->test(g,nodes[i],nodes[cur])) found++;
  }
  return(found);
}

/*Work out the in- and out-degree of every node of g (n nodes).
 *Returns a newly allocated struct degdist with n set, and in[] and
 *out[] holding, for each node, the number of edges arriving at it and
 *leaving it respectively. The caller owns the result.
 */
struct degdist *dd(const struct gennet *g,const int n)
{
  struct degdist *dist;
  int src,dst;

  dist=xmalloc(sizeof(*dist));
  /*xcalloc is relied on to start every count at zero*/
  dist->in=xcalloc(n,sizeof(*dist->in));
  dist->out=xcalloc(n,sizeof(*dist->out));
  dist->n=n;

  for(src=0;src<n;src++){
    dst=g->vtable->next_neighb(g,src,-1);
    while(dst!=-1){
      /*one edge src->dst*/
      dist->in[dst]++;
      dist->out[src]++;
      dst=g->vtable->next_neighb(g,src,dst);
    }
  }
  return(dist);
}

/*Generate a random graph whose nodes have the in- and out-degrees
 *given in d, by pairing each "in-stub" with an out-stub chosen
 *uniformly at random from those still unused.
 *e is the number of edges: the in-degrees in d must sum to exactly e
 *and the out-degrees to at least e, otherwise fatal_error is called.
 *type selects the representation; G_ANY means an adjacency list.
 *Self-loops and duplicate edges are rejected and redrawn; more than e
 *rejections of one kind without an edge being placed is a fatal error.
 *Random numbers come from drand48(). Returns the new graph.
 */
struct gennet *dd_gen(const int e,const struct degdist *d,
		      const net_desired_t type)
{
  struct gennet *g;
  int *in,nc,ec=0; /*array of in-stubs, node counter, count of in-stubs*/
  int oc=0; /*count of out-stubs currently in the list*/
  struct dll *out; /*list of unused out-stubs*/
  struct dlln *tmp;
  int sl,dup; /*how many self-loops/duplicates*/
  int i,j,from;

  if(G_ANY==type) g=adjlist_create(d->n);
  else g=type_create(d->n,type);

  out=new_dll();
  in=xmalloc(e*sizeof(*in));

  /*one entry per unit of degree: node nc appears d->in[nc] times in
   *in[] and d->out[nc] times in the out list
   */
  for(nc=0;nc<d->n;nc++){
    for(i=0;i<d->in[nc];i++){
      /*in[] only has room for e entries*/
      if(ec>=e) fatal_error("In-degrees sum to more than e",NULL,0);
      in[ec]=nc; ec++;
    }
    for(i=0;i<d->out[nc];i++){
      dll_append(out,nc); oc++;
    }
  }
  /*every in[i] below must have been filled in, and there must be an
   *out-stub left to pair with each of them
   */
  if(ec<e) fatal_error("In-degrees sum to less than e",NULL,0);
  if(oc<e) fatal_error("Out-degrees sum to less than e",NULL,0);

  sl=0;dup=0;
  for(i=0;i<e;i++){
    /*drand48() is in [0,1), so this picks each of the oc remaining
     *out-stubs with equal probability (rounding to the nearest index
     *would give the first and last stub only half the chance)
     */
    from=(int)(drand48()*oc);
    tmp=out->top;
    for(j=0;j<from;j++) tmp=tmp->next;
    if(tmp->x == in[i]){ /*avoid a self-loop*/
      i--; sl++; /*XXX should we make this configurable?*/
      if(sl>e) fatal_error("Excessive self-loops",NULL,0);
    }
    else{
      /*set returns 0 if the edge already exists*/
      if(g->vtable->set(g,tmp->x,in[i])){
	dll_remove(out,tmp); /*this out-stub is now used up*/
	oc--;
	sl=0; dup=0;
      }
      else{i--;dup++;
	if(dup>e) fatal_error("Excessive duplicate edges",NULL,0);
      }
    }
  }

  free(out); free(in);
  return(g);
}

/*Very similar in effect to rand_graph above, only uses drand48()
 *for random numbers, and generates the graph structure itself:
 *creates a graph of n nodes (an adjacency list if type is G_ANY,
 *otherwise of the requested type), adds e random edges and returns it.
 *If sym is non-zero every edge is also set in the reverse direction
 *(the reverse edge is not counted towards e).
 *Self-loops and edges that already exist are rejected and redrawn, so
 *asking for more edges than the graph can hold never terminates.
 */
struct gennet *poisson_gen(const int n, const int e, const int sym,
			   const net_desired_t type)
{
  struct gennet *g;
  int a,u,v;

  if(G_ANY==type) g=adjlist_create(n);
  else g=type_create(n,type);

  for(a=0;a<e;a++){
    u=(int)(drand48()*n);
    v=(int)(drand48()*n);
    /*Test for a self-loop BEFORE calling set: the other way round
     *the loop edge is stored in the graph even though it is then
     *not counted.
     *set returns 0 if the edge already exists
     */
    if((u==v)||(!(g->vtable->set(g,u,v)))) a--;
    else if(sym) g->vtable->set(g,v,u);
  }
  return(g);
}

/*Generates a random scale-free network using
 *the preferential attachment model of Barabasi(1999).
 *The model starts from x_zero unconnected nodes and then runs for t
 *steps; each step adds one node and links it (in both directions) to
 *x existing nodes, so the result has x_zero+t nodes. type selects the
 *representation, G_ANY meaning an adjacency list. x must not exceed
 *x_zero. Random numbers come from drand48().
 *This isn't a very efficient model.
 *
 *Calculating x,x_zero,t for desired final node and edge counts
 *is a little tricky - see my PhD thesis for how to do it.
 *[the below makes a couple of unsafe assumptions, but may work for you]
 *n=x0+t and e=xt => x0=n-e/x
 *The region of x,x0 values where x0>=x is bounded where x=x0
 * i.e. p=n-e/p => p^2-np+e=0 => p=(n+-sqrt(n^2-4e))/2
 * You want the lowest integer value of x that satisfies this, so 
 * take the negative root, and pick the next largest integer. e.g.:
 * p=sqrt(n*n - 4*e);
 * if(isnan(p)) fatal_error("sqrt failed",NULL,1);
 * x=ceil((n-p)/2.0);
 * t=floor((e/(double)x)+0.5); XXX use round()
 * x_zero=n-t;
 * fprintf(stderr,"%s using x=%d, xzero=%d, t=%d\n",argv[1],x,x_zero,t);
 * round() is better if you have a proper C99 implementation
 * If so, sqrtl() will handle larger graphs, too.
 */
struct gennet *barabasi_gen(const int x_zero, const int x, 
			    const int t, const net_desired_t type)
{
  struct gennet *g;
  int *degree; /*number of links each node has so far*/
  int n=x_zero; /*number of nodes currently in the model*/
  int e=0; /*number of links placed so far*/
  int step,newnode,added,u;

  if(x>x_zero)
    fatal_error("x must be less than or equal to x_zero",NULL,0);

  if(G_ANY==type) g=adjlist_create(x_zero+t);
  else g=type_create(x_zero+t,type);
  degree=xcalloc(x_zero+t,sizeof(*degree));

  /*New nodes are numbered from x_zero upwards, because the model
   *begins with x_zero unconnected nodes
   */
  for(step=0;step<t;step++){
    n++;
    newnode=x_zero+step;
    if(0==e){
      /*nothing is linked yet, so there are no degrees to prefer:
       *simply attach the new node to nodes 0..x-1
       */
      for(added=0;added<x;added++){
	g->vtable->set(g,added,newnode);
	g->vtable->set(g,newnode,added);
	degree[added]++;
	degree[newnode]++;
	e++;
      }
    }else{
      /*sweep over the existing nodes as many times as it takes to
       *place x links; on each visit node u is accepted with
       *probability degree[u]/e (e grows as links are added)
       */
      added=0;
      while(added<x){
	for(u=0;u<(n-1);u++){
	  /*set is only attempted once the random test has passed,
	   *and returns 0 if newnode is already linked to u
	   */
	  if(((drand48())<=(((double)degree[u])/e)) &&
	     g->vtable->set(g,u,newnode)){
	    g->vtable->set(g,newnode,u);
	    added++;
	    e++;
	    degree[newnode]++;
	    degree[u]++;
	    if(added==x) break;
	  }
	}
      }
    }
  }
  free(degree);
  return(g);
}

/*This algorithm is described in Nuutila:1993
 *It is "Algorithm 1" in that paper, as we need to be able to 
 *get at the nodes in each component, rather than merely the roots
 *of each component
 *
 *Recursive depth-first visit of node. vcount is the visit number
 *that node is given; the return value is the next unused visit
 *number, after node and everything first reached from it have been
 *numbered. Each time a component is completed its size is printed
 *on a line of its own to s->out.
 */

static int scc_visit(struct sccs *s,int node,int vcount)
{
  int vc, w,size;
  /*to begin with a node is its own candidate root, and belongs to
   *no finished component*/
  s->root[node]=node; s->incomponent[node]=0;
  s->visited[node]=vcount;
  vc=vcount+1;
  for(w=s->g->vtable->next_neighb(s->g,node,-1);w!=-1;
      w=s->g->vtable->next_neighb(s->g,node,w)){
    /*-1 means w has not been visited yet, so descend into it first*/
    if(-1==s->visited[w])
      vc=scc_visit(s,w,vc);
    /*if w is not in a finished component, take over its candidate
     *root when that root was visited earlier than our current one*/
    if(0==s->incomponent[w])
      if(s->visited[s->root[node]] > s->visited[s->root[w]]) 
	s->root[node]=s->root[w];
  }
  if(s->root[node]==node){
    /*node is still its own root, so a component is complete: it
     *consists of node plus every node on the stack that was visited
     *after node (an empty stack ends the loop, since -1 is below
     *every visit number)*/
    size=1;
    s->incomponent[node]=1;
    while((s->stack->top==NULL?-1:s->visited[s->stack->top->x]) 
	  > s->visited[node]){
      w=sll_pop(s->stack);
      s->incomponent[w]=1;
      size++;
    }
    fprintf(s->out,"%d\n",size);
  }else sll_push(s->stack,node); /*wait on the stack for our root to finish*/
  return(vc);
}

/*Find the strongly connected components of g (n nodes) and print the
 *size of each one, one per line, to out. The work is done by
 *scc_visit(), started from every node that has not been reached by
 *an earlier visit.
 */
void strong_component_count(const struct gennet *g, const int n,FILE *out)
{
  int visited[n]; /*c99-ism*/
  int root[n];
  short int incomponent[n];
  struct sccs s;
  int i,vc=0;

  /*-1 in visited marks a node that has not been visited yet*/
  for(i=0;i<n;i++){
    visited[i]=-1; root[i]=INT_MAX; incomponent[i]=0;
  }

  /*pack the scc state structure*/
  s.g=g; s.visited=visited; s.root=root; s.incomponent=incomponent;
  s.stack=new_sll(); s.out=out;

  for(i=0;i<n;i++)
    if(-1==visited[i]) vc=scc_visit(&s,i,vc);

  /*Release the list header, which was previously leaked on every
   *call. This follows the way dd_gen() releases its list with a
   *plain free(); it assumes new_sll() returns a single malloc'd
   *block - TODO confirm against gads.h
   */
  free(s.stack);
}

/*Build and return a new graph of n nodes, of the same type as g, in
 *which every edge of g is present in both directions. g itself is
 *left untouched; the caller owns the new graph.
 */
struct gennet *bidirectionalise(const struct gennet *g, const int n)
{
  struct gennet *both;
  int from,to;

  both=type_create(n,g->vtable->type);

  for(from=0;from<n;from++){
    to=g->vtable->next_neighb(g,from,-1);
    while(to!=-1){
      /*copy the edge, then add its reverse*/
      both->vtable->set(both,from,to);
      both->vtable->set(both,to,from);
      to=g->vtable->next_neighb(g,from,to);
    }
  }
  return(both);
}

/*Print the size of each weakly connected component of g (n nodes) to
 *out, one per line. This is done by running strong_component_count()
 *on a temporary copy of g in which every edge runs in both directions.
 */
void weak_component_count(const struct gennet *g, const int n,FILE *out)
{
  struct gennet *undirected=bidirectionalise(g,n);

  strong_component_count(undirected,n,out);
  /*the copy was only needed for the count*/
  undirected->vtable->free(undirected);
}
