/*$Id: gsalgs.h,v 1.7 2007/01/25 10:02:10 mcv21 Exp $*/
/*
 * This file is part of the library of graph analysis and disease
 * simulation functions submitted along with the thesis "Spacial Spread
 * of Farm Animal Diseases" for the degree of Doctor of Philosophy at the
 * University of Cambridge. 
 *
 * The library is Copyright (C) 2007 Matthew Vernon <matthew@debian.org>
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (as gpl.txt); if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */

#ifndef GSALGS_H
#define GSALGS_H

#include <stdio.h>

#include "gens.h"
#include "census.h"

/***GENERATING FUNCTIONS***/

/*Generates a random graph, including the graph structure itself.
 *Very similar in effect to rand_graph below, only it uses drand48()
 *for random numbers.
 *NOTE(review): the parameters are not described here; by analogy with
 *rand_graph, presumably n is the number of nodes, e the number of
 *edges, and a non-zero sym requests a symmetric graph, while type
 *selects the network representation wanted - confirm against the
 *implementation.
 */
struct gennet *poisson_gen(const int n, const int e, const int sym,
			   const net_desired_t type);

/*Generates a random scale-free network using
 *the preferential attachment model of Barabasi(1999).
 *This isn't a very efficient model.
 *NOTE(review): in the usual statement of that model x_zero is the
 *number of initial nodes, and one node with x edges is added at each
 *of t time steps; this matches the relations below, but confirm
 *against the implementation.
 *
 *Calculating x,x_zero,t for desired final node and edge counts
 *(n and e respectively) is a little tricky:
 * n=x0+t and e=xt => x0=n-e/x
 *The region of x,x0 values where x0>=x is bounded where x=x0
 * i.e. p=n-e/p => p^2-np+e=0 => p=(n+-sqrt(n^2-4e))/2
 *You want the lowest integer value of x that satisfies this, so
 *take the negative root, and pick the next largest integer, e.g.:
 * p=sqrt(n*n - 4*e);
 * if(isnan(p)) fatal_error("sqrt failed",NULL,1);
 * x=ceil((n-p)/2.0);
 * t=floor((e/(double)x)+0.5); XXX use round()
 * x_zero=n-t;
 * fprintf(stderr,"%s using x=%d, xzero=%d, t=%d\n",argv[1],x,x_zero,t);
 *round() is better than floor(...+0.5) if you have a proper C99
 *implementation. If so, sqrtl() will handle larger graphs, too.
 */
struct gennet *barabasi_gen(const int x_zero, const int x, 
			    const int t, const net_desired_t type);

/*Generate a network with a particular degree distribution.
 *e is the number of edges
 *d is the degree distribution to generate from
 *NOTE(review): type presumably selects the network representation
 *returned - confirm against the implementation.
 */
struct gennet *dd_gen(const int e,const struct degdist *d,
		      const net_desired_t type);

/*Rewire the network g to match the dyad census dc,
 *whilst preserving the 2-dimensional degree distribution.
 *This uses the adjacency-lists representation.
 *NB! If you pass in another type of network, then the adjacency-list
 *form is returned, but the net you passed in IS NOT freed. If you
 *don't intend using that again, free it yourself!
 *NOTE(review): n is presumably the number of nodes in g - confirm.
 */
struct gennet *dyad_rewire(struct gennet *g,const int n, 
			   const dyad_count *dc);

/*Deprecated - use poisson_gen instead!
 *Populate the matrix with e edges.
 *If sym is non-zero, generate a symmetric graph.
 *This naive algorithm is apparently better
 *than a more complex one except in pathological cases
 *(see Batagelj:2005 for details of that).
 *NOTE(review): n is presumably the number of nodes in g, and the
 *return value presumably the populated network - confirm.
 */
struct gennet *rand_graph(struct gennet *g, const int n,
			  const int e,const int sym);

/***SIMULATION FUNCTION***/

/*Stochastic discrete-time simulation of disease (SIR model)
 *on the network g (with n nodes).
 * istart - number of nodes that start infected
 * risk   - probability with which each I->S edge transmits
 *          infection, per time point
 * remain - number of iterations for which a node remains infected
 * t      - number of time points to run for; the simulation stops
 *          earlier if the infection has died out
 *NOTE(review): out is presumably where the results are written; the
 *output format is not described here - see the implementation.
 */
void sir_net(struct gennet *g, const int n, const int istart, 
	     const double risk, const int remain, const int t,
	     FILE *out);

/***I/O FUNCTIONS***/
/*Load in a network from f, specified with one edge on a line:
 *to from\n <-- to and from are numbers
 *If bi is non-zero, bidirectionalise the graph, equivalent to making
 *it undirected; note that in this case the edge a->b will be counted as
 *a duplicate of the edge b->a, should both exist in the input file.
 *Specifying G_ANY as type will get you the adjacency-lists style of
 *network.
 *If sort is non-zero, the adjacency-lists are sorted.
 *NOTE(review): size, map, maplen, revmap, edges and dup look like
 *output parameters (presumably node count, node-number mappings, edge
 *count and duplicate-edge count), but are not described here -
 *confirm against the implementation.
 */
struct gennet *edges_load(FILE *f, int *size, int **map, int *maplen,
			  int **revmap, int *edges, int *dup, 
			  const int bi, const int sort,
			  const net_desired_t type);

/*Load a network from f in UCINET's "DL" format.
 *This is a very simplistic parser of that format; specifically
 *you must get the whole network output, without labels.
 *Specifying G_ANY as type will get you the integer-matrix style of
 *network.
 *NOTE(review): size (presumably set to the number of nodes) and
 *binary are not described here - confirm against the implementation.
 */
struct gennet *dl_load(FILE *f, int *size, const int binary, 
		       const net_desired_t type);

/*Outputs the network g to f as a ucinet dl file.
 *Returns -1 if printf fails - caller must check errno for reason why.
 *NOTE(review): n is presumably the number of nodes in g - confirm.
 */
int dl_output(const struct gennet *g, const int n, FILE *f);

/*Output the 2-dimensional degree distribution d of a network to f.
 *Each line contains the in-degree and out-degree of a node:
 * "%d %d\n",in[node],out[node]
 *Returns -1 if printf fails - caller must check errno for reason why.
 */
int dd_output(const struct degdist *d, FILE *f);

/***ANALYSIS FUNCTIONS***/

/*Return an array of ints, being the shortest path to
 *each of the other nodes in the network g, starting from start.
 *This uses Dijkstra's shortest-path algorithm.
 *NOTE(review): n is presumably the number of nodes in g. How
 *unreachable nodes are reported, and who frees the returned array,
 *are not stated here - confirm against the implementation.
 */
int *dijkstra(const struct gennet *g, const int n, const int start);

/*Calculate the mean shortest path between nodes on the network g.
 *This uses the dijkstra routine above.
 *NOTE(review): n is presumably the number of nodes in g - confirm.
 */
long double msp (const struct gennet *g, const int n);

/*Calculates betweenness centrality on an unweighted digraph,
 *using the algorithm from Brandes(2001).
 *If normalise is non-zero, the result is then scaled according to
 *Freeman(1979).
 *Note that if this is an undirected graph, you must divide by 2 again.
 *NOTE(review): the returned array presumably holds one value per
 *node (n of them); who frees it is not stated here - confirm.
 */
double *between(const struct gennet *g, const int n, const int normalise);

/*Calculates the clustering coefficient for the network g.
 *If exclude is non-zero, then average over only nodes with
 *valence >1, rather than the whole network.
 *NOTE(review): n is presumably the number of nodes in g - confirm.
 */
double cluster(const struct gennet *g, const int n, const int exclude);

/*Measure the degree distribution of the network g.
 *NOTE(review): n is presumably the number of nodes in g; who frees
 *the returned struct degdist is not stated here - confirm.
 */
struct degdist *dd(const struct gennet *g,const int n);

/*Triad census of the network g.
 *This implements the algorithm in Batagelj:2001,
 *and should run in O(m) time, where m is the number of edges.
 *It requires an adjacency-list-style network structure,
 *so the first step is to convert the passed-in network
 *if necessary.
 *NOTE(review): the counts are presumably written into tc (which
 *suggests triad_count is an array type) - confirm in census.h.
 */
void triad_census(struct gennet *g, const int n, triad_count tc);

/*Dyad census of the network g.
 *This also needs an adjacency-style network structure.
 *This approach is quicker than simply iterating over every
 *pair of nodes. Instead, for each node >0, we consider the
 *in- and out- node lists of nodes <that node:
 *M==|in&&out| and A==|inXORout|
 *NOTE(review): M and A are presumably the mutual and asymmetric dyad
 *counts, with the results written to *dc - confirm in census.h.
 */
void dyad_census(struct gennet *g, const int n, dyad_count *dc);


/*Returns a version of g with each edge made bidirectional.
 *NOTE(review): n is presumably the number of nodes in g; whether the
 *result is a newly allocated network the caller must free is not
 *stated here - confirm.
 */
struct gennet *bidirectionalise(const struct gennet *g, const int n);

/*Output the size of each strong component of g to out.
 *This uses the algorithm of Nuutila:1993, which is
 *reasonably fast but uses a lot of stack; you may need to check
 *ulimit if this crashes for you.
 *NOTE(review): n is presumably the number of nodes in g - confirm.
 */
void strong_component_count(const struct gennet *g, const int n,FILE *out);
/*Output the size of each weak component of g to out.
 *This works by bidirectionalising every edge, and then calling
 *strong_component_count on the result (so the same caveat about
 *stack usage noted for strong_component_count applies here).
 */
void weak_component_count(const struct gennet *g, const int n,FILE *out);


#endif /*GSALGS_H*/
