The most commonly used measure, is to calculate is the overlap (Hess (2000), Phys. Rev. E, 62, 8438, Amandei et al (1999), Proteins, 36, 419) :
203d205cfrac7b17d7bn7d205csum_7bi3d17d5e7bn7d205csum_7bj3d17d5e7bn7d20(7b5cbf20v7d_i20205ccdot207b5cbf20w7d_j20)5e20a202424.png)
v and w are are two sets of eigenvectors, each set containing n eigenvectors (the operation inside the summation is the dot product).
The C program that follows implements the above equation. It will read a specified number of eigenvectors from two files, and it will calculate the overlap of the corresponding sub-spaces. The way to run the program is something in the spirit of
./a.out carma.eigenvectors.1 carma.eigenvectors.2 number_of_eigenvectors
The files should contain one (unitary) eigenvector per column (as produced by carma).
#include <stdio.h> #include <math.h> #define MAXVECTORS 100 #define MAXVECTLEN 3000 #define MAXLINELEN 25000 main(argc, argv) int argc; char *argv[]; { FILE *file1, *file2; int n; int i, j, l, k1, k2; double inter, over; float eig1[MAXVECTORS][MAXVECTLEN]; float eig2[MAXVECTORS][MAXVECTLEN]; char line[MAXLINELEN]; /* Input parsing and file handling */ if ( argc != 4 ) { printf("Usage: eigenoverlap file1 file2 number_of_vectors\n"); exit(1); } file1 = fopen( argv[1], "r"); file2 = fopen( argv[2], "r"); if ( file1 == NULL || file2 == NULL ) { printf("Can not open input files.\n"); exit(1); } n = atoi( argv[3] ); printf("First set of vectors from file %s\n", argv[1] ); printf("Second set of vectors from file %s\n", argv[2] ); printf("Will calculate overlap between %d vectors\n", n ); k1 = 0; while( fgets(line, MAXLINELEN, file1) != NULL ) { i = 0; if ( sscanf( (char *)(strtok( line, " \t\n")), "%f", &eig1[i][k1] ) != 1 ) { printf("Not enough vectors in file %s\n", argv[1] ); exit( 1 ); } for ( i=1 ; i < n ; i++ ) { if ( sscanf( (char *)(strtok( NULL, " \t\n")), "%f", &eig1[i][k1] ) != 1 ) { printf("Not enough vectors in file %s\n", argv[1] ); exit( 1 ); } } k1++; } printf("Read %d vectors, each of length %d from file %s\n", n, k1, argv[1] ); k2 = 0; while( fgets(line, MAXLINELEN, file2) != NULL ) { i = 0; if ( sscanf( (char *)(strtok( line, " \t\n")), "%f", &eig2[i][k2] ) != 1 ) { printf("Not enough vectors in file %s\n", argv[2] ); exit( 1 ); } for ( i=1 ; i < n ; i++ ) { if ( sscanf( (char *)(strtok( NULL, " \t\n")), "%f", &eig2[i][k2] ) != 1 ) { printf("Not enough vectors in file %s\n", argv[2] ); exit( 1 ); } } k2++; } printf("Read %d vectors, each of length %d from file %s\n", n, k2, argv[2] ); if ( k1 != k2 ) { printf("Dimensions of vectors not equal. Abort.\n"); exit( 1 ); } /* The actual calculation */ over = 0.0; for ( i=0 ; i < n ; i++ ) for ( j=0 ; j < n ; j++ ) { inter = 0.0; for ( l=0 ; l < k1 ; l++ ) inter += (double)(eig1[i][l]) * (double)(eig2[j][l]); over += inter * inter; } over = over / (double)(n); printf("\nOverlap = %8.4f\n", (float)(over) ); }
If you would rather have it in perl :
#!/usr/bin/perl -w $v1=[]; $v2=[]; # File handling ( scalar( @ARGV) == 3 ) || die "\n\nUsage : eigenoverlap file1 file2 number_of_vectors\n"; open ( file1, $ARGV[0] ) || die "\n\nCan not open file $ARGV[0]\n"; open ( file2, $ARGV[1] ) || die "\n\nCan not open file $ARGV[1]\n"; $n = $ARGV[2]; print "First set of vectors from file $ARGV[0]\n"; print "Second set of vectors from file $ARGV[1]\n"; print "Will calculate overlap between $n vectors\n"; @eig1 = <file1>; @eig2 = <file2>; $k1 = scalar( @eig1 ); $k2 = scalar( @eig2 ); ( $k1 == $k2 ) || die "\n\nDimensions of vectors not equal\n"; # Place the data in matrices for easy of use for ( $i = 0 ; $i < $k1 ; $i++ ) { @words = split( ' ', $eig1[$i]); ( scalar( @words ) >= $n ) || die "\n\nNot enough vectors in file $ARGV[0]\n"; for ( $k = 0 ; $k < $n ; $k++ ) { $$v1[$i][$k] = $words[$k]; } } for ( $i = 0 ; $i < $k2 ; $i++ ) { @words = split( ' ', $eig2[$i]); ( scalar( @words ) >= $n ) || die "\n\nNot enough vectors in file $ARGV[1]\n"; for ( $k = 0 ; $k < $n ; $k++ ) { $$v2[$i][$k] = $words[$k]; } } # The actual calculation (re-writing C) $over = 0.0; for ( $i=0 ; $i < $n ; $i++ ) { for ( $j=0 ; $j < $n ; $j++ ) { $inter = 0.0; for ( $l=0 ; $l < $k1 ; $l++ ) { $inter += $$v1[$l][$i] * $$v2[$l][$j]; } $over += $inter * $inter; } } $over = $over / $n; printf "\n\nOverlap = %6.4f\n", $over; close( file2 ); close( file1 );