MBG wiki
|
RecentChanges
|
Blog
|
2024-04-20
|
2024-04-19
Editing Comparison of sets of eigenvectors
If you have two independent trajectories (or, two independent pieces from the same trajectory), you may want to compare (quantitatively) the corresponding eigenvectors. The aim for such a comparison is to show (or otherwise) that the principal components determined from the two trajectories agree with each other (and, thus, that they may indeed correspond to the principal components of two sufficiently sampled trajectories). The most commonly used measure, is to calculate is the overlap (Hess (2000), Phys. Rev. E, 62, 8438, Amandei et al (1999), Proteins, 36, 419) : \[ overlap({\bf v}, {\bf w}) = \frac{1}{n} \sum_{i=1}^{n} \sum_{j=1}^{n} ({\bf v}_i \cdot {\bf w}_j )^2 \] <b>v</b> and *w* are are two sets of eigenvectors, each set containing *n* eigenvectors (the operation inside the summation is the dot product). The C program that follows implements the above equation. It will read a specified number of eigenvectors from two files, and it will calculate the overlap of the corresponding sub-spaces. The way to run the program is something in the spirit of <code> ./a.out carma.eigenvectors.1 carma.eigenvectors.2 number_of_eigenvectors </code> The files should contain one (unitary) eigenvector per column (as produced by carma). <code> #include <stdio.h> #include <math.h> #define MAXVECTORS 100 #define MAXVECTLEN 3000 #define MAXLINELEN 25000 main(argc, argv) int argc; char *argv[]; { FILE *file1, *file2; int n; int i, j, l, k1, k2; double inter, over; float eig1[MAXVECTORS][MAXVECTLEN]; float eig2[MAXVECTORS][MAXVECTLEN]; char line[MAXLINELEN]; /* Input parsing and file handling */ if ( argc != 4 ) { printf("Usage: eigenoverlap file1 file2 number_of_vectors\n"); exit(1); } file1 = fopen( argv[1], "r"); file2 = fopen( argv[2], "r"); if ( file1 == NULL || file2 == NULL ) { printf("Can not open input files.\n"); exit(1); } n = atoi( argv[3] ); printf("First set of vectors from file %s\n", argv[1] ); printf("Second set of vectors from file %s\n", argv[2] ); printf("Will calculate overlap between %d vectors\n", n ); k1 = 0; while( fgets(line, MAXLINELEN, file1) != NULL ) { i = 0; if ( sscanf( (char *)(strtok( line, " \t\n")), "%f", &eig1[i][k1] ) != 1 ) { printf("Not enough vectors in file %s\n", argv[1] ); exit( 1 ); } for ( i=1 ; i < n ; i++ ) { if ( sscanf( (char *)(strtok( NULL, " \t\n")), "%f", &eig1[i][k1] ) != 1 ) { printf("Not enough vectors in file %s\n", argv[1] ); exit( 1 ); } } k1++; } printf("Read %d vectors, each of length %d from file %s\n", n, k1, argv[1] ); k2 = 0; while( fgets(line, MAXLINELEN, file2) != NULL ) { i = 0; if ( sscanf( (char *)(strtok( line, " \t\n")), "%f", &eig2[i][k2] ) != 1 ) { printf("Not enough vectors in file %s\n", argv[2] ); exit( 1 ); } for ( i=1 ; i < n ; i++ ) { if ( sscanf( (char *)(strtok( NULL, " \t\n")), "%f", &eig2[i][k2] ) != 1 ) { printf("Not enough vectors in file %s\n", argv[2] ); exit( 1 ); } } k2++; } printf("Read %d vectors, each of length %d from file %s\n", n, k2, argv[2] ); if ( k1 != k2 ) { printf("Dimensions of vectors not equal. Abort.\n"); exit( 1 ); } /* The actual calculation */ over = 0.0; for ( i=0 ; i < n ; i++ ) for ( j=0 ; j < n ; j++ ) { inter = 0.0; for ( l=0 ; l < k1 ; l++ ) inter += (double)(eig1[i][l]) * (double)(eig2[j][l]); over += inter * inter; } over = over / (double)(n); printf("\nOverlap = %8.4f\n", (float)(over) ); } </code> If you would rather have it in perl : <code> #!/usr/bin/perl -w $v1=[]; $v2=[]; # File handling ( scalar( @ARGV) == 3 ) || die "\n\nUsage : eigenoverlap file1 file2 number_of_vectors\n"; open ( file1, $ARGV[0] ) || die "\n\nCan not open file $ARGV[0]\n"; open ( file2, $ARGV[1] ) || die "\n\nCan not open file $ARGV[1]\n"; $n = $ARGV[2]; print "First set of vectors from file $ARGV[0]\n"; print "Second set of vectors from file $ARGV[1]\n"; print "Will calculate overlap between $n vectors\n"; @eig1 = <file1>; @eig2 = <file2>; $k1 = scalar( @eig1 ); $k2 = scalar( @eig2 ); ( $k1 == $k2 ) || die "\n\nDimensions of vectors not equal\n"; # Place the data in matrices for easy of use for ( $i = 0 ; $i < $k1 ; $i++ ) { @words = split( ' ', $eig1[$i]); ( scalar( @words ) >= $n ) || die "\n\nNot enough vectors in file $ARGV[0]\n"; for ( $k = 0 ; $k < $n ; $k++ ) { $$v1[$i][$k] = $words[$k]; } } for ( $i = 0 ; $i < $k2 ; $i++ ) { @words = split( ' ', $eig2[$i]); ( scalar( @words ) >= $n ) || die "\n\nNot enough vectors in file $ARGV[1]\n"; for ( $k = 0 ; $k < $n ; $k++ ) { $$v2[$i][$k] = $words[$k]; } } # The actual calculation (re-writing C) $over = 0.0; for ( $i=0 ; $i < $n ; $i++ ) { for ( $j=0 ; $j < $n ; $j++ ) { $inter = 0.0; for ( $l=0 ; $l < $k1 ; $l++ ) { $inter += $$v1[$l][$i] * $$v2[$l][$j]; } $over += $inter * $inter; } } $over = $over / $n; printf "\n\nOverlap = %6.4f\n", $over; close( file2 ); close( file1 ); </code>
Summary:
This change is a minor edit.
Username:
Replace this text with a file.