MBG wiki | RecentChanges | Blog | 2019-11-22 | 2019-11-21

Secondary structure analysis of protein trajectories

vmd plus a few tricks :

# Cache secondary structure information for a given molecule

#VMD  --- start of VMD description block
#Name:
# SSCache
#Synopsis:
# Automatically stores secondary structure information for animations
#Version:
# 1.0
#Uses VMD version:
# 1.1
#Ease of use:
# 2
#Procedures:
# <li>start_sscache molid - start caching the given molecule
# <li>stop_sscache molid - stop caching
# <li>reset_sscache - reset the cache
# <li>sscache - internal function used by trace
#Description:
# Calculates and stores the secondary structure assignment for 
# each timestep.  This lets you see how the secondary structure
# changes over a trajectory.
# <p>
# It is turned on with the command "start_sscache> followed by the
# molecule number of the molecule whose secondary structure should be
# saved (the default is "top", which gets converted to the correct
# molecule index).  Whenever the frame for that molecule changes, the
# procedure "sscache" is called.
# <p>
#   "sscache" is the heart of the script.  It checks if a secondary
# structure definition for the given molecule number and frame already
# exists in the Tcl array sscache_data(molecule,frame).  If so, it uses
# the data to redefine the "structure" keyword values (but only for
# the protein residues).  If not, it calls the secondary structure
# routine to evaluate the secondary structure based on the new
# coordinates.  The results are saved in the sscache_data array.
# <p>
# Once the secondary structure values are saved, the molecule can be
# animated rather quickly and the updates can be controlled by the
# animate form.
# <p>
#  To turn off the trace, use the command "stop_sscache", which
# also takes the molecule number.  There must be one "stop_sscache"
# for each "start_sscache".  The command "clear_sscache" resets
# the saved secondary structure data for all the molecules and all the
# frames.
#Files: 
# <a href="sscache.vmd">sscache.vmd</a>
#See also:
# the VMD user's guide
#Author: 
# Andrew Dalke <dalke@ks.uiuc.edu>
#Url: 
# http://www.ks.uiuc.edu/Research/vmd/script_library/sscache/
#\VMD  --- end of block


# start the cache for a given molecule
proc start_sscache {{molid top}} {
    global sscache_data
    if {! [string compare $molid top]} {
        set molid [molinfo top]
    }
    global vmd_frame
    # set a trace to detect when an animation frame changes
    trace variable vmd_frame($molid) w sscache

    return
}

# remove the trace (need one stop for every start)
proc stop_sscache {{molid top}} {
    if {! [string compare $molid top]} {
        set molid [molinfo top]
    }
    global vmd_frame
    trace vdelete vmd_frame($molid) w sscache
    return
}


# reset the whole secondary structure data cache
proc reset_sscache {} {
    if [info exists sscache_data] {
        unset sscache_data
    }
    return
}

# when the frame changes, trace calls this function
proc sscache {name index op} {
    # name == vmd_frame
    # index == molecule id of the newly changed frame
    # op == w
    
    global sscache_data

    # get the protein CA atoms
    set sel [atomselect $index "name CA"]

    ## get the new frame number
    # Tcl doesn't yet have it, but VMD does ...
    set frame [molinfo $index get frame]

    # see if the ss data exists in the cache
    if [info exists sscache_data($index,$frame)] {
        $sel set structure $sscache_data($index,$frame)
        return
    }


    # doesn't exist, so (re)calculate it
    vmd_calculate_structure $index
    # save the data for next time
    set sscache_data($index,$frame) [$sel get structure]

    set listfile [open ss_traj.dat a+]

    set sel [atomselect 0 "name CA"] 
    set structList [$sel get structure] 
    puts $listfile "[format "%10d" $frame] $structList"
    
    close $listfile
    
    return
}

source ss_traj.tcl
start_sscache

ss_plot < ss_traj.dat > test.ps
gv test.ps

 ss_plot image

You can also use the ss_traj.dat file to make a nice sequence logo showing the secondary structure variability per sequence position :

 ss_logo image

This you do as follows :

>                                                                     
>
TTTTTEEEEEEEEEEEEEEHHHHHHCCTTEEEEECCCTTTEEEEEETTEEEEEEEEEEETTEEEEEEEEE...
>
TTTTTEEEEEEEEEEEEEEHHHHHHCCCCEEEEETTTTTTEEEEEETTEEEEEEEEEEETTEEEEEEEEE...
>
TTTTTEEEEEEEEEEEEEEHHHHHHCCCCEEEEETTTTTTEEEEEETTEEEEEEEEEEETTEEEEEEEEE...
>
TTTTTEEEEEEEEEEEEEEHHHHHHCCCCEEEEECCCTTTEEEEEETTEEEEEEEEEEETTEEEEEEEEE...
>
TTTTTEEEEEEEEEEEEEEHHHHHHCCCCEEEEEBTTTTTEEEEEETTEEEEEEEEEEETTEEEEEEEEE...
>
TTTTTEEEEEEEEEEEEEEHHHHHHCCCCEEEEECCCTTTEEEEEETTEEEEEEEEEEETTEEEEEEEEE...
>
....

/usr/local/weblogo/seqlogo -F PNG -c -Y -C 40 -w 20 -f myfile.dat > logo.png
ee logo.png


If you do not like the colors of ss_plot

Get the source code (ss_plot.c) :

#include <stdio.h>
#include <math.h>
#include <ctype.h>


#define         HCOL    160,32,240      /* Alpha helix */
#define         GCOL    203,78,97       /* 3-10 helix */
#define         ICOL    255,0,0         /* pi helix */
#define         ECOL    255,255,0       /* extended */
#define         BCOL    210,180,140     /* Bridge */
#define         TCOL    0,255,255       /* Turn */
#define         CCOL    255,255,255     /* Coil */


main()
{

        char    data[30000][500];
        char    c;
        int     frame;
        int     residue;
        int     nofres;
        float   rel_scale;
        float   abs_scale;
        int     count;
        int     gm, gf;



        frame = 0;
        residue = 0;
        nofres = 0;
        while( scanf("%c", &c) != EOF  )
                {
                        if ( c == '\n' )
                                {
                                        frame++;
                                        
                                        if ( !nofres )
                                                nofres = residue;
                                        else
                                                if ( residue != nofres )
                                                        {
                                                                printf("Varying number of residues ? Abort.\n");
                                                                exit(1);
                                                        }       
                                        
                                        residue = 0;
                                }

                        if ( isalpha( (int)(c) ) )
                                data[frame][residue++] = c;
                }



        abs_scale = 550.0;
        rel_scale =  (frame) / (float)(nofres);
        if ( abs_scale * rel_scale > 841 )
                abs_scale = 800.0 / rel_scale;
         

        printf("%%!PS-Adobe\n");
        printf("%%%% From ss_plot.\n");
        printf("/M { moveto } def\n/C { lineto stroke newpath } def\n");
        printf("0.8 setlinewidth\n");
        printf("22.5 %f translate\n", (841 - abs_scale * rel_scale) /2.0 );
        printf("%f %f scale\n", abs_scale, abs_scale * rel_scale );
        printf("/DataString %d string def\n", nofres );
        printf("%d %d 8 [ %d 0 0 %d 0 0 ]\n", nofres, frame, nofres, frame );
        printf("{ currentfile DataString readhexstring pop }\nfalse 3\ncolorimage\n");
        count = 0;
        for ( gm=0 ; gm < frame  ; gm++ )
        for ( gf=0 ; gf < nofres ; gf++ )
                {
                        if ( data[gm][gf] == 'H' )
                                printf("%02x%02x%02x", HCOL );
                        else if ( data[gm][gf] == 'G' )
                                printf("%02x%02x%02x", GCOL );
                        else if ( data[gm][gf] == 'I' )
                                printf("%02x%02x%02x", ICOL );
                        else if ( data[gm][gf] == 'E' )
                                printf("%02x%02x%02x", ECOL );
                        else if ( data[gm][gf] == 'B' || data[gm][gf] == 'b')
                                printf("%02x%02x%02x", BCOL );
                        else if ( data[gm][gf] == 'T' )
                                printf("%02x%02x%02x", TCOL );
                        else if ( data[gm][gf] == 'C' )
                                printf("%02x%02x%02x", CCOL );
                        else 
                                {
                                        fprintf(stderr, "Unknown STRIDE character ??? Abort.\n");
                                        exit(1);
                                                        }

                        count++;
                        if ( count == 10 )
                                {         
                                        printf("\n");
                                        count = 0;         
                                                        }
                                                                }
                                          
        printf("0 0 0 setrgbcolor 0.0010 setlinewidth\n");
        printf("newpath 0.0 0.0 moveto 1.0 0.0 lineto 1.0 1.0 lineto 0.0 1.0 lineto 0.0 0.0 lineto stroke\n");
        
        printf("\nshowpage\n");






}

Edit it, change the RGB color definitions (the #defines), save it, compile it with gcc ss_plot.c -lm, run it with ./a.out < ss_traj.dat > test.ps