#include /* Copyright (c) 2002 Robert B. Russell * EMBL, Meyerhofstrasse 1, 69917 Heidelberg, Germany * Email: russell@embl.de * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ /* Rob Russell's attempt to make a COILS program */ main(int argc, char *argv[]) { int i,j,k,l; int verb; int window,pt; int which,weighted; int nseq; int t,tc; int seqlen; int mode; int min_seg; char heptfile[1000]; char *buff; static char *env; char *seq,*title,*ident; float min_P; struct hept_pref *h; FILE *MAT; /* defaults */ window = 21; weighted = 0; verb = 0; mode = 0; /* 0 = column mode, 1 = fasta, 2 = concise */ min_P = 0.5; if((env=getenv("COILSDIR"))==NULL) { fprintf(stderr,"error: environment variable COILSDIR must be set\n"); exit(-1); } strcpy(&heptfile[0],env); strcpy(&heptfile[strlen(heptfile)],"/new.mat"); for(i=1; i=argc) exit_error(); strcpy(&heptfile[0],argv[i+1]); i++; } else if(strcmp(&argv[i][1],"win")==0) { if((i+1)>=argc) exit_error(); sscanf(argv[i+1],"%d",&window); i++; } else if(strcmp(&argv[i][1],"c")==0) { mode=2; } else if(strcmp(&argv[i][1],"min_seg")==0) { if((i+1)>=argc) exit_error(); sscanf(argv[i+1],"%d",&min_seg); i++; } else if(strcmp(&argv[i][1],"c")==0) { mode=2; } else if((strcmp(&argv[i][1],"f")==0) || (strcmp(&argv[i][1],"fasta")==0)) { mode=1; } else if((strcmp(&argv[i][1],"min_P")==0)) { if((i+1)>=argc) exit_error(); sscanf(argv[i+1],"%f",&min_P); i++; } else if(strcmp(&argv[i][1],"help")==0) { exit_error(); } else if(strcmp(&argv[i][1],"w")==0) { weighted=1; } else if(strcmp(&argv[i][1],"V")==0 || strcmp(&argv[i][1],"v")==0) { verb=1; } else { fprintf(stderr," can't understand flag/field %s\n",argv[i]); exit_error(); } } if(verb) printf("Matrix file is %s\n",heptfile); /* Read in matrix */ if((MAT=fopen(heptfile,"r"))==NULL) { fprintf(stderr,"Error reading %s\n",heptfile); exit(-1); } h = read_matrix(MAT); if(verb) { for(i=0; im[pt][0],h->m[pt][1],h->m[pt][2],h->m[pt][3],h->m[pt][4], h->m[pt][5],h->m[pt][6]); } for(i=0; in; ++i) { printf("Window %4d %1d %f %f %f %f %f\n", h->f[i].win,h->f[i].w,h->f[i].m_cc,h->f[i].sd_cc,h->f[i].m_g,h->f[i].sd_g,h->f[i].sc); } } /* See if there is a file for our chosen window length/weight scheme */ which = -1; for(i=0; in; ++i) { if((h->f[i].win == window) && (h->f[i].w == weighted)) { /* match */ if(verb) printf("Found fitting data for win %4d w %d\n",window,weighted); which = i; } } /* Read in a sequence from the standard input */ nseq = 0; ident = (char*) malloc(100000*sizeof(char)); title = (char*) malloc(100000*sizeof(char)); buff = (char*) malloc(100000*sizeof(char)); t = 0; tc = 0; while(fgets(buff,99999,stdin)!=NULL) { /* There is a memory problem - the matrix data gets corrupted under OSF after this fgets */ for(i=0; i') { if(nseq>0) { pred_coils(seq,ident,title,h,window,which,weighted,mode,min_P,&t,&tc,min_seg); free(seq); } seqlen = 0; i=1; while((buff[i]!=' ') && (buff[i]!='\0') && (buff[i]!='\n') && (buff[i]!='\r')) { ident[i-1]=buff[i]; i++; } ident[i-1]='\0'; i++; j=0; seq = (char*)malloc(sizeof(char)); seq[0]='\0'; while(buff[i]!='\n' && buff[i]!='\0' && buff[i]!='\r') { title[j]=buff[i]; i++; j++; } title[j]='\0'; nseq++; } else { /* printf("Adding |%s| to |%s| = \n",buff,seq); */ seq=(char*)realloc(seq,(seqlen+strlen(buff)+1)*sizeof(char)); strcpy(&seq[seqlen],buff); seqlen=strlen(seq); /* printf(" |%s|\n",seq); */ } } if(nseq>0) { pred_coils(seq,ident,title,h,window,which,weighted,mode,min_P,&t,&tc,min_seg); free(seq); } fprintf(stderr,"%8d sequences %8d aas %8d in coil\n",nseq,t,tc); free(title); free(ident); exit(0); } void exit_error() { fprintf(stderr,"format: ncoils [options] < [sequence file]\n"); fprintf(stderr," -f or -fasta [fasta output - coils as 'x', like '-x' in seg]\n"); fprintf(stderr," -c [concise mode - which sequences have any coils (and how many)]\n"); fprintf(stderr," -min_seg [for concise mode - only report sequence if >= min coil segments]\n"); fprintf(stderr," -min_P [minimum P to define coil segment; DEFAULT = 0.5]\n"); fprintf(stderr," -win [window size; DEFAULT = 21]\n"); fprintf(stderr," -w [weight heptad positions a&d the same as b,c,e,f,g]\n"); fprintf(stderr," -v [verbose/debug mode - print extra junk]\n"); fprintf(stderr," -max_seq_len [longest sequence tolerated; DEFAULT = 100 000]\n"); fprintf(stderr,"\n"); fprintf(stderr,"NCOILS, Rob Russell and Andrei Lupas, 1999\n"); fprintf(stderr," based on Lupas, Van Dyck & Stock (1991) Science 252,1162-1164\n"); fprintf(stderr," Copyright (C) 1999 Robert B. Russell\n"); fprintf(stderr," NCOILS comes with ABSOLUTELY NO WARRANTY; for details see the file LICENSE\n"); fprintf(stderr," This is free software, and you are welcome to redistribute it\n"); fprintf(stderr," under certain conditions; see LICENSE for details\n"); exit(-1); } void pred_coils(char *seq,char *ident,char *title,struct hept_pref *h,int win, int which, int weighted,int mode, float min_P, int *t, int *tc, int min_seg) { int i,j; int len,pos,aa_pt; int pt; int total_coil_segments; int are_there_coils; float actual_win; float this_score,Gg,Gcc,power; float t1,t2,t3,t4; float *score; float *P; char *hept_seq; len=strlen(seq); score = (float*)malloc(len*sizeof(float)); P = (float*)malloc(len*sizeof(float)); hept_seq = (char*)malloc(len*sizeof(char)); /* printf("Sequence is %s length is %d\n",seq,len); */ for(i=0; i=0) && (aa_pt<26) && (AAs[aa_pt]!='_')) { pos = j%7; /* where are we in the heptad? pos modulus 7 */ /* printf("AA %c in hept %c %7.5f\n",seq[i+j],('a'+pos),h->m[aa_pt][pos]); */ if(weighted && (pos==0 || pos==3)) { power = 2.5; } else { power = 1.0; } actual_win+=power; if(h->m[aa_pt][pos]!=-1) { this_score*=pow(h->m[aa_pt][pos],power); } else { this_score*=pow(h->smallest,power); } } } if(actual_win>0) { this_score = pow(this_score,(1/(float)actual_win)); } else { this_score = 0; } for(j=0; ((j=0) && (aa_pt<26) && (AAs[aa_pt]!='_')) { pos = j%7; /* where are we in the heptad? pos modulus 7 */ if(this_score>score[i+j]) { score[i+j]=this_score; hept_seq[i+j]='a'+pos; } } } } if(mode==1) { printf(">%s %s\n",ident,title); } are_there_coils=0; total_coil_segments=0; for(i=0; if[which].sd_cc); t2 = (score[i]-(h->f[which].m_cc))/h->f[which].sd_cc; t3 = fabs(t2); t4 = pow(t3,2); t4 = t3*t3; Gcc = t1 * exp(-0.5*t4); /* printf("Gcc %f %f %f %f %f\n",t1cc,t2cc,t3cc,t4cc,Gcc); */ t1 = 1/(h->f[which].sd_g); t2 = (score[i]-(h->f[which].m_g))/h->f[which].sd_g; t3 = fabs(t2); t4 = pow(t3,2); t4 = t3*t3; Gg = t1 * exp(-0.5*t4); /* printf("Gg %f %f %f %f %f\n",t1g,t2g,t3g,t4g,Gg); */ P[i] = Gcc/(h->f[which].sc*Gg+Gcc); if(P[i]>=min_P) { are_there_coils=1; if((i==0) || (P[i-1]=min_P) { printf("x"); } else { printf("%c",seq[i]); } if(((i+1)%60)==0) { printf("\n"); } } else if(mode==0) { printf("%4d %c %c %7.3f %7.3f (%7.3f %7.3f)\n",i+1,seq[i],hept_seq[i],score[i],P[i],Gcc,Gg); } } if(mode==1) { printf("\n"); } if((mode==2) && (are_there_coils==1) && (total_coil_segments>=min_seg)) { if(total_coil_segments==1) { printf("Pred %4d coil segment : %s %s\n",total_coil_segments,ident,title); } else { printf("Pred %4d coil segments : %s %s\n",total_coil_segments,ident,title); } } free(P); free(score); free(hept_seq); }