Home > FR3DSource > xAlignCandidates.m

xAlignCandidates

PURPOSE ^

xAlignCandidates(File,Search,Direction) shows a multiple sequence

SYNOPSIS ^

function [Text] = xAlignCandidates(File,Search,Direction,Param)

DESCRIPTION ^

 xAlignCandidates(File,Search,Direction) shows a multiple sequence
 alignment of the candidates in Search.  Bases which correspond in the
 geometric search are aligned with one another.  If a maximum distance
 has been specified, or the maximum gap is small, bases between the
 bases in the candidate are also displayed.

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 % xAlignCandidates(File,Search,Direction) shows a multiple sequence
0002 % alignment of the candidates in Search.  Bases which correspond in the
0003 % geometric search are aligned with one another.  If a maximum distance
0004 % has been specified, or the maximum gap is small, bases between the
0005 % bases in the candidate are also displayed.
0006 
0007 % Direction can be +1 or -1; it tells the order in which to put the
0008 % nucleotides in the first candidate
0009 
0010 function [Text] = xAlignCandidates(File,Search,Direction,Param)
0011 
0012 Query      = Search.Query;
0013 Candidates = Search.Candidates;
0014 N          = Query.NumNT;
0015 
0016 [L,t] = size(Candidates);
0017 
0018 [y,p] = sort(Direction*double(Candidates(1,1:N)));    
0019                                     % put nucleotides in inc/decreasing order
0020 Cand = double(Candidates(:,p));     % re-order nucleotides
0021 F    = Candidates(:,N+1);           % file numbers
0022 
0023 if isfield(Query,'MaxDiffMat'),
0024   MaxDiff = diag(Query.MaxDiffMat(p,p),1);
0025 else
0026   MaxDiff = Inf*ones(1,N-1);
0027 end
0028 
0029 if Direction > 0,
0030   MaxDiff = MaxDiff;
0031 else
0032   MaxDiff = fliplr(MaxDiff);
0033 end
0034 
0035 % ---------------------------- Calculate maximum gaps between cand. nucleotides
0036 
0037 maxinsert = zeros(1,N-1);
0038 for c = 1:L,
0039   maxinsert = max(maxinsert,abs(diff(double(Cand(c,1:N))))-1);
0040 end
0041 
0042 % ---------------------------- Print header line
0043 
0044 t = 1;                                          % line of text we're on
0045 k = N;                                          % number of columns being shown
0046 Text{t} = sprintf('               ');
0047 if Query.Geometric > 0,
0048   Text{t} = [Text{t} sprintf('           ')];
0049 end
0050 for j=1:N,
0051   Text{t} = [Text{t} sprintf('        ')];
0052 end
0053 Text{t} = [Text{t} sprintf('    ')];
0054 for n = 1:(N-1),
0055   Text{t} = [Text{t} sprintf('%d',mod(n,10))];
0056   if (MaxDiff(n) < Inf) | (maxinsert(n) < 5),   % if only few insertions
0057     for i=1:maxinsert(n),
0058       Text{t} = [Text{t} sprintf(' ')];
0059       k = k + 1;                                % more columns being shown
0060     end
0061   else
0062     Text{t} = [Text{t} sprintf('    ')];
0063   end
0064 end
0065 Text{t} = [Text{t} sprintf('%d', mod(N,10))];
0066 
0067 % ----------------------------- Print alignment
0068 
0069 CorrCodeList = zeros(c,N);                        % for bases corresp to query
0070 for j = 1:N,
0071   InsCode{j}.Count = zeros(1,4);
0072   InsLength{j}.Count = [];
0073 end
0074 
0075 for c = 1:L,                                      % loop through candidates
0076   f = F(c);                                       % file number
0077   t = t + 1;
0078   Text{t} = [sprintf('%15s', File(f).Filename)];
0079   if Query.Geometric > 0,
0080     if isfield(Search,'Discrepancy'),
0081       Text{t} = [Text{t} sprintf('%11.4f',Search.Discrepancy(c))];
0082     elseif isfield(Search,'AvgDisc'),
0083       Text{t} = [Text{t} sprintf('%11.4f',Search.AvgDisc(c))];
0084     end
0085   end
0086   for j=1:N,                            % print candidate
0087     Text{t} = [Text{t} sprintf('%3s',File(f).NT(Cand(c,j)).Base)];    
0088     Text{t} = [Text{t} sprintf('%5s',File(f).NT(Cand(c,j)).Number)];    
0089   end
0090   Text{t} = [Text{t} sprintf('    ')];
0091 
0092   k = 1;                                          % column counter
0093 
0094   for n = 1:(N-1),                                % print alignment for cand
0095     Text{t} = [Text{t} sprintf('%s', File(F(c)).NT(Cand(c,n)).Base)];
0096     j = File(F(c)).NT(Cand(c,n)).Code;
0097     CorrCodeList(c,n) = j;                        % store code of corresp base
0098     k = k + 1;
0099     if (MaxDiff(n) < Inf) | (maxinsert(n) < 5),   % if only few insertions
0100       if Cand(c,n+1) - Cand(c,n) > 1,             % increasing order
0101         for i = (Cand(c,n)+1):(Cand(c,n+1)-1),
0102           Text{t} = [Text{t} sprintf('%c', File(F(c)).NT(i).Base)];   % show insertions
0103           j = File(F(c)).NT(i).Code;
0104           InsCode{n}.Count(j) = InsCode{n}.Count(j) + 1;
0105         end
0106       elseif Cand(c,n+1) - Cand(c,n) < -1,        % decreasing order
0107         for i = (Cand(c,n)-1):-1:(Cand(c,n+1)+1),
0108           Text{t} = [Text{t} sprintf('%c', File(F(c)).NT(i).Base)];   % show insertions
0109           j = File(F(c)).NT(i).Code;
0110           InsCode{n}.Count(j) = InsCode{n}.Count(j) + 1;
0111         end
0112       end
0113       for i=1:(1 + maxinsert(n) - abs(Cand(c,n+1)-Cand(c,n))),
0114         Text{t} = [Text{t} sprintf('-')];
0115         k = k + 1;
0116       end
0117     else
0118       Text{t} = [Text{t} sprintf('....')];
0119     end
0120 
0121     h = abs(Cand(c,n+1)-Cand(c,n));                % number of insertions + 1
0122     if h > length(InsLength{n}.Count),
0123       InsLength{n}.Count(h) = 1;
0124     else
0125       InsLength{n}.Count(h) = InsLength{n}.Count(h) + 1;
0126     end
0127 
0128   end
0129   Text{t} = [Text{t} sprintf('%s', File(F(c)).NT(Cand(c,N)).Base)];
0130   j = File(F(c)).NT(Cand(c,N)).Code;
0131   CorrCodeList(c,N) = j;                           % store last corresp base
0132 
0133   drawnow
0134 end
0135 
0136 % --------------------------------------------- calculate statistics
0137 
0138 Letters = {'A', 'C', 'G', 'U'};
0139 
0140 t = t + 1;
0141 Text{t} = [sprintf('Letter frequencies')];
0142 for n = 1:N,
0143   S = sum(InsCode{n}.Count);
0144   if S > 0,                                       % there are some insertions
0145     t = t + 1;
0146     Text{t} = [sprintf('Insertions after motif base %d: (%d observed)', n, S)];
0147     t = t + 1;
0148     Text{t} = [sprintf('Letters: ')];
0149     for j = 1:4,
0150       Text{t} = [Text{t} sprintf('%s %6.4f  ', Letters{j}, InsCode{n}.Count(j)/S)];
0151     end
0152     t = t + 1;
0153     Text{t} = [sprintf('Lengths: ')];
0154     for j = 1:length(InsLength{n}.Count),
0155      Text{t} = [Text{t} sprintf('%d %6.4f  ', j-1, InsLength{n}.Count(j)/sum(InsLength{n}.Count))];
0156     end
0157   end
0158 end
0159 
0160 for i = 1:N,
0161   for j = 1:N,
0162     if i < j, 
0163       t = t + 1;
0164       Text{t} = [sprintf('Pairs made by bases corresponding to nucleotides %d and %d in the query motif', i,j)];
0165       Tallies = zeros(4,4);
0166       for k = 1:L,
0167         a = CorrCodeList(k,i);
0168         b = CorrCodeList(k,j);
0169         Tallies(a,b) = Tallies(a,b) + 1;
0170       end
0171 
0172       A = find(sum(Tallies'));                    % nonzero rows
0173       B = find(sum(Tallies));                     % nonzero columns
0174       Tallies = Tallies(A,B);                     % keep nonzero rows, columns
0175       LA = Letters(A);                            % keep needed letters
0176       LB = Letters(B);
0177 
0178       Perc = Tallies / L;
0179 
0180       PPerc = [];
0181       for a = 1:length(A),
0182         for b = 1:length(B),
0183           PPerc(a,b) = sum(Perc(a,:))*sum(Perc(:,b));
0184         end
0185       end
0186       chisqr = L * sum(sum(((Perc - PPerc).^2)./PPerc));
0187 
0188       t = t + 1;
0189       Text{t} = [sprintf('ChiSqr %7.2f', chisqr)];
0190       if chisqr > chi2inv(0.99,(length(B)-1)*(length(A)-1)),
0191         Text{t} = [Text{t} sprintf(' suggests rejecting independence at the 99%% level')];
0192       else
0193         Text{t} = [Text{t} sprintf(' suggests accepting independence at the 99%% level')];
0194       end      
0195 
0196       t = t + 1;
0197       Text{t} = [sprintf('Observed  ')];
0198       for b = 1:length(B),
0199         Text{t} = [Text{t} sprintf('%s       ', LB{b})];
0200       end
0201       Text{t} = [Text{t} sprintf('Product  ')];
0202       for b = 1:length(B),
0203         Text{t} = [Text{t} sprintf('%s       ', LB{b})];
0204       end
0205 
0206       for a = 1:length(A),
0207         t = t + 1;
0208         Text{t} = [sprintf('%s  ', LA{a})];
0209         for b = 1:length(B),
0210           Text{t} = [Text{t} sprintf('%8.4f', 100*Perc(a,b))];
0211         end
0212         Text{t} = [Text{t} sprintf('       %s ', LA{a})];
0213         for b = 1:length(B),
0214           Text{t} = [Text{t} sprintf('%8.4f', 100*sum(Perc(a,:))*sum(Perc(:,b)))];
0215         end
0216       end
0217     end
0218   end
0219 end
0220 
0221 if nargin < 4,
0222   for t = 1:length(Text),
0223     fprintf('%s\n', Text{t});
0224   end
0225 end

Generated on Fri 03-Apr-2009 09:52:35 by m2html © 2003