Home > FR3DSource > zIndexLookup.m

zIndexLookup

PURPOSE ^

zIndexLookup(File,Num,Chain) finds the base index for base having nucleotide

SYNOPSIS ^

function [ind,allchains] = zIndexLookup(File,Num,Chain,Verbose)

DESCRIPTION ^

 zIndexLookup(File,Num,Chain) finds the base index for base having nucleotide 
 number Num and, if specified, Chain
 Num can be a cell array of N nucleotide numbers
 Any entry of that cell array can use the notation '1830:1835' for a range
 Any entry can also use commas to separate nucleotide numbers
 Nucleotide numbers can be followed by (A) or _A to indicate chain A
 Someday: Base letters can be specified to help identify the correct chain

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 % zIndexLookup(File,Num,Chain) finds the base index for base having nucleotide
0002 % number Num and, if specified, Chain
0003 % Num can be a cell array of N nucleotide numbers
0004 % Any entry of that cell array can use the notation '1830:1835' for a range
0005 % Any entry can also use commas to separate nucleotide numbers
0006 % Nucleotide numbers can be followed by (A) or _A to indicate chain A
0007 % Someday: Base letters can be specified to help identify the correct chain
0008 
0009 % ind is a Cx1 vector of the first match to each, the easy answer
0010 % allchains is a Cx1 cell array.  Each cell lists all chains that match the
0011 % corresponding nucleotide number
0012 
0013 function [ind,allchains] = zIndexLookup(File,Num,Chain,Verbose)
0014 
0015 if nargin < 3,
0016   for k = 1:length(Num),
0017     Chain{k} = '';
0018   end
0019 end
0020 
0021 if nargin < 4,
0022   Verbose = 1;
0023 end
0024 
0025 if strcmp(class(Num),'char'),
0026   Num = {Num};
0027 end
0028 
0029 if strcmp(class(Chain),'char'),
0030   Chain = {Chain};
0031 end
0032 
0033 % split multiple specifications into separate cells
0034 
0035 t = 1;
0036 for k = 1:length(Num),
0037   N = regexprep(Num{k},'([ACGU])\s*([123456789])','$1$2'); % join base & number
0038   N = regexprep(N,';| ',',');         % replace other delimeters with commas
0039   while strfind(N,',,'),              % replace double commas with single
0040     N = regexprep(N,',,',',');
0041   end
0042   a = [1 1+strfind(N,',')];               % locations of commas
0043   for i = 1:(length(a)-1),
0044     Numb{t} = N(a(i):(a(i+1)-2));
0045     Chai{t} = Chain{k};
0046     t = t + 1;
0047   end
0048   Numb{t} = N(a(end):end);
0049   Chai{t} = Chain{k};
0050   t = t + 1;
0051 end
0052 
0053 % check for chain indicated in parentheses
0054 
0055 for k = 1:length(Numb),
0056   if Numb{k}(1) == '(' || Numb{k}(1) == '_',         % chain only
0057     Chai{k} = Numb{k}(2);
0058     Numb{k} = '-';
0059   elseif ~isempty(strfind(Numb{k},'(')),
0060     a = strfind(Numb{k},'(');
0061     b = strfind(Numb{k},')');
0062     Chai{k} = Numb{k}(a(1)+1:b(1)-1);     % extract chain
0063     if length(a) == 1,                    % one chain specified
0064       Numb{k} = [Numb{k}(1:a(1)-1) Numb{k}(b(1)+1:end)];
0065     elseif length(a) == 2,                % range with two chains specified
0066       Numb{k} = [Numb{k}(1:a(1)-1) Numb{k}(b(1)+1:a(2)-1)];
0067     end
0068   end
0069 end
0070 
0071 % check for element or chain indicated by underscore
0072 
0073 for k = 1:length(Numb),
0074   if any(Numb{k}(1) == 'bdefhijklmnopqrstvwxyz'),
0075     % this is an element, leave it alone
0076   else                      % check for chain indicated by underscore
0077     if ~isempty(strfind(Numb{k},'_')),
0078       a = strfind(Numb{k},'_');
0079       Chai{k} = Numb{k}(a(end)+1:end);        % extract chain
0080       Numb{k} = Numb{k}(1:a(end)-1);          % remove chain reference
0081     end
0082   end
0083 end
0084 
0085 ind = [];
0086   
0087 % if File is a text string (filename), load the file
0088 
0089 if strcmp(class(File),'char'),
0090   Filename = File;
0091   File = zGetNTData(Filename,0);
0092 end
0093 
0094 Numbers = cat(1,{File.NT(:).Number});
0095 
0096 allchains = [];
0097 
0098 for k = 1:length(Numb)                      % loop through nucleotide numbers
0099   if ~isempty(strfind(Numb{k},':')),        % a range of numbers
0100     n = Numb{k};                            % kth specified number or range
0101     i = strfind(n,':');                     % find the colon
0102     Numb1 = n(1:(i-1));                     % first nucleotide number
0103     p = LookUpOne(File,Numbers,Numb1,Chai{k},Verbose);
0104     Numb2 = n((i+1):length(n));             % second nucleotide number
0105     q = LookUpOne(File,Numbers,Numb2,Chai{k},Verbose);
0106 
0107     m = length(ind);
0108 
0109     if length(p) > 0 && length(q) > 0,      % both numbers are valid
0110       if p(1) < q(1),
0111         ind = [ind p(1):q(1)];
0112       else
0113         ind = [ind p(1):-1:q(1)];
0114       end
0115     end
0116 
0117     mm = length(ind);
0118     ch = [];
0119     c  = 1;
0120 
0121     for j = 1:length(p),
0122       for jj = 1:length(q),
0123         if File.NT(p(j)).Chain == File.NT(q(jj)).Chain,
0124           ch{c} = File.NT(p(j)).Chain;
0125           c = c + 1;
0126         end
0127       end
0128     end
0129 
0130     for j = (m+1):mm,
0131       allchains{j} = ch;
0132     end
0133   elseif Numb{k}(1) == '-',
0134     ind = find(cat(2,File.NT.Chain) == Chai{k});
0135   elseif strcmpi(Numb{k},'all'),
0136     ind = 1:length(File.NT);                  % all nucleotides
0137   elseif any(Numb{k}(1) == 'bdefhijklmnopqrstvwxyz') && isfield(File.NT(1),'Hierarchy'),
0138     c = 1;                        % counter for number of indices found
0139     newind = [];
0140     for n = 1:length(File.NT),
0141       p = find(ismember(File.NT(n).Hierarchy,Numb{k}));
0142       if ~isempty(p),
0143         newind(c) = n;
0144         c = c + 1;
0145       end
0146     end
0147 
0148     if ~isempty(newind),
0149       for j = 1:length(newind),
0150         allchains{length(ind)+j} = File.NT(newind(j)).Chain;
0151       end
0152       ind = [ind newind];
0153     end
0154   else
0155     L = LookUpOne(File,Numbers,Numb{k},Chai{k},Verbose);
0156 
0157     if length(L) > 0,
0158       ind = [ind L(1)];                       % add the first hit to the list
0159 
0160       m = length(ind);
0161       ch = [];
0162 
0163       for i=1:length(L),
0164         ch{i} = File.NT(L(i)).Chain;
0165       end
0166 
0167       allchains{m} = ch;
0168     end
0169   end
0170 end
0171 
0172 return
0173 
0174 % check for indicated base - add to LookUpOne in the future
0175 
0176 for k = 1:length(Numb),
0177   Numb{k} = upper(Numb{k});
0178   aa = strfind(Numb{k},'A');
0179   ac = strfind(Numb{k},'C');
0180   ag = strfind(Numb{k},'G');
0181   au = strfind(Numb{k},'U');
0182   Base{k} = '';
0183   if ~isempty(aa),
0184     Numb{k} = strrep(Numb{k},'A','');
0185     Base{k} = 'A';
0186   end
0187   if ~isempty(ac),
0188     Numb{k} = strrep(Numb{k},'C','');
0189     Base{k} = 'C';
0190   end
0191   if ~isempty(ag),
0192     Numb{k} = strrep(Numb{k},'G','');
0193     Base{k} = 'G';
0194   end
0195   if ~isempty(au),
0196     Numb{k} = strrep(Numb{k},'U','');
0197     Base{k} = 'U';
0198   end
0199 end
0200 
0201 %-------------------------------------------------------------------------
0202 function [ind] = LookUpOne(File,Numbers,N,Chain,Verbose)
0203 
0204     if any(N(1) == 'ACGU'),
0205       N = N(2:end);
0206     end
0207 
0208     % later, add the ability to infer the chain from the base specified
0209 
0210     ind = [];
0211     p = find(ismember(Numbers,N));
0212     if length(p) == 0,
0213       if Verbose > 0,
0214         fprintf('Could not find nucleotide %s in %s\n',N,File.Filename);
0215       end
0216     elseif length(p) == 1 & length(Chain) == 0, % one match, no chain specified
0217       ind = [ind p];
0218     elseif length(p) > 1 & length(Chain) == 0,% two matches, no chain specified
0219       ind = [ind p];
0220       if Verbose > 0,
0221         fprintf('Multiple matches found for %s in %s, consider specifying a chain\n', N, File.Filename);
0222       end
0223       for a = 1:length(ind),
0224         if Verbose > 0,
0225           fprintf('Nucleotide %s%s Chain %5s Index %5d\n', File.NT(ind(a)).Base, File.NT(ind(a)).Number, File.NT(ind(a)).Chain, ind(a));
0226         end
0227       end
0228     elseif length(Chain) > 0,                    % chain specified
0229       c = 0;
0230       for j = 1:length(p),
0231         if strcmp(File.NT(p(j)).Chain,Chain),
0232           ind = [ind p(j)];
0233           c = c + 1;
0234         end
0235       end
0236       if c == 0,
0237         if Verbose > 0,
0238           fprintf('Could not find nucleotide %s in chain %s in %s\n',N,Chain,File.Filename);
0239         end
0240       elseif c > 1,
0241         if Verbose > 0,
0242           fprintf('Multiple matches found for %s in chain %s in %s\n', N,Chain,File.Filename);
0243         end
0244       end
0245     end

Generated on Fri 03-Apr-2009 09:52:35 by m2html © 2003