Home > FR3DSource > xConstructQuery.m

xConstructQuery

PURPOSE ^

xConstructQuery(Query,File) fills in details of Query from File

SYNOPSIS ^

function [Query] = xConstructQuery(Query,File)

DESCRIPTION ^

 xConstructQuery(Query,File) fills in details of Query from File

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 % xConstructQuery(Query,File) fills in details of Query from File
0002 
0003 % defaults:
0004 % Query.Geometric = 1 if Query.Filename and Query.NTList are defined
0005 % Query.Geometric = 0 if not; in that case, Query.Mask, Query.Edges, or
0006 %      Query.MaxDiff need to be defined, or there is no focus for the search
0007 % Query.ExcludeOverlap = 1 if there are 7 or more nucleotides
0008 % Query.Mask is "NNNNN..."
0009 % Query.Sequential is 1 if Query.MaxDiff is non-empty
0010 % Query.MaxDiff is [Inf Inf ...] by default
0011 % Query.LocWeight is [1 1 1 1...]
0012 % Query.AngleWeight is [1 1 1 1...]
0013 % Query.DiscCutoff is 0.4
0014 % Query.RelCutoff is Query.DiscCutoff, if not overridden
0015 % Query.ChainList is not needed unless there is ambiguity in nucleotide
0016 %                 numbers, and if there is, xConstructQuery will tell you
0017 
0018 function [Query] = xConstructQuery(Query,File)
0019 
0020 % ------------------------------- Determine whether search is geom. or symb.
0021 
0022 if ~isfield(Query,'Geometric'),
0023   if isfield(Query,'Filename') & isfield(Query,'NTList'),
0024     Query.Geometric = 1;
0025   else
0026     Query.Geometric = 0;
0027   end
0028 end
0029 
0030 % ------------------------------- Determine number of nucleotides if geometric
0031 
0032 if Query.Geometric == 1,
0033   Query.NumNT = length(Query.NTList);
0034 end  
0035 
0036 % ------------------------------- Determine number of nucleotides if symbolic
0037 
0038 if Query.Geometric == 0,
0039   m = 0;
0040   if isfield(Query,'Edges'),
0041     [s,t] = size(Query.Edges);
0042     m = max([m s t]);               % number of nucleotides
0043   end
0044   if isfield(Query,'Diagonal'),
0045     m = length(Query.Diagonal);
0046   end
0047   if isfield(Query,'Mask'),
0048     m = length(Query.Mask);
0049   end
0050   if isfield(Query,'Diff'),
0051     [s,t] = size(Query.Diff);
0052     m = max([m s t]);               % number of nucleotides
0053   end
0054   if isfield(Query,'MaxDiff'),
0055     [s,t] = size(Query.MaxDiff);
0056     m = max([m s t]);
0057   end
0058   if isfield(Query,'MaxDiffVector'),
0059     [s,t] = size(Query.MaxDiffVector);
0060     m = max(m,max(s,t)+1);
0061   end
0062   if isfield(Query,'MinDiff'),
0063     [s,t] = size(Query.MinDiff);
0064     m = max([m s t]);
0065   end
0066   if isfield(Query,'MinDiffVector'),
0067     [s,t] = size(Query.MinDiffVector);
0068     m = max(m,max(s,t)+1);
0069   end
0070   if m == 0,
0071     fprintf('Not enough information to form a query motif.\n');
0072     fprintf('No search was conducted.\n');
0073   else
0074     Query.NumNT = m;
0075   end
0076 end
0077 
0078 if isfield(Query,'NumNT'),
0079 
0080 % --------- Set default values
0081 
0082 if ~isfield(Query,'ExcludeOverlap'),
0083   if Query.NumNT >= 7,
0084     Query.ExcludeOverlap = 1;
0085   else
0086     Query.ExcludeOverlap = 0;
0087   end
0088 end
0089 
0090 if ~isfield(Query,'Number'),
0091   Query.Number = '0';
0092 end
0093 
0094 if ~isfield(Query,'Name'),
0095   Query.Name = num2str(Query.Number);
0096 end
0097 
0098 if ~isfield(Query,'Description'),
0099   Query.Description = Query.Name;
0100 end
0101 
0102 if ~isfield(Query,'Config'),
0103   for i=1:Query.NumNT,
0104     Query.Config{i} = '';
0105   end
0106 else
0107   for i=(length(Query.Config)+1):Query.NumNT,
0108     Query.Config{i} = '';
0109   end
0110 end
0111 
0112 if (Query.Geometric == 0) & ~isfield(Query,'Diameter'),
0113   Query.Diameter = 30;
0114 end
0115 
0116 % ---------------------------------------------------------------------
0117 
0118 if Query.Geometric == 1,                    % model comes from a file
0119     
0120   if ~isfield(Query,'Diagonal'),
0121     if ~isfield(Query,'LocWeight'),
0122       Query.LocWeight = ones(1,Query.NumNT);
0123     end
0124     if ~isfield(Query,'AngleWeight'),
0125       Query.AngleWeight = ones(1,Query.NumNT);
0126     end
0127   end
0128 
0129   if ~isfield(Query,'DiscCutoff'),
0130     Query.DiscCutoff = 0.4;
0131   end
0132 
0133   if ~isfield(Query,'RelCutoff'),
0134     Query.RelCutoff = Query.DiscCutoff;
0135   end
0136 
0137   % --------- basic parameters for the model
0138 
0139   if isfield(Query,'ChainList'),
0140     Query.Indices  = zIndexLookup(File,Query.NTList,Query.ChainList);
0141   else
0142     Query.Indices  = zIndexLookup(File,Query.NTList);
0143   end
0144 
0145   for i=1:Query.NumNT,
0146     Query.NT(i) = File.NT(Query.Indices(i));
0147   end
0148 
0149   Query.Edge  = File.Edge(Query.Indices,Query.Indices);
0150 end
0151 
0152 % --------- Interpret model mask
0153 
0154 % For nucleotide i, the vector Query.OKCodes{i} has 0's and 1's
0155 % which tell which nucleotide codes (A=1, C=2,...) meet the mask
0156 
0157 if isfield(Query,'Mask'),
0158   for i=1:Query.NumNT,
0159     switch Query.Mask(i)
0160      case 'A', OK = [1 0 0 0];
0161      case 'C', OK = [0 1 0 0];
0162      case 'G', OK = [0 0 1 0];
0163      case 'U', OK = [0 0 0 1];
0164      case 'M', OK = [1 1 0 0];
0165      case 'R', OK = [1 0 1 0];
0166      case 'W', OK = [1 0 0 1];
0167      case 'S', OK = [0 1 1 0];
0168      case 'Y', OK = [0 1 0 1];
0169      case 'K', OK = [0 0 1 1];
0170      case 'V', OK = [1 1 1 0];
0171      case 'H', OK = [1 1 0 1];
0172      case 'D', OK = [1 0 1 1];
0173      case 'B', OK = [0 1 1 1];
0174      case 'N', OK = [1 1 1 1];
0175     end
0176     Query.OKCodes{i} = OK;
0177   end
0178 end
0179 
0180 % --------- Interpret model diagonal (mask and weights)
0181 
0182 % For nucleotide i, the vector Query.OKCodes{i} has 0's and 1's
0183 % which tell which nucleotide codes (A=1, C=2,...) meet the mask
0184 
0185 if isfield(Query,'Diagonal'),
0186   for i=1:Query.NumNT,
0187     [OK,LW,AW]           = xGetNuclSpec(Query.Diagonal{i});
0188     Query.OKCodes{i}     = OK;
0189     Query.LocWeight(i)   = LW;
0190     Query.AngleWeight(i) = AW;
0191   end
0192 end
0193 
0194 % --------- If no nucleotide mask has been encountered
0195 
0196 if ~isfield(Query,'OKCodes'),
0197   for i=1:Query.NumNT,
0198     Query.OKCodes{i} = [1 1 1 1];
0199   end
0200 end
0201 
0202 % --------- Make edge matrix antisymmetric and numeric
0203 
0204 % 1-AA  2-CA  3-GA  4-UA  5-AC  6-CC  7-GC  8-UC
0205 % 9-AG 10-CG 11-GG 12-UG 13-AU 14-CU 15-GU 16-UU
0206 
0207 RevPair = [1 5 9 13 2 6 10 14 3 7 11 15 4 8 12 16];  % exchange AC for CA etc.
0208 
0209 if isfield(Query,'Edges'),
0210   Query.Edges{Query.NumNT,Query.NumNT} = 0;
0211   Query.EdgeNums{Query.NumNT,Query.NumNT} = 0;
0212   Query.ExcludeEdges{Query.NumNT,Query.NumNT} = 0;
0213   Query.OKPairs{Query.NumNT,Query.NumNT} = 0;
0214   Query.ExPairs{Query.NumNT,Query.NumNT} = 0;
0215   Query.BasePhos{Query.NumNT,Query.NumNT} = 0;
0216   Query.ExcludeBasePhos{Query.NumNT,Query.NumNT} = 0;
0217   Query.Flank{Query.NumNT,Query.NumNT} = [];
0218   Query.Range{Query.NumNT,Query.NumNT} = [];
0219   Query.Coplanar{Query.NumNT,Query.NumNT} = [];
0220   Query.OKBB{Query.NumNT,Query.NumNT} = [];
0221   Query.ExBB{Query.NumNT,Query.NumNT} = [];
0222 
0223   for i=1:Query.NumNT,
0224     for j=(i+1):Query.NumNT,
0225       if ~isempty(Query.Edges{j,i}),
0226         [ReqEdge,ExEdge,OKPairs,ExPairs,BP1,BP2,EBP1,EBP2,Flank,Range,Coplanar,ReqBB,ExBB] = xGetEdgeNums(Query.Edges{j,i});
0227         Query.EdgeNums{j,i}     =  ReqEdge;
0228         Query.EdgeNums{i,j}     = -ReqEdge;
0229         Query.ExcludeEdges{j,i} =  ExEdge;
0230         Query.ExcludeEdges{i,j} = -ExEdge;
0231         Query.OKPairs{j,i}      = OKPairs;
0232         Query.OKPairs{i,j}      = RevPair(OKPairs);
0233         Query.ExPairs{j,i}      = ExPairs;
0234         Query.ExPairs{i,j}      = RevPair(ExPairs);
0235         Query.BasePhos{j,i}     = BP1;
0236         Query.BasePhos{i,j}     = BP2;
0237         Query.ExcludeBasePhos{j,i} = EBP1;
0238         Query.ExcludeBasePhos{i,j} = EBP2;
0239     Query.Flank{i,j}           = Flank;
0240     Query.Range{i,j}           = Range;
0241         Query.Coplanar{i,j}        = Coplanar;
0242         Query.OKBB{j,i}      = ReqBB;
0243         Query.ExBB{j,i}      = ExBB;
0244         Query.OKBB{i,j}      = ReqBB;
0245         Query.ExBB{i,j}      = ExBB;
0246       elseif ~isempty(Query.Edges{i,j}),
0247         [ReqEdge,ExEdge,OKPairs,ExPairs,BP1,BP2,EBP1,EBP2,Flank,Range,Coplanar,ReqBB,ExBB] = xGetEdgeNums(Query.Edges{i,j});
0248         Query.EdgeNums{j,i}     = -ReqEdge;
0249         Query.EdgeNums{i,j}     =  ReqEdge;
0250         Query.ExcludeEdges{j,i} = -ExEdge;
0251         Query.ExcludeEdges{i,j} =  ExEdge;
0252         Query.OKPairs{j,i}      = RevPair(OKPairs);
0253         Query.OKPairs{i,j}      = OKPairs;
0254         Query.ExPairs{j,i}      = RevPair(ExPairs);
0255         Query.ExPairs{i,j}      = ExPairs;
0256         Query.BasePhos{i,j}     = BP1;
0257         Query.BasePhos{j,i}     = BP2;
0258         Query.ExcludeBasePhos{i,j} = EBP1;
0259         Query.ExcludeBasePhos{j,i} = EBP2;
0260         Query.Flank{j,i}           = Flank;
0261         Query.Range{j,i}           = Range;
0262         Query.Coplanar{j,i}        = Coplanar;
0263         Query.OKBB{i,j}      = ReqBB;
0264         Query.ExBB{i,j}      = ExBB;
0265         Query.OKBB{j,i}      = ReqBB;
0266         Query.ExBB{j,i}      = ExBB;
0267       else
0268         Query.EdgeNums{i,j} = [];
0269         Query.EdgeNums{i,j} = [];
0270       end
0271     end
0272   end
0273 end
0274 
0275 % --------- precompute parameters for geometric screening and ranking
0276 
0277 if Query.Geometric > 0,
0278   Query.LocWeight = Query.NumNT * Query.LocWeight / sum(Query.LocWeight);
0279                                         % weights sum to Query.NumNT
0280   Query.SSCutoff  =(Query.NumNT^2)*(Query.RelCutoff^2)*cumsum(Query.LocWeight);
0281                                   % cutoffs for first 1, 2, 3, ... nucleotides
0282   
0283   Query = xPrecomputeForDiscrepancy(Query);
0284 
0285   Query.Distance = zMutualDistance(Query.Centers,Inf);
0286 
0287   Query.LDiscCutoff = (Query.NumNT*Query.RelCutoff)^2;
0288 
0289   if (Query.NumNT >= 2),
0290 
0291     if isfield(Query,'Flex'),                  % not checked in a long time
0292       Query.DistanceScreen = 0;        % cannot use sums of squares with flex
0293       Query.Flex(Query.NumNT,Query.NumNT) = 0; % make big enough
0294       Query.Flex = Query.Flex + Query.Flex';   % make symmetric
0295     end
0296 
0297     Query.DistCutoff = max(max(Query.Distance)) ...
0298                      + sqrt(2)*Query.NumNT*Query.DiscCutoff;
0299                                      % distances this large needed in File
0300 
0301   else       % --------- Special calculations for two-nucleotide motifs
0302     Query.DistCutoff = Query.Distance(1,2) + 2 * Query.DiscCutoff;
0303   end
0304 
0305 else
0306   Query.SSCutoff = Inf * ones(1,Query.NumNT);
0307   Query.DistCutoff = 30;        % default distance cutoff for symbolic search
0308 end
0309 
0310 % --------- Read minimum and maximum distance specifications
0311 
0312 if isfield(Query,'Diff'),
0313   [s,t] = size(Query.Diff);
0314   if ~strcmp(class(Query.Diff),'char'),
0315     for i=1:s,
0316       for j=1:t,
0317         if ~isempty(Query.Diff{i,j}),
0318           [md,MD,sign] = xGetDiffSpec(Query.Diff{i,j});
0319           Query.MaxDiff(i,j) = MD;
0320           Query.MinDiff(i,j) = md;
0321           Query.DifferenceSign(i,j) = sign;
0322         end
0323       end
0324     end
0325   end
0326 end
0327 
0328 % --------- implement maxdiff screening and set difference signs
0329 
0330 if isfield(Query,'MaxDiff'),               % matrix of max differences
0331   D  = Inf*ones(Query.NumNT,Query.NumNT);
0332   DS = zeros(Query.NumNT,Query.NumNT);
0333 
0334   [s,t] = size(Query.MaxDiff);
0335 
0336     for i=1:s,
0337       for j=1:t,
0338         D(i,j) = Query.MaxDiff(i,j);       % read in values set
0339         if D(i,j) == 0,                    % if no value set, use infinity
0340           D(i,j) = Inf;
0341         end
0342         DS(i,j) = Query.DifferenceSign(i,j);
0343      end
0344     end
0345 
0346     for i=1:Query.NumNT,
0347       D(i,i) = 0;
0348       for j=1:Query.NumNT,
0349         D(i,j) = min(D(i,j),D(j,i));       % symmetrize entries in D
0350         D(j,i) = D(i,j);
0351 
0352         if DS(i,j) ~= 0,
0353           DS(j,i) = -DS(i,j);
0354         end
0355       end
0356     end
0357 
0358    for m=1:Query.NumNT,                    % just in case
0359     for i=1:Query.NumNT,
0360       for j=1:Query.NumNT,
0361         for k=1:Query.NumNT,
0362           D(i,j) = min(D(i,j),D(i,k)+D(k,j)); % infer shorter maxdiff's
0363         end
0364       end
0365     end
0366    end
0367 
0368    Query.MaxDiffMat = D;
0369    Query.DifferenceSignMat = DS;
0370 end
0371 
0372 % --------- implement mindiff screening
0373 
0374 if isfield(Query,'MinDiff'),
0375   D = zeros(Query.NumNT,Query.NumNT);       % minimum distance 1
0376 
0377   [s,t] = size(Query.MinDiff);
0378 
0379     for i=1:s,
0380       for j=1:t,
0381         D(i,j) = Query.MinDiff(i,j);       % read in values set
0382         if D(i,j) == 0,                    % if no value set, use 1
0383           D(i,j) = 1;
0384         end
0385      end
0386     end
0387 
0388     for i=1:Query.NumNT,
0389       D(i,i) = 0;
0390       for j=1:Query.NumNT,
0391         D(i,j) = max(D(i,j),D(j,i));       % symmetrize entries in D
0392         D(j,i) = D(i,j);
0393       end
0394     end
0395 
0396     Query.MinDiffMat = D;
0397 end
0398 
0399 % --------- implement maxdiff screening for a maxdiff vector
0400 
0401 if isfield(Query,'MaxDiffVector'),
0402   D = Inf*ones(Query.NumNT,Query.NumNT);
0403 
0404   [s,t] = size(Query.MaxDiffVector);
0405 
0406   if min(s,t) == 1,                        % vector of max differences
0407 
0408     for i=1:(Query.NumNT-1),
0409       D(i,i+1) = Query.MaxDiffVector(i);   % put values in a matrix
0410     end
0411 
0412     for d=3:Query.NumNT,                   % diagonal to consider
0413       for i=1:(Query.NumNT-d+1),
0414         j = i + d - 1;
0415         D(i,j) = D(i,j-1) + D(i+1,j);      % infer max differences
0416       end
0417     end
0418 
0419     Query.MaxDiffMat = triu(D) + triu(D)';    % make symmetric
0420 
0421     if isfield(Query,'Filename'),
0422       [y,i] = sort(Query.Indices);         % re-order for nucleotide order
0423     else
0424       i = 1:Query.NumNT;
0425     end
0426 
0427     k(i) = 1:Query.NumNT;
0428 
0429     Query.MaxDiffMat = Query.MaxDiffMat(k,k); % re-order matrix of differences
0430   end    
0431 end
0432 
0433 % --------- implement mindiff screening for a mindiff vector
0434 
0435 if isfield(Query,'MinDiffVector'),
0436   D = zeros(Query.NumNT,Query.NumNT);       % minimum distance 1
0437 
0438   [s,t] = size(Query.MinDiffVector);
0439 
0440   if min(s,t) == 1,                         % vector of min differences
0441 
0442     for i=1:(Query.NumNT-1),
0443       D(i,i+1) = Query.MinDiffVector(i);    % put values in a matrix
0444     end
0445 
0446     Query.MinDiffMat = D + D';              % make symmetric
0447 
0448     if isfield(Query,'Filename'),
0449       [y,i] = sort(Query.Indices);          % re-order for nucleotide order
0450     else
0451       i = 1:Query.NumNT;
0452     end
0453 
0454     k(i) = 1:Query.NumNT;
0455 
0456     Query.MinDiffMat = Query.MinDiffMat(k,k); % re-order matrix of differences
0457   end    
0458 end
0459 
0460 end  % if isfield(Query,NumNT)

Generated on Fri 03-Apr-2009 09:52:35 by m2html © 2003