Home > FR3DSource > zGetNTData.m

zGetNTData

PURPOSE ^

zGetNTData(Filenames,ReadCode,Verbose) reads data files, depending on ReadCode

SYNOPSIS ^

function [Files] = zGetNTData(Filenames,ReadCode,Verbose)

DESCRIPTION ^

 zGetNTData(Filenames,ReadCode,Verbose) reads data files, depending on ReadCode

 If ReadCode = 0, it looks for Filename.mat and reads it if it exists.
 If ReadCode = 1, it looks for Filename.mat, reads it, and re-does 
    the classification of interacting pairs
 If ReadCode = 2, it looks for Filename.mat, reads it 
 If ReadCode = 3, it looks for Filename.mat, reads it, re-does the
    classification of pairs
 If ReadCode = 4, it reads Filename.pdb, analyzes each nucleotide, 
    and classifies interacting pairs

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 % zGetNTData(Filenames,ReadCode,Verbose) reads data files, depending on ReadCode
0002 %
0003 % If ReadCode = 0, it looks for Filename.mat and reads it if it exists.
0004 % If ReadCode = 1, it looks for Filename.mat, reads it, and re-does
0005 %    the classification of interacting pairs
0006 % If ReadCode = 2, it looks for Filename.mat, reads it
0007 % If ReadCode = 3, it looks for Filename.mat, reads it, re-does the
0008 %    classification of pairs
0009 % If ReadCode = 4, it reads Filename.pdb, analyzes each nucleotide,
0010 %    and classifies interacting pairs
0011 
0012 function [Files] = zGetNTData(Filenames,ReadCode,Verbose)
0013 
0014 [CL, CurrentVersion] = zClassLimits;  % read current version number
0015 
0016 if nargin < 2,
0017   ReadCode = 0;
0018 end
0019 
0020 if nargin < 3,
0021   Verbose = 0;
0022 end
0023 
0024 % path(path,pwd);
0025 
0026 if strcmp(class(Filenames),'char'),
0027   Filenames = {Filenames};
0028 end
0029 
0030 for f=1:length(Filenames),
0031   Filename = Filenames{f};
0032 
0033   if isempty(strfind(lower(Filename),'.pdb')),
0034     PDBFilename  = [Filename '.pdb'];
0035   else
0036     PDBFilename  = Filename;
0037     i = strfind(Filename,'.');
0038     Filename = Filename(1:(i(end)-1));
0039     ReadCode = 4;
0040   end
0041 
0042   FILENAME = upper(Filename);
0043   filename = lower(Filename);
0044 
0045   ClassifyCode = 0;
0046   SaveCode     = 0;
0047   ReadPDB      = 0;
0048 
0049   if (ReadCode > 3),
0050     ReadFull = 1;
0051   else
0052     ReadFull = 0;
0053   end
0054 
0055   clear File
0056 
0057   if ReadCode == 4,                     % re-read the PDB file
0058     File = zReadandAnalyze(PDBFilename,Verbose);   % might not work on a Mac
0059     ClassifyCode = 1;
0060     ReadFull = 0;                       % no need to read PDB file again
0061     ReadPDB  = 1;
0062   else                                  % try to load a precomputed version
0063     if (exist(strcat(Filename,'.mat'),'file') > 0),
0064       load(strcat(Filename,'.mat'),'File','-mat');
0065       if Verbose > 0,
0066         fprintf('Loaded %s\n', [Filename '.mat']);
0067       end
0068     elseif (exist(strcat(FILENAME,'.MAT'),'file') > 0),  % helps on a Mac
0069       load(strcat(FILENAME,'.MAT'),'File','-mat');
0070       if Verbose > 0,
0071         fprintf('Loaded  %s\n', [FILENAME '.MAT']);
0072       end
0073     elseif (exist(strcat(filename,'.mat'),'file') > 0),  % helps on a Mac
0074       load(strcat(filename,'.mat'),'File','-mat');
0075       if Verbose > 0,
0076         fprintf('Loaded   %s\n', [filename '.MAT']);
0077       end
0078     else
0079       ReadFull = 1;
0080     end
0081   end
0082 
0083   if (ReadFull == 1),                     % try to load the full version
0084     File = zReadandAnalyze(PDBFilename,Verbose);
0085     ClassifyCode = 1;
0086     ReadPDB      = 1;
0087   end
0088 
0089   if ~exist('File'),
0090     File = [];
0091   end
0092 
0093   if ~isfield(File,'ClassVersion'),
0094     File.ClassVersion = 0;
0095   end
0096 
0097   if ~isfield(File,'BasePhosphate'),
0098     File.BasePhosphate = sparse(zeros(length(File.NT)));
0099   end
0100 
0101   if isempty(File.BasePhosphate),
0102     File.BasePhosphate = sparse(zeros(length(File.NT)));
0103   end
0104 
0105   if ~isfield(File,'Covalent'),
0106     if length(File.NT) > 1,
0107       File = zBackboneContinuity(File);
0108     else
0109       File.Covalent = sparse(zeros(length(File.NT)));
0110     end
0111   end
0112 
0113   if isfield(File,'Inter'),
0114     File = rmfield(File,'Inter');
0115   end
0116 
0117   if isfield(File,'SizeCode'),
0118     File = rmfield(File,'SizeCode');
0119   end
0120 
0121   if isfield(File,'Pair'),
0122     File = rmfield(File,'Pair');
0123   end
0124 
0125   if isfield(File,'Header'),
0126     File = rmfield(File,'Header');
0127   end
0128 
0129   Overlap = 0;
0130 
0131   File.Distance = [];                        % only compute this if needed
0132 
0133   if length(File.NT) > 1,                    % if it has nucleotides,
0134     if length(File.NT(1).Sugar(:,1)) < 13,
0135       File = zStoreO3(File);
0136     end
0137 
0138     if (ReadCode == 1) | (ReadCode == 3) | (ReadCode == 4) | ... 
0139       (ClassifyCode == 1) | (File.ClassVersion < CurrentVersion),
0140 
0141       File.Edge = sparse(File.NumNT,File.NumNT);
0142       File.Coplanar = sparse(File.NumNT,File.NumNT);
0143 
0144       c = cat(1,File.NT(1:File.NumNT).Center); % nucleotide centers
0145 
0146       File.Distance = zMutualDistance(c,16); % compute distances < 16 Angstroms
0147       d = sort(nonzeros(File.Distance));
0148 
0149       if isempty(d),
0150        % more than one nucleotide, but too far apart to pair
0151        File.Range = sparse(File.NumNT,File.NumNT);
0152        File.BasePhosphate = sparse(File.NumNT,File.NumNT);
0153       else
0154        if d(min(10,length(d))) < 1,
0155          fprintf('%s has overlapping nucleotides and should be avoided\n',File.Filename);
0156 
0157          Overlap = 1;
0158        else
0159          t = cputime;
0160          File = zClassifyPairs(File,Verbose);
0161 
0162          if Verbose > 0,
0163            fprintf(' Base-phosphate interactions ...');
0164          end
0165 
0166          File = zPhosphateInteractions(File);
0167          File.ClassVersion = CurrentVersion;
0168          ClassifyCode = 1;
0169 
0170          if Verbose > 0,
0171            fprintf(' Interaction range ... ');
0172          end
0173 
0174          File = zInteractionRange(File,Verbose);
0175 
0176          if Verbose > 0,
0177            fprintf('\nClassification took %4.2f minutes\n', (cputime-t)/60);
0178          end
0179        end
0180       end
0181     end
0182   else
0183     File.ClassVersion = CurrentVersion;
0184   end
0185 
0186   if length(File.NT) > 0,
0187    if ~isfield(File.NT(1),'Syn'),
0188     SynList = mSynList(File);
0189     for k=1:length(File.NT),
0190       File.NT(k).Syn = SynList(k);
0191     end
0192     SaveCode = 1;
0193    end
0194   end
0195 
0196   if ~isfield(File,'PDBFilename'),
0197     File.PDBFilename = [File.Filename '.pdb'];   % will self-correct the
0198                                                  % next time PDB is read
0199   end
0200 
0201   if ~isfield(File,'Backbone'),
0202     File = zBackboneConformation(File,Verbose);
0203     if File.NumNT > 1,
0204       File.Backbone(1,1) = 1;                    % register that we checked
0205     end
0206     SaveCode = 1;
0207   end
0208 
0209   if File.NumNT > 1 && sum(sum(File.Backbone)) == 0,
0210     File = zBackboneConformation(File,Verbose);
0211     SaveCode = 1;
0212   end
0213 
0214   if ~isfield(File,'Range') || ~isfield(File,'Crossing'),
0215     File = zInteractionRange(File,Verbose);
0216     ClassifyCode = 1;
0217   end
0218 
0219   if ~isfield(File,'Flank'),
0220     File = xFlankingPairs(File);
0221   end
0222 
0223   if ~isfield(File,'Info'),
0224     File = zGetPDBInfo(File);          % get resolution and other info
0225     SaveCode = 1;
0226   else
0227     if isempty(File.Info.Descriptor),
0228       File = zGetPDBInfo(File);          % look for file information
0229       if ~isempty(File.Info.Descriptor),
0230         SaveCode = 1;
0231       end
0232     end
0233   end
0234 
0235   % ----------------- If it just read the .pdb file and no pairs, look at BUC
0236 
0237   if (ReadPDB == 1) && (isempty(strfind(PDBFilename,'.pdb1'))),
0238     E = abs(File.Edge);                       % all interactions
0239     bp = full(sum(sum((E > 0) .* (E < 15))));       % number of basepairs
0240     r  = bp / length(File.NT);                % ratio of bp to nt
0241     if (r < 0.4),                             % very few basepairs found
0242       if Verbose > 0,
0243         fprintf('Few basepairs found (ratio %7.2f), reading the biological unit coordinates\n',r);
0244       end
0245       File1 = zGetNTData([PDBFilename '1'],4,1);   % read biological unit coords
0246       if length(File1.NT) > 0,
0247         E1  = abs(File1.Edge);
0248         bp1 = full(sum(sum((E > 0) .* (E < 15))));       % number of basepairs
0249         r1  = bp / length(File1.NT);               % ratio of bp to nt
0250         if Verbose > 0,
0251           fprintf('Biological unit coordinates have ratio %7.2f\n',r1);
0252         end
0253         if r1 > r,
0254           File = File1;                         % use biological unit coords
0255         end
0256       end
0257     end
0258   end
0259     
0260   File = orderfields(File);
0261 
0262   Saved = 0;
0263 
0264   if (ReadCode > 0) || (ClassifyCode > 0) || (SaveCode > 0),
0265     zSaveNTData(File,Verbose);
0266     Saved = 1;
0267   end
0268 
0269   Files(f) = File;
0270 
0271 end

Generated on Fri 03-Apr-2009 09:52:35 by m2html © 2003