Home > FR3DSource > zReadandAnalyze.m

zReadandAnalyze

PURPOSE ^

zReadandAnalyze.m read a _mod.pdb file, then

SYNOPSIS ^

function [File] = zReadandAnalyze(PDBFilename,Verbose)

DESCRIPTION ^

 zReadandAnalyze.m read a _mod.pdb file, then
 pulls out the locations of base atoms,
 and computes the best shift and rotation from a standard base

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 % zReadandAnalyze.m read a _mod.pdb file, then
0002 % pulls out the locations of base atoms,
0003 % and computes the best shift and rotation from a standard base
0004 
0005 % It returns a variable File with fields:
0006 %
0007 % NT        array of nucleotide structures
0008 % NumNT     the total number of distinct nucleotides found
0009 % Distance  sparse matrix of center-center distances, if less than 15
0010 
0011 % Each nucleotide has these fields:
0012 %  Base     Nucleotide letter A, C, G, U
0013 %  Code     Numeric code for nucleotide letter, A=1, C=2, G=3, U=4
0014 %  Chain    Which chain of the molecule the base is located in
0015 %  Number   Cell array of nucleotide number within the molecule
0016 %  Loc      Observed locations of base atoms
0017 %  Center   Geometric center of observed base atoms
0018 %  Sugar    Observed locations of sugar atoms
0019 %  Rot      3x3 matrix giving rotation from standard to observed locations
0020 %  Scale    Best scaling from standard to observed locations
0021 %  Shift    Best shift from standard to observed locations
0022 %  Fit      Best fit of observed locations to standard base locations
0023 
0024 function [File] = zReadandAnalyze(PDBFilename,Verbose)
0025 
0026 i = strfind(PDBFilename,'.');
0027 Filename = PDBFilename(1:(i(end)-1));
0028 
0029 if nargin < 2,
0030   Verbose = 1;
0031 end
0032 
0033 % read PDBFilename ----------------------------------------------------
0034 
0035 if exist([pwd filesep 'PDBFiles' filesep 'Trouble reading']) == 7,
0036   if exist([pwd filesep 'PDBFiles' filesep 'Trouble reading' filesep PDBFilename]) == 2,
0037     Stop = 1;
0038     if Verbose > 0,
0039       fprintf('zReadandAnalyze is skipping %s because it appeared in FR3D/PDBFiles/Trouble Reading\n', PDBFilename);
0040     end
0041   else
0042     Stop = 0;
0043   end
0044 else
0045   Stop = 0;
0046 end
0047 
0048 fid = fopen(PDBFilename,'r');
0049 
0050 if (fid > 0) && (Stop == 0),
0051 
0052 fclose(fid);
0053 
0054 a = 1000+round(8900*rand);
0055 
0056 TempFileName = ['##TempPDB' num2str(a) '.pdb'];
0057 
0058 Header = zExtractAtomsPDB(PDBFilename,TempFileName,Verbose);
0059 
0060 [ATOM_TYPE, ATOMNUMBER, ATOMNAME, VERSION, NTLETTER, CHAIN, NTNUMBER, P, Readable] = zReadPDBTextRead(TempFileName);
0061 
0062 delete(TempFileName);
0063 
0064 if Readable == 0,
0065   fprintf('zReadandAnalyze was unable to read the PDB file %s\n',PDBFilename);
0066   fprintf('Please move it to FR3D/PDBFiles/Trouble Reading.\n');
0067 end
0068 
0069 % Move models in NMR file apart ---------------------------------------------
0070 
0071 if isfield(Header,'Expdata') & (length(Header.ModelStart) > 1)
0072   if ~isempty(strfind(Header.Expdata,'NMR')),
0073     Header.ModelStart = [Header.ModelStart length(NTNUMBER)+1];
0074     for m = 1:(length(Header.ModelStart)-1),
0075       a = Header.ModelStart(m);
0076       b = Header.ModelStart(m+1)-1;
0077       P(a:b,1) = P(a:b,1) + (m-1)*1000;     % shift by 1000 for each model
0078     end
0079   end
0080 end
0081 
0082 % Read standard bases from Sponer QM calculations --------------------------
0083 
0084 zStandardBases                % read in QM locations of atoms in 4 bases
0085 
0086 Lim(1,:) = [10 8 11 8];       % number of base atoms, excluding hydrogen
0087 Lim(2,:) = [15 13 16 12];     % total number of atoms, including hydrogen
0088 
0089 % Extract locations of base and sugar atoms ---------------------------------
0090 
0091 NT = [];                                    % nucleotide data structure
0092 n  = 1;                                     % current NT index
0093 i  = 1;                                     % current atom/row number
0094 unrec = 0;                                  % count unrecognized nucleotides
0095 
0096 while i < length(NTNUMBER),                 % go through all atoms
0097   Flag = 0;                                 % not a recognized nucleotide
0098   j = [];                                   % initialize rows of next nucleo.
0099   ntnum = NTNUMBER{i};                      % nucleotide number from pdb file
0100 
0101   while (i <= length(NTNUMBER)) & (strcmp(NTNUMBER{i},ntnum)), 
0102                                             % pull out rows for this nucleotide
0103                                             % assumes they are continguous
0104     j = [j; i];                             % add rows for this nucleotide
0105     i = i + 1;                              % go to the next row
0106   end
0107 
0108   NT(n).Base    = NTLETTER{j(1)};         % letter for this nucleotide
0109   NT(n).Chain   = CHAIN{j(1)};            % what chain nucleotide is in
0110   NT(n).Number  = NTNUMBER{j(1)};         % nucleotide number for this molecule
0111 
0112   Sugar = Inf * ones(12,3);
0113   Loc   = Inf * ones(11,3);
0114 
0115   if (NT(n).Base == 'A') | (NT(n).Base == 'A'),
0116     for k = min(j):max(j),
0117      if (VERSION{k}=='A') | (VERSION{k}==' '),
0118       switch ATOMNAME{k},
0119         case 'N9',      Loc( 1,:) = P(k,:);
0120         case 'C4',      Loc( 2,:) = P(k,:);
0121         case 'N3',      Loc( 3,:) = P(k,:);
0122         case 'N1',      Loc( 4,:) = P(k,:);
0123         case 'C6',      Loc( 5,:) = P(k,:);
0124         case 'N6',      Loc( 6,:) = P(k,:);
0125         case 'C8',      Loc( 7,:) = P(k,:);
0126         case 'C5',      Loc( 8,:) = P(k,:);
0127         case 'C2',      Loc( 9,:) = P(k,:);
0128         case 'N7',      Loc(10,:) = P(k,:);
0129         case {'C1*','C1'''},     Sugar( 1,:) = P(k,:);
0130         case {'C2*','C2'''},     Sugar( 2,:) = P(k,:);
0131         case {'O2*','O2'''},     Sugar( 3,:) = P(k,:);
0132         case {'C3*','C3'''},     Sugar( 4,:) = P(k,:);
0133         case {'O3*','O3'''},     Sugar( 5,:) = P(k,:);
0134         case {'C4*','C4'''},     Sugar( 6,:) = P(k,:);
0135         case {'O4*','O4'''},     Sugar( 7,:) = P(k,:);
0136         case {'C5*','C5'''},     Sugar( 8,:) = P(k,:);
0137         case {'O5*','O5'''},     Sugar( 9,:) = P(k,:);
0138         case 'P',       Sugar(10,:) = P(k,:);
0139         case {'O1P','OP1'},     Sugar(11,:) = P(k,:);
0140         case {'O2P','OP2'},     Sugar(12,:) = P(k,:);
0141       end
0142      end
0143     end
0144     Loc(11,:)    = zeros(1,3);
0145     NT(n).Loc    = Loc;
0146     NT(n).Sugar  = Sugar;
0147     NT(n).Center = mean(Loc(1:10,:));          % mean of heavy base atoms
0148     NT(n).Code   = 1;                          % A is 1, C is 2, etc.
0149   elseif (strcmp(NT(n).Base,'C')) | (strcmp(NT(n).Base,'C+')),
0150     for k = min(j):max(j),
0151      if (VERSION{k}=='A') | (VERSION{k}==' '),
0152       switch ATOMNAME{k},
0153         case 'N1',      Loc( 1,:) = P(k,:);
0154         case 'C2',      Loc( 2,:) = P(k,:);
0155         case 'O2',      Loc( 3,:) = P(k,:);
0156         case 'N3',      Loc( 4,:) = P(k,:);
0157         case 'C4',      Loc( 5,:) = P(k,:);
0158         case 'N4',      Loc( 6,:) = P(k,:);
0159         case 'C6',      Loc( 7,:) = P(k,:);
0160         case 'C5',      Loc( 8,:) = P(k,:);
0161         case {'C1*','C1'''},     Sugar( 1,:) = P(k,:);
0162         case {'C2*','C2'''},     Sugar( 2,:) = P(k,:);
0163         case {'O2*','O2'''},     Sugar( 3,:) = P(k,:);
0164         case {'C3*','C3'''},     Sugar( 4,:) = P(k,:);
0165         case {'O3*','O3'''},     Sugar( 5,:) = P(k,:);
0166         case {'C4*','C4'''},     Sugar( 6,:) = P(k,:);
0167         case {'O4*','O4'''},     Sugar( 7,:) = P(k,:);
0168         case {'C5*','C5'''},     Sugar( 8,:) = P(k,:);
0169         case {'O5*','O5'''},     Sugar( 9,:) = P(k,:);
0170         case 'P',       Sugar(10,:) = P(k,:);
0171         case {'O1P','OP1'},     Sugar(11,:) = P(k,:);
0172         case {'O2P','OP2'},     Sugar(12,:) = P(k,:);
0173       end
0174      end
0175     end
0176     Loc( 9,:) = zeros(1,3);
0177     Loc(10,:) = zeros(1,3);
0178     Loc(11,:) = zeros(1,3);
0179     NT(n).Loc    = Loc;
0180     NT(n).Sugar  = Sugar;
0181     NT(n).Center = mean(Loc(1:8,:));
0182     NT(n).Code   = 2;                          % A is 1, C is 2, etc.
0183   elseif (NT(n).Base == 'G') | (NT(n).Base == 'G'),
0184     for k = min(j):max(j),
0185      if (VERSION{k}=='A') | (VERSION{k}==' '),
0186       switch ATOMNAME{k},
0187         case 'N9',      Loc( 1,:) = P(k,:);
0188         case 'C4',      Loc( 2,:) = P(k,:);
0189         case 'N3',      Loc( 3,:) = P(k,:);
0190         case 'N1',      Loc( 4,:) = P(k,:);
0191         case 'C6',      Loc( 5,:) = P(k,:);
0192         case 'O6',      Loc( 6,:) = P(k,:);
0193         case 'C8',      Loc( 7,:) = P(k,:);
0194         case 'C5',      Loc( 8,:) = P(k,:);
0195         case 'C2',      Loc( 9,:) = P(k,:);
0196         case 'N7',      Loc(10,:) = P(k,:);
0197         case 'N2',      Loc(11,:) = P(k,:);
0198         case {'C1*','C1'''},     Sugar( 1,:) = P(k,:);
0199         case {'C2*','C2'''},     Sugar( 2,:) = P(k,:);
0200         case {'O2*','O2'''},     Sugar( 3,:) = P(k,:);
0201         case {'C3*','C3'''},     Sugar( 4,:) = P(k,:);
0202         case {'O3*','O3'''},     Sugar( 5,:) = P(k,:);
0203         case {'C4*','C4'''},     Sugar( 6,:) = P(k,:);
0204         case {'O4*','O4'''},     Sugar( 7,:) = P(k,:);
0205         case {'C5*','C5'''},     Sugar( 8,:) = P(k,:);
0206         case {'O5*','O5'''},     Sugar( 9,:) = P(k,:);
0207         case 'P',       Sugar(10,:) = P(k,:);
0208         case {'O1P','OP1'},     Sugar(11,:) = P(k,:);
0209         case {'O2P','OP2'},     Sugar(12,:) = P(k,:);
0210       end
0211      end
0212     end
0213     NT(n).Loc    = Loc;
0214     NT(n).Sugar  = Sugar;
0215     NT(n).Center = mean(Loc(1:11,:));
0216     NT(n).Code   = 3;                          % A is 1, C is 2, etc.
0217   elseif (NT(n).Base == 'U') | (strcmp(NT(n).Base,'+U')),
0218     for k = min(j):max(j),
0219      if (VERSION{k}=='A') | (VERSION{k}==' '),
0220       switch ATOMNAME{k},
0221         case 'N1',      Loc( 1,:) = P(k,:);
0222         case 'C2',      Loc( 2,:) = P(k,:);
0223         case 'O2',      Loc( 3,:) = P(k,:);
0224         case 'N3',      Loc( 4,:) = P(k,:);
0225         case 'C4',      Loc( 5,:) = P(k,:);
0226         case 'O4',      Loc( 6,:) = P(k,:);
0227         case 'C6',      Loc( 7,:) = P(k,:);
0228         case 'C5',      Loc( 8,:) = P(k,:);
0229         case {'C1*','C1'''},     Sugar( 1,:) = P(k,:);
0230         case {'C2*','C2'''},     Sugar( 2,:) = P(k,:);
0231         case {'O2*','O2'''},     Sugar( 3,:) = P(k,:);
0232         case {'C3*','C3'''},     Sugar( 4,:) = P(k,:);
0233         case {'O3*','O3'''},     Sugar( 5,:) = P(k,:);
0234         case {'C4*','C4'''},     Sugar( 6,:) = P(k,:);
0235         case {'O4*','O4'''},     Sugar( 7,:) = P(k,:);
0236         case {'C5*','C5'''},     Sugar( 8,:) = P(k,:);
0237         case {'O5*','O5'''},     Sugar( 9,:) = P(k,:);
0238         case 'P',       Sugar(10,:) = P(k,:);
0239         case {'O1P','OP1'},     Sugar(11,:) = P(k,:);
0240         case {'O2P','OP2'},     Sugar(12,:) = P(k,:);
0241       end
0242      end
0243     end
0244     Loc( 9,:) = zeros(1,3);
0245     Loc(10,:) = zeros(1,3);
0246     Loc(11,:) = zeros(1,3);
0247     NT(n).Loc    = Loc;
0248     NT(n).Sugar  = Sugar;
0249     NT(n).Center = mean(Loc(1:8,:));
0250     NT(n).Code   = 4;                          % A is 1, C is 2, etc.
0251   else 
0252     if unrec < 5,
0253       if Verbose > 0,
0254         fprintf('Unrecognized: %s %s\n', NT(n).Base,NT(n).Number);
0255       end
0256       unrec = unrec + 1;
0257     end
0258     n = n - 1;                                 % not a recognized
0259                                                % nucleotide, do nothing
0260     Flag = 1;
0261   end                                         % end four if statements
0262 
0263   if (Flag < 1),
0264     if (max(max(Loc)) == Inf),                 % base atoms missing
0265       if Verbose > 0,
0266         NumGood = length(find((Loc(1,:) < Inf) .* (abs(Loc(1,:)) > 0)));
0267         fprintf('Base %s%s has %d atoms, so it will be skipped\n',NT(n).Base,NT(n).Number,NumGood);
0268       end
0269       n = n - 1;
0270     elseif (max(max(Sugar)) == Inf),          % sugar atom missing
0271       for k = 1:12,
0272         if Sugar(k,1) == Inf,
0273           Sugar(k,:) = Loc(1,:);              % use glycosidic atom
0274         end
0275       end
0276       NT(n).Sugar = Sugar;
0277     end
0278   end
0279 
0280   n = n + 1;                                  % next nucleotide index
0281 
0282 end                                           % end while i < ... loop
0283 
0284 NumNT = n - 1;                                % total number of nucleotides
0285 
0286 % Compute best shift and rotation matrices ---------------------------------
0287 
0288 warning off MATLAB:divideByZero
0289 
0290 for n=1:NumNT,                                   % analyze all nucleotides
0291   C  = NT(n).Code;                               % A is 1, C is 2, ...
0292   L  = Lim(1,C);                                 % number of atoms in base
0293   X  = StandardLoc(1:L,:,C);                     % ideal base atom locations
0294   Y  = NT(n).Loc(1:L,:);                         % observed atom locations
0295 
0296   [r, sc, sh] = zBestTransformation(X,Y);        % find best rotation, shift
0297 
0298   L2 = Lim(2,C);                                 % num of atoms and hyd in base
0299   X2 = StandardLoc(1:L2,:,C);                    % ideal base & hyd locations
0300   F  = (sh*ones(1,L2) + r*X2')';                 % best fit without scaling
0301   e  = sqrt(sum(sum((Y - F(1:L,:)).^2)))/L;      % error measure;
0302                                                  % should be between 0 and 10
0303   if (e > 0.1) && Verbose > 0,
0304     fprintf('Nucleotide %c%s has average fitting error %6.4f Angstroms\n', NT(n).Base, NT(n).Number, e);
0305   end
0306 
0307   NT(n).Rot         = r;                         % save the rotation matrix
0308   NT(n).Fit(1:L2,:) = F;                         % fitted locations of base, H
0309   NT(n).Syn         = 0;
0310 end
0311 
0312 % Fill in fields of File ----------------------------------------------------
0313 
0314 File.PDBFilename = PDBFilename;
0315 File.Filename  = Filename;
0316 File.NT        = NT(1:NumNT);
0317 File.NumNT     = NumNT;
0318 File.Distance  = [];
0319 File.HandClass = [];
0320 File.Comment   = [];
0321 File.CI        = sparse(NumNT,NumNT);
0322 File.Edge      = sparse(NumNT,NumNT);
0323 File.Modified  = 1;
0324 File.Header    = Header;
0325 File.BasePhosphate = [];
0326 
0327 % Calculate configuration (syn or anti) -------------------------------------
0328 
0329 if length(File.NT) > 0,
0330   SynList = mSynList(File);
0331 
0332   j = find(SynList);
0333 
0334   for k=1:length(j),
0335     File.NT(j(k)).Syn = 1;
0336   end
0337 end
0338 
0339 else
0340 
0341   fprintf('Could not open file %s\n', PDBFilename);
0342   NumNT = 0;
0343   File.Filename = Filename;
0344   File.NT        = [];
0345   File.NumNT     = -1;
0346   File.Distance  = [];
0347   File.HandClass = [];
0348   File.Comment   = [];
0349   File.CI        = sparse(NumNT,NumNT);
0350   File.Edge      = sparse(NumNT,NumNT);
0351   File.Coplanar  = sparse(NumNT,NumNT);
0352   File.Modified  = 0;
0353   File.Pair      = [];
0354   File.ClassVersion = 0;
0355   File.Header    = [];
0356   File.Info.Resolution  = [];
0357   File.Info.Descriptor  = '';
0358   File.Info.ExpTechnique= '';
0359   File.Info.ReleaseDate = '';
0360   File.Info.Author      = '';
0361   File.Info.Keywords    = '';
0362   File.Info.Source      = '';
0363   File.BasePhosphate = sparse(NumNT,NumNT);
0364 
0365 end
0366 
0367 File = orderfields(File);
0368 
0369 end

Generated on Fri 03-Apr-2009 09:52:35 by m2html © 2003