0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024 function [File] = zReadandAnalyze(PDBFilename,Verbose)
0025
0026 i = strfind(PDBFilename,'.');
0027 Filename = PDBFilename(1:(i(end)-1));
0028
0029 if nargin < 2,
0030 Verbose = 1;
0031 end
0032
0033
0034
0035 if exist([pwd filesep 'PDBFiles' filesep 'Trouble reading']) == 7,
0036 if exist([pwd filesep 'PDBFiles' filesep 'Trouble reading' filesep PDBFilename]) == 2,
0037 Stop = 1;
0038 if Verbose > 0,
0039 fprintf('zReadandAnalyze is skipping %s because it appeared in FR3D/PDBFiles/Trouble Reading\n', PDBFilename);
0040 end
0041 else
0042 Stop = 0;
0043 end
0044 else
0045 Stop = 0;
0046 end
0047
0048 fid = fopen(PDBFilename,'r');
0049
0050 if (fid > 0) && (Stop == 0),
0051
0052 fclose(fid);
0053
0054 a = 1000+round(8900*rand);
0055
0056 TempFileName = ['##TempPDB' num2str(a) '.pdb'];
0057
0058 Header = zExtractAtomsPDB(PDBFilename,TempFileName,Verbose);
0059
0060 [ATOM_TYPE, ATOMNUMBER, ATOMNAME, VERSION, NTLETTER, CHAIN, NTNUMBER, P, Readable] = zReadPDBTextRead(TempFileName);
0061
0062 delete(TempFileName);
0063
0064 if Readable == 0,
0065 fprintf('zReadandAnalyze was unable to read the PDB file %s\n',PDBFilename);
0066 fprintf('Please move it to FR3D/PDBFiles/Trouble Reading.\n');
0067 end
0068
0069
0070
0071 if isfield(Header,'Expdata') & (length(Header.ModelStart) > 1)
0072 if ~isempty(strfind(Header.Expdata,'NMR')),
0073 Header.ModelStart = [Header.ModelStart length(NTNUMBER)+1];
0074 for m = 1:(length(Header.ModelStart)-1),
0075 a = Header.ModelStart(m);
0076 b = Header.ModelStart(m+1)-1;
0077 P(a:b,1) = P(a:b,1) + (m-1)*1000;
0078 end
0079 end
0080 end
0081
0082
0083
0084 zStandardBases
0085
0086 Lim(1,:) = [10 8 11 8];
0087 Lim(2,:) = [15 13 16 12];
0088
0089
0090
0091 NT = [];
0092 n = 1;
0093 i = 1;
0094 unrec = 0;
0095
0096 while i < length(NTNUMBER),
0097 Flag = 0;
0098 j = [];
0099 ntnum = NTNUMBER{i};
0100
0101 while (i <= length(NTNUMBER)) & (strcmp(NTNUMBER{i},ntnum)),
0102
0103
0104 j = [j; i];
0105 i = i + 1;
0106 end
0107
0108 NT(n).Base = NTLETTER{j(1)};
0109 NT(n).Chain = CHAIN{j(1)};
0110 NT(n).Number = NTNUMBER{j(1)};
0111
0112 Sugar = Inf * ones(12,3);
0113 Loc = Inf * ones(11,3);
0114
0115 if (NT(n).Base == 'A') | (NT(n).Base == 'A'),
0116 for k = min(j):max(j),
0117 if (VERSION{k}=='A') | (VERSION{k}==' '),
0118 switch ATOMNAME{k},
0119 case 'N9', Loc( 1,:) = P(k,:);
0120 case 'C4', Loc( 2,:) = P(k,:);
0121 case 'N3', Loc( 3,:) = P(k,:);
0122 case 'N1', Loc( 4,:) = P(k,:);
0123 case 'C6', Loc( 5,:) = P(k,:);
0124 case 'N6', Loc( 6,:) = P(k,:);
0125 case 'C8', Loc( 7,:) = P(k,:);
0126 case 'C5', Loc( 8,:) = P(k,:);
0127 case 'C2', Loc( 9,:) = P(k,:);
0128 case 'N7', Loc(10,:) = P(k,:);
0129 case {'C1*','C1'''}, Sugar( 1,:) = P(k,:);
0130 case {'C2*','C2'''}, Sugar( 2,:) = P(k,:);
0131 case {'O2*','O2'''}, Sugar( 3,:) = P(k,:);
0132 case {'C3*','C3'''}, Sugar( 4,:) = P(k,:);
0133 case {'O3*','O3'''}, Sugar( 5,:) = P(k,:);
0134 case {'C4*','C4'''}, Sugar( 6,:) = P(k,:);
0135 case {'O4*','O4'''}, Sugar( 7,:) = P(k,:);
0136 case {'C5*','C5'''}, Sugar( 8,:) = P(k,:);
0137 case {'O5*','O5'''}, Sugar( 9,:) = P(k,:);
0138 case 'P', Sugar(10,:) = P(k,:);
0139 case {'O1P','OP1'}, Sugar(11,:) = P(k,:);
0140 case {'O2P','OP2'}, Sugar(12,:) = P(k,:);
0141 end
0142 end
0143 end
0144 Loc(11,:) = zeros(1,3);
0145 NT(n).Loc = Loc;
0146 NT(n).Sugar = Sugar;
0147 NT(n).Center = mean(Loc(1:10,:));
0148 NT(n).Code = 1;
0149 elseif (strcmp(NT(n).Base,'C')) | (strcmp(NT(n).Base,'C+')),
0150 for k = min(j):max(j),
0151 if (VERSION{k}=='A') | (VERSION{k}==' '),
0152 switch ATOMNAME{k},
0153 case 'N1', Loc( 1,:) = P(k,:);
0154 case 'C2', Loc( 2,:) = P(k,:);
0155 case 'O2', Loc( 3,:) = P(k,:);
0156 case 'N3', Loc( 4,:) = P(k,:);
0157 case 'C4', Loc( 5,:) = P(k,:);
0158 case 'N4', Loc( 6,:) = P(k,:);
0159 case 'C6', Loc( 7,:) = P(k,:);
0160 case 'C5', Loc( 8,:) = P(k,:);
0161 case {'C1*','C1'''}, Sugar( 1,:) = P(k,:);
0162 case {'C2*','C2'''}, Sugar( 2,:) = P(k,:);
0163 case {'O2*','O2'''}, Sugar( 3,:) = P(k,:);
0164 case {'C3*','C3'''}, Sugar( 4,:) = P(k,:);
0165 case {'O3*','O3'''}, Sugar( 5,:) = P(k,:);
0166 case {'C4*','C4'''}, Sugar( 6,:) = P(k,:);
0167 case {'O4*','O4'''}, Sugar( 7,:) = P(k,:);
0168 case {'C5*','C5'''}, Sugar( 8,:) = P(k,:);
0169 case {'O5*','O5'''}, Sugar( 9,:) = P(k,:);
0170 case 'P', Sugar(10,:) = P(k,:);
0171 case {'O1P','OP1'}, Sugar(11,:) = P(k,:);
0172 case {'O2P','OP2'}, Sugar(12,:) = P(k,:);
0173 end
0174 end
0175 end
0176 Loc( 9,:) = zeros(1,3);
0177 Loc(10,:) = zeros(1,3);
0178 Loc(11,:) = zeros(1,3);
0179 NT(n).Loc = Loc;
0180 NT(n).Sugar = Sugar;
0181 NT(n).Center = mean(Loc(1:8,:));
0182 NT(n).Code = 2;
0183 elseif (NT(n).Base == 'G') | (NT(n).Base == 'G'),
0184 for k = min(j):max(j),
0185 if (VERSION{k}=='A') | (VERSION{k}==' '),
0186 switch ATOMNAME{k},
0187 case 'N9', Loc( 1,:) = P(k,:);
0188 case 'C4', Loc( 2,:) = P(k,:);
0189 case 'N3', Loc( 3,:) = P(k,:);
0190 case 'N1', Loc( 4,:) = P(k,:);
0191 case 'C6', Loc( 5,:) = P(k,:);
0192 case 'O6', Loc( 6,:) = P(k,:);
0193 case 'C8', Loc( 7,:) = P(k,:);
0194 case 'C5', Loc( 8,:) = P(k,:);
0195 case 'C2', Loc( 9,:) = P(k,:);
0196 case 'N7', Loc(10,:) = P(k,:);
0197 case 'N2', Loc(11,:) = P(k,:);
0198 case {'C1*','C1'''}, Sugar( 1,:) = P(k,:);
0199 case {'C2*','C2'''}, Sugar( 2,:) = P(k,:);
0200 case {'O2*','O2'''}, Sugar( 3,:) = P(k,:);
0201 case {'C3*','C3'''}, Sugar( 4,:) = P(k,:);
0202 case {'O3*','O3'''}, Sugar( 5,:) = P(k,:);
0203 case {'C4*','C4'''}, Sugar( 6,:) = P(k,:);
0204 case {'O4*','O4'''}, Sugar( 7,:) = P(k,:);
0205 case {'C5*','C5'''}, Sugar( 8,:) = P(k,:);
0206 case {'O5*','O5'''}, Sugar( 9,:) = P(k,:);
0207 case 'P', Sugar(10,:) = P(k,:);
0208 case {'O1P','OP1'}, Sugar(11,:) = P(k,:);
0209 case {'O2P','OP2'}, Sugar(12,:) = P(k,:);
0210 end
0211 end
0212 end
0213 NT(n).Loc = Loc;
0214 NT(n).Sugar = Sugar;
0215 NT(n).Center = mean(Loc(1:11,:));
0216 NT(n).Code = 3;
0217 elseif (NT(n).Base == 'U') | (strcmp(NT(n).Base,'+U')),
0218 for k = min(j):max(j),
0219 if (VERSION{k}=='A') | (VERSION{k}==' '),
0220 switch ATOMNAME{k},
0221 case 'N1', Loc( 1,:) = P(k,:);
0222 case 'C2', Loc( 2,:) = P(k,:);
0223 case 'O2', Loc( 3,:) = P(k,:);
0224 case 'N3', Loc( 4,:) = P(k,:);
0225 case 'C4', Loc( 5,:) = P(k,:);
0226 case 'O4', Loc( 6,:) = P(k,:);
0227 case 'C6', Loc( 7,:) = P(k,:);
0228 case 'C5', Loc( 8,:) = P(k,:);
0229 case {'C1*','C1'''}, Sugar( 1,:) = P(k,:);
0230 case {'C2*','C2'''}, Sugar( 2,:) = P(k,:);
0231 case {'O2*','O2'''}, Sugar( 3,:) = P(k,:);
0232 case {'C3*','C3'''}, Sugar( 4,:) = P(k,:);
0233 case {'O3*','O3'''}, Sugar( 5,:) = P(k,:);
0234 case {'C4*','C4'''}, Sugar( 6,:) = P(k,:);
0235 case {'O4*','O4'''}, Sugar( 7,:) = P(k,:);
0236 case {'C5*','C5'''}, Sugar( 8,:) = P(k,:);
0237 case {'O5*','O5'''}, Sugar( 9,:) = P(k,:);
0238 case 'P', Sugar(10,:) = P(k,:);
0239 case {'O1P','OP1'}, Sugar(11,:) = P(k,:);
0240 case {'O2P','OP2'}, Sugar(12,:) = P(k,:);
0241 end
0242 end
0243 end
0244 Loc( 9,:) = zeros(1,3);
0245 Loc(10,:) = zeros(1,3);
0246 Loc(11,:) = zeros(1,3);
0247 NT(n).Loc = Loc;
0248 NT(n).Sugar = Sugar;
0249 NT(n).Center = mean(Loc(1:8,:));
0250 NT(n).Code = 4;
0251 else
0252 if unrec < 5,
0253 if Verbose > 0,
0254 fprintf('Unrecognized: %s %s\n', NT(n).Base,NT(n).Number);
0255 end
0256 unrec = unrec + 1;
0257 end
0258 n = n - 1;
0259
0260 Flag = 1;
0261 end
0262
0263 if (Flag < 1),
0264 if (max(max(Loc)) == Inf),
0265 if Verbose > 0,
0266 NumGood = length(find((Loc(1,:) < Inf) .* (abs(Loc(1,:)) > 0)));
0267 fprintf('Base %s%s has %d atoms, so it will be skipped\n',NT(n).Base,NT(n).Number,NumGood);
0268 end
0269 n = n - 1;
0270 elseif (max(max(Sugar)) == Inf),
0271 for k = 1:12,
0272 if Sugar(k,1) == Inf,
0273 Sugar(k,:) = Loc(1,:);
0274 end
0275 end
0276 NT(n).Sugar = Sugar;
0277 end
0278 end
0279
0280 n = n + 1;
0281
0282 end
0283
0284 NumNT = n - 1;
0285
0286
0287
0288 warning off MATLAB:divideByZero
0289
0290 for n=1:NumNT,
0291 C = NT(n).Code;
0292 L = Lim(1,C);
0293 X = StandardLoc(1:L,:,C);
0294 Y = NT(n).Loc(1:L,:);
0295
0296 [r, sc, sh] = zBestTransformation(X,Y);
0297
0298 L2 = Lim(2,C);
0299 X2 = StandardLoc(1:L2,:,C);
0300 F = (sh*ones(1,L2) + r*X2')';
0301 e = sqrt(sum(sum((Y - F(1:L,:)).^2)))/L;
0302
0303 if (e > 0.1) && Verbose > 0,
0304 fprintf('Nucleotide %c%s has average fitting error %6.4f Angstroms\n', NT(n).Base, NT(n).Number, e);
0305 end
0306
0307 NT(n).Rot = r;
0308 NT(n).Fit(1:L2,:) = F;
0309 NT(n).Syn = 0;
0310 end
0311
0312
0313
0314 File.PDBFilename = PDBFilename;
0315 File.Filename = Filename;
0316 File.NT = NT(1:NumNT);
0317 File.NumNT = NumNT;
0318 File.Distance = [];
0319 File.HandClass = [];
0320 File.Comment = [];
0321 File.CI = sparse(NumNT,NumNT);
0322 File.Edge = sparse(NumNT,NumNT);
0323 File.Modified = 1;
0324 File.Header = Header;
0325 File.BasePhosphate = [];
0326
0327
0328
0329 if length(File.NT) > 0,
0330 SynList = mSynList(File);
0331
0332 j = find(SynList);
0333
0334 for k=1:length(j),
0335 File.NT(j(k)).Syn = 1;
0336 end
0337 end
0338
0339 else
0340
0341 fprintf('Could not open file %s\n', PDBFilename);
0342 NumNT = 0;
0343 File.Filename = Filename;
0344 File.NT = [];
0345 File.NumNT = -1;
0346 File.Distance = [];
0347 File.HandClass = [];
0348 File.Comment = [];
0349 File.CI = sparse(NumNT,NumNT);
0350 File.Edge = sparse(NumNT,NumNT);
0351 File.Coplanar = sparse(NumNT,NumNT);
0352 File.Modified = 0;
0353 File.Pair = [];
0354 File.ClassVersion = 0;
0355 File.Header = [];
0356 File.Info.Resolution = [];
0357 File.Info.Descriptor = '';
0358 File.Info.ExpTechnique= '';
0359 File.Info.ReleaseDate = '';
0360 File.Info.Author = '';
0361 File.Info.Keywords = '';
0362 File.Info.Source = '';
0363 File.BasePhosphate = sparse(NumNT,NumNT);
0364
0365 end
0366
0367 File = orderfields(File);
0368
0369 end