%mMaskMaker.m
%Creates structure masks (mask13, mask3 and mask2) for a certain organism with known structure in a fasta alignment.
%These masks can be used with BioEdit to mark basepairing.
% clear
% clc

% TODO: fix the use of OrgInd(1); as written it refers only to the first organism in the xls list.
% if ~exist('Count','var') %Where does this Count come from???
%     run('..\mGetFasta')
%     run('..\mGetPositions')
% end
if ~exist('Sequences','var') && ~exist('InputData','var') %This means both mGetFasta and mGetPositions were run before
    run('..\mGetFasta')
    run('..\mGetPositions')
end

%tic
Len=length(Sequences(OrgInd(1),:));

[h w]=size(InputData);
if w==2
    
    if ~exist('OutputPath','var')
        mkdir('../../Output');
        OrigDir=cd;
        if ispc ==1 %ispc returns 1 if this is run on a PC (Windows)
            ind= findstr('\',OrigDir);  %Doesn't work on MAC
        else
            ind= findstr('/',OrigDir);  %Works on MAC
        end
        OutputPath=strcat(OrigDir(1:ind(end-1)),'Output\');
    end
%mask13%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%     fprintf('\n\n');
%     fprintf('%s\t%s\t%s\t%s\n','Nam','No','Opening Code', 'Closing Code');
%     fprintf('%s\t%s\t%s\t%s\n','cWW','1','Aa','bB');
%     fprintf('%s\t%s\t%s\t%s\n','tWW','2','Cc','dD');
%     fprintf('%s\t%s\t%s\t%s\n','cWH','3','Ee','fF');
%     fprintf('%s\t%s\t%s\t%s\n','tWH','4','Gg','hH');
%     fprintf('%s\t%s\t%s\t%s\n','cWS','5','Ii','jJ');
%     fprintf('%s\t%s\t%s\t%s\n','tWS','6','Kk','lL');
%     fprintf('%s\t%s\t%s\t%s\n','cHH','7','Mm','nN');
%     fprintf('%s\t%s\t%s\t%s\n','tHH','8','Oo','pP');
%     fprintf('%s\t%s\t%s\t%s\n','cHS','9','Qq','rR');
%     fprintf('%s\t%s\t%s\t%s\n','tHS','10','Ss','tT');
%     fprintf('%s\t%s\t%s\t%s\n','cSS','11','Uu','vV');
%     fprintf('%s\t%s\t%s\t%s\n','tSS','12','Ww','xX');
%     fprintf('%s\t%s\t%s\t%s\n','bif','13','Yy','zZ');
%     fprintf('\n');


    %Initialization:
    pre_mask13(Len)=' '; %Preallocation, for speed
    for i=1:Len %Len=length(Sequences(OrgInd(1),:));
%         if strcmp(Sequences(1,i),' ') %It was found out that some of the sequences end with spaces!
%             break
%         end
        pre_mask13(i)='.'; %pre_mask13 is without gaps
    end
    
    for n=h:-1:1 %for each position     %In case of triplets, this should give priority to interactions listed earlier (usually cWW)
        A = InputData(n,1);
        B = InputData(n,2);

        %[Code1 Code2] = mMaskMaker13Code(Interaction(n,:)); %m12MaskMakerCode is the function that translates "Interaction" name like cWW into 2 codes, Ab for example in this case
%         OrigDir=cd;
%         cd ..
        %TableSimple = mGetTableNumberFromName(Interaction{n}); %Table(n) is the best and is done right after mGetPositions
%         cd(OrigDir)
        [Code1 Code2] = mMaskMaker13Code(Table(n)); %m12MaskMakerCode is the function that translates "Interaction" name like cWW into 2 codes, AZ for example in this case

        if A>B,
            A=B; B=InputData(n,1);
            pre_mask13(A)=lower(Code1);  %For example, a BP which is cWW will be assigned A for the 1st base and z for the 2nd,
            pre_mask13(B)=upper(Code2);  %A BP which is tWW will be assigned B for the 1st base and y for the 2nd, and so on
        else
            pre_mask13(A)=upper(Code1);
            pre_mask13(B)=lower(Code2);
        end 
    end
    
    pre_mask13_counter=0;
    mask13(Len)=' '; %Preallocation, for speed
    for i=1:Len %Len=length(Sequences(OrgInd(1),:));
        if strcmp(Sequences(OrgInd(1),i),'-')
            mask13(i)='-'; %mask13 is with gaps
        else
            pre_mask13_counter = pre_mask13_counter+1;
            mask13(i)=pre_mask13(pre_mask13_counter);
        end
    end
%mask13%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    
%mask2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%     BP=zeros(h,w);
    BP=sortrows(sort(InputData,2));%This will first sort the clumns row by row, and then it will sort the rows

    mask2(Len)=' '; %Preallocation, for speed
    for i=1:Len %Len=length(Sequences(OrgInd(1),:)); %%%(the length of sequence of source organism)
%         if strcmp(Sequences(1,i),' ') %It was found out that some of the sequences end with spaces!
%             break
%         end
        if strcmp(Sequences(OrgInd(1),i),'-')
            mask2(i)='-';%gaps
        else
            mask2(i)='.';%This sets all "real" positions (not gaps) in mask2 as ., because Biodit does not understand spaces ' '
        end
    end

    %     i=1;
    %     if (BP(i+1,1)==BP(i,1)+1)&&(BP(i+1,2)==BP(i,2)-1)%this means in a helix, only use for the 1st position
    %         mask2(Univ(BP(i,1)))='(';
    %         mask2(Univ(BP(i,2)))=')';
    %     end
% %     MissIn2D1=[];
% %     MissIn2D2=[];
BPcomp=sort([BP(:,1);BP(:,2)]');
BPskip=[]; %These are BPs that form triplets of more, they can not be coded in mask2
for i=1:length(BPcomp)-1
    if BPcomp(i)==BPcomp(i+1)
        BPskip=[BPskip BPcomp(i)];
    end
end

RepIn2D=0;
if length(BP(:,1)) > 1   
    for i=1:length(BP)%BP is the pair of NT numbers that are paired like [1 14; 2 13; 4 12]
        if i<length(BP) %doesn't work for the last position
            if isempty(find(BP(i,1)==BPskip)) && isempty(find(BP(i,2)==BPskip)) %this works even if BPskip is empty
%             if strmatch(mask2(Univ(BP(i,1))),'.')
                if (BP(i+1,1)>BP(i,1)) && (BP(i+1,2)<BP(i,2)) %this means no crossing interactions
                    mask2(Univ(BP(i,1)))='(';
                    mask2(Univ(BP(i,2)))=')';
                    RepIn2D=RepIn2D+1;
% %                 else MissIn2D1=vertcat(MissIn2D1,BP(i,:));
                end
%             end
            end
        end
        if i>1 %doesn't work for the 1st position, also needed to cover some misses
            if isempty(find(BP(i,1)==BPskip)) && isempty(find(BP(i,2)==BPskip)) %this works even if BPskip is empty
                if strmatch(mask2(Univ(BP(i,1))),'.')
                    if (BP(i-1,1)<BP(i,1)) && (BP(i-1,2)>BP(i,2)) %same meaning
                        mask2(Univ(BP(i,1)))='(';
                        mask2(Univ(BP(i,2)))=')';
                        RepIn2D=RepIn2D+1;
% %                 else MissIn2D2=vertcat(MissIn2D2,BP(i,:));
                    end
                end
            end
        end
    end
else %this happens in case there is only 1 BP (!!), so that it won't be missed since the above conditions need to compare consecutive BPs to assign ( and ).
    i=1;
    mask2(Univ(BP(i,1)))='(';
    mask2(Univ(BP(i,2)))=')';
end
% % if length(MissIn2D2)<=length(MissIn2D1)
% %     MissIn2D=MissIn2D2;
% % else
% %     MissIn2D=MissIn2D1;
% % end
% % MissIn2D
%mask2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%mask3%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
mask3=mask2;
% H=0;%This is the counter for tertiary (crossing) bps
for i=1:length(mask3)
    if (strcmp(mask2(i),'.')) && ~(strcmp(mask13(i),'.'))
        if      ~isempty(strmatch(upper(mask13(i)),strvcat('A','C','E','G','I','K','M','O','Q','S','U','W','Y','?')));
            mask3(i)='[';
%             H=H+1; %Do this only once, not twice per bp!
        elseif  ~isempty(strmatch(upper(mask13(i)),strvcat('B','D','F','H','J','L','N','P','R','T','V','X','Z','!')));
            mask3(i)=']';
        end
    end
end
%mask3%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



%Print Masks into fidOUT%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    OUTfile = strcat(OutputPath,PositionList,'_',FastaFilename,'_Masks.txt');
    fidOUT = fopen(OUTfile,'w+');
    %PrintArray=[1 fidOUT];
    PrintArray=fidOUT;
    
    for f=1:length(PrintArray)
        
        fprintf(PrintArray(f),'%s\n','>Mask with 13 classes, Ab is cWW, Cd is tWW,... Read in direction from UPPERCASE to lowercase letters if interaction not symmetric');
        fprintf(PrintArray(f),'%s',mask13);
        fprintf(PrintArray(f),'\n');
        
        fprintf(PrintArray(f),'%s\n','>Both 2ary and 3ary Structure Mask (parentheses nested, but brackets not necessarily nested)');
        fprintf(PrintArray(f),'%s',mask3);
        fprintf(PrintArray(f),'\n');

        fprintf(PrintArray(f),'%s\n','>2ary Structure Mask (all parentheses nested)');
        fprintf(PrintArray(f),'%s',mask2);
        fprintf(PrintArray(f),'\n');
        fprintf(PrintArray(f),'\n');

    end
    fclose(fidOUT);
%     fprintf('%s\n%s\n\n','Masks printed to output directory:',OutputPath);
%Print Masks into fidOUT%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%     fprintf('%s %g\n',      'Total number of BPs    = ',h);
%     fprintf('%s %g\n',      'Number of BPs   = ',h-H);
%     fprintf('%s %g%s%g%s\n','Number of unnested BPs = ',H,' (or ',fix(100*H/h),'%)' ); %report the number of 3ary basepairs (Note: A couple of these may still be 2ary)
fprintf('\n%s%g%s%g%s\n','Number of BPs represented in the nested 2D mask  = ',RepIn2D, ', or ',fix((100*RepIn2D/h)),' % of all BPs');

else
    fprintf('Error: The Position List is not a list of basepairs');
end
%toc
