%mFastaExtractor_multiple_noGUI.m     %By Ali Mokdad - 2005/07/12
%This program first reads a complete alignment file, then lets the user choose the positions that he needs
%to extract from that complete alignment to make a smaller, more relevant alignment for his purposes.

% Start from a clean workspace and command window. This file is a script, so
% `clear` removes every variable currently in the workspace it is run from.
clear
clc

% Run the sibling script located one directory up. FastaPath is used below
% without being defined in this file, so it is expected to come from here.
% NOTE(review): presumably mGetFasta also defines Sequences and
% FastaOrganismNames, which are used later in this file -- confirm.
run('..\mGetFasta')

% Example configuration, kept for reference (disabled):
% DomainsToExtract = 'All';             %You can also choose Archaea, Bacteria, or Eukarya
% RefOragnismName = {'PSTVd'};	%You can also use an exact organism name from your Fasta File
% LocPosInput = {'21-25,337-341' '35-39,322-326' '49-52,309-312' '55-60,300-306' '86-90,270-276' '97-103,255-262' '117-121,240-243' '156-162,199-205'};%You can use any meaningful - and , separated input, each separate input wrapped in single quotes
% LocPosName  = {'21-25_337-341' '35-39_322-326' '49-52_309-312' '55-60_300-306' '86-90_270-276' '97-103_255-262' '117-121_240-243' '156-162_199-205'};%You can use any meaningful - and _ separated input_ each separate input wrapped in single quotes
% IgnoreTopOrganisms = 1;    %This is needed in case the top alignment(s) is just a mask... Make 0 if there is no mask
% BPlist=[];

% Active configuration.
% DomainsToExtract selects which rows of Sequences are written out; the values
% tested later in this file are 'All', 'Archaea', 'Bacteria' and 'Eukarya'.
DomainsToExtract = 'All';
% Not read anywhere else in this file.
% NOTE(review): presumably consumed by mGetUnivFromAnyFasta -- confirm.
RefOragnismName = {'Escherichia_coli'};
% Two-column text file inside FastaPath: the first column is read into
% LocPosName (used to build output file names), the second into LocPosInput
% (the '-' and ',' separated position ranges parsed in the main loop).
ILList = '16S_IL_list.txt';
[LocPosName, LocPosInput] = textread(strcat(FastaPath,ILList),'%s\t%s'); 
% Number of leading rows of the alignment to skip when DomainsToExtract is
% 'All' or 'Archaea' (it is added to the first row index in the main loop).
IgnoreTopOrganisms = 0;
% Spreadsheet inside FastaPath. A non-empty BPlist also enables the
% mBPlistExtractor step in the main loop.
BPlist=strcat(FastaPath,'rr0052_AllClasses.xls');
% InputData / InputText are not used directly in this file.
% NOTE(review): presumably read by mBPlistExtractor -- confirm.
[InputData, InputText] = xlsread(BPlist);

% Main extraction loop. For each entry of LocPosInput (a string such as
% '2-12,20-30'): validate it, parse the start/end position of every range,
% map the positions through Univ, write the selected columns of every chosen
% sequence to a FASTA file and, when BPlist is not empty, export the matching
% base-pair list.
% NOTE: this file is a script and the helper scripts it runs share its
% workspace, so the variable names used here are deliberately kept unchanged.
for lpi=1:length(LocPosInput)
    % index: positions of the ',' separators plus a sentinel one past the end
    % of the string, so index(k) always marks the end of the k-th range.
    index   = strfind(LocPosInput{lpi},',');
    index   = [index length(LocPosInput{lpi})+1];
    % ind: position of the '-' inside each range.
    ind     = strfind(LocPosInput{lpi},'-');
    %Test if numbers make sense or not:
    InputFormatError = 'The input numbers do not make sense, please follow this format: LocPosInput{lpi} = ''2-12,20-30,15-20'';';

    % The checks are chained so that ind(1) / index(i) are only touched once
    % the number of '-' is known to match the number of ranges; otherwise an
    % entry without any '-' would crash with an index error instead of being
    % reported and skipped.
    GoAhead = 1;
    if length(ind) ~= length(index)
        GoAhead = 0;
    elseif (ind(1)<2) || (ind(1)>index(1))
        GoAhead = 0;
    else
        for i=2:length(ind)
            if (ind(i)<index(i-1)) || (ind(i)>index(i)), GoAhead=0; end
        end
    end

    if GoAhead==1
        clear LocPos1; clear LocPos2; clear UniPos1; clear UniPos2;

        % str2double never evaluates the text (unlike str2num) and returns
        % NaN for anything that is not a plain number.
        LocPos1(1) = str2double(LocPosInput{lpi}(1:ind(1)-1));
        LocPos2(1) = str2double(LocPosInput{lpi}(ind(1)+1:index(1)-1));
        for i=2:length(index)
            % Start one character after the previous ',' so the separator
            % itself is not part of the number being parsed.
            LocPos1(i) = str2double(LocPosInput{lpi}(index(i-1)+1:ind(i)-1));
            LocPos2(i) = str2double(LocPosInput{lpi}(ind(i)+1:index(i)-1));
        end

        % Positions are used as indices into Univ below, so they must be
        % positive integers.
        LocPosAll = [LocPos1 LocPos2];
        if any(isnan(LocPosAll)) || any(LocPosAll < 1) || any(LocPosAll ~= floor(LocPosAll))
            GoAhead = 0;
        end
    end

    if GoAhead~=1
        % Report the malformed entry once and move on to the next one.
        disp(InputFormatError);
        continue
    end

    n = 1; %needed for mGetUnivFromAnyFasta
    run('..\mGetUnivFromAnyFasta')

    % Univ is expected to be set by the script above; it translates the
    % positions given in LocPosInput into column indices of Sequences.
    UniPos1 = Univ(LocPos1);
    UniPos2 = Univ(LocPos2);

    % Rows of Sequences to export. DomainLimits, ArchaeaEnd and BacteriaEnd
    % are not defined in this file.
    % NOTE(review): presumably set by mGetFasta or mGetUnivFromAnyFasta.
    if     strcmp(DomainsToExtract,'All'),        sequences = 1+IgnoreTopOrganisms : length(Sequences(:,1));
    elseif strcmp(DomainsToExtract,'Archaea'),    sequences = 1+IgnoreTopOrganisms : DomainLimits(2);
    elseif strcmp(DomainsToExtract,'Bacteria'),   sequences = ArchaeaEnd + 1        : DomainLimits(3);
    elseif strcmp(DomainsToExtract,'Eukarya'),    sequences = BacteriaEnd + 1       : DomainLimits(4);
    else
        % Without this branch 'sequences' would be undefined (or left over
        % from a previous pass) and the failure would surface much later.
        error('DomainsToExtract must be ''All'', ''Archaea'', ''Bacteria'' or ''Eukarya''.');
    end

    %FastaOutputFile = strcat(FastaPath,FastaFilename,'_extract_',DomainsToExtract,'_',RefOragnismName{1},'_',LocPosName{lpi},'.fasta');
    FastaOutputFile = strcat(FastaPath,'FastaExtracts\16SelectedABEAllEc_',LocPosName{lpi},'.fasta');
    FastafidOUT = fopen(FastaOutputFile,'w+');
    if FastafidOUT == -1      %Error check
        ErrorOUT = ['The file: ',FastaOutputFile,' could not be written to. It may be open. Please close it and try again'];
        % Show the message; previously it was only stored and never displayed.
        disp(ErrorOUT);
    else
        for j=sequences
            % Header line: '>' followed by the organism name.
            fprintf(FastafidOUT,'%s%s\n','>',FastaOrganismNames{j});
            for i=1:length(index)
                % Columns of the i-th requested range for sequence j.
                fprintf(FastafidOUT,'%s',Sequences(j,UniPos1(i):UniPos2(i)));
                if i~=length(index)
                    % Separator between consecutive ranges of one sequence.
                    fprintf(FastafidOUT,'%s','....'); %%%%%%%%MAY NEED TO REPLACE THIS WITH 'AAAA' (coordinate with Craig)
                else
                    fprintf(FastafidOUT,'\n');
                end
            end
        end
        fclose(FastafidOUT);
        %fprintf('\n%s\n%s\n','The requested part of your fasta file was saved under:',FastaOutputFile);
    end

    if ~isempty(BPlist)
        % External script; it is expected to leave its result in
        % BPsThatBelongHere, which is then written next to the FASTA extracts.
        mBPlistExtractor %%%
        BPLOutputFile = strcat(FastaPath,'BPLExtracts\16SelectedABEAllEc_',LocPosName{lpi},'.xls');
        xlswrite(BPLOutputFile, BPsThatBelongHere);
    end
end

% Final report: list the FASTA extract expected for every entry of
% LocPosInput. Each file is checked on disk so that an entry that was skipped
% (bad format) or whose file could not be opened is not silently reported as
% created.
fprintf('\n%s\n','This is a list of the filenames created:')
for lpi=1:length(LocPosInput)
    % Same path the main loop writes to.
    ExpectedFastaFile = strcat(FastaPath,'FastaExtracts\16SelectedABEAllEc_',LocPosName{lpi},'.fasta');
    if exist(ExpectedFastaFile,'file') == 2
        fprintf('%s%s%s\n','16SelectedABEAllEc_',LocPosName{lpi},'.fasta');
    else
        fprintf('%s%s%s%s\n','16SelectedABEAllEc_',LocPosName{lpi},'.fasta',' (NOT created)');
    end
end

fprintf('\n%s\n%s\n','The requested part(s) of your fasta file can be found in the folder:',FastaPath);