%mRenameFasta.m  by Ali Mokdad
%This program renames the sequences in the Fasta files within the specified directories
%by inserting the first word of the filename (which is usually the phylogenetic group name)
%at the beginning of the fasta comment line of each included organism. It also keeps only the first
%2 words (or 1 if only one exists) of the original fasta comment line and joins the resulting
%comment line into one word, with _ instead of spaces.

%This program took less than 13 minutes to run on all raw 23S and 16S rRNA on a 2.4 GHz Pentium 4 PC.
clear
if ~isdeployed,clc,end
tic
%Every regular file found in the following directories is processed. For each
%input file <name> a new file <name>.fasta is written next to it; the input
%file itself is left untouched.
Directories = {'Sept2004_Alignments_Separate\23S\A',...
    'Sept2004_Alignments_Separate\23S\B',...
    'Sept2004_Alignments_Separate\23S\E',...
    'Sept2004_Alignments_Separate\16S\A',...
    'Sept2004_Alignments_Separate\16S\B',...
    'Sept2004_Alignments_Separate\16S\E'};

for Direct = 1:length(Directories)
    %The list above is written with Windows separators; translate them so the
    %same list also works on platforms where filesep is '/'.
    DirPath = strrep(Directories{Direct},'\',filesep);
    if exist(DirPath,'dir') ~= 7
        error('mRenameFasta:missingDirectory','Directory not found: %s',DirPath);
    end

    %dir returns one struct per entry on every platform, with unpadded names.
    %Dropping the directory entries removes '.' and '..' (and any subfolder)
    %without assuming where they appear in the listing.
    %The listing is taken once, before any output is written, so the files
    %created during this run are not picked up as inputs.
    Listing = dir(DirPath);
    Listing = Listing(~[Listing.isdir]);

    for f = 1:length(Listing)
        FileName = Listing(f).name;
        fprintf('\n\t%g\t%s\n',f,FileName);

        %The group name is the part of the filename before its first space;
        %a filename without spaces is used in full.
        ind = strfind(FileName,' ');
        if isempty(ind)
            GroupName = FileName;
        else
            GroupName = FileName(1:ind(1)-1);
        end

        %Open the input first so that an unreadable input does not leave an
        %empty output file behind.
        fidIN = fopen(fullfile(DirPath,FileName),'r');
        if fidIN == -1
            warning('mRenameFasta:cannotOpenInput','Skipping %s: cannot open it for reading.',FileName);
            continue
        end
        fidOUT = fopen(fullfile(DirPath,[FileName '.fasta']),'w');
        if fidOUT == -1
            fclose(fidIN);
            warning('mRenameFasta:cannotOpenOutput','Skipping %s: cannot create %s.fasta.',FileName,FileName);
            continue
        end

        SeqCount = 0;
        tline = fgetl(fidIN);
        %fgetl returns the number -1 at end of file and a (possibly empty) char
        %array otherwise, so ischar is the reliable end-of-file test: an empty
        %line must not stop the loop.
        while ischar(tline)
            if strncmp(tline,'>',1)
                %Fasta comment line: rebuild it as >Group_Word1[_Word2]
                SeqCount = SeqCount + 1;
                fprintf('%g\t',SeqCount);

                %Words are the runs of non-space characters after the '>'.
                Words = regexp(tline(2:end),'[^ ]+','match');
                NewName = GroupName;
                for w = 1:min(2,length(Words))   %keep at most the first 2 words
                    NewName = [NewName '_' Words{w}]; %#ok<AGROW>
                end
                fprintf(fidOUT,'>%s\n',NewName);
            else
                %Sequence data (or a blank line): copy unchanged
                fprintf(fidOUT,'%s\n',tline);
            end
            tline = fgetl(fidIN);
        end
        fclose(fidIN);
        fclose(fidOUT);
    end
end
fprintf('\n');
toc

