function [FastaOrganismNames, Sequences] = mfastaread(filename)
%MFASTAREAD Read a FASTA file into organism names and a sequence matrix.
%
%   [FastaOrganismNames, Sequences] = mfastaread(filename)
%
%   Based on MATLAB's Bioinformatics function fastaread, with output formats
%   and variable names changed to suit our purposes. A percentage counter is
%   printed while the sequences are being separated.
%
%   Input:
%     filename            - name of the FASTA file to read.
%
%   Outputs:
%     FastaOrganismNames  - header text (without the leading '>') of each
%                           record: a char row when the file holds a single
%                           sequence, otherwise a 1-by-N cell array.
%     Sequences           - char matrix with one sequence per row; shorter
%                           sequences are padded on the right with blanks.
%
%   Errors:
%     Bioinfo:FastaNotValid     - the file does not exist, contains no '>'
%                                 header line, or its records could not be
%                                 separated.
%     Bioinfo:FastaLineTooLong  - the file could not be read even with the
%                                 largest line buffer.

fprintf('%s\n','Reading fasta file, please wait ...');

% Fail early with a meaningful error instead of letting a missing file be
% misreported below as a "very long lines" problem.
if ~ischar(filename) || exist(filename,'file') ~= 2
    error('Bioinfo:FastaNotValid',...
        'Input does not exist or is not a valid FASTA file.')
end

% Read the file line by line, retrying with progressively larger buffers.
bufSizes = 4095*[1 3 9]; %default buffer size first, then 3x and 9x
retryNotes = {'This FASTA file has some very long lines... Increasing buffer size to',...
              'Still not enough... Increasing buffer size again to'};
readOk = false;
for attempt = 1:numel(bufSizes)
    try
        ftext = textread(filename,'%s','delimiter','\n','bufsize',bufSizes(attempt));
        readOk = true;
    catch
        readOk = false;
    end
    if readOk
        break
    end
    if attempt < numel(bufSizes)
        fprintf('%s %g %s\n',retryNotes{attempt},bufSizes(attempt+1),'bytes');
    end
end
if ~readOk
    % Stop here: continuing would only fail later on the undefined ftext.
    error('Bioinfo:FastaLineTooLong',...
        'Your FASTA file has some extremely long lines, please shorten them by inserting line breaks and try again later')
end

% Logical array with one element per line of ftext: 1 for any line starting
% with '>', 0 otherwise.
commentLines = strncmp(ftext,'>',1);

if ~any(commentLines)
    error('Bioinfo:FastaNotValid',...
        'Input does not exist or is not a valid FASTA file.')
end

SeqCount = sum(commentLines);
fprintf('%s %g %s\n','Separating the',SeqCount,'sequences in this fasta file ...');

% Row index of every header line, plus a sentinel one past the last line so
% that seqStarts(k+1)-1 is always the last row of record k.
seqStarts = [find(commentLines(:)); numel(ftext)+1];

data(SeqCount,1).Header = ''; %Declare (allocate) the length of this array

try
    PercentDoneLast = -1; %nothing printed yet
    for theSeq = 1:SeqCount

        PercentDone = round(theSeq*100/SeqCount);
        if PercentDone ~= PercentDoneLast %Only print the counter when it changes, to save time
            if PercentDoneLast < 0
                % First print: emit a 5-character placeholder so that the
                % backspaces below only ever remove our own output.
                fprintf('     ');
            end
            PercentDoneLast = PercentDone;
            fprintf('\b\b\b\b\b%3g%s',PercentDone,' %');
        end

        data(theSeq).Header = ftext{seqStarts(theSeq)}(2:end); %name of organism
        firstRow = seqStarts(theSeq)+1;   %beginning of sequence of this organism
        lastRow = seqStarts(theSeq+1)-1;  %end of sequence of this organism

        % Preallocate the sequence with blanks; this is faster than growing
        % the string by concatenation.
        numSymbols = sum(cellfun('length',ftext(firstRow:lastRow)));
        thisSeq = repmat(' ',1,numSymbols);

        pos = 1;
        for i = firstRow:lastRow
            len = length(ftext{i});
            if len == 0
                % An extra line break (inside or at the end of the file)
                % marks the end of this sequence; nothing after it is read.
                break
            end
            thisSeq(pos:pos+len-1) = ftext{i}; %Append this line to the sequence
            pos = pos+len;
        end

        % Strip trailing blanks for EVERY sequence (not only the last one),
        % removing the unused preallocated tail left by an early break.
        data(theSeq).Sequences = deblank(thisSeq);
    end
    fprintf('\n'); %Needed after the PercentDone counter

    if SeqCount == 1
        seq = data.Sequences;
        FastaOrganismNames = data.Header;
    else
        seq = {data(:).Sequences};
        FastaOrganismNames = {data(:).Header};
    end

    % Change seq into the Sequences character array (one row per sequence,
    % padded with blanks). char replaces the deprecated str2mat.
    Sequences = char(seq);

catch
    % Raise a real error rather than returning with unassigned outputs.
    error('Bioinfo:FastaNotValid',...
        'Incorrect data format in fasta file: %s',lasterr);
end
