%{
/* lowercaser.l - Filter for lowercasing HTML tags

   Michal Ludvig <michal@logix.cz> (c) 2003,2004
   Homepage: http://www.logix.cz/michal/devel/lowercaser

   This code is in the public domain - you can do whatever you want with it,
   but don't blame me if it doesn't work as you expected.

   Introduction:

   In the days of Netscape 3.0 people used to write HTML tags
   in upper case (e.g. <B>Bold text</B>). However, with the introduction
   of XML and XHTML these tags are no longer valid and only lowercase
   tags can be used. On some websites it was necessary to convert
   thousands of documents from the old HTML to the shiny new XHTML.
   While that is still boring manual work, at least converting the upper
   case tags to lower case can be done automatically. And this is the
   only purpose of this short program.

   It works as a filter - feed your old HTML file to the input and 
   read it from the output. 

   It intentionally doesn't convert in-line programs in PHP and ASP as 
   well as strings in the tags (like URLs), because these could be case 
   sensitive.

   And yes, it's written in LEX.
   
   Compilation:

   $ flex lowercaser.l
   $ gcc -o lowercaser lex.yy.c -lfl

   You can use -DANSI_COLOR flag to gcc to get a nice colored output.
*/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Strings printed around the constructs the scanner recognises.
 * When compiled with -DANSI_COLOR they are ANSI colour escape sequences;
 * otherwise they are empty strings, so the output carries no extra bytes.
 *
 *   ANSI_GREEN  - printed when a tag opens and again after a quoted string
 *   ANSI_RED    - printed when a quoted string inside a tag starts
 *   ANSI_CYAN   - printed when an in-line program (<? or <%) starts
 *   ANSI_RESET  - printed when a tag or an in-line program ends
 */
#ifdef  ANSI_COLOR
#define ANSI_GREEN      "\033[0;32;40m"
#define ANSI_RED        "\033[0;31;40m"
#define ANSI_CYAN       "\033[0;36;40m"
#define ANSI_RESET      "\033[0m"
#else
#define ANSI_GREEN      ""
#define ANSI_RED        ""
#define ANSI_CYAN       ""
#define ANSI_RESET      ""
#endif

/* Destination buffer that strlcase () writes its lowercase copy into.  It
   must point at storage large enough for the string plus its terminator
   before strlcase () is called. */
char    *lcbuffer;
/* prog_char: second character of the program opener ('?' or '%'), compared
   against the first character of a candidate closer.
   string_char: quote character that opened the current string, compared
   against the last character of a candidate string end. */
char    prog_char, string_char;
/* Start condition that was active when a tag, an in-line program or a
   quoted string was entered; the matching end rule switches back to it. */
int     tag_caller, tag_prog_caller, tag_string_caller;

/*
 * strlcase - store a lowercase copy of a string in the global lcbuffer.
 *
 * string: NUL-terminated text to convert; it is not modified.
 *
 * The caller must have pointed lcbuffer at writable storage of at least
 * strlen (string) + 1 bytes before calling.
 *
 * Returns lcbuffer, which then holds the NUL-terminated lowercase copy.
 */
static char *
strlcase (const char *string)
{
        size_t i;

        for (i = 0; string[i] != '\0'; i++)
        {
                /* The <ctype.h> functions are only defined for values
                   representable as unsigned char (or EOF); passing a
                   negative plain char is undefined behaviour. */
                lcbuffer[i] = (char) tolower ((unsigned char) string[i]);
        }
        lcbuffer[i] = '\0';
        return lcbuffer;
}
%}

/* Start conditions used by the rules below:
     TAG         entered after a "<", left at the matching ">"
     TAG_STRING  entered at a quote inside a tag, left at the same quote
     TAG_PROG    entered at "<?" or "<%", left at the matching closer
   They are declared inclusive, so rules written without a start-condition
   prefix remain active in each of them. */
%s TAG TAG_STRING TAG_PROG

%%
"<"             {
                        /*
                         * Opening angle bracket: emit it (preceded by the
                         * tag colour), remember the start condition we came
                         * from and switch to TAG.
                         *
                         * NOTE(review): this rule has no start-condition
                         * prefix and the conditions are inclusive, so it can
                         * also fire while in TAG_PROG, TAG_STRING or TAG
                         * (e.g. a "<" comparison inside an in-line program).
                         * Confirm that this is intended.
                         */
                        printf ("%s<", ANSI_GREEN);
                        tag_caller = YY_START;
                        BEGIN (TAG);
                }

<TAG>[a-zA-Z]+  {
                        /*
                         * A run of letters inside a tag: print it in lower
                         * case.
                         *
                         * The scratch buffer lives on the heap and is reused
                         * and grown on demand.  yylex () does not return
                         * between tokens, so stack memory obtained with
                         * alloca () here would pile up for every word in
                         * every tag until the whole input had been scanned.
                         *
                         * The pattern uses "+" because an empty match could
                         * never be selected anyway: the default rule always
                         * matches one character.
                         */
                        static size_t lcbuffer_size = 0;
                        size_t needed = (size_t) yyleng + 1;

                        if (needed > lcbuffer_size)
                        {
                                /* Keep the old pointer until realloc () is
                                   known to have succeeded. */
                                char *grown = realloc (lcbuffer, needed);

                                if (grown == NULL)
                                {
                                        fprintf (stderr, "lowercaser: out of memory\n");
                                        exit (EXIT_FAILURE);
                                }
                                lcbuffer = grown;
                                lcbuffer_size = needed;
                        }
                        printf ("%s", strlcase (yytext));
                }

"<"[?%]                 {
                        /*
                         * Start of an in-line program ("<?" or "<%").
                         * Record where we came from and which opener
                         * character was used, so that only the matching
                         * closer ends the program, then echo the opener
                         * unchanged in the program colour.
                         */
                        tag_prog_caller = YY_START;
                        prog_char = yytext[1];
                        printf ("%s%s", ANSI_CYAN, yytext);
                        BEGIN (TAG_PROG);
                }

<TAG>["']       {
                        /*
                         * Opening quote of a string inside a tag.  Remember
                         * which quote character was used and the condition
                         * to return to, echo the quote in the string colour
                         * and switch to TAG_STRING so that the contents are
                         * left untouched.
                         */
                        string_char = yytext[0];
                        tag_string_caller = YY_START;
                        printf ("%s%s", ANSI_RED, yytext);
                        BEGIN (TAG_STRING);
                }

<TAG>">"        {
                        /*
                         * End of the tag: go back to the start condition
                         * that was active when the tag was opened, and emit
                         * the bracket followed by the colour reset.
                         */
                        BEGIN (tag_caller);
                        printf (">%s", ANSI_RESET);
                }

<TAG_PROG>[\?\%]\>      {
                        /*
                         * Candidate end of an in-line program.  It only
                         * counts when its first character equals the one
                         * that opened the program; otherwise the match is
                         * rejected and the scanner falls back to the next
                         * best rule.
                         *
                         * After a real end, output continues in the colour
                         * of the construct the program was embedded in,
                         * instead of always resetting.  Without ANSI_COLOR
                         * every colour is an empty string, so the emitted
                         * bytes are unchanged.
                         */
                        if (yytext[0] != prog_char)
                                { REJECT; }
                        else
                        {
                                const char *resume_color;

                                switch (tag_prog_caller)
                                {
                                case TAG:
                                        resume_color = ANSI_GREEN;
                                        break;
                                case TAG_STRING:
                                        resume_color = ANSI_RED;
                                        break;
                                case TAG_PROG:
                                        resume_color = ANSI_CYAN;
                                        break;
                                default:
                                        resume_color = ANSI_RESET;
                                        break;
                                }
                                printf ("%s%s", yytext, resume_color);
                                BEGIN (tag_prog_caller);
                        }
                }

<TAG_STRING>[^"']*["']  {
                        /*
                         * String contents up to and including the next
                         * quote.  If that quote is not the character that
                         * opened the string, the match is rejected and the
                         * scanner falls back to the next best rule.
                         *
                         * The length comes from yyleng and the text is
                         * written with fwrite (), so a NUL byte inside the
                         * string can neither cut the output short nor make
                         * the quote check read the wrong character.
                         */
                        if (yytext[yyleng - 1] != string_char)
                                { REJECT; }
                        else
                        {
                                fwrite (yytext, 1, (size_t) yyleng, stdout);
                                fputs (ANSI_GREEN, stdout);
                                BEGIN (tag_string_caller);
                        }
                }
%%