/* lowercaser.l - Filter for lowercasing HTML tags

   Michal Ludvig <michal@logix.cz> (c) 2003,2004
   Homepage: http://www.logix.cz/michal/devel/lowercaser

   This code is in the public domain - you can do whatever you want with it
   but don't blame me if it doesn't work as you expected.


   In the days of Netscape 3.0 people used to write HTML tags
   in upper case (e.g. <B>Bold text</B>). However, with the introduction
   of XML and XHTML these tags are no longer valid and only lowercase
   tags can be used. On some websites it was necessary to convert
   thousands of documents from the old HTML to the shiny new XHTML.
   While that is still boring manual work, at least converting the
   upper-case tags to lower case can be done automatically. And this is
   the only purpose of this short program.

   It works as a filter - feed your old HTML file to the input and 
   read it from the output. 

   It intentionally doesn't convert in-line programs in PHP and ASP, nor
   strings inside the tags (like URLs), because these could be
   case-sensitive.

   And yes, it's written in LEX.

   $ flex lowercaser.l
   $ gcc -o lowercaser lex.yy.c -lfl

   You can pass the -DANSI_COLOR flag to gcc to get nicely colored output.
*/

#include <ctype.h>
#include <string.h>

/* Escape sequences emitted around the different parts of the output:
   ANSI_GREEN for tag markup, ANSI_RED for quoted strings, ANSI_CYAN for
   <? ... ?> / <% ... %> program sections and ANSI_RESET to return to the
   terminal default.  Without -DANSI_COLOR they all expand to empty
   strings, so the output is plain text. */
#ifdef  ANSI_COLOR
#define ANSI_GREEN      "\033[0;32;40m"
#define ANSI_RED        "\033[0;31;40m"
#define ANSI_CYAN       "\033[0;36;40m"
#define ANSI_RESET      "\033[0m"
#else
#define ANSI_GREEN      ""
#define ANSI_RED        ""
#define ANSI_CYAN       ""
#define ANSI_RESET      ""
#endif

/* Destination buffer that strlcase() writes into and returns; the caller
   must point it at sufficient storage before each call. */
char    *lcbuffer;

/* Second character of the "<?" / "<%" opener currently being skipped;
   the closing "?>" / "%>" must start with the same character. */
char    prog_char;

/* Quote character that opened the string currently being copied. */
char    string_char;

/* Start conditions to return to when the tag, the program section and
   the quoted string, respectively, are closed. */
int     tag_caller;
int     tag_prog_caller;
int     tag_string_caller;

static char *
strlcase (const char *string)
        int i;
        for (i = 0; string[i]; i++)
                lcbuffer[i] = tolower (string[i]);
        lcbuffer[i] = 0;
        return lcbuffer;


\<              {
                        /* Opening bracket of a tag.  Remember the start
                           condition we came from so that the closing
                           bracket can restore it, switch to TAG and echo
                           the bracket preceded by the tag color. */
                        tag_caller = YY_START;
                        BEGIN (TAG);
                        printf ("%s<", ANSI_GREEN);
                }

<TAG>[a-zA-Z]*  {
                        /* A run of letters inside a tag: print it in
                           lower case.

                           The text is lowercased in place.  Flex permits
                           an action to modify yytext as long as it is not
                           lengthened, so pointing lcbuffer at yytext lets
                           strlcase do the work without any allocation.
                           Using alloca here would be unsafe: its storage
                           is released only when yylex returns, and none
                           of the visible actions return, so the stack
                           would grow with every word matched. */
                        lcbuffer = yytext;
                        printf ("%s", strlcase (yytext));
                }

\<[\?\%]                {
                        /* Start of an embedded program section, either
                           "<?" or "<%".  Record which of the two
                           characters opened it so that only the matching
                           closer ends the section, echo the opener
                           unchanged preceded by the program color, and
                           switch to TAG_PROG after saving the start
                           condition to return to. */
                        prog_char = yytext[1];
                        printf ("%s%s", ANSI_CYAN, yytext);
                        tag_prog_caller = YY_START;
                        BEGIN (TAG_PROG);
                }

<TAG>["']       {
                        /* Opening quote of a string inside a tag.  Echo it
                           preceded by the string color, remember the
                           start condition to return to and which quote
                           character must close the string, then switch
                           to TAG_STRING. */
                        printf ("%s%s", ANSI_RED, yytext);
                        tag_string_caller = YY_START;
                        string_char = yytext[0];
                        BEGIN (TAG_STRING);
                }

<TAG>\>         {
                        /* Closing bracket of a tag: echo it, reset the
                           color and go back to the start condition that
                           was active when the tag was opened. */
                        printf (">%s", ANSI_RESET);
                        BEGIN (tag_caller);
                }

<TAG_PROG>[\?\%]\>      {
                        /* Candidate end of a program section.  It only
                           counts when its first character equals the one
                           recorded from the opener; otherwise the match
                           is handed back to the scanner with REJECT so
                           the text is handled by another rule. */
                        if (yytext[0] != prog_char)
                                { REJECT; }
                        else
                                {
                                        /* Echo the closer, reset the color
                                           and restore the saved start
                                           condition. */
                                        printf ("%s%s", yytext, ANSI_RESET);
                                        BEGIN (tag_prog_caller);
                                }
                }

<TAG_STRING>[^"']*["']  {
                        /* Text up to and including the next quote
                           character.  The pattern requires a quote, so
                           yyleng is at least 1 and yytext[yyleng - 1] is
                           that quote.  If it is not the character that
                           opened the string, the match is handed back
                           with REJECT. */
                        if (yytext[yyleng - 1] != string_char)
                                { REJECT; }
                        else
                                {
                                        /* Echo the string body unchanged,
                                           switch back to the tag color and
                                           restore the saved start
                                           condition. */
                                        printf ("%s%s", yytext, ANSI_GREEN);
                                        BEGIN (tag_string_caller);
                                }
                }