%{
/*
 * lowercaser.l - Filter for lowercasing HTML tags
 *
 * Michal Ludvig <michal@logix.cz> (c) 2003,2004
 * Homepage: http://www.logix.cz/michal/devel/lowercaser
 *
 * This code is in the public domain - you can do whatever you want
 * with it, but don't blame me if it doesn't work as you expected.
 *
 * Introduction:
 *   In the age of Netscape 3.0 people used to write HTML tags in upper
 *   case (e.g. <B>Bold text</B>). However, with the introduction of XML
 *   and XHTML these tags are no longer valid and only lowercase tags
 *   can be used. On some websites it was necessary to convert thousands
 *   of documents from the old HTML to the shiny new XHTML. While that is
 *   still boring manual work, at least converting the upper case tags
 *   to lower case can be done automatically. That is the only purpose
 *   of this short program.
 *
 *   It works as a filter - feed your old HTML file to the input and
 *   read the result from the output.
 *
 *   It intentionally doesn't convert in-line programs in PHP and ASP,
 *   nor strings in the tags (like URLs), because these could be case
 *   sensitive.
 *
 *   And yes, it's written in LEX.
 *
 * Compilation:
 *   $ flex lowercaser.l
 *   $ gcc -o lowercaser lex.yy.c -lfl
 *
 *   You can pass the -DANSI_COLOR flag to gcc to get nicely colored
 *   output.
 */

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#ifdef ANSI_COLOR
#define ANSI_GREEN	"\033[0;32;40m"
#define ANSI_RED	"\033[0;31;40m"
#define ANSI_CYAN	"\033[0;36;40m"
#define ANSI_RESET	"\033[0m"
#else
#define ANSI_GREEN	""
#define ANSI_RED	""
#define ANSI_CYAN	""
#define ANSI_RESET	""
#endif

/* Second character of the "<?" or "<%" that opened the current program. */
static char prog_char;
/* Quote character that opened the current string inside a tag. */
static char string_char;
/* Start conditions to return to when a tag / program / string ends. */
static int tag_caller, tag_prog_caller, tag_string_caller;

/*
 * Write TEXT to stdout with every character converted to lower case.
 *
 * The characters are emitted directly instead of being copied into a
 * temporary buffer: memory obtained with alloca() inside a rule action
 * would only be released when yylex() returns (i.e. at end of input),
 * so the stack would grow with every tag in the document.
 */
static void
print_lowercase (const char *text)
{
	for (; *text; text++)
		/* tolower() is only defined for unsigned char values and EOF. */
		putchar (tolower ((unsigned char) *text));
}
%}

%s TAG
%x TAG_STRING TAG_PROG

%%

\<	{
		/*
		 * Opening '<' of a tag.  Remember where we came from so the
		 * matching '>' can restore it.  TAG_STRING and TAG_PROG are
		 * exclusive start conditions, so this rule never fires inside
		 * a quoted string or an in-line program.
		 */
		tag_caller = YY_START;
		BEGIN (TAG);
		printf ("%s<", ANSI_GREEN);
	}

<TAG>[a-zA-Z]*	{
		/* Any run of letters inside a tag is lowercased. */
		print_lowercase (yytext);
	}

<INITIAL,TAG,TAG_STRING>\<[\?\%]	{
		/*
		 * Start of an in-line program ("<?" or "<%").  Its contents
		 * are passed through untouched until the matching terminator.
		 * Inside TAG_STRING this rule is only reached as the fallback
		 * after the string rule has REJECTed a match.
		 */
		prog_char = yytext[1];
		printf ("%s%s", ANSI_CYAN, yytext);
		tag_prog_caller = YY_START;
		BEGIN (TAG_PROG);
	}

<TAG>["']	{
		/* Opening quote of a string inside a tag. */
		printf ("%s%s", ANSI_RED, yytext);
		tag_string_caller = YY_START;
		string_char = yytext[0];
		BEGIN (TAG_STRING);
	}

<TAG>\>	{
		/* End of the tag: go back to the state that opened it. */
		printf (">%s", ANSI_RESET);
		BEGIN (tag_caller);
	}

<TAG_PROG>[\?\%]\>	{
		/*
		 * Candidate program terminator.  It only counts when it uses
		 * the same character as the opener ("<?" ends with "?>",
		 * "<%" with "%>"); otherwise fall through to the default
		 * rule, which echoes the text unchanged.
		 */
		if (yytext[0] != prog_char) {
			REJECT;
		} else {
			printf ("%s%s", yytext, ANSI_RESET);
			BEGIN (tag_prog_caller);
		}
	}

<TAG_STRING>[^"']*["']	{
		/*
		 * Text up to the next quote character.  The string ends only
		 * if that quote is the same kind that opened it; otherwise
		 * REJECT so the text is handled by the next-best rule (the
		 * default echo, or the in-line program opener).
		 */
		if (yytext[yyleng - 1] != string_char) {
			REJECT;
		} else {
			printf ("%s%s", yytext, ANSI_GREEN);
			BEGIN (tag_string_caller);
		}
	}

%%