%{
/*
   lowercaser.l - Filter for lowercasing HTML tags

   Michal Ludvig (c) 2003,2004
   Homepage: http://www.logix.cz/michal/devel/lowercaser

   This code is in the public domain - you can do whatever you want
   with it, but don't blame me if it doesn't work as you expected.

   Introduction:
     In the age of Netscape 3.0 people used to write HTML tags in
     upper case (e.g. <B>Bold text</B>). However, with the introduction
     of XML and XHTML these tags are no longer valid and only lowercase
     tags can be used. On some websites it was necessary to convert
     thousands of documents from the old HTML to the shiny new XHTML.
     While that is still boring manual work, at least converting the
     upper case tags to lower case can be done automatically. And this
     is the only purpose of this short program.

     It works as a filter - feed your old HTML file to the input and
     read the result from the output. It intentionally doesn't convert
     in-line programs in PHP and ASP, nor strings inside the tags
     (like URLs), because these could be case sensitive.

     And yes, it's written in LEX.

   Compilation:
     $ flex lowercaser.l
     $ gcc -o lowercaser lex.yy.c -lfl

     You can pass the -DANSI_COLOR flag to gcc to get nicely colored
     output.
*/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Escape sequences used to highlight the different parts of a tag.
   They expand to empty strings unless built with -DANSI_COLOR, so the
   plain build emits nothing but the converted document. */
#ifdef ANSI_COLOR
#define ANSI_GREEN "\033[0;32;40m"
#define ANSI_RED   "\033[0;31;40m"
#define ANSI_CYAN  "\033[0;36;40m"
#define ANSI_RESET "\033[0m"
#else
#define ANSI_GREEN ""
#define ANSI_RED   ""
#define ANSI_CYAN  ""
#define ANSI_RESET ""
#endif

/* Destination buffer written by strlcase(); the caller must make sure
   it can hold the source string plus its terminating NUL. */
char *lcbuffer;

/* prog_char:   '?' or '%', whichever opened the current in-line program.
   string_char: the quote character that opened the current string. */
char prog_char, string_char;

/* Start condition to return to when the tag / in-line program / string
   that is currently being scanned ends. */
int tag_caller, tag_prog_caller, tag_string_caller;

/* Copy STRING into lcbuffer, converting every character to lower case,
   and return lcbuffer. lcbuffer must already point to at least
   strlen (STRING) + 1 writable bytes. */
static char *
strlcase (const char *string)
{
  size_t pos;

  for (pos = 0; string[pos] != '\0'; pos++)
    {
      /* <ctype.h> functions require a value representable as
         unsigned char (or EOF); a plain negative char is undefined.  */
      lcbuffer[pos] = (char) tolower ((unsigned char) string[pos]);
    }
  lcbuffer[pos] = '\0';

  return lcbuffer;
}
%}

/* TAG        - between '<' and '>' of an HTML tag
   TAG_STRING - inside a quoted string within a tag
   TAG_PROG   - inside an in-line program delimited by <? ?> or <% %>

   NOTE(review): the start-condition prefixes on the rules below were
   reconstructed from what each action does (the copy this was restored
   from had lost them, leaving every rule active in every state).
   Confirm against the upstream file if it is available. */
%s TAG TAG_STRING TAG_PROG

%%

<INITIAL>\< {
    /* A tag opens: remember where to return to and start lowercasing. */
    tag_caller = YY_START;
    BEGIN (TAG);
    printf ("%s<", ANSI_GREEN);
  }

<TAG>[a-zA-Z]+ {
    /* A run of letters inside a tag (element or attribute name) is
       emitted in lower case. One heap buffer is reused for every
       match; yylex() does not return between tokens, so a per-match
       alloca() would keep growing the stack until end of input. */
    char *grown = realloc (lcbuffer, (size_t) yyleng + 1);

    if (grown == NULL)
      {
        perror ("lowercaser");
        exit (EXIT_FAILURE);
      }
    lcbuffer = grown;
    printf ("%s", strlcase (yytext));
  }

<INITIAL,TAG>\<[\?\%] {
    /* "<?" or "<%" opens an in-line program. Being the longer match,
       this rule wins over the plain '<' rule. The program text itself
       is passed through untouched by the default (echo) rule. */
    prog_char = yytext[1];
    printf ("%s%s", ANSI_CYAN, yytext);
    tag_prog_caller = YY_START;
    BEGIN (TAG_PROG);
  }

<TAG>["'] {
    /* Opening quote of a string inside a tag; remember which quote
       character has to close it. */
    printf ("%s%s", ANSI_RED, yytext);
    tag_string_caller = YY_START;
    string_char = yytext[0];
    BEGIN (TAG_STRING);
  }

<TAG>\> {
    /* End of the tag: go back to wherever the tag was opened. */
    printf (">%s", ANSI_RESET);
    BEGIN (tag_caller);
  }

<TAG_PROG>[\?\%]\> {
    /* "?>" or "%>" closes the program only if it matches the opening
       delimiter; otherwise let the text fall through to be echoed. */
    if (yytext[0] != prog_char)
      {
        REJECT;
      }
    else
      {
        printf ("%s%s", yytext, ANSI_RESET);
        BEGIN (tag_prog_caller);
      }
  }

<TAG_STRING>[^"']*["'] {
    /* Everything up to and including the next quote. If that quote is
       not the one that opened the string, it is part of the string's
       contents: reject the match so the text is echoed unchanged and
       scanning continues inside the string. */
    if (yytext[yyleng - 1] != string_char)
      {
        REJECT;
      }
    else
      {
        printf ("%s%s", yytext, ANSI_GREEN);
        BEGIN (tag_string_caller);
      }
  }

%%