%{
/* lowercaser.l - Filter for lowercasing HTML tags
Michal Ludvig (c) 2003,2004
Homepage: http://www.logix.cz/michal/devel/lowercaser
This code is in the public domain - you can do whatever you want with it
but don't blame me if it doesn't work as you expected.
Introduction:
In the ages of Netscape 3.0 people used to write HTML tags
in upper case (e.g. <B>Bold text</B>). However, with the introduction
of XML and XHTML these tags are no longer valid and only lowercase
tags can be used. On some websites it was necessary to convert
thousands of documents from the old HTML to the shiny new XHTML.
While that is still boring manual work, at least converting the
upper-case tags to lower case can be done automatically. And this is
the only purpose of this short program.
It works as a filter - feed your old HTML file to the input and
read it from the output.
It intentionally doesn't convert in-line programs in PHP and ASP as
well as strings in the tags (like URLs), because these could be case
sensitive.
And yes, it's written in LEX.
Compilation:
$ flex lowercaser.l
$ gcc -o lowercaser lex.yy.c -lfl
You can use -DANSI_COLOR flag to gcc to get a nice colored output.
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Escape sequences printed around the different parts of the output:
   ANSI_GREEN when a tag opens (and again after a quoted string closes),
   ANSI_RED when a quoted string opens, ANSI_CYAN when a <? or <% block
   opens, and ANSI_RESET when a tag or such a block closes.
   Unless the file is compiled with -DANSI_COLOR they are all empty
   strings, so the output then contains no escape sequences. */
#ifndef ANSI_COLOR
#define ANSI_GREEN ""
#define ANSI_RED ""
#define ANSI_CYAN ""
#define ANSI_RESET ""
#else
#define ANSI_GREEN "\033[0;32;40m"
#define ANSI_RED "\033[0;31;40m"
#define ANSI_CYAN "\033[0;36;40m"
#define ANSI_RESET "\033[0m"
#endif
/* Destination buffer for strlcase().  The caller must point it at
   writable storage before every call. */
char *lcbuffer;
/* prog_char:   the character that followed '<' when the current program
                block was opened ('?' or '%').
   string_char: the quote character that opened the current string. */
char prog_char, string_char;
/* Start conditions that were active when a tag, a program block or a
   quoted string was entered; the scanner returns to them on exit. */
int tag_caller, tag_prog_caller, tag_string_caller;

/* Store a lower-cased copy of STRING in the buffer that the global
   lcbuffer points to and return that buffer.

   lcbuffer must reference at least strlen (STRING) + 1 writable bytes.
   It may point at STRING itself: every byte is read before the same
   index is written, so converting in place is safe. */
static char *
strlcase (const char *string)
{
  size_t i;

  for (i = 0; string[i] != '\0'; i++)
    {
      /* tolower() is only defined for EOF and values representable as
         unsigned char; a plain char may be negative for bytes >= 0x80. */
      lcbuffer[i] = (char) tolower ((unsigned char) string[i]);
    }
  lcbuffer[i] = '\0';
  return lcbuffer;
}
%}
%s TAG TAG_STRING TAG_PROG
%%
<INITIAL>\< {
    /* A '<' in ordinary text opens an HTML tag.  Remember the start
       condition we came from so the closing '>' can return to it. */
    tag_caller = YY_START;
    BEGIN (TAG);
    printf ("%s<", ANSI_GREEN);
}
<TAG>[a-zA-Z]+ {
    /* A run of letters inside a tag: print it in lower case.  The
       conversion is done in place: flex allows an action to modify
       yytext as long as it is not lengthened, and no per-token memory
       is needed, so stack use does not grow with the input. */
    lcbuffer = yytext;
    printf ("%s", strlcase (yytext));
}
<INITIAL,TAG>\<[\?\%] {
    /* "<?" or "<%" opens an in-line program block, in plain text or
       inside a tag.  Its contents are passed through unchanged until
       the matching "?>" or "%>". */
    prog_char = yytext[1];
    printf ("%s%s", ANSI_CYAN, yytext);
    tag_prog_caller = YY_START;
    BEGIN (TAG_PROG);
}
<TAG>["'] {
    /* A quote inside a tag opens a string (e.g. a URL) whose contents
       must keep their case.  Remember which quote opened it. */
    printf ("%s%s", ANSI_RED, yytext);
    tag_string_caller = YY_START;
    string_char = yytext[0];
    BEGIN (TAG_STRING);
}
<TAG>\> {
    /* End of the tag: go back to where the tag was opened. */
    printf (">%s", ANSI_RESET);
    BEGIN (tag_caller);
}
<TAG_PROG>[\?\%]\> {
    /* A program block is closed only by the terminator that uses the
       same character as its opener; otherwise reject the match so the
       text is passed through unchanged. */
    if (yytext[0] != prog_char)
      {
        REJECT;
      }
    else
      {
        printf ("%s%s", yytext, ANSI_RESET);
        BEGIN (tag_prog_caller);
      }
}
<TAG_STRING>[^"']*["'] {
    /* Text up to the next quote.  The string ends only if that quote is
       the one that opened it; otherwise reject the match so the text is
       passed through unchanged.  The pattern always matches at least
       the quote, so yyleng >= 1. */
    if (yytext[yyleng - 1] != string_char)
      {
        REJECT;
      }
    else
      {
        printf ("%s%s", yytext, ANSI_GREEN);
        BEGIN (tag_string_caller);
      }
}
%%