MIA: Janitor: charset.sh

#!/bin/sh

# Locate invalid characters in HTML mark-up.

# /www/public_html/admin/janitor/charset.sh

# Input file to scan, taken from the first command-line argument.
IN_FILE=$1
# Reference text; the list of allowed characters is derived from it.
CHARSET_TEXT=/www/public_html/admin/janitor/charset.txt
# Generated list of allowed characters (built from an od dump of the text).
CHARSET_LIST=/www/public_html/admin/janitor/charset.lst

# Without the reference text no list can be built, so stop here and report
# failure through the exit status (a bare `exit` would return 0, the status
# of the preceding echo). The message itself is unchanged and stays on stdout.
if [ ! -f "$CHARSET_TEXT" ]; then
    echo "File not found: $CHARSET_TEXT"
    exit 1
fi

# Build the .lst (list) file used to locate invalid characters in mark-up.
# Pipeline: dump the reference text as named characters, let fmt -w 3 break
# the dump into short lines, drop the od offset tokens (the only tokens with
# two consecutive digits), and keep a single copy of each remaining token.
#   - od -v prints every dump line, so the "*" repeat marker that od emits
#     for duplicated lines can never end up in the list as a character.
#   - The grep pattern is quoted so the shell cannot glob-expand it against
#     two-digit file names in the current directory.
od -v -t a "$CHARSET_TEXT" | fmt -w 3 | grep -v '[0-9][0-9]' | sort -u >"$CHARSET_LIST"


# Does input file exist?
# "$IN_FILE" is quoted so that a missing argument (empty string) or a name
# containing spaces is tested as a single operand. Unquoted, an empty value
# collapses the test to `[ ! -f ]`, which is false, so the script would carry
# on without any input file. Exit with a failure status when the check fails.
if [ ! -f "$IN_FILE" ]; then
    echo "File not found: $IN_FILE"
    exit 1
fi

# Locate invalid characters.
# Dump the input file as named characters (od -t a) and print every dump line
# that contains a character missing from the allowed list, prefixed with the
# input file name. Both values are handed to awk with -v and quoted, so a path
# containing spaces is neither word-split nor mistaken by awk for an input
# file operand.
od -t a "$IN_FILE" |
awk -v CHARSET_LIST="$CHARSET_LIST" -v IN_FILE="$IN_FILE" '
BEGIN {
    # Load the allowed characters (one list line each) as keys of CHARSET.
    while ((getline line < CHARSET_LIST) > 0) {
        CHARSET[line]++
    }
    close(CHARSET_LIST)
}
{
    # Field 1 is the od offset; the dumped characters start at field 2.
    for (i = 2; i <= NF; i++) {
        char = $i
        if (!(char in CHARSET)) {
            # Report the offending dump line once, then move to the next line.
            print IN_FILE ": " $0
            break
        }
    }
}'

###
#

Home: MIA Janitors

Comments: janitor@marxists.org