MIA: Janitor: __TITLE__
#!/bin/sh
# Locate invalid characters in HTML mark-up.
# /www/public_html/admin/janitor/charset.sh
# Configuration.
#   IN_FILE      - file to scan; taken from the first command-line argument.
#   CHARSET_TEXT - reference text file that the list of valid characters is
#                  generated from.
#   CHARSET_LIST - list file generated from CHARSET_TEXT; the scan treats a
#                  character as valid when it appears in this list.
IN_FILE=$1
CHARSET_TEXT=/www/public_html/admin/janitor/charset.txt
CHARSET_LIST=/www/public_html/admin/janitor/charset.lst

# Nothing can be validated without the reference text, so stop here and
# report failure through a non-zero exit status (a bare `exit` would return
# the status of the preceding command, i.e. success).
# NOTE(review): the message is kept on standard output, as before, because
# the callers of this script are not visible here.
if [ ! -f "$CHARSET_TEXT" ]; then
  printf 'File not found: %s\n' "$CHARSET_TEXT"
  exit 1
fi
# Use this .lst (list) file to locate invalid characters in mark-up.
#
# build_charset_list TEXT_FILE LIST_FILE
#   Dumps TEXT_FILE as named characters (od -t a), reflows the dump with
#   fmt -w 3, drops every line containing two consecutive digits (the od
#   offsets) and writes the remaining names, sorted and de-duplicated, to
#   LIST_FILE.
#
#   -v makes od print every row. Without it, repeated rows are replaced by
#   a lone "*" line, which would end up in the list and make "*" look like
#   a valid character.
#   The grep pattern is quoted so the shell cannot expand it as a glob
#   against files in the current directory.
build_charset_list() {
  od -v -t a <"$1" |
    fmt -w 3 |
    grep -v '[0-9][0-9]' |
    sort -u >"$2"
}

build_charset_list "$CHARSET_TEXT" "$CHARSET_LIST"
# Does input file exist?
# The expansion is quoted so that an empty or missing argument is rejected
# here; unquoted, the test collapses to `[ ! -f ]`, which is false, and the
# check would be skipped. Failure is reported with a non-zero exit status
# (a bare `exit` would return the status of the preceding command).
# NOTE(review): the message is kept on standard output, as before, because
# the callers of this script are not visible here.
if [ ! -f "$IN_FILE" ]; then
  printf 'File not found: %s\n' "$IN_FILE"
  exit 1
fi
# Locate invalid characters.
#
# scan_invalid_chars FILE LIST_FILE
#   Dumps FILE as named characters (od -t a) and prints, as
#   "FILE: <od row>", every dump row that contains at least one character
#   name not listed in LIST_FILE (one name per line). Field 1 of each row is
#   the od offset and is never checked.
#
#   -v makes od print every row; without it, rows identical to the previous
#   one are replaced by "*" and their invalid characters go unreported.
#   FILE is fed to od on standard input so its name can never be parsed as
#   an option, and both paths are quoted so spaces or glob characters in
#   them are harmless.
scan_invalid_chars() {
  od -v -t a <"$1" |
    awk -v IN_FILE="$1" -v CHARSET_LIST="$2" '
      BEGIN {
        # Load the set of valid character names once, before any input.
        while ((getline name < CHARSET_LIST) > 0) {
          CHARSET[name] = 1
        }
        close(CHARSET_LIST)
      }
      {
        # Report the row once, at its first invalid character.
        for (i = 2; i <= NF; i++) {
          if (!($i in CHARSET)) {
            print IN_FILE ": " $0
            break
          }
        }
      }'
}

scan_invalid_chars "$IN_FILE" "$CHARSET_LIST"
###
#