Git pre-commit hook to prevent non-UTF-8 files

#!/bin/sh
#
# Git pre-commit hook: flags every staged file whose detected character set
# is neither utf-8 nor us-ascii (plain ASCII is a subset of UTF-8).
#
# Leaves the verdict in `result`: 0 when every checked file passes, 1 when at
# least one file has to be rejected.

# Redirect output to stderr.
exec 1>&2

result=0

# is_utf8_charset MIME
# Succeeds when MIME (a `file --mime` description) names the utf-8 or
# us-ascii charset; fails for anything else, including an empty string.
is_utf8_charset() {
    case "$1" in
        *charset=utf-8* | *charset=us-ascii*) return 0 ;;
        *) return 1 ;;
    esac
}

# --diff-filter=ACMR drops staged deletions, which have no file left to inspect.
# core.quotepath=off keeps non-ASCII path names unescaped; names containing
# quotes, backslashes or control characters are still quoted by git and will
# be skipped by the regular-file test below.
staged_files="$(git -c core.quotepath=off diff --cached --name-only --diff-filter=ACMR)"

# Read one path per line so that names containing spaces stay intact. The loop
# is fed from a here-document (not a pipe) so `result` is set in this shell.
while IFS= read -r f
do
    # The here-document yields a single empty line when nothing is staged.
    if [ -z "$f" ]
    then
        continue
    fi

    # Skip anything that is not a regular file (e.g. submodule directories)
    # and empty files, which `file` reports as binary although zero bytes are
    # trivially valid UTF-8.
    if [ ! -f "$f" ] || [ ! -s "$f" ]
    then
        continue
    fi

    # The long option --mime is understood by both the GNU/Linux and the macOS
    # `file` utility, so no per-OS switch between -i and -I is needed.
    mime="$(file -b --mime -- "$f")"

    if ! is_utf8_charset "$mime"
    then
        printf '%s: %s - should be utf8\n' "$f" "$mime"
        result=1
    fi
done <<EOF
$staged_files
EOF

exit $result

Example of a pre-commit hook that prevents commits containing non-UTF-8 files.

Notes:

  • git diff --cached --name-only - check only staged files
  • file -I and file -i - on macOS the file utility uses a different flag (-I) than on other systems (-i)
  • us-ascii - a file in ASCII that contains only English letters is a valid UTF-8 file