#! /bin/bash
# FS QA Test No. 453
#
# Create a directory with multiple filenames that all appear the same
# (in unicode, anyway) but point to different inodes.  In theory all
# Linux filesystems should allow this (filenames are a sequence of
# arbitrary bytes) even if the user implications are horrifying.
#
#-----------------------------------------------------------------------
# Copyright (c) 2017, Oracle and/or its affiliates.  All Rights Reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#-----------------------------------------------------------------------

seq=`basename "$0"`
seqres="$RESULT_DIR/$seq"
echo "QA output created by $seq"

here=`pwd`
tmp=/tmp/$$
status=1    # failure is the default!
trap "_cleanup; exit \$status" 0 1 2 3 15

_cleanup()
{
	rm -f $tmp.*
}

# get standard environment, filters and checks
. ./common/rc

_supported_os Linux
_require_scratch

echo "Format and mount"
_scratch_mkfs > $seqres.full 2>&1
_scratch_mount >> $seqres.full 2>&1

testdir="${SCRATCH_MNT}/test-${seq}"
mkdir $testdir

hexbytes() {
	echo -n "$1" | od -tx1 -w99999 | head -n1 | sed -e 's/^0* //g'
}

setf() {
	key="$(echo -e "$1")"
	value="$2"

	echo "${value}" > "${testdir}/${key}"
	echo "Storing ${key} ($(hexbytes "${key}")) -> ${value}" >> $seqres.full
}

testf() {
	key="$(echo -e "$1")"
	value="$2"
	fname="${testdir}/${key}"

	echo "Testing ${key} ($(hexbytes "${key}")) -> ${value}" >> $seqres.full

	if [ ! -e "${fname}" ]; then
		echo "Key ${key} does not exist for ${value} test??"
		return
	fi

	actual_value="$(cat "${fname}")"
	if [ "${actual_value}" != "${value}" ]; then
		echo "Key ${key} has value ${value}, expected ${actual_value}."
	fi
}

filter_scrub() {
	grep 'Unicode' | sed -e 's/^.*Duplicate/Duplicate/g'
}

echo "Create files"
# These two render the same
setf "french_caf\xc3\xa9.txt" "NFC"
setf "french_cafe\xcc\x81.txt" "NFD"

# These two may have different widths
setf "chinese_\xef\xbd\xb6.txt" "NFKC1"
setf "chinese_\xe3\x82\xab.txt" "NFKC2"

# Same point, different byte representations in NFC/NFD/NFKC/NFKD
setf "greek_\xcf\x93.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFC"
setf "greek_\xcf\x92\xcc\x81.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFD"
setf "greek_\xce\x8e.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFKC"
setf "greek_\xce\xa5\xcc\x81.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFKD"

# Arabic code point can expand into a muuuch longer series
setf "arabic_\xef\xb7\xba.txt" "ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, NFC"
setf "arabic_\xd8\xb5\xd9\x84\xd9\x89\x20\xd8\xa7\xd9\x84\xd9\x84\xd9\x87\x20\xd8\xb9\xd9\x84\xd9\x8a\xd9\x87\x20\xd9\x88\xd8\xb3\xd9\x84\xd9\x85.txt" "ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, NFKC"

# Fake slash?
setf "urk\xc0\xafmoo" "FAKESLASH"

# Emoji: octopus butterfly owl giraffe
setf "emoji_\xf0\x9f\xa6\x91\xf0\x9f\xa6\x8b\xf0\x9f\xa6\x89\xf0\x9f\xa6\x92.txt" "octopus butterfly owl giraffe emoji"

# Line draw characters, because why not?
setf "\x6c\x69\x6e\x65\x64\x72\x61\x77\x5f\x0a\xe2\x95\x94\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x97\x0a\xe2\x95\x91\x20\x6d\x65\x74\x61\x74\x61\x62\x6c\x65\x20\xe2\x95\x91\x0a\xe2\x95\x9f\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x95\xa2\x0a\xe2\x95\x91\x20\x5f\x5f\x69\x6e\x64\x65\x78\x20\x20\x20\xe2\x95\x91\x0a\xe2\x95\x9a\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x9d\x0a.txt" "ugly box because we can"

ls -la $testdir >> $seqres.full

echo "Test files"
testf "french_caf\xc3\xa9.txt" "NFC"
testf "french_cafe\xcc\x81.txt" "NFD"

testf "chinese_\xef\xbd\xb6.txt" "NFKC1"
testf "chinese_\xe3\x82\xab.txt" "NFKC2"

testf "greek_\xcf\x93.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFC"
testf "greek_\xcf\x92\xcc\x81.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFD"
testf "greek_\xce\x8e.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFKC"
testf "greek_\xce\xa5\xcc\x81.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFKD"

testf "arabic_\xef\xb7\xba.txt" "ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, NFC"
testf "arabic_\xd8\xb5\xd9\x84\xd9\x89\x20\xd8\xa7\xd9\x84\xd9\x84\xd9\x87\x20\xd8\xb9\xd9\x84\xd9\x8a\xd9\x87\x20\xd9\x88\xd8\xb3\xd9\x84\xd9\x85.txt" "ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, NFKC"

testf "urk\xc0\xafmoo" "FAKESLASH"

testf "emoji_\xf0\x9f\xa6\x91\xf0\x9f\xa6\x8b\xf0\x9f\xa6\x89\xf0\x9f\xa6\x92.txt" "octopus butterfly owl giraffe emoji"

testf "\x6c\x69\x6e\x65\x64\x72\x61\x77\x5f\x0a\xe2\x95\x94\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x97\x0a\xe2\x95\x91\x20\x6d\x65\x74\x61\x74\x61\x62\x6c\x65\x20\xe2\x95\x91\x0a\xe2\x95\x9f\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x95\xa2\x0a\xe2\x95\x91\x20\x5f\x5f\x69\x6e\x64\x65\x78\x20\x20\x20\xe2\x95\x91\x0a\xe2\x95\x9a\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x9d\x0a.txt" "ugly box because we can"

echo "Uniqueness of inodes?"
stat -c '%i' "${testdir}/"* | sort | uniq -c | while read nr inum; do
	if [ "${nr}" -gt 1 ]; then
		echo "${nr} ${inum}"
	fi
done

echo "Test XFS online scrub, if applicable"

# Only run this on xfs if xfs_scrub is available and has the unicode checker
check_xfs_scrub() {
	[ "$FSTYP" == "xfs" ] || return 1
	_supports_xfs_scrub "$SCRATCH_MNT" "$SCRATCH_DEV" || return 1

	# We only care if xfs_scrub has unicode string support...
	if ! type ldd > /dev/null 2>&1 || \
	   ! ldd "${XFS_SCRUB_PROG}" | grep -q libunistring; then
		return 1
	fi

	return 0
}

if check_xfs_scrub; then
	output="$(LC_ALL="C.UTF-8" ${XFS_SCRUB_PROG} -n "${SCRATCH_MNT}" 2>&1 | filter_scrub)"
	echo "${output}" | grep -q "french_" || echo "No complaints about french e accent?"
	echo "${output}" | grep -q "chinese_" || echo "No complaints about chinese width-different?"
	echo "${output}" | grep -q "greek_" || echo "No complaints about greek letter mess?"
	echo "${output}" | grep -q "arabic_" || echo "No complaints about arabic expanded string?"
	echo "Actual xfs_scrub output:" >> $seqres.full
	echo "${output}" >> $seqres.full
fi

# success, all done
status=0
exit
