nix-on-droid-app/terminal-emulator/src/test/java/com/termux/terminal/UnicodeInputTest.java

package com.termux.terminal;

import java.io.UnsupportedEncodingException;

public class UnicodeInputTest extends TerminalTestCase {

	public void testIllFormedUtf8SuccessorByteNotConsumed() throws Exception {
		// The Unicode Standard Version 6.2 – Core Specification (http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf):
		// "If the converter encounters an ill-formed UTF-8 code unit sequence which starts with a valid first byte, but which does not
		// continue with valid successor bytes (see Table 3-7), it must not consume the successor bytes as part of the ill-formed
		// subsequence whenever those successor bytes themselves constitute part of a well-formed UTF-8 code unit subsequence."
		withTerminalSized(5, 5);
		mTerminal.append(new byte[]{(byte) 0b11101111, (byte) 'a'}, 2);
		assertLineIs(0, ((char) TerminalEmulator.UNICODE_REPLACEMENT_CHAR) + "a   ");

		// https://code.google.com/p/chromium/issues/detail?id=212704
		byte[] input = new byte[]{
				(byte) 0x61, (byte) 0xF1,
				(byte) 0x80, (byte) 0x80,
				(byte) 0xe1, (byte) 0x80,
				(byte) 0xc2, (byte) 0x62,
				(byte) 0x80, (byte) 0x63,
				(byte) 0x80, (byte) 0xbf,
				(byte) 0x64
		};
		withTerminalSized(10, 2);
		mTerminal.append(input, input.length);
		assertLinesAre("a\uFFFD\uFFFD\uFFFDb\uFFFDc\uFFFD\uFFFDd", "          ");

		// Surrogate pairs.
		withTerminalSized(5, 2);
		input = new byte[]{
				(byte) 0xed, (byte) 0xa0,
				(byte) 0x80, (byte) 0xed,
				(byte) 0xad, (byte) 0xbf,
				(byte) 0xed, (byte) 0xae,
				(byte) 0x80, (byte) 0xed,
				(byte) 0xbf, (byte) 0xbf
		};
		mTerminal.append(input, input.length);
		assertLinesAre("\uFFFD\uFFFD\uFFFD\uFFFD ", "     ");

		// https://bugzilla.mozilla.org/show_bug.cgi?id=746900: "with this patch 0xe0 0x80 is decoded as two U+FFFDs,
		// but 0xe0 0xa0 is decoded as a single U+FFFD, and this is correct according to the "Best Practices", but IE
		// and Chrome (Version 22.0.1229.94) decode both of them as two U+FFFDs. Opera 12.11 decodes both of them as
		// one U+FFFD".
		withTerminalSized(5, 2);
		input = new byte[]{(byte) 0xe0, (byte) 0xa0, ' '};
		mTerminal.append(input, input.length);
		assertLinesAre("\uFFFD    ", "     ");

		// withTerminalSized(5, 2);
		// input = new byte[]{(byte) 0xe0, (byte) 0x80, 'a'};
		// mTerminal.append(input, input.length);
		// assertLinesAre("\uFFFD\uFFFDa  ", "     ");
	}

	public void testUnassignedCodePoint() throws UnsupportedEncodingException {
		withTerminalSized(3, 3);
		// UTF-8 for U+C2541, an unassigned code point:
		byte[] b = new byte[]{(byte) 0xf3, (byte) 0x82, (byte) 0x95, (byte) 0x81};
		mTerminal.append(b, b.length);
		enterString("Y");
		assertEquals(1, Character.charCount(TerminalEmulator.UNICODE_REPLACEMENT_CHAR));
		assertLineStartsWith(0, TerminalEmulator.UNICODE_REPLACEMENT_CHAR, (int) 'Y', ' ');
	}

	public void testStuff() {
		withTerminalSized(80, 24);
		byte[] b = new byte[]{(byte) 0xf3, (byte) 0x82, (byte) 0x95, (byte) 0x81, (byte) 0x61, (byte) 0x38, (byte) 0xe7, (byte) 0x8f,
				(byte) 0xae, (byte) 0xc2, (byte) 0x9f, (byte) 0xe8, (byte) 0xa0, (byte) 0x9f, (byte) 0xe8, (byte) 0x8c, (byte) 0xa4,
				(byte) 0xed, (byte) 0x93, (byte) 0x89, (byte) 0xef, (byte) 0xbf, (byte) 0xbd, (byte) 0x42, (byte) 0xc2, (byte) 0x9b,
				(byte) 0xe6, (byte) 0x87, (byte) 0x89, (byte) 0x5a};
		mTerminal.append(b, b.length);
	}

	public void testSimpleCombining() throws Exception {
		withTerminalSized(3, 2).enterString(" a\u0302 ").assertLinesAre(" a\u0302 ", "   ");
	}

	public void testCombiningCharacterInFirstColumn() throws Exception {
		withTerminalSized(5, 3).enterString("test\r\nhi\r\n").assertLinesAre("test ", "hi   ", "     ");

		// U+0302 is COMBINING CIRCUMFLEX ACCENT. Test case from mosh (http://mosh.mit.edu/).
		withTerminalSized(5, 5).enterString("test\r\nabc\r\n\u0302\r\ndef\r\n");
		assertLinesAre("test ", "abc  ", " \u0302    ", "def  ", "     ");
	}

	public void testCombiningCharacterInLastColumn() throws Exception {
		withTerminalSized(3, 2).enterString("  a\u0302").assertLinesAre("  a\u0302", "   ");
		withTerminalSized(3, 2).enterString("  à̲").assertLinesAre("  à̲", "   ");
		withTerminalSized(3, 2).enterString("Aà̲F").assertLinesAre("Aà̲F", "   ");
	}

	public void testWideCharacterInLastColumn() throws Exception {
		withTerminalSized(3, 2).enterString("  枝\u0302").assertLinesAre("   ", "枝\u0302 ");

		withTerminalSized(3, 2).enterString(" 枝").assertLinesAre(" 枝", "   ").assertCursorAt(0, 2);
		enterString("a").assertLinesAre(" 枝", "a  ");
	}

	public void testWideCharacterDeletion() throws Exception {
		// CSI Ps D Cursor Backward Ps Times
		withTerminalSized(3, 2).enterString("枝\033[Da").assertLinesAre(" a ", "   ");
		withTerminalSized(3, 2).enterString("枝\033[2Da").assertLinesAre("a  ", "   ");
		withTerminalSized(3, 2).enterString("枝\033[2D枝").assertLinesAre("枝 ", "   ");
		withTerminalSized(3, 2).enterString("枝\033[1D枝").assertLinesAre(" 枝", "   ");
		withTerminalSized(5, 2).enterString(" 枝 \033[Da").assertLinesAre(" 枝a ", "     ");
		withTerminalSized(5, 2).enterString("a \033[D\u0302").assertLinesAre("a\u0302    ", "     ");
		withTerminalSized(5, 2).enterString("枝 \033[D\u0302").assertLinesAre("枝\u0302   ", "     ");
		enterString("Z").assertLinesAre("枝\u0302Z  ", "     ");
		enterString("\033[D ").assertLinesAre("枝\u0302   ", "     ");
		// Go back two columns, standing at the second half of the wide character:
		enterString("\033[2DU").assertLinesAre(" U   ", "     ");
	}

	public void testWideCharOverwriting() {
		withTerminalSized(3, 2).enterString("abc\033[3D枝").assertLinesAre("枝c", "   ");
	}

	public void testOverlongUtf8Encoding() throws Exception {
		// U+0020 should be encoded as 0x20, 0xc0 0xa0 is an overlong encoding
		// so should be replaced with the replacement char U+FFFD.
		withTerminalSized(5, 5).mTerminal.append(new byte[]{(byte) 0xc0, (byte) 0xa0, 'Y'}, 3);
		assertLineIs(0, "\uFFFDY   ");
	}

	public void testWideCharacterWithoutWrapping() throws Exception {
		// With wraparound disabled. The behaviour when a wide character is output with cursor in
		// the last column when autowrap is disabled is not obvious, but we expect the wide
		// character to be ignored here.
		withTerminalSized(3, 3).enterString("\033[?7l").enterString("枝枝枝").assertLinesAre("枝 ", "   ", "   ");
		enterString("a枝").assertLinesAre("枝a", "   ", "   ");
	}

}