From patchwork Thu Sep 28 19:38:11 2023
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Patchwork-Submitter: Dimitrij Mijoski <dmjpp@hotmail.com>
X-Patchwork-Id: 146269
Return-Path: <gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org>
Delivered-To: ouuuleilei@gmail.com
Received: by 2002:a59:cae8:0:b0:403:3b70:6f57 with SMTP id r8csp3553379vqu;
        Thu, 28 Sep 2023 12:39:05 -0700 (PDT)
X-Google-Smtp-Source: 
 AGHT+IFSmFHSKfmpEOcKRcYT0XY1ucUujiq/9CsGRYZV01ZcW5ZVuToWoZHWXGaoLENhKG2U65O2
X-Received: by 2002:a05:6402:b37:b0:522:2dcc:afb6 with SMTP id
 bo23-20020a0564020b3700b005222dccafb6mr1993606edb.7.1695929944868;
        Thu, 28 Sep 2023 12:39:04 -0700 (PDT)
ARC-Seal: i=2; a=rsa-sha256; t=1695929944; cv=pass;
        d=google.com; s=arc-20160816;
        b=h/yZ3NiCGn7Buu7E0ISc3Q91SBbhuaba0Yf4niPQBRyB6Oks41iydqaFQP5TLzl3KR
         hAEnYpdPF2z0pYKEY1sfONTcb6i2kJZ4xrBtsoowkZPzCdLRHDHxs0TtB4ynvfZbsEY6
         OrLSxHsLxtWoIJR2ljOORnN0hSsw4IaZ2GIvJsIzh6JvKJ34yJ4lCzK9kretlakBmid6
         mf/5D7H+hqX/G1Ue3iM1AoSkxP0N5b3ge6JHj0SYER0xFxtSSFKoFZBfwGtugDlauNtn
         QxUs+aUM90q7HezqCR9fIbTPwd6mrJIm/XWA+4GUblMEHz1wLi5XuNARpFvrRDKU6sJT
         KTrQ==
ARC-Message-Signature: i=2; a=rsa-sha256; c=relaxed/relaxed; d=google.com;
 s=arc-20160816;
        h=errors-to:list-subscribe:list-help:list-post:list-archive
         :list-unsubscribe:list-id:precedence:mime-version:user-agent
         :content-transfer-encoding:date:to:from:subject:message-id
         :dkim-signature:dmarc-filter:delivered-to;
        bh=n+wcRNGtfaNfTKh2Ow2aA2LVyRVMp3IHN1ab144EJrQ=;
        fh=+uLa3m5dEsZWyE738VC4UtcvjxY1kYREGLuytVVwtpk=;
        b=oeEIgeP68wKjTVpKFxW7Z3CX7RLe6XsZMiIX6K1n/G4iRqdRzTmF86GYVAfjuJ2S3T
         sJ/xPn4S7D67SQ3NCl7MCg08Ujg3c8AIr6ZCbyt/3g2cSJhseavdecZ5esHJ1QWZUBVV
         gDhR2TvOy1bePB10M0WSmV22M6PxvC7/SPFoCIs/71vgrotBeqhJ8dWxhghsVVRG22+t
         PRxBigyplM9Nu+sjKJe0zKUSgqPZApRnAwfOgxeIpJy80bUdOV1j/7v7f2E/9axI7nJs
         XSiTU8mQ11j30nA3lwUWniF5psbmz1kEjqOQBONz4O55SBqFd0u5PyN1m24ZldqpKLij
         Q+Iw==
ARC-Authentication-Results: i=2; mx.google.com;
       dkim=pass header.i=@hotmail.com header.s=selector1 header.b="na/Jk1gl";
       arc=pass (i=1);
       spf=pass (google.com: domain of
 gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org designates 8.43.85.97 as
 permitted sender)
 smtp.mailfrom="gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org";
       dmarc=pass (p=NONE sp=NONE dis=NONE) header.from=hotmail.com
Received: from server2.sourceware.org (ip-8-43-85-97.sourceware.org.
 [8.43.85.97])
        by mx.google.com with ESMTPS id
 e22-20020a056402089600b00530b8e50ad8si3878974edy.338.2023.09.28.12.39.04
        for <ouuuleilei@gmail.com>
        (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);
        Thu, 28 Sep 2023 12:39:04 -0700 (PDT)
Received-SPF: pass (google.com: domain of
 gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org designates 8.43.85.97 as
 permitted sender) client-ip=8.43.85.97;
Authentication-Results: mx.google.com;
       dkim=pass header.i=@hotmail.com header.s=selector1 header.b="na/Jk1gl";
       arc=pass (i=1);
       spf=pass (google.com: domain of
 gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org designates 8.43.85.97 as
 permitted sender)
 smtp.mailfrom="gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org";
       dmarc=pass (p=NONE sp=NONE dis=NONE) header.from=hotmail.com
Received: from server2.sourceware.org (localhost [IPv6:::1])
	by sourceware.org (Postfix) with ESMTP id 5C5EE3861937
	for <ouuuleilei@gmail.com>; Thu, 28 Sep 2023 19:39:03 +0000 (GMT)
X-Original-To: gcc-patches@gcc.gnu.org
Delivered-To: gcc-patches@gcc.gnu.org
Received: from EUR02-VI1-obe.outbound.protection.outlook.com
 (mail-vi1eur02olkn2093.outbound.protection.outlook.com [40.92.48.93])
 by sourceware.org (Postfix) with ESMTPS id 30BEB386180B;
 Thu, 28 Sep 2023 19:38:18 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 30BEB386180B
Authentication-Results: sourceware.org;
 dmarc=pass (p=none dis=none) header.from=hotmail.com
Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=hotmail.com
ARC-Seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none;
 b=QEIA33EmcP2NlRfKeP+ofX2hj6G8XHgQq/iZG5umKKBrL8ahkCsxP3YBrfPMD8W9SrG+rJdtC+UWJpqjsuBuFSAOkmp/hYyYsRip/4UayVZ75OMo7H15trh4FLCxCJZjwkPWRnCEbT9fNW9Xy2SkUthNd8+qA5M8EXwOiU660fYCkiVrhMH2LfS26FqO5DEPQrZT+HcY0WX+86tkdFvLwttsVldW4s4zW2ZyI9djHXATMOnhbc9+aIAwgbnOqoGu2CkNlQGcN8t4+xpX50absgbqtKJx308poJIR1WZgHZB2hmK+FUeqYHcbfENrjbtq+QQu6jXeejcv5cHeYOFO7A==
ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com;
 s=arcselector9901;
 h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1;
 bh=n+wcRNGtfaNfTKh2Ow2aA2LVyRVMp3IHN1ab144EJrQ=;
 b=WjdXTmAdqGvSWBeyPRl3eydclFkGva1czhWiMSGG2oO7YpqUgb7Kn1ZJH52CGgdDqNL9D+d3X2A+EGV7CM4sOrdUiaaQpuXZ8KZuIOh/hcXekCFOKrLf4abcCQ5ehstF1BydGm5/ih6oe3LUQ1CuoBxSr4FZwbc2ltruAMM6gDCUfhDgVgLbCXEnkrAWMgQ77JWfa8O/rXa+5WHJtwE+WYI4luEE0VvynTQ67WJyVvH8jQrdkgPZSgIDTXM8w1TE63bBTDbjT9uys8nJ3kixKln1WrsjprVKM/WG5zZhQX86JEWFEESLEjaO/9ytvQgksjTRbMtegPNk9NpGYGqRjw==
ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=none; dmarc=none;
 dkim=none; arc=none
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=hotmail.com;
 s=selector1;
 h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck;
 bh=n+wcRNGtfaNfTKh2Ow2aA2LVyRVMp3IHN1ab144EJrQ=;
 b=na/Jk1glCyjElElI7yG+UNhsKmemV1W+JgGI6AIv9JtICCWVyKsFzXoxAb+AqGTDqwwzes2agEtoxeB/+OKNeCVM2nsTr+7V7hvJIvARSKONViO+NbZ1BHMfmCTiHFaMdinqPTeatVH0H5RwDAtkiEaJy3qljdjdgEUln21nAfvRmehq0URY5ETq6kKpRMalI4X3RyHDx+sSLY7wJXpg/x++DwdEhEywYb0X+f4lrkwY+zH3UaAcfEr8rV/Qp4aZjk9w3ZdOg4FWPdl1o02Bc1LMTfw5dN3utcH40MuEsXlVg3m6lxowUFtqeY2ZDoRZIqsn5x+FIT8E6mtc4/FPJg==
Received: from AS1P192MB1620.EURP192.PROD.OUTLOOK.COM (2603:10a6:20b:4a0::7)
 by VI1P192MB2214.EURP192.PROD.OUTLOOK.COM (2603:10a6:800:1c4::8) with
 Microsoft SMTP Server (version=TLS1_2,
 cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.6813.28; Thu, 28 Sep
 2023 19:38:14 +0000
Received: from AS1P192MB1620.EURP192.PROD.OUTLOOK.COM
 ([fe80::f035:e341:1267:15e8]) by AS1P192MB1620.EURP192.PROD.OUTLOOK.COM
 ([fe80::f035:e341:1267:15e8%4]) with mapi id 15.20.6838.016; Thu, 28 Sep 2023
 19:38:14 +0000
Message-ID: 
 <AS1P192MB1620C0581FE17DC9EA0FA6ACACC1A@AS1P192MB1620.EURP192.PROD.OUTLOOK.COM>
Subject: [PATCH v3] libstdc++: Fix handling of surrogate CP in codecvt
 [PR108976]
From: Dimitrij Mijoski <dmjpp@hotmail.com>
To: gcc-patches@gcc.gnu.org, libstdc++@gcc.gnu.org
Date: Thu, 28 Sep 2023 21:38:11 +0200
User-Agent: Evolution 3.44.4-0ubuntu2 
X-TMN: [D2/yJ8TSLZpLJ4k8tmKjrdzAIn5OpY4v]
X-ClientProxiedBy: VI1PR06CA0171.eurprd06.prod.outlook.com
 (2603:10a6:803:c8::28) To AS1P192MB1620.EURP192.PROD.OUTLOOK.COM
 (2603:10a6:20b:4a0::7)
X-Microsoft-Original-Message-ID: 
 <fc7a0896db21e1d3e4930b0764a867ae4734ce43.camel@hotmail.com>
MIME-Version: 1.0
X-MS-Exchange-MessageSentRepresentingType: 1
X-MS-PublicTrafficType: Email
X-MS-TrafficTypeDiagnostic: AS1P192MB1620:EE_|VI1P192MB2214:EE_
X-MS-Office365-Filtering-Correlation-Id: cbc4a6bf-b524-4103-8826-08dbc05a749a
X-Microsoft-Antispam: BCL:0;
X-Microsoft-Antispam-Message-Info: 
 MHJOXtGzg0m0xK/7JyiEzhTnCHunRxtNgbTHD70M62iadgNkXpGckFHH6WIV1R0pGpcaHrsOoWaaaTYLvYl+M4iPGFZYpTJL/gTtOLPu4KMmSs5Hn3g0JGyVdcSOiiSYNX9GyvOGbbRjR8ZEzbCRxq5nDwAQuzWyMftg1XZH1rHCrqVUDL9orWasSqKQbl1j3rBCxR7kogngaqvNrH4zFpGk5ODjN6Stni99zstE8NmsuPXMqAzyE6WR9ZGlTPxnGMbSl+1qGKX+byAMyLloWG5ig+l5MVSMT4lMqWj1k4bk6v9dYHUc3VkPt0thflrBcZ06AVPJhTjcqAshE/As8u4K2f4TDE1wkOViQJeQrFgT9X4V9oYQ/vTe2iXIgnOFka+VPYuYlUCvTGXN9zTGgdnqyjzx86XX8b8W/UVDw7SyiP6R24XdiLrFU9pgB4zE/JDrkbxFl5rBNsXFL+u2+W2cINkgpqDWh7qhgktxulOqVuQYtV3eirIOVdRzvd3gyBYKMi7QPD1eCv9II7HzOviO2EAtEET9bQOJ55Jk0NwtgVIKccvgd/8C5yPh8Pqubj2g1TDG9o48fbRDQviHXcaIIO3iEUEjom1w3mB6UDo=
X-MS-Exchange-AntiSpam-MessageData-ChunkCount: 1
X-MS-Exchange-AntiSpam-MessageData-0: =?utf-8?q?VUR1gEReqTwhiQ4viTWSF24cOzIv?=
	=?utf-8?q?bhQlF2T4QGJEoub6eO9UpDDDIfRAbUmaENtS72jJ7mLukxqrpM6elPDt+2/qd+V7b?=
	=?utf-8?q?QO0MOMCJSrnY9y5yjgIQdbmItAQkDo8KS6hBc9/k0Rw6s8wj+fD6vGI8kTbg07ESk?=
	=?utf-8?q?pTD2RU69mhDW8i1/Ui5idPyl67NUEDdLpRyzdu0N0arb6lrjGdxwIFsfJBDQRaL4n?=
	=?utf-8?q?wWgxvxX2+Bb3xxH0QqNdPBeALA7RWE/WT3PzShUZfdIXVMhIxk0U19Q5lhf53fpLu?=
	=?utf-8?q?s93nm2NHSgsObqEDN9r/5PilKF1s767/9tV2bPOXiLI0T0fAEc9di+5cOw4i4TIYe?=
	=?utf-8?q?F3TecsIo5h+su6f245ipjsbXm6sVm12kMwHeMP/cSM6C+IIImBF3VmgDtXmXTstvI?=
	=?utf-8?q?pM/rAOY1kNgP/J+S7pjLA0YvwtQNFfI4mIqhFywgas6fV8f2A8iPcYBydqZFjAtZn?=
	=?utf-8?q?ubj+RZGjh8yp2CWD/8avvjlPA0PXsqAmhSSW2Hv/XOWNcDZeoOWFwzClSxuB/2NmM?=
	=?utf-8?q?MuOJskcYVRvu7W0dEfu9sa0dGeMXgbV1QnA7jczLfVOr9iMEttNvc9d4CauMEf6mN?=
	=?utf-8?q?EKfDsoiQYAesDk4ToWDCRnKcd9mIjKj/VPjRtF/Aw894Cmf2mZ7EUymh5s7Vg52eE?=
	=?utf-8?q?tHPDMmaais5OuzpsHKTv3QcmT1EhHr7TbRkht8eLg0NyLmxiXIvFBMH704MwJjMr5?=
	=?utf-8?q?TThQLHJdZ8Sl5WL90Ape8a42SAj0wbAd6QSXFybbirgDKgZqBVoK8kJmCUQ3gw5Gs?=
	=?utf-8?q?QMvT72Uap7/HXyUETvH92Xi2Ac/0mHzvJml/5TSc02XwkJR70eYiNMLwTgCfFNMHz?=
	=?utf-8?q?B9wPf9j9yW6M8mpa4aLY2j99VPvNmfZKrZKdeoodLshOzjNkgz4/rBHlete7qwqwd?=
	=?utf-8?q?LNlF9MiOBrQi3maPkOothe8YfNw3EJFbGHsnXSbFR279VhVeQ4Wqfn0c4I4fdqeT6?=
	=?utf-8?q?DCrrF9WJGMKPzIIIqvQHzGp4r4T9XcNhJrNDbWSBZWCYNeQ642RAD9Gg5Qbs6Nx0/?=
	=?utf-8?q?wpzdUrw6gzVbG8XcemBgaEnZJaODcDYUT6R4hj0MX0aEbSrYX4cPwdeFIznrvLvqz?=
	=?utf-8?q?dfqJRiGaYrQJ7wvnrdUCNA/QNjdoeG+jhN+Tdn9r44JKXJRVVpsrZxUgl4dro2b1i?=
	=?utf-8?q?CMYoQGOT6s5oZt+UnLUv41Ht85xEQCUWtRkRyG9A=3D=3D?=
X-OriginatorOrg: sct-15-20-4755-11-msonline-outlook-fb43a.templateTenant
X-MS-Exchange-CrossTenant-Network-Message-Id: 
 cbc4a6bf-b524-4103-8826-08dbc05a749a
X-MS-Exchange-CrossTenant-AuthSource: AS1P192MB1620.EURP192.PROD.OUTLOOK.COM
X-MS-Exchange-CrossTenant-AuthAs: Internal
X-MS-Exchange-CrossTenant-OriginalArrivalTime: 28 Sep 2023 19:38:14.5489 (UTC)
X-MS-Exchange-CrossTenant-FromEntityHeader: Hosted
X-MS-Exchange-CrossTenant-Id: 84df9e7f-e9f6-40af-b435-aaaaaaaaaaaa
X-MS-Exchange-CrossTenant-RMS-PersistedConsumerOrg: 
 00000000-0000-0000-0000-000000000000
X-MS-Exchange-Transport-CrossTenantHeadersStamped: VI1P192MB2214
X-Spam-Status: No, score=-9.5 required=5.0 tests=BAYES_00, DKIM_SIGNED,
 DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0,
 HK_RANDOM_ENVFROM, HK_RANDOM_FROM, RCVD_IN_DNSWL_NONE, RCVD_IN_MSPIKE_H2,
 SCC_5_SHORT_WORD_LINES, SPF_HELO_PASS, SPF_PASS,
 TXREP autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on
 server2.sourceware.org
X-BeenThere: gcc-patches@gcc.gnu.org
X-Mailman-Version: 2.1.30
Precedence: list
List-Id: Gcc-patches mailing list <gcc-patches.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-patches>,
 <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-patches>,
 <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>
Errors-To: gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org
X-getmail-retrieved-from-mailbox: INBOX
X-GMAIL-THRID: 1778311438078987801
X-GMAIL-MSGID: 1778311438078987801

This patch fixes the handling of surrogate code points in all standard
facets for transcoding Unicode that are based on std::codecvt. Surrogate
code points should always be treated as an error. Surrogate code units,
on the other hand, can only appear in UTF-16 and only when they come
in a proper pair.

Additionally, it fixes a bug in std::codecvt_utf16::in(): when an odd
number of bytes was given in the range [from, from_end), error was
always returned. The last byte in such a range does not form a full
UTF-16 code unit, so no decision about an error can be made; partial
should be returned instead.

The testsuite for testing these facets was updated in the following
order:

1. All functions that test codecvts that work with UTF-8 were refactored
   and made more generic so they accept codecvt that works with the char
   type char8_t.
2. The same functions were updated with new test cases for transcoding
   errors and now additionally test for surrogates, overlong UTF-8
   sequences, code points out of the Unicode range, and more tests for
   missing leading and trailing code units.
3. New tests were added to test codecvt_utf16 in both of its variants,
   UTF-16 <-> UTF-32/UCS-4 and UTF-16 <-> UCS-2.

libstdc++-v3/ChangeLog:

	* src/c++11/codecvt.cc (read_utf8_code_point): Fix handling of
	surrogates in UTF-8.
	(ucs4_out): Fix handling of surrogates in UCS-4 -> UTF-8.
	(ucs4_in): Fix handling of range with odd number of bytes.
	(ucs4_out): Fix handling of surrogates in UCS-4 -> UTF-16.
	(ucs2_out): Fix handling of surrogates in UCS-2 -> UTF-16.
	(ucs2_in): Fix handling of range with odd number of bytes.
	(__codecvt_utf16_base<char16_t>::do_in): Likewise.
	(__codecvt_utf16_base<char32_t>::do_in): Likewise.
	(__codecvt_utf16_base<wchar_t>::do_in): Likewise.
	* testsuite/22_locale/codecvt/codecvt_unicode.cc: Renames, add
	tests for codecvt_utf16<char16_t> and codecvt_utf16<char32_t>.
	* testsuite/22_locale/codecvt/codecvt_unicode.h: Refactor UTF-8
	testing functions for char8_t, add more test cases for errors,
	add testing functions for codecvt_utf16.
	* testsuite/22_locale/codecvt/codecvt_unicode_wchar_t.cc:
	Renames, add tests for codecvt_utf16<wchar_t>.
	* testsuite/22_locale/codecvt/codecvt_utf16/79980.cc (test06):
	Fix test.
	* testsuite/22_locale/codecvt/codecvt_unicode_char8_t.cc: New test.
---
 libstdc++-v3/src/c++11/codecvt.cc             |   18 +-
 .../22_locale/codecvt/codecvt_unicode.cc      |   38 +-
 .../22_locale/codecvt/codecvt_unicode.h       | 1799 +++++++++++++----
 .../codecvt/codecvt_unicode_char8_t.cc        |   53 +
 .../codecvt/codecvt_unicode_wchar_t.cc        |   32 +-
 .../22_locale/codecvt/codecvt_utf16/79980.cc  |    2 +-
 6 files changed, 1493 insertions(+), 449 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_char8_t.cc

diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc
index 02f05752d..2cc812cfc 100644
--- a/libstdc++-v3/src/c++11/codecvt.cc
+++ b/libstdc++-v3/src/c++11/codecvt.cc
@@ -284,6 +284,8 @@ namespace
 	return invalid_mb_sequence;
       if (c1 == 0xE0 && c2 < 0xA0) [[unlikely]] // overlong
 	return invalid_mb_sequence;
+      if (c1 == 0xED && c2 >= 0xA0) [[unlikely]] // surrogate
+	return invalid_mb_sequence;
       if (avail < 3) [[unlikely]]
 	return incomplete_mb_character;
       char32_t c3 = (unsigned char) from[2];
@@ -484,6 +486,8 @@ namespace
     while (from.size())
       {
 	const char32_t c = from[0];
+	if (0xD800 <= c && c <= 0xDFFF) [[unlikely]]
+	  return codecvt_base::error;
 	if (c > maxcode) [[unlikely]]
 	  return codecvt_base::error;
 	if (!write_utf8_code_point(to, c)) [[unlikely]]
@@ -508,7 +512,7 @@ namespace
 	  return codecvt_base::error;
 	to = codepoint;
       }
-    return from.size() ? codecvt_base::partial : codecvt_base::ok;
+    return from.nbytes() ? codecvt_base::partial : codecvt_base::ok;
   }
 
   // ucs4 -> utf16
@@ -521,6 +525,8 @@ namespace
     while (from.size())
       {
 	const char32_t c = from[0];
+	if (0xD800 <= c && c <= 0xDFFF) [[unlikely]]
+	  return codecvt_base::error;
 	if (c > maxcode) [[unlikely]]
 	  return codecvt_base::error;
 	if (!write_utf16_code_point(to, c, mode)) [[unlikely]]
@@ -653,7 +659,7 @@ namespace
     while (from.size() && to.size())
       {
 	char16_t c = from[0];
-	if (is_high_surrogate(c))
+	if (0xD800 <= c && c <= 0xDFFF)
 	  return codecvt_base::error;
 	if (c > maxcode)
 	  return codecvt_base::error;
@@ -680,7 +686,7 @@ namespace
 	  return codecvt_base::error;
 	to = c;
       }
-    return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
+    return from.nbytes() == 0 ? codecvt_base::ok : codecvt_base::partial;
   }
 
   const char16_t*
@@ -1344,8 +1350,6 @@ do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
   __from_next = reinterpret_cast<const char*>(from.next);
   __to_next = to.next;
-  if (res == codecvt_base::ok && __from_next != __from_end)
-    res = codecvt_base::error;
   return res;
 }
 
@@ -1419,8 +1423,6 @@ do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
   __from_next = reinterpret_cast<const char*>(from.next);
   __to_next = to.next;
-  if (res == codecvt_base::ok && __from_next != __from_end)
-    res = codecvt_base::error;
   return res;
 }
 
@@ -1521,8 +1523,6 @@ do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 #endif
   __from_next = reinterpret_cast<const char*>(from.next);
   __to_next = reinterpret_cast<wchar_t*>(to.next);
-  if (res == codecvt_base::ok && __from_next != __from_end)
-    res = codecvt_base::error;
   return res;
 }
 
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.cc
index df1a2b4cc..c563781ca 100644
--- a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.cc
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.cc
@@ -27,38 +27,58 @@ void
 test_utf8_utf32_codecvts ()
 {
   using codecvt_c32 = codecvt<char32_t, char, mbstate_t>;
-  auto loc_c = locale::classic ();
+  auto &loc_c = locale::classic ();
   VERIFY (has_facet<codecvt_c32> (loc_c));
 
   auto &cvt = use_facet<codecvt_c32> (loc_c);
-  test_utf8_utf32_codecvts (cvt);
+  test_utf8_utf32_cvt (cvt);
 
   codecvt_utf8<char32_t> cvt2;
-  test_utf8_utf32_codecvts (cvt2);
+  test_utf8_utf32_cvt (cvt2);
 }
 
 void
 test_utf8_utf16_codecvts ()
 {
   using codecvt_c16 = codecvt<char16_t, char, mbstate_t>;
-  auto loc_c = locale::classic ();
+  auto &loc_c = locale::classic ();
   VERIFY (has_facet<codecvt_c16> (loc_c));
 
   auto &cvt = use_facet<codecvt_c16> (loc_c);
-  test_utf8_utf16_cvts (cvt);
+  test_utf8_utf16_cvt (cvt);
 
   codecvt_utf8_utf16<char16_t> cvt2;
-  test_utf8_utf16_cvts (cvt2);
+  test_utf8_utf16_cvt (cvt2);
 
   codecvt_utf8_utf16<char32_t> cvt3;
-  test_utf8_utf16_cvts (cvt3);
+  test_utf8_utf16_cvt (cvt3);
 }
 
 void
 test_utf8_ucs2_codecvts ()
 {
   codecvt_utf8<char16_t> cvt;
-  test_utf8_ucs2_cvts (cvt);
+  test_utf8_ucs2_cvt (cvt);
+}
+
+void
+test_utf16_utf32_codecvts ()
+{
+  codecvt_utf16<char32_t> cvt;
+  test_utf16_utf32_cvt (cvt, utf16_big_endian);
+
+  codecvt_utf16<char32_t, 0x10FFFF, codecvt_mode::little_endian> cvt2;
+  test_utf16_utf32_cvt (cvt2, utf16_little_endian);
+}
+
+void
+test_utf16_ucs2_codecvts ()
+{
+  codecvt_utf16<char16_t> cvt;
+  test_utf16_ucs2_cvt (cvt, utf16_big_endian);
+
+  codecvt_utf16<char16_t, 0x10FFFF, codecvt_mode::little_endian> cvt2;
+  test_utf16_ucs2_cvt (cvt2, utf16_little_endian);
 }
 
 int
@@ -67,4 +87,6 @@ main ()
   test_utf8_utf32_codecvts ();
   test_utf8_utf16_codecvts ();
   test_utf8_ucs2_codecvts ();
+  test_utf16_utf32_codecvts ();
+  test_utf16_ucs2_codecvts ();
 }
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.h b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.h
index fbdc7a35b..d3ae42fac 100644
--- a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.h
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.h
@@ -42,33 +42,33 @@ auto constexpr array_size (const T (&)[N]) -> size_t
   return N;
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_utf32_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_utf32_in_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
-  const char in[] = "bш\uAAAA\U0010AAAA";
-  const char32_t exp_literal[] = U"bш\uAAAA\U0010AAAA";
-  CharT exp[array_size (exp_literal)] = {};
-  std::copy (begin (exp_literal), end (exp_literal), begin (exp));
-
-  static_assert (array_size (in) == 11, "");
-  static_assert (array_size (exp_literal) == 5, "");
-  static_assert (array_size (exp) == 5, "");
-  VERIFY (char_traits<char>::length (in) == 10);
-  VERIFY (char_traits<char32_t>::length (exp_literal) == 4);
-  VERIFY (char_traits<CharT>::length (exp) == 4);
+  const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
+  const char32_t expected[] = U"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 11, "");
+  static_assert (array_size (expected) == 5, "");
+
+  ExternT in[array_size (input)];
+  InternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<ExternT>::length (in) == 10);
+  VERIFY (char_traits<InternT>::length (exp) == 4);
 
   test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 4}};
   for (auto t : offsets)
     {
-      CharT out[array_size (exp) - 1] = {};
+      InternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -76,19 +76,19 @@ utf8_to_utf32_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.ok);
       VERIFY (in_next == in + t.in_size);
       VERIFY (out_next == out + t.out_size);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
       if (t.out_size < array_size (out))
 	VERIFY (out[t.out_size] == 0);
     }
 
   for (auto t : offsets)
     {
-      CharT out[array_size (exp)] = {};
+      InternT out[array_size (exp)] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res
@@ -96,29 +96,29 @@ utf8_to_utf32_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.ok);
       VERIFY (in_next == in + t.in_size);
       VERIFY (out_next == out + t.out_size);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
       if (t.out_size < array_size (out))
 	VERIFY (out[t.out_size] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_utf32_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_utf32_in_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
-  const char in[] = "bш\uAAAA\U0010AAAA";
-  const char32_t exp_literal[] = U"bш\uAAAA\U0010AAAA";
-  CharT exp[array_size (exp_literal)] = {};
-  std::copy (begin (exp_literal), end (exp_literal), begin (exp));
-
-  static_assert (array_size (in) == 11, "");
-  static_assert (array_size (exp_literal) == 5, "");
-  static_assert (array_size (exp) == 5, "");
-  VERIFY (char_traits<char>::length (in) == 10);
-  VERIFY (char_traits<char32_t>::length (exp_literal) == 4);
-  VERIFY (char_traits<CharT>::length (exp) == 4);
+  const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
+  const char32_t expected[] = U"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 11, "");
+  static_assert (array_size (expected) == 5, "");
+
+  ExternT in[array_size (input)];
+  InternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<ExternT>::length (in) == 10);
+  VERIFY (char_traits<InternT>::length (exp) == 4);
 
   test_offsets_partial offsets[] = {
     {1, 0, 0, 0}, // no space for first CP
@@ -144,14 +144,14 @@ utf8_to_utf32_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
 
   for (auto t : offsets)
     {
-      CharT out[array_size (exp) - 1] = {};
+      InternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -159,37 +159,58 @@ utf8_to_utf32_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.partial);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_utf32_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_utf32_in_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
-  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
-  const char valid_in[] = "bш\uAAAA\U0010AAAA";
-  const char32_t exp_literal[] = U"bш\uAAAA\U0010AAAA";
-  CharT exp[array_size (exp_literal)] = {};
-  std::copy (begin (exp_literal), end (exp_literal), begin (exp));
+  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
+  const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
+  const char32_t expected[] = U"b\u0448\uD700\U0010AAAA";
+  static_assert (array_size (input) == 11, "");
+  static_assert (array_size (expected) == 5, "");
+
+  ExternT in[array_size (input)];
+  InternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<ExternT>::length (in) == 10);
+  VERIFY (char_traits<InternT>::length (exp) == 4);
+
+  // There are 5 classes of errors in UTF-8 decoding
+  // 1. Missing leading byte
+  // 2. Missing trailing byte
+  // 3. Surrogate CP
+  // 4. Overlong sequence
+  // 5. CP out of Unicode range
+  test_offsets_error<unsigned char> offsets[] = {
+
+    // 1. Missing leading byte. We will replace the leading byte with
+    // non-leading byte, such as a byte that is always invalid or a trailing
+    // byte.
 
-  static_assert (array_size (valid_in) == 11, "");
-  static_assert (array_size (exp_literal) == 5, "");
-  static_assert (array_size (exp) == 5, "");
-  VERIFY (char_traits<char>::length (valid_in) == 10);
-  VERIFY (char_traits<char32_t>::length (exp_literal) == 4);
-  VERIFY (char_traits<CharT>::length (exp) == 4);
+    // replace leading byte with invalid byte
+    {1, 4, 0, 0, 0xFF, 0},
+    {3, 4, 1, 1, 0xFF, 1},
+    {6, 4, 3, 2, 0xFF, 3},
+    {10, 4, 6, 3, 0xFF, 6},
 
-  test_offsets_error<char> offsets[] = {
+    // replace leading byte with trailing byte
+    {1, 4, 0, 0, 0b10101010, 0},
+    {3, 4, 1, 1, 0b10101010, 1},
+    {6, 4, 3, 2, 0b10101010, 3},
+    {10, 4, 6, 3, 0b10101010, 6},
 
-    // replace leading byte with invalid byte
-    {1, 4, 0, 0, '\xFF', 0},
-    {3, 4, 1, 1, '\xFF', 1},
-    {6, 4, 3, 2, '\xFF', 3},
-    {10, 4, 6, 3, '\xFF', 6},
+    // 2. Missing trailing byte. We will replace the trailing byte with
+    // non-trailing byte, such as a byte that is always invalid or a leading
+    // byte (simple ASCII byte in our case).
 
     // replace first trailing byte with ASCII byte
     {3, 4, 1, 1, 'z', 2},
@@ -197,21 +218,27 @@ utf8_to_utf32_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     {10, 4, 6, 3, 'z', 7},
 
     // replace first trailing byte with invalid byte
-    {3, 4, 1, 1, '\xFF', 2},
-    {6, 4, 3, 2, '\xFF', 4},
-    {10, 4, 6, 3, '\xFF', 7},
+    {3, 4, 1, 1, 0xFF, 2},
+    {6, 4, 3, 2, 0xFF, 4},
+    {10, 4, 6, 3, 0xFF, 7},
 
     // replace second trailing byte with ASCII byte
     {6, 4, 3, 2, 'z', 5},
     {10, 4, 6, 3, 'z', 8},
 
     // replace second trailing byte with invalid byte
-    {6, 4, 3, 2, '\xFF', 5},
-    {10, 4, 6, 3, '\xFF', 8},
+    {6, 4, 3, 2, 0xFF, 5},
+    {10, 4, 6, 3, 0xFF, 8},
 
     // replace third trailing byte
     {10, 4, 6, 3, 'z', 9},
-    {10, 4, 6, 3, '\xFF', 9},
+    {10, 4, 6, 3, 0xFF, 9},
+
+    // 2.1 The following test-cases raise doubt whether error or partial should
+    // be returned. For example, we have 4-byte sequence with valid leading
+    // byte. If we hide the last byte we need to return partial. But, if the
+    // second or third byte, which are visible to the call to codecvt, are
+    // malformed then error should be returned.
 
     // replace first trailing byte with ASCII byte, also incomplete at end
     {5, 4, 3, 2, 'z', 4},
@@ -219,30 +246,51 @@ utf8_to_utf32_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     {9, 4, 6, 3, 'z', 7},
 
     // replace first trailing byte with invalid byte, also incomplete at end
-    {5, 4, 3, 2, '\xFF', 4},
-    {8, 4, 6, 3, '\xFF', 7},
-    {9, 4, 6, 3, '\xFF', 7},
+    {5, 4, 3, 2, 0xFF, 4},
+    {8, 4, 6, 3, 0xFF, 7},
+    {9, 4, 6, 3, 0xFF, 7},
 
     // replace second trailing byte with ASCII byte, also incomplete at end
     {9, 4, 6, 3, 'z', 8},
 
     // replace second trailing byte with invalid byte, also incomplete at end
-    {9, 4, 6, 3, '\xFF', 8},
+    {9, 4, 6, 3, 0xFF, 8},
+
+    // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
+    // CP U+D700
+    {6, 4, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
+    {6, 4, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
+    {6, 4, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
+    {6, 4, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
+
+    // 4. Overlong sequence. The CPs in the input are chosen such as modifying
+    // just the leading byte is enough to make them overlong, i.e. for the
+    // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
+    // zeroes.
+    {3, 4, 1, 1, 0b11000000, 1},  // make the 2-byte CP overlong
+    {3, 4, 1, 1, 0b11000001, 1},  // make the 2-byte CP overlong
+    {6, 4, 3, 2, 0b11100000, 3},  // make the 3-byte CP overlong
+    {10, 4, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
+
+    // 5. CP above range
+    // turn U+10AAAA into U+14AAAA by changing its leading byte
+    {10, 4, 6, 3, 0b11110101, 6},
+    // turn U+10AAAA into U+11AAAA by changing its 2nd byte
+    {10, 4, 6, 3, 0b10011010, 7},
   };
   for (auto t : offsets)
     {
-      char in[array_size (valid_in)] = {};
-      CharT out[array_size (exp) - 1] = {};
+      InternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
-      char_traits<char>::copy (in, valid_in, array_size (valid_in));
+      auto old_char = in[t.replace_pos];
       in[t.replace_pos] = t.replace_char;
 
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -250,48 +298,51 @@ utf8_to_utf32_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.error);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
+
+      in[t.replace_pos] = old_char;
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_utf32_in (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_utf32_in (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   utf8_to_utf32_in_ok (cvt);
   utf8_to_utf32_in_partial (cvt);
   utf8_to_utf32_in_error (cvt);
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf32_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf32_to_utf8_out_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
-  const char32_t in_literal[] = U"bш\uAAAA\U0010AAAA";
-  const char exp[] = "bш\uAAAA\U0010AAAA";
-  CharT in[array_size (in_literal)] = {};
-  copy (begin (in_literal), end (in_literal), begin (in));
-
-  static_assert (array_size (in_literal) == 5, "");
-  static_assert (array_size (in) == 5, "");
-  static_assert (array_size (exp) == 11, "");
-  VERIFY (char_traits<char32_t>::length (in_literal) == 4);
-  VERIFY (char_traits<CharT>::length (in) == 4);
-  VERIFY (char_traits<char>::length (exp) == 10);
+  const char32_t input[] = U"b\u0448\uAAAA\U0010AAAA";
+  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 5, "");
+  static_assert (array_size (expected) == 11, "");
+
+  InternT in[array_size (input)];
+  ExternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<InternT>::length (in) == 4);
+  VERIFY (char_traits<ExternT>::length (exp) == 10);
 
   const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {4, 10}};
   for (auto t : offsets)
     {
-      char out[array_size (exp) - 1] = {};
+      ExternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       auto state = mbstate_t{};
-      auto in_next = (const CharT *) nullptr;
-      auto out_next = (char *) nullptr;
+      auto in_next = (const InternT *) nullptr;
+      auto out_next = (ExternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -299,29 +350,29 @@ utf32_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.ok);
       VERIFY (in_next == in + t.in_size);
       VERIFY (out_next == out + t.out_size);
-      VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
+      VERIFY (char_traits<ExternT>::compare (out, exp, t.out_size) == 0);
       if (t.out_size < array_size (out))
 	VERIFY (out[t.out_size] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf32_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf32_to_utf8_out_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
-  const char32_t in_literal[] = U"bш\uAAAA\U0010AAAA";
-  const char exp[] = "bш\uAAAA\U0010AAAA";
-  CharT in[array_size (in_literal)] = {};
-  copy (begin (in_literal), end (in_literal), begin (in));
-
-  static_assert (array_size (in_literal) == 5, "");
-  static_assert (array_size (in) == 5, "");
-  static_assert (array_size (exp) == 11, "");
-  VERIFY (char_traits<char32_t>::length (in_literal) == 4);
-  VERIFY (char_traits<CharT>::length (in) == 4);
-  VERIFY (char_traits<char>::length (exp) == 10);
+  const char32_t input[] = U"b\u0448\uAAAA\U0010AAAA";
+  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 5, "");
+  static_assert (array_size (expected) == 11, "");
+
+  InternT in[array_size (input)];
+  ExternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<InternT>::length (in) == 4);
+  VERIFY (char_traits<ExternT>::length (exp) == 10);
 
   const test_offsets_partial offsets[] = {
     {1, 0, 0, 0}, // no space for first CP
@@ -340,14 +391,14 @@ utf32_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
   };
   for (auto t : offsets)
     {
-      char out[array_size (exp) - 1] = {};
+      ExternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
       auto state = mbstate_t{};
-      auto in_next = (const CharT *) nullptr;
-      auto out_next = (char *) nullptr;
+      auto in_next = (const InternT *) nullptr;
+      auto out_next = (ExternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -355,44 +406,58 @@ utf32_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.partial);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf32_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf32_to_utf8_out_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
-  const char32_t valid_in[] = U"bш\uAAAA\U0010AAAA";
-  const char exp[] = "bш\uAAAA\U0010AAAA";
-
-  static_assert (array_size (valid_in) == 5, "");
-  static_assert (array_size (exp) == 11, "");
-  VERIFY (char_traits<char32_t>::length (valid_in) == 4);
-  VERIFY (char_traits<char>::length (exp) == 10);
-
-  test_offsets_error<CharT> offsets[] = {{4, 10, 0, 0, 0x00110000, 0},
-					 {4, 10, 1, 1, 0x00110000, 1},
-					 {4, 10, 2, 3, 0x00110000, 2},
-					 {4, 10, 3, 6, 0x00110000, 3}};
+  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
+  const char32_t input[] = U"b\u0448\uAAAA\U0010AAAA";
+  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 5, "");
+  static_assert (array_size (expected) == 11, "");
+
+  InternT in[array_size (input)];
+  ExternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<InternT>::length (in) == 4);
+  VERIFY (char_traits<ExternT>::length (exp) == 10);
+
+  test_offsets_error<InternT> offsets[] = {
+
+    // Surrogate CP
+    {4, 10, 0, 0, 0xD800, 0},
+    {4, 10, 1, 1, 0xDBFF, 1},
+    {4, 10, 2, 3, 0xDC00, 2},
+    {4, 10, 3, 6, 0xDFFF, 3},
+
+    // CP out of range
+    {4, 10, 0, 0, 0x00110000, 0},
+    {4, 10, 1, 1, 0x00110000, 1},
+    {4, 10, 2, 3, 0x00110000, 2},
+    {4, 10, 3, 6, 0x00110000, 3}};
 
   for (auto t : offsets)
     {
-      CharT in[array_size (valid_in)] = {};
-      char out[array_size (exp) - 1] = {};
+      ExternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
-      copy (begin (valid_in), end (valid_in), begin (in));
+      auto old_char = in[t.replace_pos];
       in[t.replace_pos] = t.replace_char;
 
       auto state = mbstate_t{};
-      auto in_next = (const CharT *) nullptr;
-      auto out_next = (char *) nullptr;
+      auto in_next = (const InternT *) nullptr;
+      auto out_next = (ExternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -400,56 +465,59 @@ utf32_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.error);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
+
+      in[t.replace_pos] = old_char;
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf32_to_utf8_out (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf32_to_utf8_out (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   utf32_to_utf8_out_ok (cvt);
   utf32_to_utf8_out_partial (cvt);
   utf32_to_utf8_out_error (cvt);
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-test_utf8_utf32_codecvts (const std::codecvt<CharT, char, mbstate_t> &cvt)
+test_utf8_utf32_cvt (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   utf8_to_utf32_in (cvt);
   utf32_to_utf8_out (cvt);
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_utf16_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_utf16_in_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
-  const char in[] = "bш\uAAAA\U0010AAAA";
-  const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA";
-  CharT exp[array_size (exp_literal)] = {};
-  copy (begin (exp_literal), end (exp_literal), begin (exp));
-
-  static_assert (array_size (in) == 11, "");
-  static_assert (array_size (exp_literal) == 6, "");
-  static_assert (array_size (exp) == 6, "");
-  VERIFY (char_traits<char>::length (in) == 10);
-  VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
-  VERIFY (char_traits<CharT>::length (exp) == 5);
+  const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
+  const char16_t expected[] = u"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 11, "");
+  static_assert (array_size (expected) == 6, "");
+
+  ExternT in[array_size (input)];
+  InternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<ExternT>::length (in) == 10);
+  VERIFY (char_traits<InternT>::length (exp) == 5);
 
   test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 5}};
   for (auto t : offsets)
     {
-      CharT out[array_size (exp) - 1] = {};
+      InternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -457,19 +525,19 @@ utf8_to_utf16_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.ok);
       VERIFY (in_next == in + t.in_size);
       VERIFY (out_next == out + t.out_size);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
       if (t.out_size < array_size (out))
 	VERIFY (out[t.out_size] == 0);
     }
 
   for (auto t : offsets)
     {
-      CharT out[array_size (exp)] = {};
+      InternT out[array_size (exp)] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res
@@ -477,29 +545,29 @@ utf8_to_utf16_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.ok);
       VERIFY (in_next == in + t.in_size);
       VERIFY (out_next == out + t.out_size);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
       if (t.out_size < array_size (out))
 	VERIFY (out[t.out_size] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_utf16_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_utf16_in_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
-  const char in[] = "bш\uAAAA\U0010AAAA";
-  const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA";
-  CharT exp[array_size (exp_literal)] = {};
-  copy (begin (exp_literal), end (exp_literal), begin (exp));
-
-  static_assert (array_size (in) == 11, "");
-  static_assert (array_size (exp_literal) == 6, "");
-  static_assert (array_size (exp) == 6, "");
-  VERIFY (char_traits<char>::length (in) == 10);
-  VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
-  VERIFY (char_traits<CharT>::length (exp) == 5);
+  const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
+  const char16_t expected[] = u"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 11, "");
+  static_assert (array_size (expected) == 6, "");
+
+  ExternT in[array_size (input)];
+  InternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<ExternT>::length (in) == 10);
+  VERIFY (char_traits<InternT>::length (exp) == 5);
 
   test_offsets_partial offsets[] = {
     {1, 0, 0, 0}, // no space for first CP
@@ -530,14 +598,14 @@ utf8_to_utf16_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
 
   for (auto t : offsets)
     {
-      CharT out[array_size (exp) - 1] = {};
+      InternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -545,36 +613,58 @@ utf8_to_utf16_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.partial);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_utf16_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_utf16_in_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
-  const char valid_in[] = "bш\uAAAA\U0010AAAA";
-  const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA";
-  CharT exp[array_size (exp_literal)] = {};
-  copy (begin (exp_literal), end (exp_literal), begin (exp));
+  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
+  const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
+  const char16_t expected[] = u"b\u0448\uD700\U0010AAAA";
+  static_assert (array_size (input) == 11, "");
+  static_assert (array_size (expected) == 6, "");
+
+  ExternT in[array_size (input)];
+  InternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<ExternT>::length (in) == 10);
+  VERIFY (char_traits<InternT>::length (exp) == 5);
+
+  // There are 5 classes of errors in UTF-8 decoding
+  // 1. Missing leading byte
+  // 2. Missing trailing byte
+  // 3. Surrogate CP
+  // 4. Overlong sequence
+  // 5. CP out of Unicode range
+  test_offsets_error<unsigned char> offsets[] = {
+
+    // 1. Missing leading byte. We will replace the leading byte with a
+    // non-leading byte, such as a byte that is always invalid or a trailing
+    // byte.
 
-  static_assert (array_size (valid_in) == 11, "");
-  static_assert (array_size (exp_literal) == 6, "");
-  static_assert (array_size (exp) == 6, "");
-  VERIFY (char_traits<char>::length (valid_in) == 10);
-  VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
-  VERIFY (char_traits<CharT>::length (exp) == 5);
+    // replace leading byte with invalid byte
+    {1, 5, 0, 0, 0xFF, 0},
+    {3, 5, 1, 1, 0xFF, 1},
+    {6, 5, 3, 2, 0xFF, 3},
+    {10, 5, 6, 3, 0xFF, 6},
 
-  test_offsets_error<char> offsets[] = {
+    // replace leading byte with trailing byte
+    {1, 5, 0, 0, 0b10101010, 0},
+    {3, 5, 1, 1, 0b10101010, 1},
+    {6, 5, 3, 2, 0b10101010, 3},
+    {10, 5, 6, 3, 0b10101010, 6},
 
-    // replace leading byte with invalid byte
-    {1, 5, 0, 0, '\xFF', 0},
-    {3, 5, 1, 1, '\xFF', 1},
-    {6, 5, 3, 2, '\xFF', 3},
-    {10, 5, 6, 3, '\xFF', 6},
+    // 2. Missing trailing byte. We will replace the trailing byte with a
+    // non-trailing byte, such as a byte that is always invalid or a leading
+    // byte (simple ASCII byte in our case).
 
     // replace first trailing byte with ASCII byte
     {3, 5, 1, 1, 'z', 2},
@@ -582,21 +672,27 @@ utf8_to_utf16_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     {10, 5, 6, 3, 'z', 7},
 
     // replace first trailing byte with invalid byte
-    {3, 5, 1, 1, '\xFF', 2},
-    {6, 5, 3, 2, '\xFF', 4},
-    {10, 5, 6, 3, '\xFF', 7},
+    {3, 5, 1, 1, 0xFF, 2},
+    {6, 5, 3, 2, 0xFF, 4},
+    {10, 5, 6, 3, 0xFF, 7},
 
     // replace second trailing byte with ASCII byte
     {6, 5, 3, 2, 'z', 5},
     {10, 5, 6, 3, 'z', 8},
 
     // replace second trailing byte with invalid byte
-    {6, 5, 3, 2, '\xFF', 5},
-    {10, 5, 6, 3, '\xFF', 8},
+    {6, 5, 3, 2, 0xFF, 5},
+    {10, 5, 6, 3, 0xFF, 8},
 
     // replace third trailing byte
     {10, 5, 6, 3, 'z', 9},
-    {10, 5, 6, 3, '\xFF', 9},
+    {10, 5, 6, 3, 0xFF, 9},
+
+    // 2.1 The following test-cases raise doubt whether error or partial should
+    // be returned. For example, we have a 4-byte sequence with a valid leading
+    // byte. If we hide the last byte we need to return partial. But, if the
+    // second or third byte, which are visible to the call to codecvt, are
+    // malformed then error should be returned.
 
     // replace first trailing byte with ASCII byte, also incomplete at end
     {5, 5, 3, 2, 'z', 4},
@@ -604,30 +700,51 @@ utf8_to_utf16_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     {9, 5, 6, 3, 'z', 7},
 
     // replace first trailing byte with invalid byte, also incomplete at end
-    {5, 5, 3, 2, '\xFF', 4},
-    {8, 5, 6, 3, '\xFF', 7},
-    {9, 5, 6, 3, '\xFF', 7},
+    {5, 5, 3, 2, 0xFF, 4},
+    {8, 5, 6, 3, 0xFF, 7},
+    {9, 5, 6, 3, 0xFF, 7},
 
     // replace second trailing byte with ASCII byte, also incomplete at end
     {9, 5, 6, 3, 'z', 8},
 
     // replace second trailing byte with invalid byte, also incomplete at end
-    {9, 5, 6, 3, '\xFF', 8},
+    {9, 5, 6, 3, 0xFF, 8},
+
+    // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
+    // CP U+D700
+    {6, 5, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
+    {6, 5, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
+    {6, 5, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
+    {6, 5, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
+
+    // 4. Overlong sequence. The CPs in the input are chosen such that
+    // modifying just the leading byte is enough to make them overlong, i.e.
+    // for the 3-byte and 4-byte CP the second byte (first trailing) has enough
+    // leading zeroes.
+    {3, 5, 1, 1, 0b11000000, 1},  // make the 2-byte CP overlong
+    {3, 5, 1, 1, 0b11000001, 1},  // make the 2-byte CP overlong
+    {6, 5, 3, 2, 0b11100000, 3},  // make the 3-byte CP overlong
+    {10, 5, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
+
+    // 5. CP above range
+    // turn U+10AAAA into U+14AAAA by changing its leading byte
+    {10, 5, 6, 3, 0b11110101, 6},
+    // turn U+10AAAA into U+11AAAA by changing its 2nd byte
+    {10, 5, 6, 3, 0b10011010, 7},
   };
   for (auto t : offsets)
     {
-      char in[array_size (valid_in)] = {};
-      CharT out[array_size (exp) - 1] = {};
+      InternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
-      char_traits<char>::copy (in, valid_in, array_size (valid_in));
+      auto old_char = in[t.replace_pos];
       in[t.replace_pos] = t.replace_char;
 
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -635,48 +752,51 @@ utf8_to_utf16_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.error);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
+
+      in[t.replace_pos] = old_char;
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_utf16_in (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_utf16_in (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   utf8_to_utf16_in_ok (cvt);
   utf8_to_utf16_in_partial (cvt);
   utf8_to_utf16_in_error (cvt);
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf16_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf16_to_utf8_out_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
-  const char16_t in_literal[] = u"bш\uAAAA\U0010AAAA";
-  const char exp[] = "bш\uAAAA\U0010AAAA";
-  CharT in[array_size (in_literal)];
-  copy (begin (in_literal), end (in_literal), begin (in));
-
-  static_assert (array_size (in_literal) == 6, "");
-  static_assert (array_size (exp) == 11, "");
-  static_assert (array_size (in) == 6, "");
-  VERIFY (char_traits<char16_t>::length (in_literal) == 5);
-  VERIFY (char_traits<char>::length (exp) == 10);
-  VERIFY (char_traits<CharT>::length (in) == 5);
+  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
+  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 6, "");
+  static_assert (array_size (expected) == 11, "");
+
+  InternT in[array_size (input)];
+  ExternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<InternT>::length (in) == 5);
+  VERIFY (char_traits<ExternT>::length (exp) == 10);
 
   const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {5, 10}};
   for (auto t : offsets)
     {
-      char out[array_size (exp) - 1] = {};
+      ExternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       auto state = mbstate_t{};
-      auto in_next = (const CharT *) nullptr;
-      auto out_next = (char *) nullptr;
+      auto in_next = (const InternT *) nullptr;
+      auto out_next = (ExternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -684,29 +804,29 @@ utf16_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.ok);
       VERIFY (in_next == in + t.in_size);
       VERIFY (out_next == out + t.out_size);
-      VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
+      VERIFY (char_traits<ExternT>::compare (out, exp, t.out_size) == 0);
       if (t.out_size < array_size (out))
 	VERIFY (out[t.out_size] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf16_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf16_to_utf8_out_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
-  const char16_t in_literal[] = u"bш\uAAAA\U0010AAAA";
-  const char exp[] = "bш\uAAAA\U0010AAAA";
-  CharT in[array_size (in_literal)];
-  copy (begin (in_literal), end (in_literal), begin (in));
-
-  static_assert (array_size (in_literal) == 6, "");
-  static_assert (array_size (exp) == 11, "");
-  static_assert (array_size (in) == 6, "");
-  VERIFY (char_traits<char16_t>::length (in_literal) == 5);
-  VERIFY (char_traits<char>::length (exp) == 10);
-  VERIFY (char_traits<CharT>::length (in) == 5);
+  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
+  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 6, "");
+  static_assert (array_size (expected) == 11, "");
+
+  InternT in[array_size (input)];
+  ExternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<InternT>::length (in) == 5);
+  VERIFY (char_traits<ExternT>::length (exp) == 10);
 
   const test_offsets_partial offsets[] = {
     {1, 0, 0, 0}, // no space for first CP
@@ -732,14 +852,14 @@ utf16_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
   };
   for (auto t : offsets)
     {
-      char out[array_size (exp) - 1] = {};
+      ExternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
       auto state = mbstate_t{};
-      auto in_next = (const CharT *) nullptr;
-      auto out_next = (char *) nullptr;
+      auto in_next = (const InternT *) nullptr;
+      auto out_next = (ExternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -747,26 +867,34 @@ utf16_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.partial);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf16_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf16_to_utf8_out_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
-  const char16_t valid_in[] = u"bш\uAAAA\U0010AAAA";
-  const char exp[] = "bш\uAAAA\U0010AAAA";
-
-  static_assert (array_size (valid_in) == 6, "");
-  static_assert (array_size (exp) == 11, "");
-  VERIFY (char_traits<char16_t>::length (valid_in) == 5);
-  VERIFY (char_traits<char>::length (exp) == 10);
-
-  test_offsets_error<CharT> offsets[] = {
+  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
+  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
+  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 6, "");
+  static_assert (array_size (expected) == 11, "");
+
+  InternT in[array_size (input)];
+  ExternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<InternT>::length (in) == 5);
+  VERIFY (char_traits<ExternT>::length (exp) == 10);
+
+  // The only possible error in UTF-16 is an unpaired surrogate code unit.
+  // So we replace valid code points (scalar values) with lone surrogate CUs.
+  test_offsets_error<InternT> offsets[] = {
     {5, 10, 0, 0, 0xD800, 0},
     {5, 10, 0, 0, 0xDBFF, 0},
     {5, 10, 0, 0, 0xDC00, 0},
@@ -796,18 +924,17 @@ utf16_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
 
   for (auto t : offsets)
     {
-      CharT in[array_size (valid_in)] = {};
-      char out[array_size (exp) - 1] = {};
+      ExternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
-      copy (begin (valid_in), end (valid_in), begin (in));
+      auto old_char = in[t.replace_pos];
       in[t.replace_pos] = t.replace_char;
 
       auto state = mbstate_t{};
-      auto in_next = (const CharT *) nullptr;
-      auto out_next = (char *) nullptr;
+      auto in_next = (const InternT *) nullptr;
+      auto out_next = (ExternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -815,56 +942,59 @@ utf16_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.error);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
+
+      in[t.replace_pos] = old_char;
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf16_to_utf8_out (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf16_to_utf8_out (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   utf16_to_utf8_out_ok (cvt);
   utf16_to_utf8_out_partial (cvt);
   utf16_to_utf8_out_error (cvt);
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-test_utf8_utf16_cvts (const std::codecvt<CharT, char, mbstate_t> &cvt)
+test_utf8_utf16_cvt (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   utf8_to_utf16_in (cvt);
   utf16_to_utf8_out (cvt);
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_ucs2_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_ucs2_in_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
-  const char in[] = "bш\uAAAA";
-  const char16_t exp_literal[] = u"bш\uAAAA";
-  CharT exp[array_size (exp_literal)] = {};
-  copy (begin (exp_literal), end (exp_literal), begin (exp));
-
-  static_assert (array_size (in) == 7, "");
-  static_assert (array_size (exp_literal) == 4, "");
-  static_assert (array_size (exp) == 4, "");
-  VERIFY (char_traits<char>::length (in) == 6);
-  VERIFY (char_traits<char16_t>::length (exp_literal) == 3);
-  VERIFY (char_traits<CharT>::length (exp) == 3);
+  const unsigned char input[] = "b\u0448\uAAAA";
+  const char16_t expected[] = u"b\u0448\uAAAA";
+  static_assert (array_size (input) == 7, "");
+  static_assert (array_size (expected) == 4, "");
+
+  ExternT in[array_size (input)];
+  InternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<ExternT>::length (in) == 6);
+  VERIFY (char_traits<InternT>::length (exp) == 3);
 
   test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}};
   for (auto t : offsets)
     {
-      CharT out[array_size (exp) - 1] = {};
+      InternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -872,19 +1002,19 @@ utf8_to_ucs2_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.ok);
       VERIFY (in_next == in + t.in_size);
       VERIFY (out_next == out + t.out_size);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
       if (t.out_size < array_size (out))
 	VERIFY (out[t.out_size] == 0);
     }
 
   for (auto t : offsets)
     {
-      CharT out[array_size (exp)] = {};
+      InternT out[array_size (exp)] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res
@@ -892,29 +1022,29 @@ utf8_to_ucs2_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.ok);
       VERIFY (in_next == in + t.in_size);
       VERIFY (out_next == out + t.out_size);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
       if (t.out_size < array_size (out))
 	VERIFY (out[t.out_size] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_ucs2_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_ucs2_in_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
-  const char in[] = "bш\uAAAA";
-  const char16_t exp_literal[] = u"bш\uAAAA";
-  CharT exp[array_size (exp_literal)] = {};
-  copy (begin (exp_literal), end (exp_literal), begin (exp));
-
-  static_assert (array_size (in) == 7, "");
-  static_assert (array_size (exp_literal) == 4, "");
-  static_assert (array_size (exp) == 4, "");
-  VERIFY (char_traits<char>::length (in) == 6);
-  VERIFY (char_traits<char16_t>::length (exp_literal) == 3);
-  VERIFY (char_traits<CharT>::length (exp) == 3);
+  const unsigned char input[] = "b\u0448\uAAAA";
+  const char16_t expected[] = u"b\u0448\uAAAA";
+  static_assert (array_size (input) == 7, "");
+  static_assert (array_size (expected) == 4, "");
+
+  ExternT in[array_size (input)];
+  InternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<ExternT>::length (in) == 6);
+  VERIFY (char_traits<InternT>::length (exp) == 3);
 
   test_offsets_partial offsets[] = {
     {1, 0, 0, 0}, // no space for first CP
@@ -932,14 +1062,14 @@ utf8_to_ucs2_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
 
   for (auto t : offsets)
     {
-      CharT out[array_size (exp) - 1] = {};
+      InternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -947,36 +1077,57 @@ utf8_to_ucs2_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.partial);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_ucs2_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_ucs2_in_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
-  const char valid_in[] = "bш\uAAAA\U0010AAAA";
-  const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA";
-  CharT exp[array_size (exp_literal)] = {};
-  copy (begin (exp_literal), end (exp_literal), begin (exp));
+  const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
+  const char16_t expected[] = u"b\u0448\uD700\U0010AAAA";
+  static_assert (array_size (input) == 11, "");
+  static_assert (array_size (expected) == 6, "");
+
+  ExternT in[array_size (input)];
+  InternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<ExternT>::length (in) == 10);
+  VERIFY (char_traits<InternT>::length (exp) == 5);
+
+  // There are 5 classes of errors in UTF-8 decoding
+  // 1. Missing leading byte
+  // 2. Missing trailing byte
+  // 3. Surrogate CP
+  // 4. Overlong sequence
+  // 5. CP out of Unicode range
+  test_offsets_error<unsigned char> offsets[] = {
+
+    // 1. Missing leading byte. We will replace the leading byte with a
+    // non-leading byte, such as a byte that is always invalid or a trailing
+    // byte.
 
-  static_assert (array_size (valid_in) == 11, "");
-  static_assert (array_size (exp_literal) == 6, "");
-  static_assert (array_size (exp) == 6, "");
-  VERIFY (char_traits<char>::length (valid_in) == 10);
-  VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
-  VERIFY (char_traits<CharT>::length (exp) == 5);
+    // replace leading byte with invalid byte
+    {1, 5, 0, 0, 0xFF, 0},
+    {3, 5, 1, 1, 0xFF, 1},
+    {6, 5, 3, 2, 0xFF, 3},
+    {10, 5, 6, 3, 0xFF, 6},
 
-  test_offsets_error<char> offsets[] = {
+    // replace leading byte with trailing byte
+    {1, 5, 0, 0, 0b10101010, 0},
+    {3, 5, 1, 1, 0b10101010, 1},
+    {6, 5, 3, 2, 0b10101010, 3},
+    {10, 5, 6, 3, 0b10101010, 6},
 
-    // replace leading byte with invalid byte
-    {1, 5, 0, 0, '\xFF', 0},
-    {3, 5, 1, 1, '\xFF', 1},
-    {6, 5, 3, 2, '\xFF', 3},
-    {10, 5, 6, 3, '\xFF', 6},
+    // 2. Missing trailing byte. We will replace the trailing byte with a
+    // non-trailing byte, such as a byte that is always invalid or a leading
+    // byte (a simple ASCII byte in our case).
 
     // replace first trailing byte with ASCII byte
     {3, 5, 1, 1, 'z', 2},
@@ -984,72 +1135,90 @@ utf8_to_ucs2_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     {10, 5, 6, 3, 'z', 7},
 
     // replace first trailing byte with invalid byte
-    {3, 5, 1, 1, '\xFF', 2},
-    {6, 5, 3, 2, '\xFF', 4},
-    {10, 5, 6, 3, '\xFF', 7},
+    {3, 5, 1, 1, 0xFF, 2},
+    {6, 5, 3, 2, 0xFF, 4},
+    {10, 5, 6, 3, 0xFF, 7},
 
     // replace second trailing byte with ASCII byte
     {6, 5, 3, 2, 'z', 5},
     {10, 5, 6, 3, 'z', 8},
 
     // replace second trailing byte with invalid byte
-    {6, 5, 3, 2, '\xFF', 5},
-    {10, 5, 6, 3, '\xFF', 8},
+    {6, 5, 3, 2, 0xFF, 5},
+    {10, 5, 6, 3, 0xFF, 8},
 
     // replace third trailing byte
     {10, 5, 6, 3, 'z', 9},
-    {10, 5, 6, 3, '\xFF', 9},
-
-    // When we see a leading byte of 4-byte CP, we should return error, no
-    // matter if it is incomplete at the end or has errors in the trailing
-    // bytes.
-
-    // Don't replace anything, show full 4-byte CP
-    {10, 4, 6, 3, 'b', 0},
-    {10, 5, 6, 3, 'b', 0},
+    {10, 5, 6, 3, 0xFF, 9},
 
-    // Don't replace anything, show incomplete 4-byte CP at the end
-    {7, 4, 6, 3, 'b', 0}, // incomplete fourth CP
-    {8, 4, 6, 3, 'b', 0}, // incomplete fourth CP
-    {9, 4, 6, 3, 'b', 0}, // incomplete fourth CP
-    {7, 5, 6, 3, 'b', 0}, // incomplete fourth CP
-    {8, 5, 6, 3, 'b', 0}, // incomplete fourth CP
-    {9, 5, 6, 3, 'b', 0}, // incomplete fourth CP
+    // 2.1 The following test cases raise doubt whether error or partial
+    // should be returned. For example, we have a 4-byte sequence with a valid
+    // leading byte. If we hide the last byte, we need to return partial. But
+    // if the second or third byte, both visible to the call to codecvt, is
+    // malformed, then error should be returned.
 
     // replace first trailing byte with ASCII byte, also incomplete at end
     {5, 5, 3, 2, 'z', 4},
-
-    // replace first trailing byte with invalid byte, also incomplete at end
-    {5, 5, 3, 2, '\xFF', 4},
-
-    // replace first trailing byte with ASCII byte, also incomplete at end
     {8, 5, 6, 3, 'z', 7},
     {9, 5, 6, 3, 'z', 7},
 
     // replace first trailing byte with invalid byte, also incomplete at end
-    {8, 5, 6, 3, '\xFF', 7},
-    {9, 5, 6, 3, '\xFF', 7},
+    {5, 5, 3, 2, 0xFF, 4},
+    {8, 5, 6, 3, 0xFF, 7},
+    {9, 5, 6, 3, 0xFF, 7},
 
     // replace second trailing byte with ASCII byte, also incomplete at end
     {9, 5, 6, 3, 'z', 8},
 
     // replace second trailing byte with invalid byte, also incomplete at end
-    {9, 5, 6, 3, '\xFF', 8},
+    {9, 5, 6, 3, 0xFF, 8},
+
+    // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
+    // CP U+D700
+    {6, 5, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
+    {6, 5, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
+    {6, 5, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
+    {6, 5, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
+
+    // 4. Overlong sequence. The CPs in the input are chosen such that
+    // modifying just the leading byte is enough to make them overlong, i.e.
+    // for the 3-byte and 4-byte CP the second byte (first trailing) has enough
+    // leading zeroes.
+    {3, 5, 1, 1, 0b11000000, 1},  // make the 2-byte CP overlong
+    {3, 5, 1, 1, 0b11000001, 1},  // make the 2-byte CP overlong
+    {6, 5, 3, 2, 0b11100000, 3},  // make the 3-byte CP overlong
+    {10, 5, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
+
+    // 5. CP above range
+    // turn U+10AAAA into U+14AAAA by changing its leading byte
+    {10, 5, 6, 3, 0b11110101, 6},
+    // turn U+10AAAA into U+11AAAA by changing its 2nd byte
+    {10, 5, 6, 3, 0b10011010, 7},
+    // Don't replace anything, show full 4-byte CP U+10AAAA
+    {10, 4, 6, 3, 'b', 0},
+    {10, 5, 6, 3, 'b', 0},
+    // Don't replace anything, show incomplete 4-byte CP at the end. It's still
+    // out of UCS2 range just by seeing the first byte.
+    {7, 4, 6, 3, 'b', 0}, // incomplete fourth CP
+    {8, 4, 6, 3, 'b', 0}, // incomplete fourth CP
+    {9, 4, 6, 3, 'b', 0}, // incomplete fourth CP
+    {7, 5, 6, 3, 'b', 0}, // incomplete fourth CP
+    {8, 5, 6, 3, 'b', 0}, // incomplete fourth CP
+    {9, 5, 6, 3, 'b', 0}, // incomplete fourth CP
   };
   for (auto t : offsets)
     {
-      char in[array_size (valid_in)] = {};
-      CharT out[array_size (exp) - 1] = {};
+      InternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
-      char_traits<char>::copy (in, valid_in, array_size (valid_in));
+      auto old_char = in[t.replace_pos];
       in[t.replace_pos] = t.replace_char;
 
       auto state = mbstate_t{};
-      auto in_next = (const char *) nullptr;
-      auto out_next = (CharT *) nullptr;
+      auto in_next = (const ExternT *) nullptr;
+      auto out_next = (InternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -1057,48 +1226,51 @@ utf8_to_ucs2_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.error);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
+
+      in[t.replace_pos] = old_char;
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-utf8_to_ucs2_in (const std::codecvt<CharT, char, mbstate_t> &cvt)
+utf8_to_ucs2_in (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   utf8_to_ucs2_in_ok (cvt);
   utf8_to_ucs2_in_partial (cvt);
   utf8_to_ucs2_in_error (cvt);
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-ucs2_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+ucs2_to_utf8_out_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
-  const char16_t in_literal[] = u"bш\uAAAA";
-  const char exp[] = "bш\uAAAA";
-  CharT in[array_size (in_literal)] = {};
-  copy (begin (in_literal), end (in_literal), begin (in));
-
-  static_assert (array_size (in_literal) == 4, "");
-  static_assert (array_size (exp) == 7, "");
-  static_assert (array_size (in) == 4, "");
-  VERIFY (char_traits<char16_t>::length (in_literal) == 3);
-  VERIFY (char_traits<char>::length (exp) == 6);
-  VERIFY (char_traits<CharT>::length (in) == 3);
+  const char16_t input[] = u"b\u0448\uAAAA";
+  const unsigned char expected[] = "b\u0448\uAAAA";
+  static_assert (array_size (input) == 4, "");
+  static_assert (array_size (expected) == 7, "");
+
+  InternT in[array_size (input)];
+  ExternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<InternT>::length (in) == 3);
+  VERIFY (char_traits<ExternT>::length (exp) == 6);
 
   const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}};
   for (auto t : offsets)
     {
-      char out[array_size (exp) - 1] = {};
+      ExternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       auto state = mbstate_t{};
-      auto in_next = (const CharT *) nullptr;
-      auto out_next = (char *) nullptr;
+      auto in_next = (const InternT *) nullptr;
+      auto out_next = (ExternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -1106,29 +1278,29 @@ ucs2_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.ok);
       VERIFY (in_next == in + t.in_size);
       VERIFY (out_next == out + t.out_size);
-      VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
+      VERIFY (char_traits<ExternT>::compare (out, exp, t.out_size) == 0);
       if (t.out_size < array_size (out))
 	VERIFY (out[t.out_size] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-ucs2_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+ucs2_to_utf8_out_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
   // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
-  const char16_t in_literal[] = u"bш\uAAAA";
-  const char exp[] = "bш\uAAAA";
-  CharT in[array_size (in_literal)] = {};
-  copy (begin (in_literal), end (in_literal), begin (in));
-
-  static_assert (array_size (in_literal) == 4, "");
-  static_assert (array_size (exp) == 7, "");
-  static_assert (array_size (in) == 4, "");
-  VERIFY (char_traits<char16_t>::length (in_literal) == 3);
-  VERIFY (char_traits<char>::length (exp) == 6);
-  VERIFY (char_traits<CharT>::length (in) == 3);
+  const char16_t input[] = u"b\u0448\uAAAA";
+  const unsigned char expected[] = "b\u0448\uAAAA";
+  static_assert (array_size (input) == 4, "");
+  static_assert (array_size (expected) == 7, "");
+
+  InternT in[array_size (input)];
+  ExternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<InternT>::length (in) == 3);
+  VERIFY (char_traits<ExternT>::length (exp) == 6);
 
   const test_offsets_partial offsets[] = {
     {1, 0, 0, 0}, // no space for first CP
@@ -1142,14 +1314,14 @@ ucs2_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
   };
   for (auto t : offsets)
     {
-      char out[array_size (exp) - 1] = {};
+      ExternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
       auto state = mbstate_t{};
-      auto in_next = (const CharT *) nullptr;
-      auto out_next = (char *) nullptr;
+      auto in_next = (const InternT *) nullptr;
+      auto out_next = (ExternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -1157,43 +1329,45 @@ ucs2_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.partial);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-ucs2_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+ucs2_to_utf8_out_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   using namespace std;
-  const char16_t valid_in[] = u"bш\uAAAA\U0010AAAA";
-  const char exp[] = "bш\uAAAA\U0010AAAA";
-
-  static_assert (array_size (valid_in) == 6, "");
-  static_assert (array_size (exp) == 11, "");
-  VERIFY (char_traits<char16_t>::length (valid_in) == 5);
-  VERIFY (char_traits<char>::length (exp) == 10);
-
-  test_offsets_error<CharT> offsets[] = {
-    {5, 10, 0, 0, 0xD800, 0},
-    {5, 10, 0, 0, 0xDBFF, 0},
-    {5, 10, 0, 0, 0xDC00, 0},
-    {5, 10, 0, 0, 0xDFFF, 0},
-
-    {5, 10, 1, 1, 0xD800, 1},
-    {5, 10, 1, 1, 0xDBFF, 1},
-    {5, 10, 1, 1, 0xDC00, 1},
-    {5, 10, 1, 1, 0xDFFF, 1},
-
-    {5, 10, 2, 3, 0xD800, 2},
-    {5, 10, 2, 3, 0xDBFF, 2},
-    {5, 10, 2, 3, 0xDC00, 2},
-    {5, 10, 2, 3, 0xDFFF, 2},
-
-    // dont replace anything, just show the surrogate pair
-    {5, 10, 3, 6, u'b', 0},
+  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
+  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 6, "");
+  static_assert (array_size (expected) == 11, "");
+
+  InternT in[array_size (input)];
+  ExternT exp[array_size (expected)];
+  copy (begin (input), end (input), begin (in));
+  copy (begin (expected), end (expected), begin (exp));
+  VERIFY (char_traits<InternT>::length (in) == 5);
+  VERIFY (char_traits<ExternT>::length (exp) == 10);
+
+  test_offsets_error<InternT> offsets[] = {
+    {3, 6, 0, 0, 0xD800, 0},
+    {3, 6, 0, 0, 0xDBFF, 0},
+    {3, 6, 0, 0, 0xDC00, 0},
+    {3, 6, 0, 0, 0xDFFF, 0},
+
+    {3, 6, 1, 1, 0xD800, 1},
+    {3, 6, 1, 1, 0xDBFF, 1},
+    {3, 6, 1, 1, 0xDC00, 1},
+    {3, 6, 1, 1, 0xDFFF, 1},
+
+    {3, 6, 2, 3, 0xD800, 2},
+    {3, 6, 2, 3, 0xDBFF, 2},
+    {3, 6, 2, 3, 0xDC00, 2},
+    {3, 6, 2, 3, 0xDFFF, 2},
 
     // make the leading surrogate a trailing one
     {5, 10, 3, 6, 0xDC00, 3},
@@ -1206,6 +1380,9 @@ ucs2_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     // make the trailing surrogate a BMP char
     {5, 10, 3, 6, u'z', 4},
 
+    // don't replace anything in the test cases below, just show the surrogate
+    // pair (fourth CP) fully or partially
+    {5, 10, 3, 6, u'b', 0},
     {5, 7, 3, 6, u'b', 0}, // no space for fourth CP
     {5, 8, 3, 6, u'b', 0}, // no space for fourth CP
     {5, 9, 3, 6, u'b', 0}, // no space for fourth CP
@@ -1214,23 +1391,21 @@ ucs2_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     {4, 7, 3, 6, u'b', 0},  // incomplete fourth CP, and no space for it
     {4, 8, 3, 6, u'b', 0},  // incomplete fourth CP, and no space for it
     {4, 9, 3, 6, u'b', 0},  // incomplete fourth CP, and no space for it
-
   };
 
   for (auto t : offsets)
     {
-      CharT in[array_size (valid_in)] = {};
-      char out[array_size (exp) - 1] = {};
+      ExternT out[array_size (exp) - 1] = {};
       VERIFY (t.in_size <= array_size (in));
       VERIFY (t.out_size <= array_size (out));
       VERIFY (t.expected_in_next <= t.in_size);
       VERIFY (t.expected_out_next <= t.out_size);
-      copy (begin (valid_in), end (valid_in), begin (in));
+      auto old_char = in[t.replace_pos];
       in[t.replace_pos] = t.replace_char;
 
       auto state = mbstate_t{};
-      auto in_next = (const CharT *) nullptr;
-      auto out_next = (char *) nullptr;
+      auto in_next = (const InternT *) nullptr;
+      auto out_next = (ExternT *) nullptr;
       auto res = codecvt_base::result ();
 
       res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
@@ -1238,25 +1413,793 @@ ucs2_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
       VERIFY (res == cvt.error);
       VERIFY (in_next == in + t.expected_in_next);
       VERIFY (out_next == out + t.expected_out_next);
-      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
       if (t.expected_out_next < array_size (out))
 	VERIFY (out[t.expected_out_next] == 0);
+
+      in[t.replace_pos] = old_char;
     }
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-ucs2_to_utf8_out (const std::codecvt<CharT, char, mbstate_t> &cvt)
+ucs2_to_utf8_out (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   ucs2_to_utf8_out_ok (cvt);
   ucs2_to_utf8_out_partial (cvt);
   ucs2_to_utf8_out_error (cvt);
 }
 
-template <class CharT>
+template <class InternT, class ExternT>
 void
-test_utf8_ucs2_cvts (const std::codecvt<CharT, char, mbstate_t> &cvt)
+test_utf8_ucs2_cvt (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
 {
   utf8_to_ucs2_in (cvt);
   ucs2_to_utf8_out (cvt);
 }
+
+enum utf16_endianess
+{
+  utf16_big_endian,
+  utf16_little_endian
+};
+
+template <class Iter1, class Iter2>
+Iter2
+utf16_to_bytes (Iter1 f, Iter1 l, Iter2 o, utf16_endianess e)
+{
+  if (e == utf16_big_endian)
+    for (; f != l; ++f)
+      {
+	*o++ = (*f >> 8) & 0xFF;
+	*o++ = *f & 0xFF;
+      }
+  else
+    for (; f != l; ++f)
+      {
+	*o++ = *f & 0xFF;
+	*o++ = (*f >> 8) & 0xFF;
+      }
+  return o;
+}
+
+template <class InternT>
+void
+utf16_to_utf32_in_ok (const std::codecvt<InternT, char, mbstate_t> &cvt,
+		      utf16_endianess endianess)
+{
+  using namespace std;
+  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
+  const char32_t expected[] = U"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 6, "");
+  static_assert (array_size (expected) == 5, "");
+
+  char in[array_size (input) * 2];
+  InternT exp[array_size (expected)];
+  utf16_to_bytes (begin (input), end (input), begin (in), endianess);
+  copy (begin (expected), end (expected), begin (exp));
+
+  test_offsets_ok offsets[] = {{0, 0}, {2, 1}, {4, 2}, {6, 3}, {10, 4}};
+  for (auto t : offsets)
+    {
+      InternT out[array_size (exp) - 1] = {};
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      auto state = mbstate_t{};
+      auto in_next = (const char *) nullptr;
+      auto out_next = (InternT *) nullptr;
+      auto res = codecvt_base::result ();
+
+      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+		    out_next);
+      VERIFY (res == cvt.ok);
+      VERIFY (in_next == in + t.in_size);
+      VERIFY (out_next == out + t.out_size);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
+      if (t.out_size < array_size (out))
+	VERIFY (out[t.out_size] == 0);
+    }
+
+  for (auto t : offsets)
+    {
+      InternT out[array_size (exp)] = {};
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      auto state = mbstate_t{};
+      auto in_next = (const char *) nullptr;
+      auto out_next = (InternT *) nullptr;
+      auto res = codecvt_base::result ();
+
+      res
+	= cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
+      VERIFY (res == cvt.ok);
+      VERIFY (in_next == in + t.in_size);
+      VERIFY (out_next == out + t.out_size);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
+      if (t.out_size < array_size (out))
+	VERIFY (out[t.out_size] == 0);
+    }
+}
+
+template <class InternT>
+void
+utf16_to_utf32_in_partial (const std::codecvt<InternT, char, mbstate_t> &cvt,
+			   utf16_endianess endianess)
+{
+  using namespace std;
+  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
+  const char32_t expected[] = U"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 6, "");
+  static_assert (array_size (expected) == 5, "");
+
+  char in[array_size (input) * 2];
+  InternT exp[array_size (expected)];
+  auto in_iter = begin (in);
+  utf16_to_bytes (begin (input), end (input), begin (in), endianess);
+  copy (begin (expected), end (expected), begin (exp));
+
+  test_offsets_partial offsets[] = {
+    {2, 0, 0, 0}, // no space for first CP
+    {1, 1, 0, 0}, // incomplete first CP
+    {1, 0, 0, 0}, // incomplete first CP, and no space for it
+
+    {4, 1, 2, 1}, // no space for second CP
+    {3, 2, 2, 1}, // incomplete second CP
+    {3, 1, 2, 1}, // incomplete second CP, and no space for it
+
+    {6, 2, 4, 2}, // no space for third CP
+    {5, 3, 4, 2}, // incomplete third CP
+    {5, 2, 4, 2}, // incomplete third CP, and no space for it
+
+    {10, 3, 6, 3}, // no space for fourth CP
+    {7, 4, 6, 3},  // incomplete fourth CP
+    {8, 4, 6, 3},  // incomplete fourth CP
+    {9, 4, 6, 3},  // incomplete fourth CP
+    {7, 3, 6, 3},  // incomplete fourth CP, and no space for it
+    {8, 3, 6, 3},  // incomplete fourth CP, and no space for it
+    {9, 3, 6, 3},  // incomplete fourth CP, and no space for it
+  };
+
+  for (auto t : offsets)
+    {
+      InternT out[array_size (exp) - 1] = {};
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      VERIFY (t.expected_in_next <= t.in_size);
+      VERIFY (t.expected_out_next <= t.out_size);
+      auto state = mbstate_t{};
+      auto in_next = (const char *) nullptr;
+      auto out_next = (InternT *) nullptr;
+      auto res = codecvt_base::result ();
+
+      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+		    out_next);
+      VERIFY (res == cvt.partial);
+      VERIFY (in_next == in + t.expected_in_next);
+      VERIFY (out_next == out + t.expected_out_next);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
+      if (t.expected_out_next < array_size (out))
+	VERIFY (out[t.expected_out_next] == 0);
+    }
+}
+
+template <class InternT>
+void
+utf16_to_utf32_in_error (const std::codecvt<InternT, char, mbstate_t> &cvt,
+			 utf16_endianess endianess)
+{
+  using namespace std;
+  char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
+  const char32_t expected[] = U"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 6, "");
+  static_assert (array_size (expected) == 5, "");
+
+  InternT exp[array_size (expected)];
+  copy (begin (expected), end (expected), begin (exp));
+
+  // The only possible error in UTF-16 is unpaired surrogate code units.
+  // So we replace valid code points (scalar values) with lone surrogate CU.
+  test_offsets_error<char16_t> offsets[] = {
+    {10, 4, 0, 0, 0xD800, 0},
+    {10, 4, 0, 0, 0xDBFF, 0},
+    {10, 4, 0, 0, 0xDC00, 0},
+    {10, 4, 0, 0, 0xDFFF, 0},
+
+    {10, 4, 2, 1, 0xD800, 1},
+    {10, 4, 2, 1, 0xDBFF, 1},
+    {10, 4, 2, 1, 0xDC00, 1},
+    {10, 4, 2, 1, 0xDFFF, 1},
+
+    {10, 4, 4, 2, 0xD800, 2},
+    {10, 4, 4, 2, 0xDBFF, 2},
+    {10, 4, 4, 2, 0xDC00, 2},
+    {10, 4, 4, 2, 0xDFFF, 2},
+
+    // make the leading surrogate a trailing one
+    {10, 4, 6, 3, 0xDC00, 3},
+    {10, 4, 6, 3, 0xDFFF, 3},
+
+    // make the trailing surrogate a leading one
+    {10, 4, 6, 3, 0xD800, 4},
+    {10, 4, 6, 3, 0xDBFF, 4},
+
+    // make the trailing surrogate a BMP char
+    {10, 4, 6, 3, u'z', 4},
+  };
+
+  for (auto t : offsets)
+    {
+      char in[array_size (input) * 2];
+      InternT out[array_size (exp) - 1] = {};
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      VERIFY (t.expected_in_next <= t.in_size);
+      VERIFY (t.expected_out_next <= t.out_size);
+      auto old_char = input[t.replace_pos];
+      input[t.replace_pos] = t.replace_char; // replace in input, not in in
+      utf16_to_bytes (begin (input), end (input), begin (in), endianess);
+
+      auto state = mbstate_t{};
+      auto in_next = (const char *) nullptr;
+      auto out_next = (InternT *) nullptr;
+      auto res = codecvt_base::result ();
+
+      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+		    out_next);
+      VERIFY (res == cvt.error);
+      VERIFY (in_next == in + t.expected_in_next);
+      VERIFY (out_next == out + t.expected_out_next);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
+      if (t.expected_out_next < array_size (out))
+	VERIFY (out[t.expected_out_next] == 0);
+
+      input[t.replace_pos] = old_char;
+    }
+}
+
+template <class InternT>
+void
+utf32_to_utf16_out_ok (const std::codecvt<InternT, char, mbstate_t> &cvt,
+		       utf16_endianess endianess)
+{
+  using namespace std;
+  const char32_t input[] = U"b\u0448\uAAAA\U0010AAAA";
+  const char16_t expected[] = u"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (input) == 5, "");
+  static_assert (array_size (expected) == 6, "");
+
+  InternT in[array_size (input)];
+  char exp[array_size (expected) * 2];
+  copy (begin (input), end (input), begin (in));
+  utf16_to_bytes (begin (expected), end (expected), begin (exp), endianess);
+
+  const test_offsets_ok offsets[] = {{0, 0}, {1, 2}, {2, 4}, {3, 6}, {4, 10}};
+  for (auto t : offsets)
+    {
+      char out[array_size (exp) - 2] = {};
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      auto state = mbstate_t{};
+      auto in_next = (const InternT *) nullptr;
+      auto out_next = (char *) nullptr;
+      auto res = codecvt_base::result ();
+
+      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
+		     out_next);
+      VERIFY (res == cvt.ok);
+      VERIFY (in_next == in + t.in_size);
+      VERIFY (out_next == out + t.out_size);
+      VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
+      if (t.out_size < array_size (out))
+	VERIFY (out[t.out_size] == 0);
+    }
+}
+
+// out(): when the byte buffer is too small for the next CP, expect partial.
+template <class InternT>
+void
+utf32_to_utf16_out_partial (const std::codecvt<InternT, char, mbstate_t> &cvt,
+			    utf16_endianess endianess)
+{
+  using namespace std;
+  const char32_t src[] = U"b\u0448\uAAAA\U0010AAAA";
+  const char16_t dst[] = u"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (src) == 5, "");
+  static_assert (array_size (dst) == 6, "");
+  InternT in[array_size (src)];
+  char exp[array_size (dst) * 2];
+  copy (src, src + array_size (src), in);
+  utf16_to_bytes (begin (dst), end (dst), begin (exp), endianess);
+
+  const test_offsets_partial cases[] = {
+    {1, 0, 0, 0}, // first CP does not fit
+    {1, 1, 0, 0}, // first CP does not fit
+
+    {2, 2, 1, 2}, // second CP does not fit
+    {2, 3, 1, 2}, // second CP does not fit
+
+    {3, 4, 2, 4}, // third CP does not fit
+    {3, 5, 2, 4}, // third CP does not fit
+
+    {4, 6, 3, 6}, // fourth CP (a surrogate pair) does not fit
+    {4, 7, 3, 6}, // fourth CP (a surrogate pair) does not fit
+    {4, 8, 3, 6}, // fourth CP (a surrogate pair) does not fit
+    {4, 9, 3, 6}, // fourth CP (a surrogate pair) does not fit
+  };
+  for (const auto &t : cases)
+    {
+      char out[array_size (exp) - 2] = {}; // zeroed: stray writes stand out
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      VERIFY (t.expected_in_next <= t.in_size);
+      VERIFY (t.expected_out_next <= t.out_size);
+      mbstate_t state{};
+      const InternT *in_next = nullptr;
+      char *out_next = nullptr;
+      char *const out_end = out + t.out_size;
+
+      const auto res
+	= cvt.out (state, in, in + t.in_size, in_next, out, out_end, out_next);
+      VERIFY (res == cvt.partial);
+      VERIFY (in_next == in + t.expected_in_next);
+      VERIFY (out_next == out + t.expected_out_next);
+      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+      if (t.expected_out_next < array_size (out))
+	VERIFY (out[t.expected_out_next] == 0);
+    }
+}
+
+// out(): a surrogate or a value above 0x10FFFF in the input must give error.
+template <class InternT>
+void
+utf32_to_utf16_out_error (const std::codecvt<InternT, char, mbstate_t> &cvt,
+			  utf16_endianess endianess)
+{
+  using namespace std;
+  const char32_t src[] = U"b\u0448\uAAAA\U0010AAAA";
+  const char16_t dst[] = u"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (src) == 5, "");
+  static_assert (array_size (dst) == 6, "");
+  InternT in[array_size (src)];
+  char exp[array_size (dst) * 2];
+  copy (src, src + array_size (src), in);
+  utf16_to_bytes (begin (dst), end (dst), begin (exp), endianess);
+
+  test_offsets_error<InternT> cases[] = {
+    // Each case overwrites one element of IN with an invalid value.
+    // Values from the surrogate range:
+    {4, 10, 0, 0, 0xD800, 0},
+    {4, 10, 1, 2, 0xDBFF, 1},
+    {4, 10, 2, 4, 0xDC00, 2},
+    {4, 10, 3, 6, 0xDFFF, 3},
+
+    // A value just past the last code point (0x10FFFF):
+    {4, 10, 0, 0, 0x00110000, 0},
+    {4, 10, 1, 2, 0x00110000, 1},
+    {4, 10, 2, 4, 0x00110000, 2},
+    {4, 10, 3, 6, 0x00110000, 3}};
+
+  for (const auto &t : cases)
+    {
+      char out[array_size (exp) - 2] = {};
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      VERIFY (t.expected_in_next <= t.in_size);
+      VERIFY (t.expected_out_next <= t.out_size);
+      const InternT saved = in[t.replace_pos];
+      in[t.replace_pos] = t.replace_char;
+
+      mbstate_t state{};
+      const InternT *in_next = nullptr;
+      char *out_next = nullptr;
+      char *const out_end = out + t.out_size;
+
+      const auto res
+	= cvt.out (state, in, in + t.in_size, in_next, out, out_end, out_next);
+      VERIFY (res == cvt.error);
+      VERIFY (in_next == in + t.expected_in_next);
+      VERIFY (out_next == out + t.expected_out_next);
+      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+      if (t.expected_out_next < array_size (out))
+	VERIFY (out[t.expected_out_next] == 0);
+
+      in[t.replace_pos] = saved; // put the valid element back for the next case
+    }
+}
+// Run the six UTF-16/UTF-32 checks on CVT, forwarding ENDIANESS to each.
+template <class InternT>
+void
+test_utf16_utf32_cvt (const std::codecvt<InternT, char, mbstate_t> &cvt,
+		      utf16_endianess endianess)
+{
+  utf16_to_utf32_in_ok (cvt, endianess);
+  utf16_to_utf32_in_partial (cvt, endianess);
+  utf16_to_utf32_in_error (cvt, endianess);
+  utf32_to_utf16_out_ok (cvt, endianess);
+  utf32_to_utf16_out_partial (cvt, endianess); // expects cvt.partial
+  utf32_to_utf16_out_error (cvt, endianess);   // expects cvt.error
+}
+
+// in(): UTF-16 bytes holding only BMP characters must convert with result ok.
+template <class InternT>
+void
+utf16_to_ucs2_in_ok (const std::codecvt<InternT, char, mbstate_t> &cvt,
+		     utf16_endianess endianess)
+{
+  using namespace std;
+  const char16_t src[] = u"b\u0448\uAAAA";
+  const char16_t dst[] = u"b\u0448\uAAAA";
+  static_assert (array_size (src) == 4, "");
+  static_assert (array_size (dst) == 4, "");
+  char in[array_size (src) * 2];
+  InternT exp[array_size (dst)];
+  utf16_to_bytes (begin (src), end (src), begin (in), endianess);
+  copy (dst, dst + array_size (dst), exp);
+
+  test_offsets_ok cases[] = {{0, 0}, {2, 1}, {4, 2}, {6, 3}};
+  for (const auto &t : cases)
+    {
+      InternT out[array_size (exp) - 1] = {};
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      mbstate_t state{};
+      const char *in_next = nullptr;
+      InternT *out_next = nullptr;
+      InternT *const out_end = out + t.out_size; // exactly enough room
+
+      const auto res
+	= cvt.in (state, in, in + t.in_size, in_next, out, out_end, out_next);
+      VERIFY (res == cvt.ok);
+      VERIFY (in_next == in + t.in_size);
+      VERIFY (out_next == out + t.out_size);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
+      if (t.out_size < array_size (out))
+	VERIFY (out[t.out_size] == 0);
+    }
+
+  for (const auto &t : cases)
+    {
+      InternT out[array_size (exp)] = {};
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      mbstate_t state{};
+      const char *in_next = nullptr;
+      InternT *out_next = nullptr;
+      InternT *const out_end = end (out); // the whole of OUT is offered
+
+      const auto res
+	= cvt.in (state, in, in + t.in_size, in_next, out, out_end, out_next);
+      VERIFY (res == cvt.ok);
+      VERIFY (in_next == in + t.in_size);
+      VERIFY (out_next == out + t.out_size);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
+      if (t.out_size < array_size (out))
+	VERIFY (out[t.out_size] == 0);
+    }
+}
+
+// in(): truncated input or a too-small output buffer must give partial.
+template <class InternT>
+void
+utf16_to_ucs2_in_partial (const std::codecvt<InternT, char, mbstate_t> &cvt,
+			  utf16_endianess endianess)
+{
+  using namespace std;
+  const char16_t input[] = u"b\u0448\uAAAA";
+  const char16_t expected[] = u"b\u0448\uAAAA";
+  static_assert (array_size (input) == 4, "");
+  static_assert (array_size (expected) == 4, "");
+
+  char in[array_size (input) * 2];
+  InternT exp[array_size (expected)];
+  utf16_to_bytes (begin (input), end (input), begin (in), endianess);
+  copy (begin (expected), end (expected), begin (exp));
+
+  test_offsets_partial offsets[] = {
+    {2, 0, 0, 0}, // no space for first CP
+    {1, 1, 0, 0}, // incomplete first CP
+    {1, 0, 0, 0}, // incomplete first CP, and no space for it
+
+    {4, 1, 2, 1}, // no space for second CP
+    {3, 2, 2, 1}, // incomplete second CP
+    {3, 1, 2, 1}, // incomplete second CP, and no space for it
+
+    {6, 2, 4, 2}, // no space for third CP
+    {5, 3, 4, 2}, // incomplete third CP
+    {5, 2, 4, 2}, // incomplete third CP, and no space for it
+  };
+
+  for (auto t : offsets)
+    {
+      InternT out[array_size (exp) - 1] = {}; // zeroed to expose stray writes
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      VERIFY (t.expected_in_next <= t.in_size);
+      VERIFY (t.expected_out_next <= t.out_size);
+      auto state = mbstate_t{}; // fresh conversion state for every case
+      auto in_next = (const char *) nullptr;
+      auto out_next = (InternT *) nullptr;
+      auto res = codecvt_base::result ();
+
+      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+		    out_next);
+      VERIFY (res == cvt.partial);
+      VERIFY (in_next == in + t.expected_in_next);
+      VERIFY (out_next == out + t.expected_out_next);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
+      if (t.expected_out_next < array_size (out))
+	VERIFY (out[t.expected_out_next] == 0);
+    }
+}
+
+// in(): every surrogate code unit must be rejected when the target is UCS-2.
+template <class InternT>
+void
+utf16_to_ucs2_in_error (const std::codecvt<InternT, char, mbstate_t> &cvt,
+			utf16_endianess endianess)
+{
+  using namespace std;
+  char16_t units[] = u"b\u0448\uAAAA\U0010AAAA"; // patched per case, below
+  const char16_t dst[] = u"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (units) == 6, "");
+  static_assert (array_size (dst) == 6, "");
+  InternT exp[array_size (dst)];
+  copy (dst, dst + array_size (dst), exp);
+
+  // UTF-16 itself can only be malformed through an unpaired surrogate code
+  // unit.  UCS-2 cannot represent a well-formed surrogate pair either, so
+  // here any surrogate code unit at all is expected to yield an error.
+  test_offsets_error<char16_t> cases[] = {
+    {6, 3, 0, 0, 0xD800, 0},
+    {6, 3, 0, 0, 0xDBFF, 0},
+    {6, 3, 0, 0, 0xDC00, 0},
+    {6, 3, 0, 0, 0xDFFF, 0},
+
+    {6, 3, 2, 1, 0xD800, 1},
+    {6, 3, 2, 1, 0xDBFF, 1},
+    {6, 3, 2, 1, 0xDC00, 1},
+    {6, 3, 2, 1, 0xDFFF, 1},
+
+    {6, 3, 4, 2, 0xD800, 2},
+    {6, 3, 4, 2, 0xDBFF, 2},
+    {6, 3, 4, 2, 0xDC00, 2},
+    {6, 3, 4, 2, 0xDFFF, 2},
+
+    // leading surrogate replaced by a trailing one
+    {10, 5, 6, 3, 0xDC00, 3},
+    {10, 5, 6, 3, 0xDFFF, 3},
+
+    // trailing surrogate replaced by a leading one
+    {10, 5, 6, 3, 0xD800, 4},
+    {10, 5, 6, 3, 0xDBFF, 4},
+
+    // trailing surrogate replaced by a BMP character
+    {10, 5, 6, 3, u'z', 4},
+
+    // The cases below change nothing ('b' is written over 'b'); they only
+    // expose the surrogate pair (fourth CP) in full or just its first half.
+    {10, 5, 6, 3, u'b', 0},
+    {8, 5, 6, 3, u'b', 0},
+    {9, 5, 6, 3, u'b', 0},
+
+    {10, 4, 6, 3, u'b', 0},
+    {8, 4, 6, 3, u'b', 0},
+    {9, 4, 6, 3, u'b', 0},
+  };
+
+  for (const auto &t : cases)
+    {
+      char in[array_size (units) * 2];
+      InternT out[array_size (exp) - 1] = {};
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      VERIFY (t.expected_in_next <= t.in_size);
+      VERIFY (t.expected_out_next <= t.out_size);
+      const char16_t saved = units[t.replace_pos];
+      units[t.replace_pos] = t.replace_char; // patch the code units, not IN
+      utf16_to_bytes (begin (units), end (units), begin (in), endianess);
+
+      mbstate_t state{};
+      const char *in_next = nullptr;
+      InternT *out_next = nullptr;
+      InternT *const out_end = out + t.out_size;
+
+      const auto res
+	= cvt.in (state, in, in + t.in_size, in_next, out, out_end, out_next);
+      VERIFY (res == cvt.error);
+      VERIFY (in_next == in + t.expected_in_next);
+      VERIFY (out_next == out + t.expected_out_next);
+      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
+	      == 0);
+      if (t.expected_out_next < array_size (out))
+	VERIFY (out[t.expected_out_next] == 0);
+
+      units[t.replace_pos] = saved;
+    }
+}
+
+// out(): UCS-2 input without surrogates must convert with result ok.
+template <class InternT>
+void
+ucs2_to_utf16_out_ok (const std::codecvt<InternT, char, mbstate_t> &cvt,
+		      utf16_endianess endianess)
+{
+  using namespace std;
+  const char16_t src[] = u"b\u0448\uAAAA";
+  const char16_t dst[] = u"b\u0448\uAAAA";
+  static_assert (array_size (src) == 4, "");
+  static_assert (array_size (dst) == 4, "");
+  InternT in[array_size (src)];
+  char exp[array_size (dst) * 2];
+  copy (src, src + array_size (src), in);
+  utf16_to_bytes (begin (dst), end (dst), begin (exp), endianess);
+
+  const test_offsets_ok cases[] = {{0, 0}, {1, 2}, {2, 4}, {3, 6}};
+  for (const auto &t : cases)
+    {
+      char out[array_size (exp) - 2] = {}; // zeroed: stray writes stand out
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      mbstate_t state{};
+      const InternT *in_next = nullptr;
+      char *out_next = nullptr;
+      char *const out_end = out + t.out_size;
+
+      const auto res
+	= cvt.out (state, in, in + t.in_size, in_next, out, out_end, out_next);
+      VERIFY (res == cvt.ok);
+      VERIFY (in_next == in + t.in_size);
+      VERIFY (out_next == out + t.out_size);
+      VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
+      if (t.out_size < array_size (out))
+	VERIFY (out[t.out_size] == 0);
+    }
+}
+
+// out(): when the byte buffer is too small for the next CP, expect partial.
+template <class InternT>
+void
+ucs2_to_utf16_out_partial (const std::codecvt<InternT, char, mbstate_t> &cvt,
+			   utf16_endianess endianess)
+{
+  using namespace std;
+  const char16_t src[] = u"b\u0448\uAAAA";
+  const char16_t dst[] = u"b\u0448\uAAAA";
+  static_assert (array_size (src) == 4, "");
+  static_assert (array_size (dst) == 4, "");
+  InternT in[array_size (src)];
+  char exp[array_size (dst) * 2];
+  copy (src, src + array_size (src), in);
+  utf16_to_bytes (begin (dst), end (dst), begin (exp), endianess);
+
+  const test_offsets_partial cases[] = {
+    {1, 0, 0, 0}, // first CP does not fit
+    {1, 1, 0, 0}, // first CP does not fit
+
+    {2, 2, 1, 2}, // second CP does not fit
+    {2, 3, 1, 2}, // second CP does not fit
+
+    {3, 4, 2, 4}, // third CP does not fit
+    {3, 5, 2, 4}, // third CP does not fit
+  };
+  for (const auto &t : cases)
+    {
+      char out[array_size (exp) - 2] = {}; // zeroed: stray writes stand out
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      VERIFY (t.expected_in_next <= t.in_size);
+      VERIFY (t.expected_out_next <= t.out_size);
+      mbstate_t state{};
+      const InternT *in_next = nullptr;
+      char *out_next = nullptr;
+      char *const out_end = out + t.out_size;
+
+      const auto res
+	= cvt.out (state, in, in + t.in_size, in_next, out, out_end, out_next);
+      VERIFY (res == cvt.partial);
+      VERIFY (in_next == in + t.expected_in_next);
+      VERIFY (out_next == out + t.expected_out_next);
+      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+      if (t.expected_out_next < array_size (out))
+	VERIFY (out[t.expected_out_next] == 0);
+    }
+}
+
+// out(): UCS-2 input containing any surrogate code unit must give error.
+template <class InternT>
+void
+ucs2_to_utf16_out_error (const std::codecvt<InternT, char, mbstate_t> &cvt,
+			 utf16_endianess endianess)
+{
+  using namespace std;
+  const char16_t src[] = u"b\u0448\uAAAA\U0010AAAA";
+  const char16_t dst[] = u"b\u0448\uAAAA\U0010AAAA";
+  static_assert (array_size (src) == 6, "");
+  static_assert (array_size (dst) == 6, "");
+  InternT in[array_size (src)];
+  char exp[array_size (dst) * 2];
+  copy (src, src + array_size (src), in);
+  utf16_to_bytes (begin (dst), end (dst), begin (exp), endianess);
+
+  test_offsets_error<InternT> cases[] = {
+    {3, 6, 0, 0, 0xD800, 0},
+    {3, 6, 0, 0, 0xDBFF, 0},
+    {3, 6, 0, 0, 0xDC00, 0},
+    {3, 6, 0, 0, 0xDFFF, 0},
+
+    {3, 6, 1, 2, 0xD800, 1},
+    {3, 6, 1, 2, 0xDBFF, 1},
+    {3, 6, 1, 2, 0xDC00, 1},
+    {3, 6, 1, 2, 0xDFFF, 1},
+
+    {3, 6, 2, 4, 0xD800, 2},
+    {3, 6, 2, 4, 0xDBFF, 2},
+    {3, 6, 2, 4, 0xDC00, 2},
+    {3, 6, 2, 4, 0xDFFF, 2},
+
+    // leading surrogate replaced by a trailing one
+    {5, 10, 3, 6, 0xDC00, 3},
+    {5, 10, 3, 6, 0xDFFF, 3},
+
+    // trailing surrogate replaced by a leading one
+    {5, 10, 3, 6, 0xD800, 4},
+    {5, 10, 3, 6, 0xDBFF, 4},
+
+    // trailing surrogate replaced by a BMP character
+    {5, 10, 3, 6, u'z', 4},
+
+    // The cases below change nothing ('b' is written over 'b'); they only
+    // expose the surrogate pair (fourth CP) in full or just its first half.
+    {5, 10, 3, 6, u'b', 0},
+    {5, 8, 3, 6, u'b', 0},
+    {5, 9, 3, 6, u'b', 0},
+
+    {4, 10, 3, 6, u'b', 0},
+    {4, 8, 3, 6, u'b', 0},
+    {4, 9, 3, 6, u'b', 0},
+  };
+
+  for (const auto &t : cases)
+    {
+      char out[array_size (exp) - 2] = {};
+      VERIFY (t.in_size <= array_size (in));
+      VERIFY (t.out_size <= array_size (out));
+      VERIFY (t.expected_in_next <= t.in_size);
+      VERIFY (t.expected_out_next <= t.out_size);
+      const InternT saved = in[t.replace_pos];
+      in[t.replace_pos] = t.replace_char;
+
+      mbstate_t state{};
+      const InternT *in_next = nullptr;
+      char *out_next = nullptr;
+      char *const out_end = out + t.out_size;
+
+      const auto res
+	= cvt.out (state, in, in + t.in_size, in_next, out, out_end, out_next);
+      VERIFY (res == cvt.error);
+      VERIFY (in_next == in + t.expected_in_next);
+      VERIFY (out_next == out + t.expected_out_next);
+      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+      if (t.expected_out_next < array_size (out))
+	VERIFY (out[t.expected_out_next] == 0);
+
+      in[t.replace_pos] = saved; // restore the element for the next case
+    }
+}
+// Run the six UTF-16/UCS-2 checks on CVT, forwarding ENDIANESS to each.
+template <class InternT>
+void
+test_utf16_ucs2_cvt (const std::codecvt<InternT, char, mbstate_t> &cvt,
+		     utf16_endianess endianess)
+{
+  utf16_to_ucs2_in_ok (cvt, endianess);      // in(), expects cvt.ok
+  utf16_to_ucs2_in_partial (cvt, endianess); // in(), expects cvt.partial
+  utf16_to_ucs2_in_error (cvt, endianess);   // in(), expects cvt.error
+  ucs2_to_utf16_out_ok (cvt, endianess);      // out(), expects cvt.ok
+  ucs2_to_utf16_out_partial (cvt, endianess); // out(), expects cvt.partial
+  ucs2_to_utf16_out_error (cvt, endianess);   // out(), expects cvt.error
+}
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_char8_t.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_char8_t.cc
new file mode 100644
index 000000000..8ab5ba79f
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_char8_t.cc
@@ -0,0 +1,53 @@
+// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-do run { target c++11 } }
+// { dg-require-cstdint "" }
+// { dg-options "-fchar8_t" }
+
+#include "codecvt_unicode.h"
+
+using namespace std;
+
+// Hand the classic locale's char32_t/char8_t facet to test_utf8_utf32_cvt.
+void
+test_utf8_utf32_codecvts ()
+{
+  using codecvt_c32_c8 = codecvt<char32_t, char8_t, mbstate_t>;
+  const locale &loc_c = locale::classic ();
+  VERIFY (has_facet<codecvt_c32_c8> (loc_c));
+
+  test_utf8_utf32_cvt (use_facet<codecvt_c32_c8> (loc_c));
+}
+
+// Hand the classic locale's char16_t/char8_t facet to test_utf8_utf16_cvt.
+void
+test_utf8_utf16_codecvts ()
+{
+  using codecvt_c16_c8 = codecvt<char16_t, char8_t, mbstate_t>;
+  const locale &loc_c = locale::classic ();
+  VERIFY (has_facet<codecvt_c16_c8> (loc_c));
+
+  test_utf8_utf16_cvt (use_facet<codecvt_c16_c8> (loc_c));
+}
+
+int
+main ()
+{
+  test_utf8_utf32_codecvts (); // codecvt<char32_t, char8_t, mbstate_t>
+  test_utf8_utf16_codecvts (); // codecvt<char16_t, char8_t, mbstate_t>
+}
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_wchar_t.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_wchar_t.cc
index 4fd1bfec6..d6e5b20e8 100644
--- a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_wchar_t.cc
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_wchar_t.cc
@@ -28,7 +28,7 @@ test_utf8_utf32_codecvts ()
 {
 #if __SIZEOF_WCHAR_T__ == 4
   codecvt_utf8<wchar_t> cvt;
-  test_utf8_utf32_codecvts (cvt);
+  test_utf8_utf32_cvt (cvt);
 #endif
 }
 
@@ -37,7 +37,7 @@ test_utf8_utf16_codecvts ()
 {
 #if __SIZEOF_WCHAR_T__ >= 2
   codecvt_utf8_utf16<wchar_t> cvt;
-  test_utf8_utf16_cvts (cvt);
+  test_utf8_utf16_cvt (cvt);
 #endif
 }
 
@@ -46,7 +46,31 @@ test_utf8_ucs2_codecvts ()
 {
 #if __SIZEOF_WCHAR_T__ == 2
   codecvt_utf8<wchar_t> cvt;
-  test_utf8_ucs2_cvts (cvt);
+  test_utf8_ucs2_cvt (cvt);
+#endif
+}
+
+void
+test_utf16_utf32_codecvts ()
+{
+#if __SIZEOF_WCHAR_T__ == 4
+  // Same checks for both byte orders; codecvt_utf16 defaults to big-endian.
+  codecvt_utf16<wchar_t> be_cvt;
+  test_utf16_utf32_cvt (be_cvt, utf16_big_endian);
+  codecvt_utf16<wchar_t, 0x10FFFF, codecvt_mode::little_endian> le_cvt;
+  test_utf16_utf32_cvt (le_cvt, utf16_little_endian);
+#endif
+}
+
+void
+test_utf16_ucs2_codecvts ()
+{
+#if __SIZEOF_WCHAR_T__ == 2
+  // Same checks for both byte orders; codecvt_utf16 defaults to big-endian.
+  codecvt_utf16<wchar_t> be_cvt;
+  test_utf16_ucs2_cvt (be_cvt, utf16_big_endian);
+  codecvt_utf16<wchar_t, 0x10FFFF, codecvt_mode::little_endian> le_cvt;
+  test_utf16_ucs2_cvt (le_cvt, utf16_little_endian);
 #endif
 }
 
@@ -56,4 +80,6 @@ main ()
   test_utf8_utf32_codecvts ();
   test_utf8_utf16_codecvts ();
   test_utf8_ucs2_codecvts ();
+  test_utf16_utf32_codecvts ();
+  test_utf16_ucs2_codecvts ();
 }
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc
index 970d2bc11..f3f802f7c 100644
--- a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc
@@ -83,7 +83,7 @@ test06()
   const char src[] = "\0\x61\xAB\xCD";
   Conv<char16_t> conv("to_bytes failed", u"from_bytes failed");
   std::u16string result = conv.from_bytes(src, src+3); // incomplete character
-  VERIFY( result == u"from_bytes failed" );
+  VERIFY( result == u"\u0061" );
   VERIFY( conv.converted() == 2 );
 }