Backport of <https://github.com/unicode-org/icu/commit/b7d08bc04>
~~~
From b7d08bc04a4296982fcef8b6b8a354a9e4e7afca Mon Sep 17 00:00:00 2001
From: Frank Tang <ftang@chromium.org>
Date: Sat, 1 Feb 2020 02:39:04 +0000
Subject: [PATCH] ICU-20958 Prevent SEGV_MAPERR in append
See #971
X-SVN-Rev: 39671
---
icu4c/source/common/utext.cpp | 27 +++++++++--
icu4c/source/test/intltest/utxttest.cpp | 62 +++++++++++++++++++++++++
icu4c/source/test/intltest/utxttest.h | 1 +
3 files changed, 85 insertions(+), 5 deletions(-)
~~~
Includes code from Debian `ICU-13634.patch`:
https://sources.debian.org/patches/icu/57.1-6+deb9u4/ICU-13634.patch/
--- source/common/putil.cpp
+++ source/common/putil.cpp
@@ -516,6 +516,17 @@ uprv_fmin(double x, double y)
return (x > y ? y : x);
}
+U_CAPI UBool U_EXPORT2
+uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
+ // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
+ // This function could be optimized by calling one of those primitives.
+ auto a64 = static_cast<int64_t>(a);
+ auto b64 = static_cast<int64_t>(b);
+ int64_t res64 = a64 + b64;
+ *res = static_cast<int32_t>(res64);
+ return res64 != *res;
+}
+
/**
* Truncates the given double.
* trunc(3.3) = 3.0, trunc (-3.3) = -3.0
--- source/common/putilimp.h
+++ source/common/putilimp.h
@@ -394,6 +394,19 @@ U_INTERNAL double U_EXPORT2 uprv_log(double d);
*/
U_INTERNAL double U_EXPORT2 uprv_round(double x);
+/**
+ * Adds the signed integers a and b, storing the result in res.
+ * Checks for signed integer overflow.
+ * Similar to the GCC/Clang extension __builtin_add_overflow
+ *
+ * @param a The first operand.
+ * @param b The second operand.
+ * @param res a + b
+ * @return true if overflow occurred; false if no overflow occurred.
+ * @internal
+ */
+U_INTERNAL UBool U_EXPORT2 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res);
+
#if 0
/**
* Returns the number of digits after the decimal point in a double number x.
--- source/common/unistr.cpp
+++ source/common/unistr.cpp
@@ -1543,7 +1543,11 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng
}
int32_t oldLength = length();
- int32_t newLength = oldLength + srcLength;
+ int32_t newLength;
+ if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
+ setToBogus();
+ return *this;
+ }
// optimize append() onto a large-enough, owned string
if((newLength <= getCapacity() && isBufferWritable()) ||
cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
--- source/test/cintltst/putiltst.c
+++ source/test/cintltst/putiltst.c
@@ -128,6 +128,17 @@ static void TestPUtilAPI(void){
log_err("ERROR: uprv_isInfinite failed.\n");
}
+ log_verbose("Testing the APIs uprv_add32_overflow\n");
+ int32_t overflow_result;
+ doAssert(FALSE, uprv_add32_overflow(INT32_MAX - 2, 1, &overflow_result), "should not overflow");
+ doAssert(INT32_MAX - 1, overflow_result, "should equal INT32_MAX - 1");
+ doAssert(FALSE, uprv_add32_overflow(INT32_MAX - 2, 2, &overflow_result), "should not overflow");
+ doAssert(INT32_MAX, overflow_result, "should equal exactly INT32_MAX");
+ doAssert(TRUE, uprv_add32_overflow(INT32_MAX - 2, 3, &overflow_result), "should overflow");
+ // Test on negative numbers:
+ doAssert(FALSE, uprv_add32_overflow(-3, -2, &overflow_result), "should not overflow");
+ doAssert(-5, overflow_result, "should equal -5");
+
#if 0
log_verbose("Testing the API uprv_digitsAfterDecimal()....\n");
doAssert(uprv_digitsAfterDecimal(value1), 3, "uprv_digitsAfterDecimal() failed.");
--- source/test/intltest/ustrtest.cpp
+++ source/test/intltest/ustrtest.cpp
@@ -58,6 +58,7 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &
TESTCASE_AUTO(TestSizeofUnicodeString);
TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
TESTCASE_AUTO(TestMoveSwap);
+ TESTCASE_AUTO(TestLargeAppend);
TESTCASE_AUTO_END;
}
@@ -2187,3 +2188,64 @@ UnicodeStringTest::TestMoveSwap() {
errln("UnicodeString copy after self-move did not work");
}
}
+
+void UnicodeStringTest::TestLargeAppend() {
+ if(quick) return;
+
+ IcuTestErrorCode status(*this, "TestLargeAppend");
+ // Make a large UnicodeString
+ int32_t len = 0xAFFFFFF;
+ UnicodeString str;
+ char16_t *buf = str.getBuffer(len);
+ // A fast way to set buffer to valid Unicode.
+ // 4E4E is a valid unicode character
+ uprv_memset(buf, 0x4e, len * 2);
+ str.releaseBuffer(len);
+ UnicodeString dest;
+ // Append it 16 times
+ // 0xAFFFFFF times 16 is 0xA4FFFFF1,
+ // which is greater than INT32_MAX, which is 0x7FFFFFFF.
+ int64_t total = 0;
+ for (int32_t i = 0; i < 16; i++) {
+ dest.append(str);
+ total += len;
+ if (total <= INT32_MAX) {
+ assertFalse("dest is not bogus", dest.isBogus());
+ } else {
+ assertTrue("dest should be bogus", dest.isBogus());
+ }
+ }
+ dest.remove();
+ total = 0;
+ for (int32_t i = 0; i < 16; i++) {
+ dest.append(str);
+ total += len;
+ if (total + len <= INT32_MAX) {
+ assertFalse("dest is not bogus", dest.isBogus());
+ } else if (total <= INT32_MAX) {
+ // Check that a string of exactly the maximum size works
+ UnicodeString str2;
+ int32_t remain = INT32_MAX - total;
+ char16_t *buf2 = str2.getBuffer(remain);
+ if (buf2 == nullptr) {
+ // if somehow memory allocation fail, return the test
+ return;
+ }
+ uprv_memset(buf2, 0x4e, remain * 2);
+ str2.releaseBuffer(remain);
+ dest.append(str2);
+ total += remain;
+ assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
+ assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
+ assertFalse("dest is not bogus", dest.isBogus());
+
+ // Check that a string size+1 goes bogus
+ str2.truncate(1);
+ dest.append(str2);
+ total++;
+ assertTrue("dest should be bogus", dest.isBogus());
+ } else {
+ assertTrue("dest should be bogus", dest.isBogus());
+ }
+ }
+}
--- source/test/intltest/ustrtest.h
+++ source/test/intltest/ustrtest.h
@@ -92,6 +92,7 @@ public:
void TestUnicodeStringImplementsAppendable();
void TestSizeofUnicodeString();
void TestMoveSwap();
+ void TestLargeAppend();
};
class StringCaseTest: public IntlTest {