Review notes (free text ahead of the first "diff --git" header; not part of any hunk):
  * Purpose of the patch: the UTF-8 -> UTF-16 conversion that was inlined in both
    StringBytes::Encode and StringBytes::EncodeValidUtf8 moves into one static
    helper, EncodeValidNonAsciiUtf8, and both call sites call it.
  * Line breaks and indentation were reconstructed from a whitespace-collapsed
    copy of the patch. TODO(review): confirm context-line whitespace against the
    real tree before applying.
  * Template argument lists (after static_cast, reinterpret_cast, MaybeLocal,
    Maybe, Local) are absent in that copy and are left exactly as found.
    TODO(review): restore them from the real tree.
  * The EncodeValidUtf8 hunk is narrowed to the helper substitution only. It no
    longer deletes the "// ASCII fast path" comment and blank separators, and it
    no longer strips the braces from the `if` whose condition spans two lines.
  * The post-image blob hash on the string_bytes.cc "index" line predates that
    narrowing. TODO(review): regenerate with `git diff`.

diff --git a/src/string_bytes.cc b/src/string_bytes.cc
index 667fada0a1eb3f..38d8af263ac8ad 100644
--- a/src/string_bytes.cc
+++ b/src/string_bytes.cc
@@ -537,6 +537,24 @@ Maybe StringBytes::Size(Isolate* isolate,
     }                                                                          \
   } while (0)
 
+// Converts known-valid UTF-8 (buflen >= 32) to a V8 string via the fast
+// UTF-16 path. Callers must ensure buflen is range-checked.
+static MaybeLocal EncodeValidNonAsciiUtf8(Isolate* isolate,
+                                          const char* buf,
+                                          size_t buflen) {
+  size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
+  if (u16size > static_cast(v8::String::kMaxLength)) {
+    isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
+    return MaybeLocal();
+  }
+  return EncodeTwoByteString(
+      isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
+        size_t written = simdutf::convert_valid_utf8_to_utf16(
+            buf, buflen, reinterpret_cast(dst));
+        CHECK_EQ(written, u16size);
+      });
+}
+
 MaybeLocal StringBytes::Encode(Isolate* isolate,
                                const char* buf,
                                size_t buflen,
@@ -650,17 +668,7 @@ MaybeLocal StringBytes::Encode(Isolate* isolate,
                 static_cast(r.count));
           }
         } else if (simdutf::validate_utf8(buf, buflen)) {
-          size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
-          if (u16size > static_cast(v8::String::kMaxLength)) {
-            isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
-            return MaybeLocal();
-          }
-          return EncodeTwoByteString(
-              isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
-                size_t written = simdutf::convert_valid_utf8_to_utf16(
-                    buf, buflen, reinterpret_cast(dst));
-                CHECK_EQ(written, u16size);
-              });
+          return EncodeValidNonAsciiUtf8(isolate, buf, buflen);
         }
       }
 
@@ -748,19 +756,7 @@ MaybeLocal StringBytes::EncodeValidUtf8(Isolate* isolate,
     return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
   }
 
-  if (buflen >= 32) {
-    size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
-    if (u16size > static_cast(v8::String::kMaxLength)) {
-      isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
-      return MaybeLocal();
-    }
-    return EncodeTwoByteString(
-        isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
-          size_t written = simdutf::convert_valid_utf8_to_utf16(
-              buf, buflen, reinterpret_cast(dst));
-          CHECK_EQ(written, u16size);
-        });
-  }
+  if (buflen >= 32) return EncodeValidNonAsciiUtf8(isolate, buf, buflen);
 
   Local str;
   if (!String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen)
diff --git a/src/string_bytes.h b/src/string_bytes.h
index 71aa9ff1f90a7c..a9935b68db9647 100644
--- a/src/string_bytes.h
+++ b/src/string_bytes.h
@@ -83,7 +83,8 @@ class StringBytes {
                                           size_t buflen,
                                           enum encoding encoding);
 
-  // Like Encode(..., UTF8) but does not re-validate. Input must be valid UTF-8.
+  // Like Encode(..., UTF8) but skips UTF-8 validation. Caller must guarantee
+  // that buf contains valid UTF-8.
   static v8::MaybeLocal EncodeValidUtf8(v8::Isolate* isolate,
                                         const char* buf,
                                         size_t buflen);