Skip to content

Commit

Permalink
util: add a method to get string encoding info
Browse files Browse the repository at this point in the history
Add a util method to check the encoding information of a string.
The encoding information is from V8.

Refs: #56090
  • Loading branch information
theweipeng committed Dec 7, 2024
1 parent c4aa34a commit a4a445a
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 0 deletions.
20 changes: 20 additions & 0 deletions doc/api/util.md
Original file line number Diff line number Diff line change
Expand Up @@ -3142,6 +3142,26 @@ util.isArray({});
// Returns: false
```
## `util.getStringEncodingInfo(content)`
<!-- YAML
added: REPLACEME
-->
* `content` {string}
* Returns: {Object}
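Returns the encoding that V8 reports for `content` (`'latin1'` for one-byte
strings, `'utf16le'` otherwise) together with the byte length of `content` in
that encoding. Throws `ERR_INVALID_ARG_TYPE` if `content` is not a string.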
```js
const { getStringEncodingInfo } = require('node:util');

getStringEncodingInfo('hello world');
// Returns: { encoding: 'latin1', byteLength: 11 }
getStringEncodingInfo('你好');
// Returns: { encoding: 'utf16le', byteLength: 4 }
```
[Common System Errors]: errors.md#common-system-errors
[Custom inspection functions on objects]: #custom-inspection-functions-on-objects
[Custom promisified functions]: #custom-promisified-functions
Expand Down
24 changes: 24 additions & 0 deletions lib/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,29 @@ function getCallSites(frameCount = 10, options) {
return binding.getCallSites(frameCount);
};

/**
 * Reports the encoding of a string, as determined by the native
 * `isStringOneByte` binding, along with its byte length in that encoding.
 *
 * One-byte strings are reported as `'latin1'` (one byte per UTF-16 code
 * unit); every other string is reported as `'utf16le'` (two bytes per
 * UTF-16 code unit).
 * @param {string} content The string to inspect.
 * @returns {{encoding: 'latin1' | 'utf16le', byteLength: number}}
 * @throws {ERR_INVALID_ARG_TYPE} If `content` is not a string.
 */
function getStringEncodingInfo(content) {
  if (typeof content !== 'string') {
    throw new ERR_INVALID_ARG_TYPE('content', 'string', content);
  }

  const { length } = content;
  return binding.isStringOneByte(content) ?
    { encoding: 'latin1', byteLength: length } :
    { encoding: 'utf16le', byteLength: length * 2 };
}

// Keep the `exports =` so that various functions can still be monkeypatched
module.exports = {
_errnoException,
Expand Down Expand Up @@ -470,6 +493,7 @@ module.exports = {
},
types,
parseEnv,
getStringEncodingInfo,
};

defineLazyProperties(
Expand Down
4 changes: 4 additions & 0 deletions src/node_external_reference.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ namespace node {

// Function-pointer type for a callback that returns uint32_t and takes a
// v8::Local<v8::Value> followed by a const v8::FastOneByteString reference.
using CFunctionCallbackWithOneByteString =
uint32_t (*)(v8::Local<v8::Value>, const v8::FastOneByteString&);

// Function-pointer type for a callback that returns bool and takes two
// v8::Local<v8::Value> arguments. The parameter names are documentation only.
using CFunctionCallbackReturnBool = bool (*)(v8::Local<v8::Value> unused,
v8::Local<v8::Value> receiver);
// Function-pointer type for a callback that returns nothing and takes two
// v8::Local<v8::Value> arguments.
using CFunctionCallback = void (*)(v8::Local<v8::Value> unused,
v8::Local<v8::Value> receiver);
using CFunctionCallbackReturnDouble =
Expand Down Expand Up @@ -90,6 +93,7 @@ class ExternalReferenceRegistry {
#define ALLOWED_EXTERNAL_REFERENCE_TYPES(V) \
V(CFunctionCallback) \
V(CFunctionCallbackWithOneByteString) \
V(CFunctionCallbackReturnBool) \
V(CFunctionCallbackReturnDouble) \
V(CFunctionCallbackReturnInt32) \
V(CFunctionCallbackValueReturnDouble) \
Expand Down
23 changes: 23 additions & 0 deletions src/node_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,21 @@ static void GetCallSites(const FunctionCallbackInfo<Value>& args) {
args.GetReturnValue().Set(callsites);
}

static void IsStringOneByte(const FunctionCallbackInfo<Value>& args) {
CHECK_EQ(args.Length(), 1);
CHECK(args[0]->IsString());
bool is_one_byte = args[0].As<String>()->IsOneByte();
args.GetReturnValue().Set(is_one_byte);
}

// Fast-call counterpart of IsStringOneByte: returns whether `target` reports
// itself as one-byte via String::IsOneByte(). `receiver` is not used.
// Aborts (CHECK) if `target` is not a string.
static bool FastIsStringOneByte(Local<Value> receiver,
                                const Local<Value> target) {
  CHECK(target->IsString());
  const Local<String> str = target.As<String>();
  return str->IsOneByte();
}

// CFunction built from FastIsStringOneByte. It is handed to
// SetFastMethodNoSideEffect together with IsStringOneByte under the name
// "isStringOneByte", and its type info is registered as an external reference.
CFunction fast_is_string_one_byte_(CFunction::Make(FastIsStringOneByte));

static void IsInsideNodeModules(const FunctionCallbackInfo<Value>& args) {
Isolate* isolate = args.GetIsolate();
CHECK_EQ(args.Length(), 2);
Expand Down Expand Up @@ -356,6 +371,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
registry->Register(fast_guess_handle_type_.GetTypeInfo());
registry->Register(ParseEnv);
registry->Register(IsInsideNodeModules);
registry->Register(IsStringOneByte);
registry->Register(FastIsStringOneByte);
registry->Register(fast_is_string_one_byte_.GetTypeInfo());
}

void Initialize(Local<Object> target,
Expand Down Expand Up @@ -471,6 +489,11 @@ void Initialize(Local<Object> target,
"guessHandleType",
GuessHandleType,
&fast_guess_handle_type_);
SetFastMethodNoSideEffect(context,
target,
"isStringOneByte",
IsStringOneByte,
&fast_is_string_one_byte_);
}

} // namespace util
Expand Down
41 changes: 41 additions & 0 deletions test/parallel/test-util-string-encoding-info.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Flags: --expose-internals
'use strict';
require('../common');
const assert = require('assert');
const { getStringEncodingInfo } = require('util');

// Every non-string argument must be rejected with the "content" type error.
const invalidInputs = [
  undefined,
  null,
  false,
  5n,
  5,
  Symbol(),
  () => {},
  {},
];
for (const invalidInput of invalidInputs) {
  assert.throws(
    () => getStringEncodingInfo(invalidInput),
    /The "content" argument must be of type string/,
  );
}

{
  // An ASCII-only string must be reported as 'latin1', one byte per
  // character. The call is repeated many times to trigger the fast API.
  const asciiText = 'hello world!';
  for (let round = 0; round < 10_000; round++) {
    const info = getStringEncodingInfo(asciiText);
    assert.strictEqual(info.encoding, 'latin1');
    assert.strictEqual(info.byteLength, asciiText.length);
  }
}

{
  // A string with characters outside Latin-1 must be reported as 'utf16le',
  // two bytes per UTF-16 code unit. The call is repeated many times to
  // trigger the fast API.
  const wideText = '你好😀😃';
  const expectedBytes = wideText.length * 2;
  for (let round = 0; round < 10_000; round++) {
    const info = getStringEncodingInfo(wideText);
    assert.strictEqual(info.encoding, 'utf16le');
    assert.strictEqual(info.byteLength, expectedBytes);
  }
}

0 comments on commit a4a445a

Please sign in to comment.