Count UTF-8 units in a string (v2)

Revision 2 published on

Setup

// Benchmark fixture: fills `arr` with 1000 single-character entries, each
// either '#' or the astral character U+1F452, then joins them into
// `testString`.
const arr = [];
const RANDEXP = 20; // number of random bits drawn per outer iteration
const ODDEXP = 4; // bits consumed per generated entry

// Every outer pass yields RANDEXP / ODDEXP (= 5) entries, so the counter
// advances by that amount until 1000 entries exist.
for (let produced = 0; produced < 1000; produced += RANDEXP / ODDEXP) {
  // A fresh RANDEXP-bit non-negative integer.
  const bits = Math.trunc(Math.random() * (1 << RANDEXP));

  // Slide an ODDEXP-bit wide mask across `bits`, lowest group first.
  let groupMask = (1 << ODDEXP) - 1;
  while (groupMask < 1 << RANDEXP) {
    // An all-zero group selects the astral character; anything else gives '#'.
    const entry = (bits & groupMask) === 0 ? '\u{1f452}' : '#';
    arr.push(entry);
    groupMask <<= ODDEXP;
  }
}

// NOTE(review): join() without an argument separates entries with ",", so the
// fixture also contains 999 commas — confirm this is intended.
const testString = arr.join();

Test Runner

Initializing...

Testing in
Test Case | Ops/sec
Regex with unicode flag
/**
 * Counts the code points in the range U+10000..U+10FFFF found in `s`,
 * using a unicode-aware global regex driven by repeated `exec` calls.
 *
 * @param {string} s - Text to scan.
 * @returns {number} Number of matches.
 */
function testRegexUni(s) {
  const astralPattern = /[\u{10000}-\u{10ffff}]/ug;
  let count = 0;
  // With the `g` flag each exec() resumes after the previous match and
  // returns null once nothing further matches.
  while (astralPattern.exec(s) !== null) {
    count += 1;
  }
  return count;
}

testRegexUni(testString)
ready
Regex matching surrogate pairs
/**
 * Counts occurrences of a high surrogate (U+D800..U+DBFF) immediately
 * followed by a low surrogate (U+DC00..U+DFFF) in `s`, using a global
 * regex driven by repeated `exec` calls.
 *
 * @param {string} s - Text to scan.
 * @returns {number} Number of surrogate pairs matched.
 */
function testRegexSurr(s) {
  const pairPattern = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
  let count = 0;
  // With the `g` flag each exec() resumes after the previous match and
  // returns null once nothing further matches.
  while (pairPattern.exec(s) !== null) {
    count += 1;
  }
  return count;
}

testRegexSurr(testString)
ready
Array from
/**
 * Returns the difference between the string's `length` (UTF-16 code units)
 * and the number of elements `Array.from` produces when iterating it.
 *
 * @param {string} s - Text to measure.
 * @returns {number} `s.length` minus the iterated element count.
 */
function testArrayFrom(s) {
  const unitCount = s.length;
  const iterated = Array.from(s);
  return unitCount - iterated.length;
}

testArrayFrom(testString)
ready
Array spread
/**
 * Returns the difference between the string's `length` (UTF-16 code units)
 * and the number of elements obtained by spreading it into an array.
 *
 * @param {string} s - Text to measure.
 * @returns {number} `s.length` minus the spread element count.
 */
function testArraySpread(s) {
  const unitCount = s.length;
  const iterated = [...s];
  return unitCount - iterated.length;
}

testArraySpread(testString)
ready
Regex iterating over surrogate pairs with ".matchAll"
/**
 * Counts occurrences of a high surrogate (U+D800..U+DBFF) immediately
 * followed by a low surrogate (U+DC00..U+DFFF) in `s`, by iterating the
 * results of `String.prototype.matchAll`.
 *
 * @param {string} s - Text to scan.
 * @returns {number} Number of surrogate pairs matched.
 */
function testRegexSurrAll(s) {
  const pairPattern = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
  const matches = s.matchAll(pairPattern);
  let count = 0;
  // Only the number of matches matters; the match objects are ignored.
  for (const _pair of matches) {
    count += 1;
  }
  return count;
}

testRegexSurrAll(testString)
ready

Revisions

You can edit these tests or add more tests to this page by appending /edit to the URL.

Revision 1
published on
Revision 2
published on