Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
317 changes: 243 additions & 74 deletions lib/elixir/lib/base.ex
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,220 @@ defmodule Base do
for <<char::8 <- string>>, char not in ~c"\s\t\r\n", into: <<>>, do: <<char::8>>
end

# SWAR (SIMD Within A Register) fast paths for valid16?/2 and valid32?/2
# (non-hex). Each chunk of 8 bytes is validated in one guard: 7 bytes via
# bitwise arithmetic on a single 56-bit integer, plus a per-byte range
# check for the 8th byte. 56 bits is the largest width that fits in a BEAM
# small int on 64-bit (fixnum range is 59-bit signed); at 64 bits every
# `w + 0x80..` would allocate a bignum on the heap and the optimisation
# would collapse. See https://github.com/erlang/otp/pull/10938 for the
# corresponding pattern in OTP.
@swar_mask80 0x80808080808080

# Per-range SWAR constants, broadcast across 7 lanes. Naming convention:
# @swar_ge_X = 0x80 - X → high bit of `(w + @swar_ge_X)` lane is set
# iff that byte is ≥ X
# @swar_gt_X = 0x7F - X → high bit of `(w + @swar_gt_X)` lane is set
# iff that byte is > X
# A byte is in range [lo, hi] iff
# `bxor(w + @swar_ge_lo, w + @swar_gt_hi)` has its high bit set.
@swar_ge_0 0x50505050505050
@swar_gt_9 0x46464646464646
@swar_ge_2 0x4E4E4E4E4E4E4E
@swar_gt_7 0x48484848484848
@swar_ge_A 0x3F3F3F3F3F3F3F
@swar_gt_F 0x39393939393939
@swar_gt_V 0x29292929292929
@swar_gt_Z 0x25252525252525
@swar_ge_a 0x1F1F1F1F1F1F1F
@swar_gt_f 0x19191919191919
@swar_gt_v 0x09090909090909
@swar_gt_z 0x05050505050505

# For base64 standard, '/' (0x2F) sits exactly one below '0' (0x30), so we
# extend the digit range to [0x2F, 0x39], which absorbs '/' into one range
# check — saves one Mycroft singleton. Trick lifted from
# https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
@swar_ge_slash 0x51515151515151

# Mycroft zero-byte detection for base64 singletons (+, -, _).
# Per lane: high bit set iff `bxor(w, K*ones) - 0x01..01` has its high bit
# set, i.e. that byte's V value was 0 → original byte was K. Simplified
# (no `bnot V` term) — for ASCII-gated `w`, borrow-propagation false
# positives only occur for adjacent bytes that happen to equal `K xor 0x01`,
# which is outside the base64 alphabet, so it never matters here.
# Pattern follows https://github.com/elixir-lang/elixir/pull/15255.
@swar_mask01 0x01010101010101
@swar_plus_x7 0x2B2B2B2B2B2B2B
@swar_dash_x7 0x2D2D2D2D2D2D2D
@swar_under_x7 0x5F5F5F5F5F5F5F

# Per-byte validity guards (used in both the SWAR clauses for the 8th byte
# of each stride and in the body of the sub-8-byte tail clauses).
defguardp valid_char16upper?(c) when c in ?0..?9 or c in ?A..?F
defguardp valid_char16lower?(c) when c in ?0..?9 or c in ?a..?f
defguardp valid_char16mixed?(c) when c in ?0..?9 or c in ?A..?F or c in ?a..?f

defguardp valid_char32upper?(c) when c in ?A..?Z or c in ?2..?7
defguardp valid_char32lower?(c) when c in ?a..?z or c in ?2..?7
defguardp valid_char32mixed?(c) when c in ?A..?Z or c in ?a..?z or c in ?2..?7

# Most common range first — letters dominate (22/32) over digits (10/32)
# in hex base32, so letters go first in the OR short-circuit.
defguardp valid_char32hexupper?(c) when c in ?A..?V or c in ?0..?9
defguardp valid_char32hexlower?(c) when c in ?a..?v or c in ?0..?9
defguardp valid_char32hexmixed?(c) when c in ?A..?V or c in ?a..?v or c in ?0..?9

defguardp valid_char64base?(c)
when c in ?A..?Z or c in ?a..?z or c in ?0..?9 or c == ?+ or c == ?/

defguardp valid_char64url?(c)
when c in ?A..?Z or c in ?a..?z or c in ?0..?9 or c == ?- or c == ?_

# SWAR 7-byte word validity. Structure for each guard:
# 1. ASCII gate `band(w, MASK80) == 0` — every byte < 0x80 so the
# additions below cannot carry across lanes.
# 2. "Each byte is in range A OR range B (OR range C)" gate — OR per-
# range XOR masks (high bit set in lane iff byte in that range), AND
# with MASK80, demand all 7 high bits set.
defguardp valid_word16upper?(w)
when band(w, @swar_mask80) == 0 and
band(
bor(
bxor(w + @swar_ge_0, w + @swar_gt_9),
bxor(w + @swar_ge_A, w + @swar_gt_F)
),
@swar_mask80
) == @swar_mask80

defguardp valid_word16lower?(w)
when band(w, @swar_mask80) == 0 and
band(
bor(
bxor(w + @swar_ge_0, w + @swar_gt_9),
bxor(w + @swar_ge_a, w + @swar_gt_f)
),
@swar_mask80
) == @swar_mask80

defguardp valid_word16mixed?(w)
when band(w, @swar_mask80) == 0 and
band(
bor(
bor(
bxor(w + @swar_ge_0, w + @swar_gt_9),
bxor(w + @swar_ge_A, w + @swar_gt_F)
),
bxor(w + @swar_ge_a, w + @swar_gt_f)
),
@swar_mask80
) == @swar_mask80

defguardp valid_word32upper?(w)
when band(w, @swar_mask80) == 0 and
band(
bor(
bxor(w + @swar_ge_A, w + @swar_gt_Z),
bxor(w + @swar_ge_2, w + @swar_gt_7)
),
@swar_mask80
) == @swar_mask80

defguardp valid_word32lower?(w)
when band(w, @swar_mask80) == 0 and
band(
bor(
bxor(w + @swar_ge_a, w + @swar_gt_z),
bxor(w + @swar_ge_2, w + @swar_gt_7)
),
@swar_mask80
) == @swar_mask80

defguardp valid_word32mixed?(w)
when band(w, @swar_mask80) == 0 and
band(
bor(
bor(
bxor(w + @swar_ge_A, w + @swar_gt_Z),
bxor(w + @swar_ge_a, w + @swar_gt_z)
),
bxor(w + @swar_ge_2, w + @swar_gt_7)
),
@swar_mask80
) == @swar_mask80

defguardp valid_word32hexupper?(w)
when band(w, @swar_mask80) == 0 and
band(
bor(
bxor(w + @swar_ge_0, w + @swar_gt_9),
bxor(w + @swar_ge_A, w + @swar_gt_V)
),
@swar_mask80
) == @swar_mask80

defguardp valid_word32hexlower?(w)
when band(w, @swar_mask80) == 0 and
band(
bor(
bxor(w + @swar_ge_0, w + @swar_gt_9),
bxor(w + @swar_ge_a, w + @swar_gt_v)
),
@swar_mask80
) == @swar_mask80

defguardp valid_word32hexmixed?(w)
when band(w, @swar_mask80) == 0 and
band(
bor(
bor(
bxor(w + @swar_ge_0, w + @swar_gt_9),
bxor(w + @swar_ge_A, w + @swar_gt_V)
),
bxor(w + @swar_ge_a, w + @swar_gt_v)
),
@swar_mask80
) == @swar_mask80

# base64 SWAR word validity: 3 ranges (A-Z, a-z, 0-9) OR'd with singletons.
# For base, the digit range is extended to [0x2F, 0x39] to absorb '/' as
# part of one range (Lemire merge), leaving only '+' as a Mycroft singleton.
# For url, the singletons '-' and '_' are detected via two Mycroft terms.
defguardp valid_word64base?(w)
when band(w, @swar_mask80) == 0 and
band(
bor(
bor(
bor(
bxor(w + @swar_ge_A, w + @swar_gt_Z),
bxor(w + @swar_ge_a, w + @swar_gt_z)
),
bxor(w + @swar_ge_slash, w + @swar_gt_9)
),
bxor(w, @swar_plus_x7) - @swar_mask01
),
@swar_mask80
) == @swar_mask80

defguardp valid_word64url?(w)
when band(w, @swar_mask80) == 0 and
band(
bor(
bor(
bor(
bxor(w + @swar_ge_A, w + @swar_gt_Z),
bxor(w + @swar_ge_a, w + @swar_gt_z)
),
bxor(w + @swar_ge_0, w + @swar_gt_9)
),
bor(
bxor(w, @swar_dash_x7) - @swar_mask01,
bxor(w, @swar_under_x7) - @swar_mask01
)
),
@swar_mask80
) == @swar_mask80

@doc """
Encodes a binary string into a base 16 encoded string.

Expand Down Expand Up @@ -371,45 +585,21 @@ defmodule Base do
decode_name = :"decode16#{base}!"
validate_name = :"validate16#{base}?"
valid_char_name = :"valid_char16#{base}?"
valid_word_name = :"valid_word16#{base}?"

{min, decoded} = to_decode_list.(alphabet)

defp unquote(validate_name)(<<>>), do: true

defp unquote(validate_name)(<<c1, c2, c3, c4, c5, c6, c7, c8, rest::binary>>) do
unquote(valid_char_name)(c1) and
unquote(valid_char_name)(c2) and
unquote(valid_char_name)(c3) and
unquote(valid_char_name)(c4) and
unquote(valid_char_name)(c5) and
unquote(valid_char_name)(c6) and
unquote(valid_char_name)(c7) and
unquote(valid_char_name)(c8) and
unquote(validate_name)(rest)
end

defp unquote(validate_name)(<<c1, c2, c3, c4, rest::binary>>) do
unquote(valid_char_name)(c1) and
unquote(valid_char_name)(c2) and
unquote(valid_char_name)(c3) and
unquote(valid_char_name)(c4) and
unquote(validate_name)(rest)
end

defp unquote(validate_name)(<<c1, c2, rest::binary>>) do
unquote(valid_char_name)(c1) and
unquote(valid_char_name)(c2) and
unquote(validate_name)(rest)
end
# SWAR fast path: 7 bytes per stride, validated entirely via
# `valid_word16<base>?` in the body. The `and` short-circuits when SWAR
# fails on any byte. Tail bytes (1-6 leftover) recurse through the
# single-byte clause below.
defp unquote(validate_name)(<<w::56, rest::binary>>),
do: unquote(valid_word_name)(w) and unquote(validate_name)(rest)

defp unquote(validate_name)(<<_char, _rest::binary>>), do: false

@compile {:inline, [{valid_char_name, 1}]}
defp unquote(valid_char_name)(char)
when elem({unquote_splicing(decoded)}, char - unquote(min)) != nil,
do: true
defp unquote(validate_name)(<<>>), do: true

defp unquote(valid_char_name)(_char), do: false
defp unquote(validate_name)(<<char, rest::binary>>),
do: unquote(valid_char_name)(char) and unquote(validate_name)(rest)

defp unquote(decode_name)(char) do
index = char - unquote(min)
Expand Down Expand Up @@ -781,23 +971,19 @@ defmodule Base do
validate_name = :"validate64#{base}?"
validate_main_name = :"validate_main64#{validate_name}?"
valid_char_name = :"valid_char64#{base}?"
valid_word_name = :"valid_word64#{base}?"
{min, decoded} = alphabet |> Enum.with_index() |> to_decode_list.()

# SWAR fast path: 7 bytes per stride, validated via `valid_word64<base>?`
# in the body. Tail leftover (1-6 bytes after a 7-byte stride hits an
# 8-byte-multiple `main`) recurses through the single-byte clause.
defp unquote(validate_main_name)(<<w::56, rest::binary>>),
do: unquote(valid_word_name)(w) and unquote(validate_main_name)(rest)

defp unquote(validate_main_name)(<<>>), do: true

defp unquote(validate_main_name)(
<<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8, rest::binary>>
) do
unquote(valid_char_name)(c1) and
unquote(valid_char_name)(c2) and
unquote(valid_char_name)(c3) and
unquote(valid_char_name)(c4) and
unquote(valid_char_name)(c5) and
unquote(valid_char_name)(c6) and
unquote(valid_char_name)(c7) and
unquote(valid_char_name)(c8) and
unquote(validate_main_name)(rest)
end
defp unquote(validate_main_name)(<<char, rest::binary>>),
do: unquote(valid_char_name)(char) and unquote(validate_main_name)(rest)

defp unquote(validate_name)(<<>>, _pad?), do: true

Expand Down Expand Up @@ -883,13 +1069,6 @@ defmodule Base do
end
end

@compile {:inline, [{valid_char_name, 1}]}
defp unquote(valid_char_name)(char)
when elem({unquote_splicing(decoded)}, char - unquote(min)) != nil,
do: true

defp unquote(valid_char_name)(_char), do: false

defp unquote(decode_name)(char) do
index = char - unquote(min)

Expand Down Expand Up @@ -1445,21 +1624,18 @@ defmodule Base do
valid_char_name = :"valid_char32#{base}?"
{min, decoded} = to_decode_list.(alphabet)

# SWAR fast path: 7 bytes per stride, validated via `valid_word32<base>?`
# in the body. Tail leftover (1-6 bytes after a 7-byte stride hits an
# 8-byte-multiple `main`) recurses through the single-byte clause.
valid_word_name = :"valid_word32#{base}?"

defp unquote(validate_main_name)(<<w::56, rest::binary>>),
do: unquote(valid_word_name)(w) and unquote(validate_main_name)(rest)

defp unquote(validate_main_name)(<<>>), do: true

defp unquote(validate_main_name)(
<<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8, rest::binary>>
) do
unquote(valid_char_name)(c1) and
unquote(valid_char_name)(c2) and
unquote(valid_char_name)(c3) and
unquote(valid_char_name)(c4) and
unquote(valid_char_name)(c5) and
unquote(valid_char_name)(c6) and
unquote(valid_char_name)(c7) and
unquote(valid_char_name)(c8) and
unquote(validate_main_name)(rest)
end
defp unquote(validate_main_name)(<<char, rest::binary>>),
do: unquote(valid_char_name)(char) and unquote(validate_main_name)(rest)

defp unquote(validate_name)(<<>>, _pad?), do: true

Expand Down Expand Up @@ -1539,13 +1715,6 @@ defmodule Base do
end
end

@compile {:inline, [{valid_char_name, 1}]}
defp unquote(valid_char_name)(char)
when elem({unquote_splicing(decoded)}, char - unquote(min)) != nil,
do: true

defp unquote(valid_char_name)(_char), do: false

defp unquote(decode_name)(char) do
index = char - unquote(min)

Expand Down
Loading