Just for fun, here’s UTF-8 encoding in pure VCL.
I wrote this as part of our language tests when we introduced bitwise operators a while ago, but I think it’s also a good example of passing arguments to, and returning values from, user-defined subs.
# in: var.i - index into the hex alphabet (callers pass 0..15)
# out: the single lowercase hex digit found at that index
sub hexdigit(INTEGER var.i) STRING {
  declare local var.digit STRING;
  # Table lookup: take one character starting at offset var.i.
  set var.digit = substr("0123456789abcdef", var.i, 1);
  return var.digit;
}
# in: var.i - an octet (only bits 0..7 are looked at)
# out: the octet as a percent-escape: "%" followed by two lowercase
#      base-16 digits, high nibble first
sub hex(INTEGER var.i) STRING {
  declare local var.high_nibble INTEGER;
  declare local var.low_nibble INTEGER;
  declare local var.high_digit STRING;
  declare local var.low_digit STRING;
  declare local var.escaped STRING;

  # Bits 4..7: mask them out, then shift them down to 0..15.
  set var.high_nibble = var.i;
  set var.high_nibble &= 0xf0;
  set var.high_nibble >>= 4;
  set var.high_digit = hexdigit(var.high_nibble);

  # Bits 0..3.
  set var.low_nibble = var.i;
  set var.low_nibble &= 0xf;
  set var.low_digit = hexdigit(var.low_nibble);

  # The {"..."} long-string form is kept for the percent sign
  # (presumably so it is taken literally rather than as an escape prefix).
  set var.escaped = {"%"} + var.high_digit + var.low_digit;
  return var.escaped;
}
# in: var.codepoint - unicode codepoint as text; hexadecimal when it starts
#     with "0x", decimal otherwise
# out: the UTF-8 byte sequence for that codepoint, or "" when the value is
#      not a Unicode scalar value (negative, above 0x10ffff, or a surrogate)
sub codepoint(STRING var.codepoint) STRING {
  declare local var.cp INTEGER;
  declare local var.c0 STRING;
  declare local var.c1 STRING;
  declare local var.c2 STRING;
  declare local var.c3 STRING;
  declare local var.tmp INTEGER;

  if (var.codepoint ~ "^0x") {
    set var.cp = std.strtol(var.codepoint, 16);
  } else {
    set var.cp = std.atoi(var.codepoint);
  }

  # Outside the Unicode code space: nothing to encode.
  if (var.cp < 0 || var.cp > 0x10ffff) {
    return "";
  }

  # Surrogates U+D800..U+DFFF (both ends inclusive) are reserved for UTF-16
  # and must not be encoded as UTF-8.
  if (var.cp >= 0xd800 && var.cp <= 0xdfff) {
    return "";
  }

  # Each byte is first built as a "%xx" escape by hex(); the escapes are
  # turned into raw bytes in one go at the end.

  # 1 byte: 0xxxxxxx
  # NOTE(review): codepoint 0 yields "%00"; confirm how urldecode() and the
  # STRING type treat an embedded NUL byte.
  if (var.cp <= 0x7f) {
    set var.c0 = hex(var.cp);
    goto done;
  }

  # 2 bytes: 110xxxxx 10xxxxxx
  if (var.cp <= 0x7ff) {
    set var.tmp = var.cp;
    set var.tmp >>= 6;
    set var.tmp += 192;   # 0xC0 lead-byte marker
    set var.c0 = hex(var.tmp);

    set var.tmp = var.cp;
    set var.tmp &= 63;    # low 6 bits
    set var.tmp += 128;   # 0x80 continuation marker
    set var.c1 = hex(var.tmp);
    goto done;
  }

  # 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
  if (var.cp <= 0xffff) {
    set var.tmp = var.cp;
    set var.tmp >>= 12;
    set var.tmp += 224;   # 0xE0 lead-byte marker
    set var.c0 = hex(var.tmp);

    set var.tmp = var.cp;
    set var.tmp >>= 6;
    set var.tmp &= 63;
    set var.tmp += 128;
    set var.c1 = hex(var.tmp);

    set var.tmp = var.cp;
    set var.tmp &= 63;
    set var.tmp += 128;
    set var.c2 = hex(var.tmp);
    goto done;
  }

  # 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  # (the range guard above already ensures var.cp <= 0x10ffff)
  set var.tmp = var.cp;
  set var.tmp >>= 18;
  set var.tmp += 240;     # 0xF0 lead-byte marker
  set var.c0 = hex(var.tmp);

  set var.tmp = var.cp;
  set var.tmp >>= 12;
  set var.tmp &= 63;
  set var.tmp += 128;
  set var.c1 = hex(var.tmp);

  set var.tmp = var.cp;
  set var.tmp >>= 6;
  set var.tmp &= 63;
  set var.tmp += 128;
  set var.c2 = hex(var.tmp);

  set var.tmp = var.cp;
  set var.tmp &= 63;
  set var.tmp += 128;
  set var.c3 = hex(var.tmp);

  done:
  # borrowing urldecode() for hex decoding; parts that were never assigned
  # belong to shorter sequences and contribute nothing
  return urldecode(var.c0 + var.c1 + var.c2 + var.c3);
}
# Unconditionally raises error 600 for every request, handing control to
# vcl_error, where the synthetic response body is produced.
sub vcl_recv {
error 600;
}
# Builds the synthetic response for the `error 600` raised in vcl_recv:
# one random codepoint from U+1F600..U+1F64F, encoded as UTF-8.
# Errors with any other status are left untouched so they are not replaced
# by an emoji body.
sub vcl_error {
  declare local var.codepoint INTEGER;
  declare local var.utf8 STRING;

  if (obj.status == 600) {
    # a random codepoint in the unicode emoji range
    set var.codepoint = randomint(0x1F600, 0x1F64F);

    # encode to utf8
    # NOTE(review): an INTEGER is passed to a STRING parameter here; this
    # relies on it being rendered as decimal text, which codepoint() then
    # parses with std.atoi - confirm the implicit conversion.
    set var.utf8 = codepoint(var.codepoint);

    # synthetic response; 600 is only an internal marker, so present a
    # regular success status to the client
    set obj.status = 200;
    set obj.response = "OK";
    set obj.http.content-type = "text/plain; charset=utf-8";
    synthetic var.utf8;
    return(deliver);
  }
}