summaryrefslogtreecommitdiff
path: root/src/expand/format_args.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/expand/format_args.cpp')
-rw-r--r--src/expand/format_args.cpp88
1 files changed, 80 insertions, 8 deletions
diff --git a/src/expand/format_args.cpp b/src/expand/format_args.cpp
index e2ee35d3..6cc81af6 100644
--- a/src/expand/format_args.cpp
+++ b/src/expand/format_args.cpp
@@ -34,7 +34,7 @@ namespace {
};
Align align = Align::Unspec;
- char align_char = ' ';
+ uint32_t align_char = ' ';
Sign sign = Sign::Unspec;
bool alternate = false;
@@ -118,6 +118,71 @@ namespace {
}
};
+ uint32_t parse_utf8(const char* s, int& out_len)
+ {
+ uint8_t v1 = s[0];
+ if( v1 < 0x80 )
+ {
+ out_len = 1;
+ return v1;
+ }
+ else if( (v1 & 0xC0) == 0x80 )
+ {
+ // Invalid (continuation)
+ out_len = 1;
+ return 0xFFFE;
+ }
+ else if( (v1 & 0xE0) == 0xC0 ) {
+ // Two bytes
+ out_len = 2;
+
+ uint8_t e1 = s[1];
+ if( (e1 & 0xC0) != 0x80 ) return 0xFFFE;
+
+ uint32_t outval
+ = ((v1 & 0x1F) << 6)
+ | ((e1 & 0x3F) <<0)
+ ;
+ return outval;
+ }
+ else if( (v1 & 0xF0) == 0xE0 ) {
+ // Three bytes
+ out_len = 3;
+ uint8_t e1 = s[1];
+ if( (e1 & 0xC0) != 0x80 ) return 0xFFFE;
+ uint8_t e2 = s[2];
+ if( (e2 & 0xC0) != 0x80 ) return 0xFFFE;
+
+ uint32_t outval
+ = ((v1 & 0x0F) << 12)
+ | ((e1 & 0x3F) << 6)
+ | ((e2 & 0x3F) << 0)
+ ;
+ return outval;
+ }
+ else if( (v1 & 0xF8) == 0xF0 ) {
+ // Four bytes
+ out_len = 4;
+ uint8_t e1 = s[1];
+ if( (e1 & 0xC0) != 0x80 ) return 0xFFFE;
+ uint8_t e2 = s[2];
+ if( (e2 & 0xC0) != 0x80 ) return 0xFFFE;
+ uint8_t e3 = s[3];
+ if( (e3 & 0xC0) != 0x80 ) return 0xFFFE;
+
+ uint32_t outval
+ = ((v1 & 0x07) << 18)
+ | ((e1 & 0x3F) << 12)
+ | ((e2 & 0x3F) << 6)
+ | ((e3 & 0x3F) << 0)
+ ;
+ return outval;
+ }
+ else {
+ throw ""; // Should be impossible.
+ }
+ }
+
/// Parse a format string into a sequence of fragments.
///
/// Returns a list of fragments, and the remaining free text after the last format sequence
@@ -209,9 +274,15 @@ namespace {
s ++; // eat ':'
// Alignment
- if( s[0] != '\0' && (s[1] == '<' || s[1] == '^' || s[1] == '>') ) {
- args.align_char = s[0];
- s ++;
+ // - Padding character, a single unicode codepoint followed by '<'/'^'/'>'
+ {
+ int next_c_i;
+ uint32_t ch = parse_utf8(s, next_c_i);
+ char next_c = s[next_c_i];
+ if( ch != '}' && ch != '\0' && (next_c == '<' || next_c == '^' || next_c == '>') ) {
+ args.align_char = ch;
+ s += next_c_i;
+ }
}
if( *s == '<' ) {
args.align = FmtArgs::Align::Left;
@@ -338,16 +409,17 @@ namespace {
}
}
+ if( s[0] == '\0' )
+ ERROR(sp, E0000, "Unexpected end of formatting string");
+
// Parse ident?
// - Lazy way is to just handle a single char and ensure that it is just a single char
- if( s[0] != '}' && s[0] != '\0' && s[1] != '}' ) {
- TODO(sp, "Parse formatting fragment at \"" << fmt_frag_str << "\" (long type)");
+ if( s[0] != '}' && s[1] != '}' ) {
+ TODO(sp, "Parse formatting fragment at \"" << fmt_frag_str << "\" (long type) - s=...\"" << s << "\"");
}
switch(s[0])
{
- case '\0':
- ERROR(sp, E0000, "Unexpected end of formatting string");
default:
ERROR(sp, E0000, "Unknown formatting type specifier '" << *s << "'");
case '}': trait_name = "Display"; break;