From ef19e3996af58ba1c43262f6a66b8705832ab529 Mon Sep 17 00:00:00 2001 From: Nick Gasson Date: Sat, 26 Nov 2022 10:36:18 +0000 Subject: [PATCH] Use a hash table to store interned strings --- src/ident.c | 579 +++++++++++++++++----------------------------- src/ident.h | 19 +- src/prim.h | 2 +- src/rt/wave.c | 2 +- src/util.c | 7 +- test/test_ident.c | 115 +++------ 6 files changed, 250 insertions(+), 474 deletions(-) diff --git a/src/ident.c b/src/ident.c index 12e18320..820b789a 100644 --- a/src/ident.c +++ b/src/ident.c @@ -16,8 +16,10 @@ // #include "util.h" +#include "array.h" #include "fbuf.h" #include "ident.h" +#include "thread.h" #include #include @@ -25,146 +27,81 @@ #include #include #include +#include -#define MAP_DEPTH 3 +#define HASH_INIT 5381; +typedef unsigned long hash_state_t; -typedef struct clist clist_t; -typedef struct trie trie_t; - -struct clist { - unsigned char value; - trie_t *down; - clist_t *left; - clist_t *right; -}; - -struct trie { - unsigned char value; - uint16_t write_gen; - uint16_t depth; - uint32_t write_index; - trie_t *up; - clist_t *list; - trie_t *map[0]; -}; +typedef A(char) char_array_t; struct ident_rd_ctx { - fbuf_t *file; - size_t cache_sz; - size_t cache_alloc; - ident_t *cache; + fbuf_t *file; + size_t cache_sz; + size_t cache_alloc; + ident_t *cache; + char_array_t scratch; }; struct ident_wr_ctx { - fbuf_t *file; - uint32_t next_index; - uint16_t generation; - unsigned char *scratch; - size_t scratch_size; + fbuf_t *file; + uint32_t next_index; + uint16_t generation; }; -typedef struct { - trie_t trie; - trie_t *map[256]; -} root_t; - -static root_t root = { - { - .value = '\0', - .write_gen = 0, - .write_index = 0, - .depth = 1, - .up = NULL - } +struct _ident { + ident_t chain; + uint32_t write_index; + uint16_t write_gen; + uint16_t length; + char bytes[0]; }; -static trie_t *alloc_node(char ch, trie_t *prev) -{ - const size_t mapsz = (prev->depth < MAP_DEPTH) ? 256 * sizeof(trie_t *) : 0; - - trie_t *t = xmalloc(sizeof(trie_t) + mapsz); - t->value = ch; - t->depth = prev->depth + 1; - t->up = prev; - t->write_gen = 0; - t->list = NULL; +#define TABLE_SZ 1024 +static ident_t table[TABLE_SZ]; - if (mapsz > 0) - memset(t->map, '\0', mapsz); - - if (prev->depth <= MAP_DEPTH) - prev->map[(unsigned char)ch] = t; - else { - clist_t *c = xmalloc(sizeof(clist_t)); - c->value = ch; - c->down = t; - c->left = NULL; - c->right = NULL; - - clist_t *it, **where; - for (it = prev->list, where = &(prev->list); - it != NULL; - where = (ch < it->value ? &(it->left) : &(it->right)), - it = *where) - ; - - *where = c; - } - - return t; -} - -static void build_trie(const char *str, trie_t *prev, trie_t **end) +static inline int hash_update(hash_state_t *state, const char *key, int nchars) { - assert(*str != '\0'); - assert(prev != NULL); + // DJB2 hash function from here: + // http://www.cse.yorku.ca/~oz/hash.html - trie_t *t = alloc_node(*str, prev); - - if (*(++str) == '\0') - *end = t; - else - build_trie(str, t, end); + hash_state_t hash = *state; + const char *p = key; + for (; p < key + nchars && *p; p++) + hash = ((hash << 5) + hash) + *p; + *state = hash; + return p - key; } -static clist_t *search_node(trie_t *t, char ch) +static bool ident_install(ident_t *where, ident_t new, int len) { - clist_t *it; - for (it = t->list; - (it != NULL) && (it->value != ch); - it = (ch < it->value ? it->left : it->right)) - ; + if (unlikely(len >= UINT16_MAX)) + fatal("identifier '%s' too long", new->bytes); - return it; + new->length = len; + new->chain = NULL; + new->write_gen = 0; + new->write_index = 0; + + *where = new; + return true; } -static bool search_trie(const char **str, trie_t *t, trie_t **end) +static ident_t ident_from_bytes(const char *str, hash_state_t hash, int len) { - assert(**str != '\0'); - assert(t != NULL); - - trie_t *next = NULL; - - if (t->depth <= MAP_DEPTH) - next = t->map[(unsigned char)**str]; - else { - clist_t *it = search_node(t, **str); - next = (it != NULL) ? it->down : NULL; - } + const int slot = hash & (TABLE_SZ - 1); + for (;;) { + ident_t *ptr = &(table[slot]); + for (; *ptr; ptr = &((*ptr)->chain)) { + if ((*ptr)->length == len && memcmp((*ptr)->bytes, str, len) == 0) + return *ptr; + } - if (next == NULL) { - *end = t; - return false; - } - else { - (*str)++; + ident_t new = xmalloc_flex(sizeof(struct _ident), len + 1, sizeof(char)); + memcpy(new->bytes, str, len); + new->bytes[len] = '\0'; - if (**str == '\0') { - *end = next; - return true; - } - else - return search_trie(str, next, end); + if (ident_install(ptr, new, len)) + return new; } } @@ -173,60 +110,15 @@ ident_t ident_new(const char *str) assert(str != NULL); assert(*str != '\0'); - trie_t *result; - if (!search_trie(&str, &(root.trie), &result)) - build_trie(str, result, &result); - - return result; -} - -bool ident_interned(const char *str) -{ - assert(str != NULL); - assert(*str != '\0'); - - trie_t *result; - return search_trie(&str, &(root.trie), &result); -} - -void istr_r(ident_t ident, char *buf, size_t sz) -{ - char *p = buf + ident->depth - 1; - assert(p < buf + sz); - *p-- = '\0'; + hash_state_t hash = HASH_INIT; + int len = hash_update(&hash, str, INT_MAX); - trie_t *it; - for (it = ident; it->value != '\0'; it = it->up) - *(p--) = it->value < 128 ? it->value : '?'; + return ident_from_bytes(str, hash, len); } const char *istr(ident_t ident) { - if (ident == NULL) - return NULL; - -#if 1 - char *p = get_fmt_buf(ident->depth); - istr_r(ident, p, ident->depth); -#else - char *p = get_fmt_buf(ident->depth * 5) + ident->depth * 5 - 1; - *p = '\0'; - - trie_t *it; - for (it = ident; it->value != '\0'; it = it->up) { - if (it->value < 128) - *(--p) = it->value; - else { - *(--p) = '0' + (it->value & 7); - *(--p) = '0' + ((it->value >> 3) & 7); - *(--p) = '0' + ((it->value >> 6) & 7); - *(--p) = '0'; - *(--p) = '\\'; - } - } -#endif - - return p; + return ident ? ident->bytes : NULL; } ident_wr_ctx_t ident_write_begin(fbuf_t *f) @@ -237,8 +129,6 @@ ident_wr_ctx_t ident_write_begin(fbuf_t *f) struct ident_wr_ctx *ctx = xcalloc(sizeof(struct ident_wr_ctx)); ctx->file = f; ctx->generation = ident_wr_gen++; - ctx->scratch_size = 100; - ctx->scratch = xmalloc(ctx->scratch_size); ctx->next_index = 1; // Skip over null ident return ctx; @@ -246,7 +136,6 @@ ident_wr_ctx_t ident_write_begin(fbuf_t *f) void ident_write_end(ident_wr_ctx_t ctx) { - free(ctx->scratch); free(ctx); } @@ -259,19 +148,8 @@ void ident_write(ident_t ident, ident_wr_ctx_t ctx) else { fbuf_put_uint(ctx->file, 0); - if (ident->depth > ctx->scratch_size) { - ctx->scratch_size = next_power_of_2(ident->depth); - ctx->scratch = xrealloc(ctx->scratch, ctx->scratch_size); - } - - unsigned char *p = ctx->scratch + ident->depth - 1; - *p = '\0'; - - trie_t *it; - for (it = ident; it->value != '\0'; it = it->up) - *(--p) = it->value; - - write_raw(ctx->scratch, ident->depth, ctx->file); + assert(ident->bytes[ident->length] == '\0'); + write_raw(ident->bytes, ident->length + 1, ctx->file); ident->write_gen = ctx->generation; ident->write_index = ctx->next_index++; @@ -282,7 +160,7 @@ void ident_write(ident_t ident, ident_wr_ctx_t ctx) ident_rd_ctx_t ident_read_begin(fbuf_t *f) { - struct ident_rd_ctx *ctx = xmalloc(sizeof(struct ident_rd_ctx)); + struct ident_rd_ctx *ctx = xcalloc(sizeof(struct ident_rd_ctx)); ctx->file = f; ctx->cache_alloc = 256; ctx->cache_sz = 0; @@ -296,6 +174,7 @@ ident_rd_ctx_t ident_read_begin(fbuf_t *f) void ident_read_end(ident_rd_ctx_t ctx) { + ACLEAR(ctx->scratch); free(ctx->cache); free(ctx); } @@ -309,29 +188,14 @@ ident_t ident_read(ident_rd_ctx_t ctx) ctx->cache = xrealloc(ctx->cache, ctx->cache_alloc * sizeof(ident_t)); } - trie_t *p = &(root.trie); + ARESIZE(ctx->scratch, 0); char ch; - while ((ch = read_u8(ctx->file)) != '\0') { - trie_t *next = NULL; - if (p->depth <= MAP_DEPTH) - next = p->map[(unsigned char)ch]; - else { - clist_t *it = search_node(p, ch); - next = (it != NULL) ? it->down : NULL; - } + do { + ch = read_u8(ctx->file); + APUSH(ctx->scratch, ch); + } while (ch); - if (next != NULL) - p = next; - else - p = alloc_node(ch, p); - } - - if (p == &(root.trie)) - return NULL; - else { - ctx->cache[ctx->cache_sz++] = p; - return p; - } + return (ctx->cache[ctx->cache_sz++] = ident_new(ctx->scratch.items)); } else if (likely(index - 1 < ctx->cache_sz)) return ctx->cache[index - 1]; @@ -342,21 +206,37 @@ ident_t ident_read(ident_rd_ctx_t ctx) ident_t ident_uniq(const char *prefix) { - static int counter = 0; + hash_state_t base_hash = HASH_INIT; + int len = hash_update(&base_hash, prefix, INT_MAX); + + static volatile int counter = 0; + char suffix[16] = ""; + for (;;) { + hash_state_t hash = base_hash; + int sufflen = hash_update(&hash, suffix, INT_MAX); + + const int slot = hash & (TABLE_SZ - 1); + for (;;) { + ident_t *ptr = &(table[slot]); + for (; *ptr; ptr = &((*ptr)->chain)) { + if ((*ptr)->length == len + sufflen + && memcmp((*ptr)->bytes, prefix, len) == 0 + && memcmp((*ptr)->bytes + len, suffix, sufflen) == 0) + goto exist; + } - const char *start = prefix; - trie_t *end; - if (search_trie(&start, &(root.trie), &end)) { - const size_t len = strlen(prefix) + 16; - char buf[len]; - snprintf(buf, len, "%s%d", prefix, counter++); + ident_t new = xmalloc_flex(sizeof(struct _ident), len + sufflen + 1, + sizeof(char)); + memcpy(new->bytes, prefix, len); + memcpy(new->bytes + len, suffix, sufflen); + new->bytes[len + sufflen] = '\0'; - return ident_new(buf); - } - else { - trie_t *result; - build_trie(start, end, &result); - return result; + if (ident_install(ptr, new, len + sufflen)) + return new; + } + + exist: + checked_sprintf(suffix, sizeof(suffix), "%d", relaxed_add(&counter, 1)); } } @@ -367,136 +247,113 @@ ident_t ident_prefix(ident_t a, ident_t b, char sep) else if (b == NULL) return a; - trie_t *result; - - if (sep != '\0') { - // Append separator - const char sep_str[] = { sep, '\0' }; - const char *p_sep_str = sep_str; - if (!search_trie(&p_sep_str, a, &result)) - build_trie(p_sep_str, result, &result); - } - else - result = a; - - // Append b - const char *bstr = istr(b); - if (!search_trie(&bstr, result, &result)) - build_trie(bstr, result, &result); - - return result; -} + hash_state_t hash = HASH_INIT; + hash_update(&hash, a->bytes, a->length); + hash_update(&hash, &sep, 1); + hash_update(&hash, b->bytes, b->length); + + const int len = a->length + b->length + (sep != '\0'); + const int slot = hash & (TABLE_SZ - 1); + + for (;;) { + ident_t *ptr = &(table[slot]); + for (; *ptr; ptr = &((*ptr)->chain)) { + if ((*ptr)->length == len + && memcmp((*ptr)->bytes, a->bytes, a->length) == 0 + && (sep == '\0' || (*ptr)->bytes[a->length] == sep) + && memcmp((*ptr)->bytes + a->length + (sep != '\0'), + b->bytes, b->length) == 0) + return *ptr; + } -ident_t ident_strip(ident_t a, ident_t b) -{ - assert(a != NULL); - assert(b != NULL); + ident_t new = xmalloc_flex(sizeof(struct _ident), len + 1, sizeof(char)); + memcpy(new->bytes, a->bytes, a->length); + if (sep != '\0') new->bytes[a->length] = sep; + memcpy(new->bytes + a->length + (sep != '\0'), b->bytes, b->length + 1); - while (a->value == b->value && b->value != '\0') { - a = a->up; - b = b->up; + if (ident_install(ptr, new, len)) + return new; } - - return (b->value == '\0' ? a : NULL); } bool ident_starts_with(ident_t a, ident_t b) { - while (a != b && a->value != '\0') - a = a->up; - - return a == b; + return b != NULL && (a == b || strncmp(a->bytes, b->bytes, b->length) == 0); } char ident_char(ident_t i, unsigned n) { if (i == NULL) return '\0'; - else if (n == 0) - return i->value; - else - return ident_char(i->up, n - 1); + else { + assert(n < i->length); + return i->bytes[n]; + } } size_t ident_len(ident_t i) { - if (i == NULL || i->value == '\0') - return 0; - else - return i->depth - 1; + return i == NULL ? 0 : i->length; } -static ident_t ident_suffix_until(ident_t i, char c, char escape1, char escape2) +ident_t ident_until(ident_t i, char c) { assert(i != NULL); - bool escaping1 = false, escaping2 = false; - ident_t r = i; - while (i->value != '\0') { - if (!escaping1 && !escaping2 && i->value == c) - r = i->up; - else if (i->value == escape1) - escaping1 = !escaping1; - else if (i->value == escape2) - escaping2 = !escaping2; - i = i->up; - } + int pos = 0; + for (; pos < i->length && i->bytes[pos] != c; pos++); - return r; -} + if (pos == i->length) return i; -ident_t ident_until(ident_t i, char c) -{ - return ident_suffix_until(i, c, '\0', '\0'); + hash_state_t hash = HASH_INIT; + hash_update(&hash, i->bytes, pos); + + return ident_from_bytes(i->bytes, hash, pos); } ident_t ident_runtil(ident_t i, char c) { assert(i != NULL); - for (ident_t r = i; r->value != '\0'; r = r->up) { - if (r->value == c) - return r->up; - } + int pos = i->length; + for (; pos >= 0 && i->bytes[pos] != c; pos--); - return i; + if (pos < 0) return i; + + hash_state_t hash = HASH_INIT; + hash_update(&hash, i->bytes, pos); + + return ident_from_bytes(i->bytes, hash, pos); } ident_t ident_from(ident_t i, char c) { assert(i != NULL); - char buf[i->depth + 1]; - char *p = buf + i->depth; - *p-- = '\0'; + int pos = 0; + for (; pos < i->length && i->bytes[pos] != c; pos++); - char *from = NULL; - while (i->value != '\0') { - if (i->value == c) - from = p + 1; - *p-- = i->value; - i = i->up; - } + if (pos == i->length) return NULL; - return (from == NULL) ? NULL : ident_new(from); + hash_state_t hash = HASH_INIT; + hash_update(&hash, i->bytes + pos + 1, i->length - pos - 1); + + return ident_from_bytes(i->bytes + pos + 1, hash, i->length - pos - 1); } ident_t ident_rfrom(ident_t i, char c) { assert(i != NULL); - char buf[i->depth + 1]; - char *p = buf + i->depth; - *p-- = '\0'; + int pos = i->length; + for (; pos >= 0 && i->bytes[pos] != c; pos--); - while (i->value != '\0') { - if (i->value == c) - return ident_new(p + 1); - *p-- = i->value; - i = i->up; - } + if (pos < 0) return i; - return NULL; + hash_state_t hash = HASH_INIT; + hash_update(&hash, i->bytes + pos + 1, i->length - pos - 1); + + return ident_from_bytes(i->bytes + pos + 1, hash, i->length - pos - 1); } bool icmp(ident_t i, const char *s) @@ -504,41 +361,25 @@ bool icmp(ident_t i, const char *s) if (i == NULL || s == NULL) return i == NULL && s == NULL; - trie_t *result; - if (!search_trie(&s, &(root.trie), &result)) - return false; - else - return result == i; + return strcmp(i->bytes, s) == 0; } int ident_compare(ident_t a, ident_t b) { - if (a->up == b->up) - return a->value - b->value; - else if (a->depth > b->depth) { - int cmp = ident_compare(a->up, b); - return cmp == 0 ? a->value : cmp; - } - else if (b->depth > a->depth) { - int cmp = ident_compare(a, b->up); - return cmp == 0 ? 0 - b->value : cmp; - } - else - return ident_compare(a->up, b->up); + return strcmp(a->bytes, b->bytes); } -static bool ident_glob_walk(const trie_t *i, const char *g, - const char *const end) +static bool ident_glob_walk(const char *str, const char *g) { - if (i->value == '\0') - return (g < end); - else if (g < end) + if (*str == '\0') + return *g == '\0'; + else if (*g == '\0') return false; else if (*g == '*') - return ident_glob_walk(i->up, g, end) - || ident_glob_walk(i->up, g - 1, end); - else if (i->value == *g) - return ident_glob_walk(i->up, g - 1, end); + return ident_glob_walk(str + 1, g) + || ident_glob_walk(str + 1, g + 1); + else if (*str == *g) + return ident_glob_walk(str + 1, g + 1); else return false; } @@ -547,72 +388,68 @@ bool ident_glob(ident_t i, const char *glob, int length) { assert(i != NULL); - if (length < 0) - length = strlen(glob); - - return ident_glob_walk(i, glob + length - 1, glob); -} - -bool ident_contains(ident_t i, const char *search) -{ - assert(i != NULL); - - for (ident_t r = i; r->value != '\0'; r = r->up) { - for (const char *p = search; *p != '\0'; p++) { - if (r->value == *p) - return true; - } - } - - return false; + return ident_glob_walk(i->bytes, glob); } ident_t ident_downcase(ident_t i) { - // TODO: this could be implemented more efficiently + assert(i != NULL); - if (i == NULL) - return NULL; + char small[64], *big = NULL, *buf = small; + if (i->length >= ARRAY_LEN(small)) + buf = big = xmalloc(i->length + 1); - char *p = get_fmt_buf(i->depth) + i->depth - 1; - *p = '\0'; + for (int pos = 0; pos < i->length; pos++) + buf[pos] = tolower((int)i->bytes[pos]); + buf[i->length] = '\0'; - trie_t *it; - for (it = i; it->value != '\0'; it = it->up) - *(--p) = tolower((int)it->value); + hash_state_t hash = HASH_INIT; + hash_update(&hash, buf, i->length); - return ident_new(p); + ident_t result = ident_from_bytes(buf, hash, i->length); + if (big != NULL) free(big); + return result; } -ident_t ident_walk_selected(ident_t *i) +ident_t ident_walk_selected(ident_t *it) { - if (*i == NULL) + if (*it == NULL) return NULL; - ident_t result = ident_suffix_until(*i, '.', '\'', '\\'); - if (result == NULL || result == *i) { - result = *i; - *i = NULL; + ident_t i = *it; + char escape = '\0'; + int pos = 0; + for (; pos < i->length; pos++) { + if (i->bytes[pos] == '.' && escape == '\0') + break; + else if (i->bytes[pos] == '\'' && escape != '\\') + escape = escape == '\'' ? '\0' : '\''; + else if (i->bytes[pos] == '\\' && escape != '\'') + escape = escape == '\\' ? '\0' : '\\'; } - else { - char *LOCAL buf = xmalloc((*i)->depth + 1), *p = buf + (*i)->depth + 1; - *--p = '\0'; - for (ident_t it = *i; it != result; it = it->up) - *--p = it->value; - *i = ident_new(p + 1); + + if (pos == i->length) { + *it = NULL; + return i; } - return result; + hash_state_t hash = HASH_INIT; + hash_update(&hash, i->bytes + pos + 1, i->length - pos - 1); + + *it = ident_from_bytes(i->bytes + pos + 1, hash, i->length - pos - 1); + + hash = HASH_INIT; + hash_update(&hash, i->bytes, pos); + + return ident_from_bytes(i->bytes, hash, pos); } int ident_distance(ident_t a, ident_t b) { - const int n = ident_len(b); - const int m = ident_len(a); + const int n = b->length; + const int m = a->length; - char s[m + 1], t[n + 1]; - istr_r(a, s, m + 1); - istr_r(b, t, n + 1); + const char *s = a->bytes, *t = b->bytes; int mem[2 * (n + 1)], *v0 = mem, *v1 = mem + n + 1; diff --git a/src/ident.h b/src/ident.h index 5279a76b..297f7cbe 100644 --- a/src/ident.h +++ b/src/ident.h @@ -23,9 +23,6 @@ // Intern a string as an identifier. ident_t ident_new(const char *str); -// True if the given string was already interned. -bool ident_interned(const char *str); - // Generate a unique identifier with the given prefix. ident_t ident_uniq(const char *prefix); @@ -33,14 +30,10 @@ ident_t ident_uniq(const char *prefix); // by a dot. ident_t ident_prefix(ident_t a, ident_t b, char sep); -// Strips a suffix from an identifier or returns NULL if this -// is not possible. -ident_t ident_strip(ident_t a, ident_t b); - // True if identifier a starts with b bool ident_starts_with(ident_t a, ident_t b); -// Return the Nth character of an identifier counting from the end. +// Return the Nth character of an identifier char ident_char(ident_t i, unsigned n); // Number of characters in the identifier @@ -69,23 +62,15 @@ int ident_compare(ident_t a, ident_t b); // if set to the length of glob bool ident_glob(ident_t i, const char *glob, int length); -// True if identifier contains any characters from search -bool ident_contains(ident_t i, const char *search); - // Convert an indentifier to lower case ident_t ident_downcase(ident_t i); // Iterate through dot-separated name components ident_t ident_walk_selected(ident_t *i); -// Convert an identifier reference to a NULL-terminated string. -// This function is quite slow so its use should be avoided except -// for printing. +// Convert an identifier reference to a NULL-terminated string const char *istr(ident_t ident); -// As above but write into a user supplied buffer. -void istr_r(ident_t ident, char *buf, size_t sz); - // Compute Levenshtein distance between two identifiers int ident_distance(ident_t a, ident_t b); diff --git a/src/prim.h b/src/prim.h index 05b57054..77befd8e 100644 --- a/src/prim.h +++ b/src/prim.h @@ -29,7 +29,7 @@ typedef struct _lib *lib_t; typedef struct _object object_t; typedef struct _object_arena object_arena_t; -typedef struct trie *ident_t; +typedef struct _ident *ident_t; typedef struct _tree *tree_t; typedef struct _type *type_t; typedef struct _vlog_node *vlog_node_t; diff --git a/src/rt/wave.c b/src/rt/wave.c index 341293e0..edef04ba 100644 --- a/src/rt/wave.c +++ b/src/rt/wave.c @@ -341,7 +341,7 @@ static fst_type_t *fst_type_for(type_t type, const loc_t *loc) ft->u.literals.strings = xmalloc(maxsize * nlits); for (int i = 0; i < nlits; i++) { char *p = ft->u.literals.strings + i*maxsize; - istr_r(tree_ident(type_enum_literal(type, i)), p, maxsize); + strncpy(p, istr(tree_ident(type_enum_literal(type, i))), maxsize); for (; *p; p++) *p = tolower((int)*p); } diff --git a/src/util.c b/src/util.c index 00a4b71a..2319f8cb 100644 --- a/src/util.c +++ b/src/util.c @@ -505,6 +505,7 @@ void note_at(const loc_t *loc, const char *fmt, ...) void fatal_at(const loc_t *loc, const char *fmt, ...) { diag_t *d = diag_new(DIAG_FATAL, loc); + diag_suppress(d, false); va_list ap; va_start(ap, fmt); @@ -518,6 +519,7 @@ void fatal_at(const loc_t *loc, const char *fmt, ...) void fatal(const char *fmt, ...) { diag_t *d = diag_new(DIAG_FATAL, NULL); + diag_suppress(d, false); va_list ap; va_start(ap, fmt); @@ -1274,9 +1276,8 @@ void tb_printf(text_buf_t *tb, const char *fmt, ...) void tb_istr(text_buf_t *tb, ident_t ident) { - const size_t len = ident_len(ident); - char *p = tb_reserve(tb, len); // Adds one byte for terminating null - istr_r(ident, p, len + 1); + // TODO: this function seems useless now + tb_cat(tb, istr(ident)); } void tb_append(text_buf_t *tb, char ch) diff --git a/test/test_ident.c b/test/test_ident.c index b1144b5c..b7f947ef 100644 --- a/test/test_ident.c +++ b/test/test_ident.c @@ -150,41 +150,24 @@ START_TEST(test_prefix) } END_TEST -START_TEST(test_strip) -{ - ident_t a, b, c; - - a = ident_new("something"); - b = ident_new("thing"); - c = ident_strip(a, b); - - fail_if(c == NULL); - fail_unless(c == ident_new("some")); - - a = ident_new("g"); - b = ident_new("cake"); - c = ident_strip(a, b); - - fail_unless(c == NULL); -} -END_TEST - START_TEST(test_char) { ident_t i; i = ident_new("foobar"); - fail_unless(ident_char(i, 0) == 'r'); - fail_unless(ident_char(i, 5) == 'f'); - fail_unless(ident_char(i, 3) == 'o'); + fail_unless(ident_char(i, 0) == 'f'); + fail_unless(ident_char(i, 5) == 'r'); + fail_unless(ident_char(i, 3) == 'b'); } END_TEST START_TEST(test_until) { - ident_t i; + ident_t i, tmp; i = ident_new("aye.bee.c"); - fail_unless(ident_until(i, '.') == ident_new("aye")); + tmp = ident_until(i, '.'); + fail_unless(tmp == ident_new("aye")); + fail_unless(tmp == ident_until(i, '.')); i = ident_new("nodot"); fail_unless(ident_until(i, '.') == i); } @@ -259,25 +242,6 @@ START_TEST(test_glob) } END_TEST; -START_TEST(test_interned) -{ - ident_new("foo14141"); - fail_unless(ident_interned("foo14141")); - fail_if(ident_interned("foobar11111")); -} -END_TEST - -START_TEST(test_contains) -{ - ident_t i = ident_new("cake"); - fail_unless(ident_contains(i, "k")); - fail_unless(ident_contains(i, "moa")); - fail_unless(ident_contains(i, "amo")); - fail_if(ident_contains(i, "zod")); - fail_if(ident_contains(i, "")); -} -END_TEST - START_TEST(test_len) { fail_unless(ident_len(ident_new("a")) == 1); @@ -289,27 +253,26 @@ START_TEST(test_downcase) { fail_unless(ident_downcase(ident_new("ABC")) == ident_new("abc")); fail_unless(ident_downcase(ident_new("123xY")) == ident_new("123xy")); + + const char longtext1[] = "XXXXXXXXXXXXXsdfsdfdXXXXXXXXXXXASAAFASFAAFAFAF" + "AFAFAFAFAadfsdfsdfdAAAAAAAAAA"; + const char longtext2[] = "xxxxxxxxxxxxxsdfsdfdxxxxxxxxxxxasaafasfaafafaf" + "afafafafaadfsdfsdfdaaaaaaaaaa"; + fail_unless(ident_downcase(ident_new(longtext1)) == ident_new(longtext2)); } END_TEST START_TEST(test_compare) { - ck_assert_int_eq(0, ident_compare(ident_new("a"), ident_new("a"))); - ck_assert_int_eq(0, ident_compare(ident_new("aaa"), ident_new("aaa"))); - ck_assert_int_eq('a' - 'b', - ident_compare(ident_new("a"), ident_new("b"))); - ck_assert_int_eq('a' - 'b', - ident_compare(ident_new("aaa"), ident_new("aab"))); - ck_assert_int_eq('b' - 'a', - ident_compare(ident_new("aab"), ident_new("aaa"))); - ck_assert_int_eq(0 - 'a', - ident_compare(ident_new("aa"), ident_new("aaa"))); - ck_assert_int_eq('a' - 0, - ident_compare(ident_new("aaa"), ident_new("aa"))); - ck_assert_int_eq('b' - 'a', - ident_compare(ident_new("bab"), ident_new("aba"))); - ck_assert_int_eq('b' - 'l', - ident_compare(ident_new("abcd"), ident_new("alemnic"))); + ck_assert_int_eq(ident_compare(ident_new("a"), ident_new("a")), 0); + ck_assert_int_eq(ident_compare(ident_new("aaa"), ident_new("aaa")), 0); + ck_assert_int_lt(ident_compare(ident_new("a"), ident_new("b")), 0); + ck_assert_int_lt(ident_compare(ident_new("aaa"), ident_new("aab")), 0); + ck_assert_int_gt(ident_compare(ident_new("aab"), ident_new("aaa")), 0); + ck_assert_int_lt(ident_compare(ident_new("aa"), ident_new("aaa")), 0); + ck_assert_int_gt(ident_compare(ident_new("aaa"), ident_new("aa")), 0); + ck_assert_int_gt(ident_compare(ident_new("bab"), ident_new("aba")), 0); + ck_assert_int_lt(ident_compare(ident_new("abcd"), ident_new("alemnic")), 0); } END_TEST @@ -344,24 +307,7 @@ START_TEST(test_starts_with) fail_unless(ident_starts_with(ident_new("ABCdef"), ident_new("ABC"))); fail_if(ident_starts_with(ident_new("abcdef"), ident_new("ABC"))); fail_unless(ident_starts_with(ident_new("foo(x).bar"), ident_new("foo(x)"))); -} -END_TEST - -START_TEST(test_istr_r) -{ - char buf[16]; - - istr_r(ident_new("foo"), buf, 16); - ck_assert_str_eq(buf, "foo"); - - istr_r(ident_new("X"), buf, 16); - ck_assert_str_eq(buf, "X"); - - istr_r(ident_new("?"), buf, 2); - ck_assert_str_eq(buf, "?"); - - istr_r(ident_new("string"), buf, 7); - ck_assert_str_eq(buf, "string"); + fail_if(ident_starts_with(ident_new("foo(x).bar"), NULL)); } END_TEST @@ -406,6 +352,16 @@ START_TEST(test_distance) } END_TEST +START_TEST(test_uniq) +{ + ident_t i1 = ident_new("prefix"); + ident_t i2 = ident_uniq("prefix"); + fail_if(i1 == i2); + fail_unless(i2 == ident_new("prefix1")); + fail_unless(icmp(ident_uniq("prefix"), "prefix2")); +} +END_TEST + Suite *get_ident_tests(void) { Suite *s = suite_create("ident"); @@ -417,7 +373,6 @@ Suite *get_ident_tests(void) tcase_add_test(tc_core, test_rand); tcase_add_test(tc_core, test_read_write); tcase_add_test(tc_core, test_prefix); - tcase_add_test(tc_core, test_strip); tcase_add_test(tc_core, test_char); tcase_add_test(tc_core, test_until); tcase_add_test(tc_core, test_runtil); @@ -425,15 +380,13 @@ Suite *get_ident_tests(void) tcase_add_test(tc_core, test_glob); tcase_add_test(tc_core, test_rfrom); tcase_add_test(tc_core, test_from); - tcase_add_test(tc_core, test_interned); - tcase_add_test(tc_core, test_contains); tcase_add_test(tc_core, test_len); tcase_add_test(tc_core, test_downcase); tcase_add_test(tc_core, test_compare); tcase_add_test(tc_core, test_walk_selected); tcase_add_test(tc_core, test_starts_with); - tcase_add_test(tc_core, test_istr_r); tcase_add_test(tc_core, test_distance); + tcase_add_test(tc_core, test_uniq); suite_add_tcase(s, tc_core); return s; -- 2.39.2