Logo Search packages:      
Sourcecode: w3mmee version File versions  Download package

uirx.c

#include "uirx.h"

#ifndef STATIC_EPSILON_CLOSURE
#define STATIC_EPSILON_CLOSURE (1)
#endif

static uirx_expr_t *
uirx_new_expr(uirx_exprv_t *ev)
{
  if (ev->n >= ev->n_max) {
    ptrdiff_t newmax;
    uirx_expr_t *newv;

    newmax = (ev->n / 2 + 1) * 3;

    if (!(newv = ev->v ? alt_call_realloc(ev->v, sizeof(uirx_expr_t) * newmax) : alt_call_malloc(sizeof(uirx_expr_t) * newmax)))
      return NULL;

    ev->n_max = newmax;
    ev->v = newv;
  }

  return &ev->v[(ev->n)++];
}

static uirx_expr_t *
uirx_add_expr(uirx_parse_stack_t *sp, ptrdiff_t expr_i)
{
  if (sp->last >= 0) {
    uirx_expr_t *new;

    if (!(new = uirx_new_expr(sp->exprv)))
      return NULL;

    new->type = uirx_expr_is_cat;
    new->data.two[0] = sp->last;
    new->data.two[1] = expr_i;
    sp->last = new - sp->exprv->v;
    return new;
  }
  else if ((sp->last = expr_i) >= 0)
    return &sp->exprv->v[expr_i];
  else
    return sp->exprv->v;
}

uirx_alpha_t *
uirx_new_alpha(uirx_nfa_t *nfa)
{
  if (nfa->n >= nfa->n_max) {
    ptrdiff_t newmax;
    uirx_alpha_t *newv;

    newmax = (nfa->n / 2 + 1) * 3;

    if (!(newv = nfa->v ? alt_call_realloc(nfa->v, sizeof(uirx_alpha_t) * newmax) : alt_call_malloc(sizeof(uirx_alpha_t) * newmax)))
      return NULL;

    nfa->n_max = newmax;
    nfa->v = newv;
  }

  return &nfa->v[(nfa->n)++];
}

uirx_expr_t *
uirx_parse_alpha(uirx_parse_stack_t *sp, uirx_alpha_t *alpha)
{
  uirx_expr_t *new;

  if (!(new = uirx_new_expr(sp->exprv)))
    return NULL;

  new->type = uirx_expr_is_alpha;
  new->data.one = alpha - sp->nfa->v;
  return uirx_add_expr(sp, new - sp->exprv->v);
}

uirx_nfa_t *
uirx_parse_start(uirx_parse_stack_t *sp, uirx_parse_stack_t *sup, uirx_alpha_t *alpha)
{
  if (!sp->nfa) {
    if (sup)
      sp->nfa = sup->nfa;
    else if ((sp->nfa = alt_call_malloc(sizeof(uirx_nfa_t))))
      memset(sp->nfa, 0, sizeof(uirx_nfa_t));
    else
      return NULL;
  }

  if (!sp->exprv) {
    if (sup)
      sp->exprv = sup->exprv;
    else if ((sp->exprv = alt_call_malloc(sizeof(uirx_exprv_t))))
      memset(sp->exprv, 0, sizeof(uirx_exprv_t));
    else
      return NULL;
  }

  if (sup) {
    uirx_expr_t *box;

    if (!(box = uirx_new_expr(sup->exprv)))
      return NULL;

    box->type = uirx_expr_is_cat;
    box->data.two[0] = sup->last;
    box->data.two[1] = -2;
    sup->last = box - sup->exprv->v;
  }

  sp->sup = sup;
  sp->last = -1;

  if (!alpha && sup && !sup->sup) {
    if ((alpha = uirx_new_alpha(sp->nfa)))
      return NULL;

    alpha->type = uirx_alpha_is_e;
    alpha->a.c.beg = 0;
    alpha->a.c.end = 0;
    alpha->cb_func = NULL;
  }

  if (alpha) {
    alpha->a.c.end = 0;

    if (!uirx_parse_alpha(sp, alpha))
      return NULL;
  }

  return sp->nfa;
}

static unsigned char *
uirx_new_posmap(uirx_exprv_t *ev, uirx_nfa_t *nfa)
{
  unsigned char *map;

  if (ev->mn >= ev->mn_max) {
    ptrdiff_t newmax;
    unsigned char **newv;

    newmax = (ev->mn / 2 + 1) * 3;

    if (!(newv = (ev->mv ?
              alt_call_realloc(ev->mv, sizeof(unsigned char *) * newmax) :
              alt_call_malloc(sizeof(unsigned char *) * newmax))))
      return NULL;

    ev->mn_max = newmax;
    ev->mv = newv;
  }

  if ((map = alt_call_malloc_atomic(nfa->n_bytes)))
    ev->mv[(ev->mn)++] = map;

  return map;
}

static unsigned char *
uirx_new_zeromap(uirx_exprv_t *ev, uirx_nfa_t *nfa)
{
  unsigned char *map;

  if ((map = uirx_new_posmap(ev, nfa)))
    memset(map, 0, nfa->n_bytes);

  return map;
}

static void
uirx_posmap_union_bang(uirx_nfa_t *nfa, unsigned char *x, unsigned char *y)
{
  if (x && y) {
    size_t i, n;

    for (n = nfa->n_bytes, i = 0 ; i < n ; ++i)
      x[i] |= y[i];
  }
}

static unsigned char *
uirx_posmap_union(uirx_exprv_t *ev, uirx_nfa_t *nfa, unsigned char *x, unsigned char *y)
{
  unsigned char *u;

  if ((u = uirx_new_posmap(ev, nfa))) {
    if (x)
      memcpy(u, x, nfa->n_bytes);
    else
      memset(u, 0, nfa->n_bytes);

    if (y)
      uirx_posmap_union_bang(nfa, u, y);
  }

  return u;
}

#define uirx_posmap_set_bang(x, i) (((unsigned char *)x)[(i) / CHAR_BIT] |= 1U << ((i) % CHAR_BIT))
#define uirx_posmap_test(x, i) ((x) ? (((unsigned char *)x)[(i) / CHAR_BIT] & (1U << ((i) % CHAR_BIT))) : 0)

static int
uirx_set_followpos(uirx_exprv_t *ev, uirx_nfa_t *nfa, unsigned char *x, unsigned char *y)
{
  if (x && y) {
    size_t i, j, k;
    unsigned char mask, *map;

    for (i = j = 0 ; i < nfa->n ; i += CHAR_BIT, ++j)
      if (x[j])
      for (mask = 1, k = 0 ; k < CHAR_BIT ; mask <<= 1, ++k)
        if (x[j] & mask) {
          if (!(map = nfa->v[i + k].followpos.map)) {
            if (!(map = uirx_new_zeromap(ev, nfa)))
            return 0;

            nfa->v[i + k].followpos.map = map;
          }

          uirx_posmap_union_bang(nfa, map, y);
        }
  }

  return 1;
}

static uirx_expr_t *
uirx_calc_for_nfa(uirx_exprv_t *ev, ptrdiff_t i, uirx_nfa_t *nfa)
{
  ptrdiff_t li, ri;
  uirx_expr_t *node, *l, *r;
  unsigned char *fp, *lp;

  node = &ev->v[i];

  switch (node->type) {
  case uirx_expr_is_cat:
    li = node->data.two[0];
    ri = node->data.two[1];

    if (li >= 0) {
      if (!uirx_calc_for_nfa(ev, li, nfa))
      return NULL;

      l = &ev->v[li];

      if (ri >= 0) {
      if (!uirx_calc_for_nfa(ev, ri, nfa))
        return NULL;

      r = &ev->v[ri];

      if (!uirx_set_followpos(ev, nfa, l->lastpos, r->firstpos))
        return NULL;

      if (l->nullable) {
        if (!(fp = uirx_posmap_union(ev, nfa, l->firstpos, r->firstpos)))
          return NULL;
      }
      else
        fp = l->firstpos;

      if (r->nullable) {
        if (!(lp = uirx_posmap_union(ev, nfa, r->lastpos, l->lastpos)))
          return NULL;
      }
      else
        lp = r->lastpos;

      node->nullable = l->nullable && r->nullable;
      node->firstpos = fp;
      node->lastpos = lp;
      }
      else {
      node->nullable = l->nullable;
      node->firstpos = l->firstpos;
      node->lastpos = l->lastpos;
      }
    }
    else if (ri >= 0) {
      if (!uirx_calc_for_nfa(ev, ri, nfa))
      return NULL;

      r = &ev->v[ri];
      node->nullable = r->nullable;
      node->firstpos = r->firstpos;
      node->lastpos = r->lastpos;
    }
    else {
      node->nullable = 1;
      node->firstpos = node->lastpos = NULL;
    }

    break;
  case uirx_expr_is_or:
    li = node->data.two[0];
    ri = node->data.two[1];

    if (li >= 0) {
      if (!uirx_calc_for_nfa(ev, li, nfa))
      return NULL;

      l = &ev->v[li];

      if (ri >= 0) {
      if (!uirx_calc_for_nfa(ev, ri, nfa))
        return NULL;

      r = &ev->v[ri];

      if (!(fp = uirx_posmap_union(ev, nfa, l->firstpos, r->firstpos)))
        return NULL;

      if (!(lp = uirx_posmap_union(ev, nfa, l->lastpos, r->lastpos)))
        return NULL;

      node->nullable = l->nullable || r->nullable;
      node->firstpos = fp;
      node->lastpos = lp;
      }
      else {
      node->nullable = 1;
      node->firstpos = l->firstpos;
      node->lastpos = l->lastpos;
      }
    }
    else if (ri >= 0) {
      if (!uirx_calc_for_nfa(ev, ri, nfa))
      return NULL;

      r = &ev->v[ri];
      node->nullable = 1;
      node->firstpos = r->firstpos;
      node->lastpos = r->lastpos;
    }
    else {
      node->nullable = 1;
      node->firstpos = node->lastpos = NULL;
    }

    break;
  case uirx_expr_is_star:
    if ((li = node->data.one) >= 0) {
      if (!uirx_calc_for_nfa(ev, li, nfa))
      return NULL;

      l = &ev->v[li];
      node->nullable = 1;
      node->firstpos = l->firstpos;
      node->lastpos = l->lastpos;

      if (!uirx_set_followpos(ev, nfa, node->lastpos, node->firstpos))
      return NULL;
    }
    else {
      node->nullable = 1;
      node->firstpos = node->lastpos = NULL;
    }

    break;
  case uirx_expr_is_plus:
    if ((li = node->data.one) >= 0) {
      if (!uirx_calc_for_nfa(ev, li, nfa))
      return NULL;

      l = &ev->v[li];
      node->nullable = l->nullable;
      node->firstpos = l->firstpos;
      node->lastpos = l->lastpos;

      if (!uirx_set_followpos(ev, nfa, node->firstpos, node->lastpos))
      return NULL;

      if (!uirx_set_followpos(ev, nfa, node->lastpos, node->firstpos))
      return NULL;
    }
    else {
      node->nullable = 1;
      node->firstpos = node->lastpos = NULL;
    }

    break;
  case uirx_expr_is_0or1:
    if ((li = node->data.one) >= 0) {
      if (!uirx_calc_for_nfa(ev, li, nfa))
      return NULL;

      l = &ev->v[li];
      node->nullable = 1;
      node->firstpos = l->firstpos;
      node->lastpos = l->lastpos;
    }
    else {
      node->nullable = 1;
      node->firstpos = node->lastpos = NULL;
    }

    break;
  default:
    if ((li = node->data.one) >= 0) {
      unsigned char *posmap;

      if (!(posmap = uirx_new_zeromap(ev, nfa)))
      return NULL;

      uirx_posmap_set_bang(posmap, li);
      node->nullable = 0;
      node->firstpos = node->lastpos = posmap;
    }
    else {
      node->nullable = 1;
      node->firstpos = node->lastpos = NULL;
    }

    break;
  }

  return node;
}

static uirx_expr_t *
uirx_add_expr_to_sup(uirx_parse_stack_t *sp)
{
  uirx_parse_stack_t *sup;
  uirx_expr_t *new, *box;

  if (!(sup = sp->sup) || sup->last < 0 ||
      !(sup->exprv->v[sup->last].type == uirx_expr_is_cat || sup->exprv->v[sup->last].type == uirx_expr_is_or))
    return NULL;

  box = &sup->exprv->v[sup->last];

  if (box->data.two[1] < -1)
    box->data.two[1] = sp->last;
  else if (!(new = uirx_new_expr(sup->exprv)))
    return NULL;
  else {
    new->type = uirx_expr_is_or;
    box = &sup->exprv->v[sup->last];
    new->data.two[0] = box->data.two[1];
    box->data.two[1] = new - sup->exprv->v;
    new->data.two[1] = sp->last;
  }

  return box;
}

uirx_expr_t *
uirx_parse_end(uirx_parse_stack_t *sp, uirx_alpha_t *alpha)
{
  uirx_expr_t *ret;

  if (!(ret = uirx_add_expr_to_sup(sp)))
    return NULL;

  if (!alpha && sp->sup && !sp->sup->sup) {
    if ((alpha = uirx_new_alpha(sp->nfa)))
      return NULL;

    alpha->type = uirx_alpha_is_e;
    alpha->a.c.beg = 0;
    alpha->a.c.end = 0;
    alpha->cb_func = NULL;
  }

  if (alpha) {
    uirx_expr_t *new;
    ptrdiff_t ei;

    alpha->a.c.end = 1;

    if (!(new = uirx_new_expr(sp->sup->exprv)))
      return NULL;

    new->type = uirx_expr_is_alpha;
    new->data.one = alpha - sp->sup->nfa->v;
    ei = new - sp->sup->exprv->v;

    if (!(new = uirx_new_expr(sp->sup->exprv)))
      return NULL;

    new->type = uirx_expr_is_cat;
    ret = &sp->sup->exprv->v[sp->sup->last];
    new->data.two[0] = ret->data.two[1];
    new->data.two[1] = ei;
    ei = new - sp->sup->exprv->v;

    if (!(new = uirx_new_expr(sp->sup->exprv)))
      return NULL;

    ret = &sp->sup->exprv->v[sp->sup->last];
    new->type = uirx_expr_is_cat;
    new->data.two[0] = -1;
    new->data.two[1] = ei;
    ret->data.two[1] = new - sp->sup->exprv->v;
  }

  return ret;
}

uirx_expr_t *
uirx_parse_or(uirx_parse_stack_t *sp)
{
  uirx_expr_t *ret;

  if (!(ret = uirx_add_expr_to_sup(sp)))
    return NULL;

  sp->last = -1;
  return ret;
}

uirx_expr_t *
uirx_parse_postfix(uirx_parse_stack_t *sp, uirx_expr_type_t t)
{
  uirx_expr_t *new;
  ptrdiff_t last;

  if ((last = sp->last) < 0 || !(new = uirx_new_expr(sp->exprv)))
    return NULL;

  new->type = t;

  switch (sp->exprv->v[last].type) {
  case uirx_expr_is_or:
  case uirx_expr_is_cat:
    new->data.one = sp->exprv->v[last].data.two[1];
    sp->exprv->v[last].data.two[1] = new - sp->exprv->v;
    break;
  default:
    new->data.one = last;
    sp->last = new - sp->exprv->v;
    break;
  }

  return new;
}

static void
uirx_epsilon_closure(uirx_nfa_t *nfa, unsigned char *posmap, uirx_posv_t *posv)
{
  ptrdiff_t i, j, k, l, m, n;

  for (k = 0 ; k < posv->n ;)
    for (i = k, k = posv->n ; i < k ; ++i) {
      j = posv->v[i];

      if (nfa->v[j].type == uirx_alpha_is_e)
      for (l = 0, m = nfa->v[j].followpos.posv.n; l < m ; ++l) {
        n = nfa->v[j].followpos.posv.v[l];

        if (!uirx_posmap_test(posmap, n)) {
          uirx_posmap_set_bang(posmap, n);
          posv->v[(posv->n)++] = n;
        }
      }
    }
}

static int
uirx_posmap_to_posv(uirx_nfa_t *nfa, unsigned char *map, uirx_posv_t *posv, ptrdiff_t n)
{
  if (map) {
    ptrdiff_t i, j, k, l;
    unsigned char mask;
    ptrdiff_t *p;

    for (i = j = k = 0 ; i < nfa->n ; i += CHAR_BIT, ++j)
      if (map[j])
      for (mask = 1 ; mask & ~(~0U << CHAR_BIT) ; mask <<= 1)
        if (map[j] & mask)
          ++k;

    posv->n = k;

    if (n < k)
      n = k;

    if (n) {
      if ((posv->v = alt_call_malloc_atomic(sizeof(ptrdiff_t) * n))) {
      for (p = posv->v, i = j = 0 ; i < nfa->n ; i += CHAR_BIT, ++j)
        if (map[j])
          for (mask = 1, l = 0 ; mask & ~(~0U << CHAR_BIT) ; mask <<= 1, ++l)
            if (map[j] & mask)
            *p++ = i + l;
      }
      else
      return 0;
    }
    else
      posv->v = NULL;
  }
  else {
    posv->n = 0;

    if (n) {
      if (!(posv->v = alt_call_malloc_atomic(sizeof(ptrdiff_t) * n)))
      return 0;
    }
    else
      posv->v = NULL;
  }

  return 1;
}

uirx_nfa_t *
uirx_complete_nfa(uirx_parse_stack_t *sp)
{
  uirx_nfa_t *nfa;

  nfa = sp->nfa;
  nfa->n_bytes = (nfa->n + CHAR_BIT - 1) / CHAR_BIT;

  if (sp->last >= 0 && uirx_calc_for_nfa(sp->exprv, sp->last, nfa) &&
      uirx_posmap_to_posv(nfa, sp->exprv->v[sp->last].firstpos, &nfa->first, nfa->n)) {
    size_t i;

    if (!(nfa->posflag = alt_call_malloc_atomic(nfa->n_bytes)))
      goto error;

    if (!(nfa->stack[0].v = alt_call_malloc_atomic(sizeof(ptrdiff_t) * nfa->n)))
      goto error;

    if (!(nfa->stack[1].v = alt_call_malloc_atomic(sizeof(ptrdiff_t) * nfa->n)))
      goto error;

    for (i = 0 ; i < nfa->n ; ++i)
      if (!uirx_posmap_to_posv(nfa, nfa->v[i].followpos.map, &nfa->v[i].followpos.posv,
#if STATIC_EPSILON_CLOSURE
                         nfa->n
#else
                         0
#endif
                         ))
      goto error;

    if (nfa->first.n < nfa->n) {
      memset(nfa->posflag, 0, nfa->n_bytes);
      uirx_epsilon_closure(nfa, nfa->posflag, &nfa->first);
    }

#if STATIC_EPSILON_CLOSURE
    for (i = 0 ; i < nfa->n ; ++i) {
      memset(nfa->posflag, 0, nfa->n_bytes);
      uirx_epsilon_closure(nfa, nfa->posflag, &nfa->v[i].followpos.posv);
    }
#endif

    return nfa;
  }
error:
  return NULL;
}

void
uirx_match_start(uirx_nfa_t *nfa)
{
  nfa->from = 0;
  memcpy(nfa->stack[0].v, nfa->first.v, sizeof(ptrdiff_t) * nfa->first.n);
  nfa->stack[0].n = nfa->first.n;
}

int
uirx_match_v(uirx_wc_t c, uirx_alpha_t *alpha)
{
  size_t b, e;

  for (b = 0, e = alpha->a.v.n ; b < e ;) {
    size_t i = (b + e) / 2;

    if (c < alpha->a.v.cs[i].beg)
      e = i;
    else if (c > alpha->a.v.cs[i].end)
      b = i + 1;
    else
      return 1;
  }

  return 0;
}

ptrdiff_t
uirx_match(uirx_nfa_t *nfa, void *cb_arg, uirx_wc_t alpha)
{
  ptrdiff_t i, j, l, m, n, from, to;

  memset(nfa->posflag, 0, nfa->n_bytes);
  from = nfa->from;
  to = 1 - from;
  nfa->stack[to].n = 0;

  for (i = 0 ; i < nfa->stack[from].n ; ++i) {
    j = nfa->stack[from].v[i];

    if (nfa->v[j].type == uirx_alpha_is_e) {
      if (nfa->v[j].cb_func)
      nfa->v[j].cb_func(nfa->v[j].a.c.beg, cb_arg);
    }
    else if (nfa->v[j].type == uirx_alpha_is_c ?
           (alpha >= nfa->v[j].a.c.beg && alpha <= nfa->v[j].a.c.end) :
           uirx_match_v(alpha, &nfa->v[j])) {
      for (l = 0, m = nfa->v[j].followpos.posv.n; l < m ; ++l) {
      n = nfa->v[j].followpos.posv.v[l];

      if (!uirx_posmap_test(nfa->posflag, n)) {
        uirx_posmap_set_bang(nfa->posflag, n);
        nfa->stack[to].v[(nfa->stack[to].n)++] = n;
      }
      }
    }
  }

#if !STATIC_EPSILON_CLOSURE
  uirx_epsilon_closure(nfa, nfa->posflag, &nfa->stack[to]);
#endif

  if (nfa->stack[to].n)
    nfa->from = to;

  return nfa->stack[to].n;
}

void
uirx_match_end(uirx_nfa_t *nfa, void *cb_arg)
{
  ptrdiff_t i, j;

  for (i = 0 ; i < nfa->stack[nfa->from].n ; ++i) {
    j = nfa->stack[nfa->from].v[i];

    if (nfa->v[j].type == uirx_alpha_is_e && nfa->v[j].cb_func)
      nfa->v[j].cb_func(nfa->v[j].a.c.beg, cb_arg);
  }
}

void
uirx_free_exprv(uirx_exprv_t *exprv)
{
  alt_freer_t freer = alt_set_freer(NULL);

  alt_set_freer(freer);

  if (freer) {
    size_t i;

    for (i = 0 ; i < exprv->mn ; ++i)
      alt_call_free(exprv->mv[i]);

    alt_call_free(exprv->mv);
    alt_call_free(exprv->v);
    alt_call_free(exprv);
  }
}

Generated by  Doxygen 1.6.0   Back to index