Logo Search packages:      
Sourcecode: w3mmee version File versions  Download package

wcrx.c

#include <errno.h>
#include <setjmp.h>
#include "wcrx.h"

/*
 * Report a fatal compile error and abandon the current operation.
 *
 * The message is formatted through the core's evprintf hook, which is
 * given desc->evprintf_arg, the format string and the variadic arguments.
 * Control then leaves this function for good: when desc->continuation is
 * set it is the target of a longjmp(), otherwise the process exits.  In
 * both cases desc->core->error_status is the value handed over.
 */
void
wcrx_ethrow(wcrx_parser_desc_t *desc, const char *frmt, ...)
{
  va_list args;

  va_start(args, frmt);
  desc->core->evprintf(desc->evprintf_arg, frmt, args);
  va_end(args);

  /* No recovery point registered: terminate the process. */
  if (!desc->continuation)
    exit(desc->core->error_status);

  longjmp(*desc->continuation, desc->core->error_status);
}

/*
 * Report a non-fatal diagnostic.
 *
 * Formats the message through the core's evprintf hook (same hook that
 * wcrx_ethrow() uses) and returns normally to the caller.
 */
void
wcrx_eprintf(wcrx_parser_desc_t *desc, const char *frmt, ...)
{
  va_list args;

  va_start(args, frmt);
  desc->core->evprintf(desc->evprintf_arg, frmt, args);
  va_end(args);
}

/*
 * Find the macro registered for code point `c` in `tab`.
 *
 * The table is searched by bisection on tab->v[i].c, so it is expected to
 * be sorted in ascending order of that field.
 *
 * Returns the matching entry's `macro` pointer, or NULL when `c` is not
 * present or when no table was supplied at all (callers pass the core's
 * table pointers through without checking them).
 */
static wcrx_compile_macro_t *
wcrx_lookup_macro(uirx_wc_t c, wcrx_macro_tab_t *tab)
{
  size_t lo, hi;

  if (!tab)
    return NULL;

  lo = 0;
  hi = tab->n;

  while (lo < hi) {
    /* lo + (hi - lo) / 2 cannot overflow, unlike (lo + hi) / 2. */
    size_t mid = lo + (hi - lo) / 2;

    if (c < tab->v[mid].c)
      hi = mid;
    else if (c > tab->v[mid].c)
      lo = mid + 1;
    else
      return tab->v[mid].macro;
  }

  return NULL;
}

/*
 * Destructively reverse the `prev`-linked list `s` and prepend the
 * reversed nodes to `d`.
 *
 * Every node of `s` has its `prev` pointer rewritten.  Returns the head of
 * the combined list (which is `d` itself when `s` is empty).
 */
static wcrx_wcl_t *
wcrx_nreverse_wcl(wcrx_wcl_t *s, wcrx_wcl_t *d)
{
  wcrx_wcl_t *node, *rest;

  for (node = s ; node ; node = rest) {
    rest = node->prev;
    node->prev = d;
    d = node;
  }

  return d;
}

/*
 * Merge-sort a `prev`-linked list of ranges in place.
 *
 * Nodes are ordered by `beg`, ties broken by `end`; following `prev` from
 * the returned head visits them in ascending order.  When the two halves
 * being merged offer nodes with identical `beg` and `end`, the one from
 * the second half is dropped from the result.  Lists of zero or one node
 * are returned untouched.
 */
wcrx_wcl_t *
wcrx_compile_class_sort(wcrx_wcl_t *wcl)
{
  wcrx_wcl_t *half[2] = {NULL, NULL};
  wcrx_wcl_t *merged = NULL;
  wcrx_wcl_t *next;
  int side = 0;

  if (!wcl || !wcl->prev)
    return wcl;

  /* Deal the nodes alternately onto the two halves. */
  while (wcl) {
    next = wcl->prev;
    wcl->prev = half[side];
    half[side] = wcl;
    wcl = next;
    side = 1 - side;
  }

  half[0] = wcrx_compile_class_sort(half[0]);
  half[1] = wcrx_compile_class_sort(half[1]);

  /* Merge: `merged` is built back to front (largest node at its head). */
  while (half[0] && half[1]) {
    if (half[0]->beg != half[1]->beg)
      side = (half[0]->beg < half[1]->beg) ? 0 : 1;
    else if (half[0]->end != half[1]->end)
      side = (half[0]->end < half[1]->end) ? 0 : 1;
    else {
      /* Exact duplicate: keep only the node from the first half. */
      half[1] = half[1]->prev;
      continue;
    }

    next = half[side]->prev;
    half[side]->prev = merged;
    merged = half[side];
    half[side] = next;
  }

  /* Flip the merged part into ascending order in front of the leftover. */
  return wcrx_nreverse_wcl(merged, half[1] ? half[1] : half[0]);
}

/*
 * Turn a list of ranges into one alphabet entry and feed it to the parser.
 *
 * A fresh alpha is always obtained from csp->nfa first.  With an empty
 * list nothing more happens.  A single range is stored inline
 * (uirx_alpha_is_c); several ranges are copied into a newly allocated
 * array (uirx_alpha_is_v).  If the list's first two nodes are in
 * descending `beg` order the list is reversed before copying.
 *
 * `status` is accepted but not used here.  Allocation or parser failures
 * are reported through wcrx_ethrow().
 */
void
wcrx_compile_class_alpha(wcrx_parser_desc_t *desc, uirx_parse_stack_t *csp, int status, wcrx_wcl_t *wcl)
{
  uirx_alpha_t *alpha = uirx_new_alpha(csp->nfa);

  if (!alpha)
    wcrx_ethrow(desc, "uirx_new_alpha(csp->nfa): %s\n", strerror(errno));

  if (!wcl)
    return;

  if (!wcl->prev) {
    /* Exactly one range: no array needed. */
    alpha->type = uirx_alpha_is_c;
    alpha->a.c.beg = wcl->beg;
    alpha->a.c.end = wcl->end;
  }
  else {
    size_t count, i;
    wcrx_wcl_t *node;
    uirx_alpha_range_t *ranges;

    /* Head larger than its successor: the list is stored back to front. */
    if (wcl->prev->beg < wcl->beg)
      wcl = wcrx_nreverse_wcl(wcl, NULL);

    count = 0;

    for (node = wcl ; node ; node = node->prev)
      ++count;

    ranges = alt_call_malloc_atomic(sizeof(uirx_alpha_range_t) * count);

    if (!ranges)
      wcrx_ethrow(desc,
                  "alt_call_malloc_atomic(sizeof(uirx_alpha_range_t) * %lu): %s\n",
                  (unsigned long)count, strerror(errno));

    alpha->type = uirx_alpha_is_v;
    alpha->a.v.cs = ranges;
    alpha->a.v.n = count;

    for (i = 0, node = wcl ; node ; node = node->prev, ++i) {
      ranges[i].beg = node->beg;
      ranges[i].end = node->end;
    }
  }

  if (!(uirx_parse_alpha(csp, alpha)))
    wcrx_ethrow(desc, "uirx_parse_alpha(csp, alpha): %s\n", strerror(errno));
}

/*
 * Finish a character class: normalise the collected ranges and emit them.
 *
 * The list is sorted first.  Then, depending on `status`:
 *
 *  - WCRX_CLASS_COMPLEMENT set: the gaps between the sorted ranges (from 0
 *    up to UIRX_WC_MAX) are computed, reusing the list's own nodes to hold
 *    them, and emitted.
 *  - otherwise: ranges with beg > end are skipped and overlapping or
 *    adjacent ranges are merged before being emitted.
 *
 * The resulting list is handed to wcrx_compile_class_alpha(); nothing is
 * emitted when the result is empty.  Nodes of `wcl` are modified in place.
 */
void
wcrx_compile_class_end(wcrx_parser_desc_t *desc, uirx_parse_stack_t *sp, int status, wcrx_wcl_t *wcl)
{
  uirx_wc_t beg, tbeg, tend;
  wcrx_wcl_t *d, *tprev;

  wcl = wcrx_compile_class_sort(wcl);

  if (status & WCRX_CLASS_COMPLEMENT) {
    wcrx_wcl_t last;

    /* `beg` is the lowest code point not yet covered by any input range. */
    for (d = NULL, beg = 0 ; wcl ;) {
      /* Snapshot the node: it may be overwritten with a gap just below. */
      tprev = wcl->prev;
      tbeg = wcl->beg;
      tend = wcl->end;

      if (beg < tbeg) {
        /* Gap [beg, tbeg - 1] precedes this range; store it in the node. */
        wcl->beg = beg;
        wcl->end = tbeg - 1U;
        wcl->prev = d;
        d = wcl;
      }

      wcl = tprev;

      if (tend < UIRX_WC_MAX) {
        if (beg <= tend)
          beg = tend + 1U;
      }
      else {
        /* Input reaches the top of the code space: no trailing gap. */
        if (d)
          wcrx_compile_class_alpha(desc, sp, status, d);

        return;
      }
    }

    /* Trailing gap from `beg` up to the maximum code point. */
    last.prev = d;
    last.beg = beg;
    last.end = UIRX_WC_MAX;
    wcrx_compile_class_alpha(desc, sp, status, &last);
  }
  else {
    d = NULL;

    for (;;) {
      /* Inverted ranges denote nothing; drop them. */
      while (wcl && wcl->beg > wcl->end)
        wcl = wcl->prev;

      if (!wcl)
        break;

      if (!d) {
        /* First usable range starts the output list. */
        d = wcl;
        wcl = d->prev;
        d->prev = NULL;
      }
      else if (d->end + 1U < wcl->beg) {
        /* Disjoint and not adjacent: start a new output range. */
        tprev = wcl->prev;
        wcl->prev = d;
        d = wcl;
        wcl = tprev;
      }
      else {
        /* Overlapping or adjacent: extend the current output range. */
        if (d->end < wcl->end)
          d->end = wcl->end;

        wcl = wcl->prev;
      }

      /*
       * Once the output reaches UIRX_WC_MAX nothing further can add to it.
       * This test must also run after a merge (it used to be skipped
       * there), otherwise the `d->end + 1U` above may wrap around on the
       * next iteration and a redundant, overlapping range gets appended.
       */
      if (!(d->end < UIRX_WC_MAX))
        break;
    }

    if (d)
      wcrx_compile_class_alpha(desc, sp, status, d);
  }
}

/*
 * Parse the body of a character class and emit it.
 *
 * Code points are read with wcrx_read_alpha() until WCRX_OPC_ECLASS or the
 * core's eof_char is seen.  `wcl` is the list of ranges collected so far
 * (linked through `prev`); `beg` is a code point that has been read but not
 * yet committed to the list, with eof_char meaning "none pending".
 * WCRX_CLASS_RANGE in `status` records that a range operator followed
 * `beg` and its upper bound is still to come.
 *
 * New list nodes are automatic variables: each time one is added the
 * function recurses with the extended list and returns right after, so the
 * nodes stay alive until wcrx_compile_class_end() has run at the innermost
 * level.
 */
void
wcrx_compile_class(wcrx_parser_desc_t *desc, uirx_parse_stack_t *sp, int status, wcrx_wcl_t *wcl, uirx_wc_t beg)
{
  uirx_wc_t wc;

  while ((wc = wcrx_read_alpha(desc)) != desc->core->eof_char)
    switch (wc) {
    case WCRX_OPC_NCLASS:
      /* Only acts as the complement operator at the very start of the
         class (nothing pending, nothing collected, no flags yet). */
      if (beg == desc->core->eof_char && !wcl &&
        !(status & (WCRX_CLASS_RANGE | WCRX_CLASS_COMPLEMENT))) {
      /* NOTE: this local array shadows the `wcl` parameter. */
      wcrx_wcl_t wcl[2], *last;

      /* Seed the list with bof_char and eof_char (once if they are equal)
         before continuing in complement mode. */
      if (desc->core->bof_char != desc->core->eof_char) {
        wcl[0].prev = NULL;
        wcl[0].beg = wcl[0].end = desc->core->bof_char;
        wcl[1].prev = &wcl[0];
        wcl[1].beg = wcl[1].end = desc->core->eof_char;
        last = &wcl[1];
      }
      else {
        wcl[0].prev = NULL;
        wcl[0].beg = wcl[0].end = desc->core->bof_char;
        last = &wcl[0];
      }
      
      wcrx_compile_class(desc, sp, status | WCRX_CLASS_COMPLEMENT, last, desc->core->eof_char);
      return;
      }
      else 
      goto literal;
    case WCRX_OPC_RANGE:
      /* With no pending lower bound, or a range already open, the range
         operator is an ordinary character. */
      if (beg == desc->core->eof_char || status & WCRX_CLASS_RANGE)
      goto literal;

      status |= WCRX_CLASS_RANGE;
      break;
    case WCRX_OPC_ECLASS:
      goto end;
    case WCRX_OPC_ESC:
      /* An escape at end of input stands for itself. */
      if ((wc = wcrx_read_alpha(desc)) == desc->core->eof_char)
      wc = WCRX_OPC_ESC;
      else if (!(status & WCRX_CLASS_RANGE)) {
      wcrx_compile_macro_t *p;
 
      /* A class macro takes over the rest of the class: commit any pending
         code point, hand the list to the macro handler and return. */
      if ((p = wcrx_lookup_macro(wc, desc->core->class_macro_tab))) {
        if (beg != desc->core->eof_char) {
          wcrx_wcl_t more;

          more.prev = wcl;
          more.beg = more.end = beg;
          p->func.class(desc, sp, status, &more, wc, p->arg);
        }
        else
          p->func.class(desc, sp, status, wcl, wc, p->arg);

        return;
      }
      }
      /* Not a macro (or a range is open): falls through and is treated as
         a literal. */
    default:
    literal:
      if (beg != desc->core->eof_char) {
      wcrx_wcl_t next;

      /* Commit the pending code point, either as the range beg..wc or as
         the single point beg (in which case wc becomes the new pending
         code point). */
      next.prev = wcl;
      next.beg = beg;

      if (status & WCRX_CLASS_RANGE) {
        next.end = wc;
        wc = desc->core->eof_char;
      }
      else
        next.end = beg;

      wcrx_compile_class(desc, sp, status & ~WCRX_CLASS_RANGE, &next, wc);
      return;
      }

      /* Nothing pending yet: remember this code point. */
      beg = wc;
      break;
    }

  /* Input ended without WCRX_OPC_ECLASS. */
  wcrx_eprintf(desc, "character class has not been closed, now closing\n");
end:
  if (beg != desc->core->eof_char) {
    if (status & WCRX_CLASS_RANGE) {
      /* A range operator with no upper bound: both the pending code point
         and the operator itself become single-point members. */
      wcrx_wcl_t more[2];

      more[0].prev = wcl;
      more[0].beg = more[0].end = beg;
      more[1].prev = &more[0];
      more[1].beg = more[1].end = WCRX_OPC_RANGE;
      wcrx_compile_class_end(desc, sp, status, &more[1]);
    }
    else {
      wcrx_wcl_t more;

      more.prev = wcl;
      more.beg = more.end = beg;
      wcrx_compile_class_end(desc, sp, status, &more);
    }
  }
  else
    wcrx_compile_class_end(desc, sp, status, wcl);
}

/*
 * Compile one group of the pattern into the parse stack.
 *
 * A new parse level `sp` is opened below `sup` and code points are read
 * with wcrx_read_alpha() until the group is closed by WCRX_OPC_EGROUP or
 * the core's eof_char is reached.  Nested groups recurse.  Each group gets
 * a sequence number taken from desc->group_nth; when the core supplies
 * group_beg_func / group_end_func, an event alpha (uirx_alpha_is_e)
 * carrying that number and the callback is attached at the corresponding
 * end of the group.
 *
 * Hard failures are reported through wcrx_ethrow().  A misplaced closing
 * operator or postfix operator is reported through wcrx_eprintf() and then
 * compiled as a literal character.
 */
void
wcrx_compile_group(wcrx_parser_desc_t *desc, uirx_parse_stack_t *sup)
{
  uirx_wc_t wc;
  unsigned int g_nth;
  uirx_alpha_t *alpha;
  uirx_parse_stack_t sp = {NULL};
  uirx_expr_type_t t;

  g_nth = (desc->group_nth)++;

  if (desc->core->group_beg_func) {
    if (!(alpha = uirx_new_alpha(sup->nfa)))
      wcrx_ethrow(desc, "uirx_new_alpha(sup->nfa): %s\n", strerror(errno));

    alpha->type = uirx_alpha_is_e;
    alpha->a.c.beg = g_nth;
    alpha->a.c.end = 0;
    alpha->cb_func = desc->core->group_beg_func;
  }
  else
    alpha = NULL;

  if (!uirx_parse_start(&sp, sup, alpha))
    wcrx_ethrow(desc, "uirx_parse_start(&sp, sup, alpha): %s\n", strerror(errno));

  while ((wc = wcrx_read_alpha(desc)) != desc->core->eof_char) {
    switch (wc) {
    case WCRX_OPC_BGROUP:
      wcrx_compile_group(desc, &sp);
      break;
    case WCRX_OPC_OR:
      if (!uirx_parse_or(&sp))
        wcrx_ethrow(desc, "uirx_parse_or(&sp): %s\n", strerror(errno));

      break;
    case WCRX_OPC_EGROUP:
      if (desc->core->group_end_func) {
        if (!(alpha = uirx_new_alpha(sp.nfa)))
          wcrx_ethrow(desc, "uirx_new_alpha(sp.nfa): %s\n", strerror(errno));

        alpha->type = uirx_alpha_is_e;
        alpha->a.c.beg = g_nth;
        alpha->a.c.end = 0;
        alpha->cb_func = desc->core->group_end_func;
      }
      else
        alpha = NULL;

      /*
       * errno is what separates a real failure from "nothing to close"
       * below, so clear any stale value left by earlier calls first.
       */
      errno = 0;

      if (!uirx_parse_end(&sp, alpha)) {
        if (errno)
          wcrx_ethrow(desc, "uirx_parse_end(&sp, alpha): %s\n", strerror(errno));

        wcrx_eprintf(desc, "no group found, treated as literal '%c'\n", WCRX_OPC_EGROUP);
        goto literal;
      }

      return;
    case WCRX_OPC_0OR1:
      t = uirx_expr_is_0or1;
      goto postfix;
    case WCRX_OPC_PCLOSURE:
      t = uirx_expr_is_plus;
      goto postfix;
    case WCRX_OPC_CLOSURE:
      t = uirx_expr_is_star;
      /* fallthrough */
    postfix:
      /* Same stale-errno concern as for uirx_parse_end() above. */
      errno = 0;

      if (!uirx_parse_postfix(&sp, t)) {
        if (errno)
          wcrx_ethrow(desc, "uirx_parse_postfix(&sp, '%c'): %s\n", (int)wc, strerror(errno));

        wcrx_eprintf(desc, "mis-placed postfix operator, treated as literal '%c'\n", (int)wc);
        goto literal;
      }

      break;
    case WCRX_OPC_WILDCHAR:
      {
        /* The wildcard is compiled as the complement of { WCRX_OPC_LF }. */
        wcrx_wcl_t dot = {NULL, WCRX_OPC_LF, WCRX_OPC_LF};

        wcrx_compile_class_end(desc, &sp, WCRX_CLASS_COMPLEMENT, &dot);
        break;
      }
    case WCRX_OPC_BCLASS:
      wcrx_compile_class(desc, &sp, 0, NULL, desc->core->eof_char);
      break;
    case WCRX_OPC_ESC:
      /* An escape at end of input stands for itself. */
      if ((wc = wcrx_read_alpha(desc)) == desc->core->eof_char)
        wc = WCRX_OPC_ESC;
      else {
        wcrx_compile_macro_t *p;

        if ((p = wcrx_lookup_macro(wc, desc->core->expr_macro_tab))) {
          p->func.expr(desc, &sp, wc, p->arg);
          break;
        }
      }

      /* Escaped non-macro characters bypass desc->alpha_filter. */
      goto literal;
    case WCRX_OPC_BOF:
      wc = desc->core->bof_char;
      goto literal;
    case WCRX_OPC_EOF:
      wc = desc->core->eof_char;
      /* fallthrough: unlike the BOF anchor, this one goes through the filter */
    default:
      if (desc->alpha_filter)
        wc = desc->alpha_filter(wc, desc);
    literal:
      if (!(alpha = uirx_new_alpha(sp.nfa)))
        wcrx_ethrow(desc, "uirx_new_alpha(sp.nfa): %s\n", strerror(errno));

      alpha->type = uirx_alpha_is_c;
      alpha->a.c.beg = alpha->a.c.end = wc;

      if (!(uirx_parse_alpha(&sp, alpha)))
        wcrx_ethrow(desc, "uirx_parse_alpha(&sp, alpha): %s\n", strerror(errno));

      break;
    }
  }

  /* Input ended.  Only warn when the enclosing level itself has a parent. */
  if (sup->sup)
    wcrx_eprintf(desc, "group has not been closed, now closing\n");

  if (desc->core->group_end_func) {
    if (!(alpha = uirx_new_alpha(sp.nfa)))
      wcrx_ethrow(desc, "uirx_new_alpha(sp.nfa): %s\n", strerror(errno));

    alpha->type = uirx_alpha_is_e;
    alpha->a.c.beg = g_nth;
    alpha->a.c.end = 0;
    alpha->cb_func = desc->core->group_end_func;
  }
  else
    alpha = NULL;

  if (!uirx_parse_end(&sp, alpha))
    wcrx_ethrow(desc, "uirx_parse_end(&sp, alpha): %s\n", strerror(errno));
}

/*
 * Compile the whole pattern described by `desc`.
 *
 * Opens the root parse level, compiles the pattern as the outermost group
 * and completes the NFA.  Returns the NFA held by the root parse level;
 * failures are reported through wcrx_ethrow().
 */
uirx_nfa_t *
wcrx_compile(wcrx_parser_desc_t *desc)
{
  uirx_parse_stack_t sp = {NULL};

  /* Root level: no parent stack and no group-begin event. */
  if (!uirx_parse_start(&sp, NULL, NULL)) {
    wcrx_ethrow(desc, "uirx_parse_start(&sp, NULL, NULL): %s\n", strerror(errno));
  }

  wcrx_compile_group(desc, &sp);

  if (!uirx_complete_nfa(&sp)) {
    wcrx_ethrow(desc, "uirx_complete_nfa(&sp): %s\n", strerror(errno));
  }

  return sp.nfa;
}

/*
 * Append a copy of the macro's range list `src` to `dst`, then continue
 * compiling.
 *
 * Each recursion level copies one node of `src` into an automatic variable
 * linked in front of `dst`, so the copies live on the call stack for as
 * long as the continuation runs.  Once `src` is exhausted the combined
 * list is passed to wcrx_compile_class_end() when WCRX_CLASS_INTERNAL is
 * set in `status`, and to wcrx_compile_class() (with no pending code
 * point) otherwise.
 */
void
wcrx_compile_class_macro_class_body(wcrx_parser_desc_t *desc, uirx_parse_stack_t *sp, int status, wcrx_wcl_t *dst, wcrx_wcl_t *src)
{
  wcrx_wcl_t copy;

  if (!src) {
    /* Every macro range has been copied: hand the list on. */
    if (status & WCRX_CLASS_INTERNAL)
      wcrx_compile_class_end(desc, sp, status, dst);
    else
      wcrx_compile_class(desc, sp, status, dst, desc->core->eof_char);

    return;
  }

  copy.prev = dst;
  copy.beg = src->beg;
  copy.end = src->end;
  wcrx_compile_class_macro_class_body(desc, sp, status, &copy, src->prev);
}

/*
 * Class-macro handler: adds the ranges given by `arg` (a wcrx_wcl_t list)
 * to the class under construction `dst` and carries on compiling the
 * class.  `wc` is not used.
 */
void
wcrx_compile_class_macro_class(wcrx_parser_desc_t *desc, uirx_parse_stack_t *sp,
                               int status, wcrx_wcl_t *dst, uirx_wc_t wc, void *arg)
{
  wcrx_wcl_t *macro_ranges = arg;

  wcrx_compile_class_macro_class_body(desc, sp, status, dst, macro_ranges);
}

/*
 * Expression-macro handler: compiles the ranges given by `arg` (a
 * wcrx_wcl_t list) as a complete class of their own, starting from an
 * empty list with WCRX_CLASS_INTERNAL set.  `wc` is not used.
 */
void
wcrx_compile_expr_macro_class(wcrx_parser_desc_t *desc, uirx_parse_stack_t *sp, uirx_wc_t wc, void *arg)
{
  wcrx_wcl_t *macro_ranges = arg;

  wcrx_compile_class_macro_class_body(desc, sp, WCRX_CLASS_INTERNAL, NULL, macro_ranges);
}

/*
 * Add the complement of the macro's range list `src` to the class under
 * construction `wcl`, then continue compiling.
 *
 * While `src` is non-empty, each recursion level copies one of its nodes
 * into an automatic variable linked in front of `dst`, so the working copy
 * lives on the call stack.  When `src` is exhausted, bof_char and eof_char
 * are added to the copy, the copy is sorted, and the gaps between its
 * ranges (from 0 up to UIRX_WC_MAX) are linked in front of `wcl`.  The
 * result goes to wcrx_compile_class_end() when WCRX_CLASS_INTERNAL is set
 * in `status`, and to wcrx_compile_class() otherwise.
 *
 * Callers start the recursion with `dst` == NULL.
 */
void
wcrx_compile_class_macro_complement_body(wcrx_parser_desc_t *desc, uirx_parse_stack_t *sp, int status,
                                         wcrx_wcl_t *wcl, wcrx_wcl_t *dst, wcrx_wcl_t *src)
{
  wcrx_wcl_t temp;

  if (src) {
    temp.prev = dst;
    temp.beg = src->beg;
    temp.end = src->end;
    wcrx_compile_class_macro_complement_body(desc, sp, status, wcl, &temp, src->prev);
  }
  else {
    wcrx_wcl_t bofl, eofl, *l, *next, *spare, *out;
    uirx_wc_t beg, lbeg, lend;
    int open_ended = 1;

    bofl.prev = dst;
    bofl.beg = desc->core->bof_char;
    bofl.end = desc->core->bof_char;
    eofl.prev = &bofl;
    eofl.beg = desc->core->eof_char;
    eofl.end = desc->core->eof_char;

    l = wcrx_compile_class_sort(&eofl);

    /*
     * Walk the sorted ranges and collect the gaps between them.
     *
     * `spare` is always a node whose contents are no longer needed: this
     * level's unused `temp` at first, afterwards an input node that has
     * already been read.  A node is linked into `out` only once a gap has
     * actually been stored in it.  (Linking a node before knowing whether
     * a gap precedes the next range would leave a stale or uninitialised
     * node in the result whenever two ranges touch or overlap, or the
     * first range starts at 0.)
     *
     * Gaps are prepended, so `out` ends up with the highest gap at its
     * head and the caller's `wcl` at its tail.
     */
    spare = &temp;
    out = wcl;

    for (beg = 0 ; l ; l = next) {
      next = l->prev;
      lbeg = l->beg;
      lend = l->end;

      if (beg < lbeg) {
        spare->beg = beg;
        spare->end = lbeg - 1U;
        spare->prev = out;
        out = spare;
        spare = l;              /* this input node is free for reuse now */
      }

      if (!(lend < UIRX_WC_MAX)) {
        /* Input reaches the top of the code space: no trailing gap. */
        open_ended = 0;
        break;
      }

      if (beg <= lend)
        beg = lend + 1U;
    }

    if (open_ended) {
      /* Trailing gap from `beg` up to the maximum code point. */
      spare->beg = beg;
      spare->end = UIRX_WC_MAX;
      spare->prev = out;
      out = spare;
    }

    if (status & WCRX_CLASS_INTERNAL)
      wcrx_compile_class_end(desc, sp, status, out);
    else
      wcrx_compile_class(desc, sp, status, out, desc->core->eof_char);
  }
}

/*
 * Class-macro handler: adds the complement of the ranges given by `arg`
 * (a wcrx_wcl_t list) to the class under construction `dst` and carries on
 * compiling the class.  `wc` is not used.
 */
void
wcrx_compile_class_macro_complement(wcrx_parser_desc_t *desc, uirx_parse_stack_t *sp, int status,
                                    wcrx_wcl_t *dst, uirx_wc_t wc, void *arg)
{
  wcrx_wcl_t *macro_ranges = arg;

  /* The working copy of the macro's ranges starts out empty. */
  wcrx_compile_class_macro_complement_body(desc, sp, status, dst, NULL, macro_ranges);
}

/*
 * Expression-macro handler: compiles the complement of the ranges given by
 * `arg` (a wcrx_wcl_t list) as a complete class of their own.  The ranges
 * are copied as-is and the complementing is left to the class finisher by
 * setting WCRX_CLASS_COMPLEMENT together with WCRX_CLASS_INTERNAL.  `wc`
 * is not used.
 */
void
wcrx_compile_expr_macro_complement(wcrx_parser_desc_t *desc, uirx_parse_stack_t *sp, uirx_wc_t wc, void *arg)
{
  int status = WCRX_CLASS_COMPLEMENT | WCRX_CLASS_INTERNAL;
  wcrx_wcl_t *macro_ranges = arg;

  wcrx_compile_class_macro_class_body(desc, sp, status, NULL, macro_ranges);
}

Generated by  Doxygen 1.6.0   Back to index