Logo Search packages:      
Sourcecode: halibut version File versions

misc.c

/*
 * misc.c: miscellaneous useful items
 */

#include <stdarg.h>
#include "halibut.h"

char *adv(char *s) {
    return s + 1 + strlen(s);
}

struct stackTag {
    void **data;
    int sp;
    int size;
};

stack stk_new(void) {
    stack s;

    s = snew(struct stackTag);
    s->sp = 0;
    s->size = 0;
    s->data = NULL;

    return s;
}

void stk_free(stack s) {
    sfree(s->data);
    sfree(s);
}

void stk_push(stack s, void *item) {
    if (s->size <= s->sp) {
      s->size = s->sp + 32;
      s->data = sresize(s->data, s->size, void *);
    }
    s->data[s->sp++] = item;
}

void *stk_pop(stack s) {
    if (s->sp > 0)
      return s->data[--s->sp];
    else
      return NULL;
}

void *stk_top(stack s) {
    if (s->sp > 0)
      return s->data[s->sp-1];
    else
      return NULL;
}

/*
 * Small routines to amalgamate a string from an input source.
 */
const rdstring empty_rdstring = {0, 0, NULL};
const rdstringc empty_rdstringc = {0, 0, NULL};

void rdadd(rdstring *rs, wchar_t c) {
    if (rs->pos >= rs->size-1) {
      rs->size = rs->pos + 128;
      rs->text = sresize(rs->text, rs->size, wchar_t);
    }
    rs->text[rs->pos++] = c;
    rs->text[rs->pos] = 0;
}
void rdadds(rdstring *rs, wchar_t const *p) {
    int len = ustrlen(p);
    if (rs->pos >= rs->size - len) {
      rs->size = rs->pos + len + 128;
      rs->text = sresize(rs->text, rs->size, wchar_t);
    }
    ustrcpy(rs->text + rs->pos, p);
    rs->pos += len;
}
wchar_t *rdtrim(rdstring *rs) {
    rs->text = sresize(rs->text, rs->pos + 1, wchar_t);
    return rs->text;
}

void rdaddc(rdstringc *rs, char c) {
    if (rs->pos >= rs->size-1) {
      rs->size = rs->pos + 128;
      rs->text = sresize(rs->text, rs->size, char);
    }
    rs->text[rs->pos++] = c;
    rs->text[rs->pos] = 0;
}
void rdaddsc(rdstringc *rs, char const *p) {
    int len = strlen(p);
    if (rs->pos >= rs->size - len) {
      rs->size = rs->pos + len + 128;
      rs->text = sresize(rs->text, rs->size, char);
    }
    strcpy(rs->text + rs->pos, p);
    rs->pos += len;
}
char *rdtrimc(rdstringc *rs) {
    rs->text = sresize(rs->text, rs->pos + 1, char);
    return rs->text;
}

static int compare_wordlists_literally(word *a, word *b) {
    int t;
    while (a && b) {
      if (a->type != b->type)
          return (a->type < b->type ? -1 : +1);   /* FIXME? */
      t = a->type;
      if ((t != word_Normal && t != word_Code &&
           t != word_WeakCode && t != word_Emph) ||
          a->alt || b->alt) {
          int c;
          if (a->text && b->text) {
            c = ustricmp(a->text, b->text);
            if (c)
                return c;
          }
          c = compare_wordlists_literally(a->alt, b->alt);
          if (c)
            return c;
          a = a->next;
          b = b->next;
      } else {
          wchar_t *ap = a->text, *bp = b->text;
          while (*ap && *bp) {
            wchar_t ac = *ap, bc = *bp;
            if (ac != bc)
                return (ac < bc ? -1 : +1);
            if (!*++ap && a->next && a->next->type == t && !a->next->alt)
                a = a->next, ap = a->text;
            if (!*++bp && b->next && b->next->type == t && !b->next->alt)
                b = b->next, bp = b->text;
          }
          if (*ap || *bp)
            return (*ap ? +1 : -1);
          a = a->next;
          b = b->next;
      }
    }

    if (a || b)
      return (a ? +1 : -1);
    else
      return 0;
}

int compare_wordlists(word *a, word *b) {
    /*
     * First we compare only the alphabetic content of the word
     * lists, with case not a factor. If that comes out equal,
     * _then_ we compare the word lists literally.
     */
    struct {
      word *w;
      int i;
      wchar_t c;
    } pos[2];

    pos[0].w = a;
    pos[1].w = b;
    pos[0].i = pos[1].i = 0;

    while (1) {
      /*
       * Find the next alphabetic character in each word list.
       */
      int k;

      for (k = 0; k < 2; k++) {
          /*
           * Advance until we hit either an alphabetic character
           * or the end of the word list.
           */
          while (1) {
            if (!pos[k].w) {
                /* End of word list. */
                pos[k].c = 0;
                break;
            } else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
                /* No characters remaining in this word; move on. */
                pos[k].w = pos[k].w->next;
                pos[k].i = 0;
            } else if (!uisalpha(pos[k].w->text[pos[k].i])) {
                /* This character isn't alphabetic; move on. */
                pos[k].i++;
            } else {
                /* We have an alphabetic! Lowercase it and continue. */
                pos[k].c = utolower(pos[k].w->text[pos[k].i]);
                break;
            }
          }
      }

#ifdef HAS_WCSCOLL
      {
          wchar_t a[2], b[2];
          int ret;

          a[0] = pos[0].c;
          b[0] = pos[1].c;
          a[1] = b[1] = L'\0';

          ret = wcscoll(a, b);
          if (ret)
            return ret;
      }
#else
      if (pos[0].c < pos[1].c)
          return -1;
      else if (pos[0].c > pos[1].c)
          return +1;
#endif

      if (!pos[0].c)
          break;               /* they're equal */

      pos[0].i++;
      pos[1].i++;
    }

    /*
     * If we reach here, the strings were alphabetically equal, so
     * compare in more detail.
     */
    return compare_wordlists_literally(a, b);
}

void mark_attr_ends(word *words)
{
    word *w, *wp;

    wp = NULL;
    for (w = words; w; w = w->next) {
      if (isattr(w->type)) {
          int before = (wp && isattr(wp->type) &&
                    sameattr(wp->type, w->type));
          int after = (w->next && isattr(w->next->type) &&
                   sameattr(w->next->type, w->type));
          w->aux |= (before ?
                   (after ? attr_Always : attr_Last) :
                   (after ? attr_First : attr_Only));
      }
      wp = w;
    }
}

/*
 * This function implements the optimal paragraph wrapping
 * algorithm, pretty much as used in TeX. A cost function is
 * defined for each line of the wrapped paragraph (typically some
 * convex function of the difference between the line's length and
 * its desired length), and a dynamic programming approach is used
 * to optimise globally across all possible layouts of the
 * paragraph to find the one with the minimum total cost.
 * 
 * The function as implemented here gives a choice of two options
 * for the cost function:
 * 
 *  - If `natural_space' is zero, then the algorithm attempts to
 *    make each line the maximum possible width (either `width' or
 *    `subsequentwidth' depending on whether it's the first line of
 *    the paragraph or not), and the cost function is simply the
 *    square of the unused space at the end of each line. This is a
 *    simple mechanism suitable for use in fixed-pitch environments
 *    such as plain text displayed on a terminal.
 * 
 *  - However, if `natural_space' is positive, the algorithm
 *    assumes the medium is fully graphical and that the width of
 *    space characters can be adjusted finely, and it attempts to
 *    make each _space character_ the width given in
 *    `natural_space'. (The provided width function should return
 *    the _minimum_ acceptable width of a space character in this
 *    case.) Therefore, the cost function for a line is dependent
 *    on the number of spaces on that line as well as the amount by
 *    which the line width differs from the optimum.
 */
wrappedline *wrap_para(word *text, int width, int subsequentwidth,
                   int (*widthfn)(void *, word *), void *ctx,
                   int natural_space) {
    wrappedline *head = NULL, **ptr = &head;
    int nwords, wordsize;
    struct wrapword {
      word *begin, *end;
      int width;
      int spacewidth;
      int cost;
      int nwords;
    } *wrapwords;
    int i, j, n;

    /*
     * Break the line up into wrappable components.
     */
    nwords = wordsize = 0;
    wrapwords = NULL;
    while (text) {
      if (nwords >= wordsize) {
          wordsize = nwords + 64;
          wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
      }
      wrapwords[nwords].width = 0;
      wrapwords[nwords].begin = text;
      while (text) {
          wrapwords[nwords].width += widthfn(ctx, text);
          wrapwords[nwords].end = text->next;
          if (text->next && (text->next->type == word_WhiteSpace ||
                         text->next->type == word_EmphSpace ||
                         text->breaks))
            break;
          text = text->next;
      }
      if (text && text->next && (text->next->type == word_WhiteSpace ||
                     text->next->type == word_EmphSpace)) {
          wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
          text = text->next;
      } else {
          wrapwords[nwords].spacewidth = 0;
      }
      nwords++;
      if (text)
          text = text->next;
    }

    /*
     * Perform the dynamic wrapping algorithm: work backwards from
     * nwords-1, determining the optimal wrapping for each terminal
     * subsequence of the paragraph.
     */
    for (i = nwords; i-- ;) {
      int best = -1;
      int bestcost = 0;
      int cost;
      int linelen = 0, spacewidth = 0, minspacewidth = 0;
      int nspaces;
      int thiswidth = (i == 0 ? width : subsequentwidth);

      j = 0;
      nspaces = 0;
      while (i+j < nwords) {
          /*
           * See what happens if we put j+1 words on this line.
           */
          if (spacewidth) {
            nspaces++;
            minspacewidth = spacewidth;
          }
          linelen += spacewidth + wrapwords[i+j].width;
          spacewidth = wrapwords[i+j].spacewidth;
          j++;
          if (linelen > thiswidth) {
            /*
             * If we're over the width limit, abandon ship,
             * _unless_ there is no best-effort yet (which will
             * only happen if the first word is too long all by
             * itself).
             */
            if (best > 0)
                break;
          }

          /*
           * Compute the cost of this line. The method of doing
           * this differs hugely depending on whether
           * natural_space is nonzero or not.
           */
          if (natural_space) {
            if (!nspaces && linelen > thiswidth) {
                /*
                 * Special case: if there are no spaces at all
                 * on the line because one single word is too
                 * long for its line, cost is zero because
                 * there's nothing we can do about it anyway.
                 */
                cost = 0;
            } else {
                int shortfall = thiswidth - linelen;
                int spaceextra = shortfall / (nspaces ? nspaces : 1);
                int spaceshortfall = natural_space -
                  (minspacewidth + spaceextra);

                if (i+j == nwords && spaceshortfall < 0) {
                  /*
                   * Special case: on the very last line of
                   * the paragraph, we don't score penalty
                   * points for having to _stretch_ the line,
                   * since we won't stretch it anyway.
                   * However, we score penalties as normal
                   * for having to squeeze it.
                   */
                  cost = 0;
                } else {
                  /*
                   * Squaring this number is tricky since
                   * it's liable to be quite big. Let's
                   * divide it through by 256.
                   */
                  int x = spaceshortfall >> 8;
                  int xf = spaceshortfall & 0xFF;

                  /*
                   * Not counting strange variable-fixed-
                   * point oddities, we are computing
                   * 
                   *   (x+xf)^2 = x^2 + 2*x*xf + xf*xf
                   * 
                   * except that _our_ xf is 256 times the
                   * one listed there.
                   */

                  cost = x * x;
                  cost += (2 * x * xf) >> 8;
                }
            }
          } else {
            if (i+j == nwords) {
                /*
                 * Special case: if we're at the very end of the
                 * paragraph, we don't score penalty points for the
                 * white space left on the line.
                 */
                cost = 0;
            } else {
                cost = (thiswidth-linelen) * (thiswidth-linelen);
            }
          }

          /*
           * Add in the cost of wrapping all lines after this
           * point too.
           */
          if (i+j < nwords)
            cost += wrapwords[i+j].cost;

          /*
           * We compare bestcost >= cost, not bestcost > cost,
           * because in cases where the costs are identical we
           * want to try to look like the greedy algorithm,
           * because readers are likely to have spent a lot of
           * time looking at greedy-wrapped paragraphs and
           * there's no point violating the Principle of Least
           * Surprise if it doesn't actually gain anything.
           */
          if (best < 0 || bestcost >= cost) {
            bestcost = cost;
            best = j;
          }
      }
      /*
       * Now we know the optimal answer for this terminal
       * subsequence, so put it in wrapwords.
       */
      wrapwords[i].cost = bestcost;
      wrapwords[i].nwords = best;
    }

    /*
     * We've wrapped the paragraph. Now build the output
     * `wrappedline' list.
     */
    i = 0;
    while (i < nwords) {
      wrappedline *w = snew(wrappedline);
      *ptr = w;
      ptr = &w->next;
      w->next = NULL;

      n = wrapwords[i].nwords;
      w->begin = wrapwords[i].begin;
      w->end = wrapwords[i+n-1].end;

      /*
       * Count along the words to find nspaces and shortfall.
       */
      w->nspaces = 0;
      w->shortfall = width;
      for (j = 0; j < n; j++) {
          w->shortfall -= wrapwords[i+j].width;
          if (j < n-1 && wrapwords[i+j].spacewidth) {
            w->nspaces++;
            w->shortfall -= wrapwords[i+j].spacewidth;
          }
      }
      i += n;
    }

    sfree(wrapwords);

    return head;
}

void wrap_free(wrappedline *w) {
    while (w) {
      wrappedline *t = w->next;
      sfree(w);
      w = t;
    }
}

void cmdline_cfg_add(paragraph *cfg, char *string)
{
    wchar_t *ustring;
    int upos, ulen, pos, len;

    ulen = 0;
    while (cfg->keyword[ulen])
      ulen += 1 + ustrlen(cfg->keyword+ulen);
    len = 0;
    while (cfg->origkeyword[len])
      len += 1 + strlen(cfg->origkeyword+len);

    ustring = ufroma_locale_dup(string);

    upos = ulen;
    ulen += 2 + ustrlen(ustring);
    cfg->keyword = sresize(cfg->keyword, ulen, wchar_t);
    ustrcpy(cfg->keyword+upos, ustring);
    cfg->keyword[ulen-1] = L'\0';

    pos = len;
    len += 2 + strlen(string);
    cfg->origkeyword = sresize(cfg->origkeyword, len, char);
    strcpy(cfg->origkeyword+pos, string);
    cfg->origkeyword[len-1] = '\0';

    sfree(ustring);
}

paragraph *cmdline_cfg_new(void)
{
    paragraph *p;

    p = snew(paragraph);
    memset(p, 0, sizeof(*p));
    p->type = para_Config;
    p->next = NULL;
    p->fpos.filename = "<command line>";
    p->fpos.line = p->fpos.col = -1;
    p->keyword = ustrdup(L"\0");
    p->origkeyword = dupstr("\0");

    return p;
}

paragraph *cmdline_cfg_simple(char *string, ...)
{
    va_list ap;
    char *s;
    paragraph *p;

    p = cmdline_cfg_new();
    cmdline_cfg_add(p, string);

    va_start(ap, string);
    while ((s = va_arg(ap, char *)) != NULL)
      cmdline_cfg_add(p, s);
    va_end(ap);

    return p;
}

Generated by  Doxygen 1.6.0   Back to index