vim/src/regexp.c

/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
 */

// By default: do not create debugging logs or files related to regular
// expressions, even when compiling with -DDEBUG.
// Uncomment the second line to get the regexp debugging.
#undef DEBUG
// #define DEBUG

#include "vim.h"

#ifdef DEBUG
// show/save debugging data when BT engine is used
# define BT_REGEXP_DUMP
// save the debugging data to a file instead of displaying it
# define BT_REGEXP_LOG
# define BT_REGEXP_DEBUG_LOG
# define BT_REGEXP_DEBUG_LOG_NAME	"bt_regexp_debug.log"
#endif

#ifdef FEAT_RELTIME
static sig_atomic_t dummy_timeout_flag = 0;
static volatile sig_atomic_t *timeout_flag = &dummy_timeout_flag;
#endif

/*
 * "Magic" characters have a special meaning in a pattern instead of matching
 * literally.  They are stored as negative numbers, which keeps them apart
 * from literal characters (possibly multi-byte).  Only ASCII characters can
 * be Magic.
 */
#define Magic(x)	((int)(x) - 256)
#define un_Magic(x)	((x) + 256)
#define is_Magic(x)	((x) < 0)

/*
 * Return the literal value of "x": the Magic encoding is removed when
 * present, otherwise "x" is returned unchanged.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}

/*
 * Flip the magicness of "x": a Magic character becomes literal and a literal
 * character becomes Magic.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}

#ifdef FEAT_RELTIME
static int timeout_nesting = 0;

/*
 * Start a timer that will cause the regexp to abort after "msec".
 * Nesting does not work well: when a timeout was already set the first one
 * stays in effect and this call only increases the nesting count.
 * The caller must make sure there is a matching disable_regexp_timeout() call!
 */
    void
init_regexp_timeout(long msec)
{
    if (timeout_nesting != 0)
    {
	// A timer was set before, only remember the extra level.
	++timeout_nesting;
	return;
    }

    timeout_flag = start_timeout(msec);
    timeout_nesting = 1;
}

    void
disable_regexp_timeout(void)
{
    if (timeout_nesting == 0)
	iemsg("disable_regexp_timeout() called without active timer");
    else if (--timeout_nesting == 0)
    {
	stop_timeout();
	timeout_flag = &dummy_timeout_flag;
    }
}
#endif

#if defined(FEAT_EVAL) || defined(PROTO)
# ifdef FEAT_RELTIME
static sig_atomic_t *saved_timeout_flag;
# endif

/*
 * Used at the debug prompt: disable the timeout so that expression evaluation
 * can use patterns.  The active flag pointer is remembered and replaced by
 * the dummy flag.
 * Must be followed by calling restore_timeout_for_debugging().
 */
    void
save_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    sig_atomic_t *active = (sig_atomic_t *)timeout_flag;

    timeout_flag = &dummy_timeout_flag;
    saved_timeout_flag = active;
# endif
}

    void
restore_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    timeout_flag = saved_timeout_flag;
# endif
}
#endif

/*
 * The first byte of the BT regexp internal "program" is actually this magic
 * number; the start node begins in the second byte.  It's used to catch the
 * most severe mutilation of the program by the caller.
 */

#define REGMAGIC	0234

/*
 * Utility definitions.
 */
#define UCHARAT(p)	((int)*(char_u *)(p))

// Used for an error (down from) vim_regcomp(): give the error message, set
// rc_did_emsg and return NULL or FAIL.
// Each macro expands to a complete "return" statement, thus it leaves the
// calling function.  The _NULL variants are for functions returning a
// pointer, the _FAIL variants for functions returning OK/FAIL.
// The EMSG2 and EMSG3 variants pass "" to semsg() when "c" is non-zero and a
// backslash otherwise; EMSG3 additionally passes "a".
// EMSG_ONE_RET_NULL gives the "invalid item in brackets" message, with "c"
// depending on whether the pattern is very magic.
#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)


// Maximum used by read_limits() when no upper limit is given in "\{}".
#define MAX_LIMIT	(32767L << 16L)

// return values for re_multi_type()
#define NOT_MULTI	0	// not a multi operator
#define MULTI_ONE	1	// "\@", "\=" or "\?"
#define MULTI_MULT	2	// "*", "\+" or "\{"

// return values for regmatch()
#define RA_FAIL		1	// something failed, abort
#define RA_CONT		2	// continue in inner loop
#define RA_BREAK	3	// break inner loop
#define RA_MATCH	4	// successful match
#define RA_NOMATCH	5	// didn't match

/*
 * Return NOT_MULTI if c is not a "multi" operator.
 * Return MULTI_ONE if c is a single "multi" operator.
 * Return MULTI_MULT if c is a multi "multi" operator.
 */
    static int
re_multi_type(int c)
{
    if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
	return MULTI_ONE;
    if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
	return MULTI_MULT;
    return NOT_MULTI;
}

static char_u		*reg_prev_sub = NULL;
static size_t		reg_prev_sublen = 0;

/*
 * REGEXP_INRANGE contains all characters which are always special in a []
 * range after '\'.
 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 * These are:
 *  \n	- New line (NL).
 *  \r	- Carriage Return (CR).
 *  \t	- Tab (TAB).
 *  \e	- Escape (ESC).
 *  \b	- Backspace (Ctrl_H).
 *  \d  - Character code in decimal, eg \d123
 *  \o	- Character code in octal, eg \o40
 *  \x	- Character code in hex, eg \x4a
 *  \u	- Multibyte character code, eg \u20ac
 *  \U	- Long multibyte character code, eg \U12345678
 */
static char_u REGEXP_INRANGE[] = "]^-n\\";
static char_u REGEXP_ABBR[] = "nrtebdoxuU";

/*
 * Translate the character after a backslash to its control character:
 * 'r', 't', 'e' and 'b' become CAR, TAB, ESC and BS.  Any other character,
 * including 'n' (which is Magic), is returned unchanged.
 */
    static int
backslash_trans(int c)
{
    if (c == 'r')
	return CAR;
    if (c == 't')
	return TAB;
    if (c == 'e')
	return ESC;
    if (c == 'b')
	return BS;
    return c;
}

// Identifiers for the character classes "[:name:]" that get_char_class()
// recognizes.  CLASS_NONE is what get_char_class() returns when no class name
// was recognized.
enum
{
    CLASS_ALNUM = 0,	// [:alnum:]
    CLASS_ALPHA,	// [:alpha:]
    CLASS_BLANK,	// [:blank:]
    CLASS_CNTRL,	// [:cntrl:]
    CLASS_DIGIT,	// [:digit:]
    CLASS_GRAPH,	// [:graph:]
    CLASS_LOWER,	// [:lower:]
    CLASS_PRINT,	// [:print:]
    CLASS_PUNCT,	// [:punct:]
    CLASS_SPACE,	// [:space:]
    CLASS_UPPER,	// [:upper:]
    CLASS_XDIGIT,	// [:xdigit:]
    CLASS_TAB,		// [:tab:]
    CLASS_RETURN,	// [:return:]
    CLASS_BACKSPACE,	// [:backspace:]
    CLASS_ESCAPE,	// [:escape:]
    CLASS_IDENT,	// [:ident:]
    CLASS_KEYWORD,	// [:keyword:]
    CLASS_FNAME,	// [:fname:]
    CLASS_NONE = 99	// no class name recognized
};

/*
 * Check for a character class name "[:name:]".  "pp" points to the '['.
 * Returns one of the CLASS_ items.  When a class is recognized "pp" is
 * advanced to after the item.  CLASS_NONE means that no item was recognized,
 * "pp" is not changed then.
 */
    static int
get_char_class(char_u **pp)
{
    // must be sorted by the 'value' field because it is used by bsearch()!
    static keyvalue_T char_class_tab[] =
    {
	KEYVALUE_ENTRY(CLASS_ALNUM, "alnum:]"),
	KEYVALUE_ENTRY(CLASS_ALPHA, "alpha:]"),
	KEYVALUE_ENTRY(CLASS_BACKSPACE, "backspace:]"),
	KEYVALUE_ENTRY(CLASS_BLANK, "blank:]"),
	KEYVALUE_ENTRY(CLASS_CNTRL, "cntrl:]"),
	KEYVALUE_ENTRY(CLASS_DIGIT, "digit:]"),
	KEYVALUE_ENTRY(CLASS_ESCAPE, "escape:]"),
	KEYVALUE_ENTRY(CLASS_FNAME, "fname:]"),
	KEYVALUE_ENTRY(CLASS_GRAPH, "graph:]"),
	KEYVALUE_ENTRY(CLASS_IDENT, "ident:]"),
	KEYVALUE_ENTRY(CLASS_KEYWORD, "keyword:]"),
	KEYVALUE_ENTRY(CLASS_LOWER, "lower:]"),
	KEYVALUE_ENTRY(CLASS_PRINT, "print:]"),
	KEYVALUE_ENTRY(CLASS_PUNCT, "punct:]"),
	KEYVALUE_ENTRY(CLASS_RETURN, "return:]"),
	KEYVALUE_ENTRY(CLASS_SPACE, "space:]"),
	KEYVALUE_ENTRY(CLASS_TAB, "tab:]"),
	KEYVALUE_ENTRY(CLASS_UPPER, "upper:]"),
	KEYVALUE_ENTRY(CLASS_XDIGIT, "xdigit:]")
    };
    // This function can be called repeatedly with the same value for "pp",
    // therefore the entry that was found last is remembered and tried first.
    static keyvalue_T	*last_entry = NULL;
    keyvalue_T		target;
    keyvalue_T		*found;
    char_u		*s = *pp;

    // Quick rejection: every name starts with ':' and at least three lower
    // case ASCII letters.
    if (s[1] != ':' || !ASCII_ISLOWER(s[2])
			    || !ASCII_ISLOWER(s[3]) || !ASCII_ISLOWER(s[4]))
	return CLASS_NONE;

    target.key = 0;
    target.value.string = s + 2;
    target.value.length = 0;	// not used, see cmp_keyvalue_value_n()

    found = last_entry;
    if (found == NULL || cmp_keyvalue_value_n(&target, found) != 0)
	found = (keyvalue_T *)bsearch(&target, &char_class_tab,
					ARRAY_LENGTH(char_class_tab),
					sizeof(char_class_tab[0]), cmp_keyvalue_value_n);
    if (found == NULL)
	return CLASS_NONE;

    last_entry = found;
    // skip over "[:" plus the name including its ":]"
    *pp += found->value.length + 2;
    return found->key;
}

/*
 * Specific version of character class functions.
 * Using a table to keep this fast.
 */
// Flags for every byte value, filled in by init_class_tab().
static short	class_tab[256];

// Bits stored in class_tab[]:
#define	    RI_DIGIT	0x01	// '0' - '9'
#define	    RI_HEX	0x02	// '0' - '9', 'a' - 'f', 'A' - 'F'
#define	    RI_OCTAL	0x04	// '0' - '7'
#define	    RI_WORD	0x08	// digits, ASCII letters and '_'
#define	    RI_HEAD	0x10	// ASCII letters and '_'
#define	    RI_ALPHA	0x20	// ASCII letters
#define	    RI_LOWER	0x40	// 'a' - 'z'
#define	    RI_UPPER	0x80	// 'A' - 'Z'
#define	    RI_WHITE	0x100	// space and tab

    static void
init_class_tab(void)
{
    int		i;
    static int	done = FALSE;

    if (done)
	return;

    for (i = 0; i < 256; ++i)
    {
	if (i >= '0' && i <= '7')
	    class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
	else if (i >= '8' && i <= '9')
	    class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
	else if (i >= 'a' && i <= 'f')
	    class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
	else if (i >= 'g' && i <= 'z')
	    class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
	else if (i >= 'A' && i <= 'F')
	    class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
	else if (i >= 'G' && i <= 'Z')
	    class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
	else if (i == '_')
	    class_tab[i] = RI_WORD + RI_HEAD;
	else
	    class_tab[i] = 0;
    }
    class_tab[' '] |= RI_WHITE;
    class_tab['\t'] |= RI_WHITE;
    done = TRUE;
}

// Test character "c" against the flags in class_tab[].  A character of 0x100
// or above never matches.
// NOTE(review): "c" is evaluated twice and is not checked for being negative
// before it is used as an index -- presumably callers only pass non-negative
// values without side effects; verify against the callers.
#define ri_digit(c)	((c) < 0x100 && (class_tab[c] & RI_DIGIT))
#define ri_hex(c)	((c) < 0x100 && (class_tab[c] & RI_HEX))
#define ri_octal(c)	((c) < 0x100 && (class_tab[c] & RI_OCTAL))
#define ri_word(c)	((c) < 0x100 && (class_tab[c] & RI_WORD))
#define ri_head(c)	((c) < 0x100 && (class_tab[c] & RI_HEAD))
#define ri_alpha(c)	((c) < 0x100 && (class_tab[c] & RI_ALPHA))
#define ri_lower(c)	((c) < 0x100 && (class_tab[c] & RI_LOWER))
#define ri_upper(c)	((c) < 0x100 && (class_tab[c] & RI_UPPER))
#define ri_white(c)	((c) < 0x100 && (class_tab[c] & RI_WHITE))

// flags for regflags
#define RF_ICASE    1	// ignore case
#define RF_NOICASE  2	// don't ignore case
#define RF_HASNL    4	// can match a NL
#define RF_ICOMBINE 8	// ignore combining characters
#define RF_LOOKBH   16	// uses "\@<=" or "\@<!"

/*
 * Global work variables for vim_regcomp().
 */

static char_u	*regparse;	// Input-scan pointer.
static int	regnpar;	// () count.
static int	wants_nfa;	// regex should use NFA engine
#ifdef FEAT_SYN_HL
static int	regnzpar;	// \z() count.
static int	re_has_z;	// \z item detected
#endif
static unsigned	regflags;	// RF_ flags for prog
#if defined(FEAT_SYN_HL) || defined(PROTO)
static int	had_eol;	// TRUE when EOL found by vim_regcomp()
#endif

static magic_T	reg_magic;	// magicness of the pattern

static int	reg_string;	// matching with a string instead of a buffer
				// line
static int	reg_strict;	// "[abc" is illegal

/*
 * META contains all characters that may be magic, except '^' and '$'.
 */

// META[] is used often enough to justify turning it into a table.
// The table is indexed with the character value and is non-zero for the
// characters in META.  It has entries for NUL up to and including '~' only,
// thus the index must be checked to be at most '~' before using the table,
// like peekchr() does.
static char_u META_flags[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
//		   %  &     (  )  *  +	      .
    0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
//     1  2  3	4  5  6  7  8  9	<  =  >  ?
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
//  @  A     C	D     F     H  I     K	L  M	 O
    1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
//  P	     S	   U  V  W  X	  Z  [		 _
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
//     a     c	d     f     h  i     k	l  m  n  o
    0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
//  p	     s	   u  v  w  x	  z  {	|     ~
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
};

static int	curchr;		// currently parsed character
// Previous character.  Note: prevchr is sometimes -1 when we are not at the
// start, eg in /[ ^I]^ the pattern was never found even if it existed,
// because ^ was taken to be magic -- webb
static int	prevchr;
static int	prevprevchr;	// previous-previous character
static int	nextchr;	// used for ungetchr()

// arguments for reg()
#define REG_NOPAREN	0	// toplevel reg()
#define REG_PAREN	1	// \(\)
#define REG_ZPAREN	2	// \z(\)
#define REG_NPAREN	3	// \%(\)

// Snapshot of the state of the pattern parser.  It is filled in by
// save_parse_state() and put back by restore_parse_state().  Every member is
// a copy of the static variable with the same name.
typedef struct
{
     char_u	*regparse;
     int	prevchr_len;
     int	curchr;
     int	prevchr;
     int	prevprevchr;
     int	nextchr;
     int	at_start;
     int	prev_at_start;
     int	regnpar;
} parse_state_T;

static void	initchr(char_u *);
static int	getchr(void);
static void	skipchr_keepstart(void);
static int	peekchr(void);
static void	skipchr(void);
static void	ungetchr(void);
static long	gethexchrs(int maxinputlen);
static long	getoctchrs(void);
static long	getdecchrs(void);
static int	coll_get_char(void);
static int	prog_magic_wrong(void);
static int	cstrncmp(char_u *s1, char_u *s2, int *n);
static char_u	*cstrchr(char_u *, int);
static int	re_mult_next(char *what);
static int	reg_iswordc(int);
#ifdef FEAT_EVAL
static void report_re_switch(char_u *pat);
#endif

static regengine_T bt_regengine;
static regengine_T nfa_regengine;

/*
 * Return non-zero if compiled regular expression "prog" can match a line
 * break, zero otherwise.  The non-zero value is the RF_HASNL flag itself.
 */
    int
re_multiline(regprog_T *prog)
{
    int		has_nl = (prog->regflags & RF_HASNL);

    return has_nl;
}

/*
 * Check for an equivalence class name "[=a=]".  "pp" points to the '['.
 * When the item is recognized the character between the '=' signs is
 * returned and "pp" is advanced to after the item.
 * Zero is returned when no item was recognized, "pp" is not changed then.
 */
    static int
get_equi_class(char_u **pp)
{
    char_u	*s = *pp;
    int		len = 1;	// byte length of the character
    int		c;

    if (s[1] != '=' || s[2] == NUL)
	return 0;

    if (has_mbyte)
	len = (*mb_ptr2len)(s + 2);
    if (s[len + 2] != '=' || s[len + 3] != ']')
	return 0;

    if (has_mbyte)
	c = mb_ptr2char(s + 2);
    else
	c = s[2];
    *pp += len + 4;	// skip "[=", the character and "=]"
    return c;
}

/*
 * Check for a collating element "[.a.]".  "pp" points to the '['.
 * When the item is recognized the character between the dots is returned and
 * "pp" is advanced to after the item.
 * Zero is returned when no item was recognized, "pp" is not changed then.
 * Currently only single characters are recognized!
 */
    static int
get_coll_element(char_u **pp)
{
    char_u	*s = *pp;
    int		len = 1;	// byte length of the character
    int		c;

    if (s[0] == NUL || s[1] != '.' || s[2] == NUL)
	return 0;

    if (has_mbyte)
	len = (*mb_ptr2len)(s + 2);
    if (s[len + 2] != '.' || s[len + 3] != ']')
	return 0;

    if (has_mbyte)
	c = mb_ptr2char(s + 2);
    else
	c = s[2];
    *pp += len + 4;	// skip "[.", the character and ".]"
    return c;
}

static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
static int reg_cpo_bsl; // 'cpoptions' contains '\' flag

    static void
get_cpo_flags(void)
{
    reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
    reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
}

/*
 * Skip over a "[]" range.
 * "p" must point to the character after the '['.
 * The returned pointer is on the matching ']', or the terminating NUL.
 * Uses "reg_cpo_lit" and "reg_cpo_bsl", which are set by get_cpo_flags().
 */
    static char_u *
skip_anyof(char_u *p)
{
    int		l;

    if (*p == '^')	// Complement of range.
	++p;
    // A ']' or '-' in the first position does not end the range.
    if (*p == ']' || *p == '-')
	++p;
    while (*p != NUL && *p != ']')
    {
	// A multi-byte character is skipped as a whole.
	if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
	    p += l;
	else
	    // Note: the "else if" items below all belong to this "else", they
	    // are only used when no multi-byte character was skipped.
	    if (*p == '-')
	    {
		// Skip the '-' and also the character that ends "a-b", unless
		// the '-' is the last character in the range.
		++p;
		if (*p != ']' && *p != NUL)
		    MB_PTR_ADV(p);
	    }
	// A backslash together with the next character is skipped when the
	// backslash is special here: 'cpoptions' must not contain '\' and the
	// next character must be in REGEXP_INRANGE, or in REGEXP_ABBR when
	// 'cpoptions' does not contain 'l'.
	else if (*p == '\\'
		&& !reg_cpo_bsl
		&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
		    || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
	    p += 2;
	else if (*p == '[')
	{
	    // Each of the three functions advances "p" when it recognizes
	    // its item: "[:name:]", "[=a=]" or "[.a.]".
	    if (get_char_class(&p) == CLASS_NONE
		    && get_equi_class(&p) == 0
		    && get_coll_element(&p) == 0
		    && *p != NUL)
		++p; // it is not a class name and not NUL
	}
	else
	    ++p;
    }

    return p;
}

/*
 * Skip past regular expression.
 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
 * Take care of characters with a backslash in front of it.
 * Skip strings inside [ and ].
 * This is skip_regexp_ex() without any of the optional arguments.
 */
    char_u *
skip_regexp(char_u *startp, int delim, int magic)
{
    char_u	*end = skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);

    return end;
}

/*
 * Call skip_regexp() and return a pointer to the delimiter where the pattern
 * ends.  When the pattern does not end in "delim" give an error and return
 * NULL.
 */
    char_u *
skip_regexp_err(char_u *startp, int delim, int magic)
{
    char_u	*end = skip_regexp(startp, delim, magic);

    if (*end == delim)
	return end;

    semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
    return NULL;
}

/*
 * skip_regexp() with extra arguments:
 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
 * expression and change "\?" to "?".  If "*newp" is not NULL the expression
 * is changed in-place.
 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
 * If "magic_val" is not NULL, returns the effective magicness of the pattern
 *
 * "startp" is the start of the pattern, "dirc" the delimiter to stop at and
 * "magic" is non-zero when the pattern starts out as magic.
 * Returns a pointer to the delimiter or to the terminating NUL.  When a copy
 * was made the pointer is into the copy "*newp", not into "startp".
 */
    char_u *
skip_regexp_ex(
    char_u	*startp,
    int		dirc,
    int		magic,
    char_u	**newp,
    int		*dropped,
    magic_T	*magic_val)
{
    magic_T	mymagic;
    char_u	*p = startp;
    size_t	startplen = 0;	    // length of "startp", computed when needed

    if (magic)
	mymagic = MAGIC_ON;
    else
	mymagic = MAGIC_OFF;
    get_cpo_flags();		    // sets the flags used by skip_anyof()

    for (; p[0] != NUL; MB_PTR_ADV(p))
    {
	if (p[0] == dirc)	// found end of regexp
	    break;
	// A "[" starts a range when magic, "\[" when not magic.
	if ((p[0] == '[' && mymagic >= MAGIC_ON)
		|| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
	{
	    p = skip_anyof(p + 1);
	    if (p[0] == NUL)
		break;
	}
	else if (p[0] == '\\' && p[1] != NUL)
	{
	    if (dirc == '?' && newp != NULL && p[1] == '?')
	    {
		// change "\?" to "?", make a copy first.
		if (startplen == 0)
		    startplen = STRLEN(startp);
		if (*newp == NULL)
		{
		    *newp = vim_strnsave(startp, startplen);
		    if (*newp != NULL)
		    {
			// Continue in the copy, at the same offset.
			p = *newp + (p - startp);
			startp = *newp;
		    }
		}
		if (dropped != NULL)
		    ++*dropped;
		if (*newp != NULL)
		    // Remove the backslash: move the rest of the text,
		    // including the NUL, one byte to the left.
		    mch_memmove(p, p + 1, startplen - ((p + 1) - startp) + 1);
		else
		    // Making the copy failed, only skip over the backslash.
		    ++p;
	    }
	    else
		++p;    // skip next character
	    // "\v" and "\V" change the magicness for what follows.
	    if (*p == 'v')
		mymagic = MAGIC_ALL;
	    else if (*p == 'V')
		mymagic = MAGIC_NONE;
	}
    }
    if (magic_val != NULL)
	*magic_val = mymagic;
    return p;
}

/*
 * Functions for getting characters from the regexp input.
 */
static int	prevchr_len;	// byte length of previous char
static int	at_start;	// True when on the first character
static int	prev_at_start;  // True when on the second character

/*
 * Start parsing at "str": reset the state of the lexer so that the next
 * character is taken from the start of "str".
 */
    static void
initchr(char_u *str)
{
    regparse = str;
    prevchr_len = 0;

    // No character was seen or put back yet.
    curchr = -1;
    prevchr = -1;
    prevprevchr = -1;
    nextchr = -1;

    at_start = TRUE;
    prev_at_start = FALSE;
}

/*
 * Copy the current parse state into "ps", so that restore_parse_state() can
 * bring it back later and parsing starts in the same state again.
 */
    static void
save_parse_state(parse_state_T *ps)
{
    // position in the pattern
    ps->regparse = regparse;
    ps->prevchr_len = prevchr_len;

    // character history and the character that was put back
    ps->prevprevchr = prevprevchr;
    ps->prevchr = prevchr;
    ps->curchr = curchr;
    ps->nextchr = nextchr;

    // start-of-pattern flags
    ps->prev_at_start = prev_at_start;
    ps->at_start = at_start;

    // () count
    ps->regnpar = regnpar;
}

/*
 * Put back the parse state that was stored in "ps" by save_parse_state().
 */
    static void
restore_parse_state(parse_state_T *ps)
{
    // position in the pattern
    regparse = ps->regparse;
    prevchr_len = ps->prevchr_len;

    // character history and the character that was put back
    prevprevchr = ps->prevprevchr;
    prevchr = ps->prevchr;
    curchr = ps->curchr;
    nextchr = ps->nextchr;

    // start-of-pattern flags
    prev_at_start = ps->prev_at_start;
    at_start = ps->at_start;

    // () count
    regnpar = ps->regnpar;
}


/*
 * Get the next character without advancing.
 * The result is kept in "curchr": as long as that is not -1 it is returned
 * without looking at the pattern again.
 * A character that has a special meaning with the current value of
 * "reg_magic" is returned as a Magic() value, which is negative.  Other
 * characters are returned as themselves.
 */
    static int
peekchr(void)
{
    // Non-zero while peekchr() calls itself for the character after a
    // backslash.
    static int	after_slash = FALSE;

    if (curchr != -1)
	return curchr;

    switch (curchr = regparse[0])
    {
	case '.':
	case '[':
	case '~':
	    // magic when 'magic' is on
	    if (reg_magic >= MAGIC_ON)
		curchr = Magic(curchr);
	    break;
	case '(':
	case ')':
	case '{':
	case '%':
	case '+':
	case '=':
	case '?':
	case '@':
	case '!':
	case '&':
	case '|':
	case '<':
	case '>':
	case '#':	// future ext.
	case '"':	// future ext.
	case '\'':	// future ext.
	case ',':	// future ext.
	case '-':	// future ext.
	case ':':	// future ext.
	case ';':	// future ext.
	case '`':	// future ext.
	case '/':	// Can't be used in / command
			// magic only after "\v"
	    if (reg_magic == MAGIC_ALL)
		curchr = Magic(curchr);
	    break;
	case '*':
	    // * is not magic as the very first character, eg "?*ptr", when
	    // after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
	    // "\(\*" is not magic, thus must be magic if "after_slash"
	    if (reg_magic >= MAGIC_ON
		    && !at_start
		    && !(prev_at_start && prevchr == Magic('^'))
		    && (after_slash
			|| (prevchr != Magic('(')
			    && prevchr != Magic('&')
			    && prevchr != Magic('|'))))
		curchr = Magic('*');
	    break;
	case '^':
	    // '^' is only magic as the very first character and if it's after
	    // "\(", "\|", "\&" or "\n".  It is also magic after "\%(" and
	    // always when the pattern is very magic.
	    if (reg_magic >= MAGIC_OFF
		    && (at_start
			|| reg_magic == MAGIC_ALL
			|| prevchr == Magic('(')
			|| prevchr == Magic('|')
			|| prevchr == Magic('&')
			|| prevchr == Magic('n')
			|| (no_Magic(prevchr) == '('
			    && prevprevchr == Magic('%'))))
	    {
		curchr = Magic('^');
		// What follows the '^' is at the start again.
		at_start = TRUE;
		prev_at_start = FALSE;
	    }
	    break;
	case '$':
	    // '$' is only magic as the very last char and if it's in front of
	    // either "\|", "\)", "\&", or "\n"
	    if (reg_magic >= MAGIC_OFF)
	    {
		char_u *p = regparse + 1;
		// magicness in effect after the items skipped below
		int is_magic_all = (reg_magic == MAGIC_ALL);

		// ignore \c \C \m \M \v \V and \Z after '$'
		while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
			    || p[1] == 'm' || p[1] == 'M'
			    || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
		{
		    if (p[1] == 'v')
			is_magic_all = TRUE;
		    else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
			is_magic_all = FALSE;
		    p += 2;
		}
		// Magic at the end of the pattern, before "\|", "\&", "\)" or
		// "\n", before "|", "&" or ")" when those are very magic, and
		// always when the '$' itself is very magic.
		if (p[0] == NUL
			|| (p[0] == '\\'
			    && (p[1] == '|' || p[1] == '&' || p[1] == ')'
				|| p[1] == 'n'))
			|| (is_magic_all
			    && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
			|| reg_magic == MAGIC_ALL)
		    curchr = Magic('$');
	    }
	    break;
	case '\\':
	    {
		int c = regparse[1];

		if (c == NUL)
		    curchr = '\\';	// trailing '\'
		else if (c <= '~' && META_flags[c])
		{
		    /*
		     * META contains everything that may be magic sometimes,
		     * except ^ and $ ("\^" and "\$" are only magic after
		     * "\V").  We now fetch the next character and toggle its
		     * magicness.  Therefore, \ is so meta-magic that it is
		     * not in META.
		     */
		    curchr = -1;
		    prev_at_start = at_start;
		    at_start = FALSE;	// be able to say "/\*ptr"
		    // Look at the character after the backslash with a
		    // recursive call; "regparse" is put back afterwards, so
		    // that it still points to the backslash.
		    ++regparse;
		    ++after_slash;
		    peekchr();
		    --regparse;
		    --after_slash;
		    curchr = toggle_Magic(curchr);
		}
		else if (vim_strchr(REGEXP_ABBR, c))
		{
		    /*
		     * Handle abbreviations, like "\t" for TAB -- webb
		     */
		    curchr = backslash_trans(c);
		}
		else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
		    curchr = toggle_Magic(c);
		else
		{
		    /*
		     * Next character can never be (made) magic?
		     * Then backslashing it won't do anything.
		     */
		    if (has_mbyte)
			curchr = (*mb_ptr2char)(regparse + 1);
		    else
			curchr = c;
		}
		break;
	    }

	default:
	    // An ordinary character.  Without "has_mbyte" the byte that was
	    // stored in "curchr" by the switch expression is used.
	    if (has_mbyte)
		curchr = (*mb_ptr2char)(regparse);
    }

    return curchr;
}

/*
 * Eat one lexed character.  Do this in a way that we can undo it:
 * "prevchr_len" is set to the number of bytes skipped, which ungetchr() uses
 * to go back.
 */
    static void
skipchr(void)
{
    // peekchr() eats a backslash, do the same here
    int		len = (*regparse == '\\') ? 1 : 0;

    if (regparse[len] != NUL)
    {
	if (enc_utf8)
	    // exclude composing chars that mb_ptr2len does include
	    len += utf_ptr2len(regparse + len);
	else if (has_mbyte)
	    len += (*mb_ptr2len)(regparse + len);
	else
	    ++len;
    }
    prevchr_len = len;
    regparse += len;

    // shift the character history by one
    prevprevchr = prevchr;
    prevchr = curchr;
    curchr = nextchr;	    // use previously unget char, or -1
    nextchr = -1;

    prev_at_start = at_start;
    at_start = FALSE;
}

/*
 * Skip a character while keeping the value of prev_at_start for at_start.
 * prevchr and prevprevchr are also kept.
 */
    static void
skipchr_keepstart(void)
{
    int as = prev_at_start;
    int pr = prevchr;
    int prpr = prevprevchr;

    skipchr();
    at_start = as;
    prevchr = pr;
    prevprevchr = prpr;
}

/*
 * Get the next character from the pattern and advance past it.  We know
 * about magic and such, so therefore we need a lexical analyzer: peekchr()
 * works out what the character is, skipchr() eats it.
 */
    static int
getchr(void)
{
    int	    c = peekchr();

    skipchr();
    return c;
}

/*
 * Put the character that was obtained last back.  Works only once!
 */
    static void
ungetchr(void)
{
    // Backup regparse, so that it's at the same position as before the
    // getchr().
    regparse -= prevchr_len;

    at_start = prev_at_start;
    prev_at_start = FALSE;

    // Shift the character history back by one; the current character is
    // remembered in "nextchr", skipchr() picks it up again.
    nextchr = curchr;
    curchr = prevchr;
    prevchr = prevprevchr;
}

/*
 * Get and return the value of the hex string at the current position.
 * Return -1 if there is no valid hex number.
 * The position is updated:
 *     blahblah\%x20asdf
 *	   before-^ ^-after
 * At most "maxinputlen" characters are used.  This will be 2 when reading a
 * \%x20 sequence and 4 when reading a \%u20AC sequence.
 */
    static long
gethexchrs(int maxinputlen)
{
    long_u	value = 0;
    int		ndigits = 0;

    while (ndigits < maxinputlen && vim_isxdigit(regparse[0]))
    {
	value = (value << 4) | hex2nr(regparse[0]);
	++regparse;
	++ndigits;
    }

    return ndigits == 0 ? -1 : (long)value;
}

/*
 * Get and return the value of the decimal string at the current position.
 * Return -1 when there is no digit.  Consumes all digits; for every digit
 * consumed "curchr" is invalidated.
 */
    static long
getdecchrs(void)
{
    long_u	value = 0;
    int		ndigits = 0;

    while (regparse[0] >= '0' && regparse[0] <= '9')
    {
	value = value * 10 + (regparse[0] - '0');
	++regparse;
	curchr = -1; // no longer valid
	++ndigits;
    }

    return ndigits == 0 ? -1 : (long)value;
}

/*
 * Get and return the value of the octal string at the current position.
 * Return -1 for invalid, or 0-255 for valid.  At most three digits are used
 * and no further digit is read once the value has reached 040, thus numbers
 * > 377 are handled correctly (for example, 400 is treated as 40).  8 and 9
 * are not recognised as digits.  Position is updated:
 *     blahblah\%o210asdf
 *	   before-^  ^-after
 */
    static long
getoctchrs(void)
{
    long_u	value = 0;
    int		ndigits = 0;

    while (ndigits < 3 && value < 040
			&& regparse[0] >= '0' && regparse[0] <= '7')
    {
	value = (value << 3) | hex2nr(regparse[0]);
	++regparse;
	++ndigits;
    }

    return ndigits == 0 ? -1 : (long)value;
}

/*
 * read_limits - Read two integers to be taken as a minimum and maximum.
 * "regparse" must point just after the '{'.
 * If the first character is '-', then the range is reversed.
 * The limits must be followed by '}' or "\}".  If minval is missing, zero is
 * default, if maxval is missing, a very big number is the default.
 * Returns OK when the limits were read, they are stored in "minval" and
 * "maxval" then.  Otherwise an error message is given and FAIL is returned.
 */
    static int
read_limits(long *minval, long *maxval)
{
    int		reverse = FALSE;
    char_u	*first_char;	    // start of the first number
    long	tmp;

    if (*regparse == '-')
    {
	// Starts with '-', so reverse the range later
	regparse++;
	reverse = TRUE;
    }
    first_char = regparse;
    *minval = getdigits(&regparse);
    if (*regparse == ',')	    // There is a comma
    {
	// The comma is skipped also when no number follows it.
	if (vim_isdigit(*++regparse))
	    *maxval = getdigits(&regparse);
	else
	    *maxval = MAX_LIMIT;
    }
    else if (VIM_ISDIGIT(*first_char))
	*maxval = *minval;	    // It was \{n} or \{-n}
    else
	*maxval = MAX_LIMIT;	    // It was \{} or \{-}
    if (*regparse == '\\')
	regparse++;	// Allow either \{...} or \{...\}
    if (*regparse != '}')
	// Note: this macro contains a "return".
	EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
						       reg_magic == MAGIC_ALL);

    /*
     * Reverse the range if there was a '-', or make sure it is in the right
     * order otherwise.
     */
    if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
    {
	tmp = *minval;
	*minval = *maxval;
	*maxval = tmp;
    }
    skipchr();		// let's be friends with the lexer again
    return OK;
}

/*
 * vim_regexec and friends
 */

/*
 * Global work variables for vim_regexec().
 */

static void	cleanup_subexpr(void);
#ifdef FEAT_SYN_HL
static void	cleanup_zsubexpr(void);
#endif
static int	match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);

/*
 * Sometimes need to save a copy of a line.  Since alloc()/free() is very
 * slow, we keep one allocated piece of memory and only re-allocate it when
 * it's too small.  It's freed in bt_regexec_both() when finished.
 */
static char_u	*reg_tofree = NULL;
static unsigned	reg_tofreelen;

/*
 * Structure used to store the execution state of the regex engine.
 * Which ones are set depends on whether a single-line or multi-line match is
 * done:
 *			single-line		multi-line
 * reg_match		&regmatch_T		NULL
 * reg_mmatch		NULL			&regmmatch_T
 * reg_startp		reg_match->startp	<invalid>
 * reg_endp		reg_match->endp		<invalid>
 * reg_startpos		<invalid>		reg_mmatch->startpos
 * reg_endpos		<invalid>		reg_mmatch->endpos
 * reg_win		NULL			window in which to search
 * reg_buf		curbuf			buffer in which to search
 * reg_firstlnum	<invalid>		first line in which to search
 * reg_maxline		0			last line nr
 * reg_line_lbr		FALSE or TRUE		FALSE
 */
typedef struct {
    regmatch_T		*reg_match;
    regmmatch_T		*reg_mmatch;

    char_u		**reg_startp;
    char_u		**reg_endp;
    lpos_T		*reg_startpos;
    lpos_T		*reg_endpos;

    win_T		*reg_win;
    buf_T		*reg_buf;
    linenr_T		reg_firstlnum;
    linenr_T		reg_maxline;
    int			reg_line_lbr;	// "\n" in string is line break

    // The current match-position is stored in these variables:
    linenr_T	lnum;		// line number, relative to first line
    char_u	*line;		// start of current line
    char_u	*input;		// current input, points into "line"

    int	need_clear_subexpr;	// subexpressions still need to be cleared
#ifdef FEAT_SYN_HL
    int	need_clear_zsubexpr;	// extmatch subexpressions still need to be
				// cleared
#endif

    // Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
    // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
    // contains '\c' or '\C' the value is overruled.
    int			reg_ic;

    // Similar to "reg_ic", but only for 'combining' characters.  Set with \Z
    // flag in the regexp.  Defaults to false, always.
    int			reg_icombine;

    // Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
    // there is no maximum.
    colnr_T		reg_maxcol;

    // State for the NFA engine regexec.
    int nfa_has_zend;	    // NFA regexp \ze operator encountered.
    int nfa_has_backref;    // NFA regexp \1 .. \9 encountered.
    int nfa_nsubexpr;	    // Number of sub expressions actually being used
			    // during execution. 1 if only the whole match
			    // (subexpr 0) is used.
    // listid is global, so that it increases on recursive calls to
    // nfa_regmatch(), which means we don't have to clear the lastlist field of
    // all the states.
    int nfa_listid;
    int nfa_alt_listid;

#ifdef FEAT_SYN_HL
    int nfa_has_zsubexpr;   // NFA regexp has \z( ), set zsubexpr.
#endif
} regexec_T;

static regexec_T	rex;
static int		rex_in_use = FALSE;

/*
 * Return the result of vim_iswordc_buf() for character "c" and the buffer
 * that is being matched in, "rex.reg_buf": TRUE if 'c' is included in the
 * 'iskeyword' option of that buffer.
 */
    static int
reg_iswordc(int c)
{
    int	    is_word = vim_iswordc_buf(c, rex.reg_buf);

    return is_word;
}

#ifdef FEAT_EVAL
static int can_f_submatch = FALSE;	// TRUE when submatch() can be used

// This struct is used for reg_submatch(). Needed for when the
// substitution string is an expression that contains a call to substitute()
// and submatch().
// "sm_firstlnum" and "sm_maxline" are used by reg_getline_common() in place
// of "rex.reg_firstlnum" and "rex.reg_maxline" when RGLF_SUBMATCH is given.
// NOTE(review): the other members presumably mirror the members of regexec_T
// with the same name after "reg_"; where they are set is not in view here.
typedef struct {
    regmatch_T	*sm_match;
    regmmatch_T	*sm_mmatch;
    linenr_T	sm_firstlnum;
    linenr_T	sm_maxline;
    int		sm_line_lbr;
} regsubmatch_T;

static regsubmatch_T rsm;  // can only be used when can_f_submatch is TRUE
#endif

// Flags for reg_getline_common(), they can be combined.
typedef enum
{
    RGLF_LINE = 0x01,		// store the pointer to the line
    RGLF_LENGTH = 0x02		// store the length of the line
#ifdef FEAT_EVAL
    ,
    RGLF_SUBMATCH = 0x04	// use first and last line from "rsm"
				// instead of "rex"
#endif
} reg_getline_flags_T;

//
// Common code for reg_getline(), reg_getline_len(), reg_getline_submatch()
// and reg_getline_submatch_len().
// "lnum" is relative to the first line.  "flags" is a bitmask of RGLF_
// values that controls what is stored: with RGLF_LINE the pointer to the
// line in "line", with RGLF_LENGTH the length of the line in "length".  An
// argument whose flag is not set is not used and may be NULL.
// With RGLF_SUBMATCH the first and the last line are taken from "rsm"
// instead of "rex"; the line itself is always obtained from "rex.reg_buf".
// Note:
//     submatch is available only if FEAT_EVAL is defined.
    static void
reg_getline_common(linenr_T lnum, reg_getline_flags_T flags, char_u **line, colnr_T *length)
{
    linenr_T	abs_lnum;	// line number in the buffer
    linenr_T	last_lnum;	// last valid value for "lnum"

#ifdef FEAT_EVAL
    if ((flags & RGLF_SUBMATCH) != 0)
    {
	abs_lnum = rsm.sm_firstlnum + lnum;
	last_lnum = rsm.sm_maxline;
    }
    else
#endif
    {
	abs_lnum = rex.reg_firstlnum + lnum;
	last_lnum = rex.reg_maxline;
    }

    if (abs_lnum < 1 || lnum > last_lnum)
    {
	// When looking behind for a match/no-match lnum is negative, but we
	// can't go before line 1: there is no line.
	// After the last line we must have matched the "\n" in the last
	// line: use an empty line.
	if ((flags & RGLF_LINE) != 0)
	    *line = abs_lnum < 1 ? NULL : (char_u *)"";
	if ((flags & RGLF_LENGTH) != 0)
	    *length = 0;
	return;
    }

    if ((flags & RGLF_LINE) != 0)
	*line = ml_get_buf(rex.reg_buf, abs_lnum, FALSE);
    if ((flags & RGLF_LENGTH) != 0)
	*length = ml_get_buf_len(rex.reg_buf, abs_lnum);
}

/*
 * Return a pointer to the text of line "lnum", where "lnum" is counted
 * relative to "reg_firstlnum".
 * The lookup itself is done by reg_getline_common() with RGLF_LINE.
 */
    static char_u *
reg_getline(linenr_T lnum)
{
    char_u *text = NULL;

    reg_getline_common(lnum, RGLF_LINE, &text, NULL);
    return text;
}

/*
 * Return the length of line "lnum", where "lnum" is counted relative to
 * "reg_firstlnum".
 * The lookup itself is done by reg_getline_common() with RGLF_LENGTH.
 */
    static colnr_T
reg_getline_len(linenr_T lnum)
{
    colnr_T linelen = 0;

    reg_getline_common(lnum, RGLF_LENGTH, NULL, &linelen);
    return linelen;
}

#ifdef FEAT_SYN_HL
// Workspace for the \z(...\) matches.  cleanup_zsubexpr() resets the pointer
// arrays when REG_MULTI is false and the position arrays when it is true.
static char_u	*reg_startzp[NSUBEXP];	// Workspace to mark beginning
static char_u	*reg_endzp[NSUBEXP];	//   and end of \z(...\) matches
static lpos_T	reg_startzpos[NSUBEXP];	// idem, beginning pos
static lpos_T	reg_endzpos[NSUBEXP];	// idem, end pos
#endif

// TRUE if using multi-line regexp, which is the case when rex.reg_match is
// not set.
#define REG_MULTI	(rex.reg_match == NULL)

#ifdef FEAT_SYN_HL
/*
 * Allocate a zero-filled extmatch that starts with a reference count of one.
 * Returns NULL when the allocation fails.
 */
    static reg_extmatch_T *
make_extmatch(void)
{
    reg_extmatch_T	*new_em = ALLOC_CLEAR_ONE(reg_extmatch_T);

    if (new_em == NULL)
	return NULL;

    new_em->refcnt = 1;
    return new_em;
}

/*
 * Take one more reference to extmatch "em" and return "em".
 * A NULL "em" is passed through unchanged.
 */
    reg_extmatch_T *
ref_extmatch(reg_extmatch_T *em)
{
    if (em == NULL)
	return NULL;

    ++em->refcnt;
    return em;
}

/*
 * Drop one reference to extmatch "em".  When that was the last reference all
 * the match strings and "em" itself are freed.
 * Does nothing when "em" is NULL.
 */
    void
unref_extmatch(reg_extmatch_T *em)
{
    int idx;

    if (em == NULL)
	return;

    --em->refcnt;
    if (em->refcnt > 0)
	return;		// still referenced elsewhere

    for (idx = 0; idx < NSUBEXP; ++idx)
	vim_free(em->matches[idx]);
    vim_free(em);
}
#endif

/*
 * Return the character class of the character before rex.input, as given by
 * mb_get_class_buf() for the buffer rex.reg_buf.
 * Returns -1 when rex.input is at the start of the line.
 */
    static int
reg_prev_class(void)
{
    if (rex.input <= rex.line)
	return -1;	// no previous character

    // Step back one byte, then to the head byte of that character.
    return mb_get_class_buf(rex.input - 1
		       - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
}

/*
 * Return TRUE if the current rex.input position matches the Visual area.
 * When Visual mode is active the area between "VIsual" and the cursor of the
 * window is used, otherwise the Visual area stored in the current buffer
 * (curbuf->b_visual).
 * Always returns FALSE when rex.reg_buf is not the current buffer, when
 * VIsual.lnum is zero or when not doing a multi-line match.
 * Note: in blockwise mode rex.line and rex.input are obtained again, since
 * getvvcol() flushes rex.line.
 */
    static int
reg_match_visual(void)
{
    pos_T	top, bot;	// start and end of the Visual area, in order
    linenr_T    lnum;		// absolute line number of the current position
    colnr_T	col;		// byte offset of rex.input in rex.line
    win_T	*wp = rex.reg_win == NULL ? curwin : rex.reg_win;
    int		mode;
    colnr_T	start, end;
    colnr_T	start2, end2;
    colnr_T	cols;
    colnr_T	curswant;

    // Check if the buffer is the current buffer and not using a string.
    if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
	return FALSE;

    if (VIsual_active)
    {
	// Order the two ends of the active selection.
	if (LT_POS(VIsual, wp->w_cursor))
	{
	    top = VIsual;
	    bot = wp->w_cursor;
	}
	else
	{
	    top = wp->w_cursor;
	    bot = VIsual;
	}
	mode = VIsual_mode;
	curswant = wp->w_curswant;
    }
    else
    {
	// Order the two ends of the Visual area stored in the buffer.
	if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
	{
	    top = curbuf->b_visual.vi_start;
	    bot = curbuf->b_visual.vi_end;
	}
	else
	{
	    top = curbuf->b_visual.vi_end;
	    bot = curbuf->b_visual.vi_start;
	}
	// a substitute command may have removed some lines
	if (bot.lnum > curbuf->b_ml.ml_line_count)
	    bot.lnum = curbuf->b_ml.ml_line_count;
	mode = curbuf->b_visual.vi_mode;
	curswant = curbuf->b_visual.vi_curswant;
    }
    lnum = rex.lnum + rex.reg_firstlnum;
    if (lnum < top.lnum || lnum > bot.lnum)
	return FALSE;

    col = (colnr_T)(rex.input - rex.line);
    if (mode == 'v')
    {
	// Characterwise: only the first and last line restrict the column.
	// The end column is included unless 'selection' starts with 'e'.
	if ((lnum == top.lnum && col < top.col)
		|| (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
	    return FALSE;
    }
    else if (mode == Ctrl_V)
    {
	// Blockwise: use the widest column range covered by the two corners.
	getvvcol(wp, &top, &start, NULL, &end);
	getvvcol(wp, &bot, &start2, NULL, &end2);
	if (start2 < start)
	    start = start2;
	if (end2 > end)
	    end = end2;
	if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
	    end = MAXCOL;

	// getvvcol() flushes rex.line, need to get it again
	rex.line = reg_getline(rex.lnum);
	rex.input = rex.line + col;

	cols = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, rex.line, col);
	if (cols < start || cols > end - (*p_sel == 'e'))
	    return FALSE;
    }
    // Any other mode: every position in the line range matches.
    return TRUE;
}

/*
 * Verify the magic number of the regexp program that is in use.
 * Returns TRUE and gives an internal error when it is wrong, FALSE when it
 * is OK.  A program of the NFA engine is not checked and always OK.
 */
    static int
prog_magic_wrong(void)
{
    regprog_T	*prog;

    if (REG_MULTI)
	prog = rex.reg_mmatch->regprog;
    else
	prog = rex.reg_match->regprog;

    // For NFA matcher we don't check the magic
    if (prog->engine == &nfa_regengine)
	return FALSE;

    if (UCHARAT(((bt_regprog_T *)prog)->program) == REGMAGIC)
	return FALSE;

    iemsg(e_corrupted_regexp_program);
    return TRUE;
}

/*
 * Reset the subexpression results, but only when rex.need_clear_subexpr
 * tells this is still to be done.  Postponing the work until the
 * subexpressions are actually used is what makes this fast.
 */
    static void
cleanup_subexpr(void)
{
    if (rex.need_clear_subexpr)
    {
	if (REG_MULTI)
	{
	    // Filling with 0xff bytes sets lnum to -1
	    vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
	    vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
	}
	else
	{
	    vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
	    vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
	}
	rex.need_clear_subexpr = FALSE;
    }
}

#ifdef FEAT_SYN_HL
/*
 * Reset the \z(...\) subexpression results, but only when
 * rex.need_clear_zsubexpr tells this is still to be done.
 */
    static void
cleanup_zsubexpr(void)
{
    if (rex.need_clear_zsubexpr)
    {
	if (REG_MULTI)
	{
	    // Filling with 0xff bytes sets lnum to -1
	    vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
	    vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
	}
	else
	{
	    vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
	    vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
	}
	rex.need_clear_zsubexpr = FALSE;
    }
}
#endif

/*
 * Move on to the next line: increment rex.lnum and make both rex.line and
 * rex.input point to the start of that line.
 * Also calls fast_breakcheck().
 */
    static void
reg_nextline(void)
{
    ++rex.lnum;
    rex.line = reg_getline(rex.lnum);
    rex.input = rex.line;
    fast_breakcheck();
}

/*
 * Check whether a backreference matches.
 * The text of the backreference starts at line "start_lnum" column
 * "start_col" and ends at line "end_lnum" column "end_col" (line numbers as
 * used for reg_getline()).  It is compared with the text at rex.input,
 * moving on to following lines as needed.
 * Returns RA_FAIL (out of memory or interrupted), RA_NOMATCH or RA_MATCH.
 * If "bytelen" is not NULL, it is set to the byte length of the match in the
 * last line.
 */
    static int
match_with_backref(
    linenr_T start_lnum,
    colnr_T  start_col,
    linenr_T end_lnum,
    colnr_T  end_col,
    int	     *bytelen)
{
    linenr_T	clnum = start_lnum;	// line of the backref being compared
    colnr_T	ccol = start_col;	// column in that line to start at
    int		len;
    char_u	*p;

    if (bytelen != NULL)
	*bytelen = 0;
    for (;;)
    {
	// Since getting one line may invalidate the other, need to make copy.
	// Slow!
	if (rex.line != reg_tofree)
	{
	    len = (int)STRLEN(rex.line);
	    if (reg_tofree == NULL || len >= (int)reg_tofreelen)
	    {
		len += 50;	// get some extra
		vim_free(reg_tofree);
		reg_tofree = alloc(len);
		if (reg_tofree == NULL)
		    return RA_FAIL; // out of memory!
		reg_tofreelen = len;
	    }
	    STRCPY(reg_tofree, rex.line);
	    // Keep rex.input at the same offset, now in the copy.
	    rex.input = reg_tofree + (rex.input - rex.line);
	    rex.line = reg_tofree;
	}

	// Get the line to compare with.
	p = reg_getline(clnum);
	if (clnum == end_lnum)
	    len = end_col - ccol;
	else
	    len = (int)reg_getline_len(clnum) - ccol;

	// cstrncmp() may change "len".
	if (cstrncmp(p + ccol, rex.input, &len) != 0)
	    return RA_NOMATCH;  // doesn't match
	if (bytelen != NULL)
	    *bytelen += len;
	if (clnum == end_lnum)
	    break;		// match and at end!
	if (rex.lnum >= rex.reg_maxline)
	    return RA_NOMATCH;  // text too short

	// Advance to next line.
	reg_nextline();
	// Only the length in the last line is to be reported.
	if (bytelen != NULL)
	    *bytelen = 0;
	++clnum;
	ccol = 0;
	if (got_int)
	    return RA_FAIL;
    }

    // found a match!  Note that rex.line may now point to a copy of the line,
    // that should not matter.
    return RA_MATCH;
}

/*
 * To be called at a place where no * or \+ can follow.
 * "what" names the item that cannot be repeated; it is used in the error
 * message.
 * Returns FAIL, after giving the error and setting rc_did_emsg, when the next
 * character is such a multi.  Returns OK otherwise.
 */
    static int
re_mult_next(char *what)
{
    if (re_multi_type(peekchr()) != MULTI_MULT)
	return OK;

    semsg(_(e_nfa_regexp_cannot_repeat_str), what);
    rc_did_emsg = TRUE;
    return FAIL;
}

// Decomposition of one character into at most three characters.  Unused
// trailing entries are zero.
typedef struct
{
    int a, b, c;
} decomp_T;


// Decomposition of the characters 0xfb20 - 0xfb4f, indexed by the character
// minus 0xfb20.  An unused code point decomposes into itself.
// The table is only read, thus it is "const".
static const decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2, 0, 0},		// 0xfb20	alt ayin
    {0x5d0, 0, 0},		// 0xfb21	alt alef
    {0x5d3, 0, 0},		// 0xfb22	alt dalet
    {0x5d4, 0, 0},		// 0xfb23	alt he
    {0x5db, 0, 0},		// 0xfb24	alt kaf
    {0x5dc, 0, 0},		// 0xfb25	alt lamed
    {0x5dd, 0, 0},		// 0xfb26	alt mem-sofit
    {0x5e8, 0, 0},		// 0xfb27	alt resh
    {0x5ea, 0, 0},		// 0xfb28	alt tav
    {'+', 0, 0},		// 0xfb29	alt plus
    {0x5e9, 0x5c1, 0},		// 0xfb2a	shin+shin-dot
    {0x5e9, 0x5c2, 0},		// 0xfb2b	shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},	// 0xfb2c	shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},	// 0xfb2d	shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},		// 0xfb2e	alef+patah
    {0x5d0, 0x5b8, 0},		// 0xfb2f	alef+qamats
    {0x5d0, 0x5b4, 0},		// 0xfb30	alef+hiriq
    {0x5d1, 0x5bc, 0},		// 0xfb31	bet+dagesh
    {0x5d2, 0x5bc, 0},		// 0xfb32	gimel+dagesh
    {0x5d3, 0x5bc, 0},		// 0xfb33	dalet+dagesh
    {0x5d4, 0x5bc, 0},		// 0xfb34	he+dagesh
    {0x5d5, 0x5bc, 0},		// 0xfb35	vav+dagesh
    {0x5d6, 0x5bc, 0},		// 0xfb36	zayin+dagesh
    {0xfb37, 0, 0},		// 0xfb37 -- UNUSED
    {0x5d8, 0x5bc, 0},		// 0xfb38	tet+dagesh
    {0x5d9, 0x5bc, 0},		// 0xfb39	yud+dagesh
    {0x5da, 0x5bc, 0},		// 0xfb3a	kaf sofit+dagesh
    {0x5db, 0x5bc, 0},		// 0xfb3b	kaf+dagesh
    {0x5dc, 0x5bc, 0},		// 0xfb3c	lamed+dagesh
    {0xfb3d, 0, 0},		// 0xfb3d -- UNUSED
    {0x5de, 0x5bc, 0},		// 0xfb3e	mem+dagesh
    {0xfb3f, 0, 0},		// 0xfb3f -- UNUSED
    {0x5e0, 0x5bc, 0},		// 0xfb40	nun+dagesh
    {0x5e1, 0x5bc, 0},		// 0xfb41	samech+dagesh
    {0xfb42, 0, 0},		// 0xfb42 -- UNUSED
    {0x5e3, 0x5bc, 0},		// 0xfb43	pe sofit+dagesh
    {0x5e4, 0x5bc, 0},		// 0xfb44	pe+dagesh
    {0xfb45, 0, 0},		// 0xfb45 -- UNUSED
    {0x5e6, 0x5bc, 0},		// 0xfb46	tsadi+dagesh
    {0x5e7, 0x5bc, 0},		// 0xfb47	qof+dagesh
    {0x5e8, 0x5bc, 0},		// 0xfb48	resh+dagesh
    {0x5e9, 0x5bc, 0},		// 0xfb49	shin+dagesh
    {0x5ea, 0x5bc, 0},		// 0xfb4a	tav+dagesh
    {0x5d5, 0x5b9, 0},		// 0xfb4b	vav+holam
    {0x5d1, 0x5bf, 0},		// 0xfb4c	bet+rafe
    {0x5db, 0x5bf, 0},		// 0xfb4d	kaf+rafe
    {0x5e4, 0x5bf, 0},		// 0xfb4e	pe+rafe
    {0x5d0, 0x5dc, 0}		// 0xfb4f	alef-lamed
};

/*
 * Decompose character "c" into "*c1", "*c2" and "*c3".
 * For a character in the range 0xfb20 - 0xfb4f the values come from
 * decomp_table[].  Any other character is returned unchanged in "*c1", with
 * "*c2" and "*c3" set to zero.
 */
    static void
mb_decompose(int c, int *c1, int *c2, int *c3)
{
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
	// Read the entry in place, no need to copy the struct.
	const decomp_T *d = &decomp_table[c - 0xfb20];

	*c1 = d->a;
	*c2 = d->b;
	*c3 = d->c;
    }
    else
    {
	*c1 = c;
	*c2 = *c3 = 0;
    }
}

/*
 * Compare two strings, ignore case if rex.reg_ic set.
 * "*n" is the number of bytes of "s1" to compare.
 * Return 0 if strings match, non-zero otherwise.
 * Correct the length "*n" when composing characters are ignored
 * or for utf8 when both utf codepoints are considered equal because of
 * case-folding but have different length (e.g. 's' and 'ſ').  In those cases
 * "*n" is set to the number of bytes that matched in "s2".
 */
    static int
cstrncmp(char_u *s1, char_u *s2, int *n)
{
    int		result;

    if (!rex.reg_ic)
	result = STRNCMP(s1, s2, *n);
    else if (enc_utf8)
    {
	char_u *p = s1;
	int n2 = 0;
	int n1 = *n;

	// Count the number of characters for byte-length of s1.
	// The length to subtract is that of the character at "p", the one
	// that is skipped over.  Using the length of the first character of
	// s1 every time gives a wrong count when the characters do not all
	// have the same byte length.
	while (n1 > 0 && *p != NUL)
	{
	    n1 -= mb_ptr2len(p);
	    MB_PTR_ADV(p);
	    n2++;
	}
	// count the number of bytes to advance the same number of chars for s2
	p = s2;
	while (n2-- > 0 && *p != NUL)
	    MB_PTR_ADV(p);

	n2 = (int)(p - s2);

	result = MB_STRNICMP2(s1, s2, *n, n2);
	if (result == 0 && n2 < *n)
	    *n = n2;
    }
    else
	result = MB_STRNICMP(s1, s2, *n);

    // if it failed and it's utf8 and we want to combineignore:
    if (result != 0 && enc_utf8 && rex.reg_icombine)
    {
	char_u	*str1, *str2;
	int	c1, c2, c11, c12;
	int	junk;

	// we have to handle the strcmp ourselves, since it is necessary to
	// deal with the composing characters by ignoring them:
	str1 = s1;
	str2 = s2;
	c1 = c2 = 0;
	while ((int)(str1 - s1) < *n)
	{
	    c1 = mb_ptr2char_adv(&str1);
	    c2 = mb_ptr2char_adv(&str2);

	    // Decompose the character if necessary, into 'base' characters.
	    // Currently hard-coded for Hebrew, Arabic to be done...
	    if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
	    {
		// decomposition necessary?
		// Only the first character of the decomposition is compared.
		mb_decompose(c1, &c11, &junk, &junk);
		mb_decompose(c2, &c12, &junk, &junk);
		c1 = c11;
		c2 = c12;
		if (c11 != c12
			    && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
		    break;
	    }
	}
	result = c2 - c1;
	if (result == 0)
	    *n = (int)(str2 - s2);
    }

    return result;
}

/*
 * cstrchr: This function is used a lot for simple searches, keep it fast!
 * Find character "c" in string "s", ignoring case when rex.reg_ic is set.
 * Returns a pointer to the first match, or NULL when the end of "s" is
 * reached without a match.  When case does not matter for "c" the result of
 * vim_strchr() is returned.
 */
    static char_u *
cstrchr(char_u *s, int c)
{
    char_u	*p;
    int		cc, lc;	    // "cc": "c" in the other case or folded;
			    // "lc": "c" in lower case or folded

    if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
	return vim_strchr(s, c);

    // tolower() and toupper() can be slow, comparing twice should be a lot
    // faster (esp. when using MS Visual C++!).
    // For UTF-8 need to use folded case.
    if (enc_utf8 && c > 0x80)
    {
	cc = utf_fold(c);
	lc = cc;
    }
    else
	if (MB_ISUPPER(c))
	{
	    cc = MB_TOLOWER(c);
	    lc = cc;
	}
	else if (MB_ISLOWER(c))
	{
	    cc = MB_TOUPPER(c);
	    lc = c;
	}
	else
	    // "c" is neither upper nor lower case, do a plain search
	    return vim_strchr(s, c);

    if (has_mbyte)
    {
	for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
	{
	    int uc = utf_ptr2char(p);
	    if (enc_utf8 && (c > 0x80 || uc > 0x80))
	    {
		// Do not match an illegal byte.  E.g. 0xff matches 0xc3 0xbf,
		// not 0xff.
		// compare with lower case of the character
		if ((uc < 0x80 || uc != *p) && utf_fold(uc) == lc)
		    return p;
	    }
	    else if (*p == c || *p == cc)
		return p;
	}
    }
    else
	// Faster version for when there are no multi-byte characters.
	for (p = s; *p != NUL; ++p)
	    if (*p == c || *p == cc)
		return p;

    return NULL;
}

////////////////////////////////////////////////////////////////
//		      regsub stuff			      //
////////////////////////////////////////////////////////////////

// Type of the case changing functions do_upper() and do_lower(): the
// converted value of the second argument is stored through the first.
typedef void (*fptr_T)(int *, int);

// Common implementation of vim_regsub() and vim_regsub_multi().
static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);

    static void
do_upper(int *d, int c)
{
    // Store the MB_TOUPPER() value of "c" in "*d".
    int	    upper = MB_TOUPPER(c);

    *d = upper;
}

    static void
do_lower(int *d, int c)
{
    // Store the MB_TOLOWER() value of "c" in "*d".
    int	    lower = MB_TOLOWER(c);

    *d = lower;
}

/*
 * regtilde(): Replace tildes in the pattern by the old pattern.
 *
 * Short explanation of the tilde: It stands for the previous replacement
 * pattern.  If that previous pattern also contains a ~ we should go back a
 * step further...  But we insert the previous pattern into the current one
 * and remember that.
 * This still does not handle the case where "magic" changes.  So require the
 * user to keep his hands off of "magic".
 *
 * The tildes are parsed once before the first call to vim_regsub().
 *
 * When "magic" is FALSE the tilde must be written as "\~".
 * Returns "source" when no previous pattern was inserted (tildes may have
 * been removed from it in place) and also when an error occurred.  Otherwise
 * returns the new string in allocated memory.
 * Unless an error occurred a copy of the result is stored in reg_prev_sub
 * and its length in reg_prev_sublen, for the next call.
 */
    char_u *
regtilde(char_u *source, int magic)
{
    char_u	*newsub = source;
    char_u	*p;
    size_t	newsublen = 0;	    // zero means: not computed yet
    char_u	tilde[3] = {'~', NUL, NUL};
    size_t	tildelen = 1;
    int		error = FALSE;

    if (!magic)
    {
	tilde[0] = '\\';
	tilde[1] = '~';
	tilde[2] = NUL;
	tildelen = 2;
    }

    for (p = newsub; *p; ++p)
    {
	if (STRNCMP(p, tilde, tildelen) == 0)
	{
	    size_t prefixlen = p - newsub;		// not including the tilde
	    char_u *postfix = p + tildelen;
	    size_t postfixlen;
	    size_t tmpsublen;

	    if (newsublen == 0)
		newsublen = STRLEN(newsub);
	    newsublen -= tildelen;
	    postfixlen = newsublen - prefixlen;
	    tmpsublen = prefixlen + reg_prev_sublen + postfixlen;

	    if (tmpsublen > 0 && reg_prev_sub != NULL)
	    {
		char_u *tmpsub;

		// Avoid making the text longer than MAXCOL, it will cause
		// trouble at some point.
		if (tmpsublen > MAXCOL)
		{
		    emsg(_(e_resulting_text_too_long));
		    error = TRUE;
		    break;
		}

		tmpsub = alloc(tmpsublen + 1);
		if (tmpsub == NULL)
		{
		    emsg(_(e_out_of_memory));
		    error = TRUE;
		    break;
		}

		// copy prefix
		mch_memmove(tmpsub, newsub, prefixlen);
		// interpret tilde
		mch_memmove(tmpsub + prefixlen, reg_prev_sub, reg_prev_sublen);
		// copy postfix
		STRCPY(tmpsub + prefixlen + reg_prev_sublen, postfix);

		if (newsub != source)	// allocated newsub before
		    vim_free(newsub);
		newsub = tmpsub;
		newsublen = tmpsublen;
		// continue just after the inserted text, in the new string
		p = newsub + prefixlen + reg_prev_sublen;
	    }
	    else
		mch_memmove(p, postfix, postfixlen + 1);	// remove the tilde (+1 for the NUL)

	    // compensate for the "++p" of the loop
	    --p;
	}
	else
	{
	    if (*p == '\\' && p[1])		// skip escaped characters
		++p;
	    if (has_mbyte)
		p += (*mb_ptr2len)(p) - 1;
	}
    }

    if (error)
    {
	if (newsub != source)
	    vim_free(newsub);
	return source;
    }

    // Store a copy of newsub  in reg_prev_sub.  It is always allocated,
    // because recursive calls may make the returned string invalid.
    // Only store it if there something to store.
    // Here "p" points to the terminating NUL of "newsub".
    newsublen = p - newsub;
    if (newsublen == 0)
	VIM_CLEAR(reg_prev_sub);
    else
    {
	vim_free(reg_prev_sub);
	reg_prev_sub = vim_strnsave(newsub, newsublen);
    }

    if (reg_prev_sub == NULL)
	reg_prev_sublen = 0;
    else
	reg_prev_sublen = newsublen;

    return newsub;
}

#ifdef FEAT_EVAL

/*
 * Fill the list in "argv[argskip]" with the submatches.  This list is passed
 * into call_func() by vim_regsub_both().
 * Every item becomes a string: an allocated copy of the submatch text, or
 * NULL when the submatch has no start or no end.
 * Returns "argskip" when the function "fp" does not take the argument,
 * otherwise "argskip + 1".
 */
    static int
fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, ufunc_T *fp)
{
    typval_T	*listarg = argv + argskip;
    listitem_T	*item;
    int		idx;

    // called function doesn't take a submatches argument
    if (!has_varargs(fp) && fp->uf_args.ga_len <= argskip)
	return argskip;

    // Relies on sl_list to be the first item in staticList10_T.
    init_static_list((staticList10_T *)(listarg->vval.v_list));

    // There are always 10 list items in staticList10_T.
    item = listarg->vval.v_list->lv_first;
    for (idx = 0; idx < 10; ++idx, item = item->li_next)
    {
	char_u	*start = rsm.sm_match->startp[idx];
	char_u	*end = rsm.sm_match->endp[idx];

	item->li_tv.v_type = VAR_STRING;
	if (start == NULL || end == NULL)
	    item->li_tv.vval.v_string = NULL;
	else
	    item->li_tv.vval.v_string = vim_strnsave(start, end - start);
    }
    return argskip + 1;
}

    static void
clear_submatch_list(staticList10_T *sl)
{
    // Free the string of each of the 10 items of "sl".
    int idx = 0;

    while (idx < 10)
    {
	vim_free(sl->sl_items[idx].li_tv.vval.v_string);
	++idx;
    }
}
#endif

/*
 * vim_regsub() - perform substitutions after a vim_regexec() or
 * vim_regexec_multi() match.
 *
 * "flags" is a combination of:
 *   REGSUB_COPY	really copy into "dest[destlen]".  Without it
 *			nothing is copied, only the length of the result is
 *			computed.
 *   REGSUB_MAGIC	behave like 'magic' is set.
 *   REGSUB_BACKSLASH	a backslash will be removed later, need to double
 *			them to keep them, and insert a backslash before a CR
 *			to avoid it being replaced with a line break later.
 *
 * Note: The matched text must not change between the call of
 * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
 * references invalid!
 *
 * Can be called recursively: the state in "rex" is saved and restored then.
 *
 * Returns the size of the replacement, including terminating NUL.
 */
    int
vim_regsub(
    regmatch_T	*rmp,
    char_u	*source,
    typval_T	*expr,
    char_u	*dest,
    int		destlen,
    int		flags)
{
    int		was_in_use = rex_in_use;
    regexec_T	saved_rex;
    int		size;

    // Being called recursively, save the state.
    if (was_in_use)
	saved_rex = rex;
    rex_in_use = TRUE;

    // Set up for a match in a string, not in buffer lines.
    rex.reg_match = rmp;
    rex.reg_mmatch = NULL;
    rex.reg_maxline = 0;
    rex.reg_buf = curbuf;
    rex.reg_line_lbr = TRUE;
    size = vim_regsub_both(source, expr, dest, destlen, flags);

    rex_in_use = was_in_use;
    if (was_in_use)
	rex = saved_rex;

    return size;
}

    int
vim_regsub_multi(
    regmmatch_T	*rmp,
    linenr_T	lnum,
    char_u	*source,
    char_u	*dest,
    int		destlen,
    int		flags)
{
    // Like vim_regsub(), but for a multi-line match in the current buffer,
    // with "lnum" being the first line of the match.  No expression typval
    // is passed on.
    int		was_in_use = rex_in_use;
    regexec_T	saved_rex;
    int		size;

    // Being called recursively, save the state.
    if (was_in_use)
	saved_rex = rex;
    rex_in_use = TRUE;

    rex.reg_match = NULL;
    rex.reg_mmatch = rmp;
    rex.reg_buf = curbuf;	// always works on the current buffer!
    rex.reg_firstlnum = lnum;
    rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
    rex.reg_line_lbr = FALSE;
    size = vim_regsub_both(source, NULL, dest, destlen, flags);

    rex_in_use = was_in_use;
    if (was_in_use)
	rex = saved_rex;

    return size;
}

#if defined(FEAT_EVAL) || defined(PROTO)
// When nesting more than a couple levels it's probably a mistake.
// vim_regsub_both() gives an error when this nesting level is reached.
# define MAX_REGSUB_NESTING 4
// Result of evaluating the replacement expression, one entry for each
// nesting level.  It is stored by the call of vim_regsub_both() that only
// computes the length and consumed by the call that copies the text.
static char_u   *eval_result[MAX_REGSUB_NESTING] = {NULL, NULL, NULL, NULL};

# if defined(EXITFREE) || defined(PROTO)
    void
free_resub_eval_result(void)
{
    // Free and reset the saved evaluation result of every nesting level.
    int level = 0;

    while (level < MAX_REGSUB_NESTING)
    {
	VIM_CLEAR(eval_result[level]);
	++level;
    }
}
# endif
#endif

    static int
vim_regsub_both(
    char_u	*source,
    typval_T	*expr,
    char_u	*dest,
    int		destlen,
    int		flags)
{
    // Common implementation of vim_regsub() and vim_regsub_multi(), which
    // have set up "rex" before calling.
    // "source" is the substitute string.  "expr", when not NULL, is a
    // function, partial or instructions typval that produces the replacement.
    // Text is only written to "dest[destlen]" when "flags" contains
    // REGSUB_COPY, otherwise only the size is computed.
    // Returns the size of the replacement, including the terminating NUL, or
    // zero when an error was detected.
    char_u	*src;
    char_u	*dst;
    char_u	*s;
    int		c;
    int		cc;
    int		no = -1;	// number of back reference, -1 for none
    fptr_T	func_all = (fptr_T)NULL;    // case change for all that follows
    fptr_T	func_one = (fptr_T)NULL;    // case change for one character
    linenr_T	clnum = 0;	// init for GCC
    int		len = 0;	// init for GCC
#ifdef FEAT_EVAL
    static int  nesting = 0;
    int		nested;
#endif
    int		copy = flags & REGSUB_COPY;

    // Be paranoid...
    if ((source == NULL && expr == NULL) || dest == NULL)
    {
	iemsg(e_null_argument);
	return 0;
    }
    if (prog_magic_wrong())
	return 0;
#ifdef FEAT_EVAL
    if (nesting == MAX_REGSUB_NESTING)
    {
	emsg(_(e_substitute_nesting_too_deep));
	return 0;
    }
    // Index in eval_result[] for this call.
    nested = nesting;
#endif
    src = source;
    dst = dest;

    /*
     * When the substitute part starts with "\=" evaluate it as an expression.
     */
    if (expr != NULL || (source[0] == '\\' && source[1] == '='))
    {
#ifdef FEAT_EVAL
	// To make sure that the length doesn't change between checking the
	// length and copying the string, and to speed up things, the
	// resulting string is saved from the call with
	// "flags & REGSUB_COPY" == 0 to the call with
	// "flags & REGSUB_COPY" != 0.
	if (copy)
	{
	    if (eval_result[nested] != NULL)
	    {
		int eval_len = (int)STRLEN(eval_result[nested]);

		// Only use the saved result when it fits, including the NUL.
		if (eval_len < destlen)
		{
		    STRCPY(dest, eval_result[nested]);
		    dst += eval_len;
		    VIM_CLEAR(eval_result[nested]);
		}
	    }
	}
	else
	{
	    int		    prev_can_f_submatch = can_f_submatch;
	    regsubmatch_T   rsm_save;

	    VIM_CLEAR(eval_result[nested]);

	    // The expression may contain substitute(), which calls us
	    // recursively.  Make sure submatch() gets the text from the first
	    // level.
	    if (can_f_submatch)
		rsm_save = rsm;
	    can_f_submatch = TRUE;
	    rsm.sm_match = rex.reg_match;
	    rsm.sm_mmatch = rex.reg_mmatch;
	    rsm.sm_firstlnum = rex.reg_firstlnum;
	    rsm.sm_maxline = rex.reg_maxline;
	    rsm.sm_line_lbr = rex.reg_line_lbr;

	    // Although unlikely, it is possible that the expression invokes a
	    // substitute command (it might fail, but still).  Therefore keep
	    // an array of eval results.
	    ++nesting;

	    if (expr != NULL)
	    {
		typval_T	argv[2];
		char_u		buf[NUMBUFLEN];
		typval_T	rettv;
		staticList10_T	matchList;
		funcexe_T	funcexe;

		rettv.v_type = VAR_STRING;
		rettv.vval.v_string = NULL;
		argv[0].v_type = VAR_LIST;
		argv[0].vval.v_list = &matchList.sl_list;
		// Remains zero when fill_submatch_list() is not called.
		matchList.sl_list.lv_len = 0;
		CLEAR_FIELD(funcexe);
		funcexe.fe_argv_func = fill_submatch_list;
		funcexe.fe_evaluate = TRUE;
		if (expr->v_type == VAR_FUNC)
		{
		    s = expr->vval.v_string;
		    call_func(s, -1, &rettv, 1, argv, &funcexe);
		}
		else if (expr->v_type == VAR_PARTIAL)
		{
		    partial_T   *partial = expr->vval.v_partial;

		    s = partial_name(partial);
		    funcexe.fe_partial = partial;
		    call_func(s, -1, &rettv, 1, argv, &funcexe);
		}
		else if (expr->v_type == VAR_INSTR)
		{
		    exe_typval_instr(expr, &rettv);
		}
		if (matchList.sl_list.lv_len > 0)
		    // fill_submatch_list() was called
		    clear_submatch_list(&matchList);

		if (rettv.v_type == VAR_UNKNOWN)
		    // something failed, no need to report another error
		    eval_result[nested] = NULL;
		else
		{
		    // Keep an allocated copy, "rettv" is cleared below.
		    eval_result[nested] = tv_get_string_buf_chk(&rettv, buf);
		    if (eval_result[nested] != NULL)
			eval_result[nested] = vim_strsave(eval_result[nested]);
		}
		clear_tv(&rettv);
	    }
	    else if (substitute_instr != NULL)
		// Execute instructions from ISN_SUBSTITUTE.
		eval_result[nested] = exe_substitute_instr();
	    else
		// Skip over the leading "\=".
		eval_result[nested] = eval_to_string(source + 2, TRUE, FALSE);
	    --nesting;

	    if (eval_result[nested] != NULL)
	    {
		int had_backslash = FALSE;

		for (s = eval_result[nested]; *s != NUL; MB_PTR_ADV(s))
		{
		    // Change NL to CR, so that it becomes a line break,
		    // unless called from vim_regexec_nl().
		    // Skip over a backslashed character.
		    if (*s == NL && !rsm.sm_line_lbr)
			*s = CAR;
		    else if (*s == '\\' && s[1] != NUL)
		    {
			++s;
			/* Change NL to CR here too, so that this works:
			 * :s/abc\\\ndef/\="aaa\\\nbbb"/  on text:
			 *   abc\
			 *   def
			 * Not when called from vim_regexec_nl().
			 */
			if (*s == NL && !rsm.sm_line_lbr)
			    *s = CAR;
			had_backslash = TRUE;
		    }
		}
		if (had_backslash && (flags & REGSUB_BACKSLASH))
		{
		    // Backslashes will be consumed, need to double them.
		    s = vim_strsave_escaped(eval_result[nested], (char_u *)"\\");
		    if (s != NULL)
		    {
			vim_free(eval_result[nested]);
			eval_result[nested] = s;
		    }
		}

		// Nothing is copied here, only the length is counted.
		dst += STRLEN(eval_result[nested]);
	    }

	    can_f_submatch = prev_can_f_submatch;
	    if (can_f_submatch)
		rsm = rsm_save;
	}
#endif
    }
    else
      while ((c = *src++) != NUL)
      {
	// Find out if this is a back reference: "&" or "\&" (depending on
	// REGSUB_MAGIC) for the whole match, "\0" to "\9" for a submatch.
	if (c == '&' && (flags & REGSUB_MAGIC))
	    no = 0;
	else if (c == '\\' && *src != NUL)
	{
	    if (*src == '&' && !(flags & REGSUB_MAGIC))
	    {
		++src;
		no = 0;
	    }
	    else if ('0' <= *src && *src <= '9')
	    {
		no = *src++ - '0';
	    }
	    else if (vim_strchr((char_u *)"uUlLeE", *src))
	    {
		// Case changing items: set the function to use and go on
		// with the next character.
		switch (*src++)
		{
		case 'u':   func_one = do_upper;
			    continue;
		case 'U':   func_all = do_upper;
			    continue;
		case 'l':   func_one = do_lower;
			    continue;
		case 'L':   func_all = do_lower;
			    continue;
		case 'e':
		case 'E':   func_one = func_all = (fptr_T)NULL;
			    continue;
		}
	    }
	}
	if (no < 0)	      // Ordinary character.
	{
	    if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
	    {
		// Copy a special key as-is.
		if (copy)
		{
		    if (dst + 3 > dest + destlen)
		    {
			iemsg("vim_regsub_both(): not enough space");
			return 0;
		    }
		    *dst++ = c;
		    *dst++ = *src++;
		    *dst++ = *src++;
		}
		else
		{
		    dst += 3;
		    src += 2;
		}
		continue;
	    }

	    if (c == '\\' && *src != NUL)
	    {
		// Check for abbreviations -- webb
		switch (*src)
		{
		    case 'r':	c = CAR;	++src;	break;
		    case 'n':	c = NL;		++src;	break;
		    case 't':	c = TAB;	++src;	break;
		 // Oh no!  \e already has meaning in subst pat :-(
		 // case 'e':   c = ESC;	++src;	break;
		    case 'b':	c = Ctrl_H;	++src;	break;

		    // If "backslash" is TRUE the backslash will be removed
		    // later.  Used to insert a literal CR.
		    default:	if (flags & REGSUB_BACKSLASH)
				{
				    if (copy)
				    {
					if (dst + 1 > dest + destlen)
					{
					    iemsg("vim_regsub_both(): not enough space");
					    return 0;
					}
					*dst = '\\';
				    }
				    ++dst;
				}
				c = *src++;
		}
	    }
	    else if (has_mbyte)
		c = mb_ptr2char(src - 1);

	    // Write to buffer, if copy is set.
	    if (func_one != (fptr_T)NULL)
	    {
		func_one(&cc, c);
		func_one = NULL;
	    }
	    else if (func_all != (fptr_T)NULL)
		func_all(&cc, c);
	    else // just copy
		cc = c;

	    if (has_mbyte)
	    {
		int totlen = mb_ptr2len(src - 1);
		int charlen = mb_char2len(cc);

		if (copy)
		{
		    if (dst + charlen > dest + destlen)
		    {
			iemsg("vim_regsub_both(): not enough space");
			return 0;
		    }
		    mb_char2bytes(cc, dst);
		}
		// One less, "dst" is incremented below.
		dst += charlen - 1;
		if (enc_utf8)
		{
		    int clen = utf_ptr2len(src - 1);

		    // If the character length is shorter than "totlen", there
		    // are composing characters; copy them as-is.
		    if (clen < totlen)
		    {
			if (copy)
			{
			    if (dst + totlen - clen > dest + destlen)
			    {
				iemsg("vim_regsub_both(): not enough space");
				return 0;
			    }
			    mch_memmove(dst + 1, src - 1 + clen,
						     (size_t)(totlen - clen));
			}
			dst += totlen - clen;
		    }
		}
		src += totlen - 1;
	    }
	    else if (copy)
	    {
		if (dst + 1 > dest + destlen)
		{
		    iemsg("vim_regsub_both(): not enough space");
		    return 0;
		}
		*dst = cc;
	    }
	    dst++;
	}
	else
	{
	    // Back reference: find the start "s" and the length "len" of the
	    // text in the first line of submatch "no".  "s" is NULL when the
	    // submatch is not set.
	    if (REG_MULTI)
	    {
		clnum = rex.reg_mmatch->startpos[no].lnum;
		if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
		    s = NULL;
		else
		{
		    s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
		    if (rex.reg_mmatch->endpos[no].lnum == clnum)
			len = rex.reg_mmatch->endpos[no].col
					    - rex.reg_mmatch->startpos[no].col;
		    else
			len = (int)reg_getline_len(clnum) - rex.reg_mmatch->startpos[no].col;
		}
	    }
	    else
	    {
		s = rex.reg_match->startp[no];
		if (rex.reg_match->endp[no] == NULL)
		    s = NULL;
		else
		    len = (int)(rex.reg_match->endp[no] - s);
	    }
	    if (s != NULL)
	    {
		for (;;)
		{
		    if (len == 0)
		    {
			// At the end of the text in this line.
			if (REG_MULTI)
			{
			    if (rex.reg_mmatch->endpos[no].lnum == clnum)
				break;
			    // The line break is put in the result as a CR.
			    if (copy)
			    {
				if (dst + 1 > dest + destlen)
				{
				    iemsg("vim_regsub_both(): not enough space");
				    return 0;
				}
				*dst = CAR;
			    }
			    ++dst;
			    s = reg_getline(++clnum);
			    if (rex.reg_mmatch->endpos[no].lnum == clnum)
				len = rex.reg_mmatch->endpos[no].col;
			    else
				len = (int)reg_getline_len(clnum);
			}
			else
			    break;
		    }
		    else if (*s == NUL) // we hit NUL.
		    {
			if (copy)
			    iemsg(e_damaged_match_string);
			// Note: "dest" is not NUL terminated here.
			goto exit;
		    }
		    else
		    {
			if ((flags & REGSUB_BACKSLASH)
						  && (*s == CAR || *s == '\\'))
			{
			    /*
			     * Insert a backslash in front of a CR, otherwise
			     * it will be replaced by a line break.
			     * Number of backslashes will be halved later,
			     * double them here.
			     */
			    if (copy)
			    {
				if (dst + 2 > dest + destlen)
				{
				    iemsg("vim_regsub_both(): not enough space");
				    return 0;
				}
				dst[0] = '\\';
				dst[1] = *s;
			    }
			    dst += 2;
			}
			else
			{
			    if (has_mbyte)
				c = mb_ptr2char(s);
			    else
				c = *s;

			    if (func_one != (fptr_T)NULL)
			    {
				func_one(&cc, c);
				func_one = NULL;
			    }
			    else if (func_all != (fptr_T)NULL)
				func_all(&cc, c);
			    else // just copy
				cc = c;

			    if (has_mbyte)
			    {
				int l;
				int charlen;

				// Copy composing characters separately, one
				// at a time.
				if (enc_utf8)
				    l = utf_ptr2len(s) - 1;
				else
				    l = mb_ptr2len(s) - 1;

				// One byte less, "s" and "len" are adjusted
				// once more below.
				s += l;
				len -= l;
				charlen = mb_char2len(cc);
				if (copy)
				{
				    if (dst + charlen > dest + destlen)
				    {
					iemsg("vim_regsub_both(): not enough space");
					return 0;
				    }
				    mb_char2bytes(cc, dst);
				}
				dst += charlen - 1;
			    }
			    else if (copy)
			    {
				if (dst + 1 > dest + destlen)
				{
				    iemsg("vim_regsub_both(): not enough space");
				    return 0;
				}
				*dst = cc;
			    }
			    dst++;
			}

			++s;
			--len;
		    }
		}
	    }
	    // Done with the back reference.
	    no = -1;
	}
      }
    if (copy)
	*dst = NUL;

exit:
    // The "+ 1" is for the terminating NUL.
    return (int)((dst - dest) + 1);
}

#ifdef FEAT_EVAL

    static char_u *
reg_getline_submatch(linenr_T lnum)
{
    char_u *text;

    // Only the line pointer is requested here (RGLF_LINE), in submatch mode
    // (RGLF_SUBMATCH); the length output is not used, thus NULL.
    reg_getline_common(lnum, RGLF_SUBMATCH | RGLF_LINE, &text, NULL);
    return text;
}

    static colnr_T
reg_getline_submatch_len(linenr_T lnum)
{
    colnr_T linelen;

    // Only the length is requested here (RGLF_LENGTH), in submatch mode
    // (RGLF_SUBMATCH); the line pointer output is not used, thus NULL.
    reg_getline_common(lnum, RGLF_SUBMATCH | RGLF_LENGTH, NULL, &linelen);
    return linelen;
}

/*
 * Used for the submatch() function: get the string from the n'th submatch in
 * allocated memory.
 * When the match spans several lines the lines are joined with a '\n'.
 * Returns NULL when not in a ":s" command and for a non-existing submatch.
 * Also returns NULL when allocating the result fails.
 */
    char_u *
reg_submatch(int no)
{
    char_u	*retval = NULL;
    char_u	*s;
    int		len;
    int		round;
    linenr_T	lnum;

    if (!can_f_submatch || no < 0)
	return NULL;

    if (rsm.sm_match == NULL)
    {
	// No single-string match available: use the line/column positions in
	// rsm.sm_mmatch.

	/*
	 * First round: compute the length and allocate memory.
	 * Second round: copy the text.
	 */
	for (round = 1; round <= 2; ++round)
	{
	    lnum = rsm.sm_mmatch->startpos[no].lnum;
	    if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
		return NULL;

	    s = reg_getline_submatch(lnum);
	    if (s == NULL)  // anti-crash check, cannot happen?
		break;
	    s += rsm.sm_mmatch->startpos[no].col;
	    if (rsm.sm_mmatch->endpos[no].lnum == lnum)
	    {
		// Within one line: take from start to end col.
		len = rsm.sm_mmatch->endpos[no].col
					  - rsm.sm_mmatch->startpos[no].col;
		if (round == 2)
		    vim_strncpy(retval, s, len);
		// one more byte for the terminating NUL
		++len;
	    }
	    else
	    {
		// Multiple lines: take start line from start col, middle
		// lines completely and end line up to end col.
		len = (int)reg_getline_submatch_len(lnum) - rsm.sm_mmatch->startpos[no].col;
		if (round == 2)
		{
		    STRCPY(retval, s);
		    retval[len] = '\n';
		}
		// "len" is used both as the size needed so far and as the
		// offset in "retval" where the next line is written.
		++len;
		++lnum;
		while (lnum < rsm.sm_mmatch->endpos[no].lnum)
		{
		    s = reg_getline_submatch(lnum);
		    if (round == 2)
			STRCPY(retval + len, s);
		    len += (int)reg_getline_submatch_len(lnum);
		    if (round == 2)
			retval[len] = '\n';
		    ++len;
		    ++lnum;
		}
		// Last line: only the part before the end column.
		if (round == 2)
		    STRNCPY(retval + len, reg_getline_submatch(lnum),
					     rsm.sm_mmatch->endpos[no].col);
		len += rsm.sm_mmatch->endpos[no].col;
		if (round == 2)
		    retval[len] = NUL;
		++len;
	    }

	    // End of the first round: "len" is the number of bytes needed,
	    // including the terminating NUL.
	    if (retval == NULL)
	    {
		retval = alloc(len);
		if (retval == NULL)
		    return NULL;
	    }
	}
    }
    else
    {
	// Match in a single string: copy the text between the start and end
	// pointers.
	s = rsm.sm_match->startp[no];
	if (s == NULL || rsm.sm_match->endp[no] == NULL)
	    retval = NULL;
	else
	    retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
    }

    return retval;
}

/*
 * Backs the submatch() function when it is called with the optional non-zero
 * argument: return the text of submatch "no" as a list of allocated strings,
 * one item for every line the submatch covers, with NULs represented in NLs.
 * Returns NULL when not in a ":s" command, for a non-existing submatch and
 * for any error.
 */
    list_T *
reg_submatch_list(int no)
{
    list_T	*result;
    char_u	*text;
    int		failed = FALSE;

    if (no < 0 || !can_f_submatch)
	return NULL;

    if (rsm.sm_match != NULL)
    {
	// Match in a single string: one item with the text between the start
	// and end pointer.
	char_u	*text_end = rsm.sm_match->endp[no];

	text = rsm.sm_match->startp[no];
	if (text == NULL || text_end == NULL)
	    return NULL;

	result = list_alloc();
	if (result == NULL)
	    return NULL;

	if (list_append_string(result, text, (int)(text_end - text)) == FAIL)
	    failed = TRUE;
    }
    else
    {
	// Match given by line/column positions.
	linenr_T    first_lnum = rsm.sm_mmatch->startpos[no].lnum;
	linenr_T    last_lnum = rsm.sm_mmatch->endpos[no].lnum;
	colnr_T	    first_col;
	colnr_T	    last_col;

	if (first_lnum < 0 || last_lnum < 0)
	    return NULL;

	first_col = rsm.sm_mmatch->startpos[no].col;
	last_col = rsm.sm_mmatch->endpos[no].col;

	result = list_alloc();
	if (result == NULL)
	    return NULL;

	text = reg_getline_submatch(first_lnum) + first_col;
	if (first_lnum == last_lnum)
	{
	    // Everything is in one line: from start column to end column.
	    if (list_append_string(result, text, last_col - first_col) == FAIL)
		failed = TRUE;
	}
	else
	{
	    int	    line_span = last_lnum - first_lnum;
	    int	    idx = 1;

	    // First line: from the start column to the end of the line.
	    if (list_append_string(result, text, -1) == FAIL)
		failed = TRUE;

	    // Lines in between are added completely.  A failure does not
	    // stop the loop, it is only remembered.
	    while (idx < line_span)
	    {
		text = reg_getline_submatch(first_lnum + idx);
		if (list_append_string(result, text, -1) == FAIL)
		    failed = TRUE;
		++idx;
	    }

	    // Last line: only the part before the end column.
	    text = reg_getline_submatch(last_lnum);
	    if (list_append_string(result, text, last_col) == FAIL)
		failed = TRUE;
	}
    }

    if (failed)
    {
	list_free(result);
	return NULL;
    }

    ++result->lv_refcount;
    return result;
}
#endif

/*
 * Set up the fields of "rex" that are used when matching against multiple
 * lines of buffer "buf", starting at line "lnum".
 */
    static void
init_regexec_multi(
	regmmatch_T	*rmp,
	win_T		*win,	// window in which to search or NULL
	buf_T		*buf,	// buffer in which to search
	linenr_T	lnum)	// nr of line to start looking for match
{
    // Results go into the multi-line match "rmp", the single-line match
    // pointer is cleared.
    rex.reg_mmatch = rmp;
    rex.reg_match = NULL;

    // Where to search.
    rex.reg_win = win;
    rex.reg_buf = buf;
    rex.reg_firstlnum = lnum;
    // line count of the buffer minus the first line number
    rex.reg_maxline = buf->b_ml.ml_line_count - lnum;

    // Settings taken over from "rmp".
    rex.reg_maxcol = rmp->rmm_maxcol;
    rex.reg_ic = rmp->rmm_ic;

    // Always start with these off.
    rex.reg_icombine = FALSE;
    rex.reg_line_lbr = FALSE;
}

#include "regexp_bt.c"

// Function table for the backtracking (BT) engine, implemented in
// regexp_bt.c.  The entries are presumably the "regcomp", "regfree",
// "regexec_nl" and "regexec_multi" members, which is how they are called
// elsewhere in this file.  With DEBUG defined there is one more member,
// "expr", which vim_regcomp() sets to the pattern being compiled.
static regengine_T bt_regengine =
{
    bt_regcomp,
    bt_regfree,
    bt_regexec_nl,
    bt_regexec_multi
#ifdef DEBUG
    ,(char_u *)""
#endif
};

#include "regexp_nfa.c"

// Function table for the NFA engine, implemented in regexp_nfa.c.  The
// entries are presumably the "regcomp", "regfree", "regexec_nl" and
// "regexec_multi" members, which is how they are called elsewhere in this
// file.  With DEBUG defined there is one more member, "expr", which
// vim_regcomp() sets to the pattern being compiled.
static regengine_T nfa_regengine =
{
    nfa_regcomp,
    nfa_regfree,
    nfa_regexec_nl,
    nfa_regexec_multi
#ifdef DEBUG
    ,(char_u *)""
#endif
};

// Which regexp engine to use? Needed for vim_regcomp().
// Must match with 'regexpengine'.
// vim_regcomp() sets this from "p_re" on every call and may change it when
// the pattern starts with "\%#=" or when it falls back to the backtracking
// engine.
static int regexp_engine = 0;

#ifdef DEBUG
// Names for the debug message in vim_regcomp(), indexed by engine number.
static char_u regname[][30] = {
		    "AUTOMATIC Regexp Engine",
		    "BACKTRACKING Regexp Engine",
		    "NFA Regexp Engine"
			    };
#endif

/*
 * Compile the regular expression "expr_arg" into an internal program.
 * The engine is taken from "p_re", unless the pattern starts with "\%#="
 * followed by a valid engine number, which is then used and skipped over.
 * The program is in allocated memory, use vim_regfree() to free it.
 * Returns NULL for an error.
 */
    regprog_T *
vim_regcomp(char_u *expr_arg, int re_flags)
{
    char_u	*pat = expr_arg;
    regprog_T	*compiled;
    int		emsg_count;

    regexp_engine = p_re;

    // A "\%#=" prefix overrules the engine to be used.
    if (STRNCMP(pat, "\\%#=", 4) == 0)
    {
	int wanted = pat[4] - '0';

	if (wanted != AUTOMATIC_ENGINE
		&& wanted != BACKTRACKING_ENGINE
		&& wanted != NFA_ENGINE)
	{
	    // Not a valid engine number: complain and use automatic
	    // selection.  The prefix is not skipped in this case.
	    emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
	    regexp_engine = AUTOMATIC_ENGINE;
	}
	else
	{
	    regexp_engine = wanted;
	    pat += 5;
#ifdef DEBUG
	    smsg("New regexp mode selected (%d): %s",
					      regexp_engine, regname[wanted]);
#endif
	}
    }
#ifdef DEBUG
    bt_regengine.expr = pat;
    nfa_regengine.expr = pat;
#endif
    // reg_iswordc() uses rex.reg_buf
    rex.reg_buf = curbuf;

    // Remember the error count, to find out if compiling gave a message.
    emsg_count = called_emsg;

    // The NFA engine gets the first try, unless backtracking was requested.
    // With automatic selection RE_AUTO is added to the flags.
    if (regexp_engine == BACKTRACKING_ENGINE)
	compiled = bt_regengine.regcomp(pat, re_flags);
    else if (regexp_engine == AUTOMATIC_ENGINE)
	compiled = nfa_regengine.regcomp(pat, re_flags + RE_AUTO);
    else
	compiled = nfa_regengine.regcomp(pat, re_flags);

    if (compiled == NULL)
    {
	// Compiling with the initial engine failed.
#ifdef BT_REGEXP_DEBUG_LOG
	if (regexp_engine == BACKTRACKING_ENGINE)   // debugging log for BT engine
	{
	    FILE *logfile = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");

	    if (logfile == NULL)
		semsg("(NFA) Could not open \"%s\" to write !!!",
			BT_REGEXP_DEBUG_LOG_NAME);
	    else
	    {
		fprintf(logfile, "Syntax error in \"%s\"\n", pat);
		fclose(logfile);
	    }
	}
#endif
	// The NFA engine also fails for patterns that are valid but that it
	// cannot handle well, then trying again with the backtracking engine
	// should work.  Only do this with automatic selection and when no
	// error message was given.
	if (called_emsg == emsg_count && regexp_engine == AUTOMATIC_ENGINE)
	{
	    regexp_engine = BACKTRACKING_ENGINE;
#ifdef FEAT_EVAL
	    report_re_switch(pat);
#endif
	    compiled = bt_regengine.regcomp(pat, re_flags);
	}

	if (compiled == NULL)
	    return NULL;
    }

    // Store the info needed to call regcomp() again when the engine turns
    // out to be very slow when executing it.
    compiled->re_engine = regexp_engine;
    compiled->re_flags  = re_flags;

    return compiled;
}

/*
 * Release the compiled regexp program "prog", as returned by vim_regcomp().
 * The work is done by the engine that "prog" refers to.
 * Does nothing when "prog" is NULL.
 */
    void
vim_regfree(regprog_T *prog)
{
    if (prog == NULL)
	return;

    prog->engine->regfree(prog);
}

#if defined(EXITFREE) || defined(PROTO)
    void
free_regexp_stuff(void)
{
    // Release the memory held by the file-level regexp variables: the two
    // allocated strings and the two growarrays.
    vim_free(reg_prev_sub);
    vim_free(reg_tofree);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif

#ifdef FEAT_EVAL
    static void
report_re_switch(char_u *pat)
{
    // Only give the message about switching to the backtracking engine for
    // pattern "pat" when "p_verbose" is above zero.
    if (p_verbose <= 0)
	return;

    verbose_enter();
    msg_puts(_("Switching to backtracking RE engine for pattern: "));
    msg_puts((char *)pat);
    verbose_leave();
}
#endif

#if defined(FEAT_X11) || defined(PROTO)
/*
 * Return the "re_in_use" flag of "prog", which is set while "prog" is being
 * executed.
 */
    int
regprog_in_use(regprog_T *prog)
{
    int in_use = prog->re_in_use;

    return in_use;
}
#endif

/*
 * Match a regexp against a string.
 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
 * Note: "rmp->regprog" may be freed and changed.  When switching to the
 * backtracking engine fails the previous program is kept, so that
 * "rmp->regprog" never ends up NULL or pointing to freed memory.
 * Uses curbuf for line count and 'iskeyword'.
 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    static int
vim_regexec_string(
    regmatch_T	*rmp,
    char_u	*line,  // string to match against
    colnr_T	col,    // column to start looking for match
    int		nl)
{
    int		result;
    regexec_T	rex_save;
    int		rex_in_use_save = rex_in_use;

    // Cannot use the same prog recursively, it contains state.
    if (rmp->regprog->re_in_use)
    {
	emsg(_(e_cannot_use_pattern_recursively));
	return FALSE;
    }
    rmp->regprog->re_in_use = TRUE;

    if (rex_in_use)
	// Being called recursively, save the state.
	rex_save = rex;
    rex_in_use = TRUE;

    rex.reg_startp = NULL;
    rex.reg_endp = NULL;
    rex.reg_startpos = NULL;
    rex.reg_endpos = NULL;

    result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
    rmp->regprog->re_in_use = FALSE;

    // NFA engine aborted because it's very slow.
    if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
					       && result == NFA_TOO_EXPENSIVE)
    {
	int    save_p_re = p_re;
	int    re_flags = rmp->regprog->re_flags;
	char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);

	// Make vim_regcomp() use the backtracking engine.
	p_re = BACKTRACKING_ENGINE;
	if (pat != NULL)
	{
	    regprog_T *prev_prog = rmp->regprog;

#ifdef FEAT_EVAL
	    report_re_switch(pat);
#endif
	    rmp->regprog = vim_regcomp(pat, re_flags);
	    if (rmp->regprog == NULL)
	    {
		// Somehow compiling the pattern failed now, put back the
		// previous one to avoid "regprog" becoming NULL.
		rmp->regprog = prev_prog;
	    }
	    else
	    {
		// Only free the old program once its replacement exists, so
		// that "rmp->regprog" is never left dangling.
		vim_regfree(prev_prog);

		rmp->regprog->re_in_use = TRUE;
		result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
		rmp->regprog->re_in_use = FALSE;
	    }
	    vim_free(pat);
	}

	p_re = save_p_re;
    }

    rex_in_use = rex_in_use_save;
    if (rex_in_use)
	rex = rex_save;

    return result > 0;
}

#if defined(FEAT_SPELL) || defined(FEAT_EVAL) || defined(FEAT_X11) || defined(PROTO)
/*
 * Match "line", starting at column "col", against the program "*prog".
 * "ignore_case" is stored as the "rm_ic" flag of the match.
 * Note: "*prog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_prog(
    regprog_T	**prog,
    int		ignore_case,
    char_u	*line,
    colnr_T	col)
{
    regmatch_T	rm;
    int		matched;

    rm.rm_ic = ignore_case;
    rm.regprog = *prog;
    matched = vim_regexec_string(&rm, line, col, FALSE);

    // Pass the program back, it may have been replaced while matching.
    *prog = rm.regprog;

    return matched;
}
#endif

/*
 * Match the program in "rmp" against "line", starting at column "col".
 * A "\n" in "line" is not considered to be a line break.
 * Note: "rmp->regprog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
{
    int matched = vim_regexec_string(rmp, line, col, FALSE);

    return matched;
}

/*
 * Same as vim_regexec(), except that a "\n" in "line" is considered to be a
 * line break.
 * Note: "rmp->regprog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
{
    int matched = vim_regexec_string(rmp, line, col, TRUE);

    return matched;
}

/*
 * Match a regexp against multiple lines.
 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
 * Note: "rmp->regprog" may be freed and changed.  When recompiling for the
 * backtracking engine fails, this function puts back the previous program
 * instead of leaving "rmp->regprog" NULL.
 * Uses curbuf for line count and 'iskeyword'.
 *
 * Return zero if there is no match.  Return number of lines contained in the
 * match otherwise.
 */
    long
vim_regexec_multi(
    regmmatch_T *rmp,
    win_T       *win,		// window in which to search or NULL
    buf_T       *buf,		// buffer in which to search
    linenr_T	lnum,		// nr of line to start looking for match
    colnr_T	col,		// column to start looking for match
    int		*timed_out)	// flag is set when timeout limit reached
{
    int		result;
    regexec_T	rex_save;
    int		rex_in_use_save = rex_in_use;

    // Cannot use the same prog recursively, it contains state.
    if (rmp->regprog->re_in_use)
    {
	emsg(_(e_cannot_use_pattern_recursively));
	return FALSE;
    }
    rmp->regprog->re_in_use = TRUE;

    if (rex_in_use)
	// Being called recursively, save the state.
	rex_save = rex;
    rex_in_use = TRUE;

    result = rmp->regprog->engine->regexec_multi(
				      rmp, win, buf, lnum, col, timed_out);
    rmp->regprog->re_in_use = FALSE;

    // NFA engine aborted because it's very slow.
    if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
					       && result == NFA_TOO_EXPENSIVE)
    {
	int    save_p_re = p_re;
	int    re_flags = rmp->regprog->re_flags;
	// Copy the pattern, the program that holds it may be freed below.
	char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);

	// Make vim_regcomp() use the backtracking engine; "p_re" is restored
	// below.
	p_re = BACKTRACKING_ENGINE;
	if (pat != NULL)
	{
	    regprog_T *prev_prog = rmp->regprog;

#ifdef FEAT_EVAL
	    report_re_switch(pat);
#endif
#ifdef FEAT_SYN_HL
	    // checking for \z misuse was already done when compiling for NFA,
	    // allow all here
	    reg_do_extmatch = REX_ALL;
#endif
	    rmp->regprog = vim_regcomp(pat, re_flags);
#ifdef FEAT_SYN_HL
	    reg_do_extmatch = 0;
#endif
	    if (rmp->regprog == NULL)
	    {
		// Somehow compiling the pattern failed now, put back the
		// previous one to avoid "regprog" becoming NULL.
		rmp->regprog = prev_prog;
	    }
	    else
	    {
		// The new program replaces the old one, which is no longer
		// needed.  Then try matching again.
		vim_regfree(prev_prog);

		rmp->regprog->re_in_use = TRUE;
		result = rmp->regprog->engine->regexec_multi(
				      rmp, win, buf, lnum, col, timed_out);
		rmp->regprog->re_in_use = FALSE;
	    }
	    vim_free(pat);
	}
	p_re = save_p_re;
    }

    // Restore the state of an outer, recursive call.
    rex_in_use = rex_in_use_save;
    if (rex_in_use)
	rex = rex_save;

    // Zero and negative results are all reported as "no match".
    return result <= 0 ? 0 : result;
}
-												patch 7.4.2293
Problem:    Modelines in source code are inconsistent.
Solution:   Use the same line in most files.  Add 'noet'.  (Naruhiko Nishino)

											
										
										
											2016-08-29 22:49:24 +02:00
+								/* vi:set ts=8 sts=4 sw=4 noet:
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 *
 								 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
 								 */
-												patch 8.1.1202: always get regexp debugging logs when building with -DDEBUG

Problem:    Always get regexp debugging logs when building with -DDEBUG.
Solution:   By default do not create regexp debugging logs. (Ken Takata)

											
										
										
											2019-04-25 20:07:51 +02:00
+								// By default: do not create debugging logs or files related to regular
 								// expressions, even when compiling with -DDEBUG.
 								// Uncomment the second line to get the regexp debugging.
 								#undef DEBUG
 								// #define DEBUG
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								#include "vim.h"
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								#ifdef DEBUG
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								// show/save debugging data when BT engine is used
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								# define BT_REGEXP_DUMP
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								// save the debugging data to a file instead of displaying it
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								# define BT_REGEXP_LOG
-												updated for version 7.3.978
Problem:    Regexp debug logs don't have a good name.
Solution:   Use clear names and make it possible to write logs for the old and
            new engines separately. (Taro Muraoka)

											
										
										
											2013-05-20 21:49:13 +02:00
+								# define BT_REGEXP_DEBUG_LOG
 								# define BT_REGEXP_DEBUG_LOG_NAME	"bt_regexp_debug.log"
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								#endif
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.2.5057: using gettimeofday() for timeout is very inefficient

Problem:    Using gettimeofday() for timeout is very inefficient.
Solution:   Set a platform dependent timer. (Paul Ollis, closes #10505)

											
										
										
											2022-06-05 16:55:54 +01:00
+								#ifdef FEAT_RELTIME
-												patch 8.2.5141: using "volatile int" in a signal handler might be wrong

Problem:    Using "volatile int" in a signal handler might be wrong.
Solution:   Use "volatile sig_atomic_t".

											
										
										
											2022-06-20 13:38:33 +01:00
+								static sig_atomic_t dummy_timeout_flag = 0;
 								static volatile sig_atomic_t *timeout_flag = &dummy_timeout_flag;
-												patch 8.2.5057: using gettimeofday() for timeout is very inefficient

Problem:    Using gettimeofday() for timeout is very inefficient.
Solution:   Set a platform dependent timer. (Paul Ollis, closes #10505)

											
										
										
											2022-06-05 16:55:54 +01:00
+								#endif
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								/*
 								 * Magic characters have a special meaning, they don't match literally.
 								 * Magic characters are negative.  This separates them from literal characters
 								 * (possibly multi-byte).  Only ASCII characters can be Magic.
 								 */
 								#define Magic(x)	((int)(x) - 256)
 								#define un_Magic(x)	((x) + 256)
 								#define is_Magic(x)	((x) < 0)
 								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								no_Magic(int x)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    if (is_Magic(x))
 									return un_Magic(x);
 								    return x;
 								}
 								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								toggle_Magic(int x)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    if (is_Magic(x))
 									return un_Magic(x);
 								    return Magic(x);
 								}
-												patch 8.2.5057: using gettimeofday() for timeout is very inefficient

Problem:    Using gettimeofday() for timeout is very inefficient.
Solution:   Set a platform dependent timer. (Paul Ollis, closes #10505)

											
										
										
											2022-06-05 16:55:54 +01:00
+								#ifdef FEAT_RELTIME
-												patch 9.0.0282: a nested timout stops the previous timeout

Problem:    A nested timeout stops the previous timeout.
Solution:   Ignore any nested timeout.

											
										
										
											2022-08-26 21:33:04 +01:00
+								static int timeout_nesting = 0;
 								/*
 								 * Start a timer that will cause the regexp to abort after "msec".
 								 * This doesn't work well recursively.  In case it happens anyway, the first
 								 * set timeout will prevail, nested ones are ignored.
 								 * The caller must make sure there is a matching disable_regexp_timeout() call!
 								 */
-												patch 8.2.5057: using gettimeofday() for timeout is very inefficient

Problem:    Using gettimeofday() for timeout is very inefficient.
Solution:   Set a platform dependent timer. (Paul Ollis, closes #10505)

											
										
										
											2022-06-05 16:55:54 +01:00
+								    void
 								init_regexp_timeout(long msec)
 								{
-												patch 9.0.0282: a nested timout stops the previous timeout

Problem:    A nested timeout stops the previous timeout.
Solution:   Ignore any nested timeout.

											
										
										
											2022-08-26 21:33:04 +01:00
+								    if (timeout_nesting == 0)
 									timeout_flag = start_timeout(msec);
 								    ++timeout_nesting;
-												patch 8.2.5057: using gettimeofday() for timeout is very inefficient

Problem:    Using gettimeofday() for timeout is very inefficient.
Solution:   Set a platform dependent timer. (Paul Ollis, closes #10505)

											
										
										
											2022-06-05 16:55:54 +01:00
+								}
 								    void
 								disable_regexp_timeout(void)
 								{
-												patch 9.0.0282: a nested timout stops the previous timeout

Problem:    A nested timeout stops the previous timeout.
Solution:   Ignore any nested timeout.

											
										
										
											2022-08-26 21:33:04 +01:00
+								    if (timeout_nesting == 0)
 									iemsg("disable_regexp_timeout() called without active timer");
 								    else if (--timeout_nesting == 0)
 								    {
 									stop_timeout();
 									timeout_flag = &dummy_timeout_flag;
 								    }
-												patch 8.2.5057: using gettimeofday() for timeout is very inefficient

Problem:    Using gettimeofday() for timeout is very inefficient.
Solution:   Set a platform dependent timer. (Paul Ollis, closes #10505)

											
										
										
											2022-06-05 16:55:54 +01:00
+								}
 								#endif
-												patch 9.0.0513: may not be able to use a pattern ad the debug prompt

Problem:    May not be able to use a pattern at the debug prompt.
Solution:   Temporarily disable the timeout. (closes #11164)

											
										
										
											2022-09-20 13:51:25 +01:00
+								#if defined(FEAT_EVAL) || defined(PROTO)
 								# ifdef FEAT_RELTIME
 								static sig_atomic_t *saved_timeout_flag;
 								# endif
 								/*
 								 * Used at the debug prompt: disable the timeout so that expression evaluation
 								 * can used patterns.
 								 * Must be followed by calling restore_timeout_for_debugging().
 								 */
 								    void
 								save_timeout_for_debugging(void)
 								{
 								# ifdef FEAT_RELTIME
 								    saved_timeout_flag = (sig_atomic_t *)timeout_flag;
 								    timeout_flag = &dummy_timeout_flag;
 								# endif
 								}
 								    void
 								restore_timeout_for_debugging(void)
 								{
 								# ifdef FEAT_RELTIME
 								    timeout_flag = saved_timeout_flag;
 								# endif
 								}
 								#endif
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * The first byte of the BT regexp internal "program" is actually this magic
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 * number; the start node begins in the second byte.  It's used to catch the
 								 * most severe mutilation of the program by the caller.
 								 */
 								#define REGMAGIC	0234
 								/*
 								 * Utility definitions.
 								 */
 								#define UCHARAT(p)	((int)*(char_u *)(p))
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								// Used for an error (down from) vim_regcomp(): give the error message, set
 								// rc_did_emsg and return NULL
-												patch 8.1.0743: giving error messages is not flexible

Problem:    Giving error messages is not flexible.
Solution:   Add semsg().  Change argument from "char_u *" to "char *", also
            for msg() and get rid of most MSG macros. (Ozaki Kiichi, closes
            #3302)  Also make emsg() accept a "char *" argument.  Get rid of
            an enormous number of type casts.

											
										
										
											2019-01-13 23:38:42 +01:00
+								#define EMSG_RET_NULL(m) return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
 								#define IEMSG_RET_NULL(m) return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
 								#define EMSG_RET_FAIL(m) return (emsg((m)), rc_did_emsg = TRUE, FAIL)
 								#define EMSG2_RET_NULL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
-												patch 8.1.0748: using sprintf() instead of semsg()

Problem:    Using sprintf() instead of semsg().
Solution:   Use semsg().  Fix bug with E888. (Ozaki Kiichi, closes #3801)

											
										
										
											2019-01-14 22:46:15 +01:00
+								#define EMSG3_RET_NULL(m, c, a) return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
-												patch 8.1.0743: giving error messages is not flexible

Problem:    Giving error messages is not flexible.
Solution:   Add semsg().  Change argument from "char_u *" to "char *", also
            for msg() and get rid of most MSG macros. (Ozaki Kiichi, closes
            #3302)  Also make emsg() accept a "char *" argument.  Get rid of
            an enormous number of type casts.

											
										
										
											2019-01-13 23:38:42 +01:00
+								#define EMSG2_RET_FAIL(m, c) return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
-												patch 8.2.3985: error messages are spread out

Problem:    Error messages are spread out.
Solution:   Move more error messages to errors.h.

											
										
										
											2022-01-02 19:25:26 +00:00
+								#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.0.0074
Problem:    Cannot make Vim fail on an internal error.
Solution:   Add IEMSG() and IEMSG2(). (Domenique Pelle)  Avoid reporting an
            internal error without mentioning where.

											
										
										
											2016-11-10 20:01:45 +01:00
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								#define MAX_LIMIT	(32767L << 16L)
 								#define NOT_MULTI	0
 								#define MULTI_ONE	1
 								#define MULTI_MULT	2
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
 								// return values for regmatch()
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								#define RA_FAIL		1	// something failed, abort
 								#define RA_CONT		2	// continue in inner loop
 								#define RA_BREAK	3	// break inner loop
 								#define RA_MATCH	4	// successful match
 								#define RA_NOMATCH	5	// didn't match
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								/*
 								 * Return NOT_MULTI if c is not a "multi" operator.
 								 * Return MULTI_ONE if c is a single "multi" operator.
 								 * Return MULTI_MULT if c is a multi "multi" operator.
 								 */
 								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								re_multi_type(int c)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
 									return MULTI_ONE;
 								    if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
 									return MULTI_MULT;
 								    return NOT_MULTI;
 								}
-												updated for version 7.0093

											
										
										
											2005-06-25 23:04:51 +00:00
+								static char_u		*reg_prev_sub = NULL;
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								static size_t		reg_prev_sublen = 0;
-												updated for version 7.0093

											
										
										
											2005-06-25 23:04:51 +00:00
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								/*
 								 * REGEXP_INRANGE contains all characters which are always special in a []
 								 * range after '\'.
 								 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 								 * These are:
 								 *  \n	- New line (NL).
 								 *  \r	- Carriage Return (CR).
 								 *  \t	- Tab (TAB).
 								 *  \e	- Escape (ESC).
 								 *  \b	- Backspace (Ctrl_H).
-												updated for version 7.0016

											
										
										
											2004-09-13 20:26:32 +00:00
+								 *  \d  - Character code in decimal, eg \d123
 								 *  \o	- Character code in octal, eg \o40
 								 *  \x	- Character code in hex, eg \x4a
 								 *  \u	- Multibyte character code, eg \u20ac
 								 *  \U	- Long multibyte character code, eg \U12345678
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
 								static char_u REGEXP_INRANGE[] = "]^-n\\";
-												updated for version 7.0016

											
										
										
											2004-09-13 20:26:32 +00:00
+								static char_u REGEXP_ABBR[] = "nrtebdoxuU";
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
 								/*
 								 * Translate '\x' to its control character, except "\n", which is Magic.
 								 */
 								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								backslash_trans(int c)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    switch (c)
 								    {
 									case 'r':   return CAR;
 									case 't':   return TAB;
 									case 'e':   return ESC;
 									case 'b':   return BS;
 								    }
 								    return c;
 								}
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								enum	// identifiers for "[:name:]" character classes, as returned by get_char_class()
 								{
 								    // NOTE: the order here is independent of char_class_tab[], which is
 								    // kept sorted by class name for bsearch().
 								    CLASS_ALNUM = 0,
 								    CLASS_ALPHA,
 								    CLASS_BLANK,
 								    CLASS_CNTRL,
 								    CLASS_DIGIT,
 								    CLASS_GRAPH,
 								    CLASS_LOWER,
 								    CLASS_PRINT,
 								    CLASS_PUNCT,
 								    CLASS_SPACE,
 								    CLASS_UPPER,
 								    CLASS_XDIGIT,
 								    CLASS_TAB,
 								    CLASS_RETURN,
 								    CLASS_BACKSPACE,
 								    CLASS_ESCAPE,
 								    CLASS_IDENT,
 								    CLASS_KEYWORD,
 								    CLASS_FNAME,
 								    CLASS_NONE = 99	// no character class name was recognized
 								};
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								/*
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+								 * Check for a character class name "[:name:]".  "pp" points to the '['.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
 								 * recognized.  Otherwise "pp" is advanced to after the item.
 								 */
 								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								get_char_class(char_u **pp)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								    // must be sorted by the 'value' field because it is used by bsearch()!
 								    static keyvalue_T char_class_tab[] =
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+									KEYVALUE_ENTRY(CLASS_ALNUM, "alnum:]"),
 									KEYVALUE_ENTRY(CLASS_ALPHA, "alpha:]"),
 									KEYVALUE_ENTRY(CLASS_BACKSPACE, "backspace:]"),
 									KEYVALUE_ENTRY(CLASS_BLANK, "blank:]"),
 									KEYVALUE_ENTRY(CLASS_CNTRL, "cntrl:]"),
 									KEYVALUE_ENTRY(CLASS_DIGIT, "digit:]"),
 									KEYVALUE_ENTRY(CLASS_ESCAPE, "escape:]"),
 									KEYVALUE_ENTRY(CLASS_FNAME, "fname:]"),
 									KEYVALUE_ENTRY(CLASS_GRAPH, "graph:]"),
 									KEYVALUE_ENTRY(CLASS_IDENT, "ident:]"),
 									KEYVALUE_ENTRY(CLASS_KEYWORD, "keyword:]"),
 									KEYVALUE_ENTRY(CLASS_LOWER, "lower:]"),
 									KEYVALUE_ENTRY(CLASS_PRINT, "print:]"),
 									KEYVALUE_ENTRY(CLASS_PUNCT, "punct:]"),
 									KEYVALUE_ENTRY(CLASS_RETURN, "return:]"),
 									KEYVALUE_ENTRY(CLASS_SPACE, "space:]"),
 									KEYVALUE_ENTRY(CLASS_TAB, "tab:]"),
 									KEYVALUE_ENTRY(CLASS_UPPER, "upper:]"),
 									KEYVALUE_ENTRY(CLASS_XDIGIT, "xdigit:]")
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    };
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								    // check that the value of "pp" has a chance of matching
 								    if ((*pp)[1] == ':' && ASCII_ISLOWER((*pp)[2])
 											&& ASCII_ISLOWER((*pp)[3]) && ASCII_ISLOWER((*pp)[4]))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+									keyvalue_T target;
 									keyvalue_T *entry;
 									// this function can be called repeatedly with the same value for "pp"
 									// so we cache the last found entry.
 									static keyvalue_T *last_entry = NULL;
 									target.key = 0;
-												patch 9.1.0828: string_T struct could be used more often

Problem:  string_T struct could be used more often
Solution: Refactor code and make use of string_T struct
          for key-value pairs, reformat overlong lines
          (John Marriott)

closes: #15975

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-11-02 15:59:01 +01:00
+									target.value.string = *pp + 2;
 									target.value.length = 0;	// not used, see cmp_keyvalue_value_n()
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
 									if (last_entry != NULL && cmp_keyvalue_value_n(&target, last_entry) == 0)
 									    entry = last_entry;
 									else
 									    entry = (keyvalue_T *)bsearch(&target, &char_class_tab,
 													ARRAY_LENGTH(char_class_tab),
 													sizeof(char_class_tab[0]), cmp_keyvalue_value_n);
 									if (entry != NULL)
 									{
 									    last_entry = entry;
-												patch 9.1.0828: string_T struct could be used more often

Problem:  string_T struct could be used more often
Solution: Refactor code and make use of string_T struct
          for key-value pairs, reformat overlong lines
          (John Marriott)

closes: #15975

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-11-02 15:59:01 +01:00
+									    *pp += entry->value.length + 2;
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+									    return entry->key;
 									}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    }
 								    return CLASS_NONE;
 								}
 								/*
 								 * Specific version of character class functions.
 								 * Using a table to keep this fast.
 								 */
 								static short	class_tab[256];
 								#define	    RI_DIGIT	0x01
 								#define	    RI_HEX	0x02
 								#define	    RI_OCTAL	0x04
 								#define	    RI_WORD	0x08
 								#define	    RI_HEAD	0x10
 								#define	    RI_ALPHA	0x20
 								#define	    RI_LOWER	0x40
 								#define	    RI_UPPER	0x80
 								#define	    RI_WHITE	0x100
 								    static void
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								init_class_tab(void)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    int		i;
 								    static int	done = FALSE;
 								    if (done)
 									return;
 								    for (i = 0; i < 256; ++i)
 								    {
 									if (i >= '0' && i <= '7')
 									    class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
 									else if (i >= '8' && i <= '9')
 									    class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
 									else if (i >= 'a' && i <= 'f')
 									    class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 									else if (i >= 'g' && i <= 'z')
 									    class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 									else if (i >= 'A' && i <= 'F')
 									    class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 									else if (i >= 'G' && i <= 'Z')
 									    class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 									else if (i == '_')
 									    class_tab[i] = RI_WORD + RI_HEAD;
 									else
 									    class_tab[i] = 0;
 								    }
 								    class_tab[' '] |= RI_WHITE;
 								    class_tab['\t'] |= RI_WHITE;
 								    done = TRUE;
 								}
-												patch 8.2.4402: missing parenthesis may cause unexpected problems

Problem:    Missing parenthesis may cause unexpected problems.
Solution:   Add more parentheses in macros. (closes #9788)

											
										
										
											2022-02-16 19:24:07 +00:00
+								#define ri_digit(c)	((c) < 0x100 && (class_tab[c] & RI_DIGIT))
 								// Each ri_*() macro tests one RI_ bit of class_tab[] for character "c".
 								// Characters of 0x100 and above never match.  class_tab[] is filled in
 								// by init_class_tab().
 								// NOTE(review): only the upper bound is checked, so "c" must not be
 								// negative; "c" is also evaluated twice, so avoid side effects.
 								#define ri_hex(c)	((c) < 0x100 && (class_tab[c] & RI_HEX))
 								#define ri_octal(c)	((c) < 0x100 && (class_tab[c] & RI_OCTAL))
 								#define ri_word(c)	((c) < 0x100 && (class_tab[c] & RI_WORD))
 								#define ri_head(c)	((c) < 0x100 && (class_tab[c] & RI_HEAD))
 								#define ri_alpha(c)	((c) < 0x100 && (class_tab[c] & RI_ALPHA))
 								#define ri_lower(c)	((c) < 0x100 && (class_tab[c] & RI_LOWER))
 								#define ri_upper(c)	((c) < 0x100 && (class_tab[c] & RI_UPPER))
 								#define ri_white(c)	((c) < 0x100 && (class_tab[c] & RI_WHITE))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								// flags for regflags
 								#define RF_ICASE    1	// ignore case
 								#define RF_NOICASE  2	// don't ignore case
 								#define RF_HASNL    4	// can match a NL
 								#define RF_ICOMBINE 8	// ignore combining characters
 								#define RF_LOOKBH   16	// uses "\@<=" or "\@<!"
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
 								/*
 								 * Global work variables for vim_regcomp().
 								 */
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								static char_u	*regparse;	// Input-scan pointer.
 								static int	regnpar;	// () count.
-												patch 8.2.2278: falling back to old regexp engine can some patterns

Problem:    Falling back to old regexp engine can break some patterns.
Solution:   Do not fall back once [[:lower:]] or [[:upper:]] is used.
            (Christian Brabandt, closes #7572)

											
										
										
											2021-01-02 17:43:49 +01:00
+								static int	wants_nfa;	// regex should use NFA engine
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								#ifdef FEAT_SYN_HL
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								static int	regnzpar;	// \z() count.
 								static int	re_has_z;	// \z item detected
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								#endif
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								static unsigned	regflags;	// RF_ flags for prog
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								#if defined(FEAT_SYN_HL) || defined(PROTO)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								static int	had_eol;	// TRUE when EOL found by vim_regcomp()
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								#endif
-												patch 8.2.2295: incsearch does not detect empty pattern properly

Problem:    Incsearch does not detect empty pattern properly.
Solution:   Return magic state when skipping over a pattern. (Christian
            Brabandt, closes #7612, closes #6420)

											
										
										
											2021-01-04 12:42:13 +01:00
+								static magic_T	reg_magic;	// magicness of the pattern
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								static int	reg_string;	// matching with a string instead of a buffer
 												// line
 								static int	reg_strict;	// "[abc" is illegal
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
 								/*
 								 * META contains all characters that may be magic, except '^' and '$'.
 								 */
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								// META[] is used often enough to justify turning it into a table.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								static char_u META_flags[] = {
 , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								//		   %  &     (  )  *  +	      .
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								//     1  2  3	4  5  6  7  8  9	<  =  >  ?
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								//  @  A     C	D     F     H  I     K	L  M	 O
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								//  P	     S	   U  V  W  X	  Z  [		 _
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								//     a     c	d     f     h  i     k	l  m  n  o
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								//  p	     s	   u  v  w  x	  z  {	|     ~
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
 								};
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								static int	curchr;		// currently parsed character
 								// Previous character.  Note: prevchr is sometimes -1 when we are not at the
 								// start, eg in /[ ^I]^ the pattern was never found even if it existed,
 								// because ^ was taken to be magic -- webb
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								static int	prevchr;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								static int	prevprevchr;	// previous-previous character
 								static int	nextchr;	// used for ungetchr()
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								// arguments for reg()
 								#define REG_NOPAREN	0	// toplevel reg()
 								#define REG_PAREN	1	// \(\)
 								#define REG_ZPAREN	2	// \z(\)
 								#define REG_NPAREN	3	// \%(\)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												updated for version 7.3.1087
Problem:    A leading star is not seen as a normal char when \{} follows.
Solution:   Save and restore the parse state properly.

											
										
										
											2013-06-01 14:42:56 +02:00
+								typedef struct	// copy of the pattern-parsing state; fields are named after the variables they hold
 								{
 								     // NOTE(review): prevchr_len, at_start and prev_at_start presumably
 								     // mirror variables declared outside this view — confirm.
 								     char_u	*regparse;	// input-scan pointer
 								     int	prevchr_len;
 								     int	curchr;		// currently parsed character
 								     int	prevchr;	// previous character
 								     int	prevprevchr;	// previous-previous character
 								     int	nextchr;	// used for ungetchr()
 								     int	at_start;
 								     int	prev_at_start;
 								     int	regnpar;	// () count
 								} parse_state_T;
-												patch 7.4.1199
Problem:    Still using __ARGS.
Solution:   Remove __ARGS in several files. (script by Hirohito Higashi)

											
										
										
											2016-01-29 22:47:03 +01:00
+								static void	initchr(char_u *);
 								static int	getchr(void);
 								static void	skipchr_keepstart(void);
 								static int	peekchr(void);
 								static void	skipchr(void);
 								static void	ungetchr(void);
-												patch 8.0.1254: undefined left shift in gethexchrs()

Problem:    Undefined left shift in gethexchrs(). (geeknik)
Solution:   Use unsigned long. (idea by Christian Brabandt, closes #2255)

											
										
										
											2017-11-02 22:29:38 +01:00
+								static long	gethexchrs(int maxinputlen);
 								static long	getoctchrs(void);
 								static long	getdecchrs(void);
-												patch 7.4.1199
Problem:    Still using __ARGS.
Solution:   Remove __ARGS in several files. (script by Hirohito Higashi)

											
										
										
											2016-01-29 22:47:03 +01:00
+								static int	coll_get_char(void);
 								static int	prog_magic_wrong(void);
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								static int	cstrncmp(char_u *s1, char_u *s2, int *n);
 								static char_u	*cstrchr(char_u *, int);
 								static int	re_mult_next(char *what);
-												patch 8.1.0862: no verbose version of character classes

Problem:    No verbose version of character classes.
Solution:   Add [:ident:], [:keyword:] and [:fname:]. (Ozaki Kiichi,
            closes #1373)

											
										
										
											2019-01-31 15:34:40 +01:00
+								static int	reg_iswordc(int);
-												patch 8.2.2278: falling back to old regexp engine can some patterns

Problem:    Falling back to old regexp engine can break some patterns.
Solution:   Do not fall back once [[:lower:]] or [[:upper:]] is used.
            (Christian Brabandt, closes #7572)

											
										
										
											2021-01-02 17:43:49 +01:00
+								#ifdef FEAT_EVAL
 								static void report_re_switch(char_u *pat);
 								#endif
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								static regengine_T bt_regengine;
 								static regengine_T nfa_regengine;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								/*
 								 * Return TRUE if compiled regular expression "prog" can match a line break.
 								 */
 								    int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								re_multiline(regprog_T *prog)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    return (prog->regflags & RF_HASNL);
 								}
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+								/*
 								 * Check for an equivalence class name "[=a=]".  "pp" points to the '['.
 								 * Returns a character representing the class. Zero means that no item was
 								 * recognized.  Otherwise "pp" is advanced to after the item.
 								 */
 								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								get_equi_class(char_u **pp)
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+								{
 								    int		c;
 								    int		l = 1;
 								    char_u	*p = *pp;
-												patch 8.1.0934: invalid memory access in search pattern

Problem:    Invalid memory access in search pattern. (Kuang-che Wu)
Solution:   Check for incomplete equivalence class. (closes #3970)

											
										
										
											2019-02-16 17:07:47 +01:00
+								    if (p[1] == '=' && p[2] != NUL)
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+								    {
 									if (has_mbyte)
-												updated for version 7.0127

											
										
										
											2005-08-10 21:07:57 +00:00
+									    l = (*mb_ptr2len)(p + 2);
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+									if (p[l + 2] == '=' && p[l + 3] == ']')
 									{
 									    if (has_mbyte)
 										c = mb_ptr2char(p + 2);
 									    else
 										c = p[2];
 									    *pp += l + 4;
 									    return c;
 									}
 								    }
 								    return 0;
 								}
 								/*
 								 * Check for a collating element "[.a.]".  "pp" points to the '['.
 								 * Returns a character. Zero means that no item was recognized.  Otherwise
 								 * "pp" is advanced to after the item.
 								 * Currently only single characters are recognized!
 								 */
 								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								get_coll_element(char_u **pp)
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+								{
 								    int		c;
 								    int		l = 1;
 								    char_u	*p = *pp;
-												patch 8.1.0937: invalid memory access in search pattern

Problem:    Invalid memory access in search pattern. (Kuang-che Wu)
Solution:   Check for incomplete collation element. (Dominique Pelle,
            closes #3985)

											
										
										
											2019-02-17 13:53:34 +01:00
+								    if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+								    {
 									if (has_mbyte)
-												updated for version 7.0127

											
										
										
											2005-08-10 21:07:57 +00:00
+									    l = (*mb_ptr2len)(p + 2);
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+									if (p[l + 2] == '.' && p[l + 3] == ']')
 									{
 									    if (has_mbyte)
 										c = mb_ptr2char(p + 2);
 									    else
 										c = p[2];
 									    *pp += l + 4;
 									    return c;
 									}
 								    }
 								    return 0;
 								}
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
 								static int reg_cpo_bsl; // 'cpoptions' contains '\' flag
-												updated for version 7.3.1119
Problem:    Flags in 'cpo' are searched for several times.
Solution:   Store the result and re-use the flags.

											
										
										
											2013-06-05 12:43:09 +02:00
 								    static void
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								get_cpo_flags(void)
-												updated for version 7.3.1119
Problem:    Flags in 'cpo' are searched for several times.
Solution:   Store the result and re-use the flags.

											
										
										
											2013-06-05 12:43:09 +02:00
+								{
 								    reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
 								    reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
 								}
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
 								/*
 								 * Skip over a "[]" range.
 								 * "p" must point to the character after the '['.
 								 * The returned pointer is on the matching ']', or the terminating NUL.
 								 */
 								    static char_u *
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								skip_anyof(char_u *p)
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+								{
 								    int		l;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    if (*p == '^')	// Complement of range.
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+									++p;
 								    if (*p == ']' || *p == '-')
 									++p;
 								    while (*p != NUL && *p != ']')
 								    {
-												updated for version 7.0127

											
										
										
											2005-08-10 21:07:57 +00:00
+									if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+									    p += l;
 									else
 									    if (*p == '-')
 									    {
 										++p;
 										if (*p != ']' && *p != NUL)
-												patch 8.0.0451: some macros are in lower case

Problem:    Some macros are in lower case.
Solution:   Make a few more macros upper case. Avoid lower case macros use an
            argument twice.

											
										
										
											2017-03-12 19:22:36 +01:00
+										    MB_PTR_ADV(p);
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+									    }
 									else if (*p == '\\'
-												updated for version 7.3.1119
Problem:    Flags in 'cpo' are search for several times.
Solution:   Store the result and re-use the flags.

											
										
										
											2013-06-05 12:43:09 +02:00
+										&& !reg_cpo_bsl
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+										&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
-												updated for version 7.3.1119
Problem:    Flags in 'cpo' are search for several times.
Solution:   Store the result and re-use the flags.

											
										
										
											2013-06-05 12:43:09 +02:00
+										    || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+									    p += 2;
 									else if (*p == '[')
 									{
 									    if (get_char_class(&p) == CLASS_NONE
 										    && get_equi_class(&p) == 0
-												patch 7.4.736
Problem:    Invalid memory access.
Solution:   Avoid going over the end of a NUL terminated string. (Dominique
            Pelle)

											
										
										
											2015-06-09 20:39:24 +02:00
+										    && get_coll_element(&p) == 0
 										    && *p != NUL)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										++p; // it is not a class name and not NUL
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
+									}
 									else
 									    ++p;
 								    }
 								    return p;
 								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								/*
 								 * Skip past regular expression.
-												patch 8.2.0612: Vim9: no check for space before #comment

Problem:    Vim9: no check for space before #comment.
Solution:   Add space checks.

											
										
										
											2020-04-20 19:42:10 +02:00
+								 * Stop at end of "startp" or where "delim" is found ('/', '?', etc).
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 * Take care of characters with a backslash in front of it.
 								 * Skip strings inside [ and ].
-												patch 8.2.0502: Vim9: some code is not tested

Problem:    Vim9: some code is not tested.
Solution:   Add more tests.  Fix uncovered problems.

											
										
										
											2020-04-02 21:13:25 +02:00
+								 */
 								    char_u *
 								skip_regexp(
 								    char_u	*startp,
-												patch 8.2.0612: Vim9: no check for space before #comment

Problem:    Vim9: no check for space before #comment.
Solution:   Add space checks.

											
										
										
											2020-04-20 19:42:10 +02:00
+								    int		delim,
-												patch 8.2.0502: Vim9: some code is not tested

Problem:    Vim9: some code is not tested.
Solution:   Add more tests.  Fix uncovered problems.

											
										
										
											2020-04-02 21:13:25 +02:00
+								    int		magic)
 								{
-												patch 8.2.2295: incsearch does not detect empty pattern properly

Problem:    Incsearch does not detect empty pattern properly.
Solution:   Return magic state when skipping over a pattern. (Christian
            Brabandt, closes #7612, closes #6420)

											
										
										
											2021-01-04 12:42:13 +01:00
+								    return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
-												patch 8.2.0612: Vim9: no check for space before #comment

Problem:    Vim9: no check for space before #comment.
Solution:   Add space checks.

											
										
										
											2020-04-20 19:42:10 +02:00
+								}
 								/*
 * Like skip_regexp(), but insist that the pattern is terminated by "delim".
 * Returns a pointer to the delimiter.  When the scan stopped on anything
 * else, an error message that includes "startp" is given and NULL is
 * returned.
 */
    char_u *
skip_regexp_err(
    char_u	*startp,
    int		delim,
    int		magic)
{
    char_u	*end = skip_regexp(startp, delim, magic);

    if (*end == delim)
	return end;

    semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
    return NULL;
}
 								/*
 								 * skip_regexp() with extra arguments:
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
 								 * expression and change "\?" to "?".  If "*newp" is not NULL the expression
 								 * is changed in-place.
-												patch 8.2.0502: Vim9: some code is not tested

Problem:    Vim9: some code is not tested.
Solution:   Add more tests.  Fix uncovered problems.

											
										
										
											2020-04-02 21:13:25 +02:00
+								 * If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
-												patch 8.2.2295: incsearch does not detect empty pattern properly

Problem:    Incsearch does not detect empty pattern properly.
Solution:   Return magic state when skipping over a pattern. (Christian
            Brabandt, closes #7612, closes #6420)

											
										
										
											2021-01-04 12:42:13 +01:00
+								 * If "magic_val" is not NULL, returns the effective magicness of the pattern
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
 								    char_u *
-												patch 8.2.0502: Vim9: some code is not tested

Problem:    Vim9: some code is not tested.
Solution:   Add more tests.  Fix uncovered problems.

											
										
										
											2020-04-02 21:13:25 +02:00
+								skip_regexp_ex(
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								    char_u	*startp,
 								    int		dirc,
 								    int		magic,
-												patch 8.2.0502: Vim9: some code is not tested

Problem:    Vim9: some code is not tested.
Solution:   Add more tests.  Fix uncovered problems.

											
										
										
											2020-04-02 21:13:25 +02:00
+								    char_u	**newp,
-												patch 8.2.2295: incsearch does not detect empty pattern properly

Problem:    Incsearch does not detect empty pattern properly.
Solution:   Return magic state when skipping over a pattern. (Christian
            Brabandt, closes #7612, closes #6420)

											
										
										
											2021-01-04 12:42:13 +01:00
+								    int		*dropped,
 								    magic_T	*magic_val)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.2.2295: incsearch does not detect empty pattern properly

Problem:    Incsearch does not detect empty pattern properly.
Solution:   Return magic state when skipping over a pattern. (Christian
            Brabandt, closes #7612, closes #6420)

											
										
										
											2021-01-04 12:42:13 +01:00
+								    magic_T	mymagic;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    char_u	*p = startp;
-												patch 9.1.0438: Wrong Ex command executed when :g uses '?' as delimiter

Problem:  Wrong Ex command executed when :g uses '?' as delimiter and
          pattern contains escaped '?'.
Solution: Don't use "*newp" when it's not allocated (zeertzjq).

closes: #14837

Signed-off-by: zeertzjq <zeertzjq@outlook.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-24 07:37:36 +02:00
+								    size_t	startplen = 0;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
 								    if (magic)
 									mymagic = MAGIC_ON;
 								    else
 									mymagic = MAGIC_OFF;
-												updated for version 7.3.1119
Problem:    Flags in 'cpo' are search for several times.
Solution:   Store the result and re-use the flags.

											
										
										
											2013-06-05 12:43:09 +02:00
+								    get_cpo_flags();
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.0.0451: some macros are in lower case

Problem:    Some macros are in lower case.
Solution:   Make a few more macros upper case. Avoid lower case macros use an
            argument twice.

											
										
										
											2017-03-12 19:22:36 +01:00
+								    for (; p[0] != NUL; MB_PTR_ADV(p))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									if (p[0] == dirc)	// found end of regexp
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    break;
 									if ((p[0] == '[' && mymagic >= MAGIC_ON)
 										|| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
 									{
 									    p = skip_anyof(p + 1);
 									    if (p[0] == NUL)
 										break;
 									}
 									else if (p[0] == '\\' && p[1] != NUL)
 									{
 									    if (dirc == '?' && newp != NULL && p[1] == '?')
 									    {
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										// change "\?" to "?", make a copy first.
-												patch 9.1.0438: Wrong Ex command executed when :g uses '?' as delimiter

Problem:  Wrong Ex command executed when :g uses '?' as delimiter and
          pattern contains escaped '?'.
Solution: Don't use "*newp" when it's not allocated (zeertzjq).

closes: #14837

Signed-off-by: zeertzjq <zeertzjq@outlook.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-24 07:37:36 +02:00
+										if (startplen == 0)
 										    startplen = STRLEN(startp);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										if (*newp == NULL)
 										{
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										    *newp = vim_strnsave(startp, startplen);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										    if (*newp != NULL)
-												patch 9.1.0438: Wrong Ex command executed when :g uses '?' as delimiter

Problem:  Wrong Ex command executed when :g uses '?' as delimiter and
          pattern contains escaped '?'.
Solution: Don't use "*newp" when it's not allocated (zeertzjq).

closes: #14837

Signed-off-by: zeertzjq <zeertzjq@outlook.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-24 07:37:36 +02:00
+										    {
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											p = *newp + (p - startp);
-												patch 9.1.0438: Wrong Ex command executed when :g uses '?' as delimiter

Problem:  Wrong Ex command executed when :g uses '?' as delimiter and
          pattern contains escaped '?'.
Solution: Don't use "*newp" when it's not allocated (zeertzjq).

closes: #14837

Signed-off-by: zeertzjq <zeertzjq@outlook.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-24 07:37:36 +02:00
+											startp = *newp;
 										    }
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										}
-												patch 8.2.0502: Vim9: some code is not tested

Problem:    Vim9: some code is not tested.
Solution:   Add more tests.  Fix uncovered problems.

											
										
										
											2020-04-02 21:13:25 +02:00
+										if (dropped != NULL)
 										    ++*dropped;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										if (*newp != NULL)
-												patch 9.1.0438: Wrong Ex command executed when :g uses '?' as delimiter

Problem:  Wrong Ex command executed when :g uses '?' as delimiter and
          pattern contains escaped '?'.
Solution: Don't use "*newp" when it's not allocated (zeertzjq).

closes: #14837

Signed-off-by: zeertzjq <zeertzjq@outlook.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-24 07:37:36 +02:00
+										    mch_memmove(p, p + 1, startplen - ((p + 1) - startp) + 1);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										else
 										    ++p;
 									    }
 									    else
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										++p;    // skip next character
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    if (*p == 'v')
 										mymagic = MAGIC_ALL;
 									    else if (*p == 'V')
 										mymagic = MAGIC_NONE;
 									}
 								    }
-												patch 8.2.2295: incsearch does not detect empty pattern properly

Problem:    Incsearch does not detect empty pattern properly.
Solution:   Return magic state when skipping over a pattern. (Christian
            Brabandt, closes #7612, closes #6420)

											
										
										
											2021-01-04 12:42:13 +01:00
+								    if (magic_val != NULL)
 									*magic_val = mymagic;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    return p;
 								}
-												patch 8.0.0645: no error for illegal back reference in NFA engine

Problem:    The new regexp engine does not give an error for using a back
            reference where it is not allowed. (Dominique Pelle)
Solution:   Check the back reference like the old engine. (closes #1774)

											
										
										
											2017-06-17 20:08:20 +02:00
+								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * Functions for getting characters from the regexp input.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								static int	prevchr_len;	// byte length of previous char
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								static int	at_start;	// True when on the first character
 								static int	prev_at_start;  // True when on the second character
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												updated for version 7.3.1149
Problem:    New regexp engine: Matching plain text could be faster.
Solution:   Detect a plain text match and handle it specifically.  Add
            vim_regfree().

											
										
										
											2013-06-08 18:19:48 +02:00
+								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * Start parsing at "str".
-												updated for version 7.3.1149
Problem:    New regexp engine: Matching plain text could be faster.
Solution:   Detect a plain text match and handle it specifically.  Add
            vim_regfree().

											
										
										
											2013-06-08 18:19:48 +02:00
+								 */
 								    static void
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								initchr(char_u *str)
-												updated for version 7.3.1149
Problem:    New regexp engine: Matching plain text could be faster.
Solution:   Detect a plain text match and handle it specifically.  Add
            vim_regfree().

											
										
										
											2013-06-08 18:19:48 +02:00
+								{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    regparse = str;
 								    prevchr_len = 0;
 								    curchr = prevprevchr = prevchr = nextchr = -1;
 								    at_start = TRUE;
 								    prev_at_start = FALSE;
-												updated for version 7.3.1149
Problem:    New regexp engine: Matching plain text could be faster.
Solution:   Detect a plain text match and handle it specifically.  Add
            vim_regfree().

											
										
										
											2013-06-08 18:19:48 +02:00
+								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * Save the current parse state, so that it can be restored and parsing
 								 * starts in the same state again.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
 								    static void
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								save_parse_state(parse_state_T *ps)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    ps->regparse = regparse;
 								    ps->prevchr_len = prevchr_len;
 								    ps->curchr = curchr;
 								    ps->prevchr = prevchr;
 								    ps->prevprevchr = prevprevchr;
 								    ps->nextchr = nextchr;
 								    ps->at_start = at_start;
 								    ps->prev_at_start = prev_at_start;
 								    ps->regnpar = regnpar;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
 								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * Restore a previously saved parse state.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    static void
 								restore_parse_state(parse_state_T *ps)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    regparse = ps->regparse;
 								    prevchr_len = ps->prevchr_len;
 								    curchr = ps->curchr;
 								    prevchr = ps->prevchr;
 								    prevprevchr = ps->prevprevchr;
 								    nextchr = ps->nextchr;
 								    at_start = ps->at_start;
 								    prev_at_start = ps->prev_at_start;
 								    regnpar = ps->regnpar;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
 								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * Get the next character without advancing.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    static int
 								peekchr(void)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    static int	after_slash = FALSE;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 9.0.1221: code is indented more than necessary

Problem:    Code is indented more than necessary.
Solution:   Use an early return where it makes sense. (Yegappan Lakshmanan,
            closes #11833)

											
										
										
											2023-01-18 18:17:48 +00:00
+								    if (curchr != -1)
 									return curchr;
 								    switch (curchr = regparse[0])
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									case '.':
 									case '[':
 									case '~':
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    // magic when 'magic' is on
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									    if (reg_magic >= MAGIC_ON)
 										curchr = Magic(curchr);
 									    break;
 									case '(':
 									case ')':
 									case '{':
 									case '%':
 									case '+':
 									case '=':
 									case '?':
 									case '@':
 									case '!':
 									case '&':
 									case '|':
 									case '<':
 									case '>':
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									case '#':	// future ext.
 									case '"':	// future ext.
 									case '\'':	// future ext.
 									case ',':	// future ext.
 									case '-':	// future ext.
 									case ':':	// future ext.
 									case ';':	// future ext.
 									case '`':	// future ext.
 									case '/':	// Can't be used in / command
-												patch 9.0.1221: code is indented more than necessary

Problem:    Code is indented more than necessary.
Solution:   Use an early return where it makes sense. (Yegappan Lakshmanan,
            closes #11833)

											
										
										
											2023-01-18 18:17:48 +00:00
+											// magic only after "\v"
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									    if (reg_magic == MAGIC_ALL)
 										curchr = Magic(curchr);
 									    break;
 									case '*':
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    // * is not magic as the very first character, eg "?*ptr", when
 									    // after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
 									    // "\(\*" is not magic, thus must be magic if "after_slash"
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									    if (reg_magic >= MAGIC_ON
 										    && !at_start
 										    && !(prev_at_start && prevchr == Magic('^'))
 										    && (after_slash
 											|| (prevchr != Magic('(')
 											    && prevchr != Magic('&')
 											    && prevchr != Magic('|'))))
 										curchr = Magic('*');
 									    break;
 									case '^':
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    // '^' is only magic as the very first character and if it's after
 									    // "\(", "\|", "\&' or "\n"
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									    if (reg_magic >= MAGIC_OFF
 										    && (at_start
 											|| reg_magic == MAGIC_ALL
 											|| prevchr == Magic('(')
 											|| prevchr == Magic('|')
 											|| prevchr == Magic('&')
 											|| prevchr == Magic('n')
 											|| (no_Magic(prevchr) == '('
 											    && prevprevchr == Magic('%'))))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										curchr = Magic('^');
 										at_start = TRUE;
 										prev_at_start = FALSE;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    }
 									    break;
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									case '$':
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    // '$' is only magic as the very last char and if it's in front of
 									    // either "\|", "\)", "\&", or "\n"
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									    if (reg_magic >= MAGIC_OFF)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										char_u *p = regparse + 1;
 										int is_magic_all = (reg_magic == MAGIC_ALL);
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										// ignore \c \C \m \M \v \V and \Z after '$'
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
-												patch 9.0.1221: code is indented more than necessary

Problem:    Code is indented more than necessary.
Solution:   Use an early return where it makes sense. (Yegappan Lakshmanan,
            closes #11833)

											
										
										
											2023-01-18 18:17:48 +00:00
+											    || p[1] == 'm' || p[1] == 'M'
 											    || p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										{
 										    if (p[1] == 'v')
 											is_magic_all = TRUE;
 										    else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
 											is_magic_all = FALSE;
 										    p += 2;
 										}
 										if (p[0] == NUL
 											|| (p[0] == '\\'
 											    && (p[1] == '|' || p[1] == '&' || p[1] == ')'
 												|| p[1] == 'n'))
 											|| (is_magic_all
-												patch 9.0.1221: code is indented more than necessary

Problem:    Code is indented more than necessary.
Solution:   Use an early return where it makes sense. (Yegappan Lakshmanan,
            closes #11833)

											
										
										
											2023-01-18 18:17:48 +00:00
+											    && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+											|| reg_magic == MAGIC_ALL)
 										    curchr = Magic('$');
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    }
 									    break;
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									case '\\':
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										int c = regparse[1];
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										if (c == NUL)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										    curchr = '\\';	// trailing '\'
-												patch 8.2.4273: the EBCDIC support is outdated

Problem:    The EBCDIC support is outdated.
Solution:   Remove the EBCDIC support.

											
										
										
											2022-01-31 14:59:41 +00:00
+										else if (c <= '~' && META_flags[c])
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										    /*
 										     * META contains everything that may be magic sometimes,
 										     * except ^ and $ ("\^" and "\$" are only magic after
 										     * "\V").  We now fetch the next character and toggle its
 										     * magicness.  Therefore, \ is so meta-magic that it is
 										     * not in META.
 										     */
 										    curchr = -1;
 										    prev_at_start = at_start;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										    at_start = FALSE;	// be able to say "/\*ptr"
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										    ++regparse;
 										    ++after_slash;
 										    peekchr();
 										    --regparse;
 										    --after_slash;
 										    curchr = toggle_Magic(curchr);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										}
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										else if (vim_strchr(REGEXP_ABBR, c))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										    /*
 										     * Handle abbreviations, like "\t" for TAB -- webb
 										     */
 										    curchr = backslash_trans(c);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										}
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
 										    curchr = toggle_Magic(c);
 										else
-												updated for version 7.3.1037
Problem:    Look-behind matching is very slow on long lines.
Solution:   Add a byte limit to how far back an attempt is made.

											
										
										
											2013-05-29 18:45:11 +02:00
+										{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										    /*
 										     * Next character can never be (made) magic?
 										     * Then backslashing it won't do anything.
 										     */
 										    if (has_mbyte)
 											curchr = (*mb_ptr2char)(regparse + 1);
 										    else
 											curchr = c;
-												updated for version 7.3.1037
Problem:    Look-behind matching is very slow on long lines.
Solution:   Add a byte limit to how far back an attempt is made.

											
										
										
											2013-05-29 18:45:11 +02:00
+										}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										break;
 									    }
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									default:
 									    if (has_mbyte)
 										curchr = (*mb_ptr2char)(regparse);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    }
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    return curchr;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
 								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * Eat one lexed character.  Do this in a way that we can undo it.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    static void
 								skipchr(void)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // peekchr() eats a backslash, do the same here
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    if (*regparse == '\\')
 									prevchr_len = 1;
 								    else
 									prevchr_len = 0;
 								    if (regparse[prevchr_len] != NUL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									if (enc_utf8)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    // exclude composing chars that mb_ptr2len does include
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									    prevchr_len += utf_ptr2len(regparse + prevchr_len);
 									else if (has_mbyte)
 									    prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
 									else
 									    ++prevchr_len;
 								    }
 								    regparse += prevchr_len;
 								    prev_at_start = at_start;
 								    at_start = FALSE;
 								    prevprevchr = prevchr;
 								    prevchr = curchr;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    curchr = nextchr;	    // use previously unget char, or -1
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    nextchr = -1;
 								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								/*
 								 * Skip a character while keeping the value of prev_at_start for at_start.
 								 * prevchr and prevprevchr are also kept.
 								 */
 								    static void
 								skipchr_keepstart(void)
 								{
 								    int as = prev_at_start;
 								    int pr = prevchr;
 								    int prpr = prevprevchr;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    skipchr();
 								    at_start = as;
 								    prevchr = pr;
 								    prevprevchr = prpr;
 								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
/*
 * Lexical analyzer for the pattern: return the next character, taking magic
 * and such into account, and advance past it.
 */
    static int
getchr(void)
{
    int	    c;

    c = peekchr();	// find out what the next character is
    skipchr();		// and then consume it
    return c;
}
-												updated for version 7.0216

											
										
										
											2006-03-06 23:29:24 +00:00
+								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * put character back.  Works only once!
-												updated for version 7.0216

											
										
										
											2006-03-06 23:29:24 +00:00
+								 */
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    static void
 								ungetchr(void)
-												updated for version 7.0216

											
										
										
											2006-03-06 23:29:24 +00:00
+								{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    nextchr = curchr;
 								    curchr = prevchr;
 								    prevchr = prevprevchr;
 								    at_start = prev_at_start;
 								    prev_at_start = FALSE;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // Backup regparse, so that it's at the same position as before the
 								    // getchr().
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    regparse -= prevchr_len;
-												updated for version 7.0216

											
										
										
											2006-03-06 23:29:24 +00:00
+								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * Get and return the value of the hex string at the current position.
 								 * Return -1 if there is no valid hex number.
 								 * The position is updated:
 								 *     blahblah\%x20asdf
 								 *	   before-^ ^-after
 								 * The parameter controls the maximum number of input characters. This will be
 								 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    static long
 								gethexchrs(int maxinputlen)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    long_u	nr = 0;
 								    int		c;
 								    int		i;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    for (i = 0; i < maxinputlen; ++i)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									c = regparse[0];
 									if (!vim_isxdigit(c))
 									    break;
 									nr <<= 4;
 									nr |= hex2nr(c);
 									++regparse;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    }
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    if (i == 0)
 									return -1;
 								    return (long)nr;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
 								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * Get and return the value of the decimal string immediately after the
 								 * current position. Return -1 for invalid.  Consumes all digits.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    static long
 								getdecchrs(void)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    long_u	nr = 0;
 								    int		c;
 								    int		i;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    for (i = 0; ; ++i)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									c = regparse[0];
 									if (c < '0' || c > '9')
 									    break;
 									nr *= 10;
 									nr += c - '0';
 									++regparse;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									curchr = -1; // no longer valid
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    }
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
 								    if (i == 0)
 									return -1;
 								    return (long)nr;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
-												updated for version 7.3.1037
Problem:    Look-behind matching is very slow on long lines.
Solution:   Add a byte limit to how far back an attempt is made.

											
										
										
											2013-05-29 18:45:11 +02:00
+								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * get and return the value of the octal string immediately after the current
 								 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
 								 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
 								 * treat 8 or 9 as recognised characters. Position is updated:
 								 *     blahblah\%o210asdf
 								 *	   before-^  ^-after
-												updated for version 7.3.1037
Problem:    Look-behind matching is very slow on long lines.
Solution:   Add a byte limit to how far back an attempt is made.

											
										
										
											2013-05-29 18:45:11 +02:00
+								 */
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    static long
 								getoctchrs(void)
-												updated for version 7.3.1037
Problem:    Look-behind matching is very slow on long lines.
Solution:   Add a byte limit to how far back an attempt is made.

											
										
										
											2013-05-29 18:45:11 +02:00
+								{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    long_u	nr = 0;
 								    int		c;
 								    int		i;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    for (i = 0; i < 3 && nr < 040; ++i)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									c = regparse[0];
 									if (c < '0' || c > '7')
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    break;
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									nr <<= 3;
 									nr |= hex2nr(c);
 									++regparse;
-												updated for version 7.2-307

											
										
										
											2009-11-25 17:21:32 +00:00
+								    }
-												updated for version 7.3.1087
Problem:    A leading star is not seen as a normal char when \{} follows.
Solution:   Save and restore the parse state properly.

											
										
										
											2013-06-01 14:42:56 +02:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    if (i == 0)
 									return -1;
 								    return (long)nr;
-												updated for version 7.3.1087
Problem:    A leading star is not seen as a normal char when \{} follows.
Solution:   Save and restore the parse state properly.

											
										
										
											2013-06-01 14:42:56 +02:00
+								}
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * read_limits - Read two integers to be taken as a minimum and maximum.
 								 * If the first character is '-', then the range is reversed.
 								 * Should end with 'end'.  If minval is missing, zero is default, if maxval is
 								 * missing, a very big number is the default.
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								 */
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    static int
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								read_limits(long *minval, long *maxval)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    int		reverse = FALSE;
 								    char_u	*first_char;
 								    long	tmp;
-												updated for version 7.0051

											
										
										
											2005-02-22 08:39:57 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    if (*regparse == '-')
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// Starts with '-', so reverse the range later
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									regparse++;
 									reverse = TRUE;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    }
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    first_char = regparse;
 								    *minval = getdigits(&regparse);
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    if (*regparse == ',')	    // There is a comma
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    {
 									if (vim_isdigit(*++regparse))
 									    *maxval = getdigits(&regparse);
 									else
 									    *maxval = MAX_LIMIT;
 								    }
 								    else if (VIM_ISDIGIT(*first_char))
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									*maxval = *minval;	    // It was \{n} or \{-n}
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    else
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									*maxval = MAX_LIMIT;	    // It was \{} or \{-}
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    if (*regparse == '\\')
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									regparse++;	// Allow either \{...} or \{...\}
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    if (*regparse != '}')
-												patch 8.2.3987: error messages are spread out

Problem:    Error messages are spread out.
Solution:   Move more error messages to errors.h.

											
										
										
											2022-01-02 21:26:16 +00:00
+									EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+														       reg_magic == MAGIC_ALL);
-												updated for version 7.0056

											
										
										
											2005-03-06 23:38:09 +00:00
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    /*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								     * Reverse the range if there was a '-', or make sure it is in the right
 								     * order otherwise.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								     */
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
-												updated for version 7.0056

											
										
										
											2005-03-06 23:38:09 +00:00
+								    {
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									tmp = *minval;
 									*minval = *maxval;
 									*maxval = tmp;
-												updated for version 7.0056

											
										
										
											2005-03-06 23:38:09 +00:00
+								    }
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    skipchr();		// let's be friends with the lexer again
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    return OK;
-												updated for version 7.0056

											
										
										
											2005-03-06 23:38:09 +00:00
+								}
 								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * vim_regexec and friends
-												updated for version 7.0056

											
										
										
											2005-03-06 23:38:09 +00:00
+								 */
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								/*
 								 * Global work variables for vim_regexec().
 								 */
-												updated for version 7.0056

											
										
										
											2005-03-06 23:38:09 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								static void	cleanup_subexpr(void);
 								#ifdef FEAT_SYN_HL
 								static void	cleanup_zsubexpr(void);
 								#endif
 								static int	match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
-												updated for version 7.0056

											
										
										
											2005-03-06 23:38:09 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								/*
 								 * Sometimes need to save a copy of a line.  Since alloc()/free() is very
 								 * slow, we keep one allocated piece of memory and only re-allocate it when
 								 * it's too small.  It's freed in bt_regexec_both() when finished.
 								 */
 								static char_u	*reg_tofree = NULL;
 								static unsigned	reg_tofreelen;
-												updated for version 7.0056

											
										
										
											2005-03-06 23:38:09 +00:00
 								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * Structure used to store the execution state of the regex engine.
 								 * Which ones are set depends on whether a single-line or multi-line match is
 								 * done:
 								 *			single-line		multi-line
 								 * reg_match		&regmatch_T		NULL
 								 * reg_mmatch		NULL			&regmmatch_T
 								 * reg_startp		reg_match->startp	<invalid>
 								 * reg_endp		reg_match->endp		<invalid>
 								 * reg_startpos		<invalid>		reg_mmatch->startpos
 								 * reg_endpos		<invalid>		reg_mmatch->endpos
 								 * reg_win		NULL			window in which to search
 								 * reg_buf		curbuf			buffer in which to search
 								 * reg_firstlnum	<invalid>		first line in which to search
 								 * reg_maxline		0			last line nr
 								 * reg_line_lbr		FALSE or TRUE		FALSE
-												updated for version 7.0056

											
										
										
											2005-03-06 23:38:09 +00:00
+								 */
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								typedef struct {
 								    regmatch_T		*reg_match;
 								    regmmatch_T		*reg_mmatch;
-												patch 9.0.0951: trying every character position for a match is inefficient

Problem:    Trying every character position for a match is inefficient.
Solution:   Use the start position of the match ignoring "\zs".

											
										
										
											2022-11-26 11:47:10 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    char_u		**reg_startp;
 								    char_u		**reg_endp;
 								    lpos_T		*reg_startpos;
 								    lpos_T		*reg_endpos;
-												patch 9.0.0951: trying every character position for a match is inefficient

Problem:    Trying every character position for a match is inefficient.
Solution:   Use the start position of the match ignoring "\zs".

											
										
										
											2022-11-26 11:47:10 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    win_T		*reg_win;
 								    buf_T		*reg_buf;
 								    linenr_T		reg_firstlnum;
 								    linenr_T		reg_maxline;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    int			reg_line_lbr;	// "\n" in string is line break
-												updated for version 7.0056

											
										
										
											2005-03-06 23:38:09 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    // The current match-position is stored in these variables:
 								    linenr_T	lnum;		// line number, relative to first line
 								    char_u	*line;		// start of current line
-												patch 8.2.3612: using freed memory with regexp using a mark

Problem:    Using freed memory with regexp using a mark.
Solution:   Get the line again after getting the mark position.

											
										
										
											2021-11-17 18:22:56 +00:00
+								    char_u	*input;		// current input, points into "line"
-												updated for version 7.0056

											
										
										
											2005-03-06 23:38:09 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    int	need_clear_subexpr;	// subexpressions still need to be cleared
 								#ifdef FEAT_SYN_HL
 								    int	need_clear_zsubexpr;	// extmatch subexpressions still need to be
 												// cleared
 								#endif
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
 								    // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
 								    // contains '\c' or '\C' the value is overruled.
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    int			reg_ic;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // Similar to "reg_ic", but only for 'combining' characters.  Set with \Z
 								    // flag in the regexp.  Defaults to false, always.
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    int			reg_icombine;
-												patch 8.1.0809: too many #ifdefs

Problem:    Too many #ifdefs.
Solution:   Graduate FEAT_MBYTE, part 3.

											
										
										
											2019-01-24 16:39:02 +01:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
 								    // there is no maximum.
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    colnr_T		reg_maxcol;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    // State for the NFA engine regexec.
 								    int nfa_has_zend;	    // NFA regexp \ze operator encountered.
 								    int nfa_has_backref;    // NFA regexp \1 .. \9 encountered.
 								    int nfa_nsubexpr;	    // Number of sub expressions actually being used
 											    // during execution. 1 if only the whole match
 											    // (subexpr 0) is used.
 								    // listid is global, so that it increases on recursive calls to
 								    // nfa_regmatch(), which means we don't have to clear the lastlist field of
 								    // all the states.
 								    int nfa_listid;
 								    int nfa_alt_listid;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								#ifdef FEAT_SYN_HL
 								    int nfa_has_zsubexpr;   // NFA regexp has \z( ), set zsubexpr.
 								#endif
 								} regexec_T;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								static regexec_T	rex;
 								static int		rex_in_use = FALSE;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								/*
 								 * Return TRUE if character 'c' is included in 'iskeyword' option for
 								 * "reg_buf" buffer.
 								 */
 								    static int
 								reg_iswordc(int c)
 								{
 								    // Ask for the 'iskeyword' check against the buffer stored in "rex".
 								    int is_word = vim_iswordc_buf(c, rex.reg_buf);
 								    return is_word;
 								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								#ifdef FEAT_EVAL
 								static int can_f_submatch = FALSE;	// TRUE when submatch() can be used
 								// This struct is used for reg_submatch(). Needed for when the
 								// substitution string is an expression that contains a call to substitute()
 								// and submatch().
 								typedef struct {
 								    // NOTE(review): sm_match/sm_mmatch presumably mirror "reg_match" and
 								    // "reg_mmatch" of regexec_T -- they are not used in the code visible
 								    // here, confirm against the callers.
 								    regmatch_T	*sm_match;
 								    regmmatch_T	*sm_mmatch;
 								    // Added to "lnum" by reg_getline_common() when RGLF_SUBMATCH is set.
 								    linenr_T	sm_firstlnum;
 								    // Limit that "lnum" is compared against by reg_getline_common() when
 								    // RGLF_SUBMATCH is set.
 								    linenr_T	sm_maxline;
 								    // NOTE(review): presumably the counterpart of "reg_line_lbr" -- confirm.
 								    int		sm_line_lbr;
 								} regsubmatch_T;
 								static regsubmatch_T rsm;  // can only be used when can_f_submatch is TRUE
 								#endif
 								// Bit flags telling reg_getline_common() what to return and which state to
 								// use.  The values can be OR-ed together.
 								typedef enum
 								{
 								    // store the pointer to the line in "*line"
 								    RGLF_LINE = 0x01,
 								    // store the length of the line in "*length"
 								    RGLF_LENGTH = 0x02
 								#ifdef FEAT_EVAL
 								    ,
 								    // take the first line number and the line limit from "rsm" instead
 								    // of "rex"
 								    RGLF_SUBMATCH = 0x04
 								#endif
 								} reg_getline_flags_T;
 								//
 								// Shared worker behind reg_getline(), reg_getline_len(),
 								// reg_getline_submatch() and reg_getline_submatch_len().
 								// "flags" is a bitmask: RGLF_LINE stores the line pointer in "*line",
 								// RGLF_LENGTH stores the line length in "*length" and RGLF_SUBMATCH (only
 								// available with FEAT_EVAL) takes the line range from "rsm" instead of "rex".
 								// An output pointer is only written when its flag is set.
 								    static void
 								reg_getline_common(linenr_T lnum, reg_getline_flags_T flags, char_u **line, colnr_T *length)
 								{
 								    linenr_T	target;		// "lnum" offset by the first line number
 								    linenr_T	last;		// limit that "lnum" is compared against
 								    int		outside;	// non-zero when there is no line to fetch
 								#ifdef FEAT_EVAL
 								    if ((flags & RGLF_SUBMATCH) != 0)
 								    {
 									target = rsm.sm_firstlnum + lnum;
 									last = rsm.sm_maxline;
 								    }
 								    else
 								#endif
 								    {
 									target = rex.reg_firstlnum + lnum;
 									last = rex.reg_maxline;
 								    }
 								    // Before line 1 (negative "lnum" while looking behind) there is no line.
 								    // Past "last" the "\n" in the last line must have matched.
 								    outside = (target < 1 || lnum > last);
 								    if ((flags & RGLF_LINE) != 0)
 								    {
 									if (target < 1)
 									    *line = NULL;
 									else if (outside)
 									    *line = (char_u *)"";
 									else
 									    *line = ml_get_buf(rex.reg_buf, target, FALSE);
 								    }
 								    if ((flags & RGLF_LENGTH) != 0)
 									*length = outside ? 0 : ml_get_buf_len(rex.reg_buf, target);
 								}
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								/*
 								 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
 								 */
 								    static char_u *
 								reg_getline(linenr_T lnum)
 								{
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								    char_u *line;
 								    reg_getline_common(lnum, RGLF_LINE, &line, NULL);
 								    return line;
 								}
 								/*
 								 * Get length of line "lnum", which is relative to "reg_firstlnum".
 								 */
 								    static colnr_T
 								reg_getline_len(linenr_T lnum)
 								{
 								    colnr_T length;
 								    reg_getline_common(lnum, RGLF_LENGTH, NULL, &length);
 								    return length;
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								}
-												patch 8.1.0809: too many #ifdefs

Problem:    Too many #ifdefs.
Solution:   Graduate FEAT_MBYTE, part 3.

											
										
										
											2019-01-24 16:39:02 +01:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								#ifdef FEAT_SYN_HL
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								static char_u	*reg_startzp[NSUBEXP];	// Workspace to mark beginning
 								static char_u	*reg_endzp[NSUBEXP];	//   and end of \z(...\) matches
 								static lpos_T	reg_startzpos[NSUBEXP];	// idem, beginning pos
 								static lpos_T	reg_endzpos[NSUBEXP];	// idem, end pos
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								#endif
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								// TRUE if using multi-line regexp.
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								#define REG_MULTI	(rex.reg_match == NULL)
 								#ifdef FEAT_SYN_HL
 								/*
 								 * Create a new extmatch and mark it as referenced once.
 								 */
 								    static reg_extmatch_T *
 								make_extmatch(void)
 								{
 								    reg_extmatch_T	*em;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    em = ALLOC_CLEAR_ONE(reg_extmatch_T);
 								    if (em != NULL)
 									em->refcnt = 1;
 								    return em;
 								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								/*
 								 * Add a reference to an extmatch.
 								 */
 								    reg_extmatch_T *
 								ref_extmatch(reg_extmatch_T *em)
 								{
 								    // Nothing to count for a NULL extmatch, hand it back unchanged.
 								    if (em == NULL)
 									return em;
 								    em->refcnt += 1;
 								    return em;
 								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								/*
 								 * Remove a reference to an extmatch.  If there are no references left, free
 								 * the info.
 								 */
 								    void
 								unref_extmatch(reg_extmatch_T *em)
 								{
 								    int i;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    if (em != NULL && --em->refcnt <= 0)
 								    {
 									for (i = 0; i < NSUBEXP; ++i)
 									    vim_free(em->matches[i]);
 									vim_free(em);
 								    }
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								#endif
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
 								/*
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								 * Get class of previous character.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    static int
 								reg_prev_class(void)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    if (rex.input > rex.line)
 									return mb_get_class_buf(rex.input - 1
 										       - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
 								    return -1;
 								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								/*
 								 * Return TRUE if the current rex.input position matches the Visual area.
 								 */
 								    static int
 								reg_match_visual(void)
 								{
 								    pos_T	top, bot;
 								    linenr_T    lnum;
 								    colnr_T	col;
 								    win_T	*wp = rex.reg_win == NULL ? curwin : rex.reg_win;
 								    int		mode;
 								    colnr_T	start, end;
 								    colnr_T	start2, end2;
 								    colnr_T	cols;
-												patch 8.2.2911: pattern "\%V" does not match all of block selection

Problem:    Pattern "\%V" does not match all of block selection. (Rick Howe)
Solution:   Use the value of vi_curswant. (closes #8285)

											
										
										
											2021-05-30 16:43:11 +02:00
+								    colnr_T	curswant;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.2.4261: accessing invalid memory in a regular expression

Problem:    Accessing invalid memory when a regular expression checks the
            Visual area while matching in a string.
Solution:   Do not try matching the Visual area in a string.

											
										
										
											2022-01-30 16:42:56 +00:00
+								    // Check if the buffer is the current buffer and not using a string.
-												patch 8.2.4262: some search tests fail

Problem:    Some search tests fail.
Solution:   Use a better way to reject searching for the Visual area.

											
										
										
											2022-01-30 17:17:41 +00:00
+								    if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									return FALSE;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    if (VIsual_active)
 								    {
 									if (LT_POS(VIsual, wp->w_cursor))
 									{
 									    top = VIsual;
 									    bot = wp->w_cursor;
 									}
 									else
 									{
 									    top = wp->w_cursor;
 									    bot = VIsual;
 									}
 									mode = VIsual_mode;
-												patch 8.2.2911: pattern "\%V" does not match all of block selection

Problem:    Pattern "\%V" does not match all of block selection. (Rick Howe)
Solution:   Use the value of vi_curswant. (closes #8285)

											
										
										
											2021-05-30 16:43:11 +02:00
+									curswant = wp->w_curswant;
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    }
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    else
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    {
 									if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
 									{
 									    top = curbuf->b_visual.vi_start;
 									    bot = curbuf->b_visual.vi_end;
 									}
 									else
 									{
 									    top = curbuf->b_visual.vi_end;
 									    bot = curbuf->b_visual.vi_start;
 									}
-												patch 9.1.0105: Style: typos found

Problem:  Style: typos found
Solution: correct them
          (zeertzjq)

closes: #14023

Signed-off-by: zeertzjq <zeertzjq@outlook.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-02-13 20:32:04 +01:00
+									// a substitute command may have removed some lines
-												patch 9.1.0043: ml_get: invalid lnum when :s replaces visual selection

Problem:  ml_get: invalid lnum when :s replaces visual selection
          (@ropery)
Solution: substitute may decrement the number of lines in a buffer,
          so validate, that the bottom lines of the visual selection
          stays within the max buffer line

fixes: #13890
closes: #13892

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-01-22 20:12:34 +01:00
+									if (bot.lnum > curbuf->b_ml.ml_line_count)
 									    bot.lnum = curbuf->b_ml.ml_line_count;
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									mode = curbuf->b_visual.vi_mode;
-												patch 8.2.2911: pattern "\%V" does not match all of block selection

Problem:    Pattern "\%V" does not match all of block selection. (Rick Howe)
Solution:   Use the value of vi_curswant. (closes #8285)

											
										
										
											2021-05-30 16:43:11 +02:00
+									curswant = curbuf->b_visual.vi_curswant;
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    }
 								    lnum = rex.lnum + rex.reg_firstlnum;
 								    if (lnum < top.lnum || lnum > bot.lnum)
 									return FALSE;
-												patch 8.2.3949: using freed memory with /\%V

Problem:    Using freed memory with /\%V.
Solution:   Get the line again after getvvcol().

											
										
										
											2021-12-30 14:49:43 +00:00
+								    col = (colnr_T)(rex.input - rex.line);
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								    if (mode == 'v')
 								    {
 									if ((lnum == top.lnum && col < top.col)
 										|| (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
 									    return FALSE;
 								    }
 								    else if (mode == Ctrl_V)
 								    {
 									getvvcol(wp, &top, &start, NULL, &end);
 									getvvcol(wp, &bot, &start2, NULL, &end2);
 									if (start2 < start)
 									    start = start2;
 									if (end2 > end)
 									    end = end2;
-												patch 8.2.2911: pattern "\%V" does not match all of block selection

Problem:    Pattern "\%V" does not match all of block selection. (Rick Howe)
Solution:   Use the value of vi_curswant. (closes #8285)

											
										
										
											2021-05-30 16:43:11 +02:00
+									if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									    end = MAXCOL;
-												patch 8.2.3949: using freed memory with /\%V

Problem:    Using freed memory with /\%V.
Solution:   Get the line again after getvvcol().

											
										
										
											2021-12-30 14:49:43 +00:00
 									// getvvcol() flushes rex.line, need to get it again
 									rex.line = reg_getline(rex.lnum);
 									rex.input = rex.line + col;
-												patch 9.0.0067: cannot show virtual text

Problem:    Cannot show virtual text.
Solution:   Initial changes for virtual text support, using text properties.

											
										
										
											2022-07-25 18:13:54 +01:00
+									cols = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, rex.line, col);
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									if (cols < start || cols > end - (*p_sel == 'e'))
 									    return FALSE;
 								    }
 								    return TRUE;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
 								/*
 								 * Check the regexp program for its magic number.
 								 * Return TRUE if it's wrong.
 								 */
 								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								prog_magic_wrong(void)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								    regprog_T	*prog;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								    if (prog->engine == &nfa_regengine)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// For NFA matcher we don't check the magic
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+									return FALSE;
 								    if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 9.0.1594: some internal error messages are translated

Problem:    Some internal error messages are translated.
Solution:   Consistently do not translate internal error messages.
            (closes #12459)

											
										
										
											2023-05-31 17:12:14 +01:00
+									iemsg(e_corrupted_regexp_program);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									return TRUE;
 								    }
 								    return FALSE;
 								}
 								/*
 								 * Cleanup the subexpressions, if this wasn't done yet.
 								 * This construction is used to clear the subexpressions only when they are
 								 * used (to increase speed).
 								 */
 								    static void
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								cleanup_subexpr(void)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 9.0.1221: code is indented more than necessary

Problem:    Code is indented more than necessary.
Solution:   Use an early return where it makes sense. (Yegappan Lakshmanan,
            closes #11833)

											
										
										
											2023-01-18 18:17:48 +00:00
+								    if (!rex.need_clear_subexpr)
 									return;
 								    if (REG_MULTI)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 9.0.1221: code is indented more than necessary

Problem:    Code is indented more than necessary.
Solution:   Use an early return where it makes sense. (Yegappan Lakshmanan,
            closes #11833)

											
										
										
											2023-01-18 18:17:48 +00:00
+									// Use 0xff to set lnum to -1
 									vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
 									vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    }
-												patch 9.0.1221: code is indented more than necessary

Problem:    Code is indented more than necessary.
Solution:   Use an early return where it makes sense. (Yegappan Lakshmanan,
            closes #11833)

											
										
										
											2023-01-18 18:17:48 +00:00
+								    else
 								    {
 									vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
 									vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
 								    }
 								    rex.need_clear_subexpr = FALSE;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
 								#ifdef FEAT_SYN_HL
 								    static void
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								cleanup_zsubexpr(void)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 9.0.1221: code is indented more than necessary

Problem:    Code is indented more than necessary.
Solution:   Use an early return where it makes sense. (Yegappan Lakshmanan,
            closes #11833)

											
										
										
											2023-01-18 18:17:48 +00:00
+								    if (!rex.need_clear_zsubexpr)
 									return;
 								    if (REG_MULTI)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 9.0.1221: code is indented more than necessary

Problem:    Code is indented more than necessary.
Solution:   Use an early return where it makes sense. (Yegappan Lakshmanan,
            closes #11833)

											
										
										
											2023-01-18 18:17:48 +00:00
+									// Use 0xff to set lnum to -1
 									vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
 									vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
 								    }
 								    else
 								    {
 									vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
 									vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    }
-												patch 9.0.1221: code is indented more than necessary

Problem:    Code is indented more than necessary.
Solution:   Use an early return where it makes sense. (Yegappan Lakshmanan,
            closes #11833)

											
										
										
											2023-01-18 18:17:48 +00:00
+								    rex.need_clear_zsubexpr = FALSE;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
 								#endif
 								/*
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+								 * Advance rex.lnum, rex.line and rex.input to the next line.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
 								    static void
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								reg_nextline(void)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+								    rex.line = reg_getline(++rex.lnum);
 								    rex.input = rex.line;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    fast_breakcheck();
 								}
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+								/*
 								 * Check whether a backreference matches.
 								 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
-												updated for version 7.4.101
Problem:    Using \1 in pattern goes one line too far. (Bohr Shaw, John Little)
Solution:   Only advance the match end for the matched characters in the last
            line.

											
										
										
											2013-11-21 17:13:00 +01:00
+								 * If "bytelen" is not NULL, it is set to the byte length of the match in the
 								 * last line.
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+								 */
 								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								match_with_backref(
 								    linenr_T start_lnum,
 								    colnr_T  start_col,
 								    linenr_T end_lnum,
 								    colnr_T  end_col,
 								    int	     *bytelen)
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+								{
 								    linenr_T	clnum = start_lnum;
 								    colnr_T	ccol = start_col;
 								    int		len;
 								    char_u	*p;
 								    if (bytelen != NULL)
 									*bytelen = 0;
 								    for (;;)
 								    {
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// Since getting one line may invalidate the other, need to make copy.
 									// Slow!
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+									if (rex.line != reg_tofree)
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+									{
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+									    len = (int)STRLEN(rex.line);
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+									    if (reg_tofree == NULL || len >= (int)reg_tofreelen)
 									    {
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										len += 50;	// get some extra
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+										vim_free(reg_tofree);
 										reg_tofree = alloc(len);
 										if (reg_tofree == NULL)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										    return RA_FAIL; // out of memory!
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+										reg_tofreelen = len;
 									    }
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+									    STRCPY(reg_tofree, rex.line);
 									    rex.input = reg_tofree + (rex.input - rex.line);
 									    rex.line = reg_tofree;
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+									}
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// Get the line to compare with.
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+									p = reg_getline(clnum);
 									if (clnum == end_lnum)
 									    len = end_col - ccol;
 									else
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+									    len = (int)reg_getline_len(clnum) - ccol;
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+									if (cstrncmp(p + ccol, rex.input, &len) != 0)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    return RA_NOMATCH;  // doesn't match
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+									if (bytelen != NULL)
 									    *bytelen += len;
 									if (clnum == end_lnum)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    break;		// match and at end!
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+									if (rex.lnum >= rex.reg_maxline)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    return RA_NOMATCH;  // text too short
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// Advance to next line.
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+									reg_nextline();
-												updated for version 7.4.101
Problem:    Using \1 in pattern goes one line too far. (Bohr Shaw, John Little)
Solution:   Only advance the match end for the matched characters in the last
            line.

											
										
										
											2013-11-21 17:13:00 +01:00
+									if (bytelen != NULL)
 									    *bytelen = 0;
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+									++clnum;
 									ccol = 0;
 									if (got_int)
 									    return RA_FAIL;
 								    }
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // found a match!  Note that rex.line may now point to a copy of the line,
 								    // that should not matter.
-												updated for version 7.3.1191
Problem:    Backreference to previous line doesn't work. (Lech Lorens)
Solution:   Implement looking in another line.

											
										
										
											2013-06-14 20:31:28 +02:00
+								    return RA_MATCH;
 								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												updated for version 7.4.437
Problem:    New and old regexp engine are not consistent.
Solution:   Also give an error for "\ze*" for the old regexp engine.

											
										
										
											2014-09-09 17:18:49 +02:00
+								/*
 								 * Used in a place where no * or \+ can follow.
 								 */
 								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								re_mult_next(char *what)
-												updated for version 7.4.437
Problem:    New and old regexp engine are not consistent.
Solution:   Also give an error for "\ze*" for the old regexp engine.

											
										
										
											2014-09-09 17:18:49 +02:00
+								{
 								    if (re_multi_type(peekchr()) == MULTI_MULT)
-												patch 8.1.0748: using sprintf() instead of semsg()

Problem:    Using sprintf() instead of semsg().
Solution:   Use semsg().  Fix bug with E888. (Ozaki Kiichi, closes #3801)

											
										
										
											2019-01-14 22:46:15 +01:00
+								    {
-												patch 8.2.4012: error messages are spread out

Problem:    Error messages are spread out.
Solution:   Move the last error messages to errors.h.

											
										
										
											2022-01-05 20:24:39 +00:00
+								       semsg(_(e_nfa_regexp_cannot_repeat_str), what);
-												patch 8.1.0748: using sprintf() instead of semsg()

Problem:    Using sprintf() instead of semsg().
Solution:   Use semsg().  Fix bug with E888. (Ozaki Kiichi, closes #3801)

											
										
										
											2019-01-14 22:46:15 +01:00
+								       rc_did_emsg = TRUE;
 								       return FAIL;
 								    }
-												updated for version 7.4.437
Problem:    New and old regexp engine are not consistent.
Solution:   Also give an error for "\ze*" for the old regexp engine.

											
										
										
											2014-09-09 17:18:49 +02:00
+								    return OK;
 								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
/*
 * One entry of a character decomposition table: up to three code points.
 * In decomp_table unused trailing fields are zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								// 0xfb20 - 0xfb4f
-												updated for version 7.0078

											
										
										
											2005-06-01 21:51:55 +00:00
+								static decomp_T decomp_table[0xfb4f-0xfb20+1] =
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    {0x5e2,0,0},		// 0xfb20	alt ayin
 								    {0x5d0,0,0},		// 0xfb21	alt alef
 								    {0x5d3,0,0},		// 0xfb22	alt dalet
 								    {0x5d4,0,0},		// 0xfb23	alt he
 								    {0x5db,0,0},		// 0xfb24	alt kaf
 								    {0x5dc,0,0},		// 0xfb25	alt lamed
 								    {0x5dd,0,0},		// 0xfb26	alt mem-sofit
 								    {0x5e8,0,0},		// 0xfb27	alt resh
 								    {0x5ea,0,0},		// 0xfb28	alt tav
 								    {'+', 0, 0},		// 0xfb29	alt plus
 								    {0x5e9, 0x5c1, 0},		// 0xfb2a	shin+shin-dot
 								    {0x5e9, 0x5c2, 0},		// 0xfb2b	shin+sin-dot
 								    {0x5e9, 0x5c1, 0x5bc},	// 0xfb2c	shin+shin-dot+dagesh
 								    {0x5e9, 0x5c2, 0x5bc},	// 0xfb2d	shin+sin-dot+dagesh
 								    {0x5d0, 0x5b7, 0},		// 0xfb2e	alef+patah
 								    {0x5d0, 0x5b8, 0},		// 0xfb2f	alef+qamats
 								    {0x5d0, 0x5bc, 0},		// 0xfb30	alef+mapiq (U+05D0 U+05BC per Unicode)
 								    {0x5d1, 0x5bc, 0},		// 0xfb31	bet+dagesh
 								    {0x5d2, 0x5bc, 0},		// 0xfb32	gimel+dagesh
 								    {0x5d3, 0x5bc, 0},		// 0xfb33	dalet+dagesh
 								    {0x5d4, 0x5bc, 0},		// 0xfb34	he+dagesh
 								    {0x5d5, 0x5bc, 0},		// 0xfb35	vav+dagesh
 								    {0x5d6, 0x5bc, 0},		// 0xfb36	zayin+dagesh
 								    {0xfb37, 0, 0},		// 0xfb37 -- UNUSED
 								    {0x5d8, 0x5bc, 0},		// 0xfb38	tet+dagesh
 								    {0x5d9, 0x5bc, 0},		// 0xfb39	yud+dagesh
 								    {0x5da, 0x5bc, 0},		// 0xfb3a	kaf sofit+dagesh
 								    {0x5db, 0x5bc, 0},		// 0xfb3b	kaf+dagesh
 								    {0x5dc, 0x5bc, 0},		// 0xfb3c	lamed+dagesh
 								    {0xfb3d, 0, 0},		// 0xfb3d -- UNUSED
 								    {0x5de, 0x5bc, 0},		// 0xfb3e	mem+dagesh
 								    {0xfb3f, 0, 0},		// 0xfb3f -- UNUSED
 								    {0x5e0, 0x5bc, 0},		// 0xfb40	nun+dagesh
 								    {0x5e1, 0x5bc, 0},		// 0xfb41	samech+dagesh
 								    {0xfb42, 0, 0},		// 0xfb42 -- UNUSED
 								    {0x5e3, 0x5bc, 0},		// 0xfb43	pe sofit+dagesh
 								    {0x5e4, 0x5bc,0},		// 0xfb44	pe+dagesh
 								    {0xfb45, 0, 0},		// 0xfb45 -- UNUSED
 								    {0x5e6, 0x5bc, 0},		// 0xfb46	tsadi+dagesh
 								    {0x5e7, 0x5bc, 0},		// 0xfb47	qof+dagesh
 								    {0x5e8, 0x5bc, 0},		// 0xfb48	resh+dagesh
 								    {0x5e9, 0x5bc, 0},		// 0xfb49	shin+dagesh
 								    {0x5ea, 0x5bc, 0},		// 0xfb4a	tav+dagesh
 								    {0x5d5, 0x5b9, 0},		// 0xfb4b	vav+holam
 								    {0x5d1, 0x5bf, 0},		// 0xfb4c	bet+rafe
 								    {0x5db, 0x5bf, 0},		// 0xfb4d	kaf+rafe
 								    {0x5e4, 0x5bf, 0},		// 0xfb4e	pe+rafe
 								    {0x5d0, 0x5dc, 0}		// 0xfb4f	alef-lamed
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								};
 								    static void
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								mb_decompose(int c, int *c1, int *c2, int *c3)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    decomp_T d;
-												updated for version 7.3.1000
Problem:    Typo in char value causes out of bounds access.
Solution:   Fix character value.  (Klemens Baum)

											
										
										
											2013-05-21 21:37:20 +02:00
+								    if (c >= 0xfb20 && c <= 0xfb4f)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
 									d = decomp_table[c - 0xfb20];
 									*c1 = d.a;
 									*c2 = d.b;
 									*c3 = d.c;
 								    }
 								    else
 								    {
 									*c1 = c;
 									*c2 = *c3 = 0;
 								    }
 								}
 								/*
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								 * Compare two strings, ignore case if rex.reg_ic set.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 * Return 0 if strings match, non-zero otherwise.
-												patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive

Problem:  regex: wrong match when searching multi-byte char
          case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string

This patch does the following 4 things:

1) When the regexp engine compares two utf-8 codepoints case
   insensitive it may match an adjacent character, because it assumes
   it can step over as many bytes as the pattern contains.

   This however is not necessarily true because of case-folding, a
   multi-byte UTF-8 character can be considered equal to some
   single-byte value.

   Let's consider the pattern 'ſ' and the string 's'. When comparing and
   ignoring case, the single character 's' matches, and since it matches
   Vim will try to step over the match (by the amount of bytes of the
   pattern), assuming that since it matches, the length of both strings is
   the same.

   However in that case, it should only step over the single byte value
   's' by 1 byte and try to start matching after it again. So for the
   backtracking engine we need to ensure:
   * we try to match the correct length for the pattern and the text
   * in case of a match, we step over it correctly

   There is one tricky thing for the backtracking engine. We also need to
   calculate correctly the number of bytes to compare the 2 different
   utf-8 strings s1 and s2. So we will count the number of characters in
   s1 that the byte len specified. Then we count the number of bytes to
   step over the same number of characters in string s2 and then we can
   correctly compare the 2 utf-8 strings.

2) A similar thing can happen for the NFA engine, when skipping to the
   next character to test for a match. We are skipping over the regstart
   pointer, however we do not consider the case that because of
   case-folding we may need to adjust the number of bytes to skip over.
   So this needs to be adjusted in find_match_text() as well.

3) A related issue turned out, when prog->match_text is actually empty.
   In that case we should try to find the next match and skip this
   condition.

4) When comparing characters using collections, we must also apply case
   folding to each character in the collection and not just to the
   current character from the search string.  This doesn't apply to the
   NFA engine, because internally it converts collections to branches
   [abc] -> a\|b\|c

fixes: #14294
closes: #14756

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-07-30 20:39:18 +02:00
+								 * Correct the length "*n" when composing characters are ignored
 								 * or for utf8 when both utf codepoints are considered equal because of
 								 * case-folding but have different length (e.g. 's' and 'ſ')
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 */
 								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								cstrncmp(char_u *s1, char_u *s2, int *n)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    int		result;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    if (!rex.reg_ic)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									result = STRNCMP(s1, s2, *n);
-												patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive

Problem:  regex: wrong match when searching multi-byte char
          case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string

This patch does the following 4 things:

1) When the regexp engine compares two utf-8 codepoints case
   insensitive it may match an adjacent character, because it assumes
   it can step over as many bytes as the pattern contains.

   This however is not necessarily true because of case-folding, a
   multi-byte UTF-8 character can be considered equal to some
   single-byte value.

   Let's consider the pattern 'ſ' and the string 's'. When comparing and
   ignoring case, the single character 's' matches, and since it matches
   Vim will try to step over the match (by the amount of bytes of the
   pattern), assuming that since it matches, the length of both strings is
   the same.

   However in that case, it should only step over the single byte value
   's' by 1 byte and try to start matching after it again. So for the
   backtracking engine we need to ensure:
   * we try to match the correct length for the pattern and the text
   * in case of a match, we step over it correctly

   There is one tricky thing for the backtracking engine. We also need to
   calculate correctly the number of bytes to compare the 2 different
   utf-8 strings s1 and s2. So we will count the number of characters in
   s1 that the byte len specified. Then we count the number of bytes to
   step over the same number of characters in string s2 and then we can
   correctly compare the 2 utf-8 strings.

2) A similar thing can happen for the NFA engine, when skipping to the
   next character to test for a match. We are skipping over the regstart
   pointer, however we do not consider the case that because of
   case-folding we may need to adjust the number of bytes to skip over.
   So this needs to be adjusted in find_match_text() as well.

3) A related issue turned out, when prog->match_text is actually empty.
   In that case we should try to find the next match and skip this
   condition.

4) When comparing characters using collections, we must also apply case
   folding to each character in the collection and not just to the
   current character from the search string.  This doesn't apply to the
   NFA engine, because internally it converts collections to branches
   [abc] -> a\|b\|c

fixes: #14294
closes: #14756

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-07-30 20:39:18 +02:00
+								    else if (enc_utf8)
 								    {
 									char_u *p = s1;
-												patch 9.1.0650: Coverity warning in cstrncmp()

Problem:  Coverity warning in cstrncmp()
          (after v9.1.0645)
Solution: Change the type of n2 to int.
          (zeertzjq)

________________________________________________________________________________________________________
*** CID 1615684:  Integer handling issues  (INTEGER_OVERFLOW)
/src/regexp.c: 1757 in cstrncmp()
1751                 n1 -= mb_ptr2len(s1);
1752                 MB_PTR_ADV(p);
1753                 n2++;
1754             }
1755             // count the number of bytes to advance the same number of chars for s2
1756             p = s2;
>>>     CID 1615684:  Integer handling issues  (INTEGER_OVERFLOW)
>>>     Expression "n2--", which is equal to 18446744073709551615, where "n2" is known to be equal to 0, underflows the type that receives it, an unsigned integer 64 bits wide.
1757             while (n2-- > 0 && *p != NUL)
1758                 MB_PTR_ADV(p);
1759
1760             n2 = p - s2;
1761
1762             result = MB_STRNICMP2(s1, s2, *n, n2);

closes: #15409

Signed-off-by: zeertzjq <zeertzjq@outlook.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-08-01 22:48:53 +02:00
+									int n2 = 0;
-												patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive

Problem:  regex: wrong match when searching multi-byte char
          case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string

This patch does the following 4 things:

1) When the regexp engine compares two utf-8 codepoints case
   insensitive it may match an adjacent character, because it assumes
   it can step over as many bytes as the pattern contains.

   This however is not necessarily true because of case-folding, a
   multi-byte UTF-8 character can be considered equal to some
   single-byte value.

   Let's consider the pattern 'ſ' and the string 's'. When comparing and
   ignoring case, the single character 's' matches, and since it matches
   Vim will try to step over the match (by the amount of bytes of the
   pattern), assuming that since it matches, the length of both strings is
   the same.

   However in that case, it should only step over the single byte value
   's' by 1 byte and try to start matching after it again. So for the
   backtracking engine we need to ensure:
   * we try to match the correct length for the pattern and the text
   * in case of a match, we step over it correctly

   There is one tricky thing for the backtracking engine. We also need to
   calculate correctly the number of bytes to compare the 2 different
   utf-8 strings s1 and s2. So we will count the number of characters in
   s1 that the byte len specified. Then we count the number of bytes to
   step over the same number of characters in string s2 and then we can
   correctly compare the 2 utf-8 strings.

2) A similar thing can happen for the NFA engine, when skipping to the
   next character to test for a match. We are skipping over the regstart
   pointer, however we do not consider the case that because of
   case-folding we may need to adjust the number of bytes to skip over.
   So this needs to be adjusted in find_match_text() as well.

3) A related issue turned out, when prog->match_text is actually empty.
   In that case we should try to find the next match and skip this
   condition.

4) When comparing characters using collections, we must also apply case
   folding to each character in the collection and not just to the
   current character from the search string.  This doesn't apply to the
   NFA engine, because internally it converts collections to branches
   [abc] -> a\|b\|c

fixes: #14294
closes: #14756

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-07-30 20:39:18 +02:00
+									int n1 = *n;
 									// Count the number of characters in the first "*n" bytes of s1.
 									// Each step subtracts the byte length of the character at "p"
 									// (not always that of the first character of s1), so characters
 									// of differing byte length are counted correctly.
 									while (n1 > 0 && *p != NUL)
 									{
 									    n1 -= mb_ptr2len(p);
 									    MB_PTR_ADV(p);
 									    n2++;
 									}
 									// count the number of bytes to advance the same number of chars for s2
 									p = s2;
 									while (n2-- > 0 && *p != NUL)
 									    MB_PTR_ADV(p);
 									n2 = p - s2;
 									result = MB_STRNICMP2(s1, s2, *n, n2);
-												patch 9.1.0650: Coverity warning in cstrncmp()

Problem:  Coverity warning in cstrncmp()
          (after v9.1.0645)
Solution: Change the type of n2 to int.
          (zeertzjq)

________________________________________________________________________________________________________
*** CID 1615684:  Integer handling issues  (INTEGER_OVERFLOW)
/src/regexp.c: 1757 in cstrncmp()
1751                 n1 -= mb_ptr2len(s1);
1752                 MB_PTR_ADV(p);
1753                 n2++;
1754             }
1755             // count the number of bytes to advance the same number of chars for s2
1756             p = s2;
>>>     CID 1615684:  Integer handling issues  (INTEGER_OVERFLOW)
>>>     Expression "n2--", which is equal to 18446744073709551615, where "n2" is known to be equal to 0, underflows the type that receives it, an unsigned integer 64 bits wide.
1757             while (n2-- > 0 && *p != NUL)
1758                 MB_PTR_ADV(p);
1759
1760             n2 = p - s2;
1761
1762             result = MB_STRNICMP2(s1, s2, *n, n2);

closes: #15409

Signed-off-by: zeertzjq <zeertzjq@outlook.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-08-01 22:48:53 +02:00
+									if (result == 0 && n2 < *n)
-												patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive

Problem:  regex: wrong match when searching multi-byte char
          case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string

This patch does the following 4 things:

1) When the regexp engine compares two utf-8 codepoints case
   insensitive it may match an adjacent character, because it assumes
   it can step over as many bytes as the pattern contains.

   This however is not necessarily true because of case-folding, a
   multi-byte UTF-8 character can be considered equal to some
   single-byte value.

   Let's consider the pattern 'ſ' and the string 's'. When comparing and
   ignoring case, the single character 's' matches, and since it matches
   Vim will try to step over the match (by the amount of bytes of the
   pattern), assuming that since it matches, the length of both strings is
   the same.

   However in that case, it should only step over the single byte value
   's' by 1 byte and try to start matching after it again. So for the
   backtracking engine we need to ensure:
   * we try to match the correct length for the pattern and the text
   * in case of a match, we step over it correctly

   There is one tricky thing for the backtracking engine. We also need to
   calculate correctly the number of bytes to compare the 2 different
   utf-8 strings s1 and s2. So we will count the number of characters in
   s1 that the byte len specified. Then we count the number of bytes to
   step over the same number of characters in string s2 and then we can
   correctly compare the 2 utf-8 strings.

2) A similar thing can happen for the NFA engine, when skipping to the
   next character to test for a match. We are skipping over the regstart
   pointer, however we do not consider the case that because of
   case-folding we may need to adjust the number of bytes to skip over.
   So this needs to be adjusted in find_match_text() as well.

3) A related issue turned out, when prog->match_text is actually empty.
   In that case we should try to find the next match and skip this
   condition.

4) When comparing characters using collections, we must also apply case
   folding to each character in the collection and not just to the
   current character from the search string.  This doesn't apply to the
   NFA engine, because internally it converts collections to branches
   [abc] -> a\|b\|c

fixes: #14294
closes: #14756

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-07-30 20:39:18 +02:00
+									    *n = n2;
 								    }
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    else
 									result = MB_STRNICMP(s1, s2, *n);
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // if it failed and it's utf8 and we want to ignore composing characters:
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    if (result != 0 && enc_utf8 && rex.reg_icombine)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
 									char_u	*str1, *str2;
 									int	c1, c2, c11, c12;
 									int	junk;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// we have to handle the strcmp ourselves, since it is necessary to
 									// deal with the composing characters by ignoring them:
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									str1 = s1;
 									str2 = s2;
 									c1 = c2 = 0;
-												updated for version 7.0142

											
										
										
											2005-09-06 19:25:11 +00:00
+									while ((int)(str1 - s1) < *n)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									{
 									    c1 = mb_ptr2char_adv(&str1);
 									    c2 = mb_ptr2char_adv(&str2);
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+									    // Decompose the character if necessary, into 'base' characters.
 									    // Currently hard-coded for Hebrew, Arabic to be done...
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									    if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+										// decomposition necessary?
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										mb_decompose(c1, &c11, &junk, &junk);
 										mb_decompose(c2, &c12, &junk, &junk);
 										c1 = c11;
 										c2 = c12;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										if (c11 != c12
 											    && (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										    break;
 									    }
 									}
 									result = c2 - c1;
 									if (result == 0)
 									    *n = (int)(str2 - s2);
 								    }
 								    return result;
 								}
 								/*
 								 * cstrchr: This function is used a lot for simple searches, keep it fast!
 								 */
 								    static char_u *
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								cstrchr(char_u *s, int c)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    char_u	*p;
-												patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive

Problem:  regex: wrong match when searching multi-byte char
          case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string

This patch does the following 4 things:

1) When the regexp engine compares two utf-8 codepoints case
   insensitive it may match an adjacent character, because it assumes
   it can step over as many bytes as the pattern contains.

   This however is not necessarily true because of case-folding, a
   multi-byte UTF-8 character can be considered equal to some
   single-byte value.

   Let's consider the pattern 'ſ' and the string 's'. When comparing and
   ignoring case, the single character 's' matches, and since it matches
   Vim will try to step over the match (by the amount of bytes of the
   pattern), assuming that since it matches, the length of both strings is
   the same.

   However in that case, it should only step over the single byte value
   's' by 1 byte and try to start matching after it again. So for the
   backtracking engine we need to ensure:
   * we try to match the correct length for the pattern and the text
   * in case of a match, we step over it correctly

   There is one tricky thing for the backtracking engine. We also need to
   calculate correctly the number of bytes to compare the 2 different
   utf-8 strings s1 and s2. So we will count the number of characters in
   s1 that the byte len specified. Then we count the number of bytes to
   step over the same number of characters in string s2 and then we can
   correctly compare the 2 utf-8 strings.

2) A similar thing can happen for the NFA engine, when skipping to the
   next character to test for a match. We are skipping over the regstart
   pointer, however we do not consider the case that because of
   case-folding we may need to adjust the number of bytes to skip over.
   So this needs to be adjusted in find_match_text() as well.

3) A related issue turned out, when prog->match_text is actually empty.
   In that case we should try to find the next match and skip this
   condition.

4) When comparing characters using collections, we must also apply case
   folding to each character in the collection and not just to the
   current character from the search string.  This doesn't apply to the
   NFA engine, because internally it converts collections to branches
   [abc] -> a\|b\|c

fixes: #14294
closes: #14756

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-07-30 20:39:18 +02:00
+								    int		cc, lc;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.0809: too many #ifdefs

Problem:    Too many #ifdefs.
Solution:   Graduate FEAT_MBYTE, part 3.

											
										
										
											2019-01-24 16:39:02 +01:00
+								    if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									return vim_strchr(s, c);
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // tolower() and toupper() can be slow, comparing twice should be a lot
 								    // faster (esp. when using MS Visual C++!).
 								    // For UTF-8 need to use folded case.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    if (enc_utf8 && c > 0x80)
-												patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive

Problem:  regex: wrong match when searching multi-byte char
          case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string

This patch does the following 4 things:

1) When the regexp engine compares two utf-8 codepoints case
   insensitive it may match an adjacent character, because it assumes
   it can step over as many bytes as the pattern contains.

   This however is not necessarily true because of case-folding, a
   multi-byte UTF-8 character can be considered equal to some
   single-byte value.

   Let's consider the pattern 'ſ' and the string 's'. When comparing and
   ignoring case, the single character 's' matches, and since it matches
   Vim will try to step over the match (by the amount of bytes of the
   pattern), assuming that since it matches, the length of both strings is
   the same.

   However in that case, it should only step over the single byte value
   's' by 1 byte and try to start matching after it again. So for the
   backtracking engine we need to ensure:
   * we try to match the correct length for the pattern and the text
   * in case of a match, we step over it correctly

   There is one tricky thing for the backtracking engine. We also need to
   calculate correctly the number of bytes to compare the 2 different
   utf-8 strings s1 and s2. So we will count the number of characters in
   s1 that the byte len specified. Then we count the number of bytes to
   step over the same number of characters in string s2 and then we can
   correctly compare the 2 utf-8 strings.

2) A similar thing can happen for the NFA engine, when skipping to the
   next character to test for a match. We are skipping over the regstart
   pointer, however we do not consider the case that because of
   case-folding we may need to adjust the number of bytes to skip over.
   So this needs to be adjusted in find_match_text() as well.

3) A related issue turned out, when prog->match_text is actually empty.
   In that case we should try to find the next match and skip this
   condition.

4) When comparing characters using collections, we must also apply case
   folding to each character in the collection and not just to the
   current character from the search string.  This doesn't apply to the
   NFA engine, because internally it converts collections to branches
   [abc] -> a\|b\|c

fixes: #14294
closes: #14756

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-07-30 20:39:18 +02:00
+								    {
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									cc = utf_fold(c);
-												patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive

Problem:  regex: wrong match when searching multi-byte char
          case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string

This patch does the following 4 things:

1) When the regexp engine compares two utf-8 codepoints case
   insensitive it may match an adjacent character, because it assumes
   it can step over as many bytes as the pattern contains.

   This however is not necessarily true because of case-folding, a
   multi-byte UTF-8 character can be considered equal to some
   single-byte value.

   Let's consider the pattern 'ſ' and the string 's'. When comparing and
   ignoring case, the single character 's' matches, and since it matches
   Vim will try to step over the match (by the amount of bytes of the
   pattern), assuming that since it matches, the length of both strings is
   the same.

   However in that case, it should only step over the single byte value
   's' by 1 byte and try to start matching after it again. So for the
   backtracking engine we need to ensure:
   * we try to match the correct length for the pattern and the text
   * in case of a match, we step over it correctly

   There is one tricky thing for the backtracking engine. We also need to
   calculate correctly the number of bytes to compare the 2 different
   utf-8 strings s1 and s2. So we will count the number of characters in
   s1 that the byte len specified. Then we count the number of bytes to
   step over the same number of characters in string s2 and then we can
   correctly compare the 2 utf-8 strings.

2) A similar thing can happen for the NFA engine, when skipping to the
   next character to test for a match. We are skipping over the regstart
   pointer, however we do not consider the case that because of
   case-folding we may need to adjust the number of bytes to skip over.
   So this needs to be adjusted in find_match_text() as well.

3) A related issue turned up when prog->match_text is actually empty.
   In that case we should try to find the next match and skip this
   condition.

4) When comparing characters using collections, we must also apply case
   folding to each character in the collection and not just to the
   current character from the search string.  This doesn't apply to the
   NFA engine, because internally it converts collections to branches
   [abc] -> a\|b\|c

fixes: #14294
closes: #14756

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-07-30 20:39:18 +02:00
+									lc = cc;
 								    }
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    else
-												patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive

Problem:  regex: wrong match when searching multi-byte char
          case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string

This patch does the following 4 things:

1) When the regexp engine compares two utf-8 codepoints case
   insensitive it may match an adjacent character, because it assumes
   it can step over as many bytes as the pattern contains.

   This however is not necessarily true because of case-folding, a
   multi-byte UTF-8 character can be considered equal to some
   single-byte value.

   Let's consider the pattern 'ſ' and the string 's'. When comparing and
   ignoring case, the single character 's' matches, and since it matches
   Vim will try to step over the match (by the amount of bytes of the
   pattern), assuming that since it matches, the length of both strings is
   the same.

   However in that case, it should only step over the single byte value
   's' by 1 byte and try to start matching after it again. So for the
   backtracking engine we need to ensure:
   * we try to match the correct length for the pattern and the text
   * in case of a match, we step over it correctly

   There is one tricky thing for the backtracking engine. We also need to
   calculate correctly the number of bytes to compare in the 2 different
   utf-8 strings s1 and s2. So we count the number of characters in s1
   that the specified byte length covers. Then we count the number of
   bytes needed to step over the same number of characters in string s2,
   and then we can correctly compare the 2 utf-8 strings.

2) A similar thing can happen for the NFA engine, when skipping to the
   next character to test for a match. We are skipping over the regstart
   pointer, however we do not consider the case that because of
   case-folding we may need to adjust the number of bytes to skip over.
   So this needs to be adjusted in find_match_text() as well.

3) A related issue turned up when prog->match_text is actually empty.
   In that case we should try to find the next match and skip this
   condition.

4) When comparing characters using collections, we must also apply case
   folding to each character in the collection and not just to the
   current character from the search string.  This doesn't apply to the
   NFA engine, because internally it converts collections to branches
   [abc] -> a\|b\|c

fixes: #14294
closes: #14756

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-07-30 20:39:18 +02:00
+									if (MB_ISUPPER(c))
 									{
 									    cc = MB_TOLOWER(c);
 									    lc = cc;
 									}
 									else if (MB_ISLOWER(c))
 									{
 									    cc = MB_TOUPPER(c);
 									    lc = c;
 									}
 									else
 									    return vim_strchr(s, c);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
 								    if (has_mbyte)
 								    {
-												updated for version 7.0127

											
										
										
											2005-08-10 21:07:57 +00:00
+									for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									{
-												patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive

Problem:  regex: wrong match when searching multi-byte char
          case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string

This patch does the following 4 things:

1) When the regexp engine compares two utf-8 codepoints case
   insensitive it may match an adjacent character, because it assumes
   it can step over as many bytes as the pattern contains.

   This however is not necessarily true because of case-folding, a
   multi-byte UTF-8 character can be considered equal to some
   single-byte value.

   Let's consider the pattern 'ſ' and the string 's'. When comparing and
   ignoring case, the single character 's' matches, and since it matches
   Vim will try to step over the match (by the amount of bytes of the
   pattern), assuming that since it matches, the length of both strings is
   the same.

   However in that case, it should only step over the single byte value
   's' by 1 byte and try to start matching after it again. So for the
   backtracking engine we need to ensure:
   * we try to match the correct length for the pattern and the text
   * in case of a match, we step over it correctly

   There is one tricky thing for the backtracking engine. We also need to
   calculate correctly the number of bytes to compare in the 2 different
   utf-8 strings s1 and s2. So we count the number of characters in s1
   that the specified byte length covers. Then we count the number of
   bytes needed to step over the same number of characters in string s2,
   and then we can correctly compare the 2 utf-8 strings.

2) A similar thing can happen for the NFA engine, when skipping to the
   next character to test for a match. We are skipping over the regstart
   pointer, however we do not consider the case that because of
   case-folding we may need to adjust the number of bytes to skip over.
   So this needs to be adjusted in find_match_text() as well.

3) A related issue turned up when prog->match_text is actually empty.
   In that case we should try to find the next match and skip this
   condition.

4) When comparing characters using collections, we must also apply case
   folding to each character in the collection and not just to the
   current character from the search string.  This doesn't apply to the
   NFA engine, because internally it converts collections to branches
   [abc] -> a\|b\|c

fixes: #14294
closes: #14756

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-07-30 20:39:18 +02:00
+									    int uc = utf_ptr2char(p);
 									    if (enc_utf8 && (c > 0x80 || uc > 0x80))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
-												patch 9.0.0105: illegal memory access when pattern starts with illegal byte

Problem:    Illegal memory access when pattern starts with illegal byte.
Solution:   Do not match a character with an illegal byte.

											
										
										
											2022-07-29 16:22:25 +01:00
+										// Do not match an illegal byte.  E.g. 0xff matches 0xc3 0xbf,
 										// not 0xff.
-												patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive

Problem:  regex: wrong match when searching multi-byte char
          case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string

This patch does the following 4 things:

1) When the regexp engine compares two utf-8 codepoints case
   insensitive it may match an adjacent character, because it assumes
   it can step over as many bytes as the pattern contains.

   This however is not necessarily true because of case-folding, a
   multi-byte UTF-8 character can be considered equal to some
   single-byte value.

   Let's consider the pattern 'ſ' and the string 's'. When comparing and
   ignoring case, the single character 's' matches, and since it matches
   Vim will try to step over the match (by the amount of bytes of the
   pattern), assuming that since it matches, the length of both strings is
   the same.

   However in that case, it should only step over the single byte value
   's' by 1 byte and try to start matching after it again. So for the
   backtracking engine we need to ensure:
   * we try to match the correct length for the pattern and the text
   * in case of a match, we step over it correctly

   There is one tricky thing for the backtracking engine. We also need to
   calculate correctly the number of bytes to compare in the 2 different
   utf-8 strings s1 and s2. So we count the number of characters in s1
   that the specified byte length covers. Then we count the number of
   bytes needed to step over the same number of characters in string s2,
   and then we can correctly compare the 2 utf-8 strings.

2) A similar thing can happen for the NFA engine, when skipping to the
   next character to test for a match. We are skipping over the regstart
   pointer, however we do not consider the case that because of
   case-folding we may need to adjust the number of bytes to skip over.
   So this needs to be adjusted in find_match_text() as well.

3) A related issue turned up when prog->match_text is actually empty.
   In that case we should try to find the next match and skip this
   condition.

4) When comparing characters using collections, we must also apply case
   folding to each character in the collection and not just to the
   current character from the search string.  This doesn't apply to the
   NFA engine, because internally it converts collections to branches
   [abc] -> a\|b\|c

fixes: #14294
closes: #14756

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-07-30 20:39:18 +02:00
+										// compare with lower case of the character
 										if ((uc < 0x80 || uc != *p) && utf_fold(uc) == lc)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										    return p;
 									    }
 									    else if (*p == c || *p == cc)
 										return p;
 									}
 								    }
 								    else
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// Faster version for when there are no multi-byte characters.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									for (p = s; *p != NUL; ++p)
 									    if (*p == c || *p == cc)
 										return p;
 								    return NULL;
 								}
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								////////////////////////////////////////////////////////////////
 								//		      regsub stuff			      //
 								////////////////////////////////////////////////////////////////
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 9.0.1908: undefined behaviour upper/lower function ptrs

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of \u / \U / \l / \L modifiers in the substitute
command relies on remembering the state by setting function pointers on
func_all/func_one in the code. The code signature of `fptr_T` is
supposed to return void* (due to C function signatures not being able to
return itself due to type recursion), and the definition of the
functions (e.g. to_Upper) didn't follow this rule, and so the code tries
to cast functions of different signatures, resulting in undefined
behavior error under UBSAN in Clang 17. See #12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: #13117

Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>

											
										
										
											2023-09-18 19:51:56 +02:00
+								typedef void (*fptr_T)(int *, int);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+								static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 9.0.1908: undefined behaviour upper/lower function ptrs

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of \u / \U / \l / \L modifiers in the substitute
command relies on remembering the state by setting function pointers on
func_all/func_one in the code. The code signature of `fptr_T` is
supposed to return void* (due to C function signatures not being able to
return itself due to type recursion), and the definition of the
functions (e.g. to_Upper) didn't follow this rule, and so the code tries
to cast functions of different signatures, resulting in undefined
behavior error under UBSAN in Clang 17. See #12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: #13117

Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>

											
										
										
											2023-09-18 19:51:56 +02:00
+								    static void
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								do_upper(int *d, int c)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
+								    *d = MB_TOUPPER(c);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
-												patch 9.0.1908: undefined behaviour upper/lower function ptrs

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of \u / \U / \l / \L modifiers in the substitute
command relies on remembering the state by setting function pointers on
func_all/func_one in the code. The code signature of `fptr_T` is
supposed to return void* (due to C function signatures not being able to
return itself due to type recursion), and the definition of the
functions (e.g. to_Upper) didn't follow this rule, and so the code tries
to cast functions of different signatures, resulting in undefined
behavior error under UBSAN in Clang 17. See #12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: #13117

Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>

											
										
										
											2023-09-18 19:51:56 +02:00
+								    static void
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								do_lower(int *d, int c)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
+								    *d = MB_TOLOWER(c);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
 								/*
 								 * regtilde(): Replace tildes in the pattern by the old pattern.
 								 *
 								 * Short explanation of the tilde: It stands for the previous replacement
 								 * pattern.  If that previous pattern also contains a ~ we should go back a
 								 * step further...  But we insert the previous pattern into the current one
 								 * and remember that.
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
+								 * This still does not handle the case where "magic" changes.  So require the
 								 * user to keep his hands off of "magic".
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 *
 								 * The tildes are parsed once before the first call to vim_regsub().
 								 */
 								    char_u *
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								regtilde(char_u *source, int magic)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    char_u	*newsub = source;
 								    char_u	*p;
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								    size_t	newsublen = 0;
 								    char_u	tilde[3] = {'~', NUL, NUL};
 								    size_t	tildelen = 1;
 								    int		error = FALSE;
 								    if (!magic)
 								    {
 									tilde[0] = '\\';
 									tilde[1] = '~';
 									tilde[2] = NUL;
 									tildelen = 2;
 								    }
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
 								    for (p = newsub; *p; ++p)
 								    {
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+									if (STRNCMP(p, tilde, tildelen) == 0)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									{
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+									    size_t prefixlen = p - newsub;		// not including the tilde
 									    char_u *postfix = p + tildelen;
 									    size_t postfixlen;
 									    size_t tmpsublen;
 									    if (newsublen == 0)
 										newsublen = STRLEN(newsub);
 									    newsublen -= tildelen;
 									    postfixlen = newsublen - prefixlen;
 									    tmpsublen = prefixlen + reg_prev_sublen + postfixlen;
 									    if (tmpsublen > 0 && reg_prev_sub != NULL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										char_u *tmpsub;
-												patch 9.0.1532: crash when expanding "~" in substitute causes very long text

Problem:    Crash when expanding "~" in substitute causes very long text.
Solution:   Limit the text length to MAXCOL.

											
										
										
											2023-05-09 21:15:30 +01:00
+										// Avoid making the text longer than MAXCOL, it will cause
 										// trouble at some point.
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										if (tmpsublen > MAXCOL)
-												patch 9.0.1532: crash when expanding "~" in substitute causes very long text

Problem:    Crash when expanding "~" in substitute causes very long text.
Solution:   Limit the text length to MAXCOL.

											
										
										
											2023-05-09 21:15:30 +01:00
+										{
 										    emsg(_(e_resulting_text_too_long));
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										    error = TRUE;
-												patch 9.0.1532: crash when expanding "~" in substitute causes very long text

Problem:    Crash when expanding "~" in substitute causes very long text.
Solution:   Limit the text length to MAXCOL.

											
										
										
											2023-05-09 21:15:30 +01:00
+										    break;
 										}
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										tmpsub = alloc(tmpsublen + 1);
 										if (tmpsub == NULL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										{
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										    emsg(_(e_out_of_memory));
 										    error = TRUE;
 										    break;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										}
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
 										// copy prefix
 										mch_memmove(tmpsub, newsub, prefixlen);
 										// interpret tilde
 										mch_memmove(tmpsub + prefixlen, reg_prev_sub, reg_prev_sublen);
 										// copy postfix
 										STRCPY(tmpsub + prefixlen + reg_prev_sublen, postfix);
 										if (newsub != source)	// allocated newsub before
 										    vim_free(newsub);
 										newsub = tmpsub;
 										newsublen = tmpsublen;
 										p = newsub + prefixlen + reg_prev_sublen;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    }
 									    else
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										mch_memmove(p, postfix, postfixlen + 1);	// remove the tilde (+1 for the NUL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    --p;
 									}
 									else
 									{
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    if (*p == '\\' && p[1])		// skip escaped characters
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										++p;
 									    if (has_mbyte)
-												updated for version 7.0127

											
										
										
											2005-08-10 21:07:57 +00:00
+										p += (*mb_ptr2len)(p) - 1;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									}
 								    }
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								    if (error)
 								    {
 									if (newsub != source)
 									    vim_free(newsub);
 									return source;
 								    }
-												patch 9.0.0047: using freed memory with recursive substitute

Problem:    Using freed memory with recursive substitute.
Solution:   Always make a copy for reg_prev_sub.

											
										
										
											2022-07-07 22:20:31 +01:00
+								    // Store a copy of newsub  in reg_prev_sub.  It is always allocated,
 								    // because recursive calls may make the returned string invalid.
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								    // Only store it if there something to store.
 								    newsublen = p - newsub;
 								    if (newsublen == 0)
 									VIM_CLEAR(reg_prev_sub);
 								    else
 								    {
 									vim_free(reg_prev_sub);
 									reg_prev_sub = vim_strnsave(newsub, newsublen);
 								    }
 								    if (reg_prev_sub == NULL)
 									reg_prev_sublen = 0;
 								    else
 									reg_prev_sublen = newsublen;
-												patch 9.0.0047: using freed memory with recursive substitute

Problem:    Using freed memory with recursive substitute.
Solution:   Always make a copy for reg_prev_sub.

											
										
										
											2022-07-07 22:20:31 +01:00
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    return newsub;
 								}
-												patch 8.1.1979: code for handling file names is spread out

Problem:    Code for handling file names is spread out.
Solution:   Move code to new filepath.c file.  Graduate FEAT_MODIFY_FNAME.

											
										
										
											2019-09-04 15:54:55 +02:00
+								#ifdef FEAT_EVAL
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
 								/*
-												patch 8.1.2280: crash when passing partial to substitute()

Problem:    Crash when passing partial to substitute().
Solution:   Take extra arguments into account. (closes #5186)

											
										
										
											2019-11-09 22:28:11 +01:00
+								 * Put the submatches in "argv[argskip]" which is a list passed into
 								 * call_func() by vim_regsub_both().
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
+								 */
 								    static int
-												patch 9.0.0476: varargs does not work for replacement function of substitute()

Problem:    Varargs does not work for replacement function of substitute().
Solution:   Check the varargs flag of the function. (closes #11142)

											
										
										
											2022-09-16 12:10:03 +01:00
+								fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, ufunc_T *fp)
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
+								{
 								    listitem_T	*li;
 								    int		i;
 								    char_u	*s;
-												patch 8.1.2280: crash when passing partial to substitute()

Problem:    Crash when passing partial to substitute().
Solution:   Take extra arguments into account. (closes #5186)

											
										
										
											2019-11-09 22:28:11 +01:00
+								    typval_T	*listarg = argv + argskip;
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
-												patch 9.0.0480: cannot use a :def varargs function with substitute()

Problem:    Cannot use a :def varargs function with substitute().
Solution:   Use has_varargs(). (closes #11146)

											
										
										
											2022-09-16 16:06:32 +01:00
+								    if (!has_varargs(fp) && fp->uf_args.ga_len <= argskip)
-												patch 8.1.2280: crash when passing partial to substitute()

Problem:    Crash when passing partial to substitute().
Solution:   Take extra arguments into account. (closes #5186)

											
										
										
											2019-11-09 22:28:11 +01:00
+									// called function doesn't take a submatches argument
 									return argskip;
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
-												patch 8.1.2280: crash when passing partial to substitute()

Problem:    Crash when passing partial to substitute().
Solution:   Take extra arguments into account. (closes #5186)

											
										
										
											2019-11-09 22:28:11 +01:00
+								    // Relies on sl_list to be the first item in staticList10_T.
 								    init_static_list((staticList10_T *)(listarg->vval.v_list));
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
-												patch 8.1.2280: crash when passing partial to substitute()

Problem:    Crash when passing partial to substitute().
Solution:   Take extra arguments into account. (closes #5186)

											
										
										
											2019-11-09 22:28:11 +01:00
+								    // There are always 10 list items in staticList10_T.
 								    li = listarg->vval.v_list->lv_first;
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
+								    for (i = 0; i < 10; ++i)
 								    {
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									s = rsm.sm_match->startp[i];
 									if (s == NULL || rsm.sm_match->endp[i] == NULL)
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
+									    s = NULL;
 									else
-												patch 8.2.0967: unnecessary type casts for vim_strnsave()

Problem:    Unnecessary type casts for vim_strnsave().
Solution:   Remove the type casts.

											
										
										
											2020-06-12 22:59:11 +02:00
+									    s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
+									li->li_tv.v_type = VAR_STRING;
 									li->li_tv.vval.v_string = s;
 									li = li->li_next;
 								    }
-												patch 8.1.2280: crash when passing partial to substitute()

Problem:    Crash when passing partial to substitute().
Solution:   Take extra arguments into account. (closes #5186)

											
										
										
											2019-11-09 22:28:11 +01:00
+								    return argskip + 1;
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
+								}
 								    static void
 								clear_submatch_list(staticList10_T *sl)
 								{
 								    // Walk all ten item slots of "sl" and free the string each one holds.
 								    // The pointers themselves are left as they are.
 								    int idx = 0;
 								    while (idx < 10)
 								    {
 									vim_free(sl->sl_items[idx].li_tv.vval.v_string);
 									++idx;
 								    }
 								}
-												patch 8.1.1979: code for handling file names is spread out

Problem:    Code for handling file names is spread out.
Solution:   Move code to new filepath.c file.  Graduate FEAT_MODIFY_FNAME.

											
										
										
											2019-09-04 15:54:55 +02:00
+								#endif
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								/*
 								 * vim_regsub() - perform substitutions after a vim_regexec() or
 								 * vim_regexec_multi() match.
 								 *
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+								 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
-												patch 9.0.0828: various typos

Problem:    Various typos.
Solution:   Correct typos. (closes #11432)

											
										
										
											2022-11-02 13:30:51 +00:00
+								 * Otherwise nothing is copied, only compute the length of the result.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 *
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+								 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
 								 *
 								 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
 								 * double them to keep them, and insert a backslash before a CR to avoid it
 								 * being replaced with a line break later.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 *
 								 * Note: The matched text must not change between the call of
 								 * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
 								 * references invalid!
 								 *
 								 * Returns the size of the replacement, including terminating NUL.
 								 */
 								    int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								vim_regsub(
 								    regmatch_T	*rmp,
 								    char_u	*source,
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports  a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
+								    typval_T	*expr,
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								    char_u	*dest,
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+								    int		destlen,
 								    int		flags)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    int		result;
 								    regexec_T	rex_save;
 								    int		rex_in_use_save = rex_in_use;
 								    if (rex_in_use)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// Being called recursively, save the state.
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									rex_save = rex;
 								    rex_in_use = TRUE;
 								    rex.reg_match = rmp;
 								    rex.reg_mmatch = NULL;
 								    rex.reg_maxline = 0;
 								    rex.reg_buf = curbuf;
 								    rex.reg_line_lbr = TRUE;
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+								    result = vim_regsub_both(source, expr, dest, destlen, flags);
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
 								    rex_in_use = rex_in_use_save;
 								    if (rex_in_use)
 									rex = rex_save;
 								    return result;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
 								    int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								vim_regsub_multi(
 								    regmmatch_T	*rmp,
 								    linenr_T	lnum,
 								    char_u	*source,
 								    char_u	*dest,
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+								    int		destlen,
 								    int		flags)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    int		result;
 								    regexec_T	rex_save;
 								    int		rex_in_use_save = rex_in_use;
 								    if (rex_in_use)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// Being called recursively, save the state.
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									rex_save = rex;
 								    rex_in_use = TRUE;
 								    rex.reg_match = NULL;
 								    rex.reg_mmatch = rmp;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    rex.reg_buf = curbuf;	// always works on the current buffer!
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    rex.reg_firstlnum = lnum;
 								    rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
 								    rex.reg_line_lbr = FALSE;
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+								    result = vim_regsub_both(source, NULL, dest, destlen, flags);
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
 								    rex_in_use = rex_in_use_save;
 								    if (rex_in_use)
 									rex = rex_save;
 								    return result;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								}
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+								#if defined(FEAT_EVAL) || defined(PROTO)
 								// When nesting more than a couple levels it's probably a mistake.
 								# define MAX_REGSUB_NESTING 4
 								static char_u   *eval_result[MAX_REGSUB_NESTING] = {NULL, NULL, NULL, NULL};
 								# if defined(EXITFREE) || defined(PROTO)
 								    void
 								free_resub_eval_result(void)
 								{
 								    // Clear the saved expression result of every nesting level, from
 								    // the outermost slot up to MAX_REGSUB_NESTING - 1.
 								    int level = 0;
 								    while (level < MAX_REGSUB_NESTING)
 								    {
 									VIM_CLEAR(eval_result[level]);
 									++level;
 								    }
 								}
 								# endif
 								#endif
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    static int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								vim_regsub_both(
 								    char_u	*source,
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports  a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
+								    typval_T	*expr,
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								    char_u	*dest,
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+								    int		destlen,
 								    int		flags)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    char_u	*src;
 								    char_u	*dst;
 								    char_u	*s;
 								    int		c;
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
+								    int		cc;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    int		no = -1;
-												updated for version 7.3.873
Problem:    Cannot easily use :s to make title case.
Solution:   Have "\L\u" result in title case. (James McCoy)

											
										
										
											2013-03-19 17:42:15 +01:00
+								    fptr_T	func_all = (fptr_T)NULL;
 								    fptr_T	func_one = (fptr_T)NULL;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    linenr_T	clnum = 0;	// init for GCC
 								    int		len = 0;	// init for GCC
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								#ifdef FEAT_EVAL
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+								    static int  nesting = 0;
 								    int		nested;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								#endif
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+								    int		copy = flags & REGSUB_COPY;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // Be paranoid...
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports  a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
+								    if ((source == NULL && expr == NULL) || dest == NULL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
-												patch 9.0.1594: some internal error messages are translated

Problem:    Some internal error messages are translated.
Solution:   Consistently do not translate internal error messages.
            (closes #12459)

											
										
										
											2023-05-31 17:12:14 +01:00
+									iemsg(e_null_argument);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									return 0;
 								    }
 								    if (prog_magic_wrong())
 									return 0;
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+								#ifdef FEAT_EVAL
 								    if (nesting == MAX_REGSUB_NESTING)
 								    {
 									emsg(_(e_substitute_nesting_too_deep));
 									return 0;
 								    }
 								    nested = nesting;
 								#endif
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    src = source;
 								    dst = dest;
 								    /*
 								     * When the substitute part starts with "\=" evaluate it as an expression.
 								     */
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    if (expr != NULL || (source[0] == '\\' && source[1] == '='))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
 								#ifdef FEAT_EVAL
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// To make sure that the length doesn't change between checking the
 									// length and copying the string, and to speed up things, the
-												patch 8.2.5057: using gettimeofday() for timeout is very inefficient

Problem:    Using gettimeofday() for timeout is very inefficient.
Solution:   Set a platform dependent timer. (Paul Ollis, closes #10505)

											
										
										
											2022-06-05 16:55:54 +01:00
+									// resulting string is saved from the call with
 									// "flags & REGSUB_COPY" == 0 to the call with
 									// "flags & REGSUB_COPY" != 0.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									if (copy)
 									{
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+									    if (eval_result[nested] != NULL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										int eval_len = (int)STRLEN(eval_result[nested]);
 										if (eval_len < destlen)
 										{
 										    STRCPY(dest, eval_result[nested]);
 										    dst += eval_len;
 										    VIM_CLEAR(eval_result[nested]);
 										}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    }
 									}
 									else
 									{
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									    int		    prev_can_f_submatch = can_f_submatch;
 									    regsubmatch_T   rsm_save;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+									    VIM_CLEAR(eval_result[nested]);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    // The expression may contain substitute(), which calls us
 									    // recursively.  Make sure submatch() gets the text from the first
 									    // level.
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									    if (can_f_submatch)
 										rsm_save = rsm;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    can_f_submatch = TRUE;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									    rsm.sm_match = rex.reg_match;
 									    rsm.sm_mmatch = rex.reg_mmatch;
 									    rsm.sm_firstlnum = rex.reg_firstlnum;
 									    rsm.sm_maxline = rex.reg_maxline;
 									    rsm.sm_line_lbr = rex.reg_line_lbr;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+									    // Although unlikely, it is possible that the expression invokes a
 									    // substitute command (it might fail, but still).  Therefore keep
-												patch 8.2.5154: still mentioning version8, some cosmetic issues

Problem:    Still mentioning version8, some cosmetic issues.
Solution:   Prefer mentioning version9, cosmetic improvements.

											
										
										
											2022-06-23 20:46:27 +01:00
+									    // an array of eval results.
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+									    ++nesting;
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports  a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
+									    if (expr != NULL)
 									    {
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
+										typval_T	argv[2];
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports  a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
+										char_u		buf[NUMBUFLEN];
 										typval_T	rettv;
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
+										staticList10_T	matchList;
-												patch 8.1.1800: function call functions have too many arguments

Problem:    Function call functions have too many arguments.
Solution:   Pass values in a funcexe_T struct.

											
										
										
											2019-08-03 18:17:11 +02:00
+										funcexe_T	funcexe;
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports  a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
 										rettv.v_type = VAR_STRING;
 										rettv.vval.v_string = NULL;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										argv[0].v_type = VAR_LIST;
 										argv[0].vval.v_list = &matchList.sl_list;
 										matchList.sl_list.lv_len = 0;
-												patch 8.2.0559: clearing a struct is verbose

Problem:    Clearing a struct is verbose.
Solution:   Define and use CLEAR_FIELD() and CLEAR_POINTER().

											
										
										
											2020-04-12 19:37:17 +02:00
+										CLEAR_FIELD(funcexe);
-												patch 8.2.3796: the funcexe_T struct members are not named consistently

Problem:    The funcexe_T struct members are not named consistently.
Solution:   Prefix "fe_" to all the members.

											
										
										
											2021-12-13 14:26:44 +00:00
+										funcexe.fe_argv_func = fill_submatch_list;
 										funcexe.fe_evaluate = TRUE;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										if (expr->v_type == VAR_FUNC)
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports  a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
+										{
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										    s = expr->vval.v_string;
-												patch 8.1.1800: function call functions have too many arguments

Problem:    Function call functions have too many arguments.
Solution:   Pass values in a funcexe_T struct.

											
										
										
											2019-08-03 18:17:11 +02:00
+										    call_func(s, -1, &rettv, 1, argv, &funcexe);
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
+										}
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										else if (expr->v_type == VAR_PARTIAL)
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
+										{
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										    partial_T   *partial = expr->vval.v_partial;
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										    s = partial_name(partial);
-												patch 8.2.3796: the funcexe_T struct members are not named consistently

Problem:    The funcexe_T struct members are not named consistently.
Solution:   Prefix "fe_" to all the members.

											
										
										
											2021-12-13 14:26:44 +00:00
+										    funcexe.fe_partial = partial;
-												patch 8.1.1800: function call functions have too many arguments

Problem:    Function call functions have too many arguments.
Solution:   Pass values in a funcexe_T struct.

											
										
										
											2019-08-03 18:17:11 +02:00
+										    call_func(s, -1, &rettv, 1, argv, &funcexe);
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
+										}
-												patch 8.2.4870: Vim9: expression in :substitute is not compiled

Problem:    Vim9: expression in :substitute is not compiled.
Solution:   Use an INSTR instruction if possible. (closes #10334)

											
										
										
											2022-05-05 13:53:03 +01:00
+										else if (expr->v_type == VAR_INSTR)
 										{
 										    exe_typval_instr(expr, &rettv);
 										}
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										if (matchList.sl_list.lv_len > 0)
-												patch 8.1.2282: crash when passing many arguments through a partial

Problem:    Crash when passing many arguments through a partial. (Andy
            Massimino)
Solution:   Check the number of arguments. (closes #5186)

											
										
										
											2019-11-10 00:13:50 +01:00
+										    // fill_submatch_list() was called
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										    clear_submatch_list(&matchList);
-												patch 8.1.2282: crash when passing many arguments through a partial

Problem:    Crash when passing many arguments through a partial. (Andy
            Massimino)
Solution:   Check the number of arguments. (closes #5186)

											
										
										
											2019-11-10 00:13:50 +01:00
+										if (rettv.v_type == VAR_UNKNOWN)
 										    // something failed, no need to report another error
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+										    eval_result[nested] = NULL;
-												patch 8.1.2282: crash when passing many arguments through a partial

Problem:    Crash when passing many arguments through a partial. (Andy
            Massimino)
Solution:   Check the number of arguments. (closes #5186)

											
										
										
											2019-11-10 00:13:50 +01:00
+										else
 										{
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+										    eval_result[nested] = tv_get_string_buf_chk(&rettv, buf);
 										    if (eval_result[nested] != NULL)
 											eval_result[nested] = vim_strsave(eval_result[nested]);
-												patch 8.1.2282: crash when passing many arguments through a partial

Problem:    Crash when passing many arguments through a partial. (Andy
            Massimino)
Solution:   Check the number of arguments. (closes #5186)

											
										
										
											2019-11-10 00:13:50 +01:00
+										}
-												patch 7.4.2090
Problem:    Using submatch() in a lambda passed to substitute() is verbose.
Solution:   Use a static list and pass it as an optional argument to the
            function.  Fix memory leak.

											
										
										
											2016-07-22 21:50:18 +02:00
+										clear_tv(&rettv);
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
+									    }
-												patch 8.2.2784: Vim9: cannot use \=expr in :substitute

Problem:    Vim9: cannot use \=expr in :substitute.
Solution:   Compile the expression into instructions and execute them when
            invoked.

											
										
										
											2021-04-19 16:48:48 +02:00
+									    else if (substitute_instr != NULL)
 										// Execute instructions from ISN_SUBSTITUTE.
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+										eval_result[nested] = exe_substitute_instr();
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
+									    else
-												patch 9.0.0634: evaluating "expr" options has more overhead than needed

Problem:    Evaluating "expr" options has more overhead than needed.
Solution:   Use call_simple_func() for 'foldtext', 'includeexpr', 'printexpr',
            "expr" of 'spellsuggest', 'diffexpr', 'patchexpr', 'balloonexpr',
            'formatexpr', 'indentexpr' and 'charconvert'.

											
										
										
											2022-10-01 19:43:52 +01:00
+										eval_result[nested] = eval_to_string(source + 2, TRUE, FALSE);
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+									    --nesting;
-												patch 7.4.2072
Problem:    substitute() does not support a Funcref argument.
Solution:   Support a Funcref like it supports a string starting with "\=".

											
										
										
											2016-07-19 19:10:51 +02:00
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+									    if (eval_result[nested] != NULL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
-												updated for version 7.2.407
Problem:    When using an expression in ":s" backslashes in the result are
            dropped. (Sergey Goldgaber, Christian Brabandt)
Solution:   Double backslashes.

											
										
										
											2010-03-23 16:27:22 +01:00
+										int had_backslash = FALSE;
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+										for (s = eval_result[nested]; *s != NUL; MB_PTR_ADV(s))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										{
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										    // Change NL to CR, so that it becomes a line break,
 										    // unless called from vim_regexec_nl().
 										    // Skip over a backslashed character.
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										    if (*s == NL && !rsm.sm_line_lbr)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											*s = CAR;
 										    else if (*s == '\\' && s[1] != NUL)
-												updated for version 7.2.407
Problem:    When using an expression in ":s" backslashes in the result are
            dropped. (Sergey Goldgaber, Christian Brabandt)
Solution:   Double backslashes.

											
										
										
											2010-03-23 16:27:22 +01:00
+										    {
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											++s;
-												updated for version 7.2.437
Problem:    When "\\\n" appears in the expression result the \n doesn't result
            in a line break. (Andy Wokula)
Solution:   Also replace a \n after a backslash with \r.

											
										
										
											2010-05-21 13:08:58 +02:00
+											/* Change NL to CR here too, so that this works:
 											 * :s/abc\\\ndef/\="aaa\\\nbbb"/  on text:
 											 *   abc\
 											 *   def
-												updated for version 7.3.225
Problem:    Using "\n" in a substitute inside ":s" does not result in a line
            break.
Solution:   Change behavior inside vim_regexec_nl().   Add tests.  (Motoya
            Kurotsu)

											
										
										
											2011-06-19 04:32:15 +02:00
+											 * Not when called from vim_regexec_nl().
-												updated for version 7.2.437
Problem:    When "\\\n" appears in the expression result the \n doesn't result
            in a line break. (Andy Wokula)
Solution:   Also replace a \n after a backslash with \r.

											
										
										
											2010-05-21 13:08:58 +02:00
+											 */
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+											if (*s == NL && !rsm.sm_line_lbr)
-												updated for version 7.2.437
Problem:    When "\\\n" appears in the expression result the \n doesn't result
            in a line break. (Andy Wokula)
Solution:   Also replace a \n after a backslash with \r.

											
										
										
											2010-05-21 13:08:58 +02:00
+											    *s = CAR;
-												updated for version 7.2.407
Problem:    When using an expression in ":s" backslashes in the result are
            dropped. (Sergey Goldgaber, Christian Brabandt)
Solution:   Double backslashes.

											
										
										
											2010-03-23 16:27:22 +01:00
+											had_backslash = TRUE;
 										    }
 										}
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+										if (had_backslash && (flags & REGSUB_BACKSLASH))
-												updated for version 7.2.407
Problem:    When using an expression in ":s" backslashes in the result are
            dropped. (Sergey Goldgaber, Christian Brabandt)
Solution:   Double backslashes.

											
										
										
											2010-03-23 16:27:22 +01:00
+										{
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										    // Backslashes will be consumed, need to double them.
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+										    s = vim_strsave_escaped(eval_result[nested], (char_u *)"\\");
-												updated for version 7.2.407
Problem:    When using an expression in ":s" backslashes in the result are
            dropped. (Sergey Goldgaber, Christian Brabandt)
Solution:   Double backslashes.

											
										
										
											2010-03-23 16:27:22 +01:00
+										    if (s != NULL)
 										    {
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+											vim_free(eval_result[nested]);
 											eval_result[nested] = s;
-												updated for version 7.2.407
Problem:    When using an expression in ":s" backslashes in the result are
            dropped. (Sergey Goldgaber, Christian Brabandt)
Solution:   Double backslashes.

											
										
										
											2010-03-23 16:27:22 +01:00
+										    }
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										}
-												patch 8.2.5146: memory leak when substitute expression nests

Problem:    Memory leak when substitute expression nests.
Solution:   Use an array of expression results.

											
										
										
											2022-06-21 22:15:25 +01:00
+										dst += STRLEN(eval_result[nested]);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    }
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									    can_f_submatch = prev_can_f_submatch;
 									    if (can_f_submatch)
 										rsm = rsm_save;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									}
 								#endif
 								    }
 								    else
 								      while ((c = *src++) != NUL)
 								      {
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+									if (c == '&' && (flags & REGSUB_MAGIC))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    no = 0;
 									else if (c == '\\' && *src != NUL)
 									{
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+									    if (*src == '&' && !(flags & REGSUB_MAGIC))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
 										++src;
 										no = 0;
 									    }
 									    else if ('0' <= *src && *src <= '9')
 									    {
 										no = *src++ - '0';
 									    }
 									    else if (vim_strchr((char_u *)"uUlLeE", *src))
 									    {
 										switch (*src++)
 										{
-												patch 9.0.1908: undefined behaviour upper/lower function ptrs

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of the \u / \U / \l / \L modifiers in the substitute
command remembers its state by storing function pointers in
func_all/func_one. Functions of type `fptr_T` are supposed to return
void* (a C function type cannot name itself as its own return type), but
the definitions of the functions (e.g. do_Upper) didn't follow this rule,
so the code casts between functions of different signatures, which
results in an undefined behavior error under UBSAN in Clang 17.
See #12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: #13117

Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>

											
										
										
											2023-09-18 19:51:56 +02:00
+										case 'u':   func_one = do_upper;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											    continue;
-												patch 9.0.1908: undefined behaviour upper/lower function ptrs

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of the \u / \U / \l / \L modifiers in the substitute
command remembers its state by storing function pointers in
func_all/func_one. Functions of type `fptr_T` are supposed to return
void* (a C function type cannot name itself as its own return type), but
the definitions of the functions (e.g. do_Upper) didn't follow this rule,
so the code casts between functions of different signatures, which
results in an undefined behavior error under UBSAN in Clang 17.
See #12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: #13117

Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>

											
										
										
											2023-09-18 19:51:56 +02:00
+										case 'U':   func_all = do_upper;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											    continue;
-												patch 9.0.1908: undefined behaviour upper/lower function ptrs

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of the \u / \U / \l / \L modifiers in the substitute
command remembers its state by storing function pointers in
func_all/func_one. Functions of type `fptr_T` are supposed to return
void* (a C function type cannot name itself as its own return type), but
the definitions of the functions (e.g. do_Upper) didn't follow this rule,
so the code casts between functions of different signatures, which
results in an undefined behavior error under UBSAN in Clang 17.
See #12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: #13117

Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>

											
										
										
											2023-09-18 19:51:56 +02:00
+										case 'l':   func_one = do_lower;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											    continue;
-												patch 9.0.1908: undefined behaviour upper/lower function ptrs

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of the \u / \U / \l / \L modifiers in the substitute
command remembers its state by storing function pointers in
func_all/func_one. Functions of type `fptr_T` are supposed to return
void* (a C function type cannot name itself as its own return type), but
the definitions of the functions (e.g. do_Upper) didn't follow this rule,
so the code casts between functions of different signatures, which
results in an undefined behavior error under UBSAN in Clang 17.
See #12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: #13117

Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>

											
										
										
											2023-09-18 19:51:56 +02:00
+										case 'L':   func_all = do_lower;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											    continue;
 										case 'e':
-												updated for version 7.3.873
Problem:    Cannot easily use :s to make title case.
Solution:   Have "\L\u" result in title case. (James McCoy)

											
										
										
											2013-03-19 17:42:15 +01:00
+										case 'E':   func_one = func_all = (fptr_T)NULL;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											    continue;
 										}
 									    }
 									}
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									if (no < 0)	      // Ordinary character.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									{
-												updated for version 7.0232

											
										
										
											2006-03-23 22:59:57 +00:00
+									    if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
 									    {
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										// Copy a special key as-is.
-												updated for version 7.0232

											
										
										
											2006-03-23 22:59:57 +00:00
+										if (copy)
 										{
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+										    if (dst + 3 > dest + destlen)
 										    {
 											iemsg("vim_regsub_both(): not enough space");
 											return 0;
 										    }
-												updated for version 7.0232

											
										
										
											2006-03-23 22:59:57 +00:00
+										    *dst++ = c;
 										    *dst++ = *src++;
 										    *dst++ = *src++;
 										}
 										else
 										{
 										    dst += 3;
 										    src += 2;
 										}
 										continue;
 									    }
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    if (c == '\\' && *src != NUL)
 									    {
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										// Check for abbreviations -- webb
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										switch (*src)
 										{
 										    case 'r':	c = CAR;	++src;	break;
 										    case 'n':	c = NL;		++src;	break;
 										    case 't':	c = TAB;	++src;	break;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										 // Oh no!  \e already has meaning in subst pat :-(
 										 // case 'e':   c = ESC;	++src;	break;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										    case 'b':	c = Ctrl_H;	++src;	break;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										    // If "backslash" is TRUE the backslash will be removed
 										    // later.  Used to insert a literal CR.
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+										    default:	if (flags & REGSUB_BACKSLASH)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+												{
 												    if (copy)
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+												    {
 													if (dst + 1 > dest + destlen)
 													{
 													    iemsg("vim_regsub_both(): not enough space");
 													    return 0;
 													}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+													*dst = '\\';
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+												    }
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+												    ++dst;
 												}
 												c = *src++;
 										}
 									    }
-												updated for version 7.0232

											
										
										
											2006-03-23 22:59:57 +00:00
+									    else if (has_mbyte)
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
+										c = mb_ptr2char(src - 1);
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    // Write to buffer, if copy is set.
-												updated for version 7.3.873
Problem:    Cannot easily use :s to make title case.
Solution:   Have "\L\u" result in title case. (James McCoy)

											
										
										
											2013-03-19 17:42:15 +01:00
+									    if (func_one != (fptr_T)NULL)
-												patch 9.0.1908: undefined behaviour upper/lower function ptrs

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of the \u / \U / \l / \L modifiers in the substitute
command remembers its state by storing function pointers in
func_all/func_one. Functions of type `fptr_T` are supposed to return
void* (a C function type cannot name itself as its own return type), but
the definitions of the functions (e.g. do_Upper) didn't follow this rule,
so the code casts between functions of different signatures, which
results in an undefined behavior error under UBSAN in Clang 17.
See #12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: #13117

Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>

											
										
										
											2023-09-18 19:51:56 +02:00
+									    {
 										func_one(&cc, c);
 										func_one = NULL;
 									    }
-												updated for version 7.3.873
Problem:    Cannot easily use :s to make title case.
Solution:   Have "\L\u" result in title case. (James McCoy)

											
										
										
											2013-03-19 17:42:15 +01:00
+									    else if (func_all != (fptr_T)NULL)
-												patch 9.0.1908: undefined behaviour upper/lower function ptrs

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of \u / \U / \l / \L modifiers in the substitute
command relies on remembering the state by setting function pointers on
func_all/func_one in the code. The code signature of `fptr_T` is
supposed to return void* (due to C function signatures not being able to
return itself due to type recursion), and the definition of the
functions (e.g. to_Upper) didn't follow this rule, and so the code tries
to cast functions of different signatures, resulting in undefined
behavior error under UBSAN in Clang 17. See #12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: #13117

Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>

											
										
										
											2023-09-18 19:51:56 +02:00
+										func_all(&cc, c);
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									    else // just copy
-												updated for version 7.3.873
Problem:    Cannot easily use :s to make title case.
Solution:   Have "\L\u" result in title case. (James McCoy)

											
										
										
											2013-03-19 17:42:15 +01:00
+										cc = c;
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
 									    if (has_mbyte)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
-												Fix: Composing characters in :s substitute text were dropped.

											
										
										
											2010-07-12 22:42:33 +02:00
+										int totlen = mb_ptr2len(src - 1);
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+										int charlen = mb_char2len(cc);
-												Fix: Composing characters in :s substitute text were dropped.

											
										
										
											2010-07-12 22:42:33 +02:00
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										if (copy)
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+										{
 										    if (dst + charlen > dest + destlen)
 										    {
 											iemsg("vim_regsub_both(): not enough space");
 											return 0;
 										    }
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
+										    mb_char2bytes(cc, dst);
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+										}
 										dst += charlen - 1;
-												Fix: Composing characters in :s substitute text were dropped.

											
										
										
											2010-07-12 22:42:33 +02:00
+										if (enc_utf8)
 										{
 										    int clen = utf_ptr2len(src - 1);
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										    // If the character length is shorter than "totlen", there
 										    // are composing characters; copy them as-is.
-												Fix: Composing characters in :s substitute text were dropped.

											
										
										
											2010-07-12 22:42:33 +02:00
+										    if (clen < totlen)
 										    {
 											if (copy)
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+											{
 											    if (dst + totlen - clen > dest + destlen)
 											    {
 												iemsg("vim_regsub_both(): not enough space");
 												return 0;
 											    }
-												Fix: Composing characters in :s substitute text were dropped.

											
										
										
											2010-07-12 22:42:33 +02:00
+											    mch_memmove(dst + 1, src - 1 + clen,
 														     (size_t)(totlen - clen));
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+											}
-												Fix: Composing characters in :s substitute text were dropped.

											
										
										
											2010-07-12 22:42:33 +02:00
+											dst += totlen - clen;
 										    }
 										}
 										src += totlen - 1;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    }
-												patch 8.1.0809: too many #ifdefs

Problem:    Too many #ifdefs.
Solution:   Graduate FEAT_MBYTE, part 3.

											
										
										
											2019-01-24 16:39:02 +01:00
+									    else if (copy)
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+									    {
 										if (dst + 1 > dest + destlen)
 										{
 										    iemsg("vim_regsub_both(): not enough space");
 										    return 0;
 										}
 										*dst = cc;
 									    }
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    dst++;
 									}
 									else
 									{
 									    if (REG_MULTI)
 									    {
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										clnum = rex.reg_mmatch->startpos[no].lnum;
 										if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										    s = NULL;
 										else
 										{
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										    s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
 										    if (rex.reg_mmatch->endpos[no].lnum == clnum)
 											len = rex.reg_mmatch->endpos[no].col
 													    - rex.reg_mmatch->startpos[no].col;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										    else
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+											len = (int)reg_getline_len(clnum) - rex.reg_mmatch->startpos[no].col;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										}
 									    }
 									    else
 									    {
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										s = rex.reg_match->startp[no];
 										if (rex.reg_match->endp[no] == NULL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										    s = NULL;
 										else
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										    len = (int)(rex.reg_match->endp[no] - s);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    }
 									    if (s != NULL)
 									    {
 										for (;;)
 										{
 										    if (len == 0)
 										    {
 											if (REG_MULTI)
 											{
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+											    if (rex.reg_mmatch->endpos[no].lnum == clnum)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+												break;
 											    if (copy)
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+											    {
 												if (dst + 1 > dest + destlen)
 												{
 												    iemsg("vim_regsub_both(): not enough space");
 												    return 0;
 												}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+												*dst = CAR;
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+											    }
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											    ++dst;
 											    s = reg_getline(++clnum);
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+											    if (rex.reg_mmatch->endpos[no].lnum == clnum)
 												len = rex.reg_mmatch->endpos[no].col;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											    else
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+												len = (int)reg_getline_len(clnum);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											}
 											else
 											    break;
 										    }
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										    else if (*s == NUL) // we hit NUL.
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										    {
 											if (copy)
-												patch 9.0.1594: some internal error messages are translated

Problem:    Some internal error messages are translated.
Solution:   Consistently do not translate internal error messages.
            (closes #12459)

											
										
										
											2023-05-31 17:12:14 +01:00
+											    iemsg(e_damaged_match_string);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											goto exit;
 										    }
 										    else
 										    {
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+											if ((flags & REGSUB_BACKSLASH)
 														  && (*s == CAR || *s == '\\'))
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											{
 											    /*
 											     * Insert a backslash in front of a CR, otherwise
 											     * it will be replaced by a line break.
 											     * Number of backslashes will be halved later,
 											     * double them here.
 											     */
 											    if (copy)
 											    {
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+												if (dst + 2 > dest + destlen)
 												{
 												    iemsg("vim_regsub_both(): not enough space");
 												    return 0;
 												}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+												dst[0] = '\\';
 												dst[1] = *s;
 											    }
 											    dst += 2;
 											}
 											else
 											{
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
+											    if (has_mbyte)
 												c = mb_ptr2char(s);
 											    else
 												c = *s;
-												updated for version 7.3.873
Problem:    Cannot easily use :s to make title case.
Solution:   Have "\L\u" result in title case. (James McCoy)

											
										
										
											2013-03-19 17:42:15 +01:00
+											    if (func_one != (fptr_T)NULL)
-												patch 9.0.1908: undefined behaviour upper/lower function ptrs

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of \u / \U / \l / \L modifiers in the substitute
command relies on remembering the state by setting function pointers on
func_all/func_one in the code. The code signature of `fptr_T` is
supposed to return void* (due to C function signatures not being able to
return itself due to type recursion), and the definition of the
functions (e.g. to_Upper) didn't follow this rule, and so the code tries
to cast functions of different signatures, resulting in undefined
behavior error under UBSAN in Clang 17. See #12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: #13117

Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>

											
										
										
											2023-09-18 19:51:56 +02:00
+											    {
 												func_one(&cc, c);
 												func_one = NULL;
 											    }
-												updated for version 7.3.873
Problem:    Cannot easily use :s to make title case.
Solution:   Have "\L\u" result in title case. (James McCoy)

											
										
										
											2013-03-19 17:42:15 +01:00
+											    else if (func_all != (fptr_T)NULL)
-												patch 9.0.1908: undefined behaviour upper/lower function ptrs

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of \u / \U / \l / \L modifiers in the substitute
command relies on remembering the state by setting function pointers on
func_all/func_one in the code. The code signature of `fptr_T` is
supposed to return void* (due to C function signatures not being able to
return itself due to type recursion), and the definition of the
functions (e.g. to_Upper) didn't follow this rule, and so the code tries
to cast functions of different signatures, resulting in undefined
behavior error under UBSAN in Clang 17. See #12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: #13117

Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>

											
										
										
											2023-09-18 19:51:56 +02:00
+												func_all(&cc, c);
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+											    else // just copy
-												updated for version 7.3.873
Problem:    Cannot easily use :s to make title case.
Solution:   Have "\L\u" result in title case. (James McCoy)

											
										
										
											2013-03-19 17:42:15 +01:00
+												cc = c;
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
 											    if (has_mbyte)
 											    {
-												updated for version 7.1-046

											
										
										
											2007-07-30 20:32:53 +00:00
+												int l;
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+												int charlen;
-												updated for version 7.1-046

											
										
										
											2007-07-30 20:32:53 +00:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+												// Copy composing characters separately, one
 												// at a time.
-												updated for version 7.1-046

											
										
										
											2007-07-30 20:32:53 +00:00
+												if (enc_utf8)
 												    l = utf_ptr2len(s) - 1;
 												else
 												    l = mb_ptr2len(s) - 1;
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
 												s += l;
 												len -= l;
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+												charlen = mb_char2len(cc);
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
+												if (copy)
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+												{
 												    if (dst + charlen > dest + destlen)
 												    {
 													iemsg("vim_regsub_both(): not enough space");
 													return 0;
 												    }
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
+												    mb_char2bytes(cc, dst);
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+												}
 												dst += charlen - 1;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											    }
-												patch 8.1.0809: too many #ifdefs

Problem:    Too many #ifdefs.
Solution:   Graduate FEAT_MBYTE, part 3.

											
										
										
											2019-01-24 16:39:02 +01:00
+											    else if (copy)
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+											    {
 												if (dst + 1 > dest + destlen)
 												{
 												    iemsg("vim_regsub_both(): not enough space");
 												    return 0;
 												}
 												*dst = cc;
 											    }
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
+											    dst++;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											}
-												updated for version 7.0226

											
										
										
											2006-03-16 21:41:35 +00:00
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+											++s;
 											--len;
 										    }
 										}
 									    }
 									    no = -1;
 									}
 								      }
 								    if (copy)
 									*dst = NUL;
 								exit:
 								    return (int)((dst - dest) + 1);
 								}
 								#ifdef FEAT_EVAL
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
-												updated for version 7.2-308

											
										
										
											2009-11-25 18:51:24 +00:00
+								    static char_u *
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								reg_getline_submatch(linenr_T lnum)
-												updated for version 7.2-308

											
										
										
											2009-11-25 18:51:24 +00:00
+								{
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								    char_u *line;
 								    reg_getline_common(lnum, RGLF_LINE | RGLF_SUBMATCH, &line, NULL);
 								    return line;
 								}
-												updated for version 7.2-308

											
										
										
											2009-11-25 18:51:24 +00:00
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								    static colnr_T
 								reg_getline_submatch_len(linenr_T lnum)
 								{
 								    colnr_T length;
-												updated for version 7.2-308

											
										
										
											2009-11-25 18:51:24 +00:00
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								    reg_getline_common(lnum, RGLF_LENGTH | RGLF_SUBMATCH, NULL, &length);
-												updated for version 7.2-308

											
										
										
											2009-11-25 18:51:24 +00:00
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+								    return length;
-												updated for version 7.2-308

											
										
										
											2009-11-25 18:51:24 +00:00
+								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								/*
-												updated for version 7.1b

											
										
										
											2007-05-10 18:00:30 +00:00
+								 * Used for the submatch() function: get the string from the n'th submatch in
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								 * allocated memory.
 								 * Returns NULL when not in a ":s" command and for a non-existing submatch.
 								 */
 								    char_u *
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								reg_submatch(int no)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								{
 								    char_u	*retval = NULL;
 								    char_u	*s;
 								    int		len;
 								    int		round;
 								    linenr_T	lnum;
-												updated for version 7.0e06

											
										
										
											2006-04-22 22:33:57 +00:00
+								    if (!can_f_submatch || no < 0)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									return NULL;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    if (rsm.sm_match == NULL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    {
 									/*
 									 * First round: compute the length and allocate memory.
 									 * Second round: copy the text.
 									 */
 									for (round = 1; round <= 2; ++round)
 									{
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									    lnum = rsm.sm_mmatch->startpos[no].lnum;
 									    if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										return NULL;
-												patch 8.1.1025: checking NULL pointer after addition

Problem:    Checking NULL pointer after addition. (Coverity)
Solution:   First check for NULL, then add the column.

											
										
										
											2019-03-20 21:18:34 +01:00
+									    s = reg_getline_submatch(lnum);
 									    if (s == NULL)  // anti-crash check, cannot happen?
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										break;
-												patch 8.1.1025: checking NULL pointer after addition

Problem:    Checking NULL pointer after addition. (Coverity)
Solution:   First check for NULL, then add the column.

											
										
										
											2019-03-20 21:18:34 +01:00
+									    s += rsm.sm_mmatch->startpos[no].col;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									    if (rsm.sm_mmatch->endpos[no].lnum == lnum)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										// Within one line: take from start to end col.
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										len = rsm.sm_mmatch->endpos[no].col
 													  - rsm.sm_mmatch->startpos[no].col;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										if (round == 2)
-												updated for version 7.0109

											
										
										
											2005-07-18 21:47:53 +00:00
+										    vim_strncpy(retval, s, len);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										++len;
 									    }
 									    else
 									    {
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+										// Multiple lines: take start line from start col, middle
 										// lines completely and end line up to end col.
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										len = (int)reg_getline_submatch_len(lnum) - rsm.sm_mmatch->startpos[no].col;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										if (round == 2)
 										{
 										    STRCPY(retval, s);
 										    retval[len] = '\n';
 										}
 										++len;
 										++lnum;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+										while (lnum < rsm.sm_mmatch->endpos[no].lnum)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										{
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										    s = reg_getline_submatch(lnum);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										    if (round == 2)
 											STRCPY(retval + len, s);
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										    len += (int)reg_getline_submatch_len(lnum);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										    if (round == 2)
 											retval[len] = '\n';
 										    ++len;
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+										    ++lnum;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										}
 										if (round == 2)
-												updated for version 7.2-308

											
										
										
											2009-11-25 18:51:24 +00:00
+										    STRNCPY(retval + len, reg_getline_submatch(lnum),
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+													     rsm.sm_mmatch->endpos[no].col);
 										len += rsm.sm_mmatch->endpos[no].col;
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										if (round == 2)
 										    retval[len] = NUL;
 										++len;
 									    }
-												updated for version 7.0e06

											
										
										
											2006-04-22 22:33:57 +00:00
+									    if (retval == NULL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    {
-												patch 8.1.1386: unessesary type casts for lalloc()

Problem:    Unessesary type casts for lalloc().
Solution:   Remove type casts.  Change lalloc(size, TRUE) to alloc(size).

											
										
										
											2019-05-24 19:39:03 +02:00
+										retval = alloc(len);
-												updated for version 7.0e06

											
										
										
											2006-04-22 22:33:57 +00:00
+										if (retval == NULL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+										    return NULL;
 									    }
 									}
 								    }
 								    else
 								    {
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									s = rsm.sm_match->startp[no];
 									if (s == NULL || rsm.sm_match->endp[no] == NULL)
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+									    retval = NULL;
 									else
-												patch 8.2.0967: unnecessary type casts for vim_strnsave()

Problem:    Unnecessary type casts for vim_strnsave().
Solution:   Remove the type casts.

											
										
										
											2020-06-12 22:59:11 +02:00
+									    retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								    }
 								    return retval;
 								}
-												updated for version 7.4.241
Problem:    The string returned by submatch() does not distinguish between a
            NL from a line break and a NL that stands for a NUL character.
Solution:   Add a second argument to return a list. (ZyX)

											
										
										
											2014-04-02 19:00:58 +02:00
 								/*
 								 * Used for the submatch() function with the optional non-zero argument: get
 								 * the list of strings from the n'th submatch in allocated memory with NULs
 								 * represented in NLs.
 								 * Returns a list of allocated strings.  Returns NULL when not in a ":s"
 								 * command, for a non-existing submatch and for any error.
 								 */
 								    list_T *
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								reg_submatch_list(int no)
-												updated for version 7.4.241
Problem:    The string returned by submatch() does not distinguish between a
            NL from a line break and a NL that stands for a NUL character.
Solution:   Add a second argument to return a list. (ZyX)

											
										
										
											2014-04-02 19:00:58 +02:00
+								{
 								    char_u	*s;
 								    linenr_T	slnum;
 								    linenr_T	elnum;
 								    colnr_T	scol;
 								    colnr_T	ecol;
 								    int		i;
 								    list_T	*list;
 								    int		error = FALSE;
 								    if (!can_f_submatch || no < 0)
 									return NULL;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    if (rsm.sm_match == NULL)
-												updated for version 7.4.241
Problem:    The string returned by submatch() does not distinguish between a
            NL from a line break and a NL that stands for a NUL character.
Solution:   Add a second argument to return a list. (ZyX)

											
										
										
											2014-04-02 19:00:58 +02:00
+								    {
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									slnum = rsm.sm_mmatch->startpos[no].lnum;
 									elnum = rsm.sm_mmatch->endpos[no].lnum;
-												updated for version 7.4.241
Problem:    The string returned by submatch() does not distinguish between a
            NL from a line break and a NL that stands for a NUL character.
Solution:   Add a second argument to return a list. (ZyX)

											
										
										
											2014-04-02 19:00:58 +02:00
+									if (slnum < 0 || elnum < 0)
 									    return NULL;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									scol = rsm.sm_mmatch->startpos[no].col;
 									ecol = rsm.sm_mmatch->endpos[no].col;
-												updated for version 7.4.241
Problem:    The string returned by submatch() does not distinguish between a
            NL from a line break and a NL that stands for a NUL character.
Solution:   Add a second argument to return a list. (ZyX)

											
										
										
											2014-04-02 19:00:58 +02:00
 									list = list_alloc();
 									if (list == NULL)
 									    return NULL;
 									s = reg_getline_submatch(slnum) + scol;
 									if (slnum == elnum)
 									{
 									    if (list_append_string(list, s, ecol - scol) == FAIL)
 										error = TRUE;
 									}
 									else
 									{
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+									    int max_lnum = elnum - slnum;
-												updated for version 7.4.241
Problem:    The string returned by submatch() does not distinguish between a
            NL from a line break and a NL that stands for a NUL character.
Solution:   Add a second argument to return a list. (ZyX)

											
										
										
											2014-04-02 19:00:58 +02:00
+									    if (list_append_string(list, s, -1) == FAIL)
 										error = TRUE;
-												patch 9.1.0409: too many strlen() calls in the regexp engine

Problem:  too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
          of bsearch() for getting the character class
          (John Marriott)

closes: #14648

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-05-12 00:07:17 +02:00
+									    for (i = 1; i < max_lnum; i++)
-												updated for version 7.4.241
Problem:    The string returned by submatch() does not distinguish between a
            NL from a line break and a NL that stands for a NUL character.
Solution:   Add a second argument to return a list. (ZyX)

											
										
										
											2014-04-02 19:00:58 +02:00
+									    {
 										s = reg_getline_submatch(slnum + i);
 										if (list_append_string(list, s, -1) == FAIL)
 										    error = TRUE;
 									    }
 									    s = reg_getline_submatch(elnum);
 									    if (list_append_string(list, s, ecol) == FAIL)
 										error = TRUE;
 									}
 								    }
 								    else
 								    {
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									s = rsm.sm_match->startp[no];
 									if (s == NULL || rsm.sm_match->endp[no] == NULL)
-												updated for version 7.4.241
Problem:    The string returned by submatch() does not distinguish between a
            NL from a line break and a NL that stands for a NUL character.
Solution:   Add a second argument to return a list. (ZyX)

											
										
										
											2014-04-02 19:00:58 +02:00
+									    return NULL;
 									list = list_alloc();
 									if (list == NULL)
 									    return NULL;
 									if (list_append_string(list, s,
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+												 (int)(rsm.sm_match->endp[no] - s)) == FAIL)
-												updated for version 7.4.241
Problem:    The string returned by submatch() does not distinguish between a
            NL from a line break and a NL that stands for a NUL character.
Solution:   Add a second argument to return a list. (ZyX)

											
										
										
											2014-04-02 19:00:58 +02:00
+									    error = TRUE;
 								    }
 								    if (error)
 								    {
-												patch 7.4.1719
Problem:    Leaking memory when there is a cycle involving a job and a
            partial.
Solution:   Add a copyID to job and channel.  Set references in items referred
            by them.  Go through all jobs and channels to find unreferenced
            items.  Also, decrement reference counts when garbage collecting.

											
										
										
											2016-04-08 17:07:19 +02:00
+									list_free(list);
-												updated for version 7.4.241
Problem:    The string returned by submatch() does not distinguish between a
            NL from a line break and a NL that stands for a NUL character.
Solution:   Add a second argument to return a list. (ZyX)

											
										
										
											2014-04-02 19:00:58 +02:00
+									return NULL;
 								    }
-												patch 8.2.1621: crash when using submatch(0, 1) in substitute()

Problem:    Crash when using submatch(0, 1) in substitute().
Solution:   Increment reference count. (closes #6887)

											
										
										
											2020-09-06 15:14:45 +02:00
+								    ++list->lv_refcount;
-												updated for version 7.4.241
Problem:    The string returned by submatch() does not distinguish between a
            NL from a line break and a NL that stands for a NUL character.
Solution:   Add a second argument to return a list. (ZyX)

											
										
										
											2014-04-02 19:00:58 +02:00
+								    return list;
 								}
-												updated for version 7.0001

											
										
										
											2004-06-13 20:20:40 +00:00
+								#endif
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
-												patch 8.2.0260: several lines of code are duplicated

Problem:    Several lines of code are duplicated.
Solution:   Move duplicated code to a function. (Yegappan Lakshmanan,
            closes #5330)

											
										
										
											2020-02-15 23:06:45 +01:00
+								/*
 								 * Initialize the values used for matching against multiple lines
 								 */
 								    static void
 								init_regexec_multi(
 									regmmatch_T	*rmp,
 									win_T		*win,	// window in which to search or NULL
 									buf_T		*buf,	// buffer in which to search
 									linenr_T	lnum)	// nr of line to start looking for match
 								{
 								    // Multi-line matching: only the multi-line match struct is set.
 								    rex.reg_mmatch = rmp;
 								    rex.reg_match = NULL;
 								    // Window, buffer and first line to match in.
 								    rex.reg_win = win;
 								    rex.reg_buf = buf;
 								    rex.reg_firstlnum = lnum;
 								    // Line count of the buffer minus the first line number.
 								    rex.reg_maxline = buf->b_ml.ml_line_count - lnum;
 								    // Flags: ignore-case and column limit are taken from "rmp".
 								    rex.reg_ic = rmp->rmm_ic;
 								    rex.reg_maxcol = rmp->rmm_maxcol;
 								    rex.reg_icombine = FALSE;
 								    rex.reg_line_lbr = FALSE;
 								}
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								#include "regexp_bt.c"
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								static regengine_T bt_regengine =
 								{
 								    bt_regcomp,
-												updated for version 7.3.1149
Problem:    New regexp engine: Matching plain text could be faster.
Solution:   Detect a plain text match and handle it specifically.  Add
            vim_regfree().

											
										
										
											2013-06-08 18:19:48 +02:00
+								    bt_regfree,
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								    bt_regexec_nl,
-												patch 9.1.0011: regexp cannot match combining chars in collection

Problem:  regexp cannot match combining chars in collection
Solution: Check for combining characters in regex collections for the
          NFA and BT Regex Engine

Also, while at it, make debug mode work again.

fixes #10286
closes: #12871

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-01-04 22:54:08 +01:00
+								    bt_regexec_multi
 								#ifdef DEBUG
 								    ,(char_u *)""
 								#endif
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								};
 								#include "regexp_nfa.c"
 								static regengine_T nfa_regengine =
 								{
 								    nfa_regcomp,
-												updated for version 7.3.1149
Problem:    New regexp engine: Matching plain text could be faster.
Solution:   Detect a plain text match and handle it specifically.  Add
            vim_regfree().

											
										
										
											2013-06-08 18:19:48 +02:00
+								    nfa_regfree,
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								    nfa_regexec_nl,
-												patch 9.1.0011: regexp cannot match combining chars in collection

Problem:  regexp cannot match combining chars in collection
Solution: Check for combining characters in regex collections for the
          NFA and BT Regex Engine

Also, while at it, make debug mode work again.

fixes #10286
closes: #12871

Signed-off-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-01-04 22:54:08 +01:00
+								    nfa_regexec_multi
 								#ifdef DEBUG
 								    ,(char_u *)""
 								#endif
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								};
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								// Which regexp engine to use? Needed for vim_regcomp().
 								// Must match with 'regexpengine'.
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								static int regexp_engine = 0;
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								#ifdef DEBUG
 								static char_u regname[][30] = {
 										    "AUTOMATIC Regexp Engine",
-												updated for version 7.3.1037
Problem:    Look-behind matching is very slow on long lines.
Solution:   Add a byte limit to how far back an attempt is made.

											
										
										
											2013-05-29 18:45:11 +02:00
+										    "BACKTRACKING Regexp Engine",
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+										    "NFA Regexp Engine"
 											    };
 								#endif
 								/*
 								 * Compile a regular expression into internal code.
-												updated for version 7.3.1149
Problem:    New regexp engine: Matching plain text could be faster.
Solution:   Detect a plain text match and handle it specifically.  Add
            vim_regfree().

											
										
										
											2013-06-08 18:19:48 +02:00
+								 * Returns the program in allocated memory.
 								 * Use vim_regfree() to free the memory.
 								 * Returns NULL for an error.
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								 */
 								    regprog_T *
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								vim_regcomp(char_u *expr_arg, int re_flags)
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								{
 								    regprog_T   *prog = NULL;
 								    char_u	*expr = expr_arg;
-												patch 8.2.0035: saving and restoring called_emsg is clumsy

Problem:    Saving and restoring called_emsg is clumsy.
Solution:   Count the number of error messages.

											
										
										
											2019-12-23 22:59:18 +01:00
+								    int		called_emsg_before;
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
 								    regexp_engine = p_re;
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // Check for prefix "\%#=", that sets the regexp engine
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								    if (STRNCMP(expr, "\\%#=", 4) == 0)
 								    {
 									int newengine = expr[4] - '0';
 									if (newengine == AUTOMATIC_ENGINE
 									    || newengine == BACKTRACKING_ENGINE
 									    || newengine == NFA_ENGINE)
 									{
 									    regexp_engine = expr[4] - '0';
 									    expr += 5;
 								#ifdef DEBUG
-												patch 8.1.0743: giving error messages is not flexible

Problem:    Giving error messages is not flexible.
Solution:   Add semsg().  Change argument from "char_u *" to "char *", also
            for msg() and get rid of most MSG macros. (Ozaki Kiichi, closes
            #3302)  Also make emsg() accept a "char *" argument.  Get rid of
            an enormous number of type casts.

											
										
										
											2019-01-13 23:38:42 +01:00
+									    smsg("New regexp mode selected (%d): %s",
-												updated for version 7.4.291
Problem:    Compiler warning for int to pointer of different size when DEBUG
            is defined.
Solution:   use smsg() instead of EMSG3().

											
										
										
											2014-05-13 16:46:32 +02:00
+													   regexp_engine, regname[newengine]);
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								#endif
 									}
 									else
 									{
-												patch 8.2.4010: error messages are spread out

Problem:    Error messages are spread out.
Solution:   Move more error messages to errors.h.

											
										
										
											2022-01-05 17:49:15 +00:00
+									    emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+									    regexp_engine = AUTOMATIC_ENGINE;
 									}
 								    }
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+								#ifdef DEBUG
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								    bt_regengine.expr = expr;
 								    nfa_regengine.expr = expr;
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+								#endif
-												patch 8.1.0935: old regexp engine may use invalid buffer

Problem:    Old regexp engine may use invalid buffer for 'iskeyword' or
            uninitialized buffer pointer. (Kuang-che Wu)
Solution:   Set rex.reg_buf when compiling the pattern. (closes #3972)

											
										
										
											2019-02-16 18:07:57 +01:00
+								    // reg_iswordc() uses rex.reg_buf
 								    rex.reg_buf = curbuf;
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
 								    /*
 								     * First try the NFA engine, unless backtracking was requested.
 								     */
-												patch 8.2.0035: saving and restoring called_emsg is clumsy

Problem:    Saving and restoring called_emsg is clumsy.
Solution:   Count the number of error messages.

											
										
										
											2019-12-23 22:59:18 +01:00
+								    called_emsg_before = called_emsg;
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								    if (regexp_engine != BACKTRACKING_ENGINE)
-												patch 8.0.1496: clearing a pointer takes two lines

Problem:    Clearing a pointer takes two lines.
Solution:   Add VIM_CLEAR() and replace vim_clear(). (Hirohito Higashi,
            closes #2629)

											
										
										
											2018-02-10 18:45:26 +01:00
+									prog = nfa_regengine.regcomp(expr,
-												updated for version 7.4.593
Problem:    Crash when searching for "x\{0,90000}". (Dominique Pelle)
Solution:   Bail out from the NFA engine when the max limit is much higher
            than the min limit.

											
										
										
											2015-01-27 12:59:55 +01:00
+										re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								    else
 									prog = bt_regengine.regcomp(expr, re_flags);
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // Check for error compiling regexp with initial engine.
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+								    if (prog == NULL)
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								    {
-												updated for version 7.3.978
Problem:    Regexp debug logs don't have a good name.
Solution:   Use clear names and make it possible to write logs for the old and
            new engines separately. (Taro Muraoka)

											
										
										
											2013-05-20 21:49:13 +02:00
+								#ifdef BT_REGEXP_DEBUG_LOG
-												patch 8.2.2278: falling back to old regexp engine can some patterns

Problem:    Falling back to old regexp engine can break some patterns.
Solution:   Do not fall back once [[:lower:]] or [[:upper:]] is used.
            (Christian Brabandt, closes #7572)

											
										
										
											2021-01-02 17:43:49 +01:00
+									if (regexp_engine == BACKTRACKING_ENGINE)   // debugging log for BT engine
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+									{
 									    FILE *f;
-												updated for version 7.3.978
Problem:    Regexp debug logs don't have a good name.
Solution:   Use clear names and make it possible to write logs for the old and
            new engines separately. (Taro Muraoka)

											
										
										
											2013-05-20 21:49:13 +02:00
+									    f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+									    if (f)
 									    {
-												updated for version 7.3.1128
Problem:    Now that the NFA engine handles everything every failure is a
            syntax error.
Solution:   Remove the syntax_error flag.

											
										
										
											2013-06-05 21:42:53 +02:00
+										fprintf(f, "Syntax error in \"%s\"\n", expr);
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+										fclose(f);
 									    }
 									    else
-												patch 8.1.0743: giving error messages is not flexible

Problem:    Giving error messages is not flexible.
Solution:   Add semsg().  Change argument from "char_u *" to "char *", also
            for msg() and get rid of most MSG macros. (Ozaki Kiichi, closes
            #3302)  Also make emsg() accept a "char *" argument.  Get rid of
            an enormous number of type casts.

											
										
										
											2019-01-13 23:38:42 +01:00
+										semsg("(NFA) Could not open \"%s\" to write !!!",
-												patch 8.0.1496: clearing a pointer takes two lines

Problem:    Clearing a pointer takes two lines.
Solution:   Add VIM_CLEAR() and replace vim_clear(). (Hirohito Higashi,
            closes #2629)

											
										
										
											2018-02-10 18:45:26 +01:00
+											BT_REGEXP_DEBUG_LOG_NAME);
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+									}
 								#endif
 									/*
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+									 * If the NFA engine failed, try the backtracking engine.
-												updated for version 7.4.593
Problem:    Crash when searching for "x\{0,90000}". (Dominique Pelle)
Solution:   Bail out from the NFA engine when the max limit is much higher
            than the min limit.

											
										
										
											2015-01-27 12:59:55 +01:00
+									 * The NFA engine also fails for patterns that it can't handle well
 									 * but are still valid patterns, thus a retry should work.
-												patch 8.1.0973: pattern with syntax error gives threee error messages

Problem:    Pattern with syntax error gives three error messages. (Kuang-che
            Wu)
Solution:   Remove outdated internal error.  Don't fall back to other engine
            after an error.

											
										
										
											2019-02-22 17:29:43 +01:00
+									 * But don't try if an error message was given.
-												updated for version 7.4.593
Problem:    Crash when searching for "x\{0,90000}". (Dominique Pelle)
Solution:   Bail out from the NFA engine when the max limit is much higher
            than the min limit.

											
										
										
											2015-01-27 12:59:55 +01:00
+									 */
-												patch 8.2.0035: saving and restoring called_emsg is clumsy

Problem:    Saving and restoring called_emsg is clumsy.
Solution:   Count the number of error messages.

											
										
										
											2019-12-23 22:59:18 +01:00
+									if (regexp_engine == AUTOMATIC_ENGINE
 													  && called_emsg == called_emsg_before)
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+									{
-												updated for version 7.4.593
Problem:    Crash when searching for "x\{0,90000}". (Dominique Pelle)
Solution:   Bail out from the NFA engine when the max limit is much higher
            than the min limit.

											
										
										
											2015-01-27 12:59:55 +01:00
+									    regexp_engine = BACKTRACKING_ENGINE;
-												patch 8.2.2278: falling back to old regexp engine can some patterns

Problem:    Falling back to old regexp engine can break some patterns.
Solution:   Do not fall back once [[:lower:]] or [[:upper:]] is used.
            (Christian Brabandt, closes #7572)

											
										
										
											2021-01-02 17:43:49 +01:00
+								#ifdef FEAT_EVAL
 									    report_re_switch(expr);
 								#endif
-												updated for version 7.3.1128
Problem:    Now that the NFA engine handles everything every failure is a
            syntax error.
Solution:   Remove the syntax_error flag.

											
										
										
											2013-06-05 21:42:53 +02:00
+									    prog = bt_regengine.regcomp(expr, re_flags);
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+									}
-												updated for version 7.3.1128
Problem:    Now that the NFA engine handles everything every failure is a
            syntax error.
Solution:   Remove the syntax_error flag.

											
										
										
											2013-06-05 21:42:53 +02:00
+								    }
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+								    if (prog != NULL)
 								    {
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// Store the info needed to call regcomp() again when the engine turns
 									// out to be very slow when executing it.
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+									prog->re_engine = regexp_engine;
 									prog->re_flags  = re_flags;
 								    }
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								    return prog;
 								}
-												updated for version 7.3.1149
Problem:    New regexp engine: Matching plain text could be faster.
Solution:   Detect a plain text match and handle it specifically.  Add
            vim_regfree().

											
										
										
											2013-06-08 18:19:48 +02:00
+								/*
 								 * Free a compiled regexp program, returned by vim_regcomp().
 								 */
 								    void
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								vim_regfree(regprog_T *prog)
-												updated for version 7.3.1149
Problem:    New regexp engine: Matching plain text could be faster.
Solution:   Detect a plain text match and handle it specifically.  Add
            vim_regfree().

											
										
										
											2013-06-08 18:19:48 +02:00
+								{
 								    if (prog != NULL)
 									prog->engine->regfree(prog);
 								}
-												patch 8.1.2005: the regexp.c file is too big

Problem:    The regexp.c file is too big.
Solution:   Move the backtracking engine to a separate file. (Yegappan
            Lakshmanan, closes #4905)

											
										
										
											2019-09-07 23:16:33 +02:00
+								#if defined(EXITFREE) || defined(PROTO)
 								    void
 								free_regexp_stuff(void)
 								{
 								    // Release the regexp module's file-level allocations.  This function
 								    // is only built when EXITFREE or PROTO is defined (enclosing #if).
 								    // The four releases touch distinct objects, so their order is free.
 								    vim_free(reg_prev_sub);
 								    vim_free(reg_tofree);
 								    ga_clear(&backpos);
 								    ga_clear(&regstack);
 								}
 								#endif
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+								#ifdef FEAT_EVAL
 								    static void
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								report_re_switch(char_u *pat)
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+								{
 								    if (p_verbose > 0)
 								    {
 									verbose_enter();
-												patch 8.1.0779: argument for message functions is inconsistent

Problem:    Argument for message functions is inconsistent.
Solution:   Make first argument to msg() "char *".

											
										
										
											2019-01-19 17:43:09 +01:00
+									msg_puts(_("Switching to backtracking RE engine for pattern: "));
 									msg_puts((char *)pat);
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+									verbose_leave();
 								    }
 								}
 								#endif
-												patch 8.2.3699: the +title feature adds a lot of #ifdef but little code

Problem:    The +title feature adds a lot of #ifdef but little code.
Solution:   Graduate the +title feature.

											
										
										
											2021-11-29 20:39:38 +00:00
+								#if defined(FEAT_X11) || defined(PROTO)
-												patch 8.1.0661: clipboard regexp might be used recursively

Problem:    Clipboard regexp might be used recursively.
Solution:   Check for recursive use and bail out.

											
										
										
											2018-12-29 22:28:46 +01:00
+								/*
 								 * Return whether "prog" is currently being executed.
 								 */
 								    int
 								regprog_in_use(regprog_T *prog)
 								{
 								    // "prog" is dereferenced without a NULL check, so the caller must
 								    // pass a valid program.  The flag read here is the one that
 								    // vim_regexec_string() sets to TRUE before calling regexec_nl() and
 								    // resets to FALSE right after it returns.
 								    return prog->re_in_use;
 								}
-												patch 8.1.0785: depending on the configuration some functions are unused

Problem:    Depending on the configuration some functions are unused.
Solution:   Add more #ifdefs, remove unused functions. (Dominique Pelle,
            closes #3822)

											
										
										
											2019-01-20 15:30:40 +01:00
+								#endif
-												patch 8.1.0661: clipboard regexp might be used recursively

Problem:    Clipboard regexp might be used recursively.
Solution:   Check for recursive use and bail out.

											
										
										
											2018-12-29 22:28:46 +01:00
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								/*
 								 * Match a regexp against a string.
-												patch 8.2.5046: vim_regsub() can overwrite the destination

Problem:    vim_regsub() can overwrite the destination.
Solution:   Pass the destination length, give an error when it doesn't fit.

											
										
										
											2022-05-30 20:58:55 +01:00
+								 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
-												updated for version 7.4.519
Problem:    Crash when using syntax highlighting.
Solution:   When regprog is freed and replaced, store the result.

											
										
										
											2014-11-19 16:38:07 +01:00
+								 * Note: "rmp->regprog" may be freed and changed.
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								 * Uses curbuf for line count and 'iskeyword'.
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+								 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								 *
 								 * Return TRUE if there is a match, FALSE if not.
 								 */
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+								    static int
-												patch 8.0.0647: syntax highlighting can make cause a freeze

Problem:    Syntax highlighting can cause a freeze.
Solution:   Apply 'redrawtime' to syntax highlighting, per window.

											
										
										
											2017-06-18 22:41:03 +02:00
+								vim_regexec_string(
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								    regmatch_T	*rmp,
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    char_u	*line,  // string to match against
 								    colnr_T	col,    // column to start looking for match
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								    int		nl)
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+								{
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    int		result;
 								    regexec_T	rex_save;
 								    int		rex_in_use_save = rex_in_use;
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+								    // Cannot use the same prog recursively, it contains state.
 								    if (rmp->regprog->re_in_use)
 								    {
-												patch 8.2.4008: error messages are spread out

Problem:    Error messages are spread out.
Solution:   Move more error messages to errors.h.

											
										
										
											2022-01-05 16:09:06 +00:00
+									emsg(_(e_cannot_use_pattern_recursively));
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+									return FALSE;
 								    }
 								    rmp->regprog->re_in_use = TRUE;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    if (rex_in_use)
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+									// Being called recursively, save the state.
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									rex_save = rex;
 								    rex_in_use = TRUE;
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    rex.reg_startp = NULL;
 								    rex.reg_endp = NULL;
 								    rex.reg_startpos = NULL;
 								    rex.reg_endpos = NULL;
 								    result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
-												patch 8.1.0194: possibly use of NULL pointer

Problem:    Possibly use of NULL pointer. (Coverity)
Solution:   Reset the re_in_use flag earlier.

											
										
										
											2018-07-18 06:02:09 +02:00
+								    rmp->regprog->re_in_use = FALSE;
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // NFA engine aborted because it's very slow.
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+								    if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
 													       && result == NFA_TOO_EXPENSIVE)
 								    {
 									int    save_p_re = p_re;
 									int    re_flags = rmp->regprog->re_flags;
 									char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
 									p_re = BACKTRACKING_ENGINE;
 									vim_regfree(rmp->regprog);
 									if (pat != NULL)
 									{
 								#ifdef FEAT_EVAL
 									    report_re_switch(pat);
 								#endif
 									    rmp->regprog = vim_regcomp(pat, re_flags);
 									    if (rmp->regprog != NULL)
-												patch 8.1.0194: possibly use of NULL pointer

Problem:    Possibly use of NULL pointer. (Coverity)
Solution:   Reset the re_in_use flag earlier.

											
										
										
											2018-07-18 06:02:09 +02:00
+									    {
 										rmp->regprog->re_in_use = TRUE;
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+										result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
-												patch 8.1.0194: possibly use of NULL pointer

Problem:    Possibly use of NULL pointer. (Coverity)
Solution:   Reset the re_in_use flag earlier.

											
										
										
											2018-07-18 06:02:09 +02:00
+										rmp->regprog->re_in_use = FALSE;
 									    }
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+									    vim_free(pat);
 									}
 									p_re = save_p_re;
 								    }
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
 								    rex_in_use = rex_in_use_save;
 								    if (rex_in_use)
 									rex = rex_save;
-												updated for version 7.4.526
Problem:    matchstr() fails on long text. (Daniel Hahler)
Solution:   Return NFA_TOO_EXPENSIVE from regexec_nl(). (Christian Brabandt)

											
										
										
											2014-11-20 23:07:05 +01:00
+								    return result > 0;
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+								}
-												patch 9.0.1403: unused variables and functions

Problem:    Unused variables and functions.
Solution:   Delete items and adjust #ifdefs. (Dominique Pellé, closes #12145)

											
										
										
											2023-03-12 21:20:59 +00:00
+								#if defined(FEAT_SPELL) || defined(FEAT_EVAL) || defined(FEAT_X11) || defined(PROTO)
-												updated for version 7.4.519
Problem:    Crash when using syntax highlighting.
Solution:   When regprog is freed and replaced, store the result.

											
										
										
											2014-11-19 16:38:07 +01:00
+								/*
 								 * Note: "*prog" may be freed and changed.
-												updated for version 7.4.526
Problem:    matchstr() fails on long text. (Daniel Hahler)
Solution:   Return NFA_TOO_EXPENSIVE from regexec_nl(). (Christian Brabandt)

											
										
										
											2014-11-20 23:07:05 +01:00
+								 * Return TRUE if there is a match, FALSE if not.
-												updated for version 7.4.519
Problem:    Crash when using syntax highlighting.
Solution:   When regprog is freed and replaced, store the result.

											
										
										
											2014-11-19 16:38:07 +01:00
+								 */
 								    int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								vim_regexec_prog(
 								    regprog_T	**prog,
 								    int		ignore_case,
 								    char_u	*line,
 								    colnr_T	col)
-												updated for version 7.4.519
Problem:    Crash when using syntax highlighting.
Solution:   When regprog is freed and replaced, store the result.

											
										
										
											2014-11-19 16:38:07 +01:00
+								{
-												patch 8.0.0647: syntax highlighting can make cause a freeze

Problem:    Syntax highlighting can cause a freeze.
Solution:   Apply 'redrawtime' to syntax highlighting, per window.

											
										
										
											2017-06-18 22:41:03 +02:00
+								    int		r;
 								    regmatch_T	regmatch;
-												updated for version 7.4.519
Problem:    Crash when using syntax highlighting.
Solution:   When regprog is freed and replaced, store the result.

											
										
										
											2014-11-19 16:38:07 +01:00
 								    regmatch.regprog = *prog;
 								    regmatch.rm_ic = ignore_case;
-												patch 8.0.0647: syntax highlighting can make cause a freeze

Problem:    Syntax highlighting can cause a freeze.
Solution:   Apply 'redrawtime' to syntax highlighting, per window.

											
										
										
											2017-06-18 22:41:03 +02:00
+								    r = vim_regexec_string(&regmatch, line, col, FALSE);
-												updated for version 7.4.519
Problem:    Crash when using syntax highlighting.
Solution:   When regprog is freed and replaced, store the result.

											
										
										
											2014-11-19 16:38:07 +01:00
+								    *prog = regmatch.regprog;
 								    return r;
 								}
-												patch 9.0.1403: unused variables and functions

Problem:    Unused variables and functions.
Solution:   Delete items and adjust #ifdefs. (Dominique Pellé, closes #12145)

											
										
										
											2023-03-12 21:20:59 +00:00
+								#endif
-												updated for version 7.4.519
Problem:    Crash when using syntax highlighting.
Solution:   When regprog is freed and replaced, store the result.

											
										
										
											2014-11-19 16:38:07 +01:00
 								/*
 								 * Note: "rmp->regprog" may be freed and changed.
-												updated for version 7.4.526
Problem:    matchstr() fails on long text. (Daniel Hahler)
Solution:   Return NFA_TOO_EXPENSIVE from regexec_nl(). (Christian Brabandt)

											
										
										
											2014-11-20 23:07:05 +01:00
+								 * Return TRUE if there is a match, FALSE if not.
-												updated for version 7.4.519
Problem:    Crash when using syntax highlighting.
Solution:   When regprog is freed and replaced, store the result.

											
										
										
											2014-11-19 16:38:07 +01:00
+								 */
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								    int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								{
-												patch 8.0.0647: syntax highlighting can make cause a freeze

Problem:    Syntax highlighting can cause a freeze.
Solution:   Apply 'redrawtime' to syntax highlighting, per window.

											
										
										
											2017-06-18 22:41:03 +02:00
+								    return vim_regexec_string(rmp, line, col, FALSE);
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								}
 								/*
 								 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
-												updated for version 7.4.519
Problem:    Crash when using syntax highlighting.
Solution:   When regprog is freed and replaced, store the result.

											
										
										
											2014-11-19 16:38:07 +01:00
+								 * Note: "rmp->regprog" may be freed and changed.
-												updated for version 7.4.526
Problem:    matchstr() fails on long text. (Daniel Hahler)
Solution:   Return NFA_TOO_EXPENSIVE from regexec_nl(). (Christian Brabandt)

											
										
										
											2014-11-20 23:07:05 +01:00
+								 * Return TRUE if there is a match, FALSE if not.
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								 */
 								    int
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								{
-												patch 8.0.0647: syntax highlighting can make cause a freeze

Problem:    Syntax highlighting can cause a freeze.
Solution:   Apply 'redrawtime' to syntax highlighting, per window.

											
										
										
											2017-06-18 22:41:03 +02:00
+								    return vim_regexec_string(rmp, line, col, TRUE);
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								}
 								/*
 								 * Match a regexp against multiple lines.
-												patch 8.1.0098: segfault when pattern with \z() is very slow

Problem:    Segfault when pattern with \z() is very slow.
Solution:   Check for NULL regprog.  Add "nfa_fail" to test_override() to be
            able to test this.  Fix that 'searchhl' resets called_emsg.

											
										
										
											2018-06-23 14:21:42 +02:00
+								 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
 								 * Note: "rmp->regprog" may be freed and changed, even set to NULL.
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								 * Uses curbuf for line count and 'iskeyword'.
 								 *
 								 * Return zero if there is no match.  Return number of lines contained in the
 								 * match otherwise.
 								 */
 								    long
-												patch 7.4.1213
Problem:    Using old style function declarations.
Solution:   Change to new style function declarations. (script by Hirohito
            Higashi)

											
										
										
											2016-01-30 20:31:25 +01:00
+								vim_regexec_multi(
 								    regmmatch_T *rmp,
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    win_T       *win,		// window in which to search or NULL
 								    buf_T       *buf,		// buffer in which to search
 								    linenr_T	lnum,		// nr of line to start looking for match
 								    colnr_T	col,		// column to start looking for match
 								    int		*timed_out)	// flag is set when timeout limit reached
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								{
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    int		result;
 								    regexec_T	rex_save;
 								    int		rex_in_use_save = rex_in_use;
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+								    // Cannot use the same prog recursively, it contains state.
 								    if (rmp->regprog->re_in_use)
 								    {
-												patch 8.2.4008: error messages are spread out

Problem:    Error messages are spread out.
Solution:   Move more error messages to errors.h.

											
										
										
											2022-01-05 16:09:06 +00:00
+									emsg(_(e_cannot_use_pattern_recursively));
-												patch 8.1.0192: executing regexp recursively fails with a crash

Problem:    Executing regexp recursively fails with a crash.
Solution:   Move global variables into "rex".

											
										
										
											2018-07-17 05:43:58 +02:00
+									return FALSE;
 								    }
 								    rmp->regprog->re_in_use = TRUE;
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    if (rex_in_use)
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+									// Being called recursively, save the state.
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+									rex_save = rex;
 								    rex_in_use = TRUE;
-												patch 8.0.0643: when a pattern search is slow Vim becomes unusable

Problem:    When 'hlsearch' is set and matching with the last search pattern
            is very slow, Vim becomes unusable.  Cannot quit search by
            pressing CTRL-C.
Solution:   When the search times out set a flag and don't try again.  Check
            for timeout and CTRL-C in NFA loop that adds states.

											
										
										
											2017-06-17 18:44:21 +02:00
+								    result = rmp->regprog->engine->regexec_multi(
-												patch 8.2.5057: using gettimeofday() for timeout is very inefficient

Problem:    Using gettimeofday() for timeout is very inefficient.
Solution:   Set a platform dependent timer. (Paul Ollis, closes #10505)

											
										
										
											2022-06-05 16:55:54 +01:00
+												      rmp, win, buf, lnum, col, timed_out);
-												patch 8.1.0194: possibly use of NULL pointer

Problem:    Possible use of NULL pointer. (Coverity)
Solution:   Reset the re_in_use flag earlier.

											
										
										
											2018-07-18 06:02:09 +02:00
+								    rmp->regprog->re_in_use = FALSE;
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
-												patch 8.1.2394: using old C style comments

Problem:    Using old C style comments.
Solution:   Use // comments where appropriate.

											
										
										
											2019-12-05 21:10:38 +01:00
+								    // NFA engine aborted because it's very slow.
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+								    if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
 													       && result == NFA_TOO_EXPENSIVE)
 								    {
 									int    save_p_re = p_re;
 									int    re_flags = rmp->regprog->re_flags;
 									char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
 									p_re = BACKTRACKING_ENGINE;
 									if (pat != NULL)
 									{
-												patch 8.2.4687: "vimgrep /\%v/ *" may cause a crash

Problem:    "vimgrep /\%v/ *" may cause a crash.
Solution:   When compiling the pattern with the old engine fails, restore the
            regprog of the new engine instead of leaving it NULL.
            (closes #10079)

											
										
										
											2022-04-04 18:14:34 +01:00
+									    regprog_T *prev_prog = rmp->regprog;
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+								#ifdef FEAT_EVAL
 									    report_re_switch(pat);
 								#endif
-												patch 8.1.0102: cannot build without syntax highlighting

Problem:    Cannot build without syntax highlighting.
Solution:   Add #ifdef around using reg_do_extmatch.

											
										
										
											2018-06-23 15:09:10 +02:00
+								#ifdef FEAT_SYN_HL
-												patch 8.1.0098: segfault when pattern with \z() is very slow

Problem:    Segfault when pattern with \z() is very slow.
Solution:   Check for NULL regprog.  Add "nfa_fail" to test_override() to be
            able to test this.  Fix that 'searchhl' resets called_emsg.

											
										
										
											2018-06-23 14:21:42 +02:00
+									    // checking for \z misuse was already done when compiling for NFA,
 									    // allow all here
 									    reg_do_extmatch = REX_ALL;
-												patch 8.1.0102: cannot build without syntax highlighting

Problem:    Cannot build without syntax highlighting.
Solution:   Add #ifdef around using reg_do_extmatch.

											
										
										
											2018-06-23 15:09:10 +02:00
+								#endif
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+									    rmp->regprog = vim_regcomp(pat, re_flags);
-												patch 8.1.0102: cannot build without syntax highlighting

Problem:    Cannot build without syntax highlighting.
Solution:   Add #ifdef around using reg_do_extmatch.

											
										
										
											2018-06-23 15:09:10 +02:00
+								#ifdef FEAT_SYN_HL
-												patch 8.1.0098: segfault when pattern with \z() is very slow

Problem:    Segfault when pattern with \z() is very slow.
Solution:   Check for NULL regprog.  Add "nfa_fail" to test_override() to be
            able to test this.  Fix that 'searchhl' resets called_emsg.

											
										
										
											2018-06-23 14:21:42 +02:00
+									    reg_do_extmatch = 0;
-												patch 8.1.0102: cannot build without syntax highlighting

Problem:    Cannot build without syntax highlighting.
Solution:   Add #ifdef around using reg_do_extmatch.

											
										
										
											2018-06-23 15:09:10 +02:00
+								#endif
-												patch 8.2.4687: "vimgrep /\%v/ *" may cause a crash

Problem:    "vimgrep /\%v/ *" may cause a crash.
Solution:   When compiling the pattern with the old engine fails, restore the
            regprog of the new engine instead of leaving it NULL.
            (closes #10079)

											
										
										
											2022-04-04 18:14:34 +01:00
+									    if (rmp->regprog == NULL)
-												patch 8.1.0194: possibly use of NULL pointer

Problem:    Possible use of NULL pointer. (Coverity)
Solution:   Reset the re_in_use flag earlier.

											
										
										
											2018-07-18 06:02:09 +02:00
+									    {
-												patch 8.2.4687: "vimgrep /\%v/ *" may cause a crash

Problem:    "vimgrep /\%v/ *" may cause a crash.
Solution:   When compiling the pattern with the old engine fails, restore the
            regprog of the new engine instead of leaving it NULL.
            (closes #10079)

											
										
										
											2022-04-04 18:14:34 +01:00
+										// Somehow compiling the pattern failed now, put back the
 										// previous one to avoid "regprog" becoming NULL.
 										rmp->regprog = prev_prog;
 									    }
 									    else
 									    {
 										vim_regfree(prev_prog);
-												patch 8.1.0194: possibly use of NULL pointer

Problem:    Possible use of NULL pointer. (Coverity)
Solution:   Reset the re_in_use flag earlier.

											
										
										
											2018-07-18 06:02:09 +02:00
+										rmp->regprog->re_in_use = TRUE;
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+										result = rmp->regprog->engine->regexec_multi(
-												patch 8.2.5057: using gettimeofday() for timeout is very inefficient

Problem:    Using gettimeofday() for timeout is very inefficient.
Solution:   Set a platform dependent timer. (Paul Ollis, closes #10505)

											
										
										
											2022-06-05 16:55:54 +01:00
+												      rmp, win, buf, lnum, col, timed_out);
-												patch 8.1.0194: possibly use of NULL pointer

Problem:    Possible use of NULL pointer. (Coverity)
Solution:   Reset the re_in_use flag earlier.

											
										
										
											2018-07-18 06:02:09 +02:00
+										rmp->regprog->re_in_use = FALSE;
 									    }
-												updated for version 7.4.497
Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

											
										
										
											2014-11-05 14:27:36 +01:00
+									    vim_free(pat);
 									}
 									p_re = save_p_re;
 								    }
-												patch 8.0.0020
Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

											
										
										
											2016-10-02 16:51:57 +02:00
+								    rex_in_use = rex_in_use_save;
 								    if (rex_in_use)
 									rex = rex_save;
-												updated for version 7.4.526
Problem:    matchstr() fails on long text. (Daniel Hahler)
Solution:   Return NFA_TOO_EXPENSIVE from regexec_nl(). (Christian Brabandt)

											
										
										
											2014-11-20 23:07:05 +01:00
+								    return result <= 0 ? 0 : result;
-												updated for version 7.3.970
Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)

											
										
										
											2013-05-19 19:40:29 +02:00
+								}
No results found.