2016-08-29 22:49:24 +02:00
|
|
|
|
/* vi:set ts=8 sts=4 sw=4 noet:
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*
|
|
|
|
|
* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
|
|
|
|
|
*/
|
|
|
|
|
|
2019-04-25 20:07:51 +02:00
|
|
|
|
// By default: do not create debugging logs or files related to regular
|
|
|
|
|
// expressions, even when compiling with -DDEBUG.
|
|
|
|
|
// Uncomment the second line to get the regexp debugging.
|
|
|
|
|
#undef DEBUG
|
|
|
|
|
// #define DEBUG
|
2013-05-19 19:40:29 +02:00
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
#include "vim.h"
|
|
|
|
|
|
2013-05-19 19:40:29 +02:00
|
|
|
|
#ifdef DEBUG
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// show/save debugging data when BT engine is used
|
2013-05-19 19:40:29 +02:00
|
|
|
|
# define BT_REGEXP_DUMP
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// save the debugging data to a file instead of displaying it
|
2013-05-19 19:40:29 +02:00
|
|
|
|
# define BT_REGEXP_LOG
|
2013-05-20 21:49:13 +02:00
|
|
|
|
# define BT_REGEXP_DEBUG_LOG
|
|
|
|
|
# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
|
2013-05-19 19:40:29 +02:00
|
|
|
|
#endif
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2022-06-05 16:55:54 +01:00
|
|
|
|
#ifdef FEAT_RELTIME
|
2022-06-20 13:38:33 +01:00
|
|
|
|
// Flag that "timeout_flag" refers to while no timer was started; it is
// initialized to zero.
static sig_atomic_t		dummy_timeout_flag = 0;

// The flag currently in use: the result of start_timeout() after
// init_regexp_timeout(), otherwise the dummy flag above.
static volatile sig_atomic_t	*timeout_flag = &dummy_timeout_flag;
|
2022-06-05 16:55:54 +01:00
|
|
|
|
#endif
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
 * Magic characters have a special meaning, they don't match literally.
 * A Magic character is stored as the character value minus 256, which makes
 * it negative.  This separates Magic characters from literal characters
 * (possibly multi-byte).  Only ASCII characters can be Magic.
 */

// Turn the plain character "x" into its Magic value.
#define Magic(x)	((int)(x) - 256)

// Turn the Magic value "x" back into the plain character.
#define un_Magic(x)	((x) + 256)

// Non-zero when "x" is a Magic value, i.e. when it is negative.
#define is_Magic(x)	((x) < 0)
|
|
|
|
|
|
|
|
|
|
/*
 * Return "x" without Magic: a Magic value is converted with un_Magic(), any
 * other value is returned as-is.
 */
    static int
no_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : x;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Flip the Magic state of "x": a Magic value becomes the plain character, a
 * plain character becomes its Magic value.
 */
    static int
toggle_Magic(int x)
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
|
|
|
|
|
|
2022-06-05 16:55:54 +01:00
|
|
|
|
#ifdef FEAT_RELTIME
|
2022-08-26 21:33:04 +01:00
|
|
|
|
static int timeout_nesting = 0;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Start a timer that will cause the regexp to abort after "msec".
|
|
|
|
|
* This doesn't work well recursively. In case it happens anyway, the first
|
|
|
|
|
* set timeout will prevail, nested ones are ignored.
|
|
|
|
|
* The caller must make sure there is a matching disable_regexp_timeout() call!
|
|
|
|
|
*/
|
2022-06-05 16:55:54 +01:00
|
|
|
|
void
|
|
|
|
|
init_regexp_timeout(long msec)
|
|
|
|
|
{
|
2022-08-26 21:33:04 +01:00
|
|
|
|
if (timeout_nesting == 0)
|
|
|
|
|
timeout_flag = start_timeout(msec);
|
|
|
|
|
++timeout_nesting;
|
2022-06-05 16:55:54 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
disable_regexp_timeout(void)
|
|
|
|
|
{
|
2022-08-26 21:33:04 +01:00
|
|
|
|
if (timeout_nesting == 0)
|
|
|
|
|
iemsg("disable_regexp_timeout() called without active timer");
|
|
|
|
|
else if (--timeout_nesting == 0)
|
|
|
|
|
{
|
|
|
|
|
stop_timeout();
|
|
|
|
|
timeout_flag = &dummy_timeout_flag;
|
|
|
|
|
}
|
2022-06-05 16:55:54 +01:00
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2022-09-20 13:51:25 +01:00
|
|
|
|
#if defined(FEAT_EVAL) || defined(PROTO)
|
|
|
|
|
# ifdef FEAT_RELTIME
|
|
|
|
|
static sig_atomic_t *saved_timeout_flag;
|
|
|
|
|
# endif
|
|
|
|
|
|
|
|
|
|
/*
 * Used at the debug prompt: disable the timeout so that expression evaluation
 * can use patterns.
 * The current "timeout_flag" is remembered in "saved_timeout_flag" and
 * replaced by the dummy flag.  Does nothing without FEAT_RELTIME.
 * Must be followed by calling restore_timeout_for_debugging().
 */
    void
save_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    // the cast drops the "volatile" qualifier of timeout_flag
    saved_timeout_flag = (sig_atomic_t *)timeout_flag;

    timeout_flag = &dummy_timeout_flag;
# endif
}
|
|
|
|
|
|
|
|
|
|
/*
 * Counterpart of save_timeout_for_debugging(): make "timeout_flag" point to
 * the flag that was stored in "saved_timeout_flag".
 * Does nothing without FEAT_RELTIME.
 */
    void
restore_timeout_for_debugging(void)
{
# ifdef FEAT_RELTIME
    timeout_flag = saved_timeout_flag;
# endif
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
 * The first byte of the BT regexp internal "program" is actually this magic
 * number; the start node begins in the second byte.  It's used to catch the
 * most severe mutilation of the program by the caller.
 * The value is written in octal.
 */
#define REGMAGIC	0234
|
|
|
|
|
|
|
|
|
|
/*
 * Utility definitions.
 */

// The byte "p" points to, read through a "char_u" pointer and converted to
// int.
#define UCHARAT(p)	((int)*(char_u *)(p))
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Used for an error (down from) vim_regcomp().  Every macro below expands to
// a complete "return" statement that gives the error message, sets
// rc_did_emsg to TRUE and then returns NULL (the ..._RET_NULL macros) or FAIL
// (the ..._RET_FAIL macros).
//
// EMSG_* use emsg(), IEMSG_* uses iemsg().  The EMSG2_* and EMSG3_* macros use
// semsg() with "m" as the format; their "c" argument selects the first
// argument for the format: "" when "c" is non-zero, a backslash otherwise.
// EMSG3_RET_NULL additionally passes "a" as the second argument.
#define EMSG_RET_NULL(m) \
    return (emsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define IEMSG_RET_NULL(m) \
    return (iemsg((m)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) \
    return (emsg((m)), rc_did_emsg = TRUE, FAIL)
#define EMSG2_RET_NULL(m, c) \
    return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG3_RET_NULL(m, c, a) \
    return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG2_RET_FAIL(m, c) \
    return (semsg((const char *)(m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)

// Report an invalid item in brackets; no backslash is put in the message when
// the pattern is "very magic".
#define EMSG_ONE_RET_NULL \
    EMSG2_RET_NULL(_(e_invalid_item_in_str_brackets), reg_magic == MAGIC_ALL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2016-11-10 20:01:45 +01:00
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
// NOTE(review): presumably the upper limit for a multi count such as
// "\{n,m}"; the users of this value are not visible here -- confirm.
#define MAX_LIMIT	(32767L << 16L)

// return values of re_multi_type()
#define NOT_MULTI	0	// not a "multi" operator
#define MULTI_ONE	1	// a single "multi" operator
#define MULTI_MULT	2	// a multi "multi" operator

// return values for regmatch()
#define RA_FAIL		1	// something failed, abort
#define RA_CONT		2	// continue in inner loop
#define RA_BREAK	3	// break inner loop
#define RA_MATCH	4	// successful match
#define RA_NOMATCH	5	// didn't match
|
2019-09-07 23:16:33 +02:00
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
|
|
|
|
|
* Return NOT_MULTI if c is not a "multi" operator.
|
|
|
|
|
* Return MULTI_ONE if c is a single "multi" operator.
|
|
|
|
|
* Return MULTI_MULT if c is a multi "multi" operator.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
re_multi_type(int c)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
|
|
|
|
|
return MULTI_ONE;
|
|
|
|
|
if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
|
|
|
|
|
return MULTI_MULT;
|
|
|
|
|
return NOT_MULTI;
|
|
|
|
|
}
|
|
|
|
|
|
2005-06-25 23:04:51 +00:00
|
|
|
|
// NOTE(review): presumably the previously used substitute string and its
// length in bytes; the code using them is outside this part of the file --
// confirm.  Initially there is no string.
static char_u	*reg_prev_sub = NULL;
static size_t	reg_prev_sublen = 0;
|
2005-06-25 23:04:51 +00:00
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
|
|
|
|
|
* REGEXP_INRANGE contains all characters which are always special in a []
|
|
|
|
|
* range after '\'.
|
|
|
|
|
* REGEXP_ABBR contains all characters which act as abbreviations after '\'.
|
|
|
|
|
* These are:
|
|
|
|
|
* \n - New line (NL).
|
|
|
|
|
* \r - Carriage Return (CR).
|
|
|
|
|
* \t - Tab (TAB).
|
|
|
|
|
* \e - Escape (ESC).
|
|
|
|
|
* \b - Backspace (Ctrl_H).
|
2004-09-13 20:26:32 +00:00
|
|
|
|
* \d - Character code in decimal, eg \d123
|
|
|
|
|
* \o - Character code in octal, eg \o80
|
|
|
|
|
* \x - Character code in hex, eg \x4a
|
|
|
|
|
* \u - Multibyte character code, eg \u20ac
|
|
|
|
|
* \U - Long multibyte character code, eg \U12345678
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
|
|
|
|
static char_u REGEXP_INRANGE[] = "]^-n\\";
|
2004-09-13 20:26:32 +00:00
|
|
|
|
static char_u REGEXP_ABBR[] = "nrtebdoxuU";
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Translate '\x' to its control character, except "\n", which is Magic.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
backslash_trans(int c)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
switch (c)
|
|
|
|
|
{
|
|
|
|
|
case 'r': return CAR;
|
|
|
|
|
case 't': return TAB;
|
|
|
|
|
case 'e': return ESC;
|
|
|
|
|
case 'b': return BS;
|
|
|
|
|
}
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-12 00:07:17 +02:00
|
|
|
|
// Values returned by get_char_class(), one for every "[:name:]" character
// class.  The values count up from zero; CLASS_NONE means that no class was
// recognized.
enum
{
    CLASS_ALNUM = 0,	// [:alnum:]
    CLASS_ALPHA,	// [:alpha:]
    CLASS_BLANK,	// [:blank:]
    CLASS_CNTRL,	// [:cntrl:]
    CLASS_DIGIT,	// [:digit:]
    CLASS_GRAPH,	// [:graph:]
    CLASS_LOWER,	// [:lower:]
    CLASS_PRINT,	// [:print:]
    CLASS_PUNCT,	// [:punct:]
    CLASS_SPACE,	// [:space:]
    CLASS_UPPER,	// [:upper:]
    CLASS_XDIGIT,	// [:xdigit:]
    CLASS_TAB,		// [:tab:]
    CLASS_RETURN,	// [:return:]
    CLASS_BACKSPACE,	// [:backspace:]
    CLASS_ESCAPE,	// [:escape:]
    CLASS_IDENT,	// [:ident:]
    CLASS_KEYWORD,	// [:keyword:]
    CLASS_FNAME,	// [:fname:]
    CLASS_NONE = 99
};
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
|
2005-02-22 08:39:57 +00:00
|
|
|
|
* Check for a character class name "[:name:]". "pp" points to the '['.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
* Returns one of the CLASS_ items. CLASS_NONE means that no item was
|
|
|
|
|
* recognized. Otherwise "pp" is advanced to after the item.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
get_char_class(char_u **pp)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
// must be sorted by the 'value' field because it is used by bsearch()!
|
|
|
|
|
static keyvalue_T char_class_tab[] =
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
KEYVALUE_ENTRY(CLASS_ALNUM, "alnum:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_ALPHA, "alpha:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_BACKSPACE, "backspace:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_BLANK, "blank:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_CNTRL, "cntrl:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_DIGIT, "digit:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_ESCAPE, "escape:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_FNAME, "fname:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_GRAPH, "graph:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_IDENT, "ident:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_KEYWORD, "keyword:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_LOWER, "lower:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_PRINT, "print:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_PUNCT, "punct:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_RETURN, "return:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_SPACE, "space:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_TAB, "tab:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_UPPER, "upper:]"),
|
|
|
|
|
KEYVALUE_ENTRY(CLASS_XDIGIT, "xdigit:]")
|
2004-06-13 20:20:40 +00:00
|
|
|
|
};
|
|
|
|
|
|
2024-05-12 00:07:17 +02:00
|
|
|
|
// check that the value of "pp" has a chance of matching
|
|
|
|
|
if ((*pp)[1] == ':' && ASCII_ISLOWER((*pp)[2])
|
|
|
|
|
&& ASCII_ISLOWER((*pp)[3]) && ASCII_ISLOWER((*pp)[4]))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
keyvalue_T target;
|
|
|
|
|
keyvalue_T *entry;
|
|
|
|
|
// this function can be called repeatedly with the same value for "pp"
|
|
|
|
|
// so we cache the last found entry.
|
|
|
|
|
static keyvalue_T *last_entry = NULL;
|
|
|
|
|
|
|
|
|
|
target.key = 0;
|
2024-11-02 15:59:01 +01:00
|
|
|
|
target.value.string = *pp + 2;
|
|
|
|
|
target.value.length = 0; // not used, see cmp_keyvalue_value_n()
|
2024-05-12 00:07:17 +02:00
|
|
|
|
|
|
|
|
|
if (last_entry != NULL && cmp_keyvalue_value_n(&target, last_entry) == 0)
|
|
|
|
|
entry = last_entry;
|
|
|
|
|
else
|
|
|
|
|
entry = (keyvalue_T *)bsearch(&target, &char_class_tab,
|
|
|
|
|
ARRAY_LENGTH(char_class_tab),
|
|
|
|
|
sizeof(char_class_tab[0]), cmp_keyvalue_value_n);
|
|
|
|
|
if (entry != NULL)
|
|
|
|
|
{
|
|
|
|
|
last_entry = entry;
|
2024-11-02 15:59:01 +01:00
|
|
|
|
*pp += entry->value.length + 2;
|
2024-05-12 00:07:17 +02:00
|
|
|
|
return entry->key;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
return CLASS_NONE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Specific version of character class functions.
 * Using a table to keep this fast: class_tab[] has one entry for every byte
 * value, holding the RI_ flags below.  It is filled by init_class_tab().
 */
static short	class_tab[256];

#define RI_DIGIT    0x01
#define RI_HEX	    0x02
#define RI_OCTAL    0x04
#define RI_WORD	    0x08
#define RI_HEAD	    0x10
#define RI_ALPHA    0x20
#define RI_LOWER    0x40
#define RI_UPPER    0x80
#define RI_WHITE    0x100
|
|
|
|
|
|
|
|
|
|
static void
|
2016-01-30 20:31:25 +01:00
|
|
|
|
init_class_tab(void)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
static int done = FALSE;
|
|
|
|
|
|
|
|
|
|
if (done)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < 256; ++i)
|
|
|
|
|
{
|
|
|
|
|
if (i >= '0' && i <= '7')
|
|
|
|
|
class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
|
|
|
|
|
else if (i >= '8' && i <= '9')
|
|
|
|
|
class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
|
|
|
|
|
else if (i >= 'a' && i <= 'f')
|
|
|
|
|
class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
|
|
|
|
|
else if (i >= 'g' && i <= 'z')
|
|
|
|
|
class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
|
|
|
|
|
else if (i >= 'A' && i <= 'F')
|
|
|
|
|
class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
|
|
|
|
|
else if (i >= 'G' && i <= 'Z')
|
|
|
|
|
class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
|
|
|
|
|
else if (i == '_')
|
|
|
|
|
class_tab[i] = RI_WORD + RI_HEAD;
|
|
|
|
|
else
|
|
|
|
|
class_tab[i] = 0;
|
|
|
|
|
}
|
|
|
|
|
class_tab[' '] |= RI_WHITE;
|
|
|
|
|
class_tab['\t'] |= RI_WHITE;
|
|
|
|
|
done = TRUE;
|
|
|
|
|
}
|
|
|
|
|
|
2022-02-16 19:24:07 +00:00
|
|
|
|
// Character class tests using class_tab[].  Each one is non-zero when "c" is
// below 0x100 and the matching RI_ flag is set for it.  Values of 0x100 and
// above never match.  Note that "c" is evaluated twice and there is no check
// for a negative value.
#define ri_digit(c)	((c) < 0x100 && (class_tab[c] & RI_DIGIT))
#define ri_hex(c)	((c) < 0x100 && (class_tab[c] & RI_HEX))
#define ri_octal(c)	((c) < 0x100 && (class_tab[c] & RI_OCTAL))
#define ri_word(c)	((c) < 0x100 && (class_tab[c] & RI_WORD))
#define ri_head(c)	((c) < 0x100 && (class_tab[c] & RI_HEAD))
#define ri_alpha(c)	((c) < 0x100 && (class_tab[c] & RI_ALPHA))
#define ri_lower(c)	((c) < 0x100 && (class_tab[c] & RI_LOWER))
#define ri_upper(c)	((c) < 0x100 && (class_tab[c] & RI_UPPER))
#define ri_white(c)	((c) < 0x100 && (class_tab[c] & RI_WHITE))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// flags for regflags, each one is a separate bit
#define RF_ICASE    1	// ignore case
#define RF_NOICASE  2	// don't ignore case
#define RF_HASNL    4	// can match a NL
#define RF_ICOMBINE 8	// ignore combining characters
#define RF_LOOKBH   16	// uses "\@<=" or "\@<!"
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Global work variables for vim_regcomp().
|
|
|
|
|
*/
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
static char_u *regparse; // Input-scan pointer.
|
|
|
|
|
static int regnpar; // () count.
|
2021-01-02 17:43:49 +01:00
|
|
|
|
static int wants_nfa; // regex should use NFA engine
|
2004-06-13 20:20:40 +00:00
|
|
|
|
#ifdef FEAT_SYN_HL
|
2019-12-05 21:10:38 +01:00
|
|
|
|
static int regnzpar; // \z() count.
|
|
|
|
|
static int re_has_z; // \z item detected
|
2004-06-13 20:20:40 +00:00
|
|
|
|
#endif
|
2019-12-05 21:10:38 +01:00
|
|
|
|
static unsigned regflags; // RF_ flags for prog
|
2004-06-13 20:20:40 +00:00
|
|
|
|
#if defined(FEAT_SYN_HL) || defined(PROTO)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
static int had_eol; // TRUE when EOL found by vim_regcomp()
|
2004-06-13 20:20:40 +00:00
|
|
|
|
#endif
|
|
|
|
|
|
2021-01-04 12:42:13 +01:00
|
|
|
|
static magic_T reg_magic; // magicness of the pattern
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
static int reg_string; // matching with a string instead of a buffer
|
|
|
|
|
// line
|
|
|
|
|
static int reg_strict; // "[abc" is illegal
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* META contains all characters that may be magic, except '^' and '$'.
|
|
|
|
|
*/
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// META[] is used often enough to justify turning it into a table.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
static char_u META_flags[] = {
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// % & ( ) * + .
|
2004-06-13 20:20:40 +00:00
|
|
|
|
0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// 1 2 3 4 5 6 7 8 9 < = > ?
|
2004-06-13 20:20:40 +00:00
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// @ A C D F H I K L M O
|
2004-06-13 20:20:40 +00:00
|
|
|
|
1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// P S U V W X Z [ _
|
2004-06-13 20:20:40 +00:00
|
|
|
|
1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// a c d f h i k l m n o
|
2004-06-13 20:20:40 +00:00
|
|
|
|
0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// p s u v w x z { | ~
|
2004-06-13 20:20:40 +00:00
|
|
|
|
1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
|
|
|
|
|
};
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// currently parsed character
static int	curchr;

// Previous character.  Note: prevchr is sometimes -1 when we are not at the
// start, eg in /[ ^I]^ the pattern was never found even if it existed,
// because ^ was taken to be magic -- webb
static int	prevchr;

// previous-previous character
static int	prevprevchr;

// used for ungetchr()
static int	nextchr;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// arguments for reg(): the kind of parenthesis being parsed
#define REG_NOPAREN	0	// toplevel reg()
#define REG_PAREN	1	// \(\)
#define REG_ZPAREN	2	// \z(\)
#define REG_NPAREN	3	// \%(\)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2013-06-01 14:42:56 +02:00
|
|
|
|
typedef struct
|
|
|
|
|
{
|
|
|
|
|
char_u *regparse;
|
|
|
|
|
int prevchr_len;
|
|
|
|
|
int curchr;
|
|
|
|
|
int prevchr;
|
|
|
|
|
int prevprevchr;
|
|
|
|
|
int nextchr;
|
|
|
|
|
int at_start;
|
|
|
|
|
int prev_at_start;
|
|
|
|
|
int regnpar;
|
|
|
|
|
} parse_state_T;
|
|
|
|
|
|
2016-01-29 22:47:03 +01:00
|
|
|
|
static void initchr(char_u *);
|
|
|
|
|
static int getchr(void);
|
|
|
|
|
static void skipchr_keepstart(void);
|
|
|
|
|
static int peekchr(void);
|
|
|
|
|
static void skipchr(void);
|
|
|
|
|
static void ungetchr(void);
|
2017-11-02 22:29:38 +01:00
|
|
|
|
static long gethexchrs(int maxinputlen);
|
|
|
|
|
static long getoctchrs(void);
|
|
|
|
|
static long getdecchrs(void);
|
2016-01-29 22:47:03 +01:00
|
|
|
|
static int coll_get_char(void);
|
|
|
|
|
static int prog_magic_wrong(void);
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static int cstrncmp(char_u *s1, char_u *s2, int *n);
|
|
|
|
|
static char_u *cstrchr(char_u *, int);
|
|
|
|
|
static int re_mult_next(char *what);
|
2019-01-31 15:34:40 +01:00
|
|
|
|
static int reg_iswordc(int);
|
2021-01-02 17:43:49 +01:00
|
|
|
|
#ifdef FEAT_EVAL
|
|
|
|
|
static void report_re_switch(char_u *pat);
|
|
|
|
|
#endif
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2013-05-19 19:40:29 +02:00
|
|
|
|
// The two regexp engines; only declared here.  Going by the names these are
// the backtracking (BT) engine and the NFA engine.
static regengine_T	bt_regengine;
static regengine_T	nfa_regengine;
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
|
|
|
|
|
* Return TRUE if compiled regular expression "prog" can match a line break.
|
|
|
|
|
*/
|
|
|
|
|
int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
re_multiline(regprog_T *prog)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
return (prog->regflags & RF_HASNL);
|
|
|
|
|
}
|
|
|
|
|
|
2005-02-22 08:39:57 +00:00
|
|
|
|
/*
|
|
|
|
|
* Check for an equivalence class name "[=a=]". "pp" points to the '['.
|
|
|
|
|
* Returns a character representing the class. Zero means that no item was
|
|
|
|
|
* recognized. Otherwise "pp" is advanced to after the item.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
get_equi_class(char_u **pp)
|
2005-02-22 08:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
int c;
|
|
|
|
|
int l = 1;
|
|
|
|
|
char_u *p = *pp;
|
|
|
|
|
|
2019-02-16 17:07:47 +01:00
|
|
|
|
if (p[1] == '=' && p[2] != NUL)
|
2005-02-22 08:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
if (has_mbyte)
|
2005-08-10 21:07:57 +00:00
|
|
|
|
l = (*mb_ptr2len)(p + 2);
|
2005-02-22 08:39:57 +00:00
|
|
|
|
if (p[l + 2] == '=' && p[l + 3] == ']')
|
|
|
|
|
{
|
|
|
|
|
if (has_mbyte)
|
|
|
|
|
c = mb_ptr2char(p + 2);
|
|
|
|
|
else
|
|
|
|
|
c = p[2];
|
|
|
|
|
*pp += l + 4;
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Check for a collating element "[.a.]". "pp" points to the '['.
|
|
|
|
|
* Returns a character. Zero means that no item was recognized. Otherwise
|
|
|
|
|
* "pp" is advanced to after the item.
|
|
|
|
|
* Currently only single characters are recognized!
|
|
|
|
|
*/
|
|
|
|
|
static int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
get_coll_element(char_u **pp)
|
2005-02-22 08:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
int c;
|
|
|
|
|
int l = 1;
|
|
|
|
|
char_u *p = *pp;
|
|
|
|
|
|
2019-02-17 13:53:34 +01:00
|
|
|
|
if (p[0] != NUL && p[1] == '.' && p[2] != NUL)
|
2005-02-22 08:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
if (has_mbyte)
|
2005-08-10 21:07:57 +00:00
|
|
|
|
l = (*mb_ptr2len)(p + 2);
|
2005-02-22 08:39:57 +00:00
|
|
|
|
if (p[l + 2] == '.' && p[l + 3] == ']')
|
|
|
|
|
{
|
|
|
|
|
if (has_mbyte)
|
|
|
|
|
c = mb_ptr2char(p + 2);
|
|
|
|
|
else
|
|
|
|
|
c = p[2];
|
|
|
|
|
*pp += l + 4;
|
|
|
|
|
return c;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
static int reg_cpo_lit; // 'cpoptions' contains 'l' flag
|
|
|
|
|
static int reg_cpo_bsl; // 'cpoptions' contains '\' flag
|
2013-06-05 12:43:09 +02:00
|
|
|
|
|
|
|
|
|
static void
|
2016-01-30 20:31:25 +01:00
|
|
|
|
get_cpo_flags(void)
|
2013-06-05 12:43:09 +02:00
|
|
|
|
{
|
|
|
|
|
reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
|
|
|
|
|
reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
|
|
|
|
|
}
|
2005-02-22 08:39:57 +00:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Skip over a "[]" range.
|
|
|
|
|
* "p" must point to the character after the '['.
|
|
|
|
|
* The returned pointer is on the matching ']', or the terminating NUL.
|
|
|
|
|
*/
|
|
|
|
|
static char_u *
|
2016-01-30 20:31:25 +01:00
|
|
|
|
skip_anyof(char_u *p)
|
2005-02-22 08:39:57 +00:00
|
|
|
|
{
|
|
|
|
|
int l;
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
if (*p == '^') // Complement of range.
|
2005-02-22 08:39:57 +00:00
|
|
|
|
++p;
|
|
|
|
|
if (*p == ']' || *p == '-')
|
|
|
|
|
++p;
|
|
|
|
|
while (*p != NUL && *p != ']')
|
|
|
|
|
{
|
2005-08-10 21:07:57 +00:00
|
|
|
|
if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
|
2005-02-22 08:39:57 +00:00
|
|
|
|
p += l;
|
|
|
|
|
else
|
|
|
|
|
if (*p == '-')
|
|
|
|
|
{
|
|
|
|
|
++p;
|
|
|
|
|
if (*p != ']' && *p != NUL)
|
2017-03-12 19:22:36 +01:00
|
|
|
|
MB_PTR_ADV(p);
|
2005-02-22 08:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
else if (*p == '\\'
|
2013-06-05 12:43:09 +02:00
|
|
|
|
&& !reg_cpo_bsl
|
2005-02-22 08:39:57 +00:00
|
|
|
|
&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
|
2013-06-05 12:43:09 +02:00
|
|
|
|
|| (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
|
2005-02-22 08:39:57 +00:00
|
|
|
|
p += 2;
|
|
|
|
|
else if (*p == '[')
|
|
|
|
|
{
|
|
|
|
|
if (get_char_class(&p) == CLASS_NONE
|
|
|
|
|
&& get_equi_class(&p) == 0
|
2015-06-09 20:39:24 +02:00
|
|
|
|
&& get_coll_element(&p) == 0
|
|
|
|
|
&& *p != NUL)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
++p; // it is not a class name and not NUL
|
2005-02-22 08:39:57 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
++p;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return p;
|
|
|
|
|
}
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
|
|
|
|
|
* Skip past regular expression.
|
2020-04-20 19:42:10 +02:00
|
|
|
|
* Stop at end of "startp" or where "delim" is found ('/', '?', etc).
|
2004-06-13 20:20:40 +00:00
|
|
|
|
* Take care of characters with a backslash in front of it.
|
|
|
|
|
* Skip strings inside [ and ].
|
2020-04-02 21:13:25 +02:00
|
|
|
|
*/
|
|
|
|
|
char_u *
|
|
|
|
|
skip_regexp(
|
|
|
|
|
char_u *startp,
|
2020-04-20 19:42:10 +02:00
|
|
|
|
int delim,
|
2020-04-02 21:13:25 +02:00
|
|
|
|
int magic)
|
|
|
|
|
{
|
2021-01-04 12:42:13 +01:00
|
|
|
|
return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
|
2020-04-20 19:42:10 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Call skip_regexp() and when the delimiter does not match give an error and
|
|
|
|
|
* return NULL.
|
|
|
|
|
*/
|
|
|
|
|
char_u *
|
|
|
|
|
skip_regexp_err(
|
|
|
|
|
char_u *startp,
|
|
|
|
|
int delim,
|
|
|
|
|
int magic)
|
|
|
|
|
{
|
|
|
|
|
char_u *p = skip_regexp(startp, delim, magic);
|
|
|
|
|
|
|
|
|
|
if (*p != delim)
|
|
|
|
|
{
|
2022-01-04 21:30:47 +00:00
|
|
|
|
semsg(_(e_missing_delimiter_after_search_pattern_str), startp);
|
2020-04-20 19:42:10 +02:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
return p;
|
2020-04-02 21:13:25 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* skip_regexp() with extra arguments:
|
2004-06-13 20:20:40 +00:00
|
|
|
|
* When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
|
|
|
|
|
* expression and change "\?" to "?". If "*newp" is not NULL the expression
|
|
|
|
|
* is changed in-place.
|
2020-04-02 21:13:25 +02:00
|
|
|
|
* If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
|
2021-01-04 12:42:13 +01:00
|
|
|
|
* If "magic_val" is not NULL, returns the effective magicness of the pattern
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
|
|
|
|
char_u *
|
2020-04-02 21:13:25 +02:00
|
|
|
|
skip_regexp_ex(
|
2016-01-30 20:31:25 +01:00
|
|
|
|
char_u *startp,
|
|
|
|
|
int dirc,
|
|
|
|
|
int magic,
|
2020-04-02 21:13:25 +02:00
|
|
|
|
char_u **newp,
|
2021-01-04 12:42:13 +01:00
|
|
|
|
int *dropped,
|
|
|
|
|
magic_T *magic_val)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2021-01-04 12:42:13 +01:00
|
|
|
|
magic_T mymagic;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
char_u *p = startp;
|
2024-05-24 07:37:36 +02:00
|
|
|
|
size_t startplen = 0;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
|
|
|
|
if (magic)
|
|
|
|
|
mymagic = MAGIC_ON;
|
|
|
|
|
else
|
|
|
|
|
mymagic = MAGIC_OFF;
|
2013-06-05 12:43:09 +02:00
|
|
|
|
get_cpo_flags();
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2017-03-12 19:22:36 +01:00
|
|
|
|
for (; p[0] != NUL; MB_PTR_ADV(p))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
if (p[0] == dirc) // found end of regexp
|
2004-06-13 20:20:40 +00:00
|
|
|
|
break;
|
|
|
|
|
if ((p[0] == '[' && mymagic >= MAGIC_ON)
|
|
|
|
|
|| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
|
|
|
|
|
{
|
|
|
|
|
p = skip_anyof(p + 1);
|
|
|
|
|
if (p[0] == NUL)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
else if (p[0] == '\\' && p[1] != NUL)
|
|
|
|
|
{
|
|
|
|
|
if (dirc == '?' && newp != NULL && p[1] == '?')
|
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// change "\?" to "?", make a copy first.
|
2024-05-24 07:37:36 +02:00
|
|
|
|
if (startplen == 0)
|
|
|
|
|
startplen = STRLEN(startp);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (*newp == NULL)
|
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
*newp = vim_strnsave(startp, startplen);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (*newp != NULL)
|
2024-05-24 07:37:36 +02:00
|
|
|
|
{
|
2004-06-13 20:20:40 +00:00
|
|
|
|
p = *newp + (p - startp);
|
2024-05-24 07:37:36 +02:00
|
|
|
|
startp = *newp;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2020-04-02 21:13:25 +02:00
|
|
|
|
if (dropped != NULL)
|
|
|
|
|
++*dropped;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (*newp != NULL)
|
2024-05-24 07:37:36 +02:00
|
|
|
|
mch_memmove(p, p + 1, startplen - ((p + 1) - startp) + 1);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
else
|
|
|
|
|
++p;
|
|
|
|
|
}
|
|
|
|
|
else
|
2019-12-05 21:10:38 +01:00
|
|
|
|
++p; // skip next character
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (*p == 'v')
|
|
|
|
|
mymagic = MAGIC_ALL;
|
|
|
|
|
else if (*p == 'V')
|
|
|
|
|
mymagic = MAGIC_NONE;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-01-04 12:42:13 +01:00
|
|
|
|
if (magic_val != NULL)
|
|
|
|
|
*magic_val = mymagic;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
return p;
|
|
|
|
|
}
|
|
|
|
|
|
2017-06-17 20:08:20 +02:00
|
|
|
|
/*
 * Functions for getting characters from the regexp input.
 * The variables below are part of the parser state, see initchr().
 */
static int	prevchr_len;	// byte length of previous char
static int	at_start;	// True when on the first character
static int	prev_at_start;	// True when on the second character
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2013-06-08 18:19:48 +02:00
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* Start parsing at "str".
|
2013-06-08 18:19:48 +02:00
|
|
|
|
*/
|
|
|
|
|
static void
|
2019-09-07 23:16:33 +02:00
|
|
|
|
initchr(char_u *str)
|
2013-06-08 18:19:48 +02:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
regparse = str;
|
|
|
|
|
prevchr_len = 0;
|
|
|
|
|
curchr = prevprevchr = prevchr = nextchr = -1;
|
|
|
|
|
at_start = TRUE;
|
|
|
|
|
prev_at_start = FALSE;
|
2013-06-08 18:19:48 +02:00
|
|
|
|
}
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* Save the current parse state, so that it can be restored and parsing
|
|
|
|
|
* starts in the same state again.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
|
|
|
|
static void
|
2019-09-07 23:16:33 +02:00
|
|
|
|
save_parse_state(parse_state_T *ps)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
ps->regparse = regparse;
|
|
|
|
|
ps->prevchr_len = prevchr_len;
|
|
|
|
|
ps->curchr = curchr;
|
|
|
|
|
ps->prevchr = prevchr;
|
|
|
|
|
ps->prevprevchr = prevprevchr;
|
|
|
|
|
ps->nextchr = nextchr;
|
|
|
|
|
ps->at_start = at_start;
|
|
|
|
|
ps->prev_at_start = prev_at_start;
|
|
|
|
|
ps->regnpar = regnpar;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* Restore a previously saved parse state.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static void
|
|
|
|
|
restore_parse_state(parse_state_T *ps)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
regparse = ps->regparse;
|
|
|
|
|
prevchr_len = ps->prevchr_len;
|
|
|
|
|
curchr = ps->curchr;
|
|
|
|
|
prevchr = ps->prevchr;
|
|
|
|
|
prevprevchr = ps->prevprevchr;
|
|
|
|
|
nextchr = ps->nextchr;
|
|
|
|
|
at_start = ps->at_start;
|
|
|
|
|
prev_at_start = ps->prev_at_start;
|
|
|
|
|
regnpar = ps->regnpar;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* Get the next character without advancing.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static int
|
|
|
|
|
peekchr(void)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static int after_slash = FALSE;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2023-01-18 18:17:48 +00:00
|
|
|
|
if (curchr != -1)
|
|
|
|
|
return curchr;
|
|
|
|
|
|
|
|
|
|
switch (curchr = regparse[0])
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
case '.':
|
|
|
|
|
case '[':
|
|
|
|
|
case '~':
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// magic when 'magic' is on
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (reg_magic >= MAGIC_ON)
|
|
|
|
|
curchr = Magic(curchr);
|
|
|
|
|
break;
|
|
|
|
|
case '(':
|
|
|
|
|
case ')':
|
|
|
|
|
case '{':
|
|
|
|
|
case '%':
|
|
|
|
|
case '+':
|
|
|
|
|
case '=':
|
|
|
|
|
case '?':
|
|
|
|
|
case '@':
|
|
|
|
|
case '!':
|
|
|
|
|
case '&':
|
|
|
|
|
case '|':
|
|
|
|
|
case '<':
|
|
|
|
|
case '>':
|
2019-12-05 21:10:38 +01:00
|
|
|
|
case '#': // future ext.
|
|
|
|
|
case '"': // future ext.
|
|
|
|
|
case '\'': // future ext.
|
|
|
|
|
case ',': // future ext.
|
|
|
|
|
case '-': // future ext.
|
|
|
|
|
case ':': // future ext.
|
|
|
|
|
case ';': // future ext.
|
|
|
|
|
case '`': // future ext.
|
|
|
|
|
case '/': // Can't be used in / command
|
2023-01-18 18:17:48 +00:00
|
|
|
|
// magic only after "\v"
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (reg_magic == MAGIC_ALL)
|
|
|
|
|
curchr = Magic(curchr);
|
|
|
|
|
break;
|
|
|
|
|
case '*':
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// * is not magic as the very first character, eg "?*ptr", when
|
|
|
|
|
// after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
|
|
|
|
|
// "\(\*" is not magic, thus must be magic if "after_slash"
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (reg_magic >= MAGIC_ON
|
|
|
|
|
&& !at_start
|
|
|
|
|
&& !(prev_at_start && prevchr == Magic('^'))
|
|
|
|
|
&& (after_slash
|
|
|
|
|
|| (prevchr != Magic('(')
|
|
|
|
|
&& prevchr != Magic('&')
|
|
|
|
|
&& prevchr != Magic('|'))))
|
|
|
|
|
curchr = Magic('*');
|
|
|
|
|
break;
|
|
|
|
|
case '^':
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// '^' is only magic as the very first character and if it's after
|
|
|
|
|
// "\(", "\|", "\&' or "\n"
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (reg_magic >= MAGIC_OFF
|
|
|
|
|
&& (at_start
|
|
|
|
|
|| reg_magic == MAGIC_ALL
|
|
|
|
|
|| prevchr == Magic('(')
|
|
|
|
|
|| prevchr == Magic('|')
|
|
|
|
|
|| prevchr == Magic('&')
|
|
|
|
|
|| prevchr == Magic('n')
|
|
|
|
|
|| (no_Magic(prevchr) == '('
|
|
|
|
|
&& prevprevchr == Magic('%'))))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
curchr = Magic('^');
|
|
|
|
|
at_start = TRUE;
|
|
|
|
|
prev_at_start = FALSE;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
break;
|
2019-09-07 23:16:33 +02:00
|
|
|
|
case '$':
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// '$' is only magic as the very last char and if it's in front of
|
|
|
|
|
// either "\|", "\)", "\&", or "\n"
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (reg_magic >= MAGIC_OFF)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
char_u *p = regparse + 1;
|
|
|
|
|
int is_magic_all = (reg_magic == MAGIC_ALL);
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// ignore \c \C \m \M \v \V and \Z after '$'
|
2019-09-07 23:16:33 +02:00
|
|
|
|
while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
|
2023-01-18 18:17:48 +00:00
|
|
|
|
|| p[1] == 'm' || p[1] == 'M'
|
|
|
|
|
|| p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
|
2019-09-07 23:16:33 +02:00
|
|
|
|
{
|
|
|
|
|
if (p[1] == 'v')
|
|
|
|
|
is_magic_all = TRUE;
|
|
|
|
|
else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
|
|
|
|
|
is_magic_all = FALSE;
|
|
|
|
|
p += 2;
|
|
|
|
|
}
|
|
|
|
|
if (p[0] == NUL
|
|
|
|
|
|| (p[0] == '\\'
|
|
|
|
|
&& (p[1] == '|' || p[1] == '&' || p[1] == ')'
|
|
|
|
|
|| p[1] == 'n'))
|
|
|
|
|
|| (is_magic_all
|
2023-01-18 18:17:48 +00:00
|
|
|
|
&& (p[0] == '|' || p[0] == '&' || p[0] == ')'))
|
2019-09-07 23:16:33 +02:00
|
|
|
|
|| reg_magic == MAGIC_ALL)
|
|
|
|
|
curchr = Magic('$');
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
break;
|
2019-09-07 23:16:33 +02:00
|
|
|
|
case '\\':
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
int c = regparse[1];
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (c == NUL)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
curchr = '\\'; // trailing '\'
|
2022-01-31 14:59:41 +00:00
|
|
|
|
else if (c <= '~' && META_flags[c])
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
|
|
|
|
|
* META contains everything that may be magic sometimes,
|
|
|
|
|
* except ^ and $ ("\^" and "\$" are only magic after
|
|
|
|
|
* "\V"). We now fetch the next character and toggle its
|
|
|
|
|
* magicness. Therefore, \ is so meta-magic that it is
|
|
|
|
|
* not in META.
|
|
|
|
|
*/
|
|
|
|
|
curchr = -1;
|
|
|
|
|
prev_at_start = at_start;
|
2019-12-05 21:10:38 +01:00
|
|
|
|
at_start = FALSE; // be able to say "/\*ptr"
|
2019-09-07 23:16:33 +02:00
|
|
|
|
++regparse;
|
|
|
|
|
++after_slash;
|
|
|
|
|
peekchr();
|
|
|
|
|
--regparse;
|
|
|
|
|
--after_slash;
|
|
|
|
|
curchr = toggle_Magic(curchr);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2019-09-07 23:16:33 +02:00
|
|
|
|
else if (vim_strchr(REGEXP_ABBR, c))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
|
|
|
|
|
* Handle abbreviations, like "\t" for TAB -- webb
|
|
|
|
|
*/
|
|
|
|
|
curchr = backslash_trans(c);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2019-09-07 23:16:33 +02:00
|
|
|
|
else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
|
|
|
|
|
curchr = toggle_Magic(c);
|
|
|
|
|
else
|
2013-05-29 18:45:11 +02:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
|
|
|
|
|
* Next character can never be (made) magic?
|
|
|
|
|
* Then backslashing it won't do anything.
|
|
|
|
|
*/
|
|
|
|
|
if (has_mbyte)
|
|
|
|
|
curchr = (*mb_ptr2char)(regparse + 1);
|
|
|
|
|
else
|
|
|
|
|
curchr = c;
|
2013-05-29 18:45:11 +02:00
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
default:
|
|
|
|
|
if (has_mbyte)
|
|
|
|
|
curchr = (*mb_ptr2char)(regparse);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
return curchr;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* Eat one lexed character. Do this in a way that we can undo it.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static void
|
|
|
|
|
skipchr(void)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// peekchr() eats a backslash, do the same here
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (*regparse == '\\')
|
|
|
|
|
prevchr_len = 1;
|
|
|
|
|
else
|
|
|
|
|
prevchr_len = 0;
|
|
|
|
|
if (regparse[prevchr_len] != NUL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (enc_utf8)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// exclude composing chars that mb_ptr2len does include
|
2019-09-07 23:16:33 +02:00
|
|
|
|
prevchr_len += utf_ptr2len(regparse + prevchr_len);
|
|
|
|
|
else if (has_mbyte)
|
|
|
|
|
prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
|
|
|
|
|
else
|
|
|
|
|
++prevchr_len;
|
|
|
|
|
}
|
|
|
|
|
regparse += prevchr_len;
|
|
|
|
|
prev_at_start = at_start;
|
|
|
|
|
at_start = FALSE;
|
|
|
|
|
prevprevchr = prevchr;
|
|
|
|
|
prevchr = curchr;
|
2019-12-05 21:10:38 +01:00
|
|
|
|
curchr = nextchr; // use previously unget char, or -1
|
2019-09-07 23:16:33 +02:00
|
|
|
|
nextchr = -1;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
|
|
|
|
|
* Skip a character while keeping the value of prev_at_start for at_start.
|
|
|
|
|
* prevchr and prevprevchr are also kept.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
skipchr_keepstart(void)
|
|
|
|
|
{
|
|
|
|
|
int as = prev_at_start;
|
|
|
|
|
int pr = prevchr;
|
|
|
|
|
int prpr = prevprevchr;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
skipchr();
|
|
|
|
|
at_start = as;
|
|
|
|
|
prevchr = pr;
|
|
|
|
|
prevprevchr = prpr;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
 * Lexical analyzer entry point: return the next pattern character (in
 * Magic() form where applicable) and advance past it.
 */
    static int
getchr(void)
{
    int	    result = peekchr();

    skipchr();
    return result;
}
|
|
|
|
|
|
2006-03-06 23:29:24 +00:00
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* put character back. Works only once!
|
2006-03-06 23:29:24 +00:00
|
|
|
|
*/
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static void
|
|
|
|
|
ungetchr(void)
|
2006-03-06 23:29:24 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
nextchr = curchr;
|
|
|
|
|
curchr = prevchr;
|
|
|
|
|
prevchr = prevprevchr;
|
|
|
|
|
at_start = prev_at_start;
|
|
|
|
|
prev_at_start = FALSE;
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Backup regparse, so that it's at the same position as before the
|
|
|
|
|
// getchr().
|
2019-09-07 23:16:33 +02:00
|
|
|
|
regparse -= prevchr_len;
|
2006-03-06 23:29:24 +00:00
|
|
|
|
}
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* Get and return the value of the hex string at the current position.
|
|
|
|
|
* Return -1 if there is no valid hex number.
|
|
|
|
|
* The position is updated:
|
|
|
|
|
* blahblah\%x20asdf
|
|
|
|
|
* before-^ ^-after
|
|
|
|
|
* The parameter controls the maximum number of input characters. This will be
|
|
|
|
|
* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static long
|
|
|
|
|
gethexchrs(int maxinputlen)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
long_u nr = 0;
|
|
|
|
|
int c;
|
|
|
|
|
int i;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
for (i = 0; i < maxinputlen; ++i)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
c = regparse[0];
|
|
|
|
|
if (!vim_isxdigit(c))
|
|
|
|
|
break;
|
|
|
|
|
nr <<= 4;
|
|
|
|
|
nr |= hex2nr(c);
|
|
|
|
|
++regparse;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (i == 0)
|
|
|
|
|
return -1;
|
|
|
|
|
return (long)nr;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* Get and return the value of the decimal string immediately after the
|
|
|
|
|
* current position. Return -1 for invalid. Consumes all digits.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static long
|
|
|
|
|
getdecchrs(void)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
long_u nr = 0;
|
|
|
|
|
int c;
|
|
|
|
|
int i;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
for (i = 0; ; ++i)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
c = regparse[0];
|
|
|
|
|
if (c < '0' || c > '9')
|
|
|
|
|
break;
|
|
|
|
|
nr *= 10;
|
|
|
|
|
nr += c - '0';
|
|
|
|
|
++regparse;
|
2019-12-05 21:10:38 +01:00
|
|
|
|
curchr = -1; // no longer valid
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2019-09-07 23:16:33 +02:00
|
|
|
|
|
|
|
|
|
if (i == 0)
|
|
|
|
|
return -1;
|
|
|
|
|
return (long)nr;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-05-29 18:45:11 +02:00
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* get and return the value of the octal string immediately after the current
|
|
|
|
|
* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
|
|
|
|
|
* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
|
|
|
|
|
* treat 8 or 9 as recognised characters. Position is updated:
|
|
|
|
|
* blahblah\%o210asdf
|
|
|
|
|
* before-^ ^-after
|
2013-05-29 18:45:11 +02:00
|
|
|
|
*/
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static long
|
|
|
|
|
getoctchrs(void)
|
2013-05-29 18:45:11 +02:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
long_u nr = 0;
|
|
|
|
|
int c;
|
|
|
|
|
int i;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
for (i = 0; i < 3 && nr < 040; ++i)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
c = regparse[0];
|
|
|
|
|
if (c < '0' || c > '7')
|
2004-06-13 20:20:40 +00:00
|
|
|
|
break;
|
2019-09-07 23:16:33 +02:00
|
|
|
|
nr <<= 3;
|
|
|
|
|
nr |= hex2nr(c);
|
|
|
|
|
++regparse;
|
2009-11-25 17:21:32 +00:00
|
|
|
|
}
|
2013-06-01 14:42:56 +02:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (i == 0)
|
|
|
|
|
return -1;
|
|
|
|
|
return (long)nr;
|
2013-06-01 14:42:56 +02:00
|
|
|
|
}
|
|
|
|
|
|
2013-05-19 19:40:29 +02:00
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* read_limits - Read two integers to be taken as a minimum and maximum.
|
|
|
|
|
* If the first character is '-', then the range is reversed.
|
|
|
|
|
* Should end with 'end'. If minval is missing, zero is default, if maxval is
|
|
|
|
|
* missing, a very big number is the default.
|
2013-05-19 19:40:29 +02:00
|
|
|
|
*/
|
2004-06-13 20:20:40 +00:00
|
|
|
|
static int
|
2019-09-07 23:16:33 +02:00
|
|
|
|
read_limits(long *minval, long *maxval)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
int reverse = FALSE;
|
|
|
|
|
char_u *first_char;
|
|
|
|
|
long tmp;
|
2005-02-22 08:39:57 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (*regparse == '-')
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Starts with '-', so reverse the range later
|
2019-09-07 23:16:33 +02:00
|
|
|
|
regparse++;
|
|
|
|
|
reverse = TRUE;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2019-09-07 23:16:33 +02:00
|
|
|
|
first_char = regparse;
|
|
|
|
|
*minval = getdigits(®parse);
|
2019-12-05 21:10:38 +01:00
|
|
|
|
if (*regparse == ',') // There is a comma
|
2019-09-07 23:16:33 +02:00
|
|
|
|
{
|
|
|
|
|
if (vim_isdigit(*++regparse))
|
|
|
|
|
*maxval = getdigits(®parse);
|
|
|
|
|
else
|
|
|
|
|
*maxval = MAX_LIMIT;
|
|
|
|
|
}
|
|
|
|
|
else if (VIM_ISDIGIT(*first_char))
|
2019-12-05 21:10:38 +01:00
|
|
|
|
*maxval = *minval; // It was \{n} or \{-n}
|
2019-09-07 23:16:33 +02:00
|
|
|
|
else
|
2019-12-05 21:10:38 +01:00
|
|
|
|
*maxval = MAX_LIMIT; // It was \{} or \{-}
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (*regparse == '\\')
|
2019-12-05 21:10:38 +01:00
|
|
|
|
regparse++; // Allow either \{...} or \{...\}
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (*regparse != '}')
|
2022-01-02 21:26:16 +00:00
|
|
|
|
EMSG2_RET_FAIL(_(e_syntax_error_in_str_curlies),
|
2019-09-07 23:16:33 +02:00
|
|
|
|
reg_magic == MAGIC_ALL);
|
2005-03-06 23:38:09 +00:00
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* Reverse the range if there was a '-', or make sure it is in the right
|
|
|
|
|
* order otherwise.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
|
2005-03-06 23:38:09 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
tmp = *minval;
|
|
|
|
|
*minval = *maxval;
|
|
|
|
|
*maxval = tmp;
|
2005-03-06 23:38:09 +00:00
|
|
|
|
}
|
2019-12-05 21:10:38 +01:00
|
|
|
|
skipchr(); // let's be friends with the lexer again
|
2019-09-07 23:16:33 +02:00
|
|
|
|
return OK;
|
2005-03-06 23:38:09 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* vim_regexec and friends
|
2005-03-06 23:38:09 +00:00
|
|
|
|
*/
|
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
|
|
|
|
|
* Global work variables for vim_regexec().
|
|
|
|
|
*/
|
2005-03-06 23:38:09 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static void cleanup_subexpr(void);
|
|
|
|
|
#ifdef FEAT_SYN_HL
|
|
|
|
|
static void cleanup_zsubexpr(void);
|
|
|
|
|
#endif
|
|
|
|
|
static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen);
|
2005-03-06 23:38:09 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
|
|
|
|
|
* Sometimes need to save a copy of a line. Since alloc()/free() is very
|
|
|
|
|
* slow, we keep one allocated piece of memory and only re-allocate it when
|
|
|
|
|
* it's too small. It's freed in bt_regexec_both() when finished.
|
|
|
|
|
*/
|
|
|
|
|
static char_u *reg_tofree = NULL;
|
|
|
|
|
static unsigned reg_tofreelen;
|
2005-03-06 23:38:09 +00:00
|
|
|
|
|
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* Structure used to store the execution state of the regex engine.
|
|
|
|
|
* Which ones are set depends on whether a single-line or multi-line match is
|
|
|
|
|
* done:
|
|
|
|
|
* single-line multi-line
|
|
|
|
|
* reg_match		&regmatch_T		NULL
|
|
|
|
|
* reg_mmatch		NULL			&regmmatch_T
|
|
|
|
|
* reg_startp reg_match->startp <invalid>
|
|
|
|
|
* reg_endp reg_match->endp <invalid>
|
|
|
|
|
* reg_startpos <invalid> reg_mmatch->startpos
|
|
|
|
|
* reg_endpos <invalid> reg_mmatch->endpos
|
|
|
|
|
* reg_win NULL window in which to search
|
|
|
|
|
* reg_buf curbuf buffer in which to search
|
|
|
|
|
* reg_firstlnum <invalid> first line in which to search
|
|
|
|
|
* reg_maxline 0 last line nr
|
|
|
|
|
* reg_line_lbr FALSE or TRUE FALSE
|
2005-03-06 23:38:09 +00:00
|
|
|
|
*/
|
2019-09-07 23:16:33 +02:00
|
|
|
|
typedef struct {
    // Exactly one of these two is set: "reg_match" for a single-line match,
    // "reg_mmatch" for a multi-line match; the other one is NULL.
    regmatch_T		*reg_match;
    regmmatch_T		*reg_mmatch;

    // Sub-match positions.  The pointer pair is only valid for single-line
    // matching, the lpos_T pair only for multi-line matching.
    char_u		**reg_startp;
    char_u		**reg_endp;
    lpos_T		*reg_startpos;
    lpos_T		*reg_endpos;

    win_T		*reg_win;	// window in which to search, NULL for
					// single-line
    buf_T		*reg_buf;	// buffer in which to search
    linenr_T		reg_firstlnum;	// first line in which to search
					// (multi-line only)
    linenr_T		reg_maxline;	// last line nr, 0 for single-line
    int			reg_line_lbr;	// "\n" in string is line break

    // The current match-position is stored in these variables:
    linenr_T	lnum;		// line number, relative to first line
    char_u	*line;		// start of current line
    char_u	*input;		// current input, points into "line"

    int	need_clear_subexpr;	// subexpressions still need to be cleared
#ifdef FEAT_SYN_HL
    int	need_clear_zsubexpr;	// extmatch subexpressions still need to be
				// cleared
#endif

    // Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
    // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
    // contains '\c' or '\C' the value is overruled.
    int			reg_ic;

    // Similar to "reg_ic", but only for 'combining' characters.  Set with \Z
    // flag in the regexp.  Defaults to false, always.
    int			reg_icombine;

    // Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
    // there is no maximum.
    colnr_T		reg_maxcol;

    // State for the NFA engine regexec.
    int nfa_has_zend;	    // NFA regexp \ze operator encountered.
    int nfa_has_backref;    // NFA regexp \1 .. \9 encountered.
    int nfa_nsubexpr;	    // Number of sub expressions actually being used
			    // during execution. 1 if only the whole match
			    // (subexpr 0) is used.
    // listid is global, so that it increases on recursive calls to
    // nfa_regmatch(), which means we don't have to clear the lastlist field of
    // all the states.
    int nfa_listid;
    int nfa_alt_listid;

#ifdef FEAT_SYN_HL
    int nfa_has_zsubexpr;   // NFA regexp has \z( ), set zsubexpr.
#endif
} regexec_T;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static regexec_T rex;
|
|
|
|
|
static int rex_in_use = FALSE;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
|
|
|
|
|
* Return TRUE if character 'c' is included in 'iskeyword' option for
|
|
|
|
|
* "reg_buf" buffer.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
|
|
|
|
reg_iswordc(int c)
|
|
|
|
|
{
|
|
|
|
|
return vim_iswordc_buf(c, rex.reg_buf);
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2024-05-12 00:07:17 +02:00
|
|
|
|
#ifdef FEAT_EVAL
|
|
|
|
|
static int can_f_submatch = FALSE; // TRUE when submatch() can be used
|
|
|
|
|
|
|
|
|
|
// This struct is used for reg_submatch(). Needed for when the
|
|
|
|
|
// substitution string is an expression that contains a call to substitute()
|
|
|
|
|
// and submatch().
|
|
|
|
|
typedef struct {
|
|
|
|
|
regmatch_T *sm_match;
|
|
|
|
|
regmmatch_T *sm_mmatch;
|
|
|
|
|
linenr_T sm_firstlnum;
|
|
|
|
|
linenr_T sm_maxline;
|
|
|
|
|
int sm_line_lbr;
|
|
|
|
|
} regsubmatch_T;
|
|
|
|
|
|
|
|
|
|
static regsubmatch_T rsm; // can only be used when can_f_submatch is TRUE
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// Bit flags for the "flags" argument of reg_getline_common().
typedef enum
{
    RGLF_LINE = 0x01,		// store the line pointer in "*line"
    RGLF_LENGTH = 0x02		// store the line length in "*length"
#ifdef FEAT_EVAL
    ,
    RGLF_SUBMATCH = 0x04	// take first/last line numbers from "rsm"
				// instead of "rex"
#endif
} reg_getline_flags_T;
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// common code for reg_getline(), reg_getline_len(), reg_getline_submatch() and
|
|
|
|
|
// reg_getline_submatch_len().
|
|
|
|
|
// the flags argument (which is a bitmask) controls what info is to be returned and whether
|
|
|
|
|
// or not submatch is in effect.
|
|
|
|
|
// note:
|
|
|
|
|
// submatch is available only if FEAT_EVAL is defined.
|
|
|
|
|
static void
|
|
|
|
|
reg_getline_common(linenr_T lnum, reg_getline_flags_T flags, char_u **line, colnr_T *length)
|
|
|
|
|
{
|
|
|
|
|
int get_line = flags & RGLF_LINE;
|
|
|
|
|
int get_length = flags & RGLF_LENGTH;
|
|
|
|
|
linenr_T firstlnum;
|
|
|
|
|
linenr_T maxline;
|
|
|
|
|
|
|
|
|
|
#ifdef FEAT_EVAL
|
|
|
|
|
if (flags & RGLF_SUBMATCH)
|
|
|
|
|
{
|
|
|
|
|
firstlnum = rsm.sm_firstlnum + lnum;
|
|
|
|
|
maxline = rsm.sm_maxline;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
#endif
|
|
|
|
|
{
|
|
|
|
|
firstlnum = rex.reg_firstlnum + lnum;
|
|
|
|
|
maxline = rex.reg_maxline;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// when looking behind for a match/no-match lnum is negative. but we
|
|
|
|
|
// can't go before line 1.
|
|
|
|
|
if (firstlnum < 1)
|
|
|
|
|
{
|
|
|
|
|
if (get_line)
|
|
|
|
|
*line = NULL;
|
|
|
|
|
if (get_length)
|
|
|
|
|
*length = 0;
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (lnum > maxline)
|
|
|
|
|
{
|
|
|
|
|
// must have matched the "\n" in the last line.
|
|
|
|
|
if (get_line)
|
|
|
|
|
*line = (char_u *)"";
|
|
|
|
|
if (get_length)
|
|
|
|
|
*length = 0;
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (get_line)
|
|
|
|
|
*line = ml_get_buf(rex.reg_buf, firstlnum, FALSE);
|
|
|
|
|
if (get_length)
|
|
|
|
|
*length = ml_get_buf_len(rex.reg_buf, firstlnum);
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
|
|
|
|
|
* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
|
|
|
|
|
*/
|
|
|
|
|
static char_u *
|
|
|
|
|
reg_getline(linenr_T lnum)
|
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
char_u *line;
|
|
|
|
|
|
|
|
|
|
reg_getline_common(lnum, RGLF_LINE, &line, NULL);
|
|
|
|
|
|
|
|
|
|
return line;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Get length of line "lnum", which is relative to "reg_firstlnum".
|
|
|
|
|
*/
|
|
|
|
|
static colnr_T
|
|
|
|
|
reg_getline_len(linenr_T lnum)
|
|
|
|
|
{
|
|
|
|
|
colnr_T length;
|
|
|
|
|
|
|
|
|
|
reg_getline_common(lnum, RGLF_LENGTH, NULL, &length);
|
|
|
|
|
|
|
|
|
|
return length;
|
2019-09-07 23:16:33 +02:00
|
|
|
|
}
|
2019-01-24 16:39:02 +01:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
#ifdef FEAT_SYN_HL
|
2019-12-05 21:10:38 +01:00
|
|
|
|
static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning
|
|
|
|
|
static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches
|
|
|
|
|
static lpos_T reg_startzpos[NSUBEXP]; // idem, beginning pos
|
|
|
|
|
static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos
|
2019-09-07 23:16:33 +02:00
|
|
|
|
#endif
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// TRUE if using multi-line regexp.
|
2019-09-07 23:16:33 +02:00
|
|
|
|
#define REG_MULTI (rex.reg_match == NULL)
|
|
|
|
|
|
|
|
|
|
#ifdef FEAT_SYN_HL
|
|
|
|
|
/*
|
|
|
|
|
* Create a new extmatch and mark it as referenced once.
|
|
|
|
|
*/
|
|
|
|
|
static reg_extmatch_T *
|
|
|
|
|
make_extmatch(void)
|
|
|
|
|
{
|
|
|
|
|
reg_extmatch_T *em;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
em = ALLOC_CLEAR_ONE(reg_extmatch_T);
|
|
|
|
|
if (em != NULL)
|
|
|
|
|
em->refcnt = 1;
|
|
|
|
|
return em;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
|
|
|
|
|
* Add a reference to an extmatch.
|
|
|
|
|
*/
|
|
|
|
|
reg_extmatch_T *
|
|
|
|
|
ref_extmatch(reg_extmatch_T *em)
|
|
|
|
|
{
|
|
|
|
|
if (em != NULL)
|
|
|
|
|
em->refcnt++;
|
|
|
|
|
return em;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
|
|
|
|
|
* Remove a reference to an extmatch. If there are no references left, free
|
|
|
|
|
* the info.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
unref_extmatch(reg_extmatch_T *em)
|
|
|
|
|
{
|
|
|
|
|
int i;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (em != NULL && --em->refcnt <= 0)
|
|
|
|
|
{
|
|
|
|
|
for (i = 0; i < NSUBEXP; ++i)
|
|
|
|
|
vim_free(em->matches[i]);
|
|
|
|
|
vim_free(em);
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2019-09-07 23:16:33 +02:00
|
|
|
|
#endif
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
|
|
|
|
/*
|
2019-09-07 23:16:33 +02:00
|
|
|
|
* Get class of previous character.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
2019-09-07 23:16:33 +02:00
|
|
|
|
static int
|
|
|
|
|
reg_prev_class(void)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
if (rex.input > rex.line)
|
|
|
|
|
return mb_get_class_buf(rex.input - 1
|
|
|
|
|
- (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
/*
 * Return TRUE if the current rex.input position matches the Visual area.
 *
 * The area is taken from the active Visual selection when there is one,
 * otherwise from the selection saved in curbuf->b_visual.  Returns FALSE
 * when the match is not done in the current buffer, when there is no Visual
 * position (VIsual.lnum is zero) or when matching is not multi-line.
 */
    static int
reg_match_visual(void)
{
    pos_T	top, bot;	// first and last position of the area
    linenr_T    lnum;
    colnr_T	col;
    win_T	*wp = rex.reg_win == NULL ? curwin : rex.reg_win;
    int		mode;
    colnr_T	start, end;
    colnr_T	start2, end2;
    colnr_T	cols;
    colnr_T	curswant;

    // Check if the buffer is the current buffer and not using a string.
    if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI)
	return FALSE;

    if (VIsual_active)
    {
	// Visual mode is active: the area runs between VIsual and the
	// cursor, whichever comes first is "top".
	if (LT_POS(VIsual, wp->w_cursor))
	{
	    top = VIsual;
	    bot = wp->w_cursor;
	}
	else
	{
	    top = wp->w_cursor;
	    bot = VIsual;
	}
	mode = VIsual_mode;
	curswant = wp->w_curswant;
    }
    else
    {
	// Not in Visual mode: use the area stored in the buffer.
	if (LT_POS(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
	{
	    top = curbuf->b_visual.vi_start;
	    bot = curbuf->b_visual.vi_end;
	}
	else
	{
	    top = curbuf->b_visual.vi_end;
	    bot = curbuf->b_visual.vi_start;
	}
	// a substitute command may have removed some lines
	if (bot.lnum > curbuf->b_ml.ml_line_count)
	    bot.lnum = curbuf->b_ml.ml_line_count;
	mode = curbuf->b_visual.vi_mode;
	curswant = curbuf->b_visual.vi_curswant;
    }

    // rex.lnum is relative to rex.reg_firstlnum, make it absolute.
    lnum = rex.lnum + rex.reg_firstlnum;
    if (lnum < top.lnum || lnum > bot.lnum)
	return FALSE;

    // Byte offset of the current input position in the line.
    col = (colnr_T)(rex.input - rex.line);
    if (mode == 'v')
    {
	// Only the first and the last line restrict the column.
	// NOTE(review): "p_sel" is presumably the 'selection' option, with a
	// value starting with 'e' meaning the end is exclusive - confirm.
	if ((lnum == top.lnum && col < top.col)
		|| (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
	    return FALSE;
    }
    else if (mode == Ctrl_V)
    {
	// Compute the column range spanned by both corner positions.
	getvvcol(wp, &top, &start, NULL, &end);
	getvvcol(wp, &bot, &start2, NULL, &end2);
	if (start2 < start)
	    start = start2;
	if (end2 > end)
	    end = end2;
	if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL)
	    end = MAXCOL;

	// getvvcol() flushes rex.line, need to get it again
	rex.line = reg_getline(rex.lnum);
	rex.input = rex.line + col;

	cols = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, rex.line, col);
	if (cols < start || cols > end - (*p_sel == 'e'))
	    return FALSE;
    }
    // Any other mode: being inside the line range is sufficient.
    return TRUE;
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Check the regexp program for its magic number.
|
|
|
|
|
* Return TRUE if it's wrong.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
prog_magic_wrong(void)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2013-05-19 19:40:29 +02:00
|
|
|
|
regprog_T *prog;
|
|
|
|
|
|
2016-10-02 16:51:57 +02:00
|
|
|
|
prog = REG_MULTI ? rex.reg_mmatch->regprog : rex.reg_match->regprog;
|
2013-05-19 19:40:29 +02:00
|
|
|
|
if (prog->engine == &nfa_regengine)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// For NFA matcher we don't check the magic
|
2013-05-19 19:40:29 +02:00
|
|
|
|
return FALSE;
|
|
|
|
|
|
|
|
|
|
if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2023-05-31 17:12:14 +01:00
|
|
|
|
iemsg(e_corrupted_regexp_program);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
return TRUE;
|
|
|
|
|
}
|
|
|
|
|
return FALSE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Cleanup the subexpressions, if this wasn't done yet.
|
|
|
|
|
* This construction is used to clear the subexpressions only when they are
|
|
|
|
|
* used (to increase speed).
|
|
|
|
|
*/
|
|
|
|
|
static void
|
2016-01-30 20:31:25 +01:00
|
|
|
|
cleanup_subexpr(void)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2023-01-18 18:17:48 +00:00
|
|
|
|
if (!rex.need_clear_subexpr)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (REG_MULTI)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2023-01-18 18:17:48 +00:00
|
|
|
|
// Use 0xff to set lnum to -1
|
|
|
|
|
vim_memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
|
|
|
|
|
vim_memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2023-01-18 18:17:48 +00:00
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
|
|
|
|
|
vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
|
|
|
|
|
}
|
|
|
|
|
rex.need_clear_subexpr = FALSE;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef FEAT_SYN_HL
|
|
|
|
|
static void
|
2016-01-30 20:31:25 +01:00
|
|
|
|
cleanup_zsubexpr(void)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2023-01-18 18:17:48 +00:00
|
|
|
|
if (!rex.need_clear_zsubexpr)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (REG_MULTI)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2023-01-18 18:17:48 +00:00
|
|
|
|
// Use 0xff to set lnum to -1
|
|
|
|
|
vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
|
|
|
|
|
vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
|
|
|
|
|
vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2023-01-18 18:17:48 +00:00
|
|
|
|
rex.need_clear_zsubexpr = FALSE;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/*
|
2018-07-17 05:43:58 +02:00
|
|
|
|
* Advance rex.lnum, rex.line and rex.input to the next line.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
|
|
|
|
static void
|
2016-01-30 20:31:25 +01:00
|
|
|
|
reg_nextline(void)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2018-07-17 05:43:58 +02:00
|
|
|
|
rex.line = reg_getline(++rex.lnum);
|
|
|
|
|
rex.input = rex.line;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
fast_breakcheck();
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-14 20:31:28 +02:00
|
|
|
|
/*
|
|
|
|
|
* Check whether a backreference matches.
|
|
|
|
|
* Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
|
2013-11-21 17:13:00 +01:00
|
|
|
|
* If "bytelen" is not NULL, it is set to the byte length of the match in the
|
|
|
|
|
* last line.
|
2013-06-14 20:31:28 +02:00
|
|
|
|
*/
|
|
|
|
|
static int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
match_with_backref(
|
|
|
|
|
linenr_T start_lnum,
|
|
|
|
|
colnr_T start_col,
|
|
|
|
|
linenr_T end_lnum,
|
|
|
|
|
colnr_T end_col,
|
|
|
|
|
int *bytelen)
|
2013-06-14 20:31:28 +02:00
|
|
|
|
{
|
|
|
|
|
linenr_T clnum = start_lnum;
|
|
|
|
|
colnr_T ccol = start_col;
|
|
|
|
|
int len;
|
|
|
|
|
char_u *p;
|
|
|
|
|
|
|
|
|
|
if (bytelen != NULL)
|
|
|
|
|
*bytelen = 0;
|
|
|
|
|
for (;;)
|
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Since getting one line may invalidate the other, need to make copy.
|
|
|
|
|
// Slow!
|
2018-07-17 05:43:58 +02:00
|
|
|
|
if (rex.line != reg_tofree)
|
2013-06-14 20:31:28 +02:00
|
|
|
|
{
|
2018-07-17 05:43:58 +02:00
|
|
|
|
len = (int)STRLEN(rex.line);
|
2013-06-14 20:31:28 +02:00
|
|
|
|
if (reg_tofree == NULL || len >= (int)reg_tofreelen)
|
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
len += 50; // get some extra
|
2013-06-14 20:31:28 +02:00
|
|
|
|
vim_free(reg_tofree);
|
|
|
|
|
reg_tofree = alloc(len);
|
|
|
|
|
if (reg_tofree == NULL)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
return RA_FAIL; // out of memory!
|
2013-06-14 20:31:28 +02:00
|
|
|
|
reg_tofreelen = len;
|
|
|
|
|
}
|
2018-07-17 05:43:58 +02:00
|
|
|
|
STRCPY(reg_tofree, rex.line);
|
|
|
|
|
rex.input = reg_tofree + (rex.input - rex.line);
|
|
|
|
|
rex.line = reg_tofree;
|
2013-06-14 20:31:28 +02:00
|
|
|
|
}
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Get the line to compare with.
|
2013-06-14 20:31:28 +02:00
|
|
|
|
p = reg_getline(clnum);
|
|
|
|
|
if (clnum == end_lnum)
|
|
|
|
|
len = end_col - ccol;
|
|
|
|
|
else
|
2024-05-12 00:07:17 +02:00
|
|
|
|
len = (int)reg_getline_len(clnum) - ccol;
|
2013-06-14 20:31:28 +02:00
|
|
|
|
|
2018-07-17 05:43:58 +02:00
|
|
|
|
if (cstrncmp(p + ccol, rex.input, &len) != 0)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
return RA_NOMATCH; // doesn't match
|
2013-06-14 20:31:28 +02:00
|
|
|
|
if (bytelen != NULL)
|
|
|
|
|
*bytelen += len;
|
|
|
|
|
if (clnum == end_lnum)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
break; // match and at end!
|
2018-07-17 05:43:58 +02:00
|
|
|
|
if (rex.lnum >= rex.reg_maxline)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
return RA_NOMATCH; // text too short
|
2013-06-14 20:31:28 +02:00
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Advance to next line.
|
2013-06-14 20:31:28 +02:00
|
|
|
|
reg_nextline();
|
2013-11-21 17:13:00 +01:00
|
|
|
|
if (bytelen != NULL)
|
|
|
|
|
*bytelen = 0;
|
2013-06-14 20:31:28 +02:00
|
|
|
|
++clnum;
|
|
|
|
|
ccol = 0;
|
|
|
|
|
if (got_int)
|
|
|
|
|
return RA_FAIL;
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// found a match! Note that rex.line may now point to a copy of the line,
|
|
|
|
|
// that should not matter.
|
2013-06-14 20:31:28 +02:00
|
|
|
|
return RA_MATCH;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2014-09-09 17:18:49 +02:00
|
|
|
|
/*
|
|
|
|
|
* Used in a place where no * or \+ can follow.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
re_mult_next(char *what)
|
2014-09-09 17:18:49 +02:00
|
|
|
|
{
|
|
|
|
|
if (re_multi_type(peekchr()) == MULTI_MULT)
|
2019-01-14 22:46:15 +01:00
|
|
|
|
{
|
2022-01-05 20:24:39 +00:00
|
|
|
|
semsg(_(e_nfa_regexp_cannot_repeat_str), what);
|
2019-01-14 22:46:15 +01:00
|
|
|
|
rc_did_emsg = TRUE;
|
|
|
|
|
return FAIL;
|
|
|
|
|
}
|
2014-09-09 17:18:49 +02:00
|
|
|
|
return OK;
|
|
|
|
|
}
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
// One decomposition: up to three code points, unused slots are zero.
typedef struct
{
    int a, b, c;
} decomp_T;

// Decompositions for the characters 0xfb20 - 0xfb4f, indexed by
// (character - 0xfb20).  Entries marked UNUSED map to themselves.
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2, 0, 0},		// 0xfb20	alt ayin
    {0x5d0, 0, 0},		// 0xfb21	alt alef
    {0x5d3, 0, 0},		// 0xfb22	alt dalet
    {0x5d4, 0, 0},		// 0xfb23	alt he
    {0x5db, 0, 0},		// 0xfb24	alt kaf
    {0x5dc, 0, 0},		// 0xfb25	alt lamed
    {0x5dd, 0, 0},		// 0xfb26	alt mem-sofit
    {0x5e8, 0, 0},		// 0xfb27	alt resh
    {0x5ea, 0, 0},		// 0xfb28	alt tav
    {'+', 0, 0},		// 0xfb29	alt plus
    {0x5e9, 0x5c1, 0},		// 0xfb2a	shin+shin-dot
    {0x5e9, 0x5c2, 0},		// 0xfb2b	shin+sin-dot
    {0x5e9, 0x5c1, 0x5bc},	// 0xfb2c	shin+shin-dot+dagesh
    {0x5e9, 0x5c2, 0x5bc},	// 0xfb2d	shin+sin-dot+dagesh
    {0x5d0, 0x5b7, 0},		// 0xfb2e	alef+patah
    {0x5d0, 0x5b8, 0},		// 0xfb2f	alef+qamats
    {0x5d0, 0x5b4, 0},		// 0xfb30	alef+hiriq
    {0x5d1, 0x5bc, 0},		// 0xfb31	bet+dagesh
    {0x5d2, 0x5bc, 0},		// 0xfb32	gimel+dagesh
    {0x5d3, 0x5bc, 0},		// 0xfb33	dalet+dagesh
    {0x5d4, 0x5bc, 0},		// 0xfb34	he+dagesh
    {0x5d5, 0x5bc, 0},		// 0xfb35	vav+dagesh
    {0x5d6, 0x5bc, 0},		// 0xfb36	zayin+dagesh
    {0xfb37, 0, 0},		// 0xfb37	-- UNUSED
    {0x5d8, 0x5bc, 0},		// 0xfb38	tet+dagesh
    {0x5d9, 0x5bc, 0},		// 0xfb39	yud+dagesh
    {0x5da, 0x5bc, 0},		// 0xfb3a	kaf sofit+dagesh
    {0x5db, 0x5bc, 0},		// 0xfb3b	kaf+dagesh
    {0x5dc, 0x5bc, 0},		// 0xfb3c	lamed+dagesh
    {0xfb3d, 0, 0},		// 0xfb3d	-- UNUSED
    {0x5de, 0x5bc, 0},		// 0xfb3e	mem+dagesh
    {0xfb3f, 0, 0},		// 0xfb3f	-- UNUSED
    {0x5e0, 0x5bc, 0},		// 0xfb40	nun+dagesh
    {0x5e1, 0x5bc, 0},		// 0xfb41	samech+dagesh
    {0xfb42, 0, 0},		// 0xfb42	-- UNUSED
    {0x5e3, 0x5bc, 0},		// 0xfb43	pe sofit+dagesh
    {0x5e4, 0x5bc, 0},		// 0xfb44	pe+dagesh
    {0xfb45, 0, 0},		// 0xfb45	-- UNUSED
    {0x5e6, 0x5bc, 0},		// 0xfb46	tsadi+dagesh
    {0x5e7, 0x5bc, 0},		// 0xfb47	qof+dagesh
    {0x5e8, 0x5bc, 0},		// 0xfb48	resh+dagesh
    {0x5e9, 0x5bc, 0},		// 0xfb49	shin+dagesh
    {0x5ea, 0x5bc, 0},		// 0xfb4a	tav+dagesh
    {0x5d5, 0x5b9, 0},		// 0xfb4b	vav+holam
    {0x5d1, 0x5bf, 0},		// 0xfb4c	bet+rafe
    {0x5db, 0x5bf, 0},		// 0xfb4d	kaf+rafe
    {0x5e4, 0x5bf, 0},		// 0xfb4e	pe+rafe
    {0x5d0, 0x5dc, 0}		// 0xfb4f	alef-lamed
};
|
|
|
|
|
|
|
|
|
|
static void
|
2016-01-30 20:31:25 +01:00
|
|
|
|
mb_decompose(int c, int *c1, int *c2, int *c3)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
decomp_T d;
|
|
|
|
|
|
2013-05-21 21:37:20 +02:00
|
|
|
|
if (c >= 0xfb20 && c <= 0xfb4f)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
d = decomp_table[c - 0xfb20];
|
|
|
|
|
*c1 = d.a;
|
|
|
|
|
*c2 = d.b;
|
|
|
|
|
*c3 = d.c;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
*c1 = c;
|
|
|
|
|
*c2 = *c3 = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
2016-10-02 16:51:57 +02:00
|
|
|
|
* Compare two strings, ignore case if rex.reg_ic set.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
* Return 0 if strings match, non-zero otherwise.
|
patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive
Problem: regex: wrong match when searching multi-byte char
case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string
This patch does the following 4 things:
1) When the regexp engine compares two utf-8 codepoints case
insensitive it may match an adjacent character, because it assumes
it can step over as many bytes as the pattern contains.
This however is not necessarily true because of case-folding, a
multi-byte UTF-8 character can be considered equal to some
single-byte value.
Let's consider the pattern 'ſ' and the string 's'. When comparing and
ignoring case, the single character 's' matches, and since it matches
Vim will try to step over the match (by the amount of bytes of the
pattern), assuming that since it matches, the length of both strings is
the same.
However in that case, it should only step over the single byte value
's' by 1 byte and try to start matching after it again. So for the
backtracking engine we need to ensure:
* we try to match the correct length for the pattern and the text
* in case of a match, we step over it correctly
There is one tricky thing for the backtracing engine. We also need to
calculate correctly the number of bytes to compare the 2 different
utf-8 strings s1 and s2. So we will count the number of characters in
s1 that the byte len specified. Then we count the number of bytes to
step over the same number of characters in string s2 and then we can
correctly compare the 2 utf-8 strings.
2) A similar thing can happen for the NFA engine, when skipping to the
next character to test for a match. We are skipping over the regstart
pointer, however we do not consider the case that because of
case-folding we may need to adjust the number of bytes to skip over.
So this needs to be adjusted in find_match_text() as well.
3) A related issue turned out, when prog->match_text is actually empty.
In that case we should try to find the next match and skip this
condition.
4) When comparing characters using collections, we must also apply case
folding to each character in the collection and not just to the
current character from the search string. This doesn't apply to the
NFA engine, because internally it converts collections to branches
[abc] -> a\|b\|c
fixes: #14294
closes: #14756
Signed-off-by: Christian Brabandt <cb@256bit.org>
2024-07-30 20:39:18 +02:00
|
|
|
|
* Correct the length "*n" when composing characters are ignored
|
|
|
|
|
* or for utf8 when both utf codepoints are considered equal because of
|
|
|
|
|
* case-folding but have different length (e.g. 's' and 'ſ')
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*/
|
|
|
|
|
static int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
cstrncmp(char_u *s1, char_u *s2, int *n)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
int result;
|
|
|
|
|
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (!rex.reg_ic)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
result = STRNCMP(s1, s2, *n);
|
patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive
Problem: regex: wrong match when searching multi-byte char
case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string
This patch does the following 4 things:
1) When the regexp engine compares two utf-8 codepoints case
insensitive it may match an adjacent character, because it assumes
it can step over as many bytes as the pattern contains.
This however is not necessarily true because of case-folding, a
multi-byte UTF-8 character can be considered equal to some
single-byte value.
Let's consider the pattern 'ſ' and the string 's'. When comparing and
ignoring case, the single character 's' matches, and since it matches
Vim will try to step over the match (by the amount of bytes of the
pattern), assuming that since it matches, the length of both strings is
the same.
However in that case, it should only step over the single byte value
's' by 1 byte and try to start matching after it again. So for the
backtracking engine we need to ensure:
* we try to match the correct length for the pattern and the text
* in case of a match, we step over it correctly
There is one tricky thing for the backtracing engine. We also need to
calculate correctly the number of bytes to compare the 2 different
utf-8 strings s1 and s2. So we will count the number of characters in
s1 that the byte len specified. Then we count the number of bytes to
step over the same number of characters in string s2 and then we can
correctly compare the 2 utf-8 strings.
2) A similar thing can happen for the NFA engine, when skipping to the
next character to test for a match. We are skipping over the regstart
pointer, however we do not consider the case that because of
case-folding we may need to adjust the number of bytes to skip over.
So this needs to be adjusted in find_match_text() as well.
3) A related issue turned out, when prog->match_text is actually empty.
In that case we should try to find the next match and skip this
condition.
4) When comparing characters using collections, we must also apply case
folding to each character in the collection and not just to the
current character from the search string. This doesn't apply to the
NFA engine, because internally it converts collections to branches
[abc] -> a\|b\|c
fixes: #14294
closes: #14756
Signed-off-by: Christian Brabandt <cb@256bit.org>
2024-07-30 20:39:18 +02:00
|
|
|
|
else if (enc_utf8)
|
|
|
|
|
{
|
|
|
|
|
char_u *p = s1;
|
2024-08-01 22:48:53 +02:00
|
|
|
|
int n2 = 0;
|
patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive
Problem: regex: wrong match when searching multi-byte char
case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string
This patch does the following 4 things:
1) When the regexp engine compares two utf-8 codepoints case
insensitive it may match an adjacent character, because it assumes
it can step over as many bytes as the pattern contains.
This however is not necessarily true because of case-folding, a
multi-byte UTF-8 character can be considered equal to some
single-byte value.
Let's consider the pattern 'ſ' and the string 's'. When comparing and
ignoring case, the single character 's' matches, and since it matches
Vim will try to step over the match (by the amount of bytes of the
pattern), assuming that since it matches, the length of both strings is
the same.
However in that case, it should only step over the single byte value
's' by 1 byte and try to start matching after it again. So for the
backtracking engine we need to ensure:
* we try to match the correct length for the pattern and the text
* in case of a match, we step over it correctly
There is one tricky thing for the backtracing engine. We also need to
calculate correctly the number of bytes to compare the 2 different
utf-8 strings s1 and s2. So we will count the number of characters in
s1 that the byte len specified. Then we count the number of bytes to
step over the same number of characters in string s2 and then we can
correctly compare the 2 utf-8 strings.
2) A similar thing can happen for the NFA engine, when skipping to the
next character to test for a match. We are skipping over the regstart
pointer, however we do not consider the case that because of
case-folding we may need to adjust the number of bytes to skip over.
So this needs to be adjusted in find_match_text() as well.
3) A related issue turned out, when prog->match_text is actually empty.
In that case we should try to find the next match and skip this
condition.
4) When comparing characters using collections, we must also apply case
folding to each character in the collection and not just to the
current character from the search string. This doesn't apply to the
NFA engine, because internally it converts collections to branches
[abc] -> a\|b\|c
fixes: #14294
closes: #14756
Signed-off-by: Christian Brabandt <cb@256bit.org>
2024-07-30 20:39:18 +02:00
|
|
|
|
int n1 = *n;
|
|
|
|
|
// count the number of characters for byte-length of s1
|
|
|
|
|
while (n1 > 0 && *p != NUL)
|
|
|
|
|
{
|
|
|
|
|
n1 -= mb_ptr2len(s1);
|
|
|
|
|
MB_PTR_ADV(p);
|
|
|
|
|
n2++;
|
|
|
|
|
}
|
|
|
|
|
// count the number of bytes to advance the same number of chars for s2
|
|
|
|
|
p = s2;
|
|
|
|
|
while (n2-- > 0 && *p != NUL)
|
|
|
|
|
MB_PTR_ADV(p);
|
|
|
|
|
|
|
|
|
|
n2 = p - s2;
|
|
|
|
|
|
|
|
|
|
result = MB_STRNICMP2(s1, s2, *n, n2);
|
2024-08-01 22:48:53 +02:00
|
|
|
|
if (result == 0 && n2 < *n)
|
patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive
Problem: regex: wrong match when searching multi-byte char
case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string
This patch does the following 4 things:
1) When the regexp engine compares two utf-8 codepoints case
insensitive it may match an adjacent character, because it assumes
it can step over as many bytes as the pattern contains.
This however is not necessarily true because of case-folding, a
multi-byte UTF-8 character can be considered equal to some
single-byte value.
Let's consider the pattern 'ſ' and the string 's'. When comparing and
ignoring case, the single character 's' matches, and since it matches
Vim will try to step over the match (by the amount of bytes of the
pattern), assuming that since it matches, the length of both strings is
the same.
However in that case, it should only step over the single byte value
's' by 1 byte and try to start matching after it again. So for the
backtracking engine we need to ensure:
* we try to match the correct length for the pattern and the text
* in case of a match, we step over it correctly
There is one tricky thing for the backtracing engine. We also need to
calculate correctly the number of bytes to compare the 2 different
utf-8 strings s1 and s2. So we will count the number of characters in
s1 that the byte len specified. Then we count the number of bytes to
step over the same number of characters in string s2 and then we can
correctly compare the 2 utf-8 strings.
2) A similar thing can happen for the NFA engine, when skipping to the
next character to test for a match. We are skipping over the regstart
pointer, however we do not consider the case that because of
case-folding we may need to adjust the number of bytes to skip over.
So this needs to be adjusted in find_match_text() as well.
3) A related issue turned out, when prog->match_text is actually empty.
In that case we should try to find the next match and skip this
condition.
4) When comparing characters using collections, we must also apply case
folding to each character in the collection and not just to the
current character from the search string. This doesn't apply to the
NFA engine, because internally it converts collections to branches
[abc] -> a\|b\|c
fixes: #14294
closes: #14756
Signed-off-by: Christian Brabandt <cb@256bit.org>
2024-07-30 20:39:18 +02:00
|
|
|
|
*n = n2;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
else
|
|
|
|
|
result = MB_STRNICMP(s1, s2, *n);
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// if it failed and it's utf8 and we want to combineignore:
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (result != 0 && enc_utf8 && rex.reg_icombine)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
char_u *str1, *str2;
|
|
|
|
|
int c1, c2, c11, c12;
|
|
|
|
|
int junk;
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// we have to handle the strcmp ourselves, since it is necessary to
|
|
|
|
|
// deal with the composing characters by ignoring them:
|
2004-06-13 20:20:40 +00:00
|
|
|
|
str1 = s1;
|
|
|
|
|
str2 = s2;
|
|
|
|
|
c1 = c2 = 0;
|
2005-09-06 19:25:11 +00:00
|
|
|
|
while ((int)(str1 - s1) < *n)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
c1 = mb_ptr2char_adv(&str1);
|
|
|
|
|
c2 = mb_ptr2char_adv(&str2);
|
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
// Decompose the character if necessary, into 'base' characters.
|
|
|
|
|
// Currently hard-coded for Hebrew, Arabic to be done...
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-09-07 23:16:33 +02:00
|
|
|
|
// decomposition necessary?
|
2004-06-13 20:20:40 +00:00
|
|
|
|
mb_decompose(c1, &c11, &junk, &junk);
|
|
|
|
|
mb_decompose(c2, &c12, &junk, &junk);
|
|
|
|
|
c1 = c11;
|
|
|
|
|
c2 = c12;
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (c11 != c12
|
|
|
|
|
&& (!rex.reg_ic || utf_fold(c11) != utf_fold(c12)))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
result = c2 - c1;
|
|
|
|
|
if (result == 0)
|
|
|
|
|
*n = (int)(str2 - s2);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* cstrchr: This function is used a lot for simple searches, keep it fast!
|
|
|
|
|
*/
|
|
|
|
|
static char_u *
|
2016-01-30 20:31:25 +01:00
|
|
|
|
cstrchr(char_u *s, int c)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
char_u *p;
|
patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive
Problem: regex: wrong match when searching multi-byte char
case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string
This patch does the following 4 things:
1) When the regexp engine compares two utf-8 codepoints case
insensitive it may match an adjacent character, because it assumes
it can step over as many bytes as the pattern contains.
This however is not necessarily true because of case-folding, a
multi-byte UTF-8 character can be considered equal to some
single-byte value.
Let's consider the pattern 'ſ' and the string 's'. When comparing and
ignoring case, the single character 's' matches, and since it matches
Vim will try to step over the match (by the amount of bytes of the
pattern), assuming that since it matches, the length of both strings is
the same.
However in that case, it should only step over the single byte value
's' by 1 byte and try to start matching after it again. So for the
backtracking engine we need to ensure:
* we try to match the correct length for the pattern and the text
* in case of a match, we step over it correctly
There is one tricky thing for the backtracing engine. We also need to
calculate correctly the number of bytes to compare the 2 different
utf-8 strings s1 and s2. So we will count the number of characters in
s1 that the byte len specified. Then we count the number of bytes to
step over the same number of characters in string s2 and then we can
correctly compare the 2 utf-8 strings.
2) A similar thing can happen for the NFA engine, when skipping to the
next character to test for a match. We are skipping over the regstart
pointer, however we do not consider the case that because of
case-folding we may need to adjust the number of bytes to skip over.
So this needs to be adjusted in find_match_text() as well.
3) A related issue turned out, when prog->match_text is actually empty.
In that case we should try to find the next match and skip this
condition.
4) When comparing characters using collections, we must also apply case
folding to each character in the collection and not just to the
current character from the search string. This doesn't apply to the
NFA engine, because internally it converts collections to branches
[abc] -> a\|b\|c
fixes: #14294
closes: #14756
Signed-off-by: Christian Brabandt <cb@256bit.org>
2024-07-30 20:39:18 +02:00
|
|
|
|
int cc, lc;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-01-24 16:39:02 +01:00
|
|
|
|
if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
return vim_strchr(s, c);
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// tolower() and toupper() can be slow, comparing twice should be a lot
|
|
|
|
|
// faster (esp. when using MS Visual C++!).
|
|
|
|
|
// For UTF-8 need to use folded case.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (enc_utf8 && c > 0x80)
|
patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive
Problem: regex: wrong match when searching multi-byte char
case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string
This patch does the following 4 things:
1) When the regexp engine compares two utf-8 codepoints case
insensitive it may match an adjacent character, because it assumes
it can step over as many bytes as the pattern contains.
This however is not necessarily true because of case-folding, a
multi-byte UTF-8 character can be considered equal to some
single-byte value.
Let's consider the pattern 'ſ' and the string 's'. When comparing and
ignoring case, the single character 's' matches, and since it matches
Vim will try to step over the match (by the amount of bytes of the
pattern), assuming that since it matches, the length of both strings is
the same.
However in that case, it should only step over the single byte value
's' by 1 byte and try to start matching after it again. So for the
backtracking engine we need to ensure:
* we try to match the correct length for the pattern and the text
* in case of a match, we step over it correctly
There is one tricky thing for the backtracing engine. We also need to
calculate correctly the number of bytes to compare the 2 different
utf-8 strings s1 and s2. So we will count the number of characters in
s1 that the byte len specified. Then we count the number of bytes to
step over the same number of characters in string s2 and then we can
correctly compare the 2 utf-8 strings.
2) A similar thing can happen for the NFA engine, when skipping to the
next character to test for a match. We are skipping over the regstart
pointer, however we do not consider the case that because of
case-folding we may need to adjust the number of bytes to skip over.
So this needs to be adjusted in find_match_text() as well.
3) A related issue turned out, when prog->match_text is actually empty.
In that case we should try to find the next match and skip this
condition.
4) When comparing characters using collections, we must also apply case
folding to each character in the collection and not just to the
current character from the search string. This doesn't apply to the
NFA engine, because internally it converts collections to branches
[abc] -> a\|b\|c
fixes: #14294
closes: #14756
Signed-off-by: Christian Brabandt <cb@256bit.org>
2024-07-30 20:39:18 +02:00
|
|
|
|
{
|
2004-06-13 20:20:40 +00:00
|
|
|
|
cc = utf_fold(c);
|
patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive
Problem: regex: wrong match when searching multi-byte char
case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string
This patch does the following 4 things:
1) When the regexp engine compares two utf-8 codepoints case
insensitive it may match an adjacent character, because it assumes
it can step over as many bytes as the pattern contains.
This however is not necessarily true because of case-folding, a
multi-byte UTF-8 character can be considered equal to some
single-byte value.
Let's consider the pattern 'ſ' and the string 's'. When comparing and
ignoring case, the single character 's' matches, and since it matches
Vim will try to step over the match (by the amount of bytes of the
pattern), assuming that since it matches, the length of both strings is
the same.
However in that case, it should only step over the single byte value
's' by 1 byte and try to start matching after it again. So for the
backtracking engine we need to ensure:
* we try to match the correct length for the pattern and the text
* in case of a match, we step over it correctly
There is one tricky thing for the backtracing engine. We also need to
calculate correctly the number of bytes to compare the 2 different
utf-8 strings s1 and s2. So we will count the number of characters in
s1 that the byte len specified. Then we count the number of bytes to
step over the same number of characters in string s2 and then we can
correctly compare the 2 utf-8 strings.
2) A similar thing can happen for the NFA engine, when skipping to the
next character to test for a match. We are skipping over the regstart
pointer, however we do not consider the case that because of
case-folding we may need to adjust the number of bytes to skip over.
So this needs to be adjusted in find_match_text() as well.
3) A related issue turned out, when prog->match_text is actually empty.
In that case we should try to find the next match and skip this
condition.
4) When comparing characters using collections, we must also apply case
folding to each character in the collection and not just to the
current character from the search string. This doesn't apply to the
NFA engine, because internally it converts collections to branches
[abc] -> a\|b\|c
fixes: #14294
closes: #14756
Signed-off-by: Christian Brabandt <cb@256bit.org>
2024-07-30 20:39:18 +02:00
|
|
|
|
lc = cc;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
else
|
patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive
Problem: regex: wrong match when searching multi-byte char
case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string
This patch does the following 4 things:
1) When the regexp engine compares two utf-8 codepoints case
insensitive it may match an adjacent character, because it assumes
it can step over as many bytes as the pattern contains.
This however is not necessarily true because of case-folding, a
multi-byte UTF-8 character can be considered equal to some
single-byte value.
Let's consider the pattern 'ſ' and the string 's'. When comparing and
ignoring case, the single character 's' matches, and since it matches
Vim will try to step over the match (by the amount of bytes of the
pattern), assuming that since it matches, the length of both strings is
the same.
However in that case, it should only step over the single byte value
's' by 1 byte and try to start matching after it again. So for the
backtracking engine we need to ensure:
* we try to match the correct length for the pattern and the text
* in case of a match, we step over it correctly
There is one tricky thing for the backtracing engine. We also need to
calculate correctly the number of bytes to compare the 2 different
utf-8 strings s1 and s2. So we will count the number of characters in
s1 that the byte len specified. Then we count the number of bytes to
step over the same number of characters in string s2 and then we can
correctly compare the 2 utf-8 strings.
2) A similar thing can happen for the NFA engine, when skipping to the
next character to test for a match. We are skipping over the regstart
pointer, however we do not consider the case that because of
case-folding we may need to adjust the number of bytes to skip over.
So this needs to be adjusted in find_match_text() as well.
3) A related issue turned out, when prog->match_text is actually empty.
In that case we should try to find the next match and skip this
condition.
4) When comparing characters using collections, we must also apply case
folding to each character in the collection and not just to the
current character from the search string. This doesn't apply to the
NFA engine, because internally it converts collections to branches
[abc] -> a\|b\|c
fixes: #14294
closes: #14756
Signed-off-by: Christian Brabandt <cb@256bit.org>
2024-07-30 20:39:18 +02:00
|
|
|
|
if (MB_ISUPPER(c))
|
|
|
|
|
{
|
|
|
|
|
cc = MB_TOLOWER(c);
|
|
|
|
|
lc = cc;
|
|
|
|
|
}
|
|
|
|
|
else if (MB_ISLOWER(c))
|
|
|
|
|
{
|
|
|
|
|
cc = MB_TOUPPER(c);
|
|
|
|
|
lc = c;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
return vim_strchr(s, c);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
|
|
|
|
if (has_mbyte)
|
|
|
|
|
{
|
2005-08-10 21:07:57 +00:00
|
|
|
|
for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive
Problem: regex: wrong match when searching multi-byte char
case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string
This patch does the following 4 things:
1) When the regexp engine compares two utf-8 codepoints case
insensitive it may match an adjacent character, because it assumes
it can step over as many bytes as the pattern contains.
This however is not necessarily true because of case-folding, a
multi-byte UTF-8 character can be considered equal to some
single-byte value.
Let's consider the pattern 'ſ' and the string 's'. When comparing and
ignoring case, the single character 's' matches, and since it matches
Vim will try to step over the match (by the amount of bytes of the
pattern), assuming that since it matches, the length of both strings is
the same.
However in that case, it should only step over the single byte value
's' by 1 byte and try to start matching after it again. So for the
backtracking engine we need to ensure:
* we try to match the correct length for the pattern and the text
* in case of a match, we step over it correctly
There is one tricky thing for the backtracing engine. We also need to
calculate correctly the number of bytes to compare the 2 different
utf-8 strings s1 and s2. So we will count the number of characters in
s1 that the byte len specified. Then we count the number of bytes to
step over the same number of characters in string s2 and then we can
correctly compare the 2 utf-8 strings.
2) A similar thing can happen for the NFA engine, when skipping to the
next character to test for a match. We are skipping over the regstart
pointer, however we do not consider the case that because of
case-folding we may need to adjust the number of bytes to skip over.
So this needs to be adjusted in find_match_text() as well.
3) A related issue turned out, when prog->match_text is actually empty.
In that case we should try to find the next match and skip this
condition.
4) When comparing characters using collections, we must also apply case
folding to each character in the collection and not just to the
current character from the search string. This doesn't apply to the
NFA engine, because internally it converts collections to branches
[abc] -> a\|b\|c
fixes: #14294
closes: #14756
Signed-off-by: Christian Brabandt <cb@256bit.org>
2024-07-30 20:39:18 +02:00
|
|
|
|
int uc = utf_ptr2char(p);
|
|
|
|
|
if (enc_utf8 && (c > 0x80 || uc > 0x80))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2022-07-29 16:22:25 +01:00
|
|
|
|
// Do not match an illegal byte. E.g. 0xff matches 0xc3 0xbf,
|
|
|
|
|
// not 0xff.
|
patch 9.1.0645: regex: wrong match when searching multi-byte char case-insensitive
Problem: regex: wrong match when searching multi-byte char
case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string
This patch does the following 4 things:
1) When the regexp engine compares two utf-8 codepoints case
insensitive it may match an adjacent character, because it assumes
it can step over as many bytes as the pattern contains.
This however is not necessarily true because of case-folding, a
multi-byte UTF-8 character can be considered equal to some
single-byte value.
Let's consider the pattern 'ſ' and the string 's'. When comparing and
ignoring case, the single character 's' matches, and since it matches
Vim will try to step over the match (by the amount of bytes of the
pattern), assuming that since it matches, the length of both strings is
the same.
However in that case, it should only step over the single byte value
's' by 1 byte and try to start matching after it again. So for the
backtracking engine we need to ensure:
* we try to match the correct length for the pattern and the text
* in case of a match, we step over it correctly
There is one tricky thing for the backtracking engine. We also need to
calculate correctly the number of bytes to compare in the 2 different
utf-8 strings s1 and s2. So we count the number of characters in s1
that the specified byte length covers. Then we count the number of bytes
needed to step over the same number of characters in string s2, and then
we can correctly compare the 2 utf-8 strings.
2) A similar thing can happen for the NFA engine, when skipping to the
next character to test for a match. We are skipping over the regstart
pointer, however we do not consider the case that because of
case-folding we may need to adjust the number of bytes to skip over.
So this needs to be adjusted in find_match_text() as well.
3) A related issue turned out, when prog->match_text is actually empty.
In that case we should try to find the next match and skip this
condition.
4) When comparing characters using collections, we must also apply case
folding to each character in the collection and not just to the
current character from the search string. This doesn't apply to the
NFA engine, because internally it converts collections to branches
[abc] -> a\|b\|c
fixes: #14294
closes: #14756
Signed-off-by: Christian Brabandt <cb@256bit.org>
2024-07-30 20:39:18 +02:00
|
|
|
|
// compare with lower case of the character
|
|
|
|
|
if ((uc < 0x80 || uc != *p) && utf_fold(uc) == lc)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
return p;
|
|
|
|
|
}
|
|
|
|
|
else if (*p == c || *p == cc)
|
|
|
|
|
return p;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Faster version for when there are no multi-byte characters.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
for (p = s; *p != NUL; ++p)
|
|
|
|
|
if (*p == c || *p == cc)
|
|
|
|
|
return p;
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
|
// regsub stuff //
|
|
|
|
|
////////////////////////////////////////////////////////////////
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2023-09-18 19:51:56 +02:00
|
|
|
|
// Type of the case-conversion callbacks (do_upper() / do_lower()) that
// vim_regsub_both() selects for "\u", "\U", "\l" and "\L": the callback
// stores the converted form of its second argument through the first.
typedef void (*fptr_T)(int *, int);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2022-05-30 20:58:55 +01:00
|
|
|
|
// Forward declaration: the worker shared by vim_regsub() and
// vim_regsub_multi(), defined further down in this file.
static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2023-09-18 19:51:56 +02:00
|
|
|
|
/*
 * Case callback for "\u" and "\U" in a substitute string: store the result
 * of MB_TOUPPER() for character "c" in "*d".
 */
static void
do_upper(int *d, int c)
{
    int converted = MB_TOUPPER(c);

    *d = converted;
}
|
|
|
|
|
|
2023-09-18 19:51:56 +02:00
|
|
|
|
/*
 * Case callback for "\l" and "\L" in a substitute string: store the result
 * of MB_TOLOWER() for character "c" in "*d".
 */
static void
do_lower(int *d, int c)
{
    int converted = MB_TOLOWER(c);

    *d = converted;
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* regtilde(): Replace tildes in the pattern by the old pattern.
|
|
|
|
|
*
|
|
|
|
|
* Short explanation of the tilde: It stands for the previous replacement
|
|
|
|
|
* pattern. If that previous pattern also contains a ~ we should go back a
|
|
|
|
|
* step further... But we insert the previous pattern into the current one
|
|
|
|
|
* and remember that.
|
2006-03-16 21:41:35 +00:00
|
|
|
|
* This still does not handle the case where "magic" changes. So require the
|
|
|
|
|
* user to keep his hands off of "magic".
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*
|
|
|
|
|
* The tildes are parsed once before the first call to vim_regsub().
|
|
|
|
|
*/
|
|
|
|
|
char_u *
|
2016-01-30 20:31:25 +01:00
|
|
|
|
regtilde(char_u *source, int magic)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
char_u *newsub = source;
|
|
|
|
|
char_u *p;
|
2024-05-12 00:07:17 +02:00
|
|
|
|
size_t newsublen = 0;
|
|
|
|
|
char_u tilde[3] = {'~', NUL, NUL};
|
|
|
|
|
size_t tildelen = 1;
|
|
|
|
|
int error = FALSE;
|
|
|
|
|
|
|
|
|
|
if (!magic)
|
|
|
|
|
{
|
|
|
|
|
tilde[0] = '\\';
|
|
|
|
|
tilde[1] = '~';
|
|
|
|
|
tilde[2] = NUL;
|
|
|
|
|
tildelen = 2;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
|
|
|
|
for (p = newsub; *p; ++p)
|
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
if (STRNCMP(p, tilde, tildelen) == 0)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
size_t prefixlen = p - newsub; // not including the tilde
|
|
|
|
|
char_u *postfix = p + tildelen;
|
|
|
|
|
size_t postfixlen;
|
|
|
|
|
size_t tmpsublen;
|
|
|
|
|
|
|
|
|
|
if (newsublen == 0)
|
|
|
|
|
newsublen = STRLEN(newsub);
|
|
|
|
|
newsublen -= tildelen;
|
|
|
|
|
postfixlen = newsublen - prefixlen;
|
|
|
|
|
tmpsublen = prefixlen + reg_prev_sublen + postfixlen;
|
|
|
|
|
|
|
|
|
|
if (tmpsublen > 0 && reg_prev_sub != NULL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
char_u *tmpsub;
|
|
|
|
|
|
2023-05-09 21:15:30 +01:00
|
|
|
|
// Avoid making the text longer than MAXCOL, it will cause
|
|
|
|
|
// trouble at some point.
|
2024-05-12 00:07:17 +02:00
|
|
|
|
if (tmpsublen > MAXCOL)
|
2023-05-09 21:15:30 +01:00
|
|
|
|
{
|
|
|
|
|
emsg(_(e_resulting_text_too_long));
|
2024-05-12 00:07:17 +02:00
|
|
|
|
error = TRUE;
|
2023-05-09 21:15:30 +01:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-12 00:07:17 +02:00
|
|
|
|
tmpsub = alloc(tmpsublen + 1);
|
|
|
|
|
if (tmpsub == NULL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
emsg(_(e_out_of_memory));
|
|
|
|
|
error = TRUE;
|
|
|
|
|
break;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2024-05-12 00:07:17 +02:00
|
|
|
|
|
|
|
|
|
// copy prefix
|
|
|
|
|
mch_memmove(tmpsub, newsub, prefixlen);
|
|
|
|
|
// interpret tilde
|
|
|
|
|
mch_memmove(tmpsub + prefixlen, reg_prev_sub, reg_prev_sublen);
|
|
|
|
|
// copy postfix
|
|
|
|
|
STRCPY(tmpsub + prefixlen + reg_prev_sublen, postfix);
|
|
|
|
|
|
|
|
|
|
if (newsub != source) // allocated newsub before
|
|
|
|
|
vim_free(newsub);
|
|
|
|
|
newsub = tmpsub;
|
|
|
|
|
newsublen = tmpsublen;
|
|
|
|
|
p = newsub + prefixlen + reg_prev_sublen;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
2024-05-12 00:07:17 +02:00
|
|
|
|
mch_memmove(p, postfix, postfixlen + 1); // remove the tilde (+1 for the NUL)
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
--p;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
if (*p == '\\' && p[1]) // skip escaped characters
|
2004-06-13 20:20:40 +00:00
|
|
|
|
++p;
|
|
|
|
|
if (has_mbyte)
|
2005-08-10 21:07:57 +00:00
|
|
|
|
p += (*mb_ptr2len)(p) - 1;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-12 00:07:17 +02:00
|
|
|
|
if (error)
|
|
|
|
|
{
|
|
|
|
|
if (newsub != source)
|
|
|
|
|
vim_free(newsub);
|
|
|
|
|
return source;
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-07 22:20:31 +01:00
|
|
|
|
// Store a copy of newsub in reg_prev_sub. It is always allocated,
|
|
|
|
|
// because recursive calls may make the returned string invalid.
|
2024-05-12 00:07:17 +02:00
|
|
|
|
// Only store it if there something to store.
|
|
|
|
|
newsublen = p - newsub;
|
|
|
|
|
if (newsublen == 0)
|
|
|
|
|
VIM_CLEAR(reg_prev_sub);
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
vim_free(reg_prev_sub);
|
|
|
|
|
reg_prev_sub = vim_strnsave(newsub, newsublen);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (reg_prev_sub == NULL)
|
|
|
|
|
reg_prev_sublen = 0;
|
|
|
|
|
else
|
|
|
|
|
reg_prev_sublen = newsublen;
|
2022-07-07 22:20:31 +01:00
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
return newsub;
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-04 15:54:55 +02:00
|
|
|
|
#ifdef FEAT_EVAL
|
2016-07-22 21:50:18 +02:00
|
|
|
|
|
|
|
|
|
/*
|
2019-11-09 22:28:11 +01:00
|
|
|
|
* Put the submatches in "argv[argskip]" which is a list passed into
|
|
|
|
|
* call_func() by vim_regsub_both().
|
2016-07-22 21:50:18 +02:00
|
|
|
|
*/
|
|
|
|
|
static int
|
2022-09-16 12:10:03 +01:00
|
|
|
|
fill_submatch_list(int argc UNUSED, typval_T *argv, int argskip, ufunc_T *fp)
|
2016-07-22 21:50:18 +02:00
|
|
|
|
{
|
|
|
|
|
listitem_T *li;
|
|
|
|
|
int i;
|
|
|
|
|
char_u *s;
|
2019-11-09 22:28:11 +01:00
|
|
|
|
typval_T *listarg = argv + argskip;
|
2016-07-22 21:50:18 +02:00
|
|
|
|
|
2022-09-16 16:06:32 +01:00
|
|
|
|
if (!has_varargs(fp) && fp->uf_args.ga_len <= argskip)
|
2019-11-09 22:28:11 +01:00
|
|
|
|
// called function doesn't take a submatches argument
|
|
|
|
|
return argskip;
|
2016-07-22 21:50:18 +02:00
|
|
|
|
|
2019-11-09 22:28:11 +01:00
|
|
|
|
// Relies on sl_list to be the first item in staticList10_T.
|
|
|
|
|
init_static_list((staticList10_T *)(listarg->vval.v_list));
|
2016-07-22 21:50:18 +02:00
|
|
|
|
|
2019-11-09 22:28:11 +01:00
|
|
|
|
// There are always 10 list items in staticList10_T.
|
|
|
|
|
li = listarg->vval.v_list->lv_first;
|
2016-07-22 21:50:18 +02:00
|
|
|
|
for (i = 0; i < 10; ++i)
|
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
s = rsm.sm_match->startp[i];
|
|
|
|
|
if (s == NULL || rsm.sm_match->endp[i] == NULL)
|
2016-07-22 21:50:18 +02:00
|
|
|
|
s = NULL;
|
|
|
|
|
else
|
2020-06-12 22:59:11 +02:00
|
|
|
|
s = vim_strnsave(s, rsm.sm_match->endp[i] - s);
|
2016-07-22 21:50:18 +02:00
|
|
|
|
li->li_tv.v_type = VAR_STRING;
|
|
|
|
|
li->li_tv.vval.v_string = s;
|
|
|
|
|
li = li->li_next;
|
|
|
|
|
}
|
2019-11-09 22:28:11 +01:00
|
|
|
|
return argskip + 1;
|
2016-07-22 21:50:18 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
clear_submatch_list(staticList10_T *sl)
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < 10; ++i)
|
|
|
|
|
vim_free(sl->sl_items[i].li_tv.vval.v_string);
|
|
|
|
|
}
|
2019-09-04 15:54:55 +02:00
|
|
|
|
#endif
|
2016-07-22 21:50:18 +02:00
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
|
|
|
|
|
* vim_regsub() - perform substitutions after a vim_regexec() or
|
|
|
|
|
* vim_regexec_multi() match.
|
|
|
|
|
*
|
2022-05-30 20:58:55 +01:00
|
|
|
|
* If "flags" has REGSUB_COPY really copy into "dest[destlen]".
|
2022-11-02 13:30:51 +00:00
|
|
|
|
* Otherwise nothing is copied, only compute the length of the result.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*
|
2022-05-30 20:58:55 +01:00
|
|
|
|
* If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
|
|
|
|
|
*
|
|
|
|
|
* If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
|
|
|
|
|
* double them to keep them, and insert a backslash before a CR to avoid it
|
|
|
|
|
* being replaced with a line break later.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*
|
|
|
|
|
* Note: The matched text must not change between the call of
|
|
|
|
|
* vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
|
|
|
|
|
* references invalid!
|
|
|
|
|
*
|
|
|
|
|
* Returns the size of the replacement, including terminating NUL.
|
|
|
|
|
*/
|
|
|
|
|
int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
vim_regsub(
|
|
|
|
|
regmatch_T *rmp,
|
|
|
|
|
char_u *source,
|
2016-07-19 19:10:51 +02:00
|
|
|
|
typval_T *expr,
|
2016-01-30 20:31:25 +01:00
|
|
|
|
char_u *dest,
|
2022-05-30 20:58:55 +01:00
|
|
|
|
int destlen,
|
|
|
|
|
int flags)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
int result;
|
|
|
|
|
regexec_T rex_save;
|
|
|
|
|
int rex_in_use_save = rex_in_use;
|
|
|
|
|
|
|
|
|
|
if (rex_in_use)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Being called recursively, save the state.
|
2016-10-02 16:51:57 +02:00
|
|
|
|
rex_save = rex;
|
|
|
|
|
rex_in_use = TRUE;
|
|
|
|
|
|
|
|
|
|
rex.reg_match = rmp;
|
|
|
|
|
rex.reg_mmatch = NULL;
|
|
|
|
|
rex.reg_maxline = 0;
|
|
|
|
|
rex.reg_buf = curbuf;
|
|
|
|
|
rex.reg_line_lbr = TRUE;
|
2022-05-30 20:58:55 +01:00
|
|
|
|
result = vim_regsub_both(source, expr, dest, destlen, flags);
|
2016-10-02 16:51:57 +02:00
|
|
|
|
|
|
|
|
|
rex_in_use = rex_in_use_save;
|
|
|
|
|
if (rex_in_use)
|
|
|
|
|
rex = rex_save;
|
|
|
|
|
|
|
|
|
|
return result;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
vim_regsub_multi(
|
|
|
|
|
regmmatch_T *rmp,
|
|
|
|
|
linenr_T lnum,
|
|
|
|
|
char_u *source,
|
|
|
|
|
char_u *dest,
|
2022-05-30 20:58:55 +01:00
|
|
|
|
int destlen,
|
|
|
|
|
int flags)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
int result;
|
|
|
|
|
regexec_T rex_save;
|
|
|
|
|
int rex_in_use_save = rex_in_use;
|
|
|
|
|
|
|
|
|
|
if (rex_in_use)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Being called recursively, save the state.
|
2016-10-02 16:51:57 +02:00
|
|
|
|
rex_save = rex;
|
|
|
|
|
rex_in_use = TRUE;
|
|
|
|
|
|
|
|
|
|
rex.reg_match = NULL;
|
|
|
|
|
rex.reg_mmatch = rmp;
|
2019-12-05 21:10:38 +01:00
|
|
|
|
rex.reg_buf = curbuf; // always works on the current buffer!
|
2016-10-02 16:51:57 +02:00
|
|
|
|
rex.reg_firstlnum = lnum;
|
|
|
|
|
rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
|
|
|
|
|
rex.reg_line_lbr = FALSE;
|
2022-05-30 20:58:55 +01:00
|
|
|
|
result = vim_regsub_both(source, NULL, dest, destlen, flags);
|
2016-10-02 16:51:57 +02:00
|
|
|
|
|
|
|
|
|
rex_in_use = rex_in_use_save;
|
|
|
|
|
if (rex_in_use)
|
|
|
|
|
rex = rex_save;
|
|
|
|
|
|
|
|
|
|
return result;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2022-06-21 22:15:25 +01:00
|
|
|
|
#if defined(FEAT_EVAL) || defined(PROTO)
// When nesting more than a couple levels it's probably a mistake.
# define MAX_REGSUB_NESTING 4

// Result of evaluating a "\=" substitute expression, one slot per nesting
// level of vim_regsub_both().
static char_u *eval_result[MAX_REGSUB_NESTING] = {NULL, NULL, NULL, NULL};

# if defined(EXITFREE) || defined(PROTO)
/*
 * Free every saved expression result and reset the slots to NULL.
 */
    void
free_resub_eval_result(void)
{
    int level = 0;

    while (level < MAX_REGSUB_NESTING)
    {
        VIM_CLEAR(eval_result[level]);
        ++level;
    }
}
# endif
#endif
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
static int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
vim_regsub_both(
|
|
|
|
|
char_u *source,
|
2016-07-19 19:10:51 +02:00
|
|
|
|
typval_T *expr,
|
2016-01-30 20:31:25 +01:00
|
|
|
|
char_u *dest,
|
2022-05-30 20:58:55 +01:00
|
|
|
|
int destlen,
|
|
|
|
|
int flags)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
char_u *src;
|
|
|
|
|
char_u *dst;
|
|
|
|
|
char_u *s;
|
|
|
|
|
int c;
|
2006-03-16 21:41:35 +00:00
|
|
|
|
int cc;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
int no = -1;
|
2013-03-19 17:42:15 +01:00
|
|
|
|
fptr_T func_all = (fptr_T)NULL;
|
|
|
|
|
fptr_T func_one = (fptr_T)NULL;
|
2019-12-05 21:10:38 +01:00
|
|
|
|
linenr_T clnum = 0; // init for GCC
|
|
|
|
|
int len = 0; // init for GCC
|
2004-06-13 20:20:40 +00:00
|
|
|
|
#ifdef FEAT_EVAL
|
2022-06-21 22:15:25 +01:00
|
|
|
|
static int nesting = 0;
|
|
|
|
|
int nested;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
#endif
|
2022-05-30 20:58:55 +01:00
|
|
|
|
int copy = flags & REGSUB_COPY;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Be paranoid...
|
2016-07-19 19:10:51 +02:00
|
|
|
|
if ((source == NULL && expr == NULL) || dest == NULL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2023-05-31 17:12:14 +01:00
|
|
|
|
iemsg(e_null_argument);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
if (prog_magic_wrong())
|
|
|
|
|
return 0;
|
2022-06-21 22:15:25 +01:00
|
|
|
|
#ifdef FEAT_EVAL
|
|
|
|
|
if (nesting == MAX_REGSUB_NESTING)
|
|
|
|
|
{
|
|
|
|
|
emsg(_(e_substitute_nesting_too_deep));
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
nested = nesting;
|
|
|
|
|
#endif
|
2004-06-13 20:20:40 +00:00
|
|
|
|
src = source;
|
|
|
|
|
dst = dest;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* When the substitute part starts with "\=" evaluate it as an expression.
|
|
|
|
|
*/
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (expr != NULL || (source[0] == '\\' && source[1] == '='))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
#ifdef FEAT_EVAL
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// To make sure that the length doesn't change between checking the
|
|
|
|
|
// length and copying the string, and to speed up things, the
|
2022-06-05 16:55:54 +01:00
|
|
|
|
// resulting string is saved from the call with
|
|
|
|
|
// "flags & REGSUB_COPY" == 0 to the call with
|
|
|
|
|
// "flags & REGSUB_COPY" != 0.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (copy)
|
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
if (eval_result[nested] != NULL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
int eval_len = (int)STRLEN(eval_result[nested]);
|
|
|
|
|
|
|
|
|
|
if (eval_len < destlen)
|
|
|
|
|
{
|
|
|
|
|
STRCPY(dest, eval_result[nested]);
|
|
|
|
|
dst += eval_len;
|
|
|
|
|
VIM_CLEAR(eval_result[nested]);
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
int prev_can_f_submatch = can_f_submatch;
|
|
|
|
|
regsubmatch_T rsm_save;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2022-06-21 22:15:25 +01:00
|
|
|
|
VIM_CLEAR(eval_result[nested]);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// The expression may contain substitute(), which calls us
|
|
|
|
|
// recursively. Make sure submatch() gets the text from the first
|
|
|
|
|
// level.
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (can_f_submatch)
|
|
|
|
|
rsm_save = rsm;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
can_f_submatch = TRUE;
|
2016-10-02 16:51:57 +02:00
|
|
|
|
rsm.sm_match = rex.reg_match;
|
|
|
|
|
rsm.sm_mmatch = rex.reg_mmatch;
|
|
|
|
|
rsm.sm_firstlnum = rex.reg_firstlnum;
|
|
|
|
|
rsm.sm_maxline = rex.reg_maxline;
|
|
|
|
|
rsm.sm_line_lbr = rex.reg_line_lbr;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
|
2022-06-21 22:15:25 +01:00
|
|
|
|
// Although unlikely, it is possible that the expression invokes a
|
|
|
|
|
// substitute command (it might fail, but still). Therefore keep
|
2022-06-23 20:46:27 +01:00
|
|
|
|
// an array of eval results.
|
2022-06-21 22:15:25 +01:00
|
|
|
|
++nesting;
|
|
|
|
|
|
2016-07-19 19:10:51 +02:00
|
|
|
|
if (expr != NULL)
|
|
|
|
|
{
|
2016-07-22 21:50:18 +02:00
|
|
|
|
typval_T argv[2];
|
2016-07-19 19:10:51 +02:00
|
|
|
|
char_u buf[NUMBUFLEN];
|
|
|
|
|
typval_T rettv;
|
2016-07-22 21:50:18 +02:00
|
|
|
|
staticList10_T matchList;
|
2019-08-03 18:17:11 +02:00
|
|
|
|
funcexe_T funcexe;
|
2016-07-19 19:10:51 +02:00
|
|
|
|
|
|
|
|
|
rettv.v_type = VAR_STRING;
|
|
|
|
|
rettv.vval.v_string = NULL;
|
2016-10-02 16:51:57 +02:00
|
|
|
|
argv[0].v_type = VAR_LIST;
|
|
|
|
|
argv[0].vval.v_list = &matchList.sl_list;
|
|
|
|
|
matchList.sl_list.lv_len = 0;
|
2020-04-12 19:37:17 +02:00
|
|
|
|
CLEAR_FIELD(funcexe);
|
2021-12-13 14:26:44 +00:00
|
|
|
|
funcexe.fe_argv_func = fill_submatch_list;
|
|
|
|
|
funcexe.fe_evaluate = TRUE;
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (expr->v_type == VAR_FUNC)
|
2016-07-19 19:10:51 +02:00
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
s = expr->vval.v_string;
|
2019-08-03 18:17:11 +02:00
|
|
|
|
call_func(s, -1, &rettv, 1, argv, &funcexe);
|
2016-07-19 19:10:51 +02:00
|
|
|
|
}
|
2016-10-02 16:51:57 +02:00
|
|
|
|
else if (expr->v_type == VAR_PARTIAL)
|
2016-07-19 19:10:51 +02:00
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
partial_T *partial = expr->vval.v_partial;
|
2016-07-19 19:10:51 +02:00
|
|
|
|
|
2016-10-02 16:51:57 +02:00
|
|
|
|
s = partial_name(partial);
|
2021-12-13 14:26:44 +00:00
|
|
|
|
funcexe.fe_partial = partial;
|
2019-08-03 18:17:11 +02:00
|
|
|
|
call_func(s, -1, &rettv, 1, argv, &funcexe);
|
2016-07-19 19:10:51 +02:00
|
|
|
|
}
|
2022-05-05 13:53:03 +01:00
|
|
|
|
else if (expr->v_type == VAR_INSTR)
|
|
|
|
|
{
|
|
|
|
|
exe_typval_instr(expr, &rettv);
|
|
|
|
|
}
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (matchList.sl_list.lv_len > 0)
|
2019-11-10 00:13:50 +01:00
|
|
|
|
// fill_submatch_list() was called
|
2016-10-02 16:51:57 +02:00
|
|
|
|
clear_submatch_list(&matchList);
|
|
|
|
|
|
2019-11-10 00:13:50 +01:00
|
|
|
|
if (rettv.v_type == VAR_UNKNOWN)
|
|
|
|
|
// something failed, no need to report another error
|
2022-06-21 22:15:25 +01:00
|
|
|
|
eval_result[nested] = NULL;
|
2019-11-10 00:13:50 +01:00
|
|
|
|
else
|
|
|
|
|
{
|
2022-06-21 22:15:25 +01:00
|
|
|
|
eval_result[nested] = tv_get_string_buf_chk(&rettv, buf);
|
|
|
|
|
if (eval_result[nested] != NULL)
|
|
|
|
|
eval_result[nested] = vim_strsave(eval_result[nested]);
|
2019-11-10 00:13:50 +01:00
|
|
|
|
}
|
2016-07-22 21:50:18 +02:00
|
|
|
|
clear_tv(&rettv);
|
2016-07-19 19:10:51 +02:00
|
|
|
|
}
|
2021-04-19 16:48:48 +02:00
|
|
|
|
else if (substitute_instr != NULL)
|
|
|
|
|
// Execute instructions from ISN_SUBSTITUTE.
|
2022-06-21 22:15:25 +01:00
|
|
|
|
eval_result[nested] = exe_substitute_instr();
|
2016-07-19 19:10:51 +02:00
|
|
|
|
else
|
2022-10-01 19:43:52 +01:00
|
|
|
|
eval_result[nested] = eval_to_string(source + 2, TRUE, FALSE);
|
2022-06-21 22:15:25 +01:00
|
|
|
|
--nesting;
|
2016-07-19 19:10:51 +02:00
|
|
|
|
|
2022-06-21 22:15:25 +01:00
|
|
|
|
if (eval_result[nested] != NULL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2010-03-23 16:27:22 +01:00
|
|
|
|
int had_backslash = FALSE;
|
|
|
|
|
|
2022-06-21 22:15:25 +01:00
|
|
|
|
for (s = eval_result[nested]; *s != NUL; MB_PTR_ADV(s))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Change NL to CR, so that it becomes a line break,
|
|
|
|
|
// unless called from vim_regexec_nl().
|
|
|
|
|
// Skip over a backslashed character.
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (*s == NL && !rsm.sm_line_lbr)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*s = CAR;
|
|
|
|
|
else if (*s == '\\' && s[1] != NUL)
|
2010-03-23 16:27:22 +01:00
|
|
|
|
{
|
2004-06-13 20:20:40 +00:00
|
|
|
|
++s;
|
2010-05-21 13:08:58 +02:00
|
|
|
|
/* Change NL to CR here too, so that this works:
|
|
|
|
|
* :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
|
|
|
|
|
* abc\
|
|
|
|
|
* def
|
2011-06-19 04:32:15 +02:00
|
|
|
|
* Not when called from vim_regexec_nl().
|
2010-05-21 13:08:58 +02:00
|
|
|
|
*/
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (*s == NL && !rsm.sm_line_lbr)
|
2010-05-21 13:08:58 +02:00
|
|
|
|
*s = CAR;
|
2010-03-23 16:27:22 +01:00
|
|
|
|
had_backslash = TRUE;
|
|
|
|
|
}
|
|
|
|
|
}
|
2022-05-30 20:58:55 +01:00
|
|
|
|
if (had_backslash && (flags & REGSUB_BACKSLASH))
|
2010-03-23 16:27:22 +01:00
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Backslashes will be consumed, need to double them.
|
2022-06-21 22:15:25 +01:00
|
|
|
|
s = vim_strsave_escaped(eval_result[nested], (char_u *)"\\");
|
2010-03-23 16:27:22 +01:00
|
|
|
|
if (s != NULL)
|
|
|
|
|
{
|
2022-06-21 22:15:25 +01:00
|
|
|
|
vim_free(eval_result[nested]);
|
|
|
|
|
eval_result[nested] = s;
|
2010-03-23 16:27:22 +01:00
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2022-06-21 22:15:25 +01:00
|
|
|
|
dst += STRLEN(eval_result[nested]);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-10-02 16:51:57 +02:00
|
|
|
|
can_f_submatch = prev_can_f_submatch;
|
|
|
|
|
if (can_f_submatch)
|
|
|
|
|
rsm = rsm_save;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
while ((c = *src++) != NUL)
|
|
|
|
|
{
|
2022-05-30 20:58:55 +01:00
|
|
|
|
if (c == '&' && (flags & REGSUB_MAGIC))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
no = 0;
|
|
|
|
|
else if (c == '\\' && *src != NUL)
|
|
|
|
|
{
|
2022-05-30 20:58:55 +01:00
|
|
|
|
if (*src == '&' && !(flags & REGSUB_MAGIC))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
++src;
|
|
|
|
|
no = 0;
|
|
|
|
|
}
|
|
|
|
|
else if ('0' <= *src && *src <= '9')
|
|
|
|
|
{
|
|
|
|
|
no = *src++ - '0';
|
|
|
|
|
}
|
|
|
|
|
else if (vim_strchr((char_u *)"uUlLeE", *src))
|
|
|
|
|
{
|
|
|
|
|
switch (*src++)
|
|
|
|
|
{
|
2023-09-18 19:51:56 +02:00
|
|
|
|
case 'u': func_one = do_upper;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
continue;
|
2023-09-18 19:51:56 +02:00
|
|
|
|
case 'U': func_all = do_upper;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
continue;
|
2023-09-18 19:51:56 +02:00
|
|
|
|
case 'l': func_one = do_lower;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
continue;
|
2023-09-18 19:51:56 +02:00
|
|
|
|
case 'L': func_all = do_lower;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
continue;
|
|
|
|
|
case 'e':
|
2013-03-19 17:42:15 +01:00
|
|
|
|
case 'E': func_one = func_all = (fptr_T)NULL;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-12-05 21:10:38 +01:00
|
|
|
|
if (no < 0) // Ordinary character.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2006-03-23 22:59:57 +00:00
|
|
|
|
if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
|
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Copy a special key as-is.
|
2006-03-23 22:59:57 +00:00
|
|
|
|
if (copy)
|
|
|
|
|
{
|
2022-05-30 20:58:55 +01:00
|
|
|
|
if (dst + 3 > dest + destlen)
|
|
|
|
|
{
|
|
|
|
|
iemsg("vim_regsub_both(): not enough space");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2006-03-23 22:59:57 +00:00
|
|
|
|
*dst++ = c;
|
|
|
|
|
*dst++ = *src++;
|
|
|
|
|
*dst++ = *src++;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
dst += 3;
|
|
|
|
|
src += 2;
|
|
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (c == '\\' && *src != NUL)
|
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Check for abbreviations -- webb
|
2004-06-13 20:20:40 +00:00
|
|
|
|
switch (*src)
|
|
|
|
|
{
|
|
|
|
|
case 'r': c = CAR; ++src; break;
|
|
|
|
|
case 'n': c = NL; ++src; break;
|
|
|
|
|
case 't': c = TAB; ++src; break;
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Oh no! \e already has meaning in subst pat :-(
|
|
|
|
|
// case 'e': c = ESC; ++src; break;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
case 'b': c = Ctrl_H; ++src; break;
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// If "backslash" is TRUE the backslash will be removed
|
|
|
|
|
// later. Used to insert a literal CR.
|
2022-05-30 20:58:55 +01:00
|
|
|
|
default: if (flags & REGSUB_BACKSLASH)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
if (copy)
|
2022-05-30 20:58:55 +01:00
|
|
|
|
{
|
|
|
|
|
if (dst + 1 > dest + destlen)
|
|
|
|
|
{
|
|
|
|
|
iemsg("vim_regsub_both(): not enough space");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*dst = '\\';
|
2022-05-30 20:58:55 +01:00
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
++dst;
|
|
|
|
|
}
|
|
|
|
|
c = *src++;
|
|
|
|
|
}
|
|
|
|
|
}
|
2006-03-23 22:59:57 +00:00
|
|
|
|
else if (has_mbyte)
|
2006-03-16 21:41:35 +00:00
|
|
|
|
c = mb_ptr2char(src - 1);
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Write to buffer, if copy is set.
|
2013-03-19 17:42:15 +01:00
|
|
|
|
if (func_one != (fptr_T)NULL)
|
2023-09-18 19:51:56 +02:00
|
|
|
|
{
|
|
|
|
|
func_one(&cc, c);
|
|
|
|
|
func_one = NULL;
|
|
|
|
|
}
|
2013-03-19 17:42:15 +01:00
|
|
|
|
else if (func_all != (fptr_T)NULL)
|
2023-09-18 19:51:56 +02:00
|
|
|
|
func_all(&cc, c);
|
2019-12-05 21:10:38 +01:00
|
|
|
|
else // just copy
|
2013-03-19 17:42:15 +01:00
|
|
|
|
cc = c;
|
2006-03-16 21:41:35 +00:00
|
|
|
|
|
|
|
|
|
if (has_mbyte)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2010-07-12 22:42:33 +02:00
|
|
|
|
int totlen = mb_ptr2len(src - 1);
|
2022-05-30 20:58:55 +01:00
|
|
|
|
int charlen = mb_char2len(cc);
|
2010-07-12 22:42:33 +02:00
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (copy)
|
2022-05-30 20:58:55 +01:00
|
|
|
|
{
|
|
|
|
|
if (dst + charlen > dest + destlen)
|
|
|
|
|
{
|
|
|
|
|
iemsg("vim_regsub_both(): not enough space");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2006-03-16 21:41:35 +00:00
|
|
|
|
mb_char2bytes(cc, dst);
|
2022-05-30 20:58:55 +01:00
|
|
|
|
}
|
|
|
|
|
dst += charlen - 1;
|
2010-07-12 22:42:33 +02:00
|
|
|
|
if (enc_utf8)
|
|
|
|
|
{
|
|
|
|
|
int clen = utf_ptr2len(src - 1);
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// If the character length is shorter than "totlen", there
|
|
|
|
|
// are composing characters; copy them as-is.
|
2010-07-12 22:42:33 +02:00
|
|
|
|
if (clen < totlen)
|
|
|
|
|
{
|
|
|
|
|
if (copy)
|
2022-05-30 20:58:55 +01:00
|
|
|
|
{
|
|
|
|
|
if (dst + totlen - clen > dest + destlen)
|
|
|
|
|
{
|
|
|
|
|
iemsg("vim_regsub_both(): not enough space");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2010-07-12 22:42:33 +02:00
|
|
|
|
mch_memmove(dst + 1, src - 1 + clen,
|
|
|
|
|
(size_t)(totlen - clen));
|
2022-05-30 20:58:55 +01:00
|
|
|
|
}
|
2010-07-12 22:42:33 +02:00
|
|
|
|
dst += totlen - clen;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
src += totlen - 1;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2019-01-24 16:39:02 +01:00
|
|
|
|
else if (copy)
|
2022-05-30 20:58:55 +01:00
|
|
|
|
{
|
|
|
|
|
if (dst + 1 > dest + destlen)
|
|
|
|
|
{
|
|
|
|
|
iemsg("vim_regsub_both(): not enough space");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
*dst = cc;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
dst++;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (REG_MULTI)
|
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
clnum = rex.reg_mmatch->startpos[no].lnum;
|
|
|
|
|
if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
s = NULL;
|
|
|
|
|
else
|
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
|
|
|
|
|
if (rex.reg_mmatch->endpos[no].lnum == clnum)
|
|
|
|
|
len = rex.reg_mmatch->endpos[no].col
|
|
|
|
|
- rex.reg_mmatch->startpos[no].col;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
else
|
2024-05-12 00:07:17 +02:00
|
|
|
|
len = (int)reg_getline_len(clnum) - rex.reg_mmatch->startpos[no].col;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
s = rex.reg_match->startp[no];
|
|
|
|
|
if (rex.reg_match->endp[no] == NULL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
s = NULL;
|
|
|
|
|
else
|
2016-10-02 16:51:57 +02:00
|
|
|
|
len = (int)(rex.reg_match->endp[no] - s);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
if (s != NULL)
|
|
|
|
|
{
|
|
|
|
|
for (;;)
|
|
|
|
|
{
|
|
|
|
|
if (len == 0)
|
|
|
|
|
{
|
|
|
|
|
if (REG_MULTI)
|
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (rex.reg_mmatch->endpos[no].lnum == clnum)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
break;
|
|
|
|
|
if (copy)
|
2022-05-30 20:58:55 +01:00
|
|
|
|
{
|
|
|
|
|
if (dst + 1 > dest + destlen)
|
|
|
|
|
{
|
|
|
|
|
iemsg("vim_regsub_both(): not enough space");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
*dst = CAR;
|
2022-05-30 20:58:55 +01:00
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
++dst;
|
|
|
|
|
s = reg_getline(++clnum);
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (rex.reg_mmatch->endpos[no].lnum == clnum)
|
|
|
|
|
len = rex.reg_mmatch->endpos[no].col;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
else
|
2024-05-12 00:07:17 +02:00
|
|
|
|
len = (int)reg_getline_len(clnum);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
break;
|
|
|
|
|
}
|
2019-12-05 21:10:38 +01:00
|
|
|
|
else if (*s == NUL) // we hit NUL.
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
if (copy)
|
2023-05-31 17:12:14 +01:00
|
|
|
|
iemsg(e_damaged_match_string);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
goto exit;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2022-05-30 20:58:55 +01:00
|
|
|
|
if ((flags & REGSUB_BACKSLASH)
|
|
|
|
|
&& (*s == CAR || *s == '\\'))
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* Insert a backslash in front of a CR, otherwise
|
|
|
|
|
* it will be replaced by a line break.
|
|
|
|
|
* Number of backslashes will be halved later,
|
|
|
|
|
* double them here.
|
|
|
|
|
*/
|
|
|
|
|
if (copy)
|
|
|
|
|
{
|
2022-05-30 20:58:55 +01:00
|
|
|
|
if (dst + 2 > dest + destlen)
|
|
|
|
|
{
|
|
|
|
|
iemsg("vim_regsub_both(): not enough space");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
dst[0] = '\\';
|
|
|
|
|
dst[1] = *s;
|
|
|
|
|
}
|
|
|
|
|
dst += 2;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2006-03-16 21:41:35 +00:00
|
|
|
|
if (has_mbyte)
|
|
|
|
|
c = mb_ptr2char(s);
|
|
|
|
|
else
|
|
|
|
|
c = *s;
|
|
|
|
|
|
2013-03-19 17:42:15 +01:00
|
|
|
|
if (func_one != (fptr_T)NULL)
|
2023-09-18 19:51:56 +02:00
|
|
|
|
{
|
|
|
|
|
func_one(&cc, c);
|
|
|
|
|
func_one = NULL;
|
|
|
|
|
}
|
2013-03-19 17:42:15 +01:00
|
|
|
|
else if (func_all != (fptr_T)NULL)
|
2023-09-18 19:51:56 +02:00
|
|
|
|
func_all(&cc, c);
|
2019-12-05 21:10:38 +01:00
|
|
|
|
else // just copy
|
2013-03-19 17:42:15 +01:00
|
|
|
|
cc = c;
|
2006-03-16 21:41:35 +00:00
|
|
|
|
|
|
|
|
|
if (has_mbyte)
|
|
|
|
|
{
|
2007-07-30 20:32:53 +00:00
|
|
|
|
int l;
|
2022-05-30 20:58:55 +01:00
|
|
|
|
int charlen;
|
2007-07-30 20:32:53 +00:00
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Copy composing characters separately, one
|
|
|
|
|
// at a time.
|
2007-07-30 20:32:53 +00:00
|
|
|
|
if (enc_utf8)
|
|
|
|
|
l = utf_ptr2len(s) - 1;
|
|
|
|
|
else
|
|
|
|
|
l = mb_ptr2len(s) - 1;
|
2006-03-16 21:41:35 +00:00
|
|
|
|
|
|
|
|
|
s += l;
|
|
|
|
|
len -= l;
|
2022-05-30 20:58:55 +01:00
|
|
|
|
charlen = mb_char2len(cc);
|
2006-03-16 21:41:35 +00:00
|
|
|
|
if (copy)
|
2022-05-30 20:58:55 +01:00
|
|
|
|
{
|
|
|
|
|
if (dst + charlen > dest + destlen)
|
|
|
|
|
{
|
|
|
|
|
iemsg("vim_regsub_both(): not enough space");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2006-03-16 21:41:35 +00:00
|
|
|
|
mb_char2bytes(cc, dst);
|
2022-05-30 20:58:55 +01:00
|
|
|
|
}
|
|
|
|
|
dst += charlen - 1;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2019-01-24 16:39:02 +01:00
|
|
|
|
else if (copy)
|
2022-05-30 20:58:55 +01:00
|
|
|
|
{
|
|
|
|
|
if (dst + 1 > dest + destlen)
|
|
|
|
|
{
|
|
|
|
|
iemsg("vim_regsub_both(): not enough space");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
*dst = cc;
|
|
|
|
|
}
|
2006-03-16 21:41:35 +00:00
|
|
|
|
dst++;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
2006-03-16 21:41:35 +00:00
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
++s;
|
|
|
|
|
--len;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
no = -1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (copy)
|
|
|
|
|
*dst = NUL;
|
|
|
|
|
|
|
|
|
|
exit:
|
|
|
|
|
return (int)((dst - dest) + 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef FEAT_EVAL
|
2024-05-12 00:07:17 +02:00
|
|
|
|
|
2009-11-25 18:51:24 +00:00
|
|
|
|
static char_u *
|
2016-01-30 20:31:25 +01:00
|
|
|
|
reg_getline_submatch(linenr_T lnum)
|
2009-11-25 18:51:24 +00:00
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
char_u *line;
|
|
|
|
|
|
|
|
|
|
reg_getline_common(lnum, RGLF_LINE | RGLF_SUBMATCH, &line, NULL);
|
|
|
|
|
|
|
|
|
|
return line;
|
|
|
|
|
}
|
2009-11-25 18:51:24 +00:00
|
|
|
|
|
2024-05-12 00:07:17 +02:00
|
|
|
|
static colnr_T
|
|
|
|
|
reg_getline_submatch_len(linenr_T lnum)
|
|
|
|
|
{
|
|
|
|
|
colnr_T length;
|
2009-11-25 18:51:24 +00:00
|
|
|
|
|
2024-05-12 00:07:17 +02:00
|
|
|
|
reg_getline_common(lnum, RGLF_LENGTH | RGLF_SUBMATCH, NULL, &length);
|
2009-11-25 18:51:24 +00:00
|
|
|
|
|
2024-05-12 00:07:17 +02:00
|
|
|
|
return length;
|
2009-11-25 18:51:24 +00:00
|
|
|
|
}
|
|
|
|
|
|
2004-06-13 20:20:40 +00:00
|
|
|
|
/*
|
2007-05-10 18:00:30 +00:00
|
|
|
|
* Used for the submatch() function: get the string from the n'th submatch in
|
2004-06-13 20:20:40 +00:00
|
|
|
|
* allocated memory.
|
|
|
|
|
* Returns NULL when not in a ":s" command and for a non-existing submatch.
|
|
|
|
|
*/
|
|
|
|
|
char_u *
|
2016-01-30 20:31:25 +01:00
|
|
|
|
reg_submatch(int no)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
char_u *retval = NULL;
|
|
|
|
|
char_u *s;
|
|
|
|
|
int len;
|
|
|
|
|
int round;
|
|
|
|
|
linenr_T lnum;
|
|
|
|
|
|
2006-04-22 22:33:57 +00:00
|
|
|
|
if (!can_f_submatch || no < 0)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
return NULL;
|
|
|
|
|
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (rsm.sm_match == NULL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* First round: compute the length and allocate memory.
|
|
|
|
|
* Second round: copy the text.
|
|
|
|
|
*/
|
|
|
|
|
for (round = 1; round <= 2; ++round)
|
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
lnum = rsm.sm_mmatch->startpos[no].lnum;
|
|
|
|
|
if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
return NULL;
|
|
|
|
|
|
2019-03-20 21:18:34 +01:00
|
|
|
|
s = reg_getline_submatch(lnum);
|
|
|
|
|
if (s == NULL) // anti-crash check, cannot happen?
|
2004-06-13 20:20:40 +00:00
|
|
|
|
break;
|
2019-03-20 21:18:34 +01:00
|
|
|
|
s += rsm.sm_mmatch->startpos[no].col;
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (rsm.sm_mmatch->endpos[no].lnum == lnum)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Within one line: take form start to end col.
|
2016-10-02 16:51:57 +02:00
|
|
|
|
len = rsm.sm_mmatch->endpos[no].col
|
|
|
|
|
- rsm.sm_mmatch->startpos[no].col;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (round == 2)
|
2005-07-18 21:47:53 +00:00
|
|
|
|
vim_strncpy(retval, s, len);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
++len;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Multiple lines: take start line from start col, middle
|
|
|
|
|
// lines completely and end line up to end col.
|
2024-05-12 00:07:17 +02:00
|
|
|
|
len = (int)reg_getline_submatch_len(lnum) - rsm.sm_mmatch->startpos[no].col;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (round == 2)
|
|
|
|
|
{
|
|
|
|
|
STRCPY(retval, s);
|
|
|
|
|
retval[len] = '\n';
|
|
|
|
|
}
|
|
|
|
|
++len;
|
|
|
|
|
++lnum;
|
2016-10-02 16:51:57 +02:00
|
|
|
|
while (lnum < rsm.sm_mmatch->endpos[no].lnum)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
s = reg_getline_submatch(lnum);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (round == 2)
|
|
|
|
|
STRCPY(retval + len, s);
|
2024-05-12 00:07:17 +02:00
|
|
|
|
len += (int)reg_getline_submatch_len(lnum);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (round == 2)
|
|
|
|
|
retval[len] = '\n';
|
|
|
|
|
++len;
|
2024-05-12 00:07:17 +02:00
|
|
|
|
++lnum;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
if (round == 2)
|
2009-11-25 18:51:24 +00:00
|
|
|
|
STRNCPY(retval + len, reg_getline_submatch(lnum),
|
2016-10-02 16:51:57 +02:00
|
|
|
|
rsm.sm_mmatch->endpos[no].col);
|
|
|
|
|
len += rsm.sm_mmatch->endpos[no].col;
|
2004-06-13 20:20:40 +00:00
|
|
|
|
if (round == 2)
|
|
|
|
|
retval[len] = NUL;
|
|
|
|
|
++len;
|
|
|
|
|
}
|
|
|
|
|
|
2006-04-22 22:33:57 +00:00
|
|
|
|
if (retval == NULL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
{
|
2019-05-24 19:39:03 +02:00
|
|
|
|
retval = alloc(len);
|
2006-04-22 22:33:57 +00:00
|
|
|
|
if (retval == NULL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
s = rsm.sm_match->startp[no];
|
|
|
|
|
if (s == NULL || rsm.sm_match->endp[no] == NULL)
|
2004-06-13 20:20:40 +00:00
|
|
|
|
retval = NULL;
|
|
|
|
|
else
|
2020-06-12 22:59:11 +02:00
|
|
|
|
retval = vim_strnsave(s, rsm.sm_match->endp[no] - s);
|
2004-06-13 20:20:40 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return retval;
|
|
|
|
|
}
|
2014-04-02 19:00:58 +02:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Used for the submatch() function with the optional non-zero argument: get
|
|
|
|
|
* the list of strings from the n'th submatch in allocated memory with NULs
|
|
|
|
|
* represented in NLs.
|
|
|
|
|
* Returns a list of allocated strings. Returns NULL when not in a ":s"
|
|
|
|
|
* command, for a non-existing submatch and for any error.
|
|
|
|
|
*/
|
|
|
|
|
list_T *
|
2016-01-30 20:31:25 +01:00
|
|
|
|
reg_submatch_list(int no)
|
2014-04-02 19:00:58 +02:00
|
|
|
|
{
|
|
|
|
|
char_u *s;
|
|
|
|
|
linenr_T slnum;
|
|
|
|
|
linenr_T elnum;
|
|
|
|
|
colnr_T scol;
|
|
|
|
|
colnr_T ecol;
|
|
|
|
|
int i;
|
|
|
|
|
list_T *list;
|
|
|
|
|
int error = FALSE;
|
|
|
|
|
|
|
|
|
|
if (!can_f_submatch || no < 0)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (rsm.sm_match == NULL)
|
2014-04-02 19:00:58 +02:00
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
slnum = rsm.sm_mmatch->startpos[no].lnum;
|
|
|
|
|
elnum = rsm.sm_mmatch->endpos[no].lnum;
|
2014-04-02 19:00:58 +02:00
|
|
|
|
if (slnum < 0 || elnum < 0)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
2016-10-02 16:51:57 +02:00
|
|
|
|
scol = rsm.sm_mmatch->startpos[no].col;
|
|
|
|
|
ecol = rsm.sm_mmatch->endpos[no].col;
|
2014-04-02 19:00:58 +02:00
|
|
|
|
|
|
|
|
|
list = list_alloc();
|
|
|
|
|
if (list == NULL)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
s = reg_getline_submatch(slnum) + scol;
|
|
|
|
|
if (slnum == elnum)
|
|
|
|
|
{
|
|
|
|
|
if (list_append_string(list, s, ecol - scol) == FAIL)
|
|
|
|
|
error = TRUE;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2024-05-12 00:07:17 +02:00
|
|
|
|
int max_lnum = elnum - slnum;
|
|
|
|
|
|
2014-04-02 19:00:58 +02:00
|
|
|
|
if (list_append_string(list, s, -1) == FAIL)
|
|
|
|
|
error = TRUE;
|
2024-05-12 00:07:17 +02:00
|
|
|
|
for (i = 1; i < max_lnum; i++)
|
2014-04-02 19:00:58 +02:00
|
|
|
|
{
|
|
|
|
|
s = reg_getline_submatch(slnum + i);
|
|
|
|
|
if (list_append_string(list, s, -1) == FAIL)
|
|
|
|
|
error = TRUE;
|
|
|
|
|
}
|
|
|
|
|
s = reg_getline_submatch(elnum);
|
|
|
|
|
if (list_append_string(list, s, ecol) == FAIL)
|
|
|
|
|
error = TRUE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
s = rsm.sm_match->startp[no];
|
|
|
|
|
if (s == NULL || rsm.sm_match->endp[no] == NULL)
|
2014-04-02 19:00:58 +02:00
|
|
|
|
return NULL;
|
|
|
|
|
list = list_alloc();
|
|
|
|
|
if (list == NULL)
|
|
|
|
|
return NULL;
|
|
|
|
|
if (list_append_string(list, s,
|
2016-10-02 16:51:57 +02:00
|
|
|
|
(int)(rsm.sm_match->endp[no] - s)) == FAIL)
|
2014-04-02 19:00:58 +02:00
|
|
|
|
error = TRUE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (error)
|
|
|
|
|
{
|
2016-04-08 17:07:19 +02:00
|
|
|
|
list_free(list);
|
2014-04-02 19:00:58 +02:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2020-09-06 15:14:45 +02:00
|
|
|
|
++list->lv_refcount;
|
2014-04-02 19:00:58 +02:00
|
|
|
|
return list;
|
|
|
|
|
}
|
2004-06-13 20:20:40 +00:00
|
|
|
|
#endif
|
2013-05-19 19:40:29 +02:00
|
|
|
|
|
2020-02-15 23:06:45 +01:00
|
|
|
|
/*
|
|
|
|
|
* Initialize the values used for matching against multiple lines
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
init_regexec_multi(
|
|
|
|
|
regmmatch_T *rmp,
|
|
|
|
|
win_T *win, // window in which to search or NULL
|
|
|
|
|
buf_T *buf, // buffer in which to search
|
|
|
|
|
linenr_T lnum) // nr of line to start looking for match
|
|
|
|
|
{
|
|
|
|
|
rex.reg_match = NULL;
|
|
|
|
|
rex.reg_mmatch = rmp;
|
|
|
|
|
rex.reg_buf = buf;
|
|
|
|
|
rex.reg_win = win;
|
|
|
|
|
rex.reg_firstlnum = lnum;
|
|
|
|
|
rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
|
|
|
|
|
rex.reg_line_lbr = FALSE;
|
|
|
|
|
rex.reg_ic = rmp->rmm_ic;
|
|
|
|
|
rex.reg_icombine = FALSE;
|
|
|
|
|
rex.reg_maxcol = rmp->rmm_maxcol;
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
#include "regexp_bt.c"
|
|
|
|
|
|
2013-05-19 19:40:29 +02:00
|
|
|
|
static regengine_T bt_regengine =
|
|
|
|
|
{
|
|
|
|
|
bt_regcomp,
|
2013-06-08 18:19:48 +02:00
|
|
|
|
bt_regfree,
|
2013-05-19 19:40:29 +02:00
|
|
|
|
bt_regexec_nl,
|
2024-01-04 22:54:08 +01:00
|
|
|
|
bt_regexec_multi
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
|
,(char_u *)""
|
|
|
|
|
#endif
|
2013-05-19 19:40:29 +02:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#include "regexp_nfa.c"
|
|
|
|
|
|
|
|
|
|
static regengine_T nfa_regengine =
|
|
|
|
|
{
|
|
|
|
|
nfa_regcomp,
|
2013-06-08 18:19:48 +02:00
|
|
|
|
nfa_regfree,
|
2013-05-19 19:40:29 +02:00
|
|
|
|
nfa_regexec_nl,
|
2024-01-04 22:54:08 +01:00
|
|
|
|
nfa_regexec_multi
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
|
,(char_u *)""
|
|
|
|
|
#endif
|
2013-05-19 19:40:29 +02:00
|
|
|
|
};
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// The regexp engine selected for the pattern being compiled; set by
// vim_regcomp() on every call.
// The values must match with those of 'regexpengine'.
static int regexp_engine = 0;
|
2014-11-05 14:27:36 +01:00
|
|
|
|
|
2013-05-19 19:40:29 +02:00
|
|
|
|
#ifdef DEBUG
|
|
|
|
|
static char_u regname[][30] = {
|
|
|
|
|
"AUTOMATIC Regexp Engine",
|
2013-05-29 18:45:11 +02:00
|
|
|
|
"BACKTRACKING Regexp Engine",
|
2013-05-19 19:40:29 +02:00
|
|
|
|
"NFA Regexp Engine"
|
|
|
|
|
};
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Compile a regular expression into internal code.
|
2013-06-08 18:19:48 +02:00
|
|
|
|
* Returns the program in allocated memory.
|
|
|
|
|
* Use vim_regfree() to free the memory.
|
|
|
|
|
* Returns NULL for an error.
|
2013-05-19 19:40:29 +02:00
|
|
|
|
*/
|
|
|
|
|
regprog_T *
|
2016-01-30 20:31:25 +01:00
|
|
|
|
vim_regcomp(char_u *expr_arg, int re_flags)
|
2013-05-19 19:40:29 +02:00
|
|
|
|
{
|
|
|
|
|
regprog_T *prog = NULL;
|
|
|
|
|
char_u *expr = expr_arg;
|
2019-12-23 22:59:18 +01:00
|
|
|
|
int called_emsg_before;
|
2013-05-19 19:40:29 +02:00
|
|
|
|
|
|
|
|
|
regexp_engine = p_re;
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Check for prefix "\%#=", that sets the regexp engine
|
2013-05-19 19:40:29 +02:00
|
|
|
|
if (STRNCMP(expr, "\\%#=", 4) == 0)
|
|
|
|
|
{
|
|
|
|
|
int newengine = expr[4] - '0';
|
|
|
|
|
|
|
|
|
|
if (newengine == AUTOMATIC_ENGINE
|
|
|
|
|
|| newengine == BACKTRACKING_ENGINE
|
|
|
|
|
|| newengine == NFA_ENGINE)
|
|
|
|
|
{
|
|
|
|
|
regexp_engine = expr[4] - '0';
|
|
|
|
|
expr += 5;
|
|
|
|
|
#ifdef DEBUG
|
2019-01-13 23:38:42 +01:00
|
|
|
|
smsg("New regexp mode selected (%d): %s",
|
2014-05-13 16:46:32 +02:00
|
|
|
|
regexp_engine, regname[newengine]);
|
2013-05-19 19:40:29 +02:00
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2022-01-05 17:49:15 +00:00
|
|
|
|
emsg(_(e_percent_hash_can_only_be_followed_by_zero_one_two_automatic_engine_will_be_used));
|
2013-05-19 19:40:29 +02:00
|
|
|
|
regexp_engine = AUTOMATIC_ENGINE;
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-07-17 05:43:58 +02:00
|
|
|
|
#ifdef DEBUG
|
2013-05-19 19:40:29 +02:00
|
|
|
|
bt_regengine.expr = expr;
|
|
|
|
|
nfa_regengine.expr = expr;
|
2018-07-17 05:43:58 +02:00
|
|
|
|
#endif
|
2019-02-16 18:07:57 +01:00
|
|
|
|
// reg_iswordc() uses rex.reg_buf
|
|
|
|
|
rex.reg_buf = curbuf;
|
2013-05-19 19:40:29 +02:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* First try the NFA engine, unless backtracking was requested.
|
|
|
|
|
*/
|
2019-12-23 22:59:18 +01:00
|
|
|
|
called_emsg_before = called_emsg;
|
2013-05-19 19:40:29 +02:00
|
|
|
|
if (regexp_engine != BACKTRACKING_ENGINE)
|
2018-02-10 18:45:26 +01:00
|
|
|
|
prog = nfa_regengine.regcomp(expr,
|
2015-01-27 12:59:55 +01:00
|
|
|
|
re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
|
2013-05-19 19:40:29 +02:00
|
|
|
|
else
|
|
|
|
|
prog = bt_regengine.regcomp(expr, re_flags);
|
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Check for error compiling regexp with initial engine.
|
2014-11-05 14:27:36 +01:00
|
|
|
|
if (prog == NULL)
|
2013-05-19 19:40:29 +02:00
|
|
|
|
{
|
2013-05-20 21:49:13 +02:00
|
|
|
|
#ifdef BT_REGEXP_DEBUG_LOG
|
2021-01-02 17:43:49 +01:00
|
|
|
|
if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
|
2013-05-19 19:40:29 +02:00
|
|
|
|
{
|
|
|
|
|
FILE *f;
|
2013-05-20 21:49:13 +02:00
|
|
|
|
f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
|
2013-05-19 19:40:29 +02:00
|
|
|
|
if (f)
|
|
|
|
|
{
|
2013-06-05 21:42:53 +02:00
|
|
|
|
fprintf(f, "Syntax error in \"%s\"\n", expr);
|
2013-05-19 19:40:29 +02:00
|
|
|
|
fclose(f);
|
|
|
|
|
}
|
|
|
|
|
else
|
2019-01-13 23:38:42 +01:00
|
|
|
|
semsg("(NFA) Could not open \"%s\" to write !!!",
|
2018-02-10 18:45:26 +01:00
|
|
|
|
BT_REGEXP_DEBUG_LOG_NAME);
|
2013-05-19 19:40:29 +02:00
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
/*
|
2014-11-05 14:27:36 +01:00
|
|
|
|
* If the NFA engine failed, try the backtracking engine.
|
2015-01-27 12:59:55 +01:00
|
|
|
|
* The NFA engine also fails for patterns that it can't handle well
|
|
|
|
|
* but are still valid patterns, thus a retry should work.
|
2019-02-22 17:29:43 +01:00
|
|
|
|
* But don't try if an error message was given.
|
2015-01-27 12:59:55 +01:00
|
|
|
|
*/
|
2019-12-23 22:59:18 +01:00
|
|
|
|
if (regexp_engine == AUTOMATIC_ENGINE
|
|
|
|
|
&& called_emsg == called_emsg_before)
|
2014-11-05 14:27:36 +01:00
|
|
|
|
{
|
2015-01-27 12:59:55 +01:00
|
|
|
|
regexp_engine = BACKTRACKING_ENGINE;
|
2021-01-02 17:43:49 +01:00
|
|
|
|
#ifdef FEAT_EVAL
|
|
|
|
|
report_re_switch(expr);
|
|
|
|
|
#endif
|
2013-06-05 21:42:53 +02:00
|
|
|
|
prog = bt_regengine.regcomp(expr, re_flags);
|
2014-11-05 14:27:36 +01:00
|
|
|
|
}
|
2013-06-05 21:42:53 +02:00
|
|
|
|
}
|
2013-05-19 19:40:29 +02:00
|
|
|
|
|
2014-11-05 14:27:36 +01:00
|
|
|
|
if (prog != NULL)
|
|
|
|
|
{
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Store the info needed to call regcomp() again when the engine turns
|
|
|
|
|
// out to be very slow when executing it.
|
2014-11-05 14:27:36 +01:00
|
|
|
|
prog->re_engine = regexp_engine;
|
|
|
|
|
prog->re_flags = re_flags;
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-19 19:40:29 +02:00
|
|
|
|
return prog;
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-08 18:19:48 +02:00
|
|
|
|
/*
|
|
|
|
|
* Free a compiled regexp program, returned by vim_regcomp().
|
|
|
|
|
*/
|
|
|
|
|
void
|
2016-01-30 20:31:25 +01:00
|
|
|
|
vim_regfree(regprog_T *prog)
|
2013-06-08 18:19:48 +02:00
|
|
|
|
{
|
|
|
|
|
if (prog != NULL)
|
|
|
|
|
prog->engine->regfree(prog);
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-07 23:16:33 +02:00
|
|
|
|
#if defined(EXITFREE) || defined(PROTO)
|
|
|
|
|
void
|
|
|
|
|
free_regexp_stuff(void)
|
|
|
|
|
{
|
|
|
|
|
ga_clear(®stack);
|
|
|
|
|
ga_clear(&backpos);
|
|
|
|
|
vim_free(reg_tofree);
|
|
|
|
|
vim_free(reg_prev_sub);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2014-11-05 14:27:36 +01:00
|
|
|
|
#ifdef FEAT_EVAL
|
|
|
|
|
static void
|
2016-01-30 20:31:25 +01:00
|
|
|
|
report_re_switch(char_u *pat)
|
2014-11-05 14:27:36 +01:00
|
|
|
|
{
|
|
|
|
|
if (p_verbose > 0)
|
|
|
|
|
{
|
|
|
|
|
verbose_enter();
|
2019-01-19 17:43:09 +01:00
|
|
|
|
msg_puts(_("Switching to backtracking RE engine for pattern: "));
|
|
|
|
|
msg_puts((char *)pat);
|
2014-11-05 14:27:36 +01:00
|
|
|
|
verbose_leave();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2021-11-29 20:39:38 +00:00
|
|
|
|
#if defined(FEAT_X11) || defined(PROTO)
|
2018-12-29 22:28:46 +01:00
|
|
|
|
/*
|
|
|
|
|
* Return whether "prog" is currently being executed.
|
|
|
|
|
*/
|
|
|
|
|
int
|
|
|
|
|
regprog_in_use(regprog_T *prog)
|
|
|
|
|
{
|
|
|
|
|
return prog->re_in_use;
|
|
|
|
|
}
|
2019-01-20 15:30:40 +01:00
|
|
|
|
#endif
|
2018-12-29 22:28:46 +01:00
|
|
|
|
|
2013-05-19 19:40:29 +02:00
|
|
|
|
/*
|
|
|
|
|
* Match a regexp against a string.
|
2022-05-30 20:58:55 +01:00
|
|
|
|
* "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
|
2014-11-19 16:38:07 +01:00
|
|
|
|
* Note: "rmp->regprog" may be freed and changed.
|
2013-05-19 19:40:29 +02:00
|
|
|
|
* Uses curbuf for line count and 'iskeyword'.
|
2014-11-05 14:27:36 +01:00
|
|
|
|
* When "nl" is TRUE consider a "\n" in "line" to be a line break.
|
2013-05-19 19:40:29 +02:00
|
|
|
|
*
|
|
|
|
|
* Return TRUE if there is a match, FALSE if not.
|
|
|
|
|
*/
|
2014-11-05 14:27:36 +01:00
|
|
|
|
static int
|
2017-06-18 22:41:03 +02:00
|
|
|
|
vim_regexec_string(
|
2016-01-30 20:31:25 +01:00
|
|
|
|
regmatch_T *rmp,
|
2019-12-05 21:10:38 +01:00
|
|
|
|
char_u *line, // string to match against
|
|
|
|
|
colnr_T col, // column to start looking for match
|
2016-01-30 20:31:25 +01:00
|
|
|
|
int nl)
|
2014-11-05 14:27:36 +01:00
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
int result;
|
|
|
|
|
regexec_T rex_save;
|
|
|
|
|
int rex_in_use_save = rex_in_use;
|
|
|
|
|
|
2018-07-17 05:43:58 +02:00
|
|
|
|
// Cannot use the same prog recursively, it contains state.
|
|
|
|
|
if (rmp->regprog->re_in_use)
|
|
|
|
|
{
|
2022-01-05 16:09:06 +00:00
|
|
|
|
emsg(_(e_cannot_use_pattern_recursively));
|
2018-07-17 05:43:58 +02:00
|
|
|
|
return FALSE;
|
|
|
|
|
}
|
|
|
|
|
rmp->regprog->re_in_use = TRUE;
|
|
|
|
|
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (rex_in_use)
|
2018-07-17 05:43:58 +02:00
|
|
|
|
// Being called recursively, save the state.
|
2016-10-02 16:51:57 +02:00
|
|
|
|
rex_save = rex;
|
|
|
|
|
rex_in_use = TRUE;
|
2018-07-17 05:43:58 +02:00
|
|
|
|
|
2016-10-02 16:51:57 +02:00
|
|
|
|
rex.reg_startp = NULL;
|
|
|
|
|
rex.reg_endp = NULL;
|
|
|
|
|
rex.reg_startpos = NULL;
|
|
|
|
|
rex.reg_endpos = NULL;
|
|
|
|
|
|
|
|
|
|
result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
|
2018-07-18 06:02:09 +02:00
|
|
|
|
rmp->regprog->re_in_use = FALSE;
|
2014-11-05 14:27:36 +01:00
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// NFA engine aborted because it's very slow.
|
2014-11-05 14:27:36 +01:00
|
|
|
|
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
|
|
|
|
|
&& result == NFA_TOO_EXPENSIVE)
|
|
|
|
|
{
|
|
|
|
|
int save_p_re = p_re;
|
|
|
|
|
int re_flags = rmp->regprog->re_flags;
|
|
|
|
|
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
|
|
|
|
|
|
|
|
|
|
p_re = BACKTRACKING_ENGINE;
|
|
|
|
|
vim_regfree(rmp->regprog);
|
|
|
|
|
if (pat != NULL)
|
|
|
|
|
{
|
|
|
|
|
#ifdef FEAT_EVAL
|
|
|
|
|
report_re_switch(pat);
|
|
|
|
|
#endif
|
|
|
|
|
rmp->regprog = vim_regcomp(pat, re_flags);
|
|
|
|
|
if (rmp->regprog != NULL)
|
2018-07-18 06:02:09 +02:00
|
|
|
|
{
|
|
|
|
|
rmp->regprog->re_in_use = TRUE;
|
2014-11-05 14:27:36 +01:00
|
|
|
|
result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
|
2018-07-18 06:02:09 +02:00
|
|
|
|
rmp->regprog->re_in_use = FALSE;
|
|
|
|
|
}
|
2014-11-05 14:27:36 +01:00
|
|
|
|
vim_free(pat);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
p_re = save_p_re;
|
|
|
|
|
}
|
2016-10-02 16:51:57 +02:00
|
|
|
|
|
|
|
|
|
rex_in_use = rex_in_use_save;
|
|
|
|
|
if (rex_in_use)
|
|
|
|
|
rex = rex_save;
|
|
|
|
|
|
2014-11-20 23:07:05 +01:00
|
|
|
|
return result > 0;
|
2014-11-05 14:27:36 +01:00
|
|
|
|
}
|
|
|
|
|
|
2023-03-12 21:20:59 +00:00
|
|
|
|
#if defined(FEAT_SPELL) || defined(FEAT_EVAL) || defined(FEAT_X11) || defined(PROTO)
|
2014-11-19 16:38:07 +01:00
|
|
|
|
/*
|
|
|
|
|
* Note: "*prog" may be freed and changed.
|
2014-11-20 23:07:05 +01:00
|
|
|
|
* Return TRUE if there is a match, FALSE if not.
|
2014-11-19 16:38:07 +01:00
|
|
|
|
*/
|
|
|
|
|
int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
vim_regexec_prog(
|
|
|
|
|
regprog_T **prog,
|
|
|
|
|
int ignore_case,
|
|
|
|
|
char_u *line,
|
|
|
|
|
colnr_T col)
|
2014-11-19 16:38:07 +01:00
|
|
|
|
{
|
2017-06-18 22:41:03 +02:00
|
|
|
|
int r;
|
|
|
|
|
regmatch_T regmatch;
|
2014-11-19 16:38:07 +01:00
|
|
|
|
|
|
|
|
|
regmatch.regprog = *prog;
|
|
|
|
|
regmatch.rm_ic = ignore_case;
|
2017-06-18 22:41:03 +02:00
|
|
|
|
r = vim_regexec_string(®match, line, col, FALSE);
|
2014-11-19 16:38:07 +01:00
|
|
|
|
*prog = regmatch.regprog;
|
|
|
|
|
return r;
|
|
|
|
|
}
|
2023-03-12 21:20:59 +00:00
|
|
|
|
#endif
|
2014-11-19 16:38:07 +01:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Note: "rmp->regprog" may be freed and changed.
|
2014-11-20 23:07:05 +01:00
|
|
|
|
* Return TRUE if there is a match, FALSE if not.
|
2014-11-19 16:38:07 +01:00
|
|
|
|
*/
|
2013-05-19 19:40:29 +02:00
|
|
|
|
int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
|
2013-05-19 19:40:29 +02:00
|
|
|
|
{
|
2017-06-18 22:41:03 +02:00
|
|
|
|
return vim_regexec_string(rmp, line, col, FALSE);
|
2013-05-19 19:40:29 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Like vim_regexec(), but consider a "\n" in "line" to be a line break.
|
2014-11-19 16:38:07 +01:00
|
|
|
|
* Note: "rmp->regprog" may be freed and changed.
|
2014-11-20 23:07:05 +01:00
|
|
|
|
* Return TRUE if there is a match, FALSE if not.
|
2013-05-19 19:40:29 +02:00
|
|
|
|
*/
|
|
|
|
|
int
|
2016-01-30 20:31:25 +01:00
|
|
|
|
vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
|
2013-05-19 19:40:29 +02:00
|
|
|
|
{
|
2017-06-18 22:41:03 +02:00
|
|
|
|
return vim_regexec_string(rmp, line, col, TRUE);
|
2013-05-19 19:40:29 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Match a regexp against multiple lines.
|
2018-06-23 14:21:42 +02:00
|
|
|
|
* "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
|
|
|
|
|
* Note: "rmp->regprog" may be freed and changed, even set to NULL.
|
2013-05-19 19:40:29 +02:00
|
|
|
|
* Uses curbuf for line count and 'iskeyword'.
|
|
|
|
|
*
|
|
|
|
|
* Return zero if there is no match. Return number of lines contained in the
|
|
|
|
|
* match otherwise.
|
|
|
|
|
*/
|
|
|
|
|
long
|
2016-01-30 20:31:25 +01:00
|
|
|
|
vim_regexec_multi(
|
|
|
|
|
regmmatch_T *rmp,
|
2019-12-05 21:10:38 +01:00
|
|
|
|
win_T *win, // window in which to search or NULL
|
|
|
|
|
buf_T *buf, // buffer in which to search
|
|
|
|
|
linenr_T lnum, // nr of line to start looking for match
|
|
|
|
|
colnr_T col, // column to start looking for match
|
|
|
|
|
int *timed_out) // flag is set when timeout limit reached
|
2013-05-19 19:40:29 +02:00
|
|
|
|
{
|
2016-10-02 16:51:57 +02:00
|
|
|
|
int result;
|
|
|
|
|
regexec_T rex_save;
|
|
|
|
|
int rex_in_use_save = rex_in_use;
|
|
|
|
|
|
2018-07-17 05:43:58 +02:00
|
|
|
|
// Cannot use the same prog recursively, it contains state.
|
|
|
|
|
if (rmp->regprog->re_in_use)
|
|
|
|
|
{
|
2022-01-05 16:09:06 +00:00
|
|
|
|
emsg(_(e_cannot_use_pattern_recursively));
|
2018-07-17 05:43:58 +02:00
|
|
|
|
return FALSE;
|
|
|
|
|
}
|
|
|
|
|
rmp->regprog->re_in_use = TRUE;
|
|
|
|
|
|
2016-10-02 16:51:57 +02:00
|
|
|
|
if (rex_in_use)
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// Being called recursively, save the state.
|
2016-10-02 16:51:57 +02:00
|
|
|
|
rex_save = rex;
|
|
|
|
|
rex_in_use = TRUE;
|
|
|
|
|
|
2017-06-17 18:44:21 +02:00
|
|
|
|
result = rmp->regprog->engine->regexec_multi(
|
2022-06-05 16:55:54 +01:00
|
|
|
|
rmp, win, buf, lnum, col, timed_out);
|
2018-07-18 06:02:09 +02:00
|
|
|
|
rmp->regprog->re_in_use = FALSE;
|
2014-11-05 14:27:36 +01:00
|
|
|
|
|
2019-12-05 21:10:38 +01:00
|
|
|
|
// NFA engine aborted because it's very slow.
|
2014-11-05 14:27:36 +01:00
|
|
|
|
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
|
|
|
|
|
&& result == NFA_TOO_EXPENSIVE)
|
|
|
|
|
{
|
|
|
|
|
int save_p_re = p_re;
|
|
|
|
|
int re_flags = rmp->regprog->re_flags;
|
|
|
|
|
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
|
|
|
|
|
|
|
|
|
|
p_re = BACKTRACKING_ENGINE;
|
|
|
|
|
if (pat != NULL)
|
|
|
|
|
{
|
2022-04-04 18:14:34 +01:00
|
|
|
|
regprog_T *prev_prog = rmp->regprog;
|
|
|
|
|
|
2014-11-05 14:27:36 +01:00
|
|
|
|
#ifdef FEAT_EVAL
|
|
|
|
|
report_re_switch(pat);
|
|
|
|
|
#endif
|
2018-06-23 15:09:10 +02:00
|
|
|
|
#ifdef FEAT_SYN_HL
|
2018-06-23 14:21:42 +02:00
|
|
|
|
// checking for \z misuse was already done when compiling for NFA,
|
|
|
|
|
// allow all here
|
|
|
|
|
reg_do_extmatch = REX_ALL;
|
2018-06-23 15:09:10 +02:00
|
|
|
|
#endif
|
2014-11-05 14:27:36 +01:00
|
|
|
|
rmp->regprog = vim_regcomp(pat, re_flags);
|
2018-06-23 15:09:10 +02:00
|
|
|
|
#ifdef FEAT_SYN_HL
|
2018-06-23 14:21:42 +02:00
|
|
|
|
reg_do_extmatch = 0;
|
2018-06-23 15:09:10 +02:00
|
|
|
|
#endif
|
2022-04-04 18:14:34 +01:00
|
|
|
|
if (rmp->regprog == NULL)
|
2018-07-18 06:02:09 +02:00
|
|
|
|
{
|
2022-04-04 18:14:34 +01:00
|
|
|
|
// Somehow compiling the pattern failed now, put back the
|
|
|
|
|
// previous one to avoid "regprog" becoming NULL.
|
|
|
|
|
rmp->regprog = prev_prog;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
vim_regfree(prev_prog);
|
|
|
|
|
|
2018-07-18 06:02:09 +02:00
|
|
|
|
rmp->regprog->re_in_use = TRUE;
|
2014-11-05 14:27:36 +01:00
|
|
|
|
result = rmp->regprog->engine->regexec_multi(
|
2022-06-05 16:55:54 +01:00
|
|
|
|
rmp, win, buf, lnum, col, timed_out);
|
2018-07-18 06:02:09 +02:00
|
|
|
|
rmp->regprog->re_in_use = FALSE;
|
|
|
|
|
}
|
2014-11-05 14:27:36 +01:00
|
|
|
|
vim_free(pat);
|
|
|
|
|
}
|
|
|
|
|
p_re = save_p_re;
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-02 16:51:57 +02:00
|
|
|
|
rex_in_use = rex_in_use_save;
|
|
|
|
|
if (rex_in_use)
|
|
|
|
|
rex = rex_save;
|
|
|
|
|
|
2014-11-20 23:07:05 +01:00
|
|
|
|
return result <= 0 ? 0 : result;
|
2013-05-19 19:40:29 +02:00
|
|
|
|
}
|