diff options
Diffstat (limited to 'lib/glob')
-rw-r--r-- | lib/glob/Android.mk | 3 | ||||
-rw-r--r-- | lib/glob/Makefile.in | 11 | ||||
-rw-r--r-- | lib/glob/gmisc.c | 314 | ||||
-rw-r--r-- | lib/glob/smatch.c | 16 | ||||
-rw-r--r-- | lib/glob/xmbsrtowcs.c | 106 |
5 files changed, 445 insertions, 5 deletions
diff --git a/lib/glob/Android.mk b/lib/glob/Android.mk index b7d31bf..b22b8b5 100644 --- a/lib/glob/Android.mk +++ b/lib/glob/Android.mk @@ -9,7 +9,8 @@ LOCAL_SRC_FILES:= \ glob.c \ smatch.c \ strmatch.c \ - xmbsrtowcs.c + xmbsrtowcs.c \ + gmisc.c LOCAL_C_INCLUDES += \ $(LOCAL_PATH)/../.. \ diff --git a/lib/glob/Makefile.in b/lib/glob/Makefile.in index 1ccae68..12cbb61 100644 --- a/lib/glob/Makefile.in +++ b/lib/glob/Makefile.in @@ -71,7 +71,7 @@ CSOURCES = $(srcdir)/glob.c $(srcdir)/strmatch.c $(srcdir)/smatch.c \ # The header files for this library. HSOURCES = $(srcdir)/strmatch.h -OBJECTS = glob.o strmatch.o smatch.o xmbsrtowcs.o +OBJECTS = glob.o strmatch.o smatch.o xmbsrtowcs.o gmisc.o # The texinfo files which document this library. DOCSOURCE = doc/glob.texi @@ -119,6 +119,9 @@ realclean distclean maintainer-clean: clean mostlyclean: clean -( cd doc && $(MAKE) $(MFLAGS) $@ ) +${BUILD_DIR}/pathnames.h: ${BUILD_DIR}/config.h ${BUILD_DIR}/Makefile Makefile + -( cd ${BUILD_DIR} && ${MAKE} ${MFLAGS} pathnames.h ) + ###################################################################### # # # Dependencies for the object files which make up this library. # @@ -137,18 +140,24 @@ strmatch.o: $(BUILD_DIR)/config.h strmatch.o: $(BASHINCDIR)/stdc.h glob.o: $(BUILD_DIR)/config.h +glob.o: $(topdir)/shell.h $(BUILD_DIR)/pathnames.h glob.o: $(topdir)/bashtypes.h $(BASHINCDIR)/ansi_stdlib.h $(topdir)/bashansi.h glob.o: $(BASHINCDIR)/posixstat.h $(BASHINCDIR)/memalloc.h glob.o: strmatch.h glob.h glob.o: $(BASHINCDIR)/shmbutil.h glob.o: $(topdir)/xmalloc.h +gmisc.o: $(BUILD_DIR)/config.h +gmisc.o: $(topdir)/bashtypes.h $(BASHINCDIR)/ansi_stdlib.h $(topdir)/bashansi.h +gmisc.o: $(BASHINCDIR)/shmbutil.h + xmbsrtowcs.o: ${BUILD_DIR}/config.h xmbsrtowcs.o: ${topdir}/bashansi.h ${BASHINCDIR}/ansi_stdlib.h xmbsrtowcs.o: ${BASHINCDIR}/shmbutil.h # Rules for deficient makes, like SunOS and Solaris glob.o: glob.c +gmisc.o: gmisc.c strmatch.o: strmatch.c smatch.o: smatch.c xmbsrtowcs.o: xmbsrtowcs.c diff --git a/lib/glob/gmisc.c b/lib/glob/gmisc.c new file mode 100644 index 0000000..84794cd --- /dev/null +++ b/lib/glob/gmisc.c @@ -0,0 +1,314 @@ +/* gmisc.c -- miscellaneous pattern matching utility functions for Bash. + + Copyright (C) 2010 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne-Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <config.h> + +#include "bashtypes.h" + +#if defined (HAVE_UNISTD_H) +# include <unistd.h> +#endif + +#include "bashansi.h" +#include "shmbutil.h" + +#include "stdc.h" + +#ifndef LPAREN +# define LPAREN '(' +#endif +#ifndef RPAREN +# define RPAREN ')' +#endif + +#if defined (HANDLE_MULTIBYTE) +#define WLPAREN L'(' +#define WRPAREN L')' + +/* Return 1 of the first character of WSTRING could match the first + character of pattern WPAT. Wide character version. */ +int +match_pattern_wchar (wpat, wstring) + wchar_t *wpat, *wstring; +{ + wchar_t wc; + + if (*wstring == 0) + return (0); + + switch (wc = *wpat++) + { + default: + return (*wstring == wc); + case L'\\': + return (*wstring == *wpat); + case L'?': + return (*wpat == WLPAREN ? 1 : (*wstring != L'\0')); + case L'*': + return (1); + case L'+': + case L'!': + case L'@': + return (*wpat == WLPAREN ? 1 : (*wstring == wc)); + case L'[': + return (*wstring != L'\0'); + } +} + +int +wmatchlen (wpat, wmax) + wchar_t *wpat; + size_t wmax; +{ + wchar_t wc, *wbrack; + int matlen, t, in_cclass, in_collsym, in_equiv; + + if (*wpat == 0) + return (0); + + matlen = in_cclass = in_collsym = in_equiv = 0; + while (wc = *wpat++) + { + switch (wc) + { + default: + matlen++; + break; + case L'\\': + if (*wpat == 0) + return ++matlen; + else + { + matlen++; + wpat++; + } + break; + case L'?': + if (*wpat == WLPAREN) + return (matlen = -1); /* XXX for now */ + else + matlen++; + break; + case L'*': + return (matlen = -1); + case L'+': + case L'!': + case L'@': + if (*wpat == WLPAREN) + return (matlen = -1); /* XXX for now */ + else + matlen++; + break; + case L'[': + /* scan for ending `]', skipping over embedded [:...:] */ + wbrack = wpat; + wc = *wpat++; + do + { + if (wc == 0) + { + matlen += wpat - wbrack - 1; /* incremented below */ + break; + } + else if (wc == L'\\') + { + wc = *wpat++; + if (*wpat == 0) + break; + } + else if (wc == L'[' && *wpat == L':') /* character class */ + { + wpat++; + in_cclass = 1; + } + else if (in_cclass && wc == L':' && *wpat == L']') + { + wpat++; + in_cclass = 0; + } + else if (wc == L'[' && *wpat == L'.') /* collating symbol */ + { + wpat++; + if (*wpat == L']') /* right bracket can appear as collating symbol */ + wpat++; + in_collsym = 1; + } + else if (in_collsym && wc == L'.' && *wpat == L']') + { + wpat++; + in_collsym = 0; + } + else if (wc == L'[' && *wpat == L'=') /* equivalence class */ + { + wpat++; + if (*wpat == L']') /* right bracket can appear as equivalence class */ + wpat++; + in_equiv = 1; + } + else if (in_equiv && wc == L'=' && *wpat == L']') + { + wpat++; + in_equiv = 0; + } + } + while ((wc = *wpat++) != L']'); + matlen++; /* bracket expression can only match one char */ + break; + } + } + + return matlen; +} +#endif + +/* Return 1 of the first character of STRING could match the first + character of pattern PAT. Used to avoid n2 calls to strmatch(). */ +int +match_pattern_char (pat, string) + char *pat, *string; +{ + char c; + + if (*string == 0) + return (0); + + switch (c = *pat++) + { + default: + return (*string == c); + case '\\': + return (*string == *pat); + case '?': + return (*pat == LPAREN ? 1 : (*string != '\0')); + case '*': + return (1); + case '+': + case '!': + case '@': + return (*pat == LPAREN ? 1 : (*string == c)); + case '[': + return (*string != '\0'); + } +} + +int +umatchlen (pat, max) + char *pat; + size_t max; +{ + char c, *brack; + int matlen, t, in_cclass, in_collsym, in_equiv; + + if (*pat == 0) + return (0); + + matlen = in_cclass = in_collsym = in_equiv = 0; + while (c = *pat++) + { + switch (c) + { + default: + matlen++; + break; + case '\\': + if (*pat == 0) + return ++matlen; + else + { + matlen++; + pat++; + } + break; + case '?': + if (*pat == LPAREN) + return (matlen = -1); /* XXX for now */ + else + matlen++; + break; + case '*': + return (matlen = -1); + case '+': + case '!': + case '@': + if (*pat == LPAREN) + return (matlen = -1); /* XXX for now */ + else + matlen++; + break; + case '[': + /* scan for ending `]', skipping over embedded [:...:] */ + brack = pat; + c = *pat++; + do + { + if (c == 0) + { + matlen += pat - brack - 1; /* incremented below */ + break; + } + else if (c == '\\') + { + c = *pat++; + if (*pat == 0) + break; + } + else if (c == '[' && *pat == ':') /* character class */ + { + pat++; + in_cclass = 1; + } + else if (in_cclass && c == ':' && *pat == ']') + { + pat++; + in_cclass = 0; + } + else if (c == '[' && *pat == '.') /* collating symbol */ + { + pat++; + if (*pat == ']') /* right bracket can appear as collating symbol */ + pat++; + in_collsym = 1; + } + else if (in_collsym && c == '.' && *pat == ']') + { + pat++; + in_collsym = 0; + } + else if (c == '[' && *pat == '=') /* equivalence class */ + { + pat++; + if (*pat == ']') /* right bracket can appear as equivalence class */ + pat++; + in_equiv = 1; + } + else if (in_equiv && c == '=' && *pat == ']') + { + pat++; + in_equiv = 0; + } + } + while ((c = *pat++) != ']'); + matlen++; /* bracket expression can only match one char */ + break; + } + } + + return matlen; +} diff --git a/lib/glob/smatch.c b/lib/glob/smatch.c index 11d86b0..061142b 100644 --- a/lib/glob/smatch.c +++ b/lib/glob/smatch.c @@ -1,7 +1,7 @@ /* strmatch.c -- ksh-like extended pattern matching for the shell and filename globbing. */ -/* Copyright (C) 1991-2005 Free Software Foundation, Inc. +/* Copyright (C) 1991-2011 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. @@ -241,6 +241,8 @@ is_cclass (c, name) # define STREQ(s1, s2) ((wcscmp (s1, s2) == 0)) # define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0) +extern char *mbsmbchar __P((const char *)); + static int rangecmp_wc (c1, c2) wint_t c1, c2; @@ -314,7 +316,7 @@ is_wcclass (wc, name) memset (&state, '\0', sizeof (mbstate_t)); mbs = (char *) malloc (wcslen(name) * MB_CUR_MAX + 1); - mbslength = wcsrtombs(mbs, (const wchar_t **)&name, (wcslen(name) * MB_CUR_MAX + 1), &state); + mbslength = wcsrtombs (mbs, (const wchar_t **)&name, (wcslen(name) * MB_CUR_MAX + 1), &state); if (mbslength == (size_t)-1 || mbslength == (size_t)-2) { @@ -365,6 +367,16 @@ xstrmatch (pattern, string, flags) int ret; size_t n; wchar_t *wpattern, *wstring; + size_t plen, slen, mplen, mslen; + +#if 0 + plen = strlen (pattern); + mplen = mbstrlen (pattern); + if (plen == mplen && strlen (string) == mbstrlen (string)) +#else + if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0) +#endif + return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags)); if (MB_CUR_MAX == 1) return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags)); diff --git a/lib/glob/xmbsrtowcs.c b/lib/glob/xmbsrtowcs.c index 23fcd8e..7abf727 100644 --- a/lib/glob/xmbsrtowcs.c +++ b/lib/glob/xmbsrtowcs.c @@ -1,6 +1,6 @@ /* xmbsrtowcs.c -- replacement function for mbsrtowcs */ -/* Copyright (C) 2002-2004 Free Software Foundation, Inc. +/* Copyright (C) 2002-2010 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. @@ -18,6 +18,12 @@ along with Bash. If not, see <http://www.gnu.org/licenses/>. */ +/* Ask for GNU extensions to get extern declaration for mbsnrtowcs if + available via glibc. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif + #include <config.h> #include <bashansi.h> @@ -32,6 +38,11 @@ #ifndef FREE # define FREE(x) do { if (x) free (x); } while (0) #endif + +#if ! HAVE_STRCHRNUL +extern char *strchrnul __P((const char *, int)); +#endif + /* On some locales (ex. ja_JP.sjis), mbsrtowc doesn't convert 0x5c to U<0x5c>. So, this function is made for converting 0x5c to U<0x5c>. */ @@ -120,6 +131,94 @@ xmbsrtowcs (dest, src, len, pstate) return (wclength); } +#if HAVE_MBSNRTOWCS +/* Convert a multibyte string to a wide character string. Memory for the + new wide character string is obtained with malloc. + + Fast multiple-character version of xdupmbstowcs used when the indices are + not required and mbsnrtowcs is available. */ + +static size_t +xdupmbstowcs2 (destp, src) + wchar_t **destp; /* Store the pointer to the wide character string */ + const char *src; /* Multibyte character string */ +{ + const char *p; /* Conversion start position of src */ + wchar_t *wsbuf; /* Buffer for wide characters. */ + size_t wsbuf_size; /* Size of WSBUF */ + size_t wcnum; /* Number of wide characters in WSBUF */ + mbstate_t state; /* Conversion State */ + size_t wcslength; /* Number of wide characters produced by the conversion. */ + const char *end_or_backslash; + size_t nms; /* Number of multibyte characters to convert at one time. */ + mbstate_t tmp_state; + const char *tmp_p; + + memset (&state, '\0', sizeof(mbstate_t)); + + wsbuf_size = 0; + wsbuf = NULL; + + p = src; + wcnum = 0; + do + { + end_or_backslash = strchrnul(p, '\\'); + nms = (end_or_backslash - p); + if (*end_or_backslash == '\0') + nms++; + + /* Compute the number of produced wide-characters. */ + tmp_p = p; + tmp_state = state; + wcslength = mbsnrtowcs(NULL, &tmp_p, nms, 0, &tmp_state); + + /* Conversion failed. */ + if (wcslength == (size_t)-1) + { + free (wsbuf); + *destp = NULL; + return (size_t)-1; + } + + /* Resize the buffer if it is not large enough. */ + if (wsbuf_size < wcnum+wcslength+1) /* 1 for the L'\0' or the potential L'\\' */ + { + wchar_t *wstmp; + + wsbuf_size = wcnum+wcslength+1; /* 1 for the L'\0' or the potential L'\\' */ + + wstmp = (wchar_t *) realloc (wsbuf, wsbuf_size * sizeof (wchar_t)); + if (wstmp == NULL) + { + free (wsbuf); + *destp = NULL; + return (size_t)-1; + } + wsbuf = wstmp; + } + + /* Perform the conversion. This is assumed to return 'wcslength'. + * It may set 'p' to NULL. */ + mbsnrtowcs(wsbuf+wcnum, &p, nms, wsbuf_size-wcnum, &state); + + wcnum += wcslength; + + if (mbsinit (&state) && (p != NULL) && (*p == '\\')) + { + wsbuf[wcnum++] = L'\\'; + p++; + } + } + while (p != NULL); + + *destp = wsbuf; + + /* Return the length of the wide character string, not including `\0'. */ + return wcnum; +} +#endif /* HAVE_MBSNRTOWCS */ + /* Convert a multibyte string to a wide character string. Memory for the new wide character string is obtained with malloc. @@ -155,6 +254,11 @@ xdupmbstowcs (destp, indicesp, src) return (size_t)-1; } +#if HAVE_MBSNRTOWCS + if (indicesp == NULL) + return (xdupmbstowcs2 (destp, src)); +#endif + memset (&state, '\0', sizeof(mbstate_t)); wsbuf_size = WSBUF_INC; |