about summary refs log tree commit diff stats
path: root/lib/glob
diff options
context:
space:
mode:
Diffstat (limited to 'lib/glob')
-rw-r--r-- lib/glob/Android.mk 3
-rw-r--r-- lib/glob/Makefile.in 11
-rw-r--r-- lib/glob/gmisc.c 314
-rw-r--r-- lib/glob/smatch.c 16
-rw-r--r-- lib/glob/xmbsrtowcs.c 106
5 files changed, 445 insertions, 5 deletions
diff --git a/lib/glob/Android.mk b/lib/glob/Android.mk
index b7d31bf..b22b8b5 100644
--- a/lib/glob/Android.mk
+++ b/lib/glob/Android.mk
@@ -9,7 +9,8 @@ LOCAL_SRC_FILES:= \
glob.c \
smatch.c \
strmatch.c \
- xmbsrtowcs.c
+ xmbsrtowcs.c \
+ gmisc.c
LOCAL_C_INCLUDES += \
$(LOCAL_PATH)/../.. \
diff --git a/lib/glob/Makefile.in b/lib/glob/Makefile.in
index 1ccae68..12cbb61 100644
--- a/lib/glob/Makefile.in
+++ b/lib/glob/Makefile.in
@@ -71,7 +71,7 @@ CSOURCES = $(srcdir)/glob.c $(srcdir)/strmatch.c $(srcdir)/smatch.c \
# The header files for this library.
HSOURCES = $(srcdir)/strmatch.h
-OBJECTS = glob.o strmatch.o smatch.o xmbsrtowcs.o
+OBJECTS = glob.o strmatch.o smatch.o xmbsrtowcs.o gmisc.o
# The texinfo files which document this library.
DOCSOURCE = doc/glob.texi
@@ -119,6 +119,9 @@ realclean distclean maintainer-clean: clean
mostlyclean: clean
-( cd doc && $(MAKE) $(MFLAGS) $@ )
+${BUILD_DIR}/pathnames.h: ${BUILD_DIR}/config.h ${BUILD_DIR}/Makefile Makefile # rebuilt by a sub-make run inside ${BUILD_DIR}; the leading `-' ignores its failure
+ -( cd ${BUILD_DIR} && ${MAKE} ${MFLAGS} pathnames.h )
+
######################################################################
# #
# Dependencies for the object files which make up this library. #
@@ -137,18 +140,24 @@ strmatch.o: $(BUILD_DIR)/config.h
strmatch.o: $(BASHINCDIR)/stdc.h
glob.o: $(BUILD_DIR)/config.h
+glob.o: $(topdir)/shell.h $(BUILD_DIR)/pathnames.h
glob.o: $(topdir)/bashtypes.h $(BASHINCDIR)/ansi_stdlib.h $(topdir)/bashansi.h
glob.o: $(BASHINCDIR)/posixstat.h $(BASHINCDIR)/memalloc.h
glob.o: strmatch.h glob.h
glob.o: $(BASHINCDIR)/shmbutil.h
glob.o: $(topdir)/xmalloc.h
+gmisc.o: $(BUILD_DIR)/config.h
+gmisc.o: $(topdir)/bashtypes.h $(BASHINCDIR)/ansi_stdlib.h $(topdir)/bashansi.h
+gmisc.o: $(BASHINCDIR)/shmbutil.h
+
xmbsrtowcs.o: ${BUILD_DIR}/config.h
xmbsrtowcs.o: ${topdir}/bashansi.h ${BASHINCDIR}/ansi_stdlib.h
xmbsrtowcs.o: ${BASHINCDIR}/shmbutil.h
# Rules for deficient makes, like SunOS and Solaris
glob.o: glob.c
+gmisc.o: gmisc.c
strmatch.o: strmatch.c
smatch.o: smatch.c
xmbsrtowcs.o: xmbsrtowcs.c
diff --git a/lib/glob/gmisc.c b/lib/glob/gmisc.c
new file mode 100644
index 0000000..84794cd
--- /dev/null
+++ b/lib/glob/gmisc.c
@@ -0,0 +1,314 @@
+/* gmisc.c -- miscellaneous pattern matching utility functions for Bash.
+
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This file is part of GNU Bash, the Bourne-Again SHell.
+
+ Bash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Bash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Bash. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <config.h>
+
+#include "bashtypes.h"
+
+#if defined (HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+
+#include "bashansi.h"
+#include "shmbutil.h"
+
+#include "stdc.h"
+
+#ifndef LPAREN
+# define LPAREN '('
+#endif
+#ifndef RPAREN
+# define RPAREN ')'
+#endif
+
+#if defined (HANDLE_MULTIBYTE)
+#define WLPAREN L'('
+#define WRPAREN L')'
+
+/* Return 1 if the first character of WSTRING could match the first
+   character of pattern WPAT, and 0 if it cannot.  Wide character version.
+   An empty WSTRING never matches. */
+int
+match_pattern_wchar (wpat, wstring)
+     wchar_t *wpat, *wstring;
+{
+  wchar_t lead;
+
+  if (wstring[0] == L'\0')
+    return (0);
+
+  lead = wpat[0];
+
+  /* `*', `?' and `[' accept any first character of a non-empty string,
+     whatever follows them in the pattern. */
+  if (lead == L'*' || lead == L'?' || lead == L'[')
+    return (1);
+
+  /* A backslash makes the following pattern character match literally. */
+  if (lead == L'\\')
+    return (wstring[0] == wpat[1]);
+
+  /* `+', `!' or `@' immediately followed by `(' always answers 1. */
+  if ((lead == L'+' || lead == L'!' || lead == L'@') && wpat[1] == WLPAREN)
+    return (1);
+
+  /* Anything else, including a bare `+', `!' or `@', is compared as an
+     ordinary character. */
+  return (wstring[0] == lead);
+}
+
+/* Count the characters matched by the wide-character pattern WPAT.
+
+   Each ordinary character, each backslash-escaped character, each `?' and
+   each complete bracket expression counts as one.  The characters of an
+   unterminated bracket expression are counted one by one.  Returns -1 when
+   WPAT contains `*', or `?', `+', `!' or `@' immediately followed by `(',
+   since no fixed length is computed for those.  Returns 0 for an empty
+   pattern.
+
+   WMAX is accepted for interface compatibility and is not consulted.
+   The scan never reads beyond WPAT's terminating null character. */
+int
+wmatchlen (wpat, wmax)
+     wchar_t *wpat;
+     size_t wmax;
+{
+  wchar_t wc, *wbrack;
+  int matlen, in_cclass, in_collsym, in_equiv;
+
+  if (*wpat == 0)
+    return (0);
+
+  matlen = in_cclass = in_collsym = in_equiv = 0;
+  while ((wc = *wpat++) != 0)
+    {
+      switch (wc)
+        {
+        default:
+          matlen++;
+          break;
+        case L'\\':
+          /* A trailing backslash counts as one character by itself. */
+          if (*wpat == 0)
+            return ++matlen;
+          else
+            {
+              matlen++;
+              wpat++;
+            }
+          break;
+        case L'?':
+          if (*wpat == WLPAREN)
+            return (matlen = -1);       /* XXX for now */
+          else
+            matlen++;
+          break;
+        case L'*':
+          return (matlen = -1);
+        case L'+':
+        case L'!':
+        case L'@':
+          if (*wpat == WLPAREN)
+            return (matlen = -1);       /* XXX for now */
+          else
+            matlen++;
+          break;
+        case L'[':
+          /* scan for ending `]', skipping over embedded [:...:], [....]
+             and [=...=].  The first character after `[' is never taken
+             as the closing bracket. */
+          wbrack = wpat;
+          wc = *wpat++;
+          do
+            {
+              if (wc == 0)
+                {
+                  /* Unterminated bracket expression.  WPAT is one past the
+                     null character here; step back onto it so the outer
+                     loop stops there instead of reading beyond the end of
+                     the pattern. */
+                  wpat--;
+                  matlen += wpat - wbrack;      /* incremented below */
+                  break;
+                }
+              else if (wc == L'\\')
+                {
+                  wc = *wpat++;
+                  if (wc == 0)
+                    {
+                      /* The backslash ended the pattern: treat it like
+                         any other unterminated bracket expression. */
+                      wpat--;
+                      matlen += wpat - wbrack;  /* incremented below */
+                      break;
+                    }
+                  if (*wpat == 0)
+                    break;
+                }
+              else if (wc == L'[' && *wpat == L':')     /* character class */
+                {
+                  wpat++;
+                  in_cclass = 1;
+                }
+              else if (in_cclass && wc == L':' && *wpat == L']')
+                {
+                  wpat++;
+                  in_cclass = 0;
+                }
+              else if (wc == L'[' && *wpat == L'.')     /* collating symbol */
+                {
+                  wpat++;
+                  if (*wpat == L']')    /* right bracket can appear as collating symbol */
+                    wpat++;
+                  in_collsym = 1;
+                }
+              else if (in_collsym && wc == L'.' && *wpat == L']')
+                {
+                  wpat++;
+                  in_collsym = 0;
+                }
+              else if (wc == L'[' && *wpat == L'=')     /* equivalence class */
+                {
+                  wpat++;
+                  if (*wpat == L']')    /* right bracket can appear as equivalence class */
+                    wpat++;
+                  in_equiv = 1;
+                }
+              else if (in_equiv && wc == L'=' && *wpat == L']')
+                {
+                  wpat++;
+                  in_equiv = 0;
+                }
+            }
+          while ((wc = *wpat++) != L']');
+          matlen++;             /* bracket expression can only match one char */
+          break;
+        }
+    }
+
+  return matlen;
+}
+#endif
+
+/* Return 1 if the first character of STRING could match the first
+   character of pattern PAT, and 0 if it cannot.  An empty STRING never
+   matches.  Used to avoid n2 calls to strmatch(). */
+int
+match_pattern_char (pat, string)
+     char *pat, *string;
+{
+  char lead;
+
+  if (string[0] == '\0')
+    return (0);
+
+  lead = pat[0];
+
+  /* `*', `?' and `[' accept any first character of a non-empty string,
+     whatever follows them in the pattern. */
+  if (lead == '*' || lead == '?' || lead == '[')
+    return (1);
+
+  /* A backslash makes the following pattern character match literally. */
+  if (lead == '\\')
+    return (string[0] == pat[1]);
+
+  /* `+', `!' or `@' immediately followed by `(' always answers 1. */
+  if ((lead == '+' || lead == '!' || lead == '@') && pat[1] == LPAREN)
+    return (1);
+
+  /* Anything else, including a bare `+', `!' or `@', is compared as an
+     ordinary character. */
+  return (string[0] == lead);
+}
+
+/* Count the characters matched by the pattern PAT.
+
+   Each ordinary character, each backslash-escaped character, each `?' and
+   each complete bracket expression counts as one.  The characters of an
+   unterminated bracket expression are counted one by one.  Returns -1 when
+   PAT contains `*', or `?', `+', `!' or `@' immediately followed by `(',
+   since no fixed length is computed for those.  Returns 0 for an empty
+   pattern.
+
+   MAX is accepted for interface compatibility and is not consulted.
+   The scan never reads beyond PAT's terminating NUL. */
+int
+umatchlen (pat, max)
+     char *pat;
+     size_t max;
+{
+  char c, *brack;
+  int matlen, in_cclass, in_collsym, in_equiv;
+
+  if (*pat == 0)
+    return (0);
+
+  matlen = in_cclass = in_collsym = in_equiv = 0;
+  while ((c = *pat++) != 0)
+    {
+      switch (c)
+        {
+        default:
+          matlen++;
+          break;
+        case '\\':
+          /* A trailing backslash counts as one character by itself. */
+          if (*pat == 0)
+            return ++matlen;
+          else
+            {
+              matlen++;
+              pat++;
+            }
+          break;
+        case '?':
+          if (*pat == LPAREN)
+            return (matlen = -1);       /* XXX for now */
+          else
+            matlen++;
+          break;
+        case '*':
+          return (matlen = -1);
+        case '+':
+        case '!':
+        case '@':
+          if (*pat == LPAREN)
+            return (matlen = -1);       /* XXX for now */
+          else
+            matlen++;
+          break;
+        case '[':
+          /* scan for ending `]', skipping over embedded [:...:], [....]
+             and [=...=].  The first character after `[' is never taken
+             as the closing bracket. */
+          brack = pat;
+          c = *pat++;
+          do
+            {
+              if (c == 0)
+                {
+                  /* Unterminated bracket expression.  PAT is one past the
+                     NUL here; step back onto it so the outer loop stops
+                     there instead of reading beyond the end of the
+                     pattern. */
+                  pat--;
+                  matlen += pat - brack;        /* incremented below */
+                  break;
+                }
+              else if (c == '\\')
+                {
+                  c = *pat++;
+                  if (c == 0)
+                    {
+                      /* The backslash ended the pattern: treat it like
+                         any other unterminated bracket expression. */
+                      pat--;
+                      matlen += pat - brack;    /* incremented below */
+                      break;
+                    }
+                  if (*pat == 0)
+                    break;
+                }
+              else if (c == '[' && *pat == ':') /* character class */
+                {
+                  pat++;
+                  in_cclass = 1;
+                }
+              else if (in_cclass && c == ':' && *pat == ']')
+                {
+                  pat++;
+                  in_cclass = 0;
+                }
+              else if (c == '[' && *pat == '.') /* collating symbol */
+                {
+                  pat++;
+                  if (*pat == ']')      /* right bracket can appear as collating symbol */
+                    pat++;
+                  in_collsym = 1;
+                }
+              else if (in_collsym && c == '.' && *pat == ']')
+                {
+                  pat++;
+                  in_collsym = 0;
+                }
+              else if (c == '[' && *pat == '=') /* equivalence class */
+                {
+                  pat++;
+                  if (*pat == ']')      /* right bracket can appear as equivalence class */
+                    pat++;
+                  in_equiv = 1;
+                }
+              else if (in_equiv && c == '=' && *pat == ']')
+                {
+                  pat++;
+                  in_equiv = 0;
+                }
+            }
+          while ((c = *pat++) != ']');
+          matlen++;             /* bracket expression can only match one char */
+          break;
+        }
+    }
+
+  return matlen;
+}
diff --git a/lib/glob/smatch.c b/lib/glob/smatch.c
index 11d86b0..061142b 100644
--- a/lib/glob/smatch.c
+++ b/lib/glob/smatch.c
@@ -1,7 +1,7 @@
/* strmatch.c -- ksh-like extended pattern matching for the shell and filename
globbing. */
-/* Copyright (C) 1991-2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991-2011 Free Software Foundation, Inc.
This file is part of GNU Bash, the Bourne Again SHell.
@@ -241,6 +241,8 @@ is_cclass (c, name)
# define STREQ(s1, s2) ((wcscmp (s1, s2) == 0))
# define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
+extern char *mbsmbchar __P((const char *));
+
static int
rangecmp_wc (c1, c2)
wint_t c1, c2;
@@ -314,7 +316,7 @@ is_wcclass (wc, name)
memset (&state, '\0', sizeof (mbstate_t));
mbs = (char *) malloc (wcslen(name) * MB_CUR_MAX + 1);
- mbslength = wcsrtombs(mbs, (const wchar_t **)&name, (wcslen(name) * MB_CUR_MAX + 1), &state);
+ mbslength = wcsrtombs (mbs, (const wchar_t **)&name, (wcslen(name) * MB_CUR_MAX + 1), &state);
if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
{
@@ -365,6 +367,16 @@ xstrmatch (pattern, string, flags)
int ret;
size_t n;
wchar_t *wpattern, *wstring;
+ size_t plen, slen, mplen, mslen;
+
+#if 0
+ plen = strlen (pattern);
+ mplen = mbstrlen (pattern);
+ if (plen == mplen && strlen (string) == mbstrlen (string))
+#else
+ if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0)
+#endif
+ return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
if (MB_CUR_MAX == 1)
return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
diff --git a/lib/glob/xmbsrtowcs.c b/lib/glob/xmbsrtowcs.c
index 23fcd8e..7abf727 100644
--- a/lib/glob/xmbsrtowcs.c
+++ b/lib/glob/xmbsrtowcs.c
@@ -1,6 +1,6 @@
/* xmbsrtowcs.c -- replacement function for mbsrtowcs */
-/* Copyright (C) 2002-2004 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2010 Free Software Foundation, Inc.
This file is part of GNU Bash, the Bourne Again SHell.
@@ -18,6 +18,12 @@
along with Bash. If not, see <http://www.gnu.org/licenses/>.
*/
+/* Ask for GNU extensions to get extern declaration for mbsnrtowcs if
+ available via glibc. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+
#include <config.h>
#include <bashansi.h>
@@ -32,6 +38,11 @@
#ifndef FREE
# define FREE(x) do { if (x) free (x); } while (0)
#endif
+
+#if ! HAVE_STRCHRNUL
+extern char *strchrnul __P((const char *, int));
+#endif
+
/* On some locales (ex. ja_JP.sjis), mbsrtowc doesn't convert 0x5c to U<0x5c>.
So, this function is made for converting 0x5c to U<0x5c>. */
@@ -120,6 +131,94 @@ xmbsrtowcs (dest, src, len, pstate)
return (wclength);
}
+#if HAVE_MBSNRTOWCS
+/* Convert a multibyte string to a wide character string. Memory for the
+ new wide character string is obtained with malloc.
+
+ Fast multiple-character version of xdupmbstowcs used when the indices are
+ not required and mbsnrtowcs is available.
+
+ SRC is processed in runs that end at the next backslash or at the
+ terminating NUL. Each run is first measured with mbsnrtowcs and a NULL
+ destination, then converted into the buffer, which is grown with realloc
+ when needed. A backslash that follows a run is stored as L'\\' directly
+ instead of being passed to mbsnrtowcs, but only when the conversion state
+ is initial.
+
+ Returns the number of wide characters stored in *DESTP, not counting the
+ terminating null. If measuring a run fails or realloc fails, the buffer
+ is freed, *DESTP is set to NULL and (size_t)-1 is returned.
+
+ NOTE(review): the return value of the second (real) mbsnrtowcs call is
+ not checked.
+ NOTE(review): if the state is not initial when P stops on a backslash,
+ nothing is appended and P is not advanced, so the next pass computes
+ nms == 0 -- confirm that this cannot loop forever. */
+
+static size_t
+xdupmbstowcs2 (destp, src)
+ wchar_t **destp; /* Store the pointer to the wide character string */
+ const char *src; /* Multibyte character string */
+{
+ const char *p; /* Conversion start position of src */
+ wchar_t *wsbuf; /* Buffer for wide characters. */
+ size_t wsbuf_size; /* Size of WSBUF, in wide characters */
+ size_t wcnum; /* Number of wide characters in WSBUF */
+ mbstate_t state; /* Conversion State */
+ size_t wcslength; /* Number of wide characters produced by the conversion. */
+ const char *end_or_backslash; /* Next backslash at or after P, or the terminating NUL */
+ size_t nms; /* Number of bytes of SRC to convert at one time. */
+ mbstate_t tmp_state; /* Scratch copy of STATE for the measuring pass */
+ const char *tmp_p; /* Scratch copy of P for the measuring pass */
+
+ memset (&state, '\0', sizeof(mbstate_t));
+
+ wsbuf_size = 0;
+ wsbuf = NULL;
+
+ p = src;
+ wcnum = 0;
+ do
+ {
+ end_or_backslash = strchrnul(p, '\\');
+ nms = (end_or_backslash - p);
+ if (*end_or_backslash == '\0')
+ nms++; /* the last run includes the terminating NUL */
+
+ /* Compute the number of produced wide-characters. Copies of P and
+ STATE are used so that the real conversion below starts from the
+ same position and state. */
+ tmp_p = p;
+ tmp_state = state;
+ wcslength = mbsnrtowcs(NULL, &tmp_p, nms, 0, &tmp_state);
+
+ /* Conversion failed. */
+ if (wcslength == (size_t)-1)
+ {
+ free (wsbuf);
+ *destp = NULL;
+ return (size_t)-1;
+ }
+
+ /* Resize the buffer if it is not large enough. */
+ if (wsbuf_size < wcnum+wcslength+1) /* 1 for the L'\0' or the potential L'\\' */
+ {
+ wchar_t *wstmp;
+
+ wsbuf_size = wcnum+wcslength+1; /* 1 for the L'\0' or the potential L'\\' */
+
+ wstmp = (wchar_t *) realloc (wsbuf, wsbuf_size * sizeof (wchar_t));
+ if (wstmp == NULL)
+ {
+ free (wsbuf);
+ *destp = NULL;
+ return (size_t)-1;
+ }
+ wsbuf = wstmp;
+ }
+
+ /* Perform the conversion. This is assumed to return 'wcslength'.
+ * It may set 'p' to NULL, which is what ends the loop. */
+ mbsnrtowcs(wsbuf+wcnum, &p, nms, wsbuf_size-wcnum, &state);
+
+ wcnum += wcslength;
+
+ if (mbsinit (&state) && (p != NULL) && (*p == '\\'))
+ {
+ wsbuf[wcnum++] = L'\\';
+ p++;
+ }
+ }
+ while (p != NULL);
+
+ *destp = wsbuf;
+
+ /* Return the length of the wide character string, not including `\0'. */
+ return wcnum;
+}
+#endif /* HAVE_MBSNRTOWCS */
+
/* Convert a multibyte string to a wide character string. Memory for the
new wide character string is obtained with malloc.
@@ -155,6 +254,11 @@ xdupmbstowcs (destp, indicesp, src)
return (size_t)-1;
}
+#if HAVE_MBSNRTOWCS
+ if (indicesp == NULL)
+ return (xdupmbstowcs2 (destp, src));
+#endif
+
memset (&state, '\0', sizeof(mbstate_t));
wsbuf_size = WSBUF_INC;