/* ScummVM - Graphic Adventure Engine * * ScummVM is the legal property of its developers, whose names * are too numerous to list here. Please refer to the COPYRIGHT * file distributed with this source distribution. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * */ #include "glk/adrift/scare.h" #include "glk/adrift/sxprotos.h" namespace Glk { namespace Adrift { /* * Module notes: * * The glob matching functions in this module are derived from an original * (and somewhat hairy) glob.c posted by Arjan Kenter from the University * of Twente, NL, in an assortment of minor variations between 1993 and 1997. * The major modifications are: * * o Added checks to ensure that invalid range patterns such as "[a-" or * "[-" don't cause the loops to walk off the end of the pattern string * and (usually) result in SIGSEGV. * o Moved from plain char to unsigned char to avoid signedness problems * with range comparisons. * o Skipped the leading '[' in the range checker; the original was treating * it as a possible first value of 'r'. * o Moved the range checker while() from the bottom of the loop to the top, * to avoid problems with invalid ranges. * o Gave 'l' in the range checker an initial value that ensures that it * can never match until it's been re-assigned to 'r'. * o Used a return value rather than multiple returns in the matcher, for * better debugability. * o Applied some const-correctness, and replaced some pointers by indexing. * o Added scanf-like special cases, making ']' a valid part of a range if * first, and '-' if last. * * This glob accepts * and ? wild cards, and [] ranges. It does not check * whether the range string is valid (for example, terminates with ']'), but * simply returns the best it can under those circumstances. * * Example call: * glob_match ("a*b?c[A-Za-z_0-9]d*", some_string) */ /* * glob_inrange_unsigned() * glob_match_unsigned() * * Match a "[...]" character range, and match general glob wildcards. See * above for notes on where these functions came from originally. */ static int glob_inrange_unsigned(const unsigned char **const pattern, unsigned char ch) { const unsigned char *const pattern_ = *pattern; int in_range = FALSE; unsigned int l = 256, r = 0, index_; /* Skip the leading '[' on entry to a range check. */ index_ = 1; /* Special-case a range that has ']' as its first character. */ if (pattern_[index_] == ']') { r = pattern_[index_++]; if (ch == r) in_range = TRUE; } /* * Check at the loop top, rather than the bottom, to avoid problems with * invalid or uncompleted ranges. */ while (pattern_[index_] && pattern_[index_] != ']') { r = pattern_[index_++]; if (r == '-') { /* Special-case a range that has '-' as its last character. */ if (pattern_[index_] == ']' || !pattern_[index_]) { if (ch == r) in_range = TRUE; break; } /* Break the loop on unterminated range ending with '-'. */ if (!pattern_[index_]) break; r = pattern_[index_++]; if (l <= ch && ch <= r) in_range = TRUE; } else { l = r; if (ch == r) in_range = TRUE; } } /* Update pattern with characters consumed, return result. */ *pattern += index_; return in_range; } static int glob_match_unsigned(const unsigned char *pattern, const unsigned char *string) { int is_match = FALSE; if (!*string) { if (*pattern == '*') is_match = glob_match_unsigned(pattern + 1, string); else is_match = !*pattern; } else { switch (*pattern) { case '\0': is_match = !*string; break; case '*': if (glob_match_unsigned(pattern + 1, string)) is_match = TRUE; else is_match = glob_match_unsigned(pattern, string + 1); break; case '?': is_match = glob_match_unsigned(pattern + 1, string + 1); break; case '[': /* * After a range check, we need to see if we hit the end of the * pattern before recursively matching pattern + 1. */ is_match = glob_inrange_unsigned(&pattern, *string) && (!*pattern || glob_match_unsigned(pattern + 1, string + 1)); break; default: is_match = *pattern == *string && glob_match_unsigned(pattern + 1, string + 1); break; } } return is_match; } /* Structures and data for the self test function. */ struct sx_test_data_t { const sc_char *const pattern; const sc_char *const string; }; static const sx_test_data_t SHOULD_MATCH[] = { {"a", "a"}, {"abc", "abc"}, {"", ""}, {"*", ""}, {"*", "abc"}, {"*", "cba"}, {"*c", "c"}, {"*c", "abc"}, {"*c", "cbac"}, {"a*", "a"}, {"a*", "abc"}, {"a*", "abca"}, {"a*c", "ac"}, {"a*c", "abc"}, {"a*c", "abcbcbc"}, {"a**c", "ac"}, {"a**c", "abc"}, {"a**c", "abcbcbc"}, {"*b*", "b"}, {"*b*", "abc"}, {"*b*", "ab"}, {"*b*", "bc"}, {"?", "a"}, {"?", "z"}, {"?", "?"}, {"[?]", "?"}, {"a?", "aa"}, {"a?", "az"}, {"a?", "a?"}, {"?c", "ac"}, {"?c", "zc"}, {"?c", "?c"}, {"[abz]", "a"}, {"[abz]", "b"}, {"[abz]", "z"}, {"[a-c]", "a"}, {"[a-c]", "b"}, {"[a-c]", "c"}, {"[ac]b[ac]", "abc"}, {"[ac]b[ac]", "cba"}, {"[]]", "]"}, {"[]a-c]", "a"}, {"[]a-c]", "b"}, {"[]a-c]", "c"}, {"[?]", "?" }, {"[-]", "-"}, {"[z-]", "z"}, {"[z-]", "-"}, {"[][-]", "]"}, {"[][-]", "["}, {"[][-]", "-"}, {"[a-c-]", "a"}, {"[a-c-]", "b"}, {"[a-c-]", "c"}, {"[a-c-]", "-"}, {"*[a-z]*abc?xyz", "aabcQxyz"}, {"*[a-z]*abc?xyz", "aabcQxyz"}, {"*[a-z]*abc?xyz", "aabcQxyz"}, {"*[a-z]*abc?xyz", "aabcQxyz"}, {"???]", "abc]"}, {"[z-a]", "z"}, {"[a-z", "a"}, {"[a-", "a"}, {"[a", "a"}, {"[[", "["}, {NULL, NULL} }; static const sx_test_data_t SHOULD_NOT_MATCH[] = { {"a", "b"}, {"abc", "abd"}, {"a", ""}, {"", "a"}, {"*c", "a"}, {"*c", "ab"}, {"*c", "abca"}, {"a*", "c"}, {"a*", "cba"}, {"a*", "cbac"}, {"a*c", "ca"}, {"a*c", "cba"}, {"a*c", "cbababa"}, {"a**c", "ca"}, {"a**c", "cba"}, {"a**c", "cbababa"}, {"*b*", ""}, {"*b*", "z"}, {"*b*", "ac"}, {"*b*", "azc"}, {"?", ""}, {"?", "ab"}, {"?", "abc"}, {"[?]", "a"}, {"a?", "ca"}, {"a?", "cz"}, {"a?", "??"}, {"?c", "ab"}, {"?c", "zb"}, {"?c", "??"}, {"[bcy]", "a"}, {"[bcy]", "d"}, {"[bcy]", "z"}, {"[b-d]", "a"}, {"[b-d]", "e"}, {"[b-d]", ""}, {"[b-d]", "bc"}, {"[ac]b[ac]", "aaa"}, {"[ac]b[ac]", "bbb"}, {"[ac]b[ac]", "ccc"}, {"[]]", "["}, {"[]]", "a"}, {"[]a-c]", "z"}, {"[?]", "a" }, {"[-]", "a"}, {"[z-]", "a"}, {"[][-]", "a"}, {"[][-]", "z"}, {"[a-c-]", "z"}, {"*[a-z]*abc?xyz", "AabcQxyz"}, {"*[a-z]*abc?xyz", "AabcQxyz"}, {"*[a-z]*abc?xyz", "AabcQxyz"}, {"*[a-z]*abc?xyz", "aabcxyz"}, {"[z-a]", "a"}, {"[z-a]", "b"}, {"[", "a"}, {"[[", "a"}, {NULL, NULL} }; /* * glob_self_test() * * Sed quis custodiet ipsos custodes? */ static void glob_self_test(void) { const sx_test_data_t *test; sc_int errors; /* * Run each test case and compare against expected result. To avoid a lot * of ugly casting, we use the main public glob_match() function. */ errors = 0; for (test = SHOULD_MATCH; test->pattern; test++) { if (!glob_match(test->pattern, test->string)) { sx_error("glob_self_test: \"%s\", \"%s\"" " did not match, and should have matched\n", test->pattern, test->string); errors++; } } for (test = SHOULD_NOT_MATCH; test->pattern; test++) { if (glob_match(test->pattern, test->string)) { sx_error("glob_self_test: \"%s\", \"%s\"" " matched, and should not have matched\n", test->pattern, test->string); errors++; } } /* * Abort if any error. As befits our distrustful nature, we won't even * trust that sx_fatal() calls abort() (though it should). */ if (errors > 0) { sx_fatal("glob_self_test: %ld self-test error%s found, aborting\n", errors, (errors == 1) ? "" : "s"); } } /* * glob_match() * * Adapter for the above globbing functions, presenting a more standard char- * based interface. Here is where all the evil casting lives. */ sc_bool glob_match(const sc_char *pattern, const sc_char *string) { static sc_bool initialized = FALSE; const unsigned char *pattern_ = (const unsigned char *) pattern; const unsigned char *string_ = (const unsigned char *) string; sc_bool retval; assert(pattern && string); /* On the first call, run a self-test to verify basic glob matching. */ if (!initialized) { /* * To avoid lots of icky casting, the self-test uses the core public * glob_match() that we're in right here to run its tests. So set * initialized _before_ the test, to avoid infinite recursion. */ initialized = TRUE; glob_self_test(); } retval = glob_match_unsigned(pattern_, string_) != 0; return retval; } } // End of namespace Adrift } // End of namespace Glk