View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.filter;
20  
21  import static org.junit.Assert.*;
22  
23  import java.util.regex.Pattern;
24  
25  import org.apache.hadoop.hbase.testclassification.SmallTests;
26  import org.apache.hadoop.hbase.filter.RegexStringComparator.EngineType;
27  import org.apache.hadoop.hbase.util.Bytes;
28  import org.junit.Test;
29  import org.junit.experimental.categories.Category;
30  
31  @Category(SmallTests.class)
32  public class TestRegexComparator {
33  
34    @Test
35    public void testSerialization() throws Exception {
36      // Default engine is the Java engine
37      RegexStringComparator a = new RegexStringComparator("a|b");
38      RegexStringComparator b = RegexStringComparator.parseFrom(a.toByteArray());
39      assertTrue(a.areSerializedFieldsEqual(b));
40      assertTrue(b.getEngine() instanceof RegexStringComparator.JavaRegexEngine);
41  
42      // joni engine
43      a = new RegexStringComparator("a|b", EngineType.JONI);
44      b = RegexStringComparator.parseFrom(a.toByteArray());
45      assertTrue(a.areSerializedFieldsEqual(b));
46      assertTrue(b.getEngine() instanceof RegexStringComparator.JoniRegexEngine);
47    }
48  
49    @Test
50    public void testJavaEngine() throws Exception {
51      for (TestCase t: TEST_CASES) {
52        boolean result = new RegexStringComparator(t.regex, t.flags, EngineType.JAVA)
53          .compareTo(Bytes.toBytes(t.haystack)) == 0;
54        assertEquals("Regex '" + t.regex + "' failed test '" + t.haystack + "'", result,
55          t.expected);
56      }
57    }
58  
59    @Test
60    public void testJoniEngine() throws Exception {
61      for (TestCase t: TEST_CASES) {
62        boolean result = new RegexStringComparator(t.regex, t.flags, EngineType.JONI)
63          .compareTo(Bytes.toBytes(t.haystack)) == 0;
64        assertEquals("Regex '" + t.regex + "' failed test '" + t.haystack + "'", result,
65          t.expected);
66      }
67    }
68  
69    private static class TestCase {
70      String regex;
71      String haystack;
72      int flags;
73      boolean expected;
74  
75      public TestCase(String regex, String haystack, boolean expected) {
76        this(regex, Pattern.DOTALL, haystack, expected);
77      }
78  
79      public TestCase(String regex, int flags, String haystack, boolean expected) {
80        this.regex = regex;
81        this.flags = flags;
82        this.haystack = haystack;
83        this.expected = expected;
84      }
85    }
86  
87    // These are a subset of the regex tests from OpenJDK 7
88    private static TestCase TEST_CASES[] = {
89      new TestCase("a|b", "a", true),
90      new TestCase("a|b", "b", true),
91      new TestCase("a|b", Pattern.CASE_INSENSITIVE, "A", true),
92      new TestCase("a|b", Pattern.CASE_INSENSITIVE, "B", true),
93      new TestCase("a|b", "z", false),
94      new TestCase("a|b|cd", "cd", true),
95      new TestCase("z(a|ac)b", "zacb", true),
96      new TestCase("[abc]+", "ababab", true),
97      new TestCase("[abc]+", "defg", false),
98      new TestCase("[abc]+[def]+[ghi]+", "zzzaaddggzzz", true),
99      new TestCase("[a-\\u4444]+", "za-9z", true),
100     new TestCase("[^abc]+", "ababab", false),
101     new TestCase("[^abc]+", "aaabbbcccdefg", true),
102     new TestCase("[abc^b]", "b", true),
103     new TestCase("[abc[def]]", "b", true),
104     new TestCase("[abc[def]]", "e", true),
105     new TestCase("[a-c[d-f[g-i]]]", "h", true),
106     new TestCase("[a-c[d-f[g-i]]m]", "m", true),
107     new TestCase("[a-c&&[d-f]]", "a", false),
108     new TestCase("[a-c&&[d-f]]", "z", false),
109     new TestCase("[a-m&&m-z&&a-c]", "m", false),
110     new TestCase("[a-m&&m-z&&a-z]", "m", true),
111     new TestCase("[[a-m]&&[^a-c]]", "a", false),
112     new TestCase("[[a-m]&&[^a-c]]", "d", true),
113     new TestCase("[[a-c][d-f]&&abc[def]]", "e", true),
114     new TestCase("[[a-c]&&[b-d]&&[c-e]]", "c", true),
115     new TestCase("[[a-c]&&[b-d][c-e]&&[u-z]]", "c", false),
116     new TestCase("[[a]&&[b][c][a]&&[^d]]", "a", true),
117     new TestCase("[[a]&&[b][c][a]&&[^d]]", "d", false),
118     new TestCase("[[[a-d]&&[c-f]]&&[c]&&c&&[cde]]", "c", true),
119     new TestCase("[x[[wz]abc&&bcd[z]]&&[u-z]]", "z", true),
120     new TestCase("a.c.+", "a#c%&", true),
121     new TestCase("ab.", "ab\n", true),
122     new TestCase("(?s)ab.", "ab\n", true),
123     new TestCase("ab\\wc", "abcc", true),
124     new TestCase("\\W\\w\\W", "#r#", true),
125     new TestCase("\\W\\w\\W", "rrrr#ggg", false),
126     new TestCase("abc[\\sdef]*", "abc  def", true),
127     new TestCase("abc[\\sy-z]*", "abc y z", true),
128     new TestCase("abc[a-d\\sm-p]*", "abcaa mn  p", true),
129     new TestCase("\\s\\s\\s", "blah  err", false),
130     new TestCase("\\S\\S\\s", "blah  err", true),
131     new TestCase("ab\\dc", "ab9c", true),
132     new TestCase("\\d\\d\\d", "blah45", false),
133     new TestCase("^abc", "abcdef", true),
134     new TestCase("^abc", "bcdabc", false),
135     new TestCase("^(a)?a", "a", true),
136     new TestCase("^(aa(bb)?)+$", "aabbaa", true),
137     new TestCase("((a|b)?b)+", "b", true),
138     new TestCase("^(a(b)?)+$", "aba", true),
139     new TestCase("^(a(b(c)?)?)?abc", "abc", true),
140     new TestCase("^(a(b(c))).*", "abc", true),
141     new TestCase("a?b", "aaaab", true),
142     new TestCase("a?b", "aaacc", false),
143     new TestCase("a??b", "aaaab", true),
144     new TestCase("a??b", "aaacc", false),
145     new TestCase("a?+b", "aaaab", true),
146     new TestCase("a?+b", "aaacc", false),
147     new TestCase("a+b", "aaaab", true),
148     new TestCase("a+b", "aaacc", false),
149     new TestCase("a+?b", "aaaab", true),
150     new TestCase("a+?b", "aaacc", false),
151     new TestCase("a++b", "aaaab", true),
152     new TestCase("a++b", "aaacc", false),
153     new TestCase("a{2,3}", "a", false),
154     new TestCase("a{2,3}", "aa", true),
155     new TestCase("a{2,3}", "aaa", true),
156     new TestCase("a{3,}", "zzzaaaazzz", true),
157     new TestCase("a{3,}", "zzzaazzz", false),
158     new TestCase("abc(?=d)", "zzzabcd", true),
159     new TestCase("abc(?=d)", "zzzabced", false),
160     new TestCase("abc(?!d)", "zzabcd", false),
161     new TestCase("abc(?!d)", "zzabced", true),
162     new TestCase("\\w(?<=a)", "###abc###", true),
163     new TestCase("\\w(?<=a)", "###ert###", false),
164     new TestCase("(?<!a)c", "bc", true),
165     new TestCase("(?<!a)c", "ac", false),
166     new TestCase("(a+b)+", "ababab", true),
167     new TestCase("(a+b)+", "accccd", false),
168     new TestCase("(ab)+", "ababab", true),
169     new TestCase("(ab)+", "accccd", false),
170     new TestCase("(ab)(cd*)", "zzzabczzz", true),
171     new TestCase("abc(d)*abc", "abcdddddabc", true),
172     new TestCase("a*b", "aaaab", true),
173     new TestCase("a*b", "b", true),
174     new TestCase("a*b", "aaaac", false),
175     new TestCase(".*?b", "aaaab", true),
176     new TestCase("a*+b", "aaaab", true),
177     new TestCase("a*+b", "b", true),
178     new TestCase("a*+b", "aaaac", false),
179     new TestCase("(?i)foobar", "fOobAr", true),
180     new TestCase("f(?i)oobar", "fOobAr", true),
181     new TestCase("f(?i)oobar", "FOobAr", false),
182     new TestCase("foo(?i)bar", "fOobAr", false),
183     new TestCase("(?i)foo[bar]+", "foObAr", true),
184     new TestCase("(?i)foo[a-r]+", "foObAr", true),
185     new TestCase("abc(?x)blah", "abcblah", true),
186     new TestCase("abc(?x)  blah", "abcblah", true),
187     new TestCase("abc(?x)  blah  blech", "abcblahblech", true),
188     new TestCase("[\\n-#]", "!", true),
189     new TestCase("[\\n-#]", "-", false),
190     new TestCase("[\\043]+", "blahblah#blech", true),
191     new TestCase("[\\042-\\044]+", "blahblah#blech", true),
192     new TestCase("[\\u1234-\\u1236]", "blahblah\u1235blech", true),
193     new TestCase("[^\043]*", "blahblah#blech", true),
194     new TestCase("(|f)?+", "foo", true),
195   };
196 }