/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase;

import java.security.InvalidParameterException;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.MultiThreadedWriter;
import org.apache.hadoop.hbase.util.RegionSplitter;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Integration test that verifies lazy CF loading during scans. Multiple threads continuously
 * write values while the test repeatedly scans with on-demand column family loading enabled
 * and verifies the results.
 */
@Category(IntegrationTests.class)
public class IntegrationTestLazyCfLoading {
  private static final TableName TABLE_NAME =
      TableName.valueOf(IntegrationTestLazyCfLoading.class.getSimpleName());
  private static final String TIMEOUT_KEY = "hbase.%s.timeout";
  private static final String ENCODING_KEY = "hbase.%s.datablock.encoding";

  /**
   * A soft test timeout, in minutes; the actual duration of the test depends on the number of
   * keys to write.
   */
  private static final int DEFAULT_TIMEOUT_MINUTES = 10;

  private static final int NUM_SERVERS = 1;
  /** Set regions per server low to ensure splits happen during test */
  private static final int REGIONS_PER_SERVER = 3;
  private static final int KEYS_TO_WRITE_PER_SERVER = 20000;
  private static final int WRITER_THREADS = 10;
  private static final int WAIT_BETWEEN_SCANS_MS = 1000;

  private static final Log LOG = LogFactory.getLog(IntegrationTestLazyCfLoading.class);
  private IntegrationTestingUtility util = new IntegrationTestingUtility();
  private final DataGenerator dataGen = new DataGenerator();

  /**
   * Custom LoadTestDataGenerator. Uses key generation and verification from
   * LoadTestKVGenerator. Creates three column families: the first has an integer column to
   * filter on; the second has an integer column that matches the first (for test-specific
   * verification) plus a byte[] value column used for general verification; the third has
   * only the value column.
   */
  private static class DataGenerator extends LoadTestDataGenerator {
    private static final int MIN_DATA_SIZE = 4096;
    private static final int MAX_DATA_SIZE = 65536;
    public static final byte[] ESSENTIAL_CF = Bytes.toBytes("essential");
    public static final byte[] JOINED_CF1 = Bytes.toBytes("joined");
    public static final byte[] JOINED_CF2 = Bytes.toBytes("joined2");
    public static final byte[] FILTER_COLUMN = Bytes.toBytes("filter");
    public static final byte[] VALUE_COLUMN = Bytes.toBytes("val");
    public static final long ACCEPTED_VALUE = 1L;

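    /** Maps each column family to the column qualifiers written into it. */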
    private static final Map<byte[], byte[][]> columnMap = new TreeMap<byte[], byte[][]>(
        Bytes.BYTES_COMPARATOR);

    private final AtomicLong expectedNumberOfKeys = new AtomicLong(0);
    private final AtomicLong totalNumberOfKeys = new AtomicLong(0);

    public DataGenerator() {
      super(MIN_DATA_SIZE, MAX_DATA_SIZE);
      columnMap.put(ESSENTIAL_CF, new byte[][] { FILTER_COLUMN });
      columnMap.put(JOINED_CF1, new byte[][] { FILTER_COLUMN, VALUE_COLUMN });
      columnMap.put(JOINED_CF2, new byte[][] { VALUE_COLUMN });
    }

    public long getExpectedNumberOfKeys() {
      return expectedNumberOfKeys.get();
    }

    public long getTotalNumberOfKeys() {
      return totalNumberOfKeys.get();
    }

    @Override
    public byte[] getDeterministicUniqueKey(long keyBase) {
      return LoadTestKVGenerator.md5PrefixedKey(keyBase).getBytes();
    }

    @Override
    public byte[][] getColumnFamilies() {
      return columnMap.keySet().toArray(new byte[columnMap.size()][]);
    }

    @Override
    public byte[][] generateColumnsForCf(byte[] rowKey, byte[] cf) {
      return columnMap.get(cf);
    }

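    /**
     * Generates the value for a single cell. For the filter column, the value is derived
     * deterministically from the row key, and the per-key counters are updated once (when the
     * essential CF is generated); for the value column, a verifiable random-size value is
     * produced by the underlying LoadTestKVGenerator.
     */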
    @Override
    public byte[] generateValue(byte[] rowKey, byte[] cf, byte[] column) {
      if (Bytes.BYTES_COMPARATOR.compare(column, FILTER_COLUMN) == 0) {
        // Random deterministic way to make some values "on" and others "off" for filters.
        long value = Long.parseLong(Bytes.toString(rowKey, 0, 4), 16) & ACCEPTED_VALUE;
        if (Bytes.BYTES_COMPARATOR.compare(cf, ESSENTIAL_CF) == 0) {
          totalNumberOfKeys.incrementAndGet();
          if (value == ACCEPTED_VALUE) {
            expectedNumberOfKeys.incrementAndGet();
          }
        }
        return Bytes.toBytes(value);
      } else if (Bytes.BYTES_COMPARATOR.compare(column, VALUE_COLUMN) == 0) {
        return kvGenerator.generateRandomSizeValue(rowKey, cf, column);
      }
      String error = "Unknown column " + Bytes.toString(column);
      assert false : error;
      throw new InvalidParameterException(error);
    }

    @Override
    public boolean verify(byte[] rowKey, byte[] cf, byte[] column, byte[] value) {
      if (Bytes.BYTES_COMPARATOR.compare(column, FILTER_COLUMN) == 0) {
        // Relies on the filter from getScanFilter being used.
        return Bytes.toLong(value) == ACCEPTED_VALUE;
      } else if (Bytes.BYTES_COMPARATOR.compare(column, VALUE_COLUMN) == 0) {
        return LoadTestKVGenerator.verify(value, rowKey, cf, column);
      }
      return false; // An unknown column coming back from a read is never expected.
    }

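    /** Verifies that a read returned the expected number of columns for the given column family. */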
    @Override
    public boolean verify(byte[] rowKey, byte[] cf, Set<byte[]> columnSet) {
      return columnMap.get(cf).length == columnSet.size();
    }

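    /**
     * Builds the filter on the essential CF's integer column. With setFilterIfMissing(true),
     * rows lacking the filter column are dropped; combined with on-demand CF loading in the
     * scan, the joined families should only be loaded for rows that pass this filter.
     */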
    public Filter getScanFilter() {
      SingleColumnValueFilter scf = new SingleColumnValueFilter(ESSENTIAL_CF, FILTER_COLUMN,
          CompareFilter.CompareOp.EQUAL, Bytes.toBytes(ACCEPTED_VALUE));
      scf.setFilterIfMissing(true);
      return scf;
    }
  }

  @Before
  public void setUp() throws Exception {
    LOG.info("Initializing cluster with " + NUM_SERVERS + " servers");
    util.initializeCluster(NUM_SERVERS);
    LOG.info("Done initializing cluster");
    createTable();
    // after table creation, ACLs need time to be propagated to RSs in a secure deployment
    // so we sleep a little bit because we don't have a good way to know when permissions
    // are received by RSs
    Thread.sleep(3000);
  }

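  /**
   * Creates the test table with all CFs from the data generator, using the configured data block
   * encoding (FAST_DIFF by default) and pre-splitting it into REGIONS_PER_SERVER regions per
   * server with hex-string split points, which match the MD5-prefixed row keys.
   */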
  private void createTable() throws Exception {
    deleteTable();
    LOG.info("Creating table");
    Configuration conf = util.getConfiguration();
    String encodingKey = String.format(ENCODING_KEY, this.getClass().getSimpleName());
    DataBlockEncoding blockEncoding = DataBlockEncoding.valueOf(conf.get(encodingKey, "FAST_DIFF"));
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    for (byte[] cf : dataGen.getColumnFamilies()) {
      HColumnDescriptor hcd = new HColumnDescriptor(cf);
      hcd.setDataBlockEncoding(blockEncoding);
      htd.addFamily(hcd);
    }
    int serverCount = util.getHBaseClusterInterface().getClusterStatus().getServersSize();
    byte[][] splits = new RegionSplitter.HexStringSplit().split(serverCount * REGIONS_PER_SERVER);
    util.getHBaseAdmin().createTable(htd, splits);
    LOG.info("Created table");
  }

  private void deleteTable() throws Exception {
    if (util.getHBaseAdmin().tableExists(TABLE_NAME)) {
      LOG.info("Deleting table");
      util.deleteTable(TABLE_NAME);
      LOG.info("Deleted table");
    }
  }

  @After
  public void tearDown() throws Exception {
    deleteTable();
    LOG.info("Restoring the cluster");
    util.restoreCluster();
    LOG.info("Done restoring the cluster");
  }

  @Test
  public void testReadersAndWriters() throws Exception {
    Configuration conf = util.getConfiguration();
    String timeoutKey = String.format(TIMEOUT_KEY, this.getClass().getSimpleName());
    long maxRuntime = conf.getLong(timeoutKey, DEFAULT_TIMEOUT_MINUTES);
    long serverCount = util.getHBaseClusterInterface().getClusterStatus().getServersSize();
    long keysToWrite = serverCount * KEYS_TO_WRITE_PER_SERVER;
    Table table = new HTable(conf, TABLE_NAME);

    // Create multi-threaded writer and start it. We write multiple columns/CFs and verify
    // their integrity, therefore multi-put is necessary.
    MultiThreadedWriter writer = new MultiThreadedWriter(dataGen, conf, TABLE_NAME);
    writer.setMultiPut(true);

    LOG.info("Starting writer; the number of keys to write is " + keysToWrite);
    // TODO : Need to see if tag support has to be given here in the integration test suite
    writer.start(1, keysToWrite, WRITER_THREADS);

    // Now, do scans.
    long now = EnvironmentEdgeManager.currentTime();
    long timeLimit = now + (maxRuntime * 60000);
    boolean isWriterDone = false;
    while (now < timeLimit && !isWriterDone) {
      LOG.info("Starting the scan; wrote approximately "
        + dataGen.getTotalNumberOfKeys() + " keys");
      isWriterDone = writer.isDone();
      if (isWriterDone) {
        LOG.info("Scanning full result, writer is done");
      }
      Scan scan = new Scan();
      for (byte[] cf : dataGen.getColumnFamilies()) {
        scan.addFamily(cf);
      }
      scan.setFilter(dataGen.getScanFilter());
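      // The feature under test: only load the "joined" CFs for rows that pass the essential
      // CF's filter, rather than eagerly loading every family for every row.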
      scan.setLoadColumnFamiliesOnDemand(true);
      // The number of keys we can expect from the scan - a lower bound taken before the scan.
      // Not a strict lower bound - the writer knows nothing about filters, so we get this count
      // from the generator; the writer might have generated a value but not put it yet.
      long onesGennedBeforeScan = dataGen.getExpectedNumberOfKeys();
      long startTs = EnvironmentEdgeManager.currentTime();
      ResultScanner results = table.getScanner(scan);
      long resultCount = 0;
      Result result = null;
      // Verify and count the results.
      while ((result = results.next()) != null) {
        boolean isOk = writer.verifyResultAgainstDataGenerator(result, true, true);
        Assert.assertTrue("Failed to verify [" + Bytes.toString(result.getRow()) + "]", isOk);
        ++resultCount;
      }
      long timeTaken = EnvironmentEdgeManager.currentTime() - startTs;
      // Verify the result count.
      long onesGennedAfterScan = dataGen.getExpectedNumberOfKeys();
      Assert.assertTrue("Read " + resultCount + " keys when at most " + onesGennedAfterScan
        + " were generated", onesGennedAfterScan >= resultCount);
      if (isWriterDone) {
        Assert.assertTrue("Read " + resultCount + " keys; the writer is done and "
          + onesGennedAfterScan + " keys were generated", onesGennedAfterScan == resultCount);
      } else if (onesGennedBeforeScan * 0.9 > resultCount) {
        LOG.warn("Read way too few keys (" + resultCount + "/" + onesGennedBeforeScan
          + ") - there might be a problem, or the writer might just be slow");
      }
      LOG.info("Scan took " + timeTaken + "ms");
      if (!isWriterDone) {
        Thread.sleep(WAIT_BETWEEN_SCANS_MS);
        now = EnvironmentEdgeManager.currentTime();
      }
    }
    Assert.assertEquals("There are write failures", 0, writer.getNumWriteFailures());
    Assert.assertTrue("Writer is not done", isWriterDone);
    // Assert.fail("Boom!");
  }
}