/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Testing writing a version 2 {@link HFile}. This is a low-level test written
 * during the development of {@link HFileWriterV2}.
 */
@Category(SmallTests.class)
public class TestHFileWriterV2 {

  private static final Log LOG = LogFactory.getLog(TestHFileWriterV2.class);

  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  private Configuration conf;
  private FileSystem fs;

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    fs = FileSystem.get(conf);
  }

  @Test
  public void testHFileFormatV2() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "testHFileFormatV2");
    final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
    final int entryCount = 10000;
    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false);
  }

  @Test
  public void testMidKeyInHFile() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "testMidKeyInHFile");
    Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
    int entryCount = 50000;
    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true);
  }

  private void writeDataAndReadFromHFile(Path hfilePath,
      Algorithm compressAlgo, int entryCount, boolean findMidKey) throws IOException {

    HFileContext context = new HFileContextBuilder()
                           .withBlockSize(4096)
                           .withCompression(compressAlgo)
                           .build();
    HFileWriterV2 writer = (HFileWriterV2)
        new HFileWriterV2.WriterFactoryV2(conf, new CacheConfig(conf))
            .withPath(fs, hfilePath)
            .withFileContext(context)
            .create();
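    // Note: with a 4 KiB block size and values averaging roughly 1 KB, the
    // file ends up with many data blocks, so the data block index may grow
    // beyond a single root level; trailer.getNumDataIndexLevels() below
    // reflects whatever depth was actually written.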

    Random rand = new Random(9713312); // Just a fixed seed.
    List<KeyValue> keyValues = new ArrayList<KeyValue>(entryCount);

    for (int i = 0; i < entryCount; ++i) {
      byte[] keyBytes = randomOrderedKey(rand, i);

      // A random-length random value.
      byte[] valueBytes = randomValue(rand);
      KeyValue keyValue = new KeyValue(keyBytes, null, null, valueBytes);
      writer.append(keyValue);
      keyValues.add(keyValue);
    }

    // Add in an arbitrary order. They will be sorted lexicographically by
    // the key.
    writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
    writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
    writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));

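    // Closing the writer flushes the last data block and then appends the
    // "load-on-open" section (root data index, meta index, file info) and the
    // fixed-size trailer, which the manual reader below parses back.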
    writer.close();

    FSDataInputStream fsdis = fs.open(hfilePath);

    // A "manual" version of a new-format HFile reader. This unit test was
    // written before the V2 reader was fully implemented.

    long fileSize = fs.getFileStatus(hfilePath).getLen();
    FixedFileTrailer trailer =
        FixedFileTrailer.readFromStream(fsdis, fileSize);
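    // The trailer has a fixed serialized size for a given version, so it can
    // be read from the tail of the file before anything else is known about
    // the file's layout.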

    assertEquals(2, trailer.getMajorVersion());
    assertEquals(entryCount, trailer.getEntryCount());

    HFileContext meta = new HFileContextBuilder()
                        .withHBaseCheckSum(true)
                        .withIncludesMvcc(false)
                        .withIncludesTags(false)
                        .withCompression(compressAlgo)
                        .build();
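    // A separate read-side context: HBase-level checksums are present in the
    // written file, this test writes no tags, and the compression algorithm
    // must match what the writer used so blocks can be unpacked.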

    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderImpl(fsdis, fileSize, meta);
    // Comparator class name is stored in the trailer in version 2.
    KVComparator comparator = trailer.createComparator();
    HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
        new HFileBlockIndex.BlockIndexReader(comparator,
            trailer.getNumDataIndexLevels());
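    // The meta block index is always a single-level index keyed by the raw
    // meta block names, hence RAW_COMPARATOR and a tree depth of 1.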
    HFileBlockIndex.BlockIndexReader metaBlockIndexReader =
        new HFileBlockIndex.BlockIndexReader(
            KeyValue.RAW_COMPARATOR, 1);
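
    // Blocks in the load-on-open section appear in a fixed order: the root
    // data index first, then the meta index, then file info. The iterator
    // below is advanced through them in exactly that order.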
    HFileBlock.BlockIterator blockIter = blockReader.blockRange(
        trailer.getLoadOnOpenDataOffset(),
        fileSize - trailer.getTrailerSize());
    // Data index. We also read statistics about the block index written after
    // the root level.
    dataBlockIndexReader.readMultiLevelIndexRoot(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
        trailer.getDataIndexCount());

    if (findMidKey) {
      byte[] midkey = dataBlockIndexReader.midkey();
      assertNotNull("Midkey should not be null", midkey);
    }

    // Meta index.
    metaBlockIndexReader.readRootIndex(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX)
          .getByteStream(), trailer.getMetaIndexCount());

    // File info.
    FileInfo fileInfo = new FileInfo();
    fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
    byte[] keyValueFormatVersion = fileInfo.get(
        HFileWriterV2.KEY_VALUE_VERSION);
    boolean includeMemstoreTS = keyValueFormatVersion != null &&
        Bytes.toInt(keyValueFormatVersion) > 0;
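    // A KEY_VALUE_VERSION greater than zero means every cell in a data block
    // is followed by a variable-length memstore timestamp, which the scan
    // below must decode (and skip) to stay aligned.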

    // Counters for the number of key/value pairs and the number of blocks
    int entriesRead = 0;
    int blocksRead = 0;
    long memstoreTS = 0;

    // Scan blocks the way the reader would scan them
    fsdis.seek(0);
    long curBlockPos = 0;
    while (curBlockPos <= trailer.getLastDataBlockOffset()) {
      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
      assertEquals(BlockType.DATA, block.getBlockType());
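      // Blocks come off disk still packed (compressed/encrypted); unpack()
      // decompresses into an uncompressed copy before the contents are read.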
      if (meta.isCompressedOrEncrypted()) {
        assertFalse(block.isUnpacked());
        block = block.unpack(meta, blockReader);
      }
      ByteBuffer buf = block.getBufferWithoutHeader();
      while (buf.hasRemaining()) {
        int keyLen = buf.getInt();
        int valueLen = buf.getInt();

        byte[] key = new byte[keyLen];
        buf.get(key);

        byte[] value = new byte[valueLen];
        buf.get(value);

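        // The memstore timestamp is a WritableUtils vlong appended after the
        // value bytes; decode it from the backing array, then advance the
        // buffer past however many bytes the encoding occupied.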
        if (includeMemstoreTS) {
          ByteArrayInputStream byteInput = new ByteArrayInputStream(buf.array(),
              buf.arrayOffset() + buf.position(), buf.remaining());
          DataInputStream dataInput = new DataInputStream(byteInput);

          memstoreTS = WritableUtils.readVLong(dataInput);
          buf.position(buf.position() + WritableUtils.getVIntSize(memstoreTS));
        }

        // A brute-force check to see that all keys and values are correct.
        assertEquals(0, Bytes.compareTo(key, keyValues.get(entriesRead).getKey()));
        assertEquals(0, Bytes.compareTo(value, keyValues.get(entriesRead).getValue()));

        ++entriesRead;
      }
      ++blocksRead;
      curBlockPos += block.getOnDiskSizeWithHeader();
    }
    LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
        + blocksRead);
    assertEquals(entryCount, entriesRead);

    // Meta blocks. We can scan until the load-on-open data offset (which is
    // the root block index offset in version 2) because we are not testing
    // intermediate-level index blocks here.

    int metaCounter = 0;
    while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
      LOG.info("Current offset: " + fsdis.getPos() + ", scanning until " +
          trailer.getLoadOnOpenDataOffset());
      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false)
        .unpack(meta, blockReader);
      assertEquals(BlockType.META, block.getBlockType());
      Text t = new Text();
      ByteBuffer buf = block.getBufferWithoutHeader();
      if (Writables.getWritable(buf.array(), buf.arrayOffset(), buf.limit(), t) == null) {
        throw new IOException("Failed to deserialize meta block at offset "
            + curBlockPos + " into a " + t.getClass().getSimpleName());
      }
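      // Meta blocks were sorted by name at write time, so they come back in
      // the order CAPITAL_OF_FRANCE, CAPITAL_OF_RUSSIA, CAPITAL_OF_USA.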
      Text expectedText = metaCounter == 0 ? new Text("Paris")
          : metaCounter == 1 ? new Text("Moscow")
          : new Text("Washington, D.C.");
      assertEquals(expectedText, t);
      LOG.info("Read meta block data: " + t);
      ++metaCounter;
      curBlockPos += block.getOnDiskSizeWithHeader();
    }

    fsdis.close();
  }

  // Static stuff used by various HFile v2 unit tests

  public static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";
  private static final int MIN_ROW_OR_QUALIFIER_LENGTH = 64;
  private static final int MAX_ROW_OR_QUALIFIER_LENGTH = 128;

  /**
   * Generates a random key that is guaranteed to increase as the given index i
   * increases. The result consists of a prefix, which is a deterministic
   * increasing function of i, and a random suffix.
   *
   * @param rand random number generator to use
   * @param i index used to derive the deterministic, increasing key prefix
   * @return a key that sorts strictly after any key generated for a smaller i
   */
  public static byte[] randomOrderedKey(Random rand, int i) {
    StringBuilder k = new StringBuilder();

    // The fixed-length lexicographically increasing part of the key.
    for (int bitIndex = 31; bitIndex >= 0; --bitIndex) {
      if ((i & (1 << bitIndex)) == 0)
        k.append("a");
      else
        k.append("b");
    }

    // A random-length random suffix of the key. Pick the length once rather
    // than re-rolling the bound on every loop iteration.
    int suffixLength = rand.nextInt(50);
    for (int j = 0; j < suffixLength; ++j)
      k.append(randomReadableChar(rand));

    return k.toString().getBytes();
  }
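
  // A minimal sanity sketch, not part of the original test: for i < j the
  // 32-character 'a'/'b' prefix encodes the bits of i from most to least
  // significant, so keys compare by prefix alone and the random suffix can
  // never reorder them (e.g. i = 5 yields twenty-nine 'a's followed by "bab").
  // The method name is ours, added for illustration.
  public static void checkRandomOrderedKeysSorted(int count) {
    Random rand = new Random(9713312);
    byte[] prev = randomOrderedKey(rand, 0);
    for (int i = 1; i < count; ++i) {
      byte[] next = randomOrderedKey(rand, i);
      assertTrue("Keys must increase with i", Bytes.compareTo(prev, next) < 0);
      prev = next;
    }
  }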

  public static byte[] randomValue(Random rand) {
    StringBuilder v = new StringBuilder();
    // Pick the value length once: 1 to 2000 characters.
    int valueLength = 1 + rand.nextInt(2000);
    for (int j = 0; j < valueLength; ++j) {
      v.append((char) (32 + rand.nextInt(95)));
    }

    return v.toString().getBytes();
  }

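  /**
   * Returns one of the 63 characters [A-Za-z0-9_], chosen uniformly at random
   * (26 + 26 + 10 + 1 possible values).
   */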
  public static char randomReadableChar(Random rand) {
    int i = rand.nextInt(26 * 2 + 10 + 1);
    if (i < 26)
      return (char) ('A' + i);
    i -= 26;

    if (i < 26)
      return (char) ('a' + i);
    i -= 26;

    if (i < 10)
      return (char) ('0' + i);
    i -= 10;

    assert i == 0;
    return '_';
  }

  public static byte[] randomRowOrQualifier(Random rand) {
    StringBuilder field = new StringBuilder();
    int fieldLen = MIN_ROW_OR_QUALIFIER_LENGTH
        + rand.nextInt(MAX_ROW_OR_QUALIFIER_LENGTH
            - MIN_ROW_OR_QUALIFIER_LENGTH + 1);
    for (int i = 0; i < fieldLen; ++i)
      field.append(randomReadableChar(rand));
    return field.toString().getBytes();
  }

  public static KeyValue randomKeyValue(Random rand) {
    return new KeyValue(randomRowOrQualifier(rand),
        COLUMN_FAMILY_NAME.getBytes(), randomRowOrQualifier(rand),
        randomValue(rand));
  }
}