View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.fs.ChecksumException;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.fs.Path;
29  import org.apache.hadoop.hbase.util.ChecksumType;
30  import org.apache.hadoop.util.DataChecksum;
31  
32  /**
33   * Utility methods to compute and validate checksums.
34   */
35  @InterfaceAudience.Private
36  public class ChecksumUtil {
37    public static final Log LOG = LogFactory.getLog(ChecksumUtil.class);
38  
39    /** This is used to reserve space in a byte buffer */
40    private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
41  
42    /**
43     * This is used by unit tests to make checksum failures throw an
44     * exception instead of returning null. Returning a null value from
45     * checksum validation will cause the higher layer to retry that
46     * read with hdfs-level checksums. Instead, we would like checksum
47     * failures to cause the entire unit test to fail.
48     */
49    private static boolean generateExceptions = false;
50  
51    /**
52     * Generates a checksum for all the data in indata. The checksum is
53     * written to outdata.
54     * @param indata input data stream
55     * @param startOffset starting offset in the indata stream from where to
56     *                    compute checkums from
57     * @param endOffset ending offset in the indata stream upto
58     *                   which checksums needs to be computed
59     * @param outdata the output buffer where checksum values are written
60     * @param outOffset the starting offset in the outdata where the
61     *                  checksum values are written
62     * @param checksumType type of checksum
63     * @param bytesPerChecksum number of bytes per checksum value
64     */
65    static void generateChecksums(byte[] indata, int startOffset, int endOffset,
66      byte[] outdata, int outOffset, ChecksumType checksumType,
67      int bytesPerChecksum) throws IOException {
68  
69      if (checksumType == ChecksumType.NULL) {
70        return; // No checksum for this block.
71      }
72  
73      DataChecksum checksum = DataChecksum.newDataChecksum(
74          checksumType.getDataChecksumType(), bytesPerChecksum);
75  
76      checksum.calculateChunkedSums(
77         ByteBuffer.wrap(indata, startOffset, endOffset - startOffset),
78         ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset));
79    }
80  
81    /**
82     * Validates that the data in the specified HFileBlock matches the
83     * checksum.  Generates the checksum for the data and
84     * then validate that it matches the value stored in the header.
85     * If there is a checksum mismatch, then return false. Otherwise
86     * return true.
87     * The header is extracted from the specified HFileBlock while the
88     * data-to-be-verified is extracted from 'data'.
89     */
90    static boolean validateBlockChecksum(Path path, long offset, HFileBlock block,
91      byte[] data, int hdrSize) throws IOException {
92  
93      // If this is an older version of the block that does not have
94      // checksums, then return false indicating that checksum verification
95      // did not succeed. Actually, this methiod should never be called
96      // when the minorVersion is 0, thus this is a defensive check for a
97      // cannot-happen case. Since this is a cannot-happen case, it is
98      // better to return false to indicate a checksum validation failure.
99      if (!block.getHFileContext().isUseHBaseChecksum()) {
100       return false;
101     }
102 
103     // Get a checksum object based on the type of checksum that is
104     // set in the HFileBlock header. A ChecksumType.NULL indicates that
105     // the caller is not interested in validating checksums, so we
106     // always return true.
107     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
108     if (cktype == ChecksumType.NULL) {
109       return true; // No checksum validations needed for this block.
110     }
111 
112     // read in the stored value of the checksum size from the header.
113     int bytesPerChecksum = block.getBytesPerChecksum();
114 
115     DataChecksum dataChecksum = DataChecksum.newDataChecksum(
116         cktype.getDataChecksumType(), bytesPerChecksum);
117     assert dataChecksum != null;
118     int sizeWithHeader =  block.getOnDiskDataSizeWithHeader();
119     if (LOG.isTraceEnabled()) {
120       LOG.info("dataLength=" + data.length
121           + ", sizeWithHeader=" + sizeWithHeader
122           + ", checksumType=" + cktype.getName()
123           + ", file=" + path.toString()
124           + ", offset=" + offset
125           + ", headerSize=" + hdrSize
126           + ", bytesPerChecksum=" + bytesPerChecksum);
127     }
128     try {
129       dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
130           ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader),
131                           path.toString(), 0);
132     } catch (ChecksumException e) {
133       return false;
134     }
135     return true;  // checksum is valid
136   }
137 
138   /**
139    * Returns the number of bytes needed to store the checksums for
140    * a specified data size
141    * @param datasize number of bytes of data
142    * @param bytesPerChecksum number of bytes in a checksum chunk
143    * @return The number of bytes needed to store the checksum values
144    */
145   static long numBytes(long datasize, int bytesPerChecksum) {
146     return numChunks(datasize, bytesPerChecksum) *
147                      HFileBlock.CHECKSUM_SIZE;
148   }
149 
150   /**
151    * Returns the number of checksum chunks needed to store the checksums for
152    * a specified data size
153    * @param datasize number of bytes of data
154    * @param bytesPerChecksum number of bytes in a checksum chunk
155    * @return The number of checksum chunks
156    */
157   static long numChunks(long datasize, int bytesPerChecksum) {
158     long numChunks = datasize/bytesPerChecksum;
159     if (datasize % bytesPerChecksum != 0) {
160       numChunks++;
161     }
162     return numChunks;
163   }
164 
165   /**
166    * Write dummy checksums to the end of the specified bytes array
167    * to reserve space for writing checksums later
168    * @param baos OutputStream to write dummy checkum values
169    * @param numBytes Number of bytes of data for which dummy checksums
170    *                 need to be generated
171    * @param bytesPerChecksum Number of bytes per checksum value
172    */
173   static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
174     int numBytes, int bytesPerChecksum) throws IOException {
175     long numChunks = numChunks(numBytes, bytesPerChecksum);
176     long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
177     while (bytesLeft > 0) {
178       long count = Math.min(bytesLeft, DUMMY_VALUE.length);
179       baos.write(DUMMY_VALUE, 0, (int)count);
180       bytesLeft -= count;
181     }
182   }
183 
184   /**
185    * Mechanism to throw an exception in case of hbase checksum
186    * failure. This is used by unit tests only.
187    * @param value Setting this to true will cause hbase checksum
188    *              verification failures to generate exceptions.
189    */
190   public static void generateExceptionForChecksumFailureForTest(boolean value) {
191     generateExceptions = value;
192   }
193 }
194