/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  
22  import static org.junit.Assert.assertTrue;
23  import static org.junit.Assert.fail;
24  import static org.mockito.Mockito.mock;
25  import static org.mockito.Mockito.when;
26  
27  import java.io.IOException;
28  import java.util.concurrent.atomic.AtomicLong;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.DroppedSnapshotException;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.Server;
39  import org.apache.hadoop.hbase.TableName;
40  import org.apache.hadoop.hbase.client.Durability;
41  import org.apache.hadoop.hbase.client.Put;
42  import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
43  import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
44  import org.apache.hadoop.hbase.testclassification.MediumTests;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
47  import org.apache.hadoop.hbase.util.Threads;
48  import org.apache.hadoop.hbase.wal.WAL;
49  import org.apache.hadoop.hbase.wal.WALProvider.Writer;
50  import org.junit.After;
51  import org.junit.Before;
52  import org.junit.Rule;
53  import org.junit.Test;
54  import org.junit.experimental.categories.Category;
55  import org.junit.rules.TestName;
56  import org.mockito.Mockito;
57  import org.mockito.exceptions.verification.WantedButNotInvoked;
58  
59  /**
60   * Testing sync/append failures.
61   * Copied from TestHRegion.
62   */
63  @Category({MediumTests.class})
64  public class TestFailedAppendAndSync {
65    private static final Log LOG = LogFactory.getLog(TestFailedAppendAndSync.class);
66    @Rule public TestName name = new TestName();
67  
68    private static final String COLUMN_FAMILY = "MyCF";
69    private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
70  
71    HRegion region = null;
72    // Do not run unit tests in parallel (? Why not?  It don't work?  Why not?  St.Ack)
73    private static HBaseTestingUtility TEST_UTIL;
74    public static Configuration CONF ;
75    private String dir;
76  
77    // Test names
78    protected TableName tableName;
79  
80    @Before
81    public void setup() throws IOException {
82      TEST_UTIL = HBaseTestingUtility.createLocalHTU();
83      CONF = TEST_UTIL.getConfiguration();
84      // Disable block cache.
85      CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
86      dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
87      tableName = TableName.valueOf(name.getMethodName());
88    }
89  
90    @After
91    public void tearDown() throws Exception {
92      EnvironmentEdgeManagerTestHelper.reset();
93      LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
94      TEST_UTIL.cleanupTestDir();
95    }
96  
97    String getName() {
98      return name.getMethodName();
99    }
100 
101   /**
102    * Reproduce locking up that happens when we get an exceptions appending and syncing.
103    * See HBASE-14317.
104    * First I need to set up some mocks for Server and RegionServerServices. I also need to
105    * set up a dodgy WAL that will throw an exception when we go to append to it.
106    */
107   @Test (timeout=300000)
108   public void testLockupAroundBadAssignSync() throws IOException {
109     final AtomicLong rolls = new AtomicLong(0);
110     // Dodgy WAL. Will throw exceptions when flags set.
111     class DodgyFSLog extends FSHLog {
112       volatile boolean throwSyncException = false;
113       volatile boolean throwAppendException = false;
114 
115       public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
116       throws IOException {
117         super(fs, root, logDir, conf);
118       }
119 
120       @Override
121       public byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException {
122         byte [][] regions = super.rollWriter(force);
123         rolls.getAndIncrement();
124         return regions;
125       }
126 
127       @Override
128       protected Writer createWriterInstance(Path path) throws IOException {
129         final Writer w = super.createWriterInstance(path);
130           return new Writer() {
131             @Override
132             public void close() throws IOException {
133               w.close();
134             }
135 
136             @Override
137             public void sync() throws IOException {
138               if (throwSyncException) {
139                 throw new IOException("FAKE! Failed to replace a bad datanode...");
140               }
141               w.sync();
142             }
143 
144             @Override
145             public void append(Entry entry) throws IOException {
146               if (throwAppendException) {
147                 throw new IOException("FAKE! Failed to replace a bad datanode...");
148               }
149               w.append(entry);
150             }
151 
152             @Override
153             public long getLength() throws IOException {
154               return w.getLength();
155               }
156             };
157           }
158       }
159 
160     // Make up mocked server and services.
161     Server server = mock(Server.class);
162     when(server.getConfiguration()).thenReturn(CONF);
163     when(server.isStopped()).thenReturn(false);
164     when(server.isAborted()).thenReturn(false);
165     RegionServerServices services = mock(RegionServerServices.class);
166     // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
167     // the test.
168     FileSystem fs = FileSystem.get(CONF);
169     Path rootDir = new Path(dir + getName());
170     DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
171     LogRoller logRoller = new LogRoller(server, services);
172     logRoller.addWAL(dodgyWAL);
173     logRoller.start();
174 
175     boolean threwOnSync = false;
176     boolean threwOnAppend = false;
177     boolean threwOnBoth = false;
178 
179     HRegion region = initHRegion(tableName, null, null, dodgyWAL);
180     try {
181       // Get some random bytes.
182       byte[] value = Bytes.toBytes(getName());
183       try {
184         // First get something into memstore
185         Put put = new Put(value);
186         put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
187         region.put(put);
188       } catch (IOException ioe) {
189         fail();
190       }
191       long rollsCount = rolls.get();
192       try {
193         dodgyWAL.throwAppendException = true;
194         dodgyWAL.throwSyncException = false;
195         Put put = new Put(value);
196         put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
197         region.put(put);
198       } catch (IOException ioe) {
199         threwOnAppend = true;
200       }
201       while (rollsCount == rolls.get()) Threads.sleep(100);
202       rollsCount = rolls.get();
203 
204       // When we get to here.. we should be ok. A new WAL has been put in place. There were no
205       // appends to sync. We should be able to continue.
206 
207       try {
208         dodgyWAL.throwAppendException = true;
209         dodgyWAL.throwSyncException = true;
210         Put put = new Put(value);
211         put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
212         region.put(put);
213       } catch (IOException ioe) {
214         threwOnBoth = true;
215       }
216       while (rollsCount == rolls.get()) Threads.sleep(100);
217 
218       // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
219       // to just continue.
220 
221       // So, should be no abort at this stage. Verify.
222       Mockito.verify(server, Mockito.atLeast(0)).
223         abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
224       try {
225         dodgyWAL.throwAppendException = false;
226         dodgyWAL.throwSyncException = true;
227         Put put = new Put(value);
228         put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
229         region.put(put);
230       } catch (IOException ioe) {
231         threwOnSync = true;
232       }
233       // An append in the WAL but the sync failed is a server abort condition. That is our
234       // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
235       // happens. If it don't we'll timeout the whole test. That is fine.
236       while (true) {
237         try {
238           Mockito.verify(server, Mockito.atLeast(1)).
239             abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
240           break;
241         } catch (WantedButNotInvoked t) {
242           Threads.sleep(1);
243         }
244       }
245     } finally {
246       // To stop logRoller, its server has to say it is stopped.
247       Mockito.when(server.isStopped()).thenReturn(true);
248       if (logRoller != null) logRoller.interrupt();
249       if (region != null) {
250         try {
251           region.close(true);
252         } catch (DroppedSnapshotException e) {
253           LOG.info("On way out; expected!", e);
254         }
255       }
256       if (dodgyWAL != null) dodgyWAL.close();
257       assertTrue("The regionserver should have thrown an exception", threwOnBoth);
258       assertTrue("The regionserver should have thrown an exception", threwOnAppend);
259       assertTrue("The regionserver should have thrown an exception", threwOnSync);
260     }
261   }
262 
263   /**
264    * @return A region on which you must call
265    *         {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done.
266    */
267   public HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey, WAL wal)
268   throws IOException {
269     return TEST_UTIL.createLocalHRegion(tableName.getName(), startKey, stopKey,
270       getName(), CONF, false, Durability.SYNC_WAL,
271       wal, COLUMN_FAMILY_BYTES);
272   }
273 }