View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver.wal;
19  
20  import static org.junit.Assert.assertTrue;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.HBaseTestingUtility;
31  import org.apache.hadoop.hbase.HColumnDescriptor;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.HTableDescriptor;
35  import org.apache.hadoop.hbase.KeyValue;
36  import org.apache.hadoop.hbase.MiniHBaseCluster;
37  import org.apache.hadoop.hbase.TableName;
38  import org.apache.hadoop.hbase.client.Admin;
39  import org.apache.hadoop.hbase.client.HTable;
40  import org.apache.hadoop.hbase.client.Put;
41  import org.apache.hadoop.hbase.client.Table;
42  import org.apache.hadoop.hbase.regionserver.HRegionServer;
43  import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
44  import org.apache.hadoop.hbase.testclassification.MediumTests;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.apache.hadoop.hbase.util.FSUtils;
47  import org.apache.hadoop.hbase.wal.DefaultWALProvider;
48  import org.apache.hadoop.hbase.wal.WAL;
49  import org.apache.hadoop.hbase.wal.WALFactory;
50  import org.apache.hadoop.hbase.wal.WALKey;
51  import org.apache.hadoop.hbase.wal.WALSplitter;
52  import org.apache.hadoop.hdfs.MiniDFSCluster;
53  import org.junit.After;
54  import org.junit.Assert;
55  import org.junit.Before;
56  import org.junit.BeforeClass;
57  import org.junit.Test;
58  import org.junit.experimental.categories.Category;
59  
60  /**
61   * Tests for conditions that should trigger RegionServer aborts when
62   * rolling the current WAL fails.
63   */
64  @Category(MediumTests.class)
65  public class TestLogRollAbort {
66    private static final Log LOG = LogFactory.getLog(TestLogRolling.class);
67    private static MiniDFSCluster dfsCluster;
68    private static Admin admin;
69    private static MiniHBaseCluster cluster;
70    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
71  
72    /* For the split-then-roll test */
73    private static final Path HBASEDIR = new Path("/hbase");
74    private static final Path OLDLOGDIR = new Path(HBASEDIR, HConstants.HREGION_OLDLOGDIR_NAME);
75  
76    // Need to override this setup so we can edit the config before it gets sent
77    // to the HDFS & HBase cluster startup.
78    @BeforeClass
79    public static void setUpBeforeClass() throws Exception {
80      // Tweak default timeout values down for faster recovery
81      TEST_UTIL.getConfiguration().setInt(
82          "hbase.regionserver.logroll.errors.tolerated", 2);
83      TEST_UTIL.getConfiguration().setInt("hbase.rpc.timeout", 10 * 1000);
84  
85      // Increase the amount of time between client retries
86      TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 5 * 1000);
87  
88      // make sure log.hflush() calls syncFs() to open a pipeline
89      TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
90      // lower the namenode & datanode heartbeat so the namenode
91      // quickly detects datanode failures
92      TEST_UTIL.getConfiguration().setInt("dfs.namenode.heartbeat.recheck-interval", 5000);
93      TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);
94      // the namenode might still try to choose the recently-dead datanode
95      // for a pipeline, so try to a new pipeline multiple times
96      TEST_UTIL.getConfiguration().setInt("dfs.client.block.write.retries", 10);
97    }
98  
99    private Configuration conf;
100   private FileSystem fs;
101 
102   @Before
103   public void setUp() throws Exception {
104     TEST_UTIL.startMiniCluster(2);
105 
106     cluster = TEST_UTIL.getHBaseCluster();
107     dfsCluster = TEST_UTIL.getDFSCluster();
108     admin = TEST_UTIL.getHBaseAdmin();
109     conf = TEST_UTIL.getConfiguration();
110     fs = TEST_UTIL.getDFSCluster().getFileSystem();
111 
112     // disable region rebalancing (interferes with log watching)
113     cluster.getMaster().balanceSwitch(false);
114     FSUtils.setRootDir(conf, HBASEDIR);
115   }
116 
117   @After
118   public void tearDown() throws Exception {
119     TEST_UTIL.shutdownMiniCluster();
120   }
121 
122   /**
123    * Tests that RegionServer aborts if we hit an error closing the WAL when
124    * there are unsynced WAL edits.  See HBASE-4282.
125    */
126   @Test
127   public void testRSAbortWithUnflushedEdits() throws Exception {
128     LOG.info("Starting testRSAbortWithUnflushedEdits()");
129 
130     // When the hbase:meta table can be opened, the region servers are running
131     new HTable(TEST_UTIL.getConfiguration(),
132       TableName.META_TABLE_NAME).close();
133 
134     // Create the test table and open it
135     TableName tableName = TableName.valueOf(this.getClass().getSimpleName());
136     HTableDescriptor desc = new HTableDescriptor(tableName);
137     desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
138 
139     admin.createTable(desc);
140     Table table = new HTable(TEST_UTIL.getConfiguration(), desc.getTableName());
141     try {
142 
143       HRegionServer server = TEST_UTIL.getRSForFirstRegionInTable(tableName);
144       WAL log = server.getWAL(null);
145 
146       // don't run this test without append support (HDFS-200 & HDFS-142)
147       assertTrue("Need append support for this test",
148         FSUtils.isAppendSupported(TEST_UTIL.getConfiguration()));
149 
150       Put p = new Put(Bytes.toBytes("row2001"));
151       p.add(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2001));
152       table.put(p);
153 
154       log.sync();
155 
156       p = new Put(Bytes.toBytes("row2002"));
157       p.add(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2002));
158       table.put(p);
159 
160       dfsCluster.restartDataNodes();
161       LOG.info("Restarted datanodes");
162 
163       try {
164         log.rollWriter(true);
165       } catch (FailedLogCloseException flce) {
166         // Expected exception.  We used to expect that there would be unsynced appends but this
167         // not reliable now that sync plays a roll in wall rolling.  The above puts also now call
168         // sync.
169       } catch (Throwable t) {
170         LOG.fatal("FAILED TEST: Got wrong exception", t);
171       }
172     } finally {
173       table.close();
174     }
175   }
176 
177   /**
178    * Tests the case where a RegionServer enters a GC pause,
179    * comes back online after the master declared it dead and started to split.
180    * Want log rolling after a master split to fail. See HBASE-2312.
181    */
182   @Test (timeout=300000)
183   public void testLogRollAfterSplitStart() throws IOException {
184     LOG.info("Verify wal roll after split starts will fail.");
185     String logName = "testLogRollAfterSplitStart";
186     Path thisTestsDir = new Path(HBASEDIR, DefaultWALProvider.getWALDirectoryName(logName));
187     final WALFactory wals = new WALFactory(conf, null, logName);
188 
189     try {
190       // put some entries in an WAL
191       TableName tableName =
192           TableName.valueOf(this.getClass().getName());
193       HRegionInfo regioninfo = new HRegionInfo(tableName,
194           HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
195       final WAL log = wals.getWAL(regioninfo.getEncodedNameAsBytes());
196       MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl(1);
197 
198       final int total = 20;
199       for (int i = 0; i < total; i++) {
200         WALEdit kvs = new WALEdit();
201         kvs.add(new KeyValue(Bytes.toBytes(i), tableName.getName(), tableName.getName()));
202         HTableDescriptor htd = new HTableDescriptor(tableName);
203         htd.addFamily(new HColumnDescriptor("column"));
204         log.append(htd, regioninfo, new WALKey(regioninfo.getEncodedNameAsBytes(), tableName,
205             System.currentTimeMillis(), mvcc), kvs, true);
206       }
207       // Send the data to HDFS datanodes and close the HDFS writer
208       log.sync();
209       ((FSHLog) log).replaceWriter(((FSHLog)log).getOldPath(), null, null, null);
210 
211       /* code taken from MasterFileSystem.getLogDirs(), which is called from MasterFileSystem.splitLog()
212        * handles RS shutdowns (as observed by the splitting process)
213        */
214       // rename the directory so a rogue RS doesn't create more WALs
215       Path rsSplitDir = thisTestsDir.suffix(DefaultWALProvider.SPLITTING_EXT);
216       if (!fs.rename(thisTestsDir, rsSplitDir)) {
217         throw new IOException("Failed fs.rename for log split: " + thisTestsDir);
218       }
219       LOG.debug("Renamed region directory: " + rsSplitDir);
220 
221       LOG.debug("Processing the old log files.");
222       WALSplitter.split(HBASEDIR, rsSplitDir, OLDLOGDIR, fs, conf, wals);
223 
224       LOG.debug("Trying to roll the WAL.");
225       try {
226         log.rollWriter();
227         Assert.fail("rollWriter() did not throw any exception.");
228       } catch (IOException ioe) {
229         if (ioe.getCause() instanceof FileNotFoundException) {
230           LOG.info("Got the expected exception: ", ioe.getCause());
231         } else {
232           Assert.fail("Unexpected exception: " + ioe);
233         }
234       }
235     } finally {
236       wals.close();
237       if (fs.exists(thisTestsDir)) {
238         fs.delete(thisTestsDir, true);
239       }
240     }
241   }
242 }