View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import static org.junit.Assert.assertEquals;
22  
23  import java.io.IOException;
24  import java.util.Arrays;
25  import java.util.Collection;
26  
27  import org.apache.hadoop.hbase.HBaseTestingUtility;
28  import org.apache.hadoop.hbase.MiniHBaseCluster;
29  import org.apache.hadoop.hbase.TableName;
30  import org.apache.hadoop.hbase.client.ResultScanner;
31  import org.apache.hadoop.hbase.client.Scan;
32  import org.apache.hadoop.hbase.client.Table;
33  import org.apache.hadoop.hbase.master.AssignmentManager;
34  import org.apache.hadoop.hbase.master.HMaster;
35  import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
36  import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
37  import org.apache.hadoop.hbase.regionserver.HRegionServer;
38  import org.apache.hadoop.hbase.testclassification.LargeTests;
39  import org.apache.hadoop.hbase.util.Threads;
40  import org.junit.After;
41  import org.junit.Before;
42  import org.junit.Test;
43  import org.junit.experimental.categories.Category;
44  import org.junit.runner.RunWith;
45  import org.junit.runners.Parameterized;
46  import org.junit.runners.Parameterized.Parameters;
47  
48  /**
49   * It used to first run with DLS and then DLR but HBASE-12751 broke DLR so we disabled it here.
50   */
51  @Category(LargeTests.class)
52  @RunWith(Parameterized.class)
53  public class TestServerCrashProcedure {
54    // Ugly junit parameterization. I just want to pass false and then true but seems like needs
55    // to return sequences of two-element arrays.
56    @Parameters(name = "{index}: setting={0}")
57    public static Collection<Object []> data() {
58      return Arrays.asList(new Object[] [] {{Boolean.FALSE, -1}});
59    }
60  
61    private final HBaseTestingUtility util = new HBaseTestingUtility();
62  
63    @Before
64    public void setup() throws Exception {
65      this.util.startMiniCluster(3);
66      ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(
67        this.util.getHBaseCluster().getMaster().getMasterProcedureExecutor(), false);
68    }
69  
70    @After
71    public void tearDown() throws Exception {
72      MiniHBaseCluster cluster = this.util.getHBaseCluster();
73      HMaster master = cluster == null? null: cluster.getMaster();
74      if (master != null && master.getMasterProcedureExecutor() != null) {
75        ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(master.getMasterProcedureExecutor(),
76          false);
77      }
78      this.util.shutdownMiniCluster();
79    }
80  
81    public TestServerCrashProcedure(final Boolean b, final int ignore) {
82      this.util.getConfiguration().setBoolean("hbase.master.distributed.log.replay", b);
83      this.util.getConfiguration().setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
84    }
85  
86    /**
87     * Run server crash procedure steps twice to test idempotency and that we are persisting all
88     * needed state.
89     * @throws Exception
90     */
91    @Test(timeout = 300000)
92    public void testRecoveryAndDoubleExecutionOnline() throws Exception {
93      final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecutionOnline");
94      this.util.createTable(tableName, HBaseTestingUtility.COLUMNS,
95        HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE);
96      try (Table t = this.util.getConnection().getTable(tableName)) {
97        // Load the table with a bit of data so some logs to split and some edits in each region.
98        this.util.loadTable(t, HBaseTestingUtility.COLUMNS[0]);
99        int count = countRows(t);
100       // Run the procedure executor outside the master so we can mess with it. Need to disable
101       // Master's running of the server crash processing.
102       HMaster master = this.util.getHBaseCluster().getMaster();
103       final ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
104       master.setServerCrashProcessingEnabled(false);
105       // Kill a server. Master will notice but do nothing other than add it to list of dead servers.
106       HRegionServer hrs = this.util.getHBaseCluster().getRegionServer(0);
107       boolean carryingMeta = (master.getAssignmentManager().isCarryingMeta(hrs.getServerName()) ==
108           AssignmentManager.ServerHostRegion.HOSTING_REGION);
109       this.util.getHBaseCluster().killRegionServer(hrs.getServerName());
110       hrs.join();
111       // Wait until the expiration of the server has arrived at the master. We won't process it
112       // by queuing a ServerCrashProcedure because we have disabled crash processing... but wait
113       // here so ServerManager gets notice and adds expired server to appropriate queues.
114       while (!master.getServerManager().isServerDead(hrs.getServerName())) Threads.sleep(10);
115       // Now, reenable processing else we can't get a lock on the ServerCrashProcedure.
116       master.setServerCrashProcessingEnabled(true);
117       // Do some of the master processing of dead servers so when SCP runs, it has expected 'state'.
118       master.getServerManager().moveFromOnelineToDeadServers(hrs.getServerName());
119       // Enable test flags and then queue the crash procedure.
120       ProcedureTestingUtility.waitNoProcedureRunning(procExec);
121       ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
122       long procId =
123         procExec.submitProcedure(new ServerCrashProcedure(hrs.getServerName(), true, carryingMeta));
124       // Now run through the procedure twice crashing the executor on each step...
125       MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId);
126       // Assert all data came back.
127       assertEquals(count, countRows(t));
128     }
129   }
130 
131   int countRows(final Table t) throws IOException {
132     int count = 0;
133     try (ResultScanner scanner = t.getScanner(new Scan())) {
134       while(scanner.next() != null) count++;
135     }
136     return count;
137   }
138 }