View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.client;
20  
21  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
28  import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
29  import static org.junit.Assert.*;
30  
31  import java.util.Arrays;
32  import java.util.Collection;
33  import java.util.List;
34  import java.util.concurrent.ExecutorService;
35  
36  import javax.annotation.Nullable;
37  
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.hbase.Abortable;
42  import org.apache.hadoop.hbase.CategoryBasedTimeout;
43  import org.apache.hadoop.hbase.HBaseTestingUtility;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HRegionInfo;
46  import org.apache.hadoop.hbase.HRegionLocation;
47  import org.apache.hadoop.hbase.MetaTableAccessor;
48  import org.apache.hadoop.hbase.RegionLocations;
49  import org.apache.hadoop.hbase.ServerName;
50  import org.apache.hadoop.hbase.TableName;
51  import org.apache.hadoop.hbase.TableNotFoundException;
52  import org.apache.hadoop.hbase.Waiter;
53  import org.apache.hadoop.hbase.client.ConnectionManager.HConnectionImplementation;
54  import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
55  import org.apache.hadoop.hbase.testclassification.LargeTests;
56  import org.apache.hadoop.hbase.util.Bytes;
57  import org.apache.hadoop.hbase.util.HBaseFsck;
58  import org.apache.hadoop.hbase.util.HBaseFsckRepair;
59  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
60  import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
61  import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
62  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
63  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
64  import org.junit.After;
65  import org.junit.Before;
66  import org.junit.Rule;
67  import org.junit.Test;
68  import org.junit.experimental.categories.Category;
69  import org.junit.rules.TestRule;
70  
71  /**
72   * Tests the scenarios where replicas are enabled for the meta table
73   */
74  @Category(LargeTests.class)
75  public class TestMetaWithReplicas {
76    @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
77        withTimeout(this.getClass()).
78        withLookingForStuckThread(true).
79        build();
80    private static final Log LOG = LogFactory.getLog(TestMetaWithReplicas.class);
81    private final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
82  
83    @Before
84    public void setup() throws Exception {
85      TEST_UTIL.getConfiguration().setInt("zookeeper.session.timeout", 30000);
86      TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, 3);
87      TEST_UTIL.getConfiguration().setInt(
88          StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 1000);
89      TEST_UTIL.startMiniCluster(3);
90      // disable the balancer
91      LoadBalancerTracker l = new LoadBalancerTracker(TEST_UTIL.getZooKeeperWatcher(),
92          new Abortable() {
93        boolean aborted = false;
94        @Override
95        public boolean isAborted() {
96          return aborted;
97        }
98        @Override
99        public void abort(String why, Throwable e) {
100         aborted = true;
101       }
102     });
103     l.setBalancerOn(false);
104     for (int replicaId = 1; replicaId < 3; replicaId ++) {
105       HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO,
106         replicaId);
107       TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(h);
108     }
109     LOG.debug("All meta replicas assigned");
110   }
111 
112   @After
113   public void tearDown() throws Exception {
114     TEST_UTIL.shutdownMiniCluster();
115   }
116 
117   @Test
118   public void testMetaHTDReplicaCount() throws Exception {
119     assertTrue(TEST_UTIL.getHBaseAdmin().getTableDescriptor(TableName.META_TABLE_NAME)
120         .getRegionReplication() == 3);
121   }
122 
123   @Test
124   public void testZookeeperNodesForReplicas() throws Exception {
125     // Checks all the znodes exist when meta's replicas are enabled
126     ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
127     Configuration conf = TEST_UTIL.getConfiguration();
128     String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
129         HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
130     String primaryMetaZnode = ZKUtil.joinZNode(baseZNode,
131         conf.get("zookeeper.znode.metaserver", "meta-region-server"));
132     // check that the data in the znode is parseable (this would also mean the znode exists)
133     byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
134     ServerName.parseFrom(data);
135     for (int i = 1; i < 3; i++) {
136       String secZnode = ZKUtil.joinZNode(baseZNode,
137           conf.get("zookeeper.znode.metaserver", "meta-region-server") + "-" + i);
138       String str = zkw.getZNodeForReplica(i);
139       assertTrue(str.equals(secZnode));
140       // check that the data in the znode is parseable (this would also mean the znode exists)
141       data = ZKUtil.getData(zkw, secZnode);
142       ServerName.parseFrom(data);
143     }
144   }
145 
146   @Test
147   public void testShutdownHandling() throws Exception {
148     // This test creates a table, flushes the meta (with 3 replicas), kills the
149     // server holding the primary meta replica. Then it does a put/get into/from
150     // the test table. The put/get operations would use the replicas to locate the
151     // location of the test table's region
152     shutdownMetaAndDoValidations(TEST_UTIL);
153   }
154 
155   public static void shutdownMetaAndDoValidations(HBaseTestingUtility util) throws Exception {
156     // This test creates a table, flushes the meta (with 3 replicas), kills the
157     // server holding the primary meta replica. Then it does a put/get into/from
158     // the test table. The put/get operations would use the replicas to locate the
159     // location of the test table's region
160     ZooKeeperWatcher zkw = util.getZooKeeperWatcher();
161     Configuration conf = util.getConfiguration();
162     conf.setBoolean(HConstants.USE_META_REPLICAS, true);
163 
164     String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
165         HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
166     String primaryMetaZnode = ZKUtil.joinZNode(baseZNode,
167         conf.get("zookeeper.znode.metaserver", "meta-region-server"));
168     byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
169     ServerName primary = ServerName.parseFrom(data);
170 
171     byte[] TABLE = Bytes.toBytes("testShutdownHandling");
172     byte[][] FAMILIES = new byte[][] { Bytes.toBytes("foo") };
173     if (util.getHBaseAdmin().tableExists(TABLE)) {
174       util.getHBaseAdmin().disableTable(TABLE);
175       util.getHBaseAdmin().deleteTable(TABLE);
176     }
177     ServerName master = null;
178     try (Connection c = ConnectionFactory.createConnection(util.getConfiguration());) {
179       try (Table htable = util.createTable(TABLE, FAMILIES, conf);) {
180         util.getHBaseAdmin().flush(TableName.META_TABLE_NAME);
181         Thread.sleep(conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD,
182            30000) * 6);
183         List<HRegionInfo> regions = MetaTableAccessor.getTableRegions(zkw, c,
184           TableName.valueOf(TABLE));
185         HRegionLocation hrl = MetaTableAccessor.getRegionLocation(c, regions.get(0));
186         // Ensure that the primary server for test table is not the same one as the primary
187         // of the meta region since we will be killing the srv holding the meta's primary...
188         // We want to be able to write to the test table even when the meta is not present ..
189         // If the servers are the same, then move the test table's region out of the server
190         // to another random server
191         if (hrl.getServerName().equals(primary)) {
192           util.getHBaseAdmin().move(hrl.getRegionInfo().getEncodedNameAsBytes(), null);
193           // wait for the move to complete
194           do {
195             Thread.sleep(10);
196             hrl = MetaTableAccessor.getRegionLocation(c, regions.get(0));
197           } while (primary.equals(hrl.getServerName()));
198           util.getHBaseAdmin().flush(TableName.META_TABLE_NAME);
199           Thread.sleep(conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD,
200              30000) * 3);
201         }
202         master = util.getHBaseClusterInterface().getClusterStatus().getMaster();
203         // kill the master so that regionserver recovery is not triggered at all
204         // for the meta server
205         util.getHBaseClusterInterface().stopMaster(master);
206         util.getHBaseClusterInterface().waitForMasterToStop(master, 60000);
207         if (!master.equals(primary)) {
208           util.getHBaseClusterInterface().killRegionServer(primary);
209           util.getHBaseClusterInterface().waitForRegionServerToStop(primary, 60000);
210         }
211         ((ClusterConnection)c).clearRegionCache();
212       }
213       Get get = null;
214       Result r = null;
215       byte[] row = "test".getBytes();
216       try (Table htable = c.getTable(TableName.valueOf(TABLE));) {
217         Put put = new Put(row);
218         put.add("foo".getBytes(), row, row);
219         BufferedMutator m = c.getBufferedMutator(TableName.valueOf(TABLE));
220         m.mutate(put);
221         m.flush();
222         // Try to do a get of the row that was just put
223         get = new Get(row);
224         r = htable.get(get);
225         assertTrue(Arrays.equals(r.getRow(), row));
226         // now start back the killed servers and disable use of replicas. That would mean
227         // calls go to the primary
228         util.getHBaseClusterInterface().startMaster(master.getHostname(), 0);
229         util.getHBaseClusterInterface().startRegionServer(primary.getHostname(), 0);
230         util.getHBaseClusterInterface().waitForActiveAndReadyMaster();
231         ((ClusterConnection)c).clearRegionCache();
232       }
233       conf.setBoolean(HConstants.USE_META_REPLICAS, false);
234       try (Table htable = c.getTable(TableName.valueOf(TABLE));) {
235         r = htable.get(get);
236         assertTrue(Arrays.equals(r.getRow(), row));
237       }
238     }
239   }
240 
241   @Test
242   public void testMetaLookupThreadPoolCreated() throws Exception {
243     byte[] TABLE = Bytes.toBytes("testMetaLookupThreadPoolCreated");
244     byte[][] FAMILIES = new byte[][] { Bytes.toBytes("foo") };
245     if (TEST_UTIL.getHBaseAdmin().tableExists(TABLE)) {
246       TEST_UTIL.getHBaseAdmin().disableTable(TABLE);
247       TEST_UTIL.getHBaseAdmin().deleteTable(TABLE);
248     }
249     try (Table htable =
250         TEST_UTIL.createTable(TABLE, FAMILIES, TEST_UTIL.getConfiguration());) {
251       byte[] row = "test".getBytes();
252       HConnectionImplementation c = ((HConnectionImplementation)((HTable)htable).connection);
253       // check that metalookup pool would get created
254       c.relocateRegion(TABLE, row);
255       ExecutorService ex = c.getCurrentMetaLookupPool();
256       assert(ex != null);
257     }
258   }
259 
260   @Test
261   public void testChangingReplicaCount() throws Exception {
262     // tests changing the replica count across master restarts
263     // reduce the replica count from 3 to 2
264     stopMasterAndValidateReplicaCount(3, 2);
265     // increase the replica count from 2 to 3
266     stopMasterAndValidateReplicaCount(2, 3);
267   }
268 
269   private void stopMasterAndValidateReplicaCount(int originalReplicaCount, int newReplicaCount)
270       throws Exception {
271     ServerName sn = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster();
272     TEST_UTIL.getHBaseClusterInterface().stopMaster(sn);
273     TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(sn, 60000);
274     List<String> metaZnodes = TEST_UTIL.getZooKeeperWatcher().getMetaReplicaNodes();
275     assert(metaZnodes.size() == originalReplicaCount); //we should have what was configured before
276     TEST_UTIL.getHBaseClusterInterface().getConf().setInt(HConstants.META_REPLICAS_NUM,
277         newReplicaCount);
278     TEST_UTIL.getHBaseClusterInterface().startMaster(sn.getHostname(), 0);
279     TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster();
280     int count = 0;
281     do {
282       metaZnodes = TEST_UTIL.getZooKeeperWatcher().getMetaReplicaNodes();
283       Thread.sleep(10);
284       count++;
285       // wait for the count to be different from the originalReplicaCount. When the
286       // replica count is reduced, that will happen when the master unassigns excess
287       // replica, and deletes the excess znodes
288     } while (metaZnodes.size() == originalReplicaCount && count < 1000);
289     assert(metaZnodes.size() == newReplicaCount);
290     // also check if hbck returns without errors
291     TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM,
292         newReplicaCount);
293     HBaseFsck hbck = HbckTestingUtil.doFsck(TEST_UTIL.getConfiguration(), false);
294     HbckTestingUtil.assertNoErrors(hbck);
295   }
296 
297   @Test
298   public void testHBaseFsckWithMetaReplicas() throws Exception {
299     HBaseFsck hbck = HbckTestingUtil.doFsck(TEST_UTIL.getConfiguration(), false);
300     HbckTestingUtil.assertNoErrors(hbck);
301   }
302 
303   @Test
304   public void testHBaseFsckWithFewerMetaReplicas() throws Exception {
305     ClusterConnection c = (ClusterConnection)ConnectionFactory.createConnection(
306         TEST_UTIL.getConfiguration());
307     RegionLocations rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW,
308         false, false);
309     HBaseFsckRepair.closeRegionSilentlyAndWait(c,
310         rl.getRegionLocation(1).getServerName(), rl.getRegionLocation(1).getRegionInfo());
311     // check that problem exists
312     HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false);
313     assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN,ERROR_CODE.NO_META_REGION});
314     // fix the problem
315     hbck = doFsck(TEST_UTIL.getConfiguration(), true);
316     // run hbck again to make sure we don't see any errors
317     hbck = doFsck(TEST_UTIL.getConfiguration(), false);
318     assertErrors(hbck, new ERROR_CODE[]{});
319   }
320 
321   @Test
322   public void testHBaseFsckWithFewerMetaReplicaZnodes() throws Exception {
323     ClusterConnection c = (ClusterConnection)ConnectionFactory.createConnection(
324         TEST_UTIL.getConfiguration());
325     RegionLocations rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW,
326         false, false);
327     HBaseFsckRepair.closeRegionSilentlyAndWait(c,
328         rl.getRegionLocation(2).getServerName(), rl.getRegionLocation(2).getRegionInfo());
329     ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
330     ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(2));
331     // check that problem exists
332     HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false);
333     assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN,ERROR_CODE.NO_META_REGION});
334     // fix the problem
335     hbck = doFsck(TEST_UTIL.getConfiguration(), true);
336     // run hbck again to make sure we don't see any errors
337     hbck = doFsck(TEST_UTIL.getConfiguration(), false);
338     assertErrors(hbck, new ERROR_CODE[]{});
339   }
340 
341   @Test
342   public void testAccessingUnknownTables() throws Exception {
343     Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
344     conf.setBoolean(HConstants.USE_META_REPLICAS, true);
345     Table table = TEST_UTIL.getConnection().getTable(TableName.valueOf("RandomTable"));
346     Get get = new Get(Bytes.toBytes("foo"));
347     try {
348       table.get(get);
349     } catch (TableNotFoundException t) {
350       return;
351     }
352     fail("Expected TableNotFoundException");
353   }
354 
355   @Test
356   public void testMetaAddressChange() throws Exception {
357     // checks that even when the meta's location changes, the various
358     // caches update themselves. Uses the master operations to test
359     // this
360     Configuration conf = TEST_UTIL.getConfiguration();
361     ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
362     String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
363         HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
364     String primaryMetaZnode = ZKUtil.joinZNode(baseZNode,
365         conf.get("zookeeper.znode.metaserver", "meta-region-server"));
366     // check that the data in the znode is parseable (this would also mean the znode exists)
367     byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
368     ServerName currentServer = ServerName.parseFrom(data);
369     Collection<ServerName> liveServers = TEST_UTIL.getHBaseAdmin().getClusterStatus().getServers();
370     ServerName moveToServer = null;
371     for (ServerName s : liveServers) {
372       if (!currentServer.equals(s)) {
373         moveToServer = s;
374       }
375     }
376     assert(moveToServer != null);
377     String tableName = "randomTable5678";
378     TEST_UTIL.createTable(TableName.valueOf(tableName), "f");
379     assertTrue(TEST_UTIL.getHBaseAdmin().tableExists(tableName));
380     TEST_UTIL.getHBaseAdmin().move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
381         Bytes.toBytes(moveToServer.getServerName()));
382     int i = 0;
383     do {
384       Thread.sleep(10);
385       data = ZKUtil.getData(zkw, primaryMetaZnode);
386       currentServer = ServerName.parseFrom(data);
387       i++;
388     } while (!moveToServer.equals(currentServer) && i < 1000); //wait for 10 seconds overall
389     assert(i != 1000);
390     TEST_UTIL.getHBaseAdmin().disableTable("randomTable5678");
391     assertTrue(TEST_UTIL.getHBaseAdmin().isTableDisabled("randomTable5678"));
392   }
393 
394   @Test
395   public void testShutdownOfReplicaHolder() throws Exception {
396     // checks that the when the server holding meta replica is shut down, the meta replica
397     // can be recovered
398     RegionLocations rl = ConnectionManager.getConnectionInternal(TEST_UTIL.getConfiguration()).
399         locateRegion(TableName.META_TABLE_NAME, Bytes.toBytes(""), false, true);
400     HRegionLocation hrl = rl.getRegionLocation(1);
401     ServerName oldServer = hrl.getServerName();
402     TEST_UTIL.getHBaseClusterInterface().killRegionServer(oldServer);
403     int i = 0;
404     do {
405       LOG.debug("Waiting for the replica " + hrl.getRegionInfo() + " to come up");
406       Thread.sleep(30000); //wait for the detection/recovery
407       rl = ConnectionManager.getConnectionInternal(TEST_UTIL.getConfiguration()).
408           locateRegion(TableName.META_TABLE_NAME, Bytes.toBytes(""), false, true);
409       hrl = rl.getRegionLocation(1);
410       i++;
411     } while ((hrl == null || hrl.getServerName().equals(oldServer)) && i < 3);
412     assertTrue(i != 3);
413   }
414 
415   @Test
416   public void testHBaseFsckWithExcessMetaReplicas() throws Exception {
417     // Create a meta replica (this will be the 4th one) and assign it
418     HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
419         HRegionInfo.FIRST_META_REGIONINFO, 3);
420     // create in-memory state otherwise master won't assign
421     TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager()
422              .getRegionStates().createRegionState(h);
423     TEST_UTIL.getMiniHBaseCluster().getMaster().assignRegion(h);
424     HBaseFsckRepair.waitUntilAssigned(TEST_UTIL.getHBaseAdmin(), h);
425     // check that problem exists
426     HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false);
427     assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN, ERROR_CODE.SHOULD_NOT_BE_DEPLOYED});
428     // fix the problem
429     hbck = doFsck(TEST_UTIL.getConfiguration(), true);
430     // run hbck again to make sure we don't see any errors
431     hbck = doFsck(TEST_UTIL.getConfiguration(), false);
432     assertErrors(hbck, new ERROR_CODE[]{});
433   }
434 }