1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertTrue;
25
26 import java.io.IOException;
27 import java.util.ArrayList;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Set;
31 import java.util.TreeSet;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FileSystem;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.hbase.Abortable;
39 import org.apache.hadoop.hbase.ClusterStatus;
40 import org.apache.hadoop.hbase.HBaseConfiguration;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.HColumnDescriptor;
43 import org.apache.hadoop.hbase.HConstants;
44 import org.apache.hadoop.hbase.HRegionInfo;
45 import org.apache.hadoop.hbase.HTableDescriptor;
46 import org.apache.hadoop.hbase.testclassification.LargeTests;
47 import org.apache.hadoop.hbase.MetaTableAccessor;
48 import org.apache.hadoop.hbase.MiniHBaseCluster;
49 import org.apache.hadoop.hbase.RegionTransition;
50 import org.apache.hadoop.hbase.ServerName;
51 import org.apache.hadoop.hbase.TableName;
52 import org.apache.hadoop.hbase.TableStateManager;
53 import org.apache.hadoop.hbase.client.RegionLocator;
54 import org.apache.hadoop.hbase.client.Table;
55 import org.apache.hadoop.hbase.executor.EventType;
56 import org.apache.hadoop.hbase.master.RegionState.State;
57 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
58 import org.apache.hadoop.hbase.protobuf.RequestConverter;
59 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
60 import org.apache.hadoop.hbase.regionserver.HRegion;
61 import org.apache.hadoop.hbase.regionserver.HRegionServer;
62 import org.apache.hadoop.hbase.regionserver.Region;
63 import org.apache.hadoop.hbase.regionserver.RegionMergeTransactionImpl;
64 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
65 import org.apache.hadoop.hbase.util.Bytes;
66 import org.apache.hadoop.hbase.util.FSTableDescriptors;
67 import org.apache.hadoop.hbase.util.FSUtils;
68 import org.apache.hadoop.hbase.util.JVMClusterUtil;
69 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
70 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
71 import org.apache.hadoop.hbase.util.Threads;
72 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
73 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
74 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
75 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
76 import org.apache.zookeeper.data.Stat;
77 import org.junit.Ignore;
78 import org.junit.Test;
79 import org.junit.experimental.categories.Category;
80
81 @Category(LargeTests.class)
82 public class TestMasterFailover {
83 private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164 @Test (timeout=240000)
165 public void testMasterFailoverWithMockedRIT() throws Exception {
166
167 final int NUM_MASTERS = 1;
168 final int NUM_RS = 3;
169
170
171 Configuration conf = HBaseConfiguration.create();
172 conf.setBoolean("hbase.assignment.usezk", true);
173
174
175 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
176 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
177 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
178 log("Cluster started");
179
180
181 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
182
183
184 List<MasterThread> masterThreads = cluster.getMasterThreads();
185 assertEquals(1, masterThreads.size());
186
187
188 assertTrue(cluster.waitForActiveAndReadyMaster());
189 HMaster master = masterThreads.get(0).getMaster();
190 assertTrue(master.isActiveMaster());
191 assertTrue(master.isInitialized());
192
193
194 master.balanceSwitch(false);
195
196
197 byte [] FAMILY = Bytes.toBytes("family");
198 byte [][] SPLIT_KEYS = new byte [][] {
199 new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
200 Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
201 Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
202 Bytes.toBytes("iii"), Bytes.toBytes("jjj")
203 };
204
205 byte [] enabledTable = Bytes.toBytes("enabledTable");
206 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
207 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
208
209 FileSystem filesystem = FileSystem.get(conf);
210 Path rootdir = FSUtils.getRootDir(conf);
211 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
212
213 fstd.createTableDescriptor(htdEnabled);
214
215 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
216 createRegion(hriEnabled, rootdir, conf, htdEnabled);
217
218 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
219 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
220
221 TableName disabledTable = TableName.valueOf("disabledTable");
222 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
223 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
224
225 fstd.createTableDescriptor(htdDisabled);
226 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
227 createRegion(hriDisabled, rootdir, conf, htdDisabled);
228 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
229 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
230
231 TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
232 TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
233
234 log("Regions in hbase:meta and namespace have been created");
235
236
237
238 assertEquals(4, cluster.countServedRegions());
239
240
241 AssignmentManager am = master.getAssignmentManager();
242 RegionStates regionStates = am.getRegionStates();
243 List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
244 assertEquals(2, mergingRegions.size());
245 HRegionInfo a = mergingRegions.get(0);
246 HRegionInfo b = mergingRegions.get(1);
247 HRegionInfo newRegion = RegionMergeTransactionImpl.getMergedRegionInfo(a, b);
248 ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
249 ServerName serverB = regionStates.getRegionServerOfRegion(b);
250 if (!serverB.equals(mergingServer)) {
251 RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
252 am.balance(plan);
253 assertTrue(am.waitForAssignment(b));
254 }
255
256
257 HRegionServer hrs = cluster.getRegionServer(0);
258 ServerName serverName = hrs.getServerName();
259 HRegionInfo closingRegion = enabledRegions.remove(0);
260
261 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
262 enabledAndAssignedRegions.add(enabledRegions.remove(0));
263 enabledAndAssignedRegions.add(enabledRegions.remove(0));
264 enabledAndAssignedRegions.add(closingRegion);
265
266 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
267 disabledAndAssignedRegions.add(disabledRegions.remove(0));
268 disabledAndAssignedRegions.add(disabledRegions.remove(0));
269
270
271 for (HRegionInfo hri : enabledAndAssignedRegions) {
272 master.assignmentManager.addPlan(hri.getEncodedName(),
273 new RegionPlan(hri, null, serverName));
274 master.assignRegion(hri);
275 }
276
277 for (HRegionInfo hri : disabledAndAssignedRegions) {
278 master.assignmentManager.addPlan(hri.getEncodedName(),
279 new RegionPlan(hri, null, serverName));
280 master.assignRegion(hri);
281 }
282
283
284 log("Waiting for assignment to finish");
285 ZKAssign.blockUntilNoRIT(zkw);
286 log("Assignment completed");
287
288
289 log("Aborting master");
290 cluster.abortMaster(0);
291 cluster.waitOnMaster(0);
292 log("Master has aborted");
293
294
295
296
297
298
299 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
300 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
301
302 log("Beginning to mock scenarios");
303
304
305 TableStateManager zktable = new ZKTableStateManager(zkw);
306 zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
307
308
309
310
311
312
313
314
315 HRegionInfo region = enabledRegions.remove(0);
316 regionsThatShouldBeOnline.add(region);
317 ZKAssign.createNodeOffline(zkw, region, serverName);
318
319
320
321
322
323 regionsThatShouldBeOnline.add(closingRegion);
324 ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
325
326
327
328
329
330
331
332 region = enabledRegions.remove(0);
333 regionsThatShouldBeOnline.add(region);
334 int version = ZKAssign.createNodeClosing(zkw, region, serverName);
335 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
336
337
338 region = disabledRegions.remove(0);
339 regionsThatShouldBeOffline.add(region);
340 version = ZKAssign.createNodeClosing(zkw, region, serverName);
341 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
342
343
344
345
346
347
348
349 region = enabledRegions.remove(0);
350 regionsThatShouldBeOnline.add(region);
351 ZKAssign.createNodeOffline(zkw, region, serverName);
352 ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
353 while (true) {
354 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
355 RegionTransition rt = RegionTransition.parseFrom(bytes);
356 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
357 break;
358 }
359 Thread.sleep(100);
360 }
361
362
363
364 region = disabledRegions.remove(0);
365 regionsThatShouldBeOffline.add(region);
366 ZKAssign.createNodeOffline(zkw, region, serverName);
367 ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
368 while (true) {
369 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
370 RegionTransition rt = RegionTransition.parseFrom(bytes);
371 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
372 break;
373 }
374 Thread.sleep(100);
375 }
376
377
378
379
380
381
382
383 hrs.getCoordinatedStateManager().
384 getRegionMergeCoordination().startRegionMergeTransaction(newRegion, mergingServer, a, b);
385
386
387
388
389
390
391
392
393
394 log("Done mocking data up in ZK");
395
396
397 log("Starting up a new master");
398 master = cluster.startMaster().getMaster();
399 log("Waiting for master to be ready");
400 cluster.waitForActiveAndReadyMaster();
401 log("Master is ready");
402
403
404 regionStates = master.getAssignmentManager().getRegionStates();
405
406 assertTrue(regionStates.isRegionInState(a, State.MERGING));
407 assertTrue(regionStates.isRegionInState(b, State.MERGING));
408 assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
409
410
411 ZKAssign.deleteNodeFailSilent(zkw, newRegion);
412
413
414 log("Waiting for no more RIT");
415 ZKAssign.blockUntilNoRIT(zkw);
416 log("No more RIT in ZK, now doing final test verification");
417
418
419 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
420 for (JVMClusterUtil.RegionServerThread rst :
421 cluster.getRegionServerThreads()) {
422 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(
423 rst.getRegionServer().getRSRpcServices()));
424 }
425
426
427 for (HRegionInfo hri : regionsThatShouldBeOnline) {
428 assertTrue(onlineRegions.contains(hri));
429 }
430
431
432 for (HRegionInfo hri : regionsThatShouldBeOffline) {
433 if (onlineRegions.contains(hri)) {
434 LOG.debug(hri);
435 }
436 assertFalse(onlineRegions.contains(hri));
437 }
438
439 log("Done with verification, all passed, shutting down cluster");
440
441
442 TEST_UTIL.shutdownMiniCluster();
443 }
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501 @Test (timeout=180000)
502 public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
503
504 final int NUM_MASTERS = 1;
505 final int NUM_RS = 2;
506
507
508 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
509 Configuration conf = TEST_UTIL.getConfiguration();
510 conf.setBoolean("hbase.assignment.usezk", true);
511
512 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
513 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
514 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
515 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
516 log("Cluster started");
517
518
519 ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
520 "unittest", new Abortable() {
521
522 @Override
523 public void abort(String why, Throwable e) {
524 LOG.error("Fatal ZK Error: " + why, e);
525 org.junit.Assert.assertFalse("Fatal ZK error", true);
526 }
527
528 @Override
529 public boolean isAborted() {
530 return false;
531 }
532
533 });
534
535
536 List<MasterThread> masterThreads = cluster.getMasterThreads();
537 assertEquals(1, masterThreads.size());
538
539
540 assertTrue(cluster.waitForActiveAndReadyMaster());
541 HMaster master = masterThreads.get(0).getMaster();
542 assertTrue(master.isActiveMaster());
543 assertTrue(master.isInitialized());
544
545
546 master.balanceSwitch(false);
547
548
549 byte [] FAMILY = Bytes.toBytes("family");
550 byte[][] SPLIT_KEYS =
551 TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
552
553 byte [] enabledTable = Bytes.toBytes("enabledTable");
554 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
555 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
556 FileSystem filesystem = FileSystem.get(conf);
557 Path rootdir = FSUtils.getRootDir(conf);
558 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
559
560 fstd.createTableDescriptor(htdEnabled);
561 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
562 null, null);
563 createRegion(hriEnabled, rootdir, conf, htdEnabled);
564
565 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
566 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
567
568 TableName disabledTable =
569 TableName.valueOf("disabledTable");
570 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
571 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
572
573 fstd.createTableDescriptor(htdDisabled);
574 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
575 createRegion(hriDisabled, rootdir, conf, htdDisabled);
576
577 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
578 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
579
580 log("Regions in hbase:meta and Namespace have been created");
581
582
583 assertEquals(2, cluster.countServedRegions());
584
585
586 List<RegionServerThread> regionservers =
587 cluster.getRegionServerThreads();
588 HRegionServer hrs = regionservers.get(0).getRegionServer();
589
590
591 RegionServerThread hrsDeadThread = regionservers.get(1);
592 HRegionServer hrsDead = hrsDeadThread.getRegionServer();
593 ServerName deadServerName = hrsDead.getServerName();
594
595
596 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
597 enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
598 enabledRegions.removeAll(enabledAndAssignedRegions);
599 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
600 disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
601 disabledRegions.removeAll(disabledAndAssignedRegions);
602
603
604 for (HRegionInfo hri : enabledAndAssignedRegions) {
605 master.assignmentManager.addPlan(hri.getEncodedName(),
606 new RegionPlan(hri, null, hrs.getServerName()));
607 master.assignRegion(hri);
608 }
609 for (HRegionInfo hri : disabledAndAssignedRegions) {
610 master.assignmentManager.addPlan(hri.getEncodedName(),
611 new RegionPlan(hri, null, hrs.getServerName()));
612 master.assignRegion(hri);
613 }
614
615 log("Waiting for assignment to finish");
616 ZKAssign.blockUntilNoRIT(zkw);
617 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
618 log("Assignment completed");
619
620 assertTrue(" Table must be enabled.", master.getAssignmentManager()
621 .getTableStateManager().isTableState(TableName.valueOf("enabledTable"),
622 ZooKeeperProtos.Table.State.ENABLED));
623
624 List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
625 enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
626 enabledRegions.removeAll(enabledAndOnDeadRegions);
627 List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
628 disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
629 disabledRegions.removeAll(disabledAndOnDeadRegions);
630
631
632 for (HRegionInfo hri : enabledAndOnDeadRegions) {
633 master.assignmentManager.addPlan(hri.getEncodedName(),
634 new RegionPlan(hri, null, deadServerName));
635 master.assignRegion(hri);
636 }
637 for (HRegionInfo hri : disabledAndOnDeadRegions) {
638 master.assignmentManager.addPlan(hri.getEncodedName(),
639 new RegionPlan(hri, null, deadServerName));
640 master.assignRegion(hri);
641 }
642
643
644 log("Waiting for assignment to finish");
645 ZKAssign.blockUntilNoRIT(zkw);
646 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
647 log("Assignment completed");
648
649
650
651 verifyRegionLocation(hrs, enabledAndAssignedRegions);
652 verifyRegionLocation(hrs, disabledAndAssignedRegions);
653 verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
654 verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
655
656 assertTrue(" Didn't get enough regions of enabledTalbe on live rs.",
657 enabledAndAssignedRegions.size() >= 2);
658 assertTrue(" Didn't get enough regions of disalbedTable on live rs.",
659 disabledAndAssignedRegions.size() >= 2);
660 assertTrue(" Didn't get enough regions of enabledTalbe on dead rs.",
661 enabledAndOnDeadRegions.size() >= 2);
662 assertTrue(" Didn't get enough regions of disalbedTable on dead rs.",
663 disabledAndOnDeadRegions.size() >= 2);
664
665
666 log("Aborting master");
667 cluster.abortMaster(0);
668 cluster.waitOnMaster(0);
669 log("Master has aborted");
670
671
672
673
674
675
676 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
677 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
678
679 log("Beginning to mock scenarios");
680
681
682 TableStateManager zktable = new ZKTableStateManager(zkw);
683 zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
684
685 assertTrue(" The enabled table should be identified on master fail over.",
686 zktable.isTableState(TableName.valueOf("enabledTable"),
687 ZooKeeperProtos.Table.State.ENABLED));
688
689
690
691
692
693
694 HRegionInfo region = enabledAndOnDeadRegions.remove(0);
695 regionsThatShouldBeOnline.add(region);
696 ZKAssign.createNodeClosing(zkw, region, deadServerName);
697 LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
698 region + "\n\n");
699
700
701 region = disabledAndOnDeadRegions.remove(0);
702 regionsThatShouldBeOffline.add(region);
703 ZKAssign.createNodeClosing(zkw, region, deadServerName);
704 LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
705 region + "\n\n");
706
707
708
709
710
711
712 region = enabledAndOnDeadRegions.remove(0);
713 regionsThatShouldBeOnline.add(region);
714 int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
715 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
716 LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
717 region + "\n\n");
718
719
720 region = disabledAndOnDeadRegions.remove(0);
721 regionsThatShouldBeOffline.add(region);
722 version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
723 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
724 LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
725 region + "\n\n");
726
727
728
729
730
731
732 region = enabledRegions.remove(0);
733 regionsThatShouldBeOnline.add(region);
734 ZKAssign.createNodeOffline(zkw, region, deadServerName);
735 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
736 LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
737 region + "\n\n");
738
739
740 region = disabledRegions.remove(0);
741 regionsThatShouldBeOffline.add(region);
742 ZKAssign.createNodeOffline(zkw, region, deadServerName);
743 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
744 LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
745 region + "\n\n");
746
747
748
749
750
751
752 region = enabledRegions.remove(0);
753 regionsThatShouldBeOnline.add(region);
754 ZKAssign.createNodeOffline(zkw, region, deadServerName);
755 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
756 hrsDead.getServerName(), region);
757 while (true) {
758 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
759 RegionTransition rt = RegionTransition.parseFrom(bytes);
760 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
761 break;
762 }
763 Thread.sleep(100);
764 }
765 LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" + region + "\n\n");
766
767
768 region = disabledRegions.remove(0);
769 regionsThatShouldBeOffline.add(region);
770 ZKAssign.createNodeOffline(zkw, region, deadServerName);
771 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
772 hrsDead.getServerName(), region);
773 while (true) {
774 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
775 RegionTransition rt = RegionTransition.parseFrom(bytes);
776 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
777 break;
778 }
779 Thread.sleep(100);
780 }
781 LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" + region + "\n\n");
782
783
784
785
786
787
788 region = enabledRegions.remove(0);
789 regionsThatShouldBeOnline.add(region);
790 ZKAssign.createNodeOffline(zkw, region, deadServerName);
791 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
792 hrsDead.getServerName(), region);
793 while (true) {
794 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
795 RegionTransition rt = RegionTransition.parseFrom(bytes);
796 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
797 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
798 LOG.debug("DELETED " + rt);
799 break;
800 }
801 Thread.sleep(100);
802 }
803 LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
804 + "\n" + region + "\n\n");
805
806
807 region = disabledRegions.remove(0);
808 regionsThatShouldBeOffline.add(region);
809 ZKAssign.createNodeOffline(zkw, region, deadServerName);
810 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
811 hrsDead.getServerName(), region);
812 while (true) {
813 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
814 RegionTransition rt = RegionTransition.parseFrom(bytes);
815 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
816 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
817 break;
818 }
819 Thread.sleep(100);
820 }
821 LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
822 + "\n" + region + "\n\n");
823
824
825
826
827
828 log("Done mocking data up in ZK");
829
830
831 log("Killing RS " + deadServerName);
832 hrsDead.abort("Killing for unit test");
833 log("RS " + deadServerName + " killed");
834
835
836
837 while (hrsDeadThread.isAlive()) {
838 Threads.sleep(10);
839 }
840 log("Starting up a new master");
841 master = cluster.startMaster().getMaster();
842 log("Waiting for master to be ready");
843 assertTrue(cluster.waitForActiveAndReadyMaster());
844 log("Master is ready");
845
846
847 while (master.getServerManager().areDeadServersInProgress()) {
848 Thread.sleep(10);
849 }
850
851
852 log("Waiting for no more RIT");
853 ZKAssign.blockUntilNoRIT(zkw);
854 log("No more RIT in ZK");
855 long now = System.currentTimeMillis();
856 long maxTime = 120000;
857 boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
858 if (!done) {
859 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
860 LOG.info("rit=" + regionStates.getRegionsInTransition());
861 }
862 long elapsed = System.currentTimeMillis() - now;
863 assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
864 elapsed < maxTime);
865 log("No more RIT in RIT map, doing final test verification");
866
867
868 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
869 now = System.currentTimeMillis();
870 maxTime = 30000;
871 for (JVMClusterUtil.RegionServerThread rst :
872 cluster.getRegionServerThreads()) {
873 try {
874 HRegionServer rs = rst.getRegionServer();
875 while (!rs.getRegionsInTransitionInRS().isEmpty()) {
876 elapsed = System.currentTimeMillis() - now;
877 assertTrue("Test timed out in getting online regions", elapsed < maxTime);
878 if (rs.isAborted() || rs.isStopped()) {
879
880 break;
881 }
882 Thread.sleep(100);
883 }
884 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs.getRSRpcServices()));
885 } catch (RegionServerStoppedException e) {
886 LOG.info("Got RegionServerStoppedException", e);
887 }
888 }
889
890
891 for (HRegionInfo hri : regionsThatShouldBeOnline) {
892 assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
893 onlineRegions.contains(hri));
894 }
895
896
897 for (HRegionInfo hri : regionsThatShouldBeOffline) {
898 assertFalse(onlineRegions.contains(hri));
899 }
900
901 log("Done with verification, all passed, shutting down cluster");
902
903
904 TEST_UTIL.shutdownMiniCluster();
905 }
906
907
908
909
910 private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
911 throws IOException {
912 List<HRegionInfo> tmpOnlineRegions =
913 ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
914 Iterator<HRegionInfo> itr = regions.iterator();
915 while (itr.hasNext()) {
916 HRegionInfo tmp = itr.next();
917 if (!tmpOnlineRegions.contains(tmp)) {
918 itr.remove();
919 }
920 }
921 }
922
923 HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
924 final HTableDescriptor htd)
925 throws IOException {
926 HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
927
928
929
930
931
932 HRegion.closeHRegion(r);
933 return r;
934 }
935
936
937
938
939 private void log(String string) {
940 LOG.info("\n\n" + string + " \n\n");
941 }
942
943 @Test (timeout=180000)
944 public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
945 throws Exception {
946 LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
947 final int NUM_MASTERS = 1;
948 final int NUM_RS = 2;
949
950
951 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
952 Configuration conf = TEST_UTIL.getConfiguration();
953 conf.setInt("hbase.master.info.port", -1);
954 conf.setBoolean("hbase.assignment.usezk", true);
955
956 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
957 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
958
959
960 List<RegionServerThread> regionServerThreads =
961 cluster.getRegionServerThreads();
962 Region metaRegion = null;
963 HRegionServer metaRegionServer = null;
964 for (RegionServerThread regionServerThread : regionServerThreads) {
965 HRegionServer regionServer = regionServerThread.getRegionServer();
966 metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
967 regionServer.abort("");
968 if (null != metaRegion) {
969 metaRegionServer = regionServer;
970 break;
971 }
972 }
973
974 TEST_UTIL.shutdownMiniHBaseCluster();
975
976
977 ZooKeeperWatcher zkw =
978 HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
979 metaRegion, metaRegionServer.getServerName());
980
981 LOG.info("Staring cluster for second time");
982 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
983
984 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
985 while (!master.isInitialized()) {
986 Thread.sleep(100);
987 }
988
989 log("Waiting for no more RIT");
990 ZKAssign.blockUntilNoRIT(zkw);
991
992 zkw.close();
993
994 TEST_UTIL.shutdownMiniCluster();
995 }
996
997
998
999
1000 @Test(timeout=240000)
1001 public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
1002 final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
1003 final int NUM_MASTERS = 1;
1004 final int NUM_RS = 2;
1005
1006
1007 Configuration conf = HBaseConfiguration.create();
1008 conf.setBoolean("hbase.assignment.usezk", true);
1009
1010
1011 final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1012 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1013 log("Cluster started");
1014
1015 TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1016 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1017 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1018 HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1019 ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1020 TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1021
1022 ServerName dstName = null;
1023 for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1024 if (!tmpServer.equals(serverName)) {
1025 dstName = tmpServer;
1026 break;
1027 }
1028 }
1029
1030 assertTrue(dstName != null);
1031
1032 TEST_UTIL.shutdownMiniHBaseCluster();
1033
1034 ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1035 ZKAssign.createNodeOffline(zkw, hri, dstName);
1036 Stat stat = new Stat();
1037 byte[] data =
1038 ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
1039 assertTrue(data != null);
1040 RegionTransition rt = RegionTransition.parseFrom(data);
1041 assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
1042
1043 LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1044 + " and dst server=" + dstName);
1045
1046
1047 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1048
1049 while (true) {
1050 master = TEST_UTIL.getHBaseCluster().getMaster();
1051 if (master != null && master.isInitialized()) {
1052 ServerManager serverManager = master.getServerManager();
1053 if (!serverManager.areDeadServersInProgress()) {
1054 break;
1055 }
1056 }
1057 Thread.sleep(200);
1058 }
1059
1060
1061 master = TEST_UTIL.getHBaseCluster().getMaster();
1062 master.getAssignmentManager().waitForAssignment(hri);
1063 regionStates = master.getAssignmentManager().getRegionStates();
1064 RegionState newState = regionStates.getRegionState(hri);
1065 assertTrue(newState.isOpened());
1066 }
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076 @Test (timeout=240000)
1077 public void testSimpleMasterFailover() throws Exception {
1078
1079 final int NUM_MASTERS = 3;
1080 final int NUM_RS = 3;
1081
1082
1083 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1084
1085 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1086 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1087
1088
1089 List<MasterThread> masterThreads = cluster.getMasterThreads();
1090
1091
1092 for (MasterThread mt : masterThreads) {
1093 assertTrue(mt.isAlive());
1094 }
1095
1096
1097 int numActive = 0;
1098 int activeIndex = -1;
1099 ServerName activeName = null;
1100 HMaster active = null;
1101 for (int i = 0; i < masterThreads.size(); i++) {
1102 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1103 numActive++;
1104 activeIndex = i;
1105 active = masterThreads.get(activeIndex).getMaster();
1106 activeName = active.getServerName();
1107 }
1108 }
1109 assertEquals(1, numActive);
1110 assertEquals(NUM_MASTERS, masterThreads.size());
1111 LOG.info("Active master " + activeName);
1112
1113
1114 assertNotNull(active);
1115 ClusterStatus status = active.getClusterStatus();
1116 assertTrue(status.getMaster().equals(activeName));
1117 assertEquals(2, status.getBackupMastersSize());
1118 assertEquals(2, status.getBackupMasters().size());
1119
1120
1121 int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
1122 HMaster master = cluster.getMaster(backupIndex);
1123 LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1124 cluster.stopMaster(backupIndex, false);
1125 cluster.waitOnMaster(backupIndex);
1126
1127
1128 for (int i = 0; i < masterThreads.size(); i++) {
1129 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1130 assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
1131 activeIndex = i;
1132 active = masterThreads.get(activeIndex).getMaster();
1133 }
1134 }
1135 assertEquals(1, numActive);
1136 assertEquals(2, masterThreads.size());
1137 int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1138 LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
1139 assertEquals(3, rsCount);
1140
1141
1142 assertNotNull(active);
1143 status = active.getClusterStatus();
1144 assertTrue(status.getMaster().equals(activeName));
1145 assertEquals(1, status.getBackupMastersSize());
1146 assertEquals(1, status.getBackupMasters().size());
1147
1148
1149 LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1150 cluster.stopMaster(activeIndex, false);
1151 cluster.waitOnMaster(activeIndex);
1152
1153
1154 assertTrue(cluster.waitForActiveAndReadyMaster());
1155
1156 LOG.debug("\n\nVerifying backup master is now active\n");
1157
1158 assertEquals(1, masterThreads.size());
1159
1160
1161 active = masterThreads.get(0).getMaster();
1162 assertNotNull(active);
1163 status = active.getClusterStatus();
1164 ServerName mastername = status.getMaster();
1165 assertTrue(mastername.equals(active.getServerName()));
1166 assertTrue(active.isActiveMaster());
1167 assertEquals(0, status.getBackupMastersSize());
1168 assertEquals(0, status.getBackupMasters().size());
1169 int rss = status.getServersSize();
1170 LOG.info("Active master " + mastername.getServerName() + " managing " +
1171 rss + " region servers");
1172 assertEquals(3, rss);
1173
1174
1175 TEST_UTIL.shutdownMiniCluster();
1176 }
1177
1178
1179
1180
1181 @Test (timeout=180000)
1182 @SuppressWarnings("deprecation")
1183 public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1184 final int NUM_MASTERS = 1;
1185 final int NUM_RS = 1;
1186
1187
1188 Configuration conf = HBaseConfiguration.create();
1189 conf.setBoolean("hbase.assignment.usezk", false);
1190
1191
1192 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1193 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1194 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1195 log("Cluster started");
1196
1197
1198 List<MasterThread> masterThreads = cluster.getMasterThreads();
1199 assertEquals(1, masterThreads.size());
1200
1201
1202 assertTrue(cluster.waitForActiveAndReadyMaster());
1203 HMaster master = masterThreads.get(0).getMaster();
1204 assertTrue(master.isActiveMaster());
1205 assertTrue(master.isInitialized());
1206
1207
1208 Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
1209 onlineTable.close();
1210
1211 HTableDescriptor offlineTable = new HTableDescriptor(
1212 TableName.valueOf(Bytes.toBytes("offlineTable")));
1213 offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1214
1215 FileSystem filesystem = FileSystem.get(conf);
1216 Path rootdir = FSUtils.getRootDir(conf);
1217 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1218 fstd.createTableDescriptor(offlineTable);
1219
1220 HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1221 createRegion(hriOffline, rootdir, conf, offlineTable);
1222 MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
1223
1224 log("Regions in hbase:meta and namespace have been created");
1225
1226
1227
1228 assertEquals(3, cluster.countServedRegions());
1229 HRegionInfo hriOnline = null;
1230 try (RegionLocator locator =
1231 TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
1232 hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
1233 }
1234 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1235 RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
1236
1237
1238
1239 RegionState oldState = regionStates.getRegionState(hriOnline);
1240 RegionState newState = new RegionState(
1241 hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1242 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1243
1244
1245
1246 oldState = new RegionState(hriOffline, State.OFFLINE);
1247 newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1248 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1249
1250 HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1251 createRegion(failedClose, rootdir, conf, offlineTable);
1252 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
1253
1254 oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1255 newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1256 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1257
1258
1259 HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1260 createRegion(failedOpen, rootdir, conf, offlineTable);
1261 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
1262
1263
1264
1265 oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1266 newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1267 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1268
1269 HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1270 createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1271 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
1272
1273
1274
1275 oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1276 newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1277 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1278
1279
1280
1281
1282 log("Aborting master");
1283 cluster.abortMaster(0);
1284 cluster.waitOnMaster(0);
1285 log("Master has aborted");
1286
1287
1288 log("Starting up a new master");
1289 master = cluster.startMaster().getMaster();
1290 log("Waiting for master to be ready");
1291 cluster.waitForActiveAndReadyMaster();
1292 log("Master is ready");
1293
1294
1295 master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
1296
1297
1298 regionStates = master.getAssignmentManager().getRegionStates();
1299
1300
1301 assertTrue(regionStates.isRegionOnline(hriOffline));
1302 assertTrue(regionStates.isRegionOnline(hriOnline));
1303 assertTrue(regionStates.isRegionOnline(failedClose));
1304 assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
1305 assertTrue(regionStates.isRegionOnline(failedOpen));
1306
1307 log("Done with verification, shutting down cluster");
1308
1309
1310 TEST_UTIL.shutdownMiniCluster();
1311 }
1312
1313
1314
1315
1316 @Test(timeout = 180000)
1317 public void testMetaInTransitionWhenMasterFailover() throws Exception {
1318 final int NUM_MASTERS = 1;
1319 final int NUM_RS = 1;
1320
1321
1322 Configuration conf = HBaseConfiguration.create();
1323 conf.setBoolean("hbase.assignment.usezk", false);
1324 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1325 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1326 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1327 log("Cluster started");
1328
1329 log("Moving meta off the master");
1330 HMaster activeMaster = cluster.getMaster();
1331 HRegionServer rs = cluster.getRegionServer(0);
1332 ServerName metaServerName = cluster.getLiveRegionServerThreads()
1333 .get(0).getRegionServer().getServerName();
1334 activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
1335 Bytes.toBytes(metaServerName.getServerName()));
1336 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1337 assertEquals("Meta should be assigned on expected regionserver",
1338 metaServerName, activeMaster.getMetaTableLocator()
1339 .getMetaRegionLocation(activeMaster.getZooKeeper()));
1340
1341
1342 log("Aborting master");
1343 activeMaster.abort("test-kill");
1344 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1345 log("Master has aborted");
1346
1347
1348 RegionState metaState =
1349 MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
1350 assertEquals("hbase:meta should be onlined on RS",
1351 metaState.getServerName(), rs.getServerName());
1352 assertEquals("hbase:meta should be onlined on RS",
1353 metaState.getState(), State.OPEN);
1354
1355
1356 log("Starting up a new master");
1357 activeMaster = cluster.startMaster().getMaster();
1358 log("Waiting for master to be ready");
1359 cluster.waitForActiveAndReadyMaster();
1360 log("Master is ready");
1361
1362
1363 metaState =
1364 MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1365 assertEquals("hbase:meta should be onlined on RS",
1366 metaState.getServerName(), rs.getServerName());
1367 assertEquals("hbase:meta should be onlined on RS",
1368 metaState.getState(), State.OPEN);
1369
1370
1371
1372
1373
1374 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1375 rs.getServerName(), State.PENDING_OPEN);
1376 Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1377 rs.removeFromOnlineRegions(meta, null);
1378 ((HRegion)meta).close();
1379
1380 log("Aborting master");
1381 activeMaster.abort("test-kill");
1382 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1383 log("Master has aborted");
1384
1385
1386 log("Starting up a new master");
1387 activeMaster = cluster.startMaster().getMaster();
1388 log("Waiting for master to be ready");
1389 cluster.waitForActiveAndReadyMaster();
1390 log("Master is ready");
1391
1392 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1393 log("Meta was assigned");
1394
1395 metaState =
1396 MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1397 assertEquals("hbase:meta should be onlined on RS",
1398 metaState.getServerName(), rs.getServerName());
1399 assertEquals("hbase:meta should be onlined on RS",
1400 metaState.getState(), State.OPEN);
1401
1402
1403
1404
1405
1406 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1407 rs.getServerName(), State.PENDING_CLOSE);
1408
1409 log("Aborting master");
1410 activeMaster.abort("test-kill");
1411 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1412 log("Master has aborted");
1413
1414 rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
1415 rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1416
1417
1418 log("Starting up a new master");
1419 activeMaster = cluster.startMaster().getMaster();
1420 log("Waiting for master to be ready");
1421 cluster.waitForActiveAndReadyMaster();
1422 log("Master is ready");
1423
1424 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1425 log("Meta was assigned");
1426
1427 rs.getRSRpcServices().closeRegion(
1428 null,
1429 RequestConverter.buildCloseRegionRequest(rs.getServerName(),
1430 HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1431
1432
1433 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1434 ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);
1435
1436 log("Aborting master");
1437 activeMaster.stop("test-kill");
1438
1439 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1440 log("Master has aborted");
1441
1442
1443 log("Starting up a new master");
1444 activeMaster = cluster.startMaster().getMaster();
1445 log("Waiting for master to be ready");
1446 cluster.waitForActiveAndReadyMaster();
1447 log("Master is ready");
1448
1449 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1450 log("Meta was assigned");
1451
1452
1453 TEST_UTIL.shutdownMiniCluster();
1454 }
1455 }
1456