/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.locks.Lock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo;
import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashState;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;
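
/**
 * Handles recovery of a crashed region server: splits (or, with distributed log replay, replays)
 * the dead server's write-ahead logs and reassigns the regions it was carrying, treating
 * hbase:meta specially when the crashed server was hosting it. Implemented as a ProcedureV2
 * {@link StateMachineProcedure} driven by {@link ServerCrashState}.
 */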
public class ServerCrashProcedure
    extends StateMachineProcedure<MasterProcedureEnv, ServerCrashState>
    implements ServerProcedureInterface {
  private static final Log LOG = LogFactory.getLog(ServerCrashProcedure.class);

  /**
   * Configuration key for the short wait, in milliseconds, used when polling for hbase:meta
   * availability and between meta assignment retries.
   */
  public static final String KEY_SHORT_WAIT_ON_META =
      "hbase.master.servercrash.short.wait.on.meta.ms";

  public static final int DEFAULT_SHORT_WAIT_ON_META = 1000;

  /**
   * Configuration key for how many times to retry verifying and assigning hbase:meta before
   * aborting the master.
   */
  public static final String KEY_RETRIES_ON_META =
      "hbase.master.servercrash.meta.retries";

  public static final int DEFAULT_RETRIES_ON_META = 10;

  /**
   * Configuration key for how long, in milliseconds, to wait on a region to clear
   * regions-in-transition.
   */
  public static final String KEY_WAIT_ON_RIT =
      "hbase.master.servercrash.wait.on.rit.ms";

  public static final int DEFAULT_WAIT_ON_RIT = 30000;

  private static final Set<HRegionInfo> META_REGION_SET = new HashSet<HRegionInfo>();
  static {
    META_REGION_SET.add(HRegionInfo.FIRST_META_REGIONINFO);
  }

  /** Name of the crashed server being processed. */
  private ServerName serverName;

  /** Whether we have told the DeadServer bookkeeping that processing of this server has begun. */
  private boolean notifiedDeadServer = false;

  /** Regions that were on the crashed server when its region list was fetched. */
  private Set<HRegionInfo> regionsOnCrashedServer;

  /** Regions assigned by this procedure; only populated while the procedure is running. */
  private List<HRegionInfo> regionsAssigned;

  private boolean distributedLogReplay = false;
  private boolean carryingMeta = false;
  private boolean shouldSplitWal;

  /** How many times the current state has run without advancing; used in yield log messages. */
  private int cycles = 0;

  /**
   * Ordinal of the state seen on the previous invocation of
   * {@link #executeFromState(MasterProcedureEnv, ServerCrashState)}; used to reset
   * {@link #cycles} when the state changes.
   */
  private int previousState;

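  /**
   * Construct a procedure to process the given crashed server.
   * @param serverName name of the crashed server
   * @param shouldSplitWal true if the server's write-ahead logs should be split (or replayed)
   * @param carryingMeta true if the crashed server was carrying hbase:meta
   */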
  public ServerCrashProcedure(final ServerName serverName,
      final boolean shouldSplitWal, final boolean carryingMeta) {
    this.serverName = serverName;
    this.shouldSplitWal = shouldSplitWal;
    this.carryingMeta = carryingMeta;
  }

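  /**
   * Used when recreating the procedure from the procedure store; the fields are filled in by
   * {@link #deserializeStateData(InputStream)}.
   */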
  public ServerCrashProcedure() {
    super();
  }

  private void throwProcedureYieldException(final String msg) throws ProcedureYieldException {
    String logMsg = msg + "; cycle=" + this.cycles + ", running for " +
        StringUtils.formatTimeDiff(System.currentTimeMillis(), getStartTime());
    // Yielding is expected while we wait on other parts of recovery, so log at DEBUG only.
    if (LOG.isDebugEnabled()) LOG.debug(logMsg);
    throw new ProcedureYieldException(logMsg);
  }

  @Override
  protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
      throws ProcedureYieldException {
    if (LOG.isTraceEnabled()) {
      LOG.trace(state);
    }
    // Keep a running count of how often we have executed the same state; reset when we move on.
    if (state.ordinal() != this.previousState) {
      this.previousState = state.ordinal();
      this.cycles = 0;
    } else {
      this.cycles++;
    }
    MasterServices services = env.getMasterServices();
    // Cannot do anything until the master has finished failover cleanup of region states.
    if (!services.getAssignmentManager().isFailoverCleanupDone()) {
      throwProcedureYieldException("Waiting on master failover to complete");
    }
    // Mark the server as being processed in the DeadServer bookkeeping; it is marked finished
    // when the procedure reaches SERVER_CRASH_FINISH.
    if (!notifiedDeadServer) {
      services.getServerManager().getDeadServers().notifyServer(serverName);
      notifiedDeadServer = true;
    }

    try {
      switch (state) {
      case SERVER_CRASH_START:
        LOG.info("Start processing crashed " + this.serverName);
        start(env);
        // If the crashed server was carrying hbase:meta, process meta first.
        if (this.carryingMeta) setNextState(ServerCrashState.SERVER_CRASH_PROCESS_META);
        else setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
        break;

      case SERVER_CRASH_GET_REGIONS:
        // Cannot safely enumerate the dead server's regions until hbase:meta is online.
        if (!isMetaAssignedQuickTest(env)) {
          throwProcedureYieldException("Waiting on hbase:meta assignment");
        }
        this.regionsOnCrashedServer =
            services.getAssignmentManager().getRegionStates().getServerRegions(this.serverName);
        // Where next depends on whether we split logs at all and, if so, whether we are doing
        // distributed log splitting or distributed log replay.
        if (!this.shouldSplitWal) {
          setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
        } else if (this.distributedLogReplay) {
          setNextState(ServerCrashState.SERVER_CRASH_PREPARE_LOG_REPLAY);
        } else {
          setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
        }
        break;

      case SERVER_CRASH_PROCESS_META:
        // Process hbase:meta carried by the crashed server; retry while meta is in transition.
        if (!processMeta(env)) {
          throwProcedureYieldException("Waiting on regions-in-transition to clear");
        }
        setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
        break;

      case SERVER_CRASH_PREPARE_LOG_REPLAY:
        prepareLogReplay(env, this.regionsOnCrashedServer);
        setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
        break;

      case SERVER_CRASH_SPLIT_LOGS:
        splitLogs(env);
        // With distributed log replay the regions were assigned before the replay, so we are
        // done; with distributed log splitting we assign after the split.
        if (this.distributedLogReplay) setNextState(ServerCrashState.SERVER_CRASH_FINISH);
        else setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
        break;

      case SERVER_CRASH_ASSIGN:
        List<HRegionInfo> regionsToAssign = calcRegionsToAssign(env);
        // The list can be empty: the crashed server may have carried no regions, or all of its
        // regions may have been filtered out (already reassigned elsewhere, table deleted or
        // disabled/disabling, etc.).
        boolean hasRegionsToAssign = regionsToAssign != null && !regionsToAssign.isEmpty();
        if (hasRegionsToAssign) {
          this.regionsAssigned = regionsToAssign;
          if (!assign(env, regionsToAssign)) {
            throwProcedureYieldException("Failed assign; will retry");
          }
        }
        if (this.shouldSplitWal && distributedLogReplay) {
          // With distributed log replay, regions must be opened before their WAL edits can be
          // replayed, so wait on the assignments and only then move to the log split/replay
          // state.
          setNextState(ServerCrashState.SERVER_CRASH_WAIT_ON_ASSIGN);
        } else {
          setNextState(ServerCrashState.SERVER_CRASH_FINISH);
        }
        break;

      case SERVER_CRASH_WAIT_ON_ASSIGN:
        // Only entered when distributed log replay is in use: the regions assigned in the
        // previous state must come out of regions-in-transition before their WAL edits are
        // replayed in SERVER_CRASH_SPLIT_LOGS.
        if (this.regionsAssigned != null && !this.regionsAssigned.isEmpty()) {
          if (!waitOnAssign(env, this.regionsAssigned)) {
            throwProcedureYieldException("Waiting on region assign");
          }
        }
        setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
        break;

      case SERVER_CRASH_FINISH:
        LOG.info("Finished processing of crashed " + serverName);
        services.getServerManager().getDeadServers().finish(serverName);
        return Flow.NO_MORE_STATE;

      default:
        throw new UnsupportedOperationException("unhandled state=" + state);
      }
    } catch (IOException e) {
      LOG.warn("Failed serverName=" + this.serverName + ", state=" + state + "; retry", e);
    } catch (InterruptedException e) {
      // Restore the interrupt flag; returning HAS_MORE_STATE makes us retry the current state.
      LOG.warn("Interrupted serverName=" + this.serverName + ", state=" + state + "; retry", e);
      Thread.currentThread().interrupt();
    }
    return Flow.HAS_MORE_STATE;
  }

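  /**
   * Start processing of the crashed server: resolve whether the cluster runs distributed log
   * replay or distributed log splitting for the dead server's WALs.
   */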
  private void start(final MasterProcedureEnv env) throws IOException {
    MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    // Refresh the log recovery mode and cache whether we are running distributed log replay.
    mfs.setLogRecoveryMode();
    this.distributedLogReplay = mfs.getLogRecoveryMode() == RecoveryMode.LOG_REPLAY;
  }

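  /**
   * Handle the hbase:meta region carried by the crashed server: split or replay its WAL and
   * reassign meta if required.
   * @return false if meta did not clear regions-in-transition in time and the caller should
   *   yield and retry
   */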
  private boolean processMeta(final MasterProcedureEnv env)
      throws IOException {
    if (LOG.isDebugEnabled()) LOG.debug("Processing hbase:meta that was on " + this.serverName);
    MasterServices services = env.getMasterServices();
    MasterFileSystem mfs = services.getMasterFileSystem();
    AssignmentManager am = services.getAssignmentManager();
    HRegionInfo metaHRI = HRegionInfo.FIRST_META_REGIONINFO;
    if (this.shouldSplitWal) {
      if (this.distributedLogReplay) {
        prepareLogReplay(env, META_REGION_SET);
      } else {
        // Distributed log splitting: split the meta WAL before reassigning meta.
        mfs.splitMetaLog(serverName);
        am.getRegionStates().logSplit(metaHRI);
      }
    }

    // Assign meta if it was on the crashed server and has not come up elsewhere in the meantime.
    boolean processed = true;
    boolean shouldAssignMeta = false;
    AssignmentManager.ServerHostRegion rsCarryingMetaRegion = am.isCarryingMeta(serverName);
    switch (rsCarryingMetaRegion) {
      case HOSTING_REGION:
        LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
        am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
        shouldAssignMeta = true;
        break;
      case UNKNOWN:
        if (!services.getMetaTableLocator().isLocationAvailable(services.getZooKeeper())) {
          // The crashed server is not known to have been carrying meta, but meta has no known
          // location either, so assign it to be safe.
          shouldAssignMeta = true;
          break;
        }
        // Meta has a known location: fall through to the not-hosting case.
      case NOT_HOSTING_REGION:
        LOG.info("hbase:meta has already been assigned elsewhere; skipping assignment.");
        break;
      default:
        throw new IOException("Unexpected hosting state for hbase:meta: " + rsCarryingMetaRegion);
    }
    if (shouldAssignMeta) {
      // Verify that meta is not online somewhere else, then assign it, retrying on failure.
      verifyAndAssignMetaWithRetries(env);
      if (this.shouldSplitWal && distributedLogReplay) {
        int timeout = env.getMasterConfiguration().getInt(KEY_WAIT_ON_RIT, DEFAULT_WAIT_ON_RIT);
        if (!waitOnRegionToClearRegionsInTransition(am, metaHRI, timeout)) {
          processed = false;
        } else {
          // Meta is online again; now its WAL edits can be replayed.
          mfs.splitMetaLog(serverName);
        }
      }
    }
    return processed;
  }

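  /**
   * Wait for the given region to come out of regions-in-transition.
   * @return true if the region cleared regions-in-transition within the timeout, false otherwise
   */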
  private boolean waitOnRegionToClearRegionsInTransition(AssignmentManager am,
      final HRegionInfo hri, final int timeout)
      throws InterruptedIOException {
    try {
      if (!am.waitOnRegionToClearRegionsInTransition(hri, timeout)) {
        // Did not complete within the timeout; report failure so the caller can yield and retry.
        LOG.warn("Region " + hri.getEncodedName() + " didn't complete assignment in time");
        return false;
      }
    } catch (InterruptedException ie) {
      throw new InterruptedIOException("Caught " + ie +
          " during waitOnRegionToClearRegionsInTransition for " + hri);
    }
    return true;
  }

  private void prepareLogReplay(final MasterProcedureEnv env, final Set<HRegionInfo> regions)
      throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Mark " + size(regions) + " regions-in-recovery from " + this.serverName);
    }
    MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    mfs.prepareLogReplay(this.serverName, regions);
    am.getRegionStates().logSplit(this.serverName);
  }

  private void splitLogs(final MasterProcedureEnv env) throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Splitting logs from " + serverName + "; region count=" +
          size(this.regionsOnCrashedServer));
    }
    MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    // Split (or, under distributed log replay, replay) all WALs belonging to the crashed server,
    // then mark the server's logs as split in region states.
    mfs.splitLog(this.serverName);
    am.getRegionStates().logSplit(this.serverName);
  }

  static int size(final Collection<HRegionInfo> hris) {
    return hris == null? 0: hris.size();
  }

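  /**
   * Figure out which of the crashed server's regions this procedure should reassign, skipping
   * regions that have already been opened elsewhere, regions of deleted or disabled/disabling
   * tables, and offlined parents of completed splits.
   * @return the regions to assign; may be empty
   */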
  private List<HRegionInfo> calcRegionsToAssign(final MasterProcedureEnv env)
      throws IOException {
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    List<HRegionInfo> regionsToAssignAggregator = new ArrayList<HRegionInfo>();
    int replicaCount = env.getMasterConfiguration().getInt(HConstants.META_REPLICAS_NUM,
        HConstants.DEFAULT_META_REPLICA_NUM);
    for (int i = 1; i < replicaCount; i++) {
      HRegionInfo metaHri =
          RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i);
      if (am.isCarryingMetaReplica(this.serverName, metaHri) ==
          AssignmentManager.ServerHostRegion.HOSTING_REGION) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Reassigning meta replica " + metaHri + " that was on " + this.serverName);
        }
        regionsToAssignAggregator.add(metaHri);
      }
    }
    // Clean out regions-in-transition that referenced the crashed server; they get reassigned.
    List<HRegionInfo> regionsInTransition = am.cleanOutCrashedServerReferences(serverName);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Reassigning " + size(this.regionsOnCrashedServer) +
          " region(s) that " + (serverName == null? "null": serverName) +
          " was carrying (and " + regionsInTransition.size() +
          " region(s) that were opening on this server)");
    }
    regionsToAssignAggregator.addAll(regionsInTransition);

    // Iterate the regions that were on the crashed server and decide which of them to assign.
    if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
      RegionStates regionStates = am.getRegionStates();
      for (HRegionInfo hri: this.regionsOnCrashedServer) {
        if (regionsInTransition.contains(hri)) continue;
        String encodedName = hri.getEncodedName();
        Lock lock = am.acquireRegionLock(encodedName);
        try {
          RegionState rit = regionStates.getRegionTransitionState(hri);
          if (processDeadRegion(hri, am)) {
            ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
            if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
              // The region has already been opened on another server; nothing for us to do here.
              LOG.info("Skip assigning " + hri.getRegionNameAsString()
                  + " because opened on " + addressFromAM.getServerName());
              continue;
            }
            if (rit != null) {
              if (rit.getServerName() != null && !rit.isOnServer(this.serverName)) {
                // The region is in transition on another server; let that transition play out.
                LOG.info("Skip assigning region in transition on other server " + rit);
                continue;
              }
              LOG.info("Reassigning region " + rit + " and clearing zknode if exists");
              try {
                // Clear out any stale unassigned znode left over from the crashed server.
                ZKAssign.deleteNodeFailSilent(env.getMasterServices().getZooKeeper(), hri);
              } catch (KeeperException e) {
                // A failed delete leaves region state inconsistent with ZooKeeper; abort the
                // master rather than carry on.
                env.getMasterServices().abort("Unexpected error deleting RIT " + hri, e);
                throw new IOException(e);
              }
              regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
            } else if (regionStates.isRegionInState(
                hri, RegionState.State.SPLITTING_NEW, RegionState.State.MERGING_NEW)) {
              regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
            }
            regionsToAssignAggregator.add(hri);

          } else if (rit != null) {
            if ((rit.isPendingCloseOrClosing() || rit.isOffline())
                && am.getTableStateManager().isTableState(hri.getTable(),
                    ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) ||
                am.getReplicasToClose().contains(hri)) {
              // The table is disabled or disabling (or this replica is being closed): offline
              // the region rather than reassigning it.
              regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
              am.deleteClosingOrClosedNode(hri, rit.getServerName());
              am.offlineDisabledRegion(hri);
            } else {
              LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
                  + rit + " not to be assigned by SSH of server " + serverName);
            }
          }
        } finally {
          lock.unlock();
        }
      }
    }
    return regionsToAssignAggregator;
  }

  private boolean assign(final MasterProcedureEnv env, final List<HRegionInfo> hris)
      throws InterruptedIOException {
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    try {
      am.assign(hris);
    } catch (InterruptedException ie) {
      LOG.error("Caught " + ie + " during round-robin assignment");
      throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
    } catch (IOException ioe) {
      LOG.info("Caught " + ioe + " during region assignment, will retry");
      return false;
    }
    return true;
  }

  private boolean waitOnAssign(final MasterProcedureEnv env, final List<HRegionInfo> hris)
      throws InterruptedIOException {
    int timeout = env.getMasterConfiguration().getInt(KEY_WAIT_ON_RIT, DEFAULT_WAIT_ON_RIT);
    for (HRegionInfo hri: hris) {
      // Note: this blocks the procedure executor thread while it waits on the region.
      if (!waitOnRegionToClearRegionsInTransition(env.getMasterServices().getAssignmentManager(),
          hri, timeout)) {
        return false;
      }
    }
    return true;
  }

  @Override
  protected void rollbackState(MasterProcedureEnv env, ServerCrashState state)
      throws IOException {
    // Server crash processing cannot be rolled back.
    throw new UnsupportedOperationException("unhandled state=" + state);
  }

  @Override
  protected ServerCrashState getState(int stateId) {
    return ServerCrashState.valueOf(stateId);
  }

  @Override
  protected int getStateId(ServerCrashState state) {
    return state.getNumber();
  }

  @Override
  protected ServerCrashState getInitialState() {
    return ServerCrashState.SERVER_CRASH_START;
  }

  @Override
  protected boolean abort(MasterProcedureEnv env) {
    // Server crash processing cannot be aborted.
    return false;
  }

  @Override
  protected boolean acquireLock(final MasterProcedureEnv env) {
    if (env.waitServerCrashProcessingEnabled(this)) return false;
    return env.getProcedureQueue().tryAcquireServerExclusiveLock(this, getServerName());
  }

  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureQueue().releaseServerExclusiveLock(this, getServerName());
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getClass().getSimpleName());
    sb.append(" serverName=");
    sb.append(this.serverName);
    sb.append(", shouldSplitWal=");
    sb.append(shouldSplitWal);
    sb.append(", carryingMeta=");
    sb.append(carryingMeta);
  }

  @Override
  public void serializeStateData(final OutputStream stream) throws IOException {
    super.serializeStateData(stream);

    MasterProcedureProtos.ServerCrashStateData.Builder state =
        MasterProcedureProtos.ServerCrashStateData.newBuilder().
        setServerName(ProtobufUtil.toServerName(this.serverName)).
        setDistributedLogReplay(this.distributedLogReplay).
        setCarryingMeta(this.carryingMeta).
        setShouldSplitWal(this.shouldSplitWal);
    if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
      for (HRegionInfo hri: this.regionsOnCrashedServer) {
        state.addRegionsOnCrashedServer(HRegionInfo.convert(hri));
      }
    }
    if (this.regionsAssigned != null && !this.regionsAssigned.isEmpty()) {
      for (HRegionInfo hri: this.regionsAssigned) {
        state.addRegionsAssigned(HRegionInfo.convert(hri));
      }
    }
    state.build().writeDelimitedTo(stream);
  }

  @Override
  public void deserializeStateData(final InputStream stream) throws IOException {
    super.deserializeStateData(stream);

    MasterProcedureProtos.ServerCrashStateData state =
        MasterProcedureProtos.ServerCrashStateData.parseDelimitedFrom(stream);
    this.serverName = ProtobufUtil.toServerName(state.getServerName());
    this.distributedLogReplay = state.hasDistributedLogReplay()?
        state.getDistributedLogReplay(): false;
    this.carryingMeta = state.hasCarryingMeta()? state.getCarryingMeta(): false;
    this.shouldSplitWal = state.getShouldSplitWal();
    int size = state.getRegionsOnCrashedServerCount();
    if (size > 0) {
      this.regionsOnCrashedServer = new HashSet<HRegionInfo>(size);
      for (RegionInfo ri: state.getRegionsOnCrashedServerList()) {
        this.regionsOnCrashedServer.add(HRegionInfo.convert(ri));
      }
    }
    size = state.getRegionsAssignedCount();
    if (size > 0) {
      this.regionsAssigned = new ArrayList<HRegionInfo>(size);
      // Restore regionsAssigned from the regionsAssigned list, not regionsOnCrashedServer.
      for (RegionInfo ri: state.getRegionsAssignedList()) {
        this.regionsAssigned.add(HRegionInfo.convert(ri));
      }
    }
  }

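  /**
   * Decide whether a region that was on the crashed server still needs to be assigned.
   * @return false if the region's table has been deleted or is disabled/disabling, or if the
   *   region is the offlined parent of a completed split; true if the region should be assigned
   */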
  private static boolean processDeadRegion(HRegionInfo hri, AssignmentManager assignmentManager)
      throws IOException {
    boolean tablePresent = assignmentManager.getTableStateManager().isTablePresent(hri.getTable());
    if (!tablePresent) {
      LOG.info("The table " + hri.getTable() + " was deleted. Hence not proceeding.");
      return false;
    }
    // If the table was disabled while the server was down, there is nothing to assign.
    boolean disabled = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
        ZooKeeperProtos.Table.State.DISABLED);
    if (disabled) {
      LOG.info("The table " + hri.getTable() + " was disabled. Hence not proceeding.");
      return false;
    }
    if (hri.isOffline() && hri.isSplit()) {
      // The region is the offlined parent of a completed split; its daughters are assigned
      // instead, so skip it.
      return false;
    }
    boolean disabling = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
        ZooKeeperProtos.Table.State.DISABLING);
    if (disabling) {
      LOG.info("The table " + hri.getTable() + " is being disabled. Hence not assigning region " +
          hri.getEncodedName());
      return false;
    }
    return true;
  }

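  /**
   * Verify that hbase:meta is not online elsewhere and assign it if needed, retrying a bounded
   * number of times before aborting the master.
   */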
  private void verifyAndAssignMetaWithRetries(final MasterProcedureEnv env) throws IOException {
    MasterServices services = env.getMasterServices();
    int iTimes = services.getConfiguration().getInt(KEY_RETRIES_ON_META, DEFAULT_RETRIES_ON_META);
    // How long to pause between retries.
    long waitTime =
        services.getConfiguration().getLong(KEY_SHORT_WAIT_ON_META, DEFAULT_SHORT_WAIT_ON_META);
    int iFlag = 0;
    while (true) {
      try {
        verifyAndAssignMeta(env);
        break;
      } catch (KeeperException e) {
        services.abort("In server shutdown processing, assigning meta", e);
        throw new IOException("Aborting", e);
      } catch (Exception e) {
        if (iFlag >= iTimes) {
          services.abort("verifyAndAssignMeta failed after " + iTimes + " retries, aborting", e);
          throw new IOException("Aborting", e);
        }
        try {
          Thread.sleep(waitTime);
        } catch (InterruptedException e1) {
          LOG.warn("Interrupted while sleeping between meta assignment retries", e1);
          Thread.currentThread().interrupt();
          throw (InterruptedIOException)new InterruptedIOException().initCause(e1);
        }
        iFlag++;
      }
    }
  }

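  /**
   * If hbase:meta is not assigned, or if ZooKeeper still names the crashed server as its
   * location, (re)assign it; otherwise leave it where it is.
   */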
  private void verifyAndAssignMeta(final MasterProcedureEnv env)
      throws InterruptedException, IOException, KeeperException {
    MasterServices services = env.getMasterServices();
    if (!isMetaAssignedQuickTest(env)) {
      services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
    } else if (serverName.equals(services.getMetaTableLocator().
        getMetaRegionLocation(services.getZooKeeper()))) {
      // The quick test says meta is assigned, but ZooKeeper still names the crashed server as
      // its location; reassign it.
      services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
    } else {
      LOG.info("Skip assigning hbase:meta because it is online at "
          + services.getMetaTableLocator().getMetaRegionLocation(services.getZooKeeper()));
    }
  }

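  /**
   * Quick test for whether hbase:meta is assigned and reachable: checks that a location is
   * published in ZooKeeper and then verifies that location with a short timeout.
   * @return true if hbase:meta looks assigned and online
   */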
  private boolean isMetaAssignedQuickTest(final MasterProcedureEnv env)
      throws InterruptedException, IOException {
    ZooKeeperWatcher zkw = env.getMasterServices().getZooKeeper();
    MetaTableLocator mtl = env.getMasterServices().getMetaTableLocator();
    boolean metaAssigned = false;
    // Is the hbase:meta location published in ZooKeeper yet?
    if (mtl.isLocationAvailable(zkw)) {
      ClusterConnection connection = env.getMasterServices().getConnection();
      // Is hbase:meta actually deployed and reachable at that location?
      long timeout =
          env.getMasterConfiguration().getLong(KEY_SHORT_WAIT_ON_META, DEFAULT_SHORT_WAIT_ON_META);
      if (mtl.verifyMetaRegionLocation(connection, zkw, timeout)) {
        metaAssigned = true;
      }
    }
    return metaAssigned;
  }

  @Override
  public ServerName getServerName() {
    return this.serverName;
  }

  @Override
  public boolean hasMetaTableRegion() {
    return this.carryingMeta;
  }

  @Override
  public ServerOperationType getServerOperationType() {
    return ServerOperationType.CRASH_HANDLER;
  }

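  /**
   * Always yield before executing a state so crash processing for multiple servers can
   * interleave rather than one procedure monopolizing the executor.
   */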
  @Override
  protected boolean isYieldBeforeExecuteFromState(MasterProcedureEnv env, ServerCrashState state) {
    return true;
  }

  @Override
  protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
    // Crash processing is triggered internally; no client is waiting to acknowledge completion.
    return false;
  }
}