View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.security.PrivilegedExceptionAction;
25  import java.util.List;
26  import java.util.concurrent.ExecutorService;
27  import java.util.concurrent.atomic.AtomicBoolean;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.hbase.HRegionInfo;
32  import org.apache.hadoop.hbase.MetaTableAccessor;
33  import org.apache.hadoop.hbase.TableName;
34  import org.apache.hadoop.hbase.TableNotEnabledException;
35  import org.apache.hadoop.hbase.TableNotFoundException;
36  import org.apache.hadoop.hbase.TableStateManager;
37  import org.apache.hadoop.hbase.classification.InterfaceAudience;
38  import org.apache.hadoop.hbase.constraint.ConstraintException;
39  import org.apache.hadoop.hbase.exceptions.HBaseException;
40  import org.apache.hadoop.hbase.master.AssignmentManager;
41  import org.apache.hadoop.hbase.master.BulkAssigner;
42  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
43  import org.apache.hadoop.hbase.master.RegionState;
44  import org.apache.hadoop.hbase.master.RegionStates;
45  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
46  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
47  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
48  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
49  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
50  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
51  import org.apache.hadoop.security.UserGroupInformation;
52  import org.apache.htrace.Trace;
53  
54  @InterfaceAudience.Private
55  public class DisableTableProcedure
56      extends StateMachineProcedure<MasterProcedureEnv, DisableTableState>
57      implements TableProcedureInterface {
58    private static final Log LOG = LogFactory.getLog(DisableTableProcedure.class);
59  
60    private final AtomicBoolean aborted = new AtomicBoolean(false);
61  
62    // This is for back compatible with 1.0 asynchronized operations.
63    private final ProcedurePrepareLatch syncLatch;
64  
65    private TableName tableName;
66    private boolean skipTableStateCheck;
67    private UserGroupInformation user;
68  
69    private Boolean traceEnabled = null;
70  
71    enum MarkRegionOfflineOpResult {
72      MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL,
73      BULK_ASSIGN_REGIONS_FAILED,
74      MARK_ALL_REGIONS_OFFLINE_INTERRUPTED,
75    }
76  
77    public DisableTableProcedure() {
78      syncLatch = null;
79    }
80  
81    /**
82     * Constructor
83     * @param env MasterProcedureEnv
84     * @param tableName the table to operate on
85     * @param skipTableStateCheck whether to check table state
86     * @throws IOException
87     */
88    public DisableTableProcedure(
89        final MasterProcedureEnv env,
90        final TableName tableName,
91        final boolean skipTableStateCheck) throws IOException {
92      this(env, tableName, skipTableStateCheck, null);
93    }
94  
95    /**
96     * Constructor
97     * @param env MasterProcedureEnv
98     * @param tableName the table to operate on
99     * @param skipTableStateCheck whether to check table state
100    * @throws IOException
101    */
102   public DisableTableProcedure(
103       final MasterProcedureEnv env,
104       final TableName tableName,
105       final boolean skipTableStateCheck,
106       final ProcedurePrepareLatch syncLatch) throws IOException {
107     this.tableName = tableName;
108     this.skipTableStateCheck = skipTableStateCheck;
109     this.user = env.getRequestUser().getUGI();
110     this.setOwner(this.user.getShortUserName());
111 
112     // Compatible with 1.0: We use latch to make sure that this procedure implementation is
113     // compatible with 1.0 asynchronized operations. We need to lock the table and check
114     // whether the Disable operation could be performed (table exists and online; table state
115     // is ENABLED). Once it is done, we are good to release the latch and the client can
116     // start asynchronously wait for the operation.
117     //
118     // Note: the member syncLatch could be null if we are in failover or recovery scenario.
119     // This is ok for backward compatible, as 1.0 client would not able to peek at procedure.
120     this.syncLatch = syncLatch;
121   }
122 
123   @Override
124   protected Flow executeFromState(final MasterProcedureEnv env, final DisableTableState state)
125       throws InterruptedException {
126     if (isTraceEnabled()) {
127       LOG.trace(this + " execute state=" + state);
128     }
129 
130     try {
131       switch (state) {
132       case DISABLE_TABLE_PREPARE:
133         if (prepareDisable(env)) {
134           setNextState(DisableTableState.DISABLE_TABLE_PRE_OPERATION);
135         } else {
136           assert isFailed() : "disable should have an exception here";
137           return Flow.NO_MORE_STATE;
138         }
139         break;
140       case DISABLE_TABLE_PRE_OPERATION:
141         preDisable(env, state);
142         setNextState(DisableTableState.DISABLE_TABLE_SET_DISABLING_TABLE_STATE);
143         break;
144       case DISABLE_TABLE_SET_DISABLING_TABLE_STATE:
145         setTableStateToDisabling(env, tableName);
146         setNextState(DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE);
147         break;
148       case DISABLE_TABLE_MARK_REGIONS_OFFLINE:
149         if (markRegionsOffline(env, tableName, true) ==
150             MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
151           setNextState(DisableTableState.DISABLE_TABLE_SET_DISABLED_TABLE_STATE);
152         } else {
153           LOG.trace("Retrying later to disable the missing regions");
154         }
155         break;
156       case DISABLE_TABLE_SET_DISABLED_TABLE_STATE:
157         setTableStateToDisabled(env, tableName);
158         setNextState(DisableTableState.DISABLE_TABLE_POST_OPERATION);
159         break;
160       case DISABLE_TABLE_POST_OPERATION:
161         postDisable(env, state);
162         return Flow.NO_MORE_STATE;
163       default:
164         throw new UnsupportedOperationException("unhandled state=" + state);
165       }
166     } catch (HBaseException|IOException e) {
167       LOG.warn("Retriable error trying to disable table=" + tableName + " state=" + state, e);
168     }
169     return Flow.HAS_MORE_STATE;
170   }
171 
172   @Override
173   protected void rollbackState(final MasterProcedureEnv env, final DisableTableState state)
174       throws IOException {
175     if (state == DisableTableState.DISABLE_TABLE_PREPARE) {
176       undoTableStateChange(env);
177       ProcedurePrepareLatch.releaseLatch(syncLatch, this);
178       return;
179     }
180 
181     // The delete doesn't have a rollback. The execution will succeed, at some point.
182     throw new UnsupportedOperationException("unhandled state=" + state);
183   }
184 
185   @Override
186   protected DisableTableState getState(final int stateId) {
187     return DisableTableState.valueOf(stateId);
188   }
189 
190   @Override
191   protected int getStateId(final DisableTableState state) {
192     return state.getNumber();
193   }
194 
195   @Override
196   protected DisableTableState getInitialState() {
197     return DisableTableState.DISABLE_TABLE_PREPARE;
198   }
199 
200   @Override
201   protected void setNextState(final DisableTableState state) {
202     if (aborted.get()) {
203       setAbortFailure("disable-table", "abort requested");
204     } else {
205       super.setNextState(state);
206     }
207   }
208 
209   @Override
210   public boolean abort(final MasterProcedureEnv env) {
211     aborted.set(true);
212     return true;
213   }
214 
215   @Override
216   protected boolean acquireLock(final MasterProcedureEnv env) {
217     if (env.waitInitialized(this)) return false;
218     return env.getProcedureQueue().tryAcquireTableExclusiveLock(this, tableName);
219   }
220 
221   @Override
222   protected void releaseLock(final MasterProcedureEnv env) {
223     env.getProcedureQueue().releaseTableExclusiveLock(this, tableName);
224   }
225 
226   @Override
227   public void serializeStateData(final OutputStream stream) throws IOException {
228     super.serializeStateData(stream);
229 
230     MasterProcedureProtos.DisableTableStateData.Builder disableTableMsg =
231         MasterProcedureProtos.DisableTableStateData.newBuilder()
232             .setUserInfo(MasterProcedureUtil.toProtoUserInfo(user))
233             .setTableName(ProtobufUtil.toProtoTableName(tableName))
234             .setSkipTableStateCheck(skipTableStateCheck);
235 
236     disableTableMsg.build().writeDelimitedTo(stream);
237   }
238 
239   @Override
240   public void deserializeStateData(final InputStream stream) throws IOException {
241     super.deserializeStateData(stream);
242 
243     MasterProcedureProtos.DisableTableStateData disableTableMsg =
244         MasterProcedureProtos.DisableTableStateData.parseDelimitedFrom(stream);
245     user = MasterProcedureUtil.toUserInfo(disableTableMsg.getUserInfo());
246     tableName = ProtobufUtil.toTableName(disableTableMsg.getTableName());
247     skipTableStateCheck = disableTableMsg.getSkipTableStateCheck();
248   }
249 
250   @Override
251   public void toStringClassDetails(StringBuilder sb) {
252     sb.append(getClass().getSimpleName());
253     sb.append(" (table=");
254     sb.append(tableName);
255     sb.append(")");
256   }
257 
258   @Override
259   public TableName getTableName() {
260     return tableName;
261   }
262 
263   @Override
264   public TableOperationType getTableOperationType() {
265     return TableOperationType.DISABLE;
266   }
267 
268   /**
269    * Action before any real action of disabling table. Set the exception in the procedure instead
270    * of throwing it.  This approach is to deal with backward compatible with 1.0.
271    * @param env MasterProcedureEnv
272    * @throws HBaseException
273    * @throws IOException
274    */
275   private boolean prepareDisable(final MasterProcedureEnv env) throws HBaseException, IOException {
276     boolean canTableBeDisabled = true;
277     if (tableName.equals(TableName.META_TABLE_NAME)) {
278       setFailure("master-disable-table", new ConstraintException("Cannot disable catalog table"));
279       canTableBeDisabled = false;
280     } else if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
281       setFailure("master-disable-table", new TableNotFoundException(tableName));
282       canTableBeDisabled = false;
283     } else if (!skipTableStateCheck) {
284       // There could be multiple client requests trying to disable or enable
285       // the table at the same time. Ensure only the first request is honored
286       // After that, no other requests can be accepted until the table reaches
287       // DISABLED or ENABLED.
288       //
289       // Note: A quick state check should be enough for us to move forward. However, instead of
290       // calling TableStateManager.isTableState() to just check the state, we called
291       // TableStateManager.setTableStateIfInStates() to set the state to DISABLING from ENABLED.
292       // This is because we treat empty state as enabled from 0.92-clusters. See
293       // ZKTableStateManager.setTableStateIfInStates() that has a hack solution to work around
294       // this issue.
295       TableStateManager tsm =
296         env.getMasterServices().getAssignmentManager().getTableStateManager();
297       if (!tsm.setTableStateIfInStates(tableName, ZooKeeperProtos.Table.State.DISABLING,
298             ZooKeeperProtos.Table.State.DISABLING, ZooKeeperProtos.Table.State.ENABLED)) {
299         LOG.info("Table " + tableName + " isn't enabled; skipping disable");
300         setFailure("master-disable-table", new TableNotEnabledException(tableName));
301         canTableBeDisabled = false;
302       }
303     }
304 
305     // We are done the check. Future actions in this procedure could be done asynchronously.
306     ProcedurePrepareLatch.releaseLatch(syncLatch, this);
307 
308     return canTableBeDisabled;
309   }
310 
311   /**
312    * Rollback of table state change in prepareDisable()
313    * @param env MasterProcedureEnv
314    */
315   @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
316       justification="Intended")
317   private void undoTableStateChange(final MasterProcedureEnv env) {
318     if (!skipTableStateCheck) {
319       try {
320         // If the state was changed, undo it.
321         if (env.getMasterServices().getAssignmentManager().getTableStateManager().isTableState(
322             tableName, ZooKeeperProtos.Table.State.DISABLING)) {
323           EnableTableProcedure.setTableStateToEnabled(env, tableName);
324         }
325       } catch (Exception e) {
326         // Ignore exception.
327         LOG.trace(e.getMessage());
328       }
329     }
330   }
331 
332   /**
333    * Action before disabling table.
334    * @param env MasterProcedureEnv
335    * @param state the procedure state
336    * @throws IOException
337    * @throws InterruptedException
338    */
339   protected void preDisable(final MasterProcedureEnv env, final DisableTableState state)
340       throws IOException, InterruptedException {
341     runCoprocessorAction(env, state);
342   }
343 
344   /**
345    * Mark table state to Disabling
346    * @param env MasterProcedureEnv
347    * @throws IOException
348    */
349   protected static void setTableStateToDisabling(
350       final MasterProcedureEnv env,
351       final TableName tableName) throws HBaseException, IOException {
352     // Set table disabling flag up in zk.
353     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
354       tableName,
355       ZooKeeperProtos.Table.State.DISABLING);
356   }
357 
358   /**
359    * Mark regions of the table offline with retries
360    * @param env MasterProcedureEnv
361    * @param tableName the target table
362    * @param retryRequired whether to retry if the first run failed
363    * @return whether the operation is fully completed or being interrupted.
364    * @throws IOException
365    */
366   protected static MarkRegionOfflineOpResult markRegionsOffline(
367       final MasterProcedureEnv env,
368       final TableName tableName,
369       final Boolean retryRequired) throws IOException {
370     // Dev consideration: add a config to control max number of retry. For now, it is hard coded.
371     int maxTry = (retryRequired ? 10 : 1);
372     MarkRegionOfflineOpResult operationResult =
373         MarkRegionOfflineOpResult.BULK_ASSIGN_REGIONS_FAILED;
374     do {
375       try {
376         operationResult = markRegionsOffline(env, tableName);
377         if (operationResult == MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
378           break;
379         }
380         maxTry--;
381       } catch (Exception e) {
382         LOG.warn("Received exception while marking regions online. tries left: " + maxTry, e);
383         maxTry--;
384         if (maxTry > 0) {
385           continue; // we still have some retry left, try again.
386         }
387         throw e;
388       }
389     } while (maxTry > 0);
390 
391     if (operationResult != MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
392       LOG.warn("Some or all regions of the Table '" + tableName + "' were still online");
393     }
394 
395     return operationResult;
396   }
397 
398   /**
399    * Mark regions of the table offline
400    * @param env MasterProcedureEnv
401    * @param tableName the target table
402    * @return whether the operation is fully completed or being interrupted.
403    * @throws IOException
404    */
405   private static MarkRegionOfflineOpResult markRegionsOffline(
406       final MasterProcedureEnv env,
407       final TableName tableName) throws IOException {
408     // Get list of online regions that are of this table.  Regions that are
409     // already closed will not be included in this list; i.e. the returned
410     // list is not ALL regions in a table, its all online regions according
411     // to the in-memory state on this master.
412     MarkRegionOfflineOpResult operationResult =
413         MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL;
414     final List<HRegionInfo> regions =
415         env.getMasterServices().getAssignmentManager().getRegionStates()
416             .getRegionsOfTable(tableName);
417     if (regions.size() > 0) {
418       LOG.info("Offlining " + regions.size() + " regions.");
419 
420       BulkDisabler bd = new BulkDisabler(env, tableName, regions);
421       try {
422         if (!bd.bulkAssign()) {
423           operationResult = MarkRegionOfflineOpResult.BULK_ASSIGN_REGIONS_FAILED;
424         }
425       } catch (InterruptedException e) {
426         LOG.warn("Disable was interrupted");
427         // Preserve the interrupt.
428         Thread.currentThread().interrupt();
429         operationResult = MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_INTERRUPTED;
430       }
431     }
432     return operationResult;
433   }
434 
435   /**
436    * Mark table state to Disabled
437    * @param env MasterProcedureEnv
438    * @throws IOException
439    */
440   protected static void setTableStateToDisabled(
441       final MasterProcedureEnv env,
442       final TableName tableName) throws HBaseException, IOException {
443     // Flip the table to disabled
444     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
445       tableName,
446       ZooKeeperProtos.Table.State.DISABLED);
447     LOG.info("Disabled table, " + tableName + ", is completed.");
448   }
449 
450   /**
451    * Action after disabling table.
452    * @param env MasterProcedureEnv
453    * @param state the procedure state
454    * @throws IOException
455    * @throws InterruptedException
456    */
457   protected void postDisable(final MasterProcedureEnv env, final DisableTableState state)
458       throws IOException, InterruptedException {
459     runCoprocessorAction(env, state);
460   }
461 
462   /**
463    * The procedure could be restarted from a different machine. If the variable is null, we need to
464    * retrieve it.
465    * @return traceEnabled
466    */
467   private Boolean isTraceEnabled() {
468     if (traceEnabled == null) {
469       traceEnabled = LOG.isTraceEnabled();
470     }
471     return traceEnabled;
472   }
473 
474   /**
475    * Coprocessor Action.
476    * @param env MasterProcedureEnv
477    * @param state the procedure state
478    * @throws IOException
479    * @throws InterruptedException
480    */
481   private void runCoprocessorAction(final MasterProcedureEnv env, final DisableTableState state)
482       throws IOException, InterruptedException {
483     final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
484     if (cpHost != null) {
485       user.doAs(new PrivilegedExceptionAction<Void>() {
486         @Override
487         public Void run() throws Exception {
488           switch (state) {
489           case DISABLE_TABLE_PRE_OPERATION:
490             cpHost.preDisableTableHandler(tableName);
491             break;
492           case DISABLE_TABLE_POST_OPERATION:
493             cpHost.postDisableTableHandler(tableName);
494             break;
495           default:
496             throw new UnsupportedOperationException(this + " unhandled state=" + state);
497           }
498           return null;
499         }
500       });
501     }
502   }
503 
504   /**
505    * Run bulk disable.
506    */
507   private static class BulkDisabler extends BulkAssigner {
508     private final AssignmentManager assignmentManager;
509     private final List<HRegionInfo> regions;
510     private final TableName tableName;
511     private final int waitingTimeForEvents;
512 
513     public BulkDisabler(final MasterProcedureEnv env, final TableName tableName,
514         final List<HRegionInfo> regions) {
515       super(env.getMasterServices());
516       this.assignmentManager = env.getMasterServices().getAssignmentManager();
517       this.tableName = tableName;
518       this.regions = regions;
519       this.waitingTimeForEvents =
520           env.getMasterServices().getConfiguration()
521               .getInt("hbase.master.event.waiting.time", 1000);
522     }
523 
524     @Override
525     protected void populatePool(ExecutorService pool) {
526       RegionStates regionStates = assignmentManager.getRegionStates();
527       for (final HRegionInfo region : regions) {
528         if (regionStates.isRegionInTransition(region)
529             && !regionStates.isRegionInState(region, RegionState.State.FAILED_CLOSE)) {
530           continue;
531         }
532         pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler", new Runnable() {
533           @Override
534           public void run() {
535             assignmentManager.unassign(region);
536           }
537         }));
538       }
539     }
540 
541     @Override
542     protected boolean waitUntilDone(long timeout) throws InterruptedException {
543       long startTime = EnvironmentEdgeManager.currentTime();
544       long remaining = timeout;
545       List<HRegionInfo> regions = null;
546       long lastLogTime = startTime;
547       while (!server.isStopped() && remaining > 0) {
548         Thread.sleep(waitingTimeForEvents);
549         regions = assignmentManager.getRegionStates().getRegionsOfTable(tableName);
550         long now = EnvironmentEdgeManager.currentTime();
551         // Don't log more than once every ten seconds. Its obnoxious. And only log table regions
552         // if we are waiting a while for them to go down...
553         if (LOG.isDebugEnabled() && ((now - lastLogTime) > 10000)) {
554           lastLogTime = now;
555           LOG.debug("Disable waiting until done; " + remaining + " ms remaining; " + regions);
556         }
557         if (regions.isEmpty()) break;
558         remaining = timeout - (now - startTime);
559       }
560       return regions != null && regions.isEmpty();
561     }
562   }
563 }