1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.snapshot;
19
20 import java.io.FileNotFoundException;
21 import java.io.IOException;
22 import java.util.HashSet;
23 import java.util.List;
24 import java.util.Set;
25 import java.util.concurrent.CancellationException;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.hbase.classification.InterfaceAudience;
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.hbase.TableName;
34 import org.apache.hadoop.hbase.HRegionInfo;
35 import org.apache.hadoop.hbase.HTableDescriptor;
36 import org.apache.hadoop.hbase.ServerName;
37 import org.apache.hadoop.hbase.MetaTableAccessor;
38 import org.apache.hadoop.hbase.errorhandling.ForeignException;
39 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
40 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
41 import org.apache.hadoop.hbase.executor.EventHandler;
42 import org.apache.hadoop.hbase.executor.EventType;
43 import org.apache.hadoop.hbase.master.MasterServices;
44 import org.apache.hadoop.hbase.master.MetricsSnapshot;
45 import org.apache.hadoop.hbase.master.SnapshotSentinel;
46 import org.apache.hadoop.hbase.master.TableLockManager;
47 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
48 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
49 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
50 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
51 import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
52 import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
53 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
54 import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
55 import org.apache.hadoop.hbase.util.FSUtils;
56 import org.apache.hadoop.hbase.util.Pair;
57 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
58 import org.apache.zookeeper.KeeperException;
59
60
61
62
63
64
65
66
67 @InterfaceAudience.Private
68 public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
69 ForeignExceptionSnare {
70 private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
71
72 private volatile boolean finished;
73
74
75 protected final MasterServices master;
76 protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
77 protected final SnapshotDescription snapshot;
78 protected final Configuration conf;
79 protected final FileSystem fs;
80 protected final Path rootDir;
81 private final Path snapshotDir;
82 protected final Path workingDir;
83 private final MasterSnapshotVerifier verifier;
84 protected final ForeignExceptionDispatcher monitor;
85 protected final TableLockManager tableLockManager;
86 protected final TableLock tableLock;
87 protected final MonitoredTask status;
88 protected final TableName snapshotTable;
89 protected final SnapshotManifest snapshotManifest;
90
91 protected HTableDescriptor htd;
92
93
94
95
96
97 public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices) {
98 super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
99 assert snapshot != null : "SnapshotDescription must not be nul1";
100 assert masterServices != null : "MasterServices must not be nul1";
101
102 this.master = masterServices;
103 this.snapshot = snapshot;
104 this.snapshotTable = TableName.valueOf(snapshot.getTable());
105 this.conf = this.master.getConfiguration();
106 this.fs = this.master.getMasterFileSystem().getFileSystem();
107 this.rootDir = this.master.getMasterFileSystem().getRootDir();
108 this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
109 this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
110 this.monitor = new ForeignExceptionDispatcher(snapshot.getName());
111 this.snapshotManifest = SnapshotManifest.create(conf, fs, workingDir, snapshot, monitor);
112
113 this.tableLockManager = master.getTableLockManager();
114 this.tableLock = this.tableLockManager.writeLock(
115 snapshotTable,
116 EventType.C_M_SNAPSHOT_TABLE.toString());
117
118
119 this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
120
121 this.status = TaskMonitor.get().createStatus(
122 "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable);
123 }
124
125 private HTableDescriptor loadTableDescriptor()
126 throws FileNotFoundException, IOException {
127 HTableDescriptor htd =
128 this.master.getTableDescriptors().get(snapshotTable);
129 if (htd == null) {
130 throw new IOException("HTableDescriptor missing for " + snapshotTable);
131 }
132 return htd;
133 }
134
135 public TakeSnapshotHandler prepare() throws Exception {
136 super.prepare();
137 this.tableLock.acquire();
138
139 boolean success = false;
140 try {
141 this.htd = loadTableDescriptor();
142 success = true;
143 } finally {
144 if (!success) {
145 releaseTableLock();
146 }
147 }
148
149 return this;
150 }
151
152
153
154
155
156 @Override
157 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
158 justification="Intentional")
159 public void process() {
160 String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
161 + eventType + " on table " + snapshotTable;
162 LOG.info(msg);
163 status.setStatus(msg);
164 try {
165
166
167
168
169 SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
170 snapshotManifest.addTableDescriptor(this.htd);
171 monitor.rethrowException();
172
173 List<Pair<HRegionInfo, ServerName>> regionsAndLocations;
174 if (TableName.META_TABLE_NAME.equals(snapshotTable)) {
175 regionsAndLocations = new MetaTableLocator().getMetaRegionsAndLocations(
176 server.getZooKeeper());
177 } else {
178 regionsAndLocations = MetaTableAccessor.getTableRegionsAndLocations(
179 server.getZooKeeper(), server.getConnection(), snapshotTable, false);
180 }
181
182
183 snapshotRegions(regionsAndLocations);
184 monitor.rethrowException();
185
186
187 Set<String> serverNames = new HashSet<String>();
188 for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
189 if (p != null && p.getFirst() != null && p.getSecond() != null) {
190 HRegionInfo hri = p.getFirst();
191 if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
192 serverNames.add(p.getSecond().toString());
193 }
194 }
195
196
197 status.setStatus("Consolidate snapshot: " + snapshot.getName());
198 snapshotManifest.consolidate();
199
200
201 status.setStatus("Verifying snapshot: " + snapshot.getName());
202 verifier.verifySnapshot(this.workingDir, serverNames);
203
204
205 completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
206 msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
207 status.markComplete(msg);
208 LOG.info(msg);
209 metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
210 } catch (Exception e) {
211 status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " +
212 snapshotTable + " because " + e.getMessage());
213 String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
214 + " due to exception:" + e.getMessage();
215 LOG.error(reason, e);
216 ForeignException ee = new ForeignException(reason, e);
217 monitor.receive(ee);
218
219 cancel(reason);
220 } finally {
221 LOG.debug("Launching cleanup of working dir:" + workingDir);
222 try {
223
224
225 if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
226 LOG.error("Couldn't delete snapshot working directory:" + workingDir);
227 }
228 } catch (IOException e) {
229 LOG.error("Couldn't delete snapshot working directory:" + workingDir);
230 }
231 releaseTableLock();
232 }
233 }
234
235 protected void releaseTableLock() {
236 if (this.tableLock != null) {
237 try {
238 this.tableLock.release();
239 } catch (IOException ex) {
240 LOG.warn("Could not release the table lock", ex);
241 }
242 }
243 }
244
245
246
247
248
249
250
251
252
253
254 public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs)
255 throws SnapshotCreationException, IOException {
256 LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to "
257 + snapshotDir);
258 if (!fs.rename(workingDir, snapshotDir)) {
259 throw new SnapshotCreationException("Failed to move working directory(" + workingDir
260 + ") to completed directory(" + snapshotDir + ").");
261 }
262 finished = true;
263 }
264
265
266
267
268 protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
269 throws IOException, KeeperException;
270
271
272
273
274 protected void snapshotDisabledRegion(final HRegionInfo regionInfo)
275 throws IOException {
276 snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), regionInfo);
277 monitor.rethrowException();
278 status.setStatus("Completed referencing HFiles for offline region " + regionInfo.toString() +
279 " of table: " + snapshotTable);
280 }
281
282 @Override
283 public void cancel(String why) {
284 if (finished) return;
285
286 this.finished = true;
287 LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
288 " because: " + why);
289 CancellationException ce = new CancellationException(why);
290 monitor.receive(new ForeignException(master.getServerName().toString(), ce));
291 }
292
293 @Override
294 public boolean isFinished() {
295 return finished;
296 }
297
298 @Override
299 public long getCompletionTimestamp() {
300 return this.status.getCompletionTimestamp();
301 }
302
303 @Override
304 public SnapshotDescription getSnapshot() {
305 return snapshot;
306 }
307
308 @Override
309 public ForeignException getExceptionIfFailed() {
310 return monitor.getException();
311 }
312
313 @Override
314 public void rethrowExceptionIfFailed() throws ForeignException {
315 monitor.rethrowException();
316 }
317
318 @Override
319 public void rethrowException() throws ForeignException {
320 monitor.rethrowException();
321 }
322
323 @Override
324 public boolean hasException() {
325 return monitor.hasException();
326 }
327
328 @Override
329 public ForeignException getException() {
330 return monitor.getException();
331 }
332
333 }