1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import java.io.IOException;
22 import java.io.InterruptedIOException;
23 import java.util.ArrayList;
24 import java.util.HashSet;
25 import java.util.List;
26 import java.util.Set;
27 import java.util.concurrent.locks.Lock;
28 import java.util.concurrent.locks.ReentrantLock;
29
30 import org.apache.commons.logging.Log;
31 import org.apache.commons.logging.LogFactory;
32 import org.apache.hadoop.hbase.classification.InterfaceAudience;
33 import org.apache.hadoop.conf.Configuration;
34 import org.apache.hadoop.fs.FileStatus;
35 import org.apache.hadoop.fs.FileSystem;
36 import org.apache.hadoop.fs.Path;
37 import org.apache.hadoop.fs.PathFilter;
38 import org.apache.hadoop.fs.permission.FsPermission;
39 import org.apache.hadoop.hbase.ClusterId;
40 import org.apache.hadoop.hbase.TableName;
41 import org.apache.hadoop.hbase.HColumnDescriptor;
42 import org.apache.hadoop.hbase.HConstants;
43 import org.apache.hadoop.hbase.HRegionInfo;
44 import org.apache.hadoop.hbase.HTableDescriptor;
45 import org.apache.hadoop.hbase.InvalidFamilyOperationException;
46 import org.apache.hadoop.hbase.RemoteExceptionHandler;
47 import org.apache.hadoop.hbase.Server;
48 import org.apache.hadoop.hbase.ServerName;
49 import org.apache.hadoop.hbase.backup.HFileArchiver;
50 import org.apache.hadoop.hbase.exceptions.DeserializationException;
51 import org.apache.hadoop.hbase.fs.HFileSystem;
52 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
53 import org.apache.hadoop.hbase.regionserver.HRegion;
54 import org.apache.hadoop.hbase.wal.DefaultWALProvider;
55 import org.apache.hadoop.hbase.wal.WALSplitter;
56 import org.apache.hadoop.hbase.util.Bytes;
57 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
58 import org.apache.hadoop.hbase.util.FSTableDescriptors;
59 import org.apache.hadoop.hbase.util.FSUtils;
60
61 import com.google.common.annotations.VisibleForTesting;
62
63
64
65
66
67
/**
 * This class abstracts a bunch of operations the HMaster needs to interact
 * with the underlying file system: the root-directory layout, WAL (log)
 * splitting for dead servers, and table/region file cleanup.
 */
@InterfaceAudience.Private
public class MasterFileSystem {
  private static final Log LOG = LogFactory.getLog(MasterFileSystem.class.getName());

  // Master's instance of the Configuration.
  Configuration conf;

  // The server that owns this helper; used for abort/stop checks.
  Server master;

  // Sink for WAL-split timing/size metrics; see splitLog(Set, PathFilter).
  private final MetricsMasterFileSystem metricsMasterFilesystem = new MetricsMasterFileSystem();

  // Persisted unique cluster ID, read (or created) under the root dir in checkRootDir().
  private ClusterId clusterId;

  // Filesystem that hosts the HBase root directory.
  private final FileSystem fs;

  // Flips to false permanently once the filesystem is found unavailable; see checkFileSystem().
  private volatile boolean fsOk = true;

  // Directory where old (archived) WALs are moved; created in createInitialFileSystemLayout().
  private final Path oldLogDir;

  // The HBase root directory (hbase.rootdir).
  private final Path rootdir;

  // Staging directory used for table create/delete; cleared on startup.
  private final Path tempdir;

  // Serializes WAL-directory renames before the master is fully initialized; see getLogDirs().
  final Lock splitLogLock = new ReentrantLock();
  // True when distributed log REPLAY (vs. plain distributed log splitting) is active.
  final boolean distributedLogReplay;
  // Coordinates distributed WAL splitting across the cluster.
  final SplitLogManager splitLogManager;
  private final MasterServices services;

  // Accepts only meta WAL files.
  final static PathFilter META_FILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      return DefaultWALProvider.isMetaFile(p);
    }
  };

  // Accepts only non-meta WAL files.
  final static PathFilter NON_META_FILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      return !DefaultWALProvider.isMetaFile(p);
    }
  };
108
  /**
   * Build the master filesystem helper: resolve the root and temp directories,
   * point the default filesystem at the root dir's filesystem, lay out the
   * initial directory structure, and start the split log manager.
   *
   * @param master the server running the master
   * @param services master services (table descriptors, initialization state)
   * @throws IOException if the filesystem cannot be reached or laid out
   */
  public MasterFileSystem(Server master, MasterServices services)
  throws IOException {
    this.conf = master.getConfiguration();
    this.master = master;
    this.services = services;
    // Set filesystem to be that of this.rootdir else we get complaints about
    // mismatched filesystems if hbase.rootdir is hdfs and fs.defaultFS is
    // something else (e.g. the local filesystem).
    this.rootdir = FSUtils.getRootDir(conf);
    this.tempdir = new Path(this.rootdir, HConstants.HBASE_TEMP_DIRECTORY);
    // Resolve the fs from the root dir, then make it the default fs.
    this.fs = this.rootdir.getFileSystem(conf);
    FSUtils.setFsDefault(conf, new Path(this.fs.getUri()));
    // make sure the fs has the same conf
    fs.setConf(conf);
    // Set up the root/temp/old-logs layout BEFORE starting the split manager,
    // which expects those directories to exist.
    this.oldLogDir = createInitialFileSystemLayout();
    HFileSystem.addLocationsOrderInterceptor(conf);
    this.splitLogManager =
        new SplitLogManager(master, master.getConfiguration(), master, services,
            master.getServerName());
    this.distributedLogReplay = this.splitLogManager.isLogReplaying();
  }
135
  /** @return the split log manager instance (exposed for tests only). */
  @VisibleForTesting
  SplitLogManager getSplitLogManager() {
    return this.splitLogManager;
  }
140
141
142
143
144
145
146
147
148
149
150
151 private Path createInitialFileSystemLayout() throws IOException {
152
153 checkRootDir(this.rootdir, conf, this.fs);
154
155
156 checkTempDir(this.tempdir, conf, this.fs);
157
158 Path oldLogDir = new Path(this.rootdir, HConstants.HREGION_OLDLOGDIR_NAME);
159
160
161 if(!this.fs.exists(oldLogDir)) {
162 this.fs.mkdirs(oldLogDir);
163 }
164
165 return oldLogDir;
166 }
167
168 public FileSystem getFileSystem() {
169 return this.fs;
170 }
171
172
173
174
175
176 public Path getOldLogDir() {
177 return this.oldLogDir;
178 }
179
180
181
182
183
184
185 public boolean checkFileSystem() {
186 if (this.fsOk) {
187 try {
188 FSUtils.checkFileSystemAvailable(this.fs);
189 FSUtils.checkDfsSafeMode(this.conf);
190 } catch (IOException e) {
191 master.abort("Shutting down HBase cluster: file system not available", e);
192 this.fsOk = false;
193 }
194 }
195 return this.fsOk;
196 }
197
198
199
200
201 public Path getRootDir() {
202 return this.rootdir;
203 }
204
205
206
207
208 public Path getTempDir() {
209 return this.tempdir;
210 }
211
212
213
214
215 public ClusterId getClusterId() {
216 return clusterId;
217 }
218
219
220
221
222
  /**
   * Scan the WAL directory and return the names of all servers that have
   * non-empty WAL folders but are no longer online — i.e. servers whose logs
   * must be split/recovered.
   * <p>
   * Retries on IOException unless "hbase.hlog.split.skip.errors" is set.
   * If the filesystem itself is bad, or the retry sleep is interrupted, the
   * JVM is halted: the master cannot safely continue without knowing which
   * logs need splitting.
   *
   * @return set of dead servers with WAL folders needing recovery
   */
  Set<ServerName> getFailedServersFromLogFolders() {
    boolean retrySplitting = !conf.getBoolean("hbase.hlog.split.skip.errors",
        WALSplitter.SPLIT_SKIP_ERRORS_DEFAULT);

    Set<ServerName> serverNames = new HashSet<ServerName>();
    Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME);

    do {
      if (master.isStopped()) {
        LOG.warn("Master stopped while trying to get failed servers.");
        break;
      }
      try {
        if (!this.fs.exists(logsDirPath)) return serverNames;
        FileStatus[] logFolders = FSUtils.listStatus(this.fs, logsDirPath, null);
        // Online servers are fetched AFTER listing the log folders, so a
        // server that checks in while we scan is still seen as online below.
        Set<ServerName> onlineServers = ((HMaster) master).getServerManager().getOnlineServers()
            .keySet();

        if (logFolders == null || logFolders.length == 0) {
          LOG.debug("No log files to split, proceeding...");
          return serverNames;
        }
        for (FileStatus status : logFolders) {
          FileStatus[] curLogFiles = FSUtils.listStatus(this.fs, status.getPath(), null);
          if (curLogFiles == null || curLogFiles.length == 0) {
            // Empty log folder: nothing to recover for this server.
            continue;
          }
          final ServerName serverName = DefaultWALProvider.getServerNameFromWALDirectoryName(
              status.getPath());
          if (null == serverName) {
            LOG.warn("Log folder " + status.getPath() + " doesn't look like its name includes a " +
                "region server name; leaving in place. If you see later errors about missing " +
                "write ahead logs they may be saved in this location.");
          } else if (!onlineServers.contains(serverName)) {
            LOG.info("Log folder " + status.getPath() + " doesn't belong "
                + "to a known region server, splitting");
            serverNames.add(serverName);
          } else {
            LOG.info("Log folder " + status.getPath() + " belongs to an existing region server");
          }
        }
        // Successful scan: stop retrying.
        retrySplitting = false;
      } catch (IOException ioe) {
        LOG.warn("Failed getting failed servers to be recovered.", ioe);
        if (!checkFileSystem()) {
          LOG.warn("Bad Filesystem, exiting");
          Runtime.getRuntime().halt(1);
        }
        try {
          if (retrySplitting) {
            Thread.sleep(conf.getInt("hbase.hlog.split.failure.retry.interval", 30 * 1000));
          }
        } catch (InterruptedException e) {
          LOG.warn("Interrupted, aborting since cannot return w/o splitting");
          // Preserve interrupt status before halting.
          Thread.currentThread().interrupt();
          retrySplitting = false;
          Runtime.getRuntime().halt(1);
        }
      }
    } while (retrySplitting);

    return serverNames;
  }
289
290 public void splitLog(final ServerName serverName) throws IOException {
291 Set<ServerName> serverNames = new HashSet<ServerName>();
292 serverNames.add(serverName);
293 splitLog(serverNames);
294 }
295
296
297
298
299
300
301 public void splitMetaLog(final ServerName serverName) throws IOException {
302 Set<ServerName> serverNames = new HashSet<ServerName>();
303 serverNames.add(serverName);
304 splitMetaLog(serverNames);
305 }
306
307
308
309
310
311
  /**
   * Split the meta WALs of all the given dead servers.
   *
   * @param serverNames servers whose meta WALs should be split
   * @throws IOException if log splitting fails
   */
  public void splitMetaLog(final Set<ServerName> serverNames) throws IOException {
    splitLog(serverNames, META_FILTER);
  }
315
  /**
   * Claim the WAL directories of the given servers for splitting by renaming
   * each server's WAL dir to its "-splitting" variant, and return the
   * "-splitting" paths.
   * <p>
   * Until the master is initialized the renames run under
   * {@link #splitLogLock}; the {@code needReleaseLock} guard ensures the lock
   * is only released when it was actually taken (hence the findbugs
   * suppression below).
   *
   * @param serverNames dead servers whose WAL dirs are claimed
   * @return list of "-splitting" directories (pre-existing or just renamed)
   * @throws IOException if a WAL dir exists but cannot be renamed
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="UL_UNRELEASED_LOCK", justification=
      "We only release this lock when we set it. Updates to code that uses it should verify use " +
      "of the guard boolean.")
  private List<Path> getLogDirs(final Set<ServerName> serverNames) throws IOException {
    List<Path> logDirs = new ArrayList<Path>();
    boolean needReleaseLock = false;
    if (!this.services.isInitialized()) {
      // During master initialization multiple callers may try to split the
      // same WAL directory; serialize the renames with the in-process lock.
      this.splitLogLock.lock();
      needReleaseLock = true;
    }
    try {
      for (ServerName serverName : serverNames) {
        Path logDir = new Path(this.rootdir,
            DefaultWALProvider.getWALDirectoryName(serverName.toString()));
        Path splitDir = logDir.suffix(DefaultWALProvider.SPLITTING_EXT);
        // Rename the directory to its "-splitting" form to claim it.
        if (fs.exists(logDir)) {
          if (!this.fs.rename(logDir, splitDir)) {
            throw new IOException("Failed fs.rename for log split: " + logDir);
          }
          logDir = splitDir;
          LOG.debug("Renamed region directory: " + splitDir);
        } else if (!fs.exists(splitDir)) {
          // Neither the plain dir nor a "-splitting" dir: nothing to split.
          LOG.info("Log dir for server " + serverName + " does not exist");
          continue;
        }
        logDirs.add(splitDir);
      }
    } finally {
      if (needReleaseLock) {
        this.splitLogLock.unlock();
      }
    }
    return logDirs;
  }
352
353
354
355
356
357
358
359 public void prepareLogReplay(ServerName serverName, Set<HRegionInfo> regions) throws IOException {
360 if (!this.distributedLogReplay) {
361 return;
362 }
363
364 if (regions == null || regions.isEmpty()) {
365 return;
366 }
367 this.splitLogManager.markRegionsRecovering(serverName, regions);
368 }
369
  /**
   * Split the non-meta WALs of all the given dead servers.
   *
   * @param serverNames servers whose WALs should be split
   * @throws IOException if log splitting fails
   */
  public void splitLog(final Set<ServerName> serverNames) throws IOException {
    splitLog(serverNames, NON_META_FILTER);
  }
373
374
375
376
377
378
  /**
   * Delegate to the split log manager to purge recovering-region state in ZK
   * left over from earlier, failed recoveries of the given servers.
   *
   * @param failedServers servers whose stale recovering-region entries are removed
   * @throws IOException on filesystem/coordination failure
   * @throws InterruptedIOException if interrupted while cleaning up
   */
  void removeStaleRecoveringRegionsFromZK(final Set<ServerName> failedServers)
      throws IOException, InterruptedIOException {
    this.splitLogManager.removeStaleRecoveringRegions(failedServers);
  }
383
384
385
386
387
388
389
390
391
392 public void splitLog(final Set<ServerName> serverNames, PathFilter filter) throws IOException {
393 long splitTime = 0, splitLogSize = 0;
394 List<Path> logDirs = getLogDirs(serverNames);
395
396 splitLogManager.handleDeadWorkers(serverNames);
397 splitTime = EnvironmentEdgeManager.currentTime();
398 splitLogSize = splitLogManager.splitLogDistributed(serverNames, logDirs, filter);
399 splitTime = EnvironmentEdgeManager.currentTime() - splitTime;
400
401 if (this.metricsMasterFilesystem != null) {
402 if (filter == META_FILTER) {
403 this.metricsMasterFilesystem.addMetaWALSplit(splitTime, splitLogSize);
404 } else {
405 this.metricsMasterFilesystem.addSplit(splitTime, splitLogSize);
406 }
407 }
408 }
409
410
411
412
413
414
415
416
417
418
  /**
   * Check the root directory and get it ready for use:
   * <ul>
   *   <li>wait for DFS to exit safe mode</li>
   *   <li>on a fresh install, create the root dir (with configured
   *       permissions when kerberos security is on) and write the version
   *       file; on an existing one, validate the version file and re-assert
   *       root-dir permissions</li>
   *   <li>ensure a cluster ID file exists and cache its value</li>
   *   <li>bootstrap hbase:meta if it does not exist, otherwise run the
   *       table-descriptor layout migration</li>
   *   <li>rewrite the hbase:meta table descriptor</li>
   * </ul>
   *
   * @param rd the root directory path
   * @param c configuration
   * @param fs filesystem hosting {@code rd}
   * @return {@code rd}, unchanged
   * @throws IOException on filesystem failure or unreadable version/cluster id
   */
  @SuppressWarnings("deprecation")
  private Path checkRootDir(final Path rd, final Configuration c,
      final FileSystem fs)
  throws IOException {
    // If FS is in safe mode wait till out of it.
    FSUtils.waitOnSafeMode(c, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000));

    // Root-dir permissions are only enforced when kerberos auth is configured.
    boolean isSecurityEnabled = "kerberos".equalsIgnoreCase(c.get("hbase.security.authentication"));
    FsPermission rootDirPerms = new FsPermission(c.get("hbase.rootdir.perms", "700"));

    try {
      if (!fs.exists(rd)) {
        // Fresh install: create the root dir...
        if (isSecurityEnabled) {
          fs.mkdirs(rd, rootDirPerms);
        } else {
          fs.mkdirs(rd);
        }
        // ...and stamp it with the current filesystem layout version.
        FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
            10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
      } else {
        if (!fs.isDirectory(rd)) {
          throw new IllegalArgumentException(rd.toString() + " is not a directory");
        }
        // Existing install: re-assert configured permissions if they drifted.
        if (isSecurityEnabled && !rootDirPerms.equals(fs.getFileStatus(rd).getPermission())) {
          LOG.warn("Found rootdir permissions NOT matching expected \"hbase.rootdir.perms\" for "
              + "rootdir=" + rd.toString() + " permissions=" + fs.getFileStatus(rd).getPermission()
              + " and \"hbase.rootdir.perms\" configured as "
              + c.get("hbase.rootdir.perms", "700") + ". Automatically setting the permissions. You"
              + " can change the permissions by setting \"hbase.rootdir.perms\" in hbase-site.xml "
              + "and restarting the master");
          fs.setPermission(rd, rootDirPerms);
        }
        // Verify the layout version is one we can use.
        FSUtils.checkVersion(fs, rd, true, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
            10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
      }
    } catch (DeserializationException de) {
      // Unreadable/corrupt version file: surface as IOException with cause.
      LOG.fatal("Please fix invalid configuration for " + HConstants.HBASE_DIR, de);
      IOException ioe = new IOException();
      ioe.initCause(de);
      throw ioe;
    } catch (IllegalArgumentException iae) {
      LOG.fatal("Please fix invalid configuration for "
          + HConstants.HBASE_DIR + " " + rd.toString(), iae);
      throw iae;
    }

    // Make sure the cluster ID exists, creating one if needed, then cache it.
    if (!FSUtils.checkClusterIdExists(fs, rd, c.getInt(
        HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000))) {
      FSUtils.setClusterId(fs, rd, new ClusterId(), c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000));
    }
    clusterId = FSUtils.getClusterId(fs, rd);

    // Bootstrap hbase:meta on a brand-new cluster; otherwise make sure the
    // table descriptor layout has been migrated to the current format.
    if (!FSUtils.metaRegionExists(fs, rd)) {
      bootstrap(rd, c);
    } else {
      org.apache.hadoop.hbase.util.FSTableDescriptorMigrationToSubdir
          .migrateFSTableDescriptorsIfNecessary(fs, rd);
    }

    // (Re)write the hbase:meta table descriptor sourced from the predefined
    // system-table descriptor registry.
    FSTableDescriptors fsd = new FSTableDescriptors(c, fs, rd);
    fsd.createTableDescriptor(
        new HTableDescriptor(fsd.get(TableName.META_TABLE_NAME)));

    return rd;
  }
502
503
504
505
506
  /**
   * Make sure the hbase temp directory exists and is empty. Leftover table
   * data under it (e.g. from a table delete that died midway) is archived
   * region by region before the directory is wiped and recreated.
   *
   * @param tmpdir path of the temp directory
   * @param c configuration (not read in this method)
   * @param fs filesystem hosting the temp directory
   * @throws IOException if cleanup or re-creation fails
   */
  private void checkTempDir(final Path tmpdir, final Configuration c, final FileSystem fs)
      throws IOException {
    // If the temp directory exists, archive its contents then delete it.
    if (fs.exists(tmpdir)) {
      for (Path tabledir: FSUtils.getTableDirs(fs, tmpdir)) {
        for (Path regiondir: FSUtils.getRegionDirs(fs, tabledir)) {
          HFileArchiver.archiveRegion(fs, this.rootdir, tabledir, regiondir);
        }
      }
      if (!fs.delete(tmpdir, true)) {
        throw new IOException("Unable to clean the temp directory: " + tmpdir);
      }
    }

    // Recreate an empty temp directory.
    if (!fs.mkdirs(tmpdir)) {
      throw new IOException("HBase temp directory '" + tmpdir + "' creation failure.");
    }
  }
528
  /**
   * Create the initial hbase:meta region on a brand new cluster.
   * Info-family caching is disabled on the (shared, mutated-in-place)
   * descriptor while the region is created, then re-enabled.
   *
   * @param rd the HBase root directory
   * @param c cluster configuration
   * @throws IOException if meta region creation fails
   */
  private static void bootstrap(final Path rd, final Configuration c)
  throws IOException {
    LOG.info("BOOTSTRAP: creating hbase:meta region");
    try {
      // Create the meta region dir from the well-known first meta region info.
      HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
      HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
      setInfoFamilyCachingForMeta(metaDescriptor, false);
      HRegion meta = HRegion.createHRegion(metaHRI, rd, c, metaDescriptor, null, true, true);
      setInfoFamilyCachingForMeta(metaDescriptor, true);
      HRegion.closeHRegion(meta);
    } catch (IOException e) {
      // Unwrap remote exceptions before logging/rethrowing.
      e = RemoteExceptionHandler.checkIOException(e);
      LOG.error("bootstrap", e);
      throw e;
    }
  }
549
550
551
552
553 public static void setInfoFamilyCachingForMeta(final HTableDescriptor metaDescriptor,
554 final boolean b) {
555 for (HColumnDescriptor hcd: metaDescriptor.getColumnFamilies()) {
556 if (Bytes.equals(hcd.getName(), HConstants.CATALOG_FAMILY)) {
557 hcd.setBlockCacheEnabled(b);
558 hcd.setInMemory(b);
559 }
560 }
561 }
562
  /**
   * Remove a region's files from the filesystem by archiving them via
   * {@link HFileArchiver} (moved aside, not destroyed).
   *
   * @param region region whose files are archived
   * @throws IOException on archive failure
   */
  public void deleteRegion(HRegionInfo region) throws IOException {
    HFileArchiver.archiveRegion(conf, fs, region);
  }
566
567 public void deleteTable(TableName tableName) throws IOException {
568 fs.delete(FSUtils.getTableDir(rootdir, tableName), true);
569 }
570
571
572
573
574
575
576
577 public Path moveTableToTemp(TableName tableName) throws IOException {
578 Path srcPath = FSUtils.getTableDir(rootdir, tableName);
579 Path tempPath = FSUtils.getTableDir(this.tempdir, tableName);
580
581
582 if (!fs.exists(tempPath.getParent()) && !fs.mkdirs(tempPath.getParent())) {
583 throw new IOException("HBase temp directory '" + tempPath.getParent() + "' creation failure.");
584 }
585
586 if (!fs.rename(srcPath, tempPath)) {
587 throw new IOException("Unable to move '" + srcPath + "' to temp '" + tempPath + "'");
588 }
589
590 return tempPath;
591 }
592
  /**
   * Intentionally a no-op; kept for interface compatibility.
   *
   * @param region region whose info would be updated (ignored)
   */
  public void updateRegionInfo(HRegionInfo region) {
    // Body intentionally empty.
  }
598
599 public void deleteFamilyFromFS(HRegionInfo region, byte[] familyName)
600 throws IOException {
601
602 Path tableDir = FSUtils.getTableDir(rootdir, region.getTable());
603 HFileArchiver.archiveFamily(fs, conf, region, tableDir, familyName);
604
605
606 Path familyDir = new Path(tableDir,
607 new Path(region.getEncodedName(), Bytes.toString(familyName)));
608 if (fs.delete(familyDir, true) == false) {
609 if (fs.exists(familyDir)) {
610 throw new IOException("Could not delete family "
611 + Bytes.toString(familyName) + " from FileSystem for region "
612 + region.getRegionNameAsString() + "(" + region.getEncodedName()
613 + ")");
614 }
615 }
616 }
617
618 public void stop() {
619 if (splitLogManager != null) {
620 this.splitLogManager.stop();
621 }
622 }
623
624
625
626
627
628
629
630
631 public HTableDescriptor deleteColumn(TableName tableName, byte[] familyName)
632 throws IOException {
633 LOG.info("DeleteColumn. Table = " + tableName
634 + " family = " + Bytes.toString(familyName));
635 HTableDescriptor htd = this.services.getTableDescriptors().get(tableName);
636 htd.removeFamily(familyName);
637 this.services.getTableDescriptors().add(htd);
638 return htd;
639 }
640
641
642
643
644
645
646
647
648 public HTableDescriptor modifyColumn(TableName tableName, HColumnDescriptor hcd)
649 throws IOException {
650 LOG.info("AddModifyColumn. Table = " + tableName
651 + " HCD = " + hcd.toString());
652
653 HTableDescriptor htd = this.services.getTableDescriptors().get(tableName);
654 byte [] familyName = hcd.getName();
655 if(!htd.hasFamily(familyName)) {
656 throw new InvalidFamilyOperationException("Family '" +
657 Bytes.toString(familyName) + "' doesn't exists so cannot be modified");
658 }
659 htd.modifyFamily(hcd);
660 this.services.getTableDescriptors().add(htd);
661 return htd;
662 }
663
664
665
666
667
668
669
670
671 public HTableDescriptor addColumn(TableName tableName, HColumnDescriptor hcd)
672 throws IOException {
673 LOG.info("AddColumn. Table = " + tableName + " HCD = " +
674 hcd.toString());
675 HTableDescriptor htd = this.services.getTableDescriptors().get(tableName);
676 if (htd == null) {
677 throw new InvalidFamilyOperationException("Family '" +
678 hcd.getNameAsString() + "' cannot be modified as HTD is null");
679 }
680 htd.addFamily(hcd);
681 this.services.getTableDescriptors().add(htd);
682 return htd;
683 }
684
685
686
687
688
689
  /**
   * Ask the split log manager to (re)evaluate its log recovery mode.
   * The {@code false} argument's exact meaning is defined by
   * {@code SplitLogManager.setRecoveryMode} — presumably "not the initial
   * startup call"; confirm there.
   *
   * @throws IOException if the mode cannot be updated
   */
  public void setLogRecoveryMode() throws IOException {
    this.splitLogManager.setRecoveryMode(false);
  }
693
  /** @return the current log recovery mode reported by the split log manager. */
  public RecoveryMode getLogRecoveryMode() {
    return this.splitLogManager.getRecoveryMode();
  }
697
698 public void logFileSystemState(Log log) throws IOException {
699 FSUtils.logFileSystemState(fs, rootdir, log);
700 }
701 }