1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mttr;
20
21 import static org.junit.Assert.assertEquals;
22
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.concurrent.Callable;
26 import java.util.concurrent.ExecutorService;
27 import java.util.concurrent.Executors;
28 import java.util.concurrent.Future;
29 import java.util.concurrent.TimeUnit;
30
31 import org.apache.commons.lang.RandomStringUtils;
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
35 import org.apache.hadoop.hbase.ClusterStatus;
36 import org.apache.hadoop.hbase.HColumnDescriptor;
37 import org.apache.hadoop.hbase.HTableDescriptor;
38 import org.apache.hadoop.hbase.IntegrationTestingUtility;
39 import org.apache.hadoop.hbase.InvalidFamilyOperationException;
40 import org.apache.hadoop.hbase.NamespaceExistException;
41 import org.apache.hadoop.hbase.NamespaceNotFoundException;
42 import org.apache.hadoop.hbase.TableExistsException;
43 import org.apache.hadoop.hbase.TableName;
44 import org.apache.hadoop.hbase.TableNotFoundException;
45 import org.apache.hadoop.hbase.testclassification.IntegrationTests;
46 import org.apache.hadoop.hbase.chaos.actions.Action;
47 import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction;
48 import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
49 import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction;
50 import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingTableAction;
51 import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
52 import org.apache.hadoop.hbase.client.Admin;
53 import org.apache.hadoop.hbase.client.HBaseAdmin;
54 import org.apache.hadoop.hbase.client.HTable;
55 import org.apache.hadoop.hbase.client.Put;
56 import org.apache.hadoop.hbase.client.Result;
57 import org.apache.hadoop.hbase.client.ResultScanner;
58 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
59 import org.apache.hadoop.hbase.client.Scan;
60 import org.apache.hadoop.hbase.client.Table;
61 import org.apache.hadoop.hbase.coprocessor.CoprocessorException;
62 import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
63 import org.apache.hadoop.hbase.ipc.FatalConnectionException;
64 import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
65 import org.apache.hadoop.hbase.security.AccessDeniedException;
66 import org.apache.hadoop.hbase.util.Bytes;
67 import org.apache.hadoop.hbase.util.LoadTestTool;
68 import org.apache.htrace.Span;
69 import org.apache.htrace.Trace;
70 import org.apache.htrace.TraceScope;
71 import org.apache.htrace.impl.AlwaysSampler;
72 import org.junit.AfterClass;
73 import org.junit.BeforeClass;
74 import org.junit.Test;
75 import org.junit.experimental.categories.Category;
76
77 import com.google.common.base.Objects;
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119 @Category(IntegrationTests.class)
120 public class IntegrationTestMTTR {
121
122
123
124 private static final byte[] FAMILY = Bytes.toBytes("d");
125 private static final Log LOG = LogFactory.getLog(IntegrationTestMTTR.class);
126 private static long sleepTime;
127 private static final String SLEEP_TIME_KEY = "hbase.IntegrationTestMTTR.sleeptime";
128 private static final long SLEEP_TIME_DEFAULT = 60 * 1000l;
129
130
131
132
133 private static TableName tableName;
134 private static TableName loadTableName;
135
136
137
138
139 private static IntegrationTestingUtility util;
140
141
142
143
144 private static ExecutorService executorService;
145
146
147
148
149 private static Action restartRSAction;
150 private static Action restartMetaAction;
151 private static Action moveMetaRegionsAction;
152 private static Action moveRegionAction;
153 private static Action restartMasterAction;
154
155
156
157
158 private static LoadTestTool loadTool;
159
160
161 @BeforeClass
162 public static void setUp() throws Exception {
163
164 if (util == null) {
165 util = new IntegrationTestingUtility();
166 }
167
168
169 util.initializeCluster(3);
170
171
172 loadTool = new LoadTestTool();
173 loadTool.setConf(util.getConfiguration());
174
175
176
177 executorService = Executors.newFixedThreadPool(8);
178
179
180 setupTables();
181
182
183 sleepTime = util.getConfiguration().getLong(SLEEP_TIME_KEY, SLEEP_TIME_DEFAULT);
184 setupActions();
185 }
186
187 private static void setupActions() throws IOException {
188
189
190 util.getConfiguration().setLong(Action.START_RS_TIMEOUT_KEY, 3 * 60 * 1000);
191
192
193
194 restartRSAction = new RestartRsHoldingTableAction(sleepTime, tableName.getNameAsString());
195
196
197 restartMetaAction = new RestartRsHoldingMetaAction(sleepTime);
198
199
200 moveMetaRegionsAction = new MoveRegionsOfTableAction(sleepTime,
201 MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, TableName.META_TABLE_NAME);
202
203
204 moveRegionAction = new MoveRegionsOfTableAction(sleepTime,
205 MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, tableName);
206
207
208 restartMasterAction = new RestartActiveMasterAction(1000);
209
210
211 Action.ActionContext actionContext = new Action.ActionContext(util);
212 restartRSAction.init(actionContext);
213 restartMetaAction.init(actionContext);
214 moveMetaRegionsAction.init(actionContext);
215 moveRegionAction.init(actionContext);
216 restartMasterAction.init(actionContext);
217 }
218
219 private static void setupTables() throws IOException {
220
221 tableName = TableName.valueOf(util.getConfiguration()
222 .get("hbase.IntegrationTestMTTR.tableName", "IntegrationTestMTTR"));
223
224 loadTableName = TableName.valueOf(util.getConfiguration()
225 .get("hbase.IntegrationTestMTTR.loadTableName", "IntegrationTestMTTRLoadTestTool"));
226
227 if (util.getHBaseAdmin().tableExists(tableName)) {
228 util.deleteTable(tableName);
229 }
230
231 if (util.getHBaseAdmin().tableExists(loadTableName)) {
232 util.deleteTable(loadTableName);
233 }
234
235
236 HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
237
238
239 tableDescriptor.setMaxFileSize(Long.MAX_VALUE);
240
241 HColumnDescriptor descriptor = new HColumnDescriptor(FAMILY);
242 descriptor.setMaxVersions(1);
243 tableDescriptor.addFamily(descriptor);
244 util.getHBaseAdmin().createTable(tableDescriptor);
245
246
247 int ret = loadTool.run(new String[]{"-tn", loadTableName.getNameAsString(), "-init_only"});
248 assertEquals("Failed to initialize LoadTestTool", 0, ret);
249 }
250
251 @AfterClass
252 public static void after() throws IOException {
253
254 util.restoreCluster();
255 util = null;
256
257
258 executorService.shutdown();
259 executorService = null;
260
261
262 moveRegionAction = null;
263 restartMetaAction = null;
264 moveMetaRegionsAction = null;
265 restartRSAction = null;
266 restartMasterAction = null;
267
268 loadTool = null;
269 }
270
271 @Test
272 public void testRestartRsHoldingTable() throws Exception {
273 run(new ActionCallable(restartRSAction), "RestartRsHoldingTableAction");
274 }
275
276 @Test
277 public void testKillRsHoldingMeta() throws Exception {
278 run(new ActionCallable(restartMetaAction), "KillRsHoldingMeta");
279 }
280
281 @Test
282 public void testMoveMeta() throws Exception {
283 run(new ActionCallable(moveMetaRegionsAction), "MoveMeta");
284 }
285
286 @Test
287 public void testMoveRegion() throws Exception {
288 run(new ActionCallable(moveRegionAction), "MoveRegion");
289 }
290
291 @Test
292 public void testRestartMaster() throws Exception {
293 run(new ActionCallable(restartMasterAction), "RestartMaster");
294 }
295
296 public void run(Callable<Boolean> monkeyCallable, String testName) throws Exception {
297 int maxIters = util.getHBaseClusterInterface().isDistributedCluster() ? 10 : 3;
298 LOG.info("Starting " + testName + " with " + maxIters + " iterations.");
299
300
301 ArrayList<TimingResult> resultPuts = new ArrayList<TimingResult>(maxIters);
302 ArrayList<TimingResult> resultScan = new ArrayList<TimingResult>(maxIters);
303 ArrayList<TimingResult> resultAdmin = new ArrayList<TimingResult>(maxIters);
304 long start = System.nanoTime();
305
306 try {
307
308 for (int fullIterations = 0; fullIterations < maxIters; fullIterations++) {
309
310 Future<Boolean> monkeyFuture = executorService.submit(monkeyCallable);
311
312
313 Future<TimingResult> putFuture = executorService.submit(new PutCallable(monkeyFuture));
314 Future<TimingResult> scanFuture = executorService.submit(new ScanCallable(monkeyFuture));
315 Future<TimingResult> adminFuture = executorService.submit(new AdminCallable(monkeyFuture));
316
317 Future<Boolean> loadFuture = executorService.submit(new LoadCallable(monkeyFuture));
318
319 monkeyFuture.get();
320 loadFuture.get();
321
322
323 TimingResult putTime = putFuture.get();
324 TimingResult scanTime = scanFuture.get();
325 TimingResult adminTime = adminFuture.get();
326
327
328 resultPuts.add(putTime);
329 resultScan.add(scanTime);
330 resultAdmin.add(adminTime);
331
332
333 Thread.sleep(5000l);
334 }
335 } catch (Exception e) {
336 long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
337 LOG.info(testName + " failed after " + runtimeMs + "ms.", e);
338 throw e;
339 }
340
341 long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
342
343 Objects.ToStringHelper helper = Objects.toStringHelper("MTTRResults")
344 .add("putResults", resultPuts)
345 .add("scanResults", resultScan)
346 .add("adminResults", resultAdmin)
347 .add("totalRuntimeMs", runtimeMs)
348 .add("name", testName);
349
350
351 LOG.info(helper.toString());
352 }
353
354
355
356
357
358
359 private static class TimingResult {
360 DescriptiveStatistics stats = new DescriptiveStatistics();
361 ArrayList<Long> traces = new ArrayList<Long>(10);
362
363
364
365
366
367
368 public void addResult(long time, Span span) {
369 stats.addValue(TimeUnit.MILLISECONDS.convert(time, TimeUnit.NANOSECONDS));
370 if (TimeUnit.SECONDS.convert(time, TimeUnit.NANOSECONDS) >= 1) {
371 traces.add(span.getTraceId());
372 }
373 }
374
375 @Override
376 public String toString() {
377 Objects.ToStringHelper helper = Objects.toStringHelper(this)
378 .add("numResults", stats.getN())
379 .add("minTime", stats.getMin())
380 .add("meanTime", stats.getMean())
381 .add("maxTime", stats.getMax())
382 .add("25th", stats.getPercentile(25))
383 .add("50th", stats.getPercentile(50))
384 .add("75th", stats.getPercentile(75))
385 .add("90th", stats.getPercentile(90))
386 .add("95th", stats.getPercentile(95))
387 .add("99th", stats.getPercentile(99))
388 .add("99.9th", stats.getPercentile(99.9))
389 .add("99.99th", stats.getPercentile(99.99))
390 .add("traces", traces);
391 return helper.toString();
392 }
393 }
394
395
396
397
398 static abstract class TimingCallable implements Callable<TimingResult> {
399 protected final Future<?> future;
400
401 public TimingCallable(Future<?> f) {
402 future = f;
403 }
404
405 @Override
406 public TimingResult call() throws Exception {
407 TimingResult result = new TimingResult();
408 final int maxIterations = 10;
409 int numAfterDone = 0;
410 int resetCount = 0;
411
412 while (numAfterDone < maxIterations) {
413 long start = System.nanoTime();
414 TraceScope scope = null;
415 try {
416 scope = Trace.startSpan(getSpanName(), AlwaysSampler.INSTANCE);
417 boolean actionResult = doAction();
418 if (actionResult && future.isDone()) {
419 numAfterDone++;
420 }
421
422
423
424
425
426
427 } catch (AccessDeniedException e) {
428 throw e;
429 } catch (CoprocessorException e) {
430 throw e;
431 } catch (FatalConnectionException e) {
432 throw e;
433 } catch (InvalidFamilyOperationException e) {
434 throw e;
435 } catch (NamespaceExistException e) {
436 throw e;
437 } catch (NamespaceNotFoundException e) {
438 throw e;
439 } catch (NoSuchColumnFamilyException e) {
440 throw e;
441 } catch (TableExistsException e) {
442 throw e;
443 } catch (TableNotFoundException e) {
444 throw e;
445 } catch (RetriesExhaustedException e){
446 throw e;
447
448
449
450
451
452 } catch (Exception e) {
453 resetCount++;
454 if (resetCount < maxIterations) {
455 LOG.info("Non-fatal exception while running " + this.toString()
456 + ". Resetting loop counter", e);
457 numAfterDone = 0;
458 } else {
459 LOG.info("Too many unexpected Exceptions. Aborting.", e);
460 throw e;
461 }
462 } finally {
463 if (scope != null) {
464 scope.close();
465 }
466 }
467 result.addResult(System.nanoTime() - start, scope.getSpan());
468 }
469 return result;
470 }
471
472 protected abstract boolean doAction() throws Exception;
473
474 protected String getSpanName() {
475 return this.getClass().getSimpleName();
476 }
477
478 @Override
479 public String toString() {
480 return this.getSpanName();
481 }
482 }
483
484
485
486
487
488 static class PutCallable extends TimingCallable {
489
490 private final Table table;
491
492 public PutCallable(Future<?> f) throws IOException {
493 super(f);
494 this.table = new HTable(util.getConfiguration(), tableName);
495 }
496
497 @Override
498 protected boolean doAction() throws Exception {
499 Put p = new Put(Bytes.toBytes(RandomStringUtils.randomAlphanumeric(5)));
500 p.add(FAMILY, Bytes.toBytes("\0"), Bytes.toBytes(RandomStringUtils.randomAscii(5)));
501 table.put(p);
502 return true;
503 }
504
505 @Override
506 protected String getSpanName() {
507 return "MTTR Put Test";
508 }
509 }
510
511
512
513
514
515 static class ScanCallable extends TimingCallable {
516 private final Table table;
517
518 public ScanCallable(Future<?> f) throws IOException {
519 super(f);
520 this.table = new HTable(util.getConfiguration(), tableName);
521 }
522
523 @Override
524 protected boolean doAction() throws Exception {
525 ResultScanner rs = null;
526 try {
527 Scan s = new Scan();
528 s.setBatch(2);
529 s.addFamily(FAMILY);
530 s.setFilter(new KeyOnlyFilter());
531 s.setMaxVersions(1);
532
533 rs = table.getScanner(s);
534 Result result = rs.next();
535 return result != null && result.size() > 0;
536 } finally {
537 if (rs != null) {
538 rs.close();
539 }
540 }
541 }
542 @Override
543 protected String getSpanName() {
544 return "MTTR Scan Test";
545 }
546 }
547
548
549
550
551 static class AdminCallable extends TimingCallable {
552
553 public AdminCallable(Future<?> f) throws IOException {
554 super(f);
555 }
556
557 @Override
558 protected boolean doAction() throws Exception {
559 Admin admin = null;
560 try {
561 admin = new HBaseAdmin(util.getConfiguration());
562 ClusterStatus status = admin.getClusterStatus();
563 return status != null;
564 } finally {
565 if (admin != null) {
566 admin.close();
567 }
568 }
569 }
570
571 @Override
572 protected String getSpanName() {
573 return "MTTR Admin Test";
574 }
575 }
576
577
578 static class ActionCallable implements Callable<Boolean> {
579 private final Action action;
580
581 public ActionCallable(Action action) {
582 this.action = action;
583 }
584
585 @Override
586 public Boolean call() throws Exception {
587 this.action.perform();
588 return true;
589 }
590 }
591
592
593
594
595
596 public static class LoadCallable implements Callable<Boolean> {
597
598 private final Future<?> future;
599
600 public LoadCallable(Future<?> f) {
601 future = f;
602 }
603
604 @Override
605 public Boolean call() throws Exception {
606 int colsPerKey = 10;
607 int numServers = util.getHBaseClusterInterface().getInitialClusterStatus().getServersSize();
608 int numKeys = numServers * 5000;
609 int writeThreads = 10;
610
611
612
613
614 do {
615 int ret = loadTool.run(new String[]{
616 "-tn", loadTableName.getNameAsString(),
617 "-write", String.format("%d:%d:%d", colsPerKey, 500, writeThreads),
618 "-num_keys", String.valueOf(numKeys),
619 "-skip_init"
620 });
621 assertEquals("Load failed", 0, ret);
622 } while (!future.isDone());
623
624 return true;
625 }
626 }
627 }