/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Tests various scan start and stop row scenarios. The rows are set on the
 * scans, which are then run in a MapReduce job to verify that the boundaries
 * are handed over to the job and honored there as well.
 */
@Category(LargeTests.class)
public class TestMultiTableInputFormat {

  private static final Log LOG = LogFactory.getLog(TestMultiTableInputFormat.class);
  static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  static final String TABLE_NAME = "scantest";
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final String KEY_STARTROW = "startRow";
  static final String KEY_LASTROW = "stopRow";

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // switch the multi-table input formats to log at DEBUG level
    TEST_UTIL.enableDebug(MultiTableInputFormat.class);
    TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
    TEST_UTIL.setJobWithoutMRCluster();
    // start mini hbase cluster
    TEST_UTIL.startMiniCluster(3);
    // create and fill the three test tables
    for (int i = 0; i < 3; i++) {
      try (HTable table =
          TEST_UTIL.createMultiRegionTable(TableName.valueOf(TABLE_NAME + String.valueOf(i)),
            INPUT_FAMILY, 4)) {
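        // loadTable writes rows keyed "aaa" through "zzz", each carrying a
        // single cell in INPUT_FAMILY whose value equals the row key.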
        TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
      }
    }
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  @After
  public void tearDown() throws Exception {
    Configuration c = TEST_UTIL.getConfiguration();
    FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
  }

  /**
   * Passes the key and value to the reducer.
   */
  public static class ScanMapper extends
      TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
    /**
     * Passes the key and value on to the reduce phase.
     *
     * @param key The key, here "aaa", "aab" etc.
     * @param value The value is the same as the key.
     * @param context The task context.
     * @throws IOException When reading the rows fails.
     */
    @Override
    public void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      if (value.size() != 1) {
        throw new IOException("There should only be one input column");
      }
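      // Result.getMap() is keyed family -> qualifier -> timestamp -> value.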
      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
          value.getMap();
      if (!cf.containsKey(INPUT_FAMILY)) {
        throw new IOException("Wrong input columns. Missing: '" +
            Bytes.toString(INPUT_FAMILY) + "'.");
      }
      String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
      LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
          ", value -> " + val);
      context.write(key, key);
    }
  }

  /**
   * Checks the first and last keys seen against the scan boundaries.
   */
  public static class ScanReducer extends
      Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
          NullWritable, NullWritable> {
    private String first = null;
    private String last = null;

    @Override
    protected void reduce(ImmutableBytesWritable key,
        Iterable<ImmutableBytesWritable> values, Context context)
        throws IOException, InterruptedException {
      int count = 0;
      for (ImmutableBytesWritable value : values) {
        String val = Bytes.toStringBinary(value.get());
        LOG.debug("reduce: key[" + count + "] -> " +
            Bytes.toStringBinary(key.get()) + ", value -> " + val);
        if (first == null) first = val;
        last = val;
        count++;
      }
      // each of the three identically loaded tables contributes one value per key
      assertEquals(3, count);
    }

    @Override
    protected void cleanup(Context context) throws IOException,
        InterruptedException {
      Configuration c = context.getConfiguration();
      String startRow = c.get(KEY_STARTROW);
      String lastRow = c.get(KEY_LASTROW);
      LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
          startRow + "\"");
      LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
          "\"");
      if (startRow != null && startRow.length() > 0) {
        assertEquals(startRow, first);
      }
      if (lastRow != null && lastRow.length() > 0) {
        assertEquals(lastRow, last);
      }
    }
  }

  @Test
  public void testScanEmptyToEmpty() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan(null, null, null);
  }

  @Test
  public void testScanEmptyToAPP() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan(null, "app", "apo");
  }

  @Test
  public void testScanOBBToOPP() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan("obb", "opp", "opo");
  }

  @Test
  public void testScanYZYToEmpty() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan("yzy", null, "zzz");
  }

  /**
   * Tests a MR scan using specific start and stop rows.
   *
   * @throws IOException When reading the tables or writing the output fails.
   * @throws ClassNotFoundException When the job classes cannot be found.
   * @throws InterruptedException When the job is aborted.
   */
  private void testScan(String start, String stop, String last)
      throws IOException, InterruptedException, ClassNotFoundException {
    String jobName =
        "Scan" + (start != null ? start.toUpperCase() : "Empty") + "To" +
            (stop != null ? stop.toUpperCase() : "Empty");
    LOG.info("Before map/reduce startup - job " + jobName);
    Configuration c = new Configuration(TEST_UTIL.getConfiguration());

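    // Hand the expected first and last row keys to the reducer through the
    // job configuration so that cleanup() can verify the scan boundaries.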
    c.set(KEY_STARTROW, start != null ? start : "");
    c.set(KEY_LASTROW, last != null ? last : "");

    List<Scan> scans = new ArrayList<Scan>();

    for (int i = 0; i < 3; i++) {
      Scan scan = new Scan();

      scan.addFamily(INPUT_FAMILY);
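      // MultiTableInputFormat reads this attribute to determine which table
      // each Scan in the list runs against.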
      scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(TABLE_NAME + i));

      if (start != null) {
        scan.setStartRow(Bytes.toBytes(start));
      }
      if (stop != null) {
        scan.setStopRow(Bytes.toBytes(stop));
      }

      scans.add(scan);

      LOG.info("scan before: " + scan);
    }

    Job job = new Job(c, jobName);

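    // initTableMapperJob with a list of scans configures the job to use
    // MultiTableInputFormat and sets the mapper output key/value classes.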
    TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
        ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
    job.setReducerClass(ScanReducer.class);
    job.setNumReduceTasks(1); // one to get final "first" and "last" key
    FileOutputFormat.setOutputPath(job,
      new Path(TEST_UTIL.getDataTestDirOnTestFS(), job.getJobName()));
    LOG.info("Started " + job.getJobName());
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    LOG.info("After map/reduce completion - job " + jobName);
  }
}