View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import com.google.protobuf.CodedInputStream;
22  
23  import java.io.FileNotFoundException;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.Collection;
27  import java.util.HashMap;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.concurrent.ThreadPoolExecutor;
31  import java.util.concurrent.TimeUnit;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FSDataInputStream;
37  import org.apache.hadoop.fs.FSDataOutputStream;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.hbase.HRegionInfo;
41  import org.apache.hadoop.hbase.HTableDescriptor;
42  import org.apache.hadoop.hbase.classification.InterfaceAudience;
43  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
44  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
45  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotDataManifest;
46  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
47  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
48  import org.apache.hadoop.hbase.regionserver.HRegion;
49  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
50  import org.apache.hadoop.hbase.regionserver.Store;
51  import org.apache.hadoop.hbase.regionserver.StoreFile;
52  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
53  import org.apache.hadoop.hbase.util.Bytes;
54  import org.apache.hadoop.hbase.util.FSUtils;
55  import org.apache.hadoop.hbase.util.FSTableDescriptors;
56  import org.apache.hadoop.hbase.util.Threads;
57  
58  /**
59   * Utility class to help read/write the Snapshot Manifest.
60   *
61   * The snapshot format is transparent for the users of this class,
62   * once the snapshot is written, it will never be modified.
63   * On open() the snapshot will be loaded to the current in-memory format.
64   */
65  @InterfaceAudience.Private
66  public class SnapshotManifest {
67    private static final Log LOG = LogFactory.getLog(SnapshotManifest.class);
68  
69    public static final String SNAPSHOT_MANIFEST_SIZE_LIMIT_CONF_KEY = "snapshot.manifest.size.limit";
70  
71    public static final String DATA_MANIFEST_NAME = "data.manifest";
72  
73    private List<SnapshotRegionManifest> regionManifests;
74    private SnapshotDescription desc;
75    private HTableDescriptor htd;
76  
77    private final ForeignExceptionSnare monitor;
78    private final Configuration conf;
79    private final Path workingDir;
80    private final FileSystem fs;
81    private int manifestSizeLimit;
82  
83    private SnapshotManifest(final Configuration conf, final FileSystem fs,
84        final Path workingDir, final SnapshotDescription desc,
85        final ForeignExceptionSnare monitor) {
86      this.monitor = monitor;
87      this.desc = desc;
88      this.workingDir = workingDir;
89      this.conf = conf;
90      this.fs = fs;
91  
92      this.manifestSizeLimit = conf.getInt(SNAPSHOT_MANIFEST_SIZE_LIMIT_CONF_KEY, 64 * 1024 * 1024);
93    }
94  
95    /**
96     * Return a SnapshotManifest instance, used for writing a snapshot.
97     *
98     * There are two usage pattern:
99     *  - The Master will create a manifest, add the descriptor, offline regions
100    *    and consolidate the snapshot by writing all the pending stuff on-disk.
101    *      manifest = SnapshotManifest.create(...)
102    *      manifest.addRegion(tableDir, hri)
103    *      manifest.consolidate()
104    *  - The RegionServer will create a single region manifest
105    *      manifest = SnapshotManifest.create(...)
106    *      manifest.addRegion(region)
107    */
108   public static SnapshotManifest create(final Configuration conf, final FileSystem fs,
109       final Path workingDir, final SnapshotDescription desc,
110       final ForeignExceptionSnare monitor) {
111     return new SnapshotManifest(conf, fs, workingDir, desc, monitor);
112   }
113 
114   /**
115    * Return a SnapshotManifest instance with the information already loaded in-memory.
116    *    SnapshotManifest manifest = SnapshotManifest.open(...)
117    *    HTableDescriptor htd = manifest.getTableDescriptor()
118    *    for (SnapshotRegionManifest regionManifest: manifest.getRegionManifests())
119    *      hri = regionManifest.getRegionInfo()
120    *      for (regionManifest.getFamilyFiles())
121    *        ...
122    */
123   public static SnapshotManifest open(final Configuration conf, final FileSystem fs,
124       final Path workingDir, final SnapshotDescription desc) throws IOException {
125     SnapshotManifest manifest = new SnapshotManifest(conf, fs, workingDir, desc, null);
126     manifest.load();
127     return manifest;
128   }
129 
130 
131   /**
132    * Add the table descriptor to the snapshot manifest
133    */
134   public void addTableDescriptor(final HTableDescriptor htd) throws IOException {
135     this.htd = htd;
136   }
137 
138   interface RegionVisitor<TRegion, TFamily> {
139     TRegion regionOpen(final HRegionInfo regionInfo) throws IOException;
140     void regionClose(final TRegion region) throws IOException;
141 
142     TFamily familyOpen(final TRegion region, final byte[] familyName) throws IOException;
143     void familyClose(final TRegion region, final TFamily family) throws IOException;
144 
145     void storeFile(final TRegion region, final TFamily family, final StoreFileInfo storeFile)
146       throws IOException;
147   }
148 
149   private RegionVisitor createRegionVisitor(final SnapshotDescription desc) throws IOException {
150     switch (getSnapshotFormat(desc)) {
151       case SnapshotManifestV1.DESCRIPTOR_VERSION:
152         return new SnapshotManifestV1.ManifestBuilder(conf, fs, workingDir);
153       case SnapshotManifestV2.DESCRIPTOR_VERSION:
154         return new SnapshotManifestV2.ManifestBuilder(conf, fs, workingDir);
155       default:
156         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
157     }
158   }
159 
160   /**
161    * Creates a 'manifest' for the specified region, by reading directly from the HRegion object.
162    * This is used by the "online snapshot" when the table is enabled.
163    */
164   public void addRegion(final HRegion region) throws IOException {
165     // 0. Get the ManifestBuilder/RegionVisitor
166     RegionVisitor visitor = createRegionVisitor(desc);
167 
168     // 1. dump region meta info into the snapshot directory
169     LOG.debug("Storing '" + region + "' region-info for snapshot.");
170     Object regionData = visitor.regionOpen(region.getRegionInfo());
171     monitor.rethrowException();
172 
173     // 2. iterate through all the stores in the region
174     LOG.debug("Creating references for hfiles");
175 
176     for (Store store : region.getStores()) {
177       // 2.1. build the snapshot reference for the store
178       Object familyData = visitor.familyOpen(regionData, store.getFamily().getName());
179       monitor.rethrowException();
180 
181       List<StoreFile> storeFiles = new ArrayList<StoreFile>(store.getStorefiles());
182       if (LOG.isDebugEnabled()) {
183         LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
184       }
185 
186       // 2.2. iterate through all the store's files and create "references".
187       for (int i = 0, sz = storeFiles.size(); i < sz; i++) {
188         StoreFile storeFile = storeFiles.get(i);
189         monitor.rethrowException();
190 
191         // create "reference" to this store file.
192         LOG.debug("Adding reference for file (" + (i+1) + "/" + sz + "): " + storeFile.getPath());
193         visitor.storeFile(regionData, familyData, storeFile.getFileInfo());
194       }
195       visitor.familyClose(regionData, familyData);
196     }
197     visitor.regionClose(regionData);
198   }
199 
200   /**
201    * Creates a 'manifest' for the specified region, by reading directly from the disk.
202    * This is used by the "offline snapshot" when the table is disabled.
203    */
204   public void addRegion(final Path tableDir, final HRegionInfo regionInfo) throws IOException {
205     // 0. Get the ManifestBuilder/RegionVisitor
206     RegionVisitor visitor = createRegionVisitor(desc);
207 
208     // Open the RegionFS
209     HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(conf, fs,
210           tableDir, regionInfo, true);
211     monitor.rethrowException();
212 
213     // 1. dump region meta info into the snapshot directory
214     LOG.debug("Storing region-info for snapshot.");
215     Object regionData = visitor.regionOpen(regionInfo);
216     monitor.rethrowException();
217 
218     // 2. iterate through all the stores in the region
219     LOG.debug("Creating references for hfiles");
220 
221     // This ensures that we have an atomic view of the directory as long as we have < ls limit
222     // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files in
223     // batches and may miss files being added/deleted. This could be more robust (iteratively
224     // checking to see if we have all the files until we are sure), but the limit is currently 1000
225     // files/batch, far more than the number of store files under a single column family.
226     Collection<String> familyNames = regionFs.getFamilies();
227     if (familyNames != null) {
228       for (String familyName: familyNames) {
229         Object familyData = visitor.familyOpen(regionData, Bytes.toBytes(familyName));
230         monitor.rethrowException();
231 
232         Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(familyName);
233         if (storeFiles == null) {
234           LOG.debug("No files under family: " + familyName);
235           continue;
236         }
237 
238         // 2.1. build the snapshot reference for the store
239         if (LOG.isDebugEnabled()) {
240           LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
241         }
242 
243         // 2.2. iterate through all the store's files and create "references".
244         int i = 0;
245         int sz = storeFiles.size();
246         for (StoreFileInfo storeFile: storeFiles) {
247           monitor.rethrowException();
248 
249           // create "reference" to this store file.
250           LOG.debug("Adding reference for file ("+ (++i) +"/" + sz + "): " + storeFile.getPath());
251           visitor.storeFile(regionData, familyData, storeFile);
252         }
253         visitor.familyClose(regionData, familyData);
254       }
255     }
256     visitor.regionClose(regionData);
257   }
258 
259   /**
260    * Load the information in the SnapshotManifest. Called by SnapshotManifest.open()
261    *
262    * If the format is v2 and there is no data-manifest, means that we are loading an
263    * in-progress snapshot. Since we support rolling-upgrades, we loook for v1 and v2
264    * regions format.
265    */
266   private void load() throws IOException {
267     switch (getSnapshotFormat(desc)) {
268       case SnapshotManifestV1.DESCRIPTOR_VERSION: {
269         this.htd = FSTableDescriptors.getTableDescriptorFromFs(fs, workingDir);
270         ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
271         try {
272           this.regionManifests =
273             SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
274         } finally {
275           tpool.shutdown();
276         }
277         break;
278       }
279       case SnapshotManifestV2.DESCRIPTOR_VERSION: {
280         SnapshotDataManifest dataManifest = readDataManifest();
281         if (dataManifest != null) {
282           htd = HTableDescriptor.convert(dataManifest.getTableSchema());
283           regionManifests = dataManifest.getRegionManifestsList();
284         } else {
285           // Compatibility, load the v1 regions
286           // This happens only when the snapshot is in-progress and the cache wants to refresh.
287           List<SnapshotRegionManifest> v1Regions, v2Regions;
288           ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
289           try {
290             v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
291             v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
292           } finally {
293             tpool.shutdown();
294           }
295           if (v1Regions != null && v2Regions != null) {
296             regionManifests =
297               new ArrayList<SnapshotRegionManifest>(v1Regions.size() + v2Regions.size());
298             regionManifests.addAll(v1Regions);
299             regionManifests.addAll(v2Regions);
300           } else if (v1Regions != null) {
301             regionManifests = v1Regions;
302           } else /* if (v2Regions != null) */ {
303             regionManifests = v2Regions;
304           }
305         }
306         break;
307       }
308       default:
309         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
310     }
311   }
312 
313   /**
314    * Get the current snapshot working dir
315    */
316   public Path getSnapshotDir() {
317     return this.workingDir;
318   }
319 
320   /**
321    * Get the SnapshotDescription
322    */
323   public SnapshotDescription getSnapshotDescription() {
324     return this.desc;
325   }
326 
327   /**
328    * Get the table descriptor from the Snapshot
329    */
330   public HTableDescriptor getTableDescriptor() {
331     return this.htd;
332   }
333 
334   /**
335    * Get all the Region Manifest from the snapshot
336    */
337   public List<SnapshotRegionManifest> getRegionManifests() {
338     return this.regionManifests;
339   }
340 
341   /**
342    * Get all the Region Manifest from the snapshot.
343    * This is an helper to get a map with the region encoded name
344    */
345   public Map<String, SnapshotRegionManifest> getRegionManifestsMap() {
346     if (regionManifests == null || regionManifests.size() == 0) return null;
347 
348     HashMap<String, SnapshotRegionManifest> regionsMap =
349         new HashMap<String, SnapshotRegionManifest>(regionManifests.size());
350     for (SnapshotRegionManifest manifest: regionManifests) {
351       String regionName = getRegionNameFromManifest(manifest);
352       regionsMap.put(regionName, manifest);
353     }
354     return regionsMap;
355   }
356 
357   public void consolidate() throws IOException {
358     if (getSnapshotFormat(desc) == SnapshotManifestV1.DESCRIPTOR_VERSION) {
359       Path rootDir = FSUtils.getRootDir(conf);
360       LOG.info("Using old Snapshot Format");
361       // write a copy of descriptor to the snapshot directory
362       new FSTableDescriptors(conf, fs, rootDir)
363         .createTableDescriptorForTableDirectory(workingDir, htd, false);
364     } else {
365       LOG.debug("Convert to Single Snapshot Manifest");
366       convertToV2SingleManifest();
367     }
368   }
369 
370   /*
371    * In case of rolling-upgrade, we try to read all the formats and build
372    * the snapshot with the latest format.
373    */
374   private void convertToV2SingleManifest() throws IOException {
375     // Try to load v1 and v2 regions
376     List<SnapshotRegionManifest> v1Regions, v2Regions;
377     ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
378     try {
379       v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
380       v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
381     } finally {
382       tpool.shutdown();
383     }
384 
385     SnapshotDataManifest.Builder dataManifestBuilder = SnapshotDataManifest.newBuilder();
386     dataManifestBuilder.setTableSchema(htd.convert());
387 
388     if (v1Regions != null && v1Regions.size() > 0) {
389       dataManifestBuilder.addAllRegionManifests(v1Regions);
390     }
391     if (v2Regions != null && v2Regions.size() > 0) {
392       dataManifestBuilder.addAllRegionManifests(v2Regions);
393     }
394 
395     // Write the v2 Data Manifest.
396     // Once the data-manifest is written, the snapshot can be considered complete.
397     // Currently snapshots are written in a "temporary" directory and later
398     // moved to the "complated" snapshot directory.
399     SnapshotDataManifest dataManifest = dataManifestBuilder.build();
400     writeDataManifest(dataManifest);
401     this.regionManifests = dataManifest.getRegionManifestsList();
402 
403     // Remove the region manifests. Everything is now in the data-manifest.
404     // The delete operation is "relaxed", unless we get an exception we keep going.
405     // The extra files in the snapshot directory will not give any problem,
406     // since they have the same content as the data manifest, and even by re-reading
407     // them we will get the same information.
408     if (v1Regions != null && v1Regions.size() > 0) {
409       for (SnapshotRegionManifest regionManifest: v1Regions) {
410         SnapshotManifestV1.deleteRegionManifest(fs, workingDir, regionManifest);
411       }
412     }
413     if (v2Regions != null && v2Regions.size() > 0) {
414       for (SnapshotRegionManifest regionManifest: v2Regions) {
415         SnapshotManifestV2.deleteRegionManifest(fs, workingDir, regionManifest);
416       }
417     }
418   }
419 
420   /*
421    * Write the SnapshotDataManifest file
422    */
423   private void writeDataManifest(final SnapshotDataManifest manifest)
424       throws IOException {
425     FSDataOutputStream stream = fs.create(new Path(workingDir, DATA_MANIFEST_NAME));
426     try {
427       manifest.writeTo(stream);
428     } finally {
429       stream.close();
430     }
431   }
432 
433   /*
434    * Read the SnapshotDataManifest file
435    */
436   private SnapshotDataManifest readDataManifest() throws IOException {
437     FSDataInputStream in = null;
438     try {
439       in = fs.open(new Path(workingDir, DATA_MANIFEST_NAME));
440       CodedInputStream cin = CodedInputStream.newInstance(in);
441       cin.setSizeLimit(manifestSizeLimit);
442       return SnapshotDataManifest.parseFrom(cin);
443     } catch (FileNotFoundException e) {
444       return null;
445     } finally {
446       if (in != null) in.close();
447     }
448   }
449 
450   private ThreadPoolExecutor createExecutor(final String name) {
451     return createExecutor(conf, name);
452   }
453 
454   public static ThreadPoolExecutor createExecutor(final Configuration conf, final String name) {
455     int maxThreads = conf.getInt("hbase.snapshot.thread.pool.max", 8);
456     return Threads.getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS,
457               Threads.getNamedThreadFactory(name));
458   }
459 
460   /**
461    * Extract the region encoded name from the region manifest
462    */
463   static String getRegionNameFromManifest(final SnapshotRegionManifest manifest) {
464     byte[] regionName = HRegionInfo.createRegionName(
465             ProtobufUtil.toTableName(manifest.getRegionInfo().getTableName()),
466             manifest.getRegionInfo().getStartKey().toByteArray(),
467             manifest.getRegionInfo().getRegionId(), true);
468     return HRegionInfo.encodeRegionName(regionName);
469   }
470 
471   /*
472    * Return the snapshot format
473    */
474   private static int getSnapshotFormat(final SnapshotDescription desc) {
475     return desc.hasVersion() ? desc.getVersion() : SnapshotManifestV1.DESCRIPTOR_VERSION;
476   }
477 }