Using HBase bulk load, I get an "HFileOutputFormat2 not found" error when running hadoop jar in the shell

Time: 2021-05-25 03:16:33

Tags: hadoop hbase bulk-load

I want to push data into HBase with bulk load, processing each input file with a single map task. But when I run the program from the shell, it fails with an error.

I overrode RecordReader so that each map reads its whole file as one record, but I can't work out why the job fails.

21/05/25 10:46:01 INFO mapreduce.Job: Job job_1621909750051_0002 running in uber mode : false
21/05/25 10:46:01 INFO mapreduce.Job:  map 0% reduce 0%
21/05/25 10:46:01 INFO mapreduce.Job: Job job_1621909750051_0002 failed with state FAILED due to: Application application_1621909750051_0002 failed 2 times due to AM Container for appattempt_1621909750051_0002_000002 exited with  exitCode: 1
For more detailed output, check application tracking page:http://master:8088/cluster/app/application_1621909750051_0002Then, click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id: container_1621909750051_0002_02_000001
Exit code: 1
Stack trace: ExitCodeException exitCode=1: 
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:585)
    at org.apache.hadoop.util.Shell.run(Shell.java:482)
    at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:776)
    at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:212)
    at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)
    at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)


Container exited with a non-zero exit code 1
Failing this attempt. Failing the application.
21/05/25 10:46:01 INFO mapreduce.Job: Counters: 0


Here is my Java program:


import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;

/**
 * A map task processes one whole file.
 */
public class WaveDataInput {
    private static final Log LOG = LogFactory.getLog(WaveDataInput.class);

 
    public static class WholeReader extends RecordReader<Text, Text> {

        private Text key = new Text();
        private Text value = new Text();
        private boolean isProcessed = false;
        private FSDataInputStream fsDataInputStream;
        private byte[] buffer;
        private FileSplit fileSplit;


        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            this.fileSplit = (FileSplit) split;
            Path path = fileSplit.getPath();
            FileSystem fileSystem = path.getFileSystem(context.getConfiguration());
            fsDataInputStream = fileSystem.open(path);  // open an input stream for the file
        }

     
        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (!isProcessed) {
                String keyName = fileSplit.getPath().toString();
                key.set(keyName);
                buffer = new byte[(int) fileSplit.getLength()];
                fsDataInputStream.readFully(buffer);  // read() may return fewer bytes than requested, so read fully
                value.set(buffer, 0, buffer.length);

                isProcessed = true;
                return true;
            } else {
                return false;
            }
        }

        @Override
        public Text getCurrentKey() throws IOException, InterruptedException {
            return key;
        }

        @Override
        public Text getCurrentValue() throws IOException, InterruptedException {
            return value;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return isProcessed ? 1 : 0;
        }

        @Override
        public void close() throws IOException {
            if (fsDataInputStream != null) {
                IOUtils.closeStream(fsDataInputStream);
            }
        }
    }

    
    public static class customFileInputFormat extends FileInputFormat<Text, Text> {

        @Override
        protected boolean isSplitable(JobContext context, Path filename) {
            return false;
        }

        @Override
        public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            return new WholeReader();
        }
    }


 
    public static class WaveMapper extends Mapper<Text, Text, ImmutableBytesWritable, Put> {

        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            String[] text = value.toString().split("\n");

            /**
             * flag level lat  lon timestamp depth time_gap name
             * 2     2 335024N 0782848W  0000     9.8  3600 NDBC
             *
             * data:
             * flag  time depth wave_speed wave_dir
             * 3 20160101000800     0.8       6.60    297.0
             * 3 20160101000800     9.8       2.90    297.0
             */
            float lon = 0.0f;   
            float lat = 0.0f;   
            int level = 0;
            int region = 0;

            for (String line : text) {
                String ele[] = line.split(" ");
                if (ele.length < 3) continue;  // skip short/blank lines instead of aborting the whole file

                if ("2".equals(ele[0].trim())) {
                    if(ele.length < 4) continue;
                    level = Integer.parseInt(ele[1]);
                    lat = getLonLat(ele[2]);
                    lon = getLonLat(ele[3]);

                } else if ("3".equals(ele[0].trim())) {
                    if(ele.length < 5) continue;
                    region = (ele[1].hashCode())%100;
                    String timestap = System.nanoTime() + "";
                    String stamp = timestap.substring(timestap.length() - 9, timestap.length() - 2);
                    String rowkey = (String.format("%02d", region)) + ele[1] +stamp;

                    Put put = new Put(Bytes.toBytes(rowkey));
                    put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("time"), Bytes.toBytes(ele[1]));
                    put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("lat"), Bytes.toBytes(lat));
                    put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("lon"), Bytes.toBytes(lon));
                    put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("depth"), Bytes.toBytes(Float.parseFloat(ele[2])));
                    put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("wave_speed"), Bytes.toBytes(Float.parseFloat(ele[3])));
                    put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("wave_dir"), Bytes.toBytes(Float.parseFloat(ele[4])));

                    ImmutableBytesWritable outkey = new ImmutableBytesWritable(rowkey.getBytes());
                    context.write(outkey, put);
                }
            }


        }
    }

    public static void main(String[] args) throws Exception {
        if(args.length < 3){
            return;
        }
        System.setProperty("HADOOP_USER_NAME", "611");
        long start = System.currentTimeMillis();

        String inputPath = "hdfs://192.168.1.237:9000/dataset/gtspp/gtspp4_in202012.txt";
        String outputPath = "hdfs://192.168.1.237:9000/dataset/out3";
        String HbaseTable = "sea:element";

        inputPath = args[0];
        outputPath = args[1];
        HbaseTable = args[2];

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.1.237:9000");
        conf.set("hbase.zookeeper.quorum", "master,slave1");
        conf.set("hbase.master", "master:6000");
        conf.set("hbase.zookeeper.property.clientPort", "2183");

        conf.set("mapreduce.app-submission.cross-platform", "true");
   
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());

        Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        HTable table = (HTable) conn.getTable(TableName.valueOf(HbaseTable));

        final Path OutputPath=new Path(outputPath);
        // delete outpath
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.1.237:9000"), conf);
        if(fs.exists(OutputPath))
            fs.delete(OutputPath, true);

        Job job = Job.getInstance(conf, "wave-load");
        job.setMapperClass(WaveMapper.class);

        job.setJarByClass(WaveDataInput.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        job.setInputFormatClass(customFileInputFormat.class);  // use the whole-file input format defined above
        job.setOutputFormatClass(HFileOutputFormat2.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        if(job.waitForCompletion(true)){
            LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
            loader.doBulkLoad(OutputPath, admin, table, conn.getRegionLocator(TableName.valueOf(HbaseTable)));
        }


    }

    public static float getLonLat(String str){
        int flag = 1;
        if(str.contains("N") || str.contains("E")){
            flag = 1;
        }else if(str.contains("W") || str.contains("S")){
            flag = -1;
        }else {
            return 0;
        }

        float value =Float.parseFloat(str.substring(0, str.length() - 1))/10000 ;
        return value * flag;
    }
}
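
For comparison, the following is a minimal sketch (not from the original post) of how the job-setup part of main() is commonly wired for an HBase bulk load. It reuses the conf, conn, admin, table, inputPath, outputPath and HbaseTable variables defined above and assumes the HBase 1.x client API that the imports suggest. The main difference is HFileOutputFormat2.configureIncrementalLoad, which configures the output format, the reducer and a total-order partitioner from the table's region boundaries and, in the stock HBase implementation, also adds the HBase dependency jars to the job:

Job job = Job.getInstance(conf, "wave-load");
job.setJarByClass(WaveDataInput.class);
job.setMapperClass(WaveMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);
job.setInputFormatClass(customFileInputFormat.class);   // one map per whole file
FileInputFormat.setInputPaths(job, new Path(inputPath));
FileOutputFormat.setOutputPath(job, new Path(outputPath));

// Configures HFileOutputFormat2 as the output format, the sorting/partitioning
// needed for HFile ordering, and ships the HBase jars with the job.
RegionLocator regionLocator = conn.getRegionLocator(TableName.valueOf(HbaseTable));
HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);

if (job.waitForCompletion(true)) {
    new LoadIncrementalHFiles(conf).doBulkLoad(new Path(outputPath), admin, table, regionLocator);
}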



Here is the content of http://master:8088/cluster/app/application_1621909750051_0002:

2021-05-25 10:46:00,634 INFO [main] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: Created MRAppMaster for application appattempt_1621909750051_0002_000002
2021-05-25 10:46:00,824 INFO [main] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: Executing with tokens:
2021-05-25 10:46:00,825 INFO [main] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: Kind: YARN_AM_RM_TOKEN, Service: , Ident: (appAttemptId { application_id { id: 2 cluster_timestamp: 1621909750051 } attemptId: 2 } keyId: -1575443230)
2021-05-25 10:46:01,055 INFO [main] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: Using mapred newApiCommitter.
2021-05-25 10:46:01,057 INFO [main] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: OutputCommitter set in config null
2021-05-25 10:46:01,094 INFO [main] org.apache.hadoop.service.AbstractService: Service org.apache.hadoop.mapreduce.v2.app.MRAppMaster failed in state INITED; cause: org.apache.hadoop.yarn.exceptions.YarnRuntimeException: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2 not found
org.apache.hadoop.yarn.exceptions.YarnRuntimeException: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2 not found
    at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$2.call(MRAppMaster.java:518)
    at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$2.call(MRAppMaster.java:498)
    at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.callWithJobClassLoader(MRAppMaster.java:1593)
    at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.createOutputCommitter(MRAppMaster.java:498)
    at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.serviceInit(MRAppMaster.java:284)
    at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163)
    at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$5.run(MRAppMaster.java:1551)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762)
    at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.initAndStartAppMaster(MRAppMaster.java:1548)
    at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.main(MRAppMaster.java:1481)
Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2 not found
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2267)
    at org.apache.hadoop.mapreduce.task.JobContextImpl.getOutputFormatClass(JobContextImpl.java:222)
    at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$2.call(MRAppMaster.java:514)
    ... 11 more
Caused by: java.lang.ClassNotFoundException: Class org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2 not found
    at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2171)
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2265)
    ... 13 more
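
The decisive line in this log is the ClassNotFoundException: the MRAppMaster fails while creating the OutputCommitter, before any map task runs, because org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2 lives in the HBase jars, which hadoop jar does not ship to the YARN containers by itself. Besides configureIncrementalLoad (sketched above), a hedged one-line alternative in the driver is TableMapReduceUtil.addDependencyJars, which exists in HBase 1.x/2.x and requires the org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil import:

// Adds the HBase dependency jars and the jars of the job's configured
// key/value/format classes to the job's distributed cache, so the
// ApplicationMaster and the tasks can load HFileOutputFormat2.
TableMapReduceUtil.addDependencyJars(job);

Note that putting the output of the hbase mapredcp command on HADOOP_CLASSPATH only helps the submitting JVM; the ApplicationMaster and tasks still need the jars shipped with the job (as above) or installed on every node.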

0 answers:

No answers yet.