MapReduce: how to chain Mapper >> Reducer >> Reducer

Asked: 2019-03-22 12:25:53

Tags: java hadoop mapreduce bigdata

I'm running into a problem with a job where I need to chain:

Mapper >> Reducer >> Reducer

Here is my data:

Dpt.csv

EmpNo1,DeptNo1
EmpNo2,DeptNo2
EmpNo3,DeptNo1
EmpNo4,DeptNo2
EmpNo5,DeptNo2
EmpNo6,DeptNo1

Emp.csv

EmpNo1,10000
EmpNo2,4675432
EmpNo3,76568658
EmpNo4,241423
EmpNo5,75756
EmpNo6,9796854

In the end, I want something like this:

Dept1 >> Total_Salary_Dept_1
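For example, with the data above, the final output should be:

DeptNo1 >> 86375512
DeptNo2 >> 4992611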

One major problem is that my first reducer is not called when I use multiple files as input.

The second problem is that I can't pass that output on to the next reducer (ChainReducer cannot chain two reducers).

I used this as a reference, but quickly realized it wouldn't help.

I found this link, where the author says in one of the comments: "In the Hadoop 2.X series, internally, you can chain mappers before the reducer with ChainMapper and chain mappers after the reducer with ChainReducer."

Does this mean I would end up with a structure like the following?

ChainMapper (Mapper 1) -> ChainReducer (Reducer 1) -> ChainMapper (a do-nothing mapper) -> ChainReducer (Reducer 2)
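If I read that comment correctly, a single job can only chain one or more mappers, then exactly one reducer, then zero or more mappers after it. A rough sketch of what I mean (all class names here are placeholders, not my real classes):

Configuration chainConf = new Configuration(false);
// one or more mappers before the reducer
ChainMapper.addMapper(job, FirstMapper.class, LongWritable.class, Text.class, Text.class, Text.class, chainConf);
ChainMapper.addMapper(job, SecondMapper.class, Text.class, Text.class, Text.class, Text.class, chainConf);
// exactly one reducer
ChainReducer.setReducer(job, OnlyReducer.class, Text.class, Text.class, Text.class, Text.class, chainConf);
// zero or more mappers after the reducer
ChainReducer.addMapper(job, PostReduceMapper.class, Text.class, Text.class, Text.class, IntWritable.class, chainConf);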

If that is the case, how exactly would the data get passed from Reducer 1 to Mapper 2?
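From what I've pieced together so far, the usual workaround seems to be running two separate jobs, with the first job's output directory used as the second job's input. A rough sketch of the run() body I have in mind, reusing the classes from my code below ("./joined" is a made-up intermediate directory, and I'm guessing that KeyValueTextInputFormat is the right way to re-read job 1's tab-separated output):

// Job 1: join each employee's department and salary.
Job join = Job.getInstance(getConf(), "DeptSalaryJoin");
join.setJarByClass(getClass());
join.setMapperClass(CollectionMapper.class);
join.setReducerClass(DeptSalaryJoiner.class);
join.setOutputKeyClass(Text.class);
join.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(join, new Path("./emp.csv"));
FileInputFormat.addInputPath(join, new Path("./dept.csv"));
FileOutputFormat.setOutputPath(join, new Path("./joined"));
if (!join.waitForCompletion(true)) {
    return 1;
}

// Job 2: sum salaries per department.
// needs: import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
Job aggregate = Job.getInstance(getConf(), "SalaryAggregate");
aggregate.setJarByClass(getClass());
// KeyValueTextInputFormat splits job 1's "dept<TAB>salary" lines back into a
// Text key and Text value; the base Mapper class acts as the identity mapper.
aggregate.setInputFormatClass(KeyValueTextInputFormat.class);
aggregate.setMapperClass(Mapper.class);
aggregate.setReducerClass(SalaryAggregator.class);
aggregate.setMapOutputKeyClass(Text.class);
aggregate.setMapOutputValueClass(Text.class);
aggregate.setOutputKeyClass(Text.class);
aggregate.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(aggregate, new Path("./joined"));
FileOutputFormat.setOutputPath(aggregate, new Path("./DeptAggregate"));
return aggregate.waitForCompletion(true) ? 0 : 1;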

Can someone help me?

Here is my code so far.

Thanks.

package Aggregate;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.InverseMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Sales extends Configured implements Tool{

    public static class CollectionMapper extends Mapper<LongWritable, Text, Text, Text>{

        // Splits each CSV line on "," and emits (EmpNo, DeptNo) for dept records
        // and (EmpNo, salary) for salary records.
        @Override
        public void map(LongWritable key, Text value, Context context) 
                 throws IOException, InterruptedException {

            String[] vals = value.toString().split(",");
            context.write(new Text(vals[0]), new Text(vals[1]));

         }
    }

    public static class DeptSalaryJoiner extends Reducer<Text, Text, Text, Text>{

        // For each employee key, receives one department ID and one salary,
        // and emits (DeptNo, salary). The iteration order of the values is not
        // guaranteed, so the two are told apart by content: salaries are purely
        // numeric, department IDs are not.
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) 
                throws IOException, InterruptedException {

            String dept = null;
            String salary = null;

            for (Text val : values) {
                String s = val.toString();
                if (s.matches("\\d+")) {
                    salary = s;
                } else {
                    dept = s;
                }
            }
            context.write(new Text(dept), new Text(salary));
        }
    }

    public static class SalaryAggregator extends Reducer<Text, Text, Text, IntWritable>{

        // Sums all salaries seen for one department key.
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) 
                throws IOException, InterruptedException {

            int totalSal = 0;
            for (Text val : values) {
                totalSal += Integer.parseInt(val.toString());
            }
            context.write(key, new IntWritable(totalSal));
        }
    }

    public static void main(String[] args) throws Exception {
        int exitFlag = ToolRunner.run(new Sales(), args);
        System.exit(exitFlag);
    }

    @Override
    public int run(String[] args) throws Exception {

        String input1 = "./emp.csv";
        String input2 = "./dept.csv";
        String output = "./DeptAggregate";

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Sales");
        job.setJarByClass(getClass());

        Configuration mapConf = new Configuration(false);
        ChainMapper.addMapper(job, CollectionMapper.class, LongWritable.class, Text.class, Text.class, Text.class,  mapConf);

        Configuration reduce1Conf = new Configuration(false);
        ChainReducer.setReducer(job, DeptSalaryJoiner.class, Text.class, Text.class, Text.class, Text.class, reduce1Conf);

        // This is where it breaks: ChainReducer only supports a single reducer
        // per job, so this second setReducer call is rejected.
        Configuration reduce2Conf = new Configuration(false);
        ChainReducer.setReducer(job, SalaryAggregator.class, Text.class, Text.class, Text.class, IntWritable.class, reduce2Conf);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(input1));
        FileInputFormat.addInputPath(job, new Path(input2));

        // Delete any previous output directory (this uses java.io.File, so it
        // only works against the local filesystem).
        try {
            File f = new File(output);
            FileUtils.forceDelete(f);
        } catch (Exception e) {
            // output directory did not exist yet; nothing to delete
        }

        FileOutputFormat.setOutputPath(job, new Path(output));

        return job.waitForCompletion(true) ? 0 : 1;
    }

}
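Side note: I delete the old output directory with java.io.File above, which I believe only works when running locally; against HDFS I would presumably need the FileSystem API instead, something like:

// needs: import org.apache.hadoop.fs.FileSystem;
FileSystem fs = FileSystem.get(conf);
Path out = new Path(output);
if (fs.exists(out)) {
    fs.delete(out, true); // true = delete recursively
}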

0 Answers:

There are no answers yet.