我有2个文件需要被hadoop集群访问。这两个文件分别是good.txt和bad.txt。 首先,由于这两个文件都需要从不同的节点访问,我将这两个文件放在驱动程序类的分布式缓存中,如下所示
Configuration conf = new Configuration();
DistributedCache.addCacheFile(new URI("/user/training/Rakshith/good.txt"),conf);
DistributedCache.addCacheFile(new URI("/user/training/Rakshith/bad.txt"),conf);
Job job = new Job(conf);
现在好的和坏的文件都放在分布式缓存中。我在mapper类中访问分布式缓存,如下所示
public class LetterMapper extends Mapper<LongWritable,Text,LongWritable,Text> {
private Path[]files;
@Override
protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
throws IOException, InterruptedException {
files=DistributedCache.getLocalCacheFiles(new Configuration(context.getConfiguration()));
}
我需要检查good.txt或bad.txt中是否存在单词。所以我使用这样的东西
File file=new File(files[0].toString()); //to access good.txt
BufferedReader br=new BufferedReader(new FileReader(file));
StringBuider sb=new StringBuilder();
String input=null;
while((input=br.readLine())!=null){
sb.append(input);
}
input=sb.toString();
我应该在输入变量中获取好文件的内容。但我不明白。我错过了什么吗?
答案 0 :(得分:0)
工作成功完成了吗? maptask可能会因为您在此行中使用JobConf而失败
files=DistributedCache.getLocalCacheFiles(new JobConf(context.getConfiguration()));
如果你这样更改它应该有效,我认为你发布的剩余代码没有任何问题。
files=DistributedCache.getLocalCacheFiles(context.getConfiguration());
或
files=DistributedCache.getLocalCacheFiles(new Configuration(context.getConfiguration()));
答案 1 :(得分:0)
@rVr这些是我的驱动程序类
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class AvgWordLength {
public static void main(String[] args) throws Exception {
if (args.length !=2) {
System.out.printf("Usage: AvgWordLength <input dir> <output dir>\n");
System.exit(-1);
}
Configuration conf = new Configuration();
DistributedCache.addCacheFile(new URI("/user/training/Rakshith/good.txt"),conf);
DistributedCache.addCacheFile(new URI("/user/training/Rakshith/bad.txt"),conf);
Job job = new Job(conf);
job.setJarByClass(AvgWordLength.class);
job.setJobName("Average Word Length");
FileInputFormat.setInputPaths(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path(args[1]));
job.setMapperClass(LetterMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
boolean success = job.waitForCompletion(true);
System.exit(success ? 0 : 1);
}
}
我的mapper类是
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
public class LetterMapper extends Mapper<LongWritable,Text,LongWritable,Text> {
private Path[]files;
@Override
protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
throws IOException, InterruptedException {
files=DistributedCache.getLocalCacheFiles(new Configuration(context.getConfiguration()));
System.out.println("in setup()"+files.toString());
}
@Override
public void map(LongWritable key, Text value, Context context)throws IOException,InterruptedException{
int i=0;
System.out.println("in map----->>"+files.toString());//added just to view logs
HashMap<String,String> h=new HashMap<String,String>();
String negword=null;
String input=value.toString();
if(isPresent(input,files[0].toString()){
h.put(input,"good");
}
else
if(isPresent(input,files[1].toString()){
h.put(input,"bad");
}
}
public static boolean isPresent(String n,Path files2) throws IOException{
File file=new File(files2.toString());
BufferedReader br=new BufferedReader(new FileReader(file));
StringBuilder sb=new StringBuilder();
String input=null;
while((input=br.readLine().toString())!=null){
sb.append(input.toString());
}
input=sb.toString();
//System.out.println(input);
Pattern pattern=Pattern.compile(n);
Matcher matcher=pattern.matcher(input);
if(matcher.find()){
return true;
}
else
return false;
}
}