我正在尝试按照 Spring Data Hadoop 文档中的说明读写 Parquet 文件,但出现以下错误:
创建在类路径资源 [com/example/demo/DatasetConfig.class] 中定义的名为 “datasetRepositoryFactory” 的 bean 时出错:调用 init 方法失败;嵌套异常为 java.lang.IllegalArgumentException:namespace 属性是必需的
我参考的 Spring Boot 示例项目:https://github.com/spring-projects/spring-hadoop-samples/tree/master/dataset
DatasetConfig.java
/**
 * Spring configuration that declares the dataset-related beans: the repository
 * factory, a writer for {@code FileInfo} records, the dataset operations
 * template and the {@code FileInfo} dataset definition.
 *
 * <p>Bean definitions from {@code hadoop-context.xml} are imported alongside
 * this class; the injected Hadoop configuration presumably comes from there.
 */
@Configuration
@ImportResource("hadoop-context.xml")
public class DatasetConfig {

    @Autowired
    private org.apache.hadoop.conf.Configuration hadoopConfiguration;

    /**
     * Builds the repository factory from the injected Hadoop configuration,
     * rooted at {@code /tmp}.
     *
     * <p>NOTE(review): only the configuration and base path are set here;
     * no namespace is configured on the factory.
     */
    @Bean
    public DatasetRepositoryFactory datasetRepositoryFactory() {
        DatasetRepositoryFactory factory = new DatasetRepositoryFactory();
        factory.setConf(hadoopConfiguration);
        factory.setBasePath("/tmp");
        return factory;
    }

    /** Writer for {@code FileInfo} records, built from the factory and the dataset definition beans. */
    @Bean
    public DataStoreWriter<FileInfo> dataStoreWriter() {
        return new AvroPojoDatasetStoreWriter<FileInfo>(
                FileInfo.class,
                datasetRepositoryFactory(),
                fileInfoDatasetDefinition());
    }

    /** Dataset template configured with the single {@code FileInfo} definition and the repository factory. */
    @Bean
    public DatasetOperations datasetOperations() {
        DatasetTemplate template = new DatasetTemplate();
        template.setDatasetDefinitions(Arrays.asList(fileInfoDatasetDefinition()));
        template.setDatasetRepositoryFactory(datasetRepositoryFactory());
        return template;
    }

    /** Dataset definition for {@code FileInfo}: Parquet format, null values not allowed. */
    @Bean
    public DatasetDefinition fileInfoDatasetDefinition() {
        DatasetDefinition fileInfoDefinition = new DatasetDefinition();
        fileInfoDefinition.setFormat(Formats.PARQUET.getName());
        fileInfoDefinition.setTargetClass(FileInfo.class);
        fileInfoDefinition.setAllowNullValues(false);
        return fileInfoDefinition;
    }
}
Main.java
@ComponentScan
@EnableAutoConfiguration
public class ParquetReaderApplication implements CommandLineRunner {
private DatasetOperations datasetOperations;
private DataStoreWriter<FileInfo> writer;
private long count;
/**
 * Autowired setter that stores the {@code DatasetOperations} bean in this instance.
 *
 * @param datasetOperations the dataset operations to keep in the {@code datasetOperations} field
 */
@Autowired
public void setDatasetOperations(DatasetOperations datasetOperations) {
this.datasetOperations = datasetOperations;
}
/**
 * Autowired setter that stores the writer used to persist {@code FileInfo} records.
 *
 * <p>The parameter is parameterized with {@code FileInfo} so that the assignment
 * to the {@code writer} field is type-checked instead of relying on a raw type.
 *
 * @param dataStoreWriter the writer to keep in the {@code writer} field
 */
@Autowired
public void setDataStoreWriter(DataStoreWriter<FileInfo> dataStoreWriter) {
    this.writer = dataStoreWriter;
}
/**
 * Application entry point: hands this class and the command-line arguments
 * to {@code SpringApplication.run}.
 *
 * @param args command-line arguments, passed through unchanged
 */
public static void main(String[] args) {
SpringApplication.run(ParquetReaderApplication.class, args);
}
/**
 * Processes the directory named by the {@code user.home} system property.
 *
 * <p>The directory is handed to {@code processFile}; {@code close()} is always
 * invoked afterwards, even when processing fails. Once that is done,
 * {@code countFileInfoEntries()} is called and a completion message is printed.
 *
 * @param strings command-line arguments (unused)
 * @throws StoreException wrapping any {@code IOException} raised while processing
 */
@Override
public void run(String... strings) {
    final String homePath = System.getProperty("user.home");
    System.out.println("Processing " + homePath + " ...");
    final File homeDir = new File(homePath);
    try {
        processFile(homeDir);
    } catch (IOException ex) {
        throw new StoreException("Error writing FileInfo", ex);
    } finally {
        close();
    }
    countFileInfoEntries();
    System.out.println("Done!");
}
/**
 * Recursively visits {@code file}. Directories are descended into; for every
 * other entry a {@code FileInfo} is built from its name, parent path, length
 * and last-modified time and passed to the writer. A progress line is printed
 * each time the running count reaches a multiple of 10000.
 *
 * <p>Directories whose contents cannot be listed are skipped with a message
 * on standard error instead of aborting the whole walk.
 *
 * @param file the file or directory to process
 * @throws IOException declared for callers; propagated from nested calls
 */
private void processFile(File file) throws IOException {
    if (file.isDirectory()) {
        File[] children = file.listFiles();
        if (children == null) {
            // listFiles() returns null (not an empty array) when the directory
            // cannot be read or an I/O error occurs; iterating it would throw
            // a NullPointerException.
            System.err.println("Skipping unreadable directory " + file);
            return;
        }
        for (File child : children) {
            processFile(child);
        }
        return;
    }

    if (++count % 10000 == 0) {
        System.out.println("Writing " + count + " ...");
    }
    // NOTE(review): the length is narrowed to int, so sizes above
    // Integer.MAX_VALUE bytes will not be represented correctly.
    FileInfo fileInfo = new FileInfo(file.getName(), file.getParent(), (int) file.length(), file.lastModified());
    writer.write(fileInfo);
}
预期结果:
hdfs dfs -ls /tmp/*
Found 2 items
drwxr-xr-x - spring supergroup 0 2014-06-09 17:09 /user/spring/fileinfo/.metadata
-rw-r--r-- 3 spring supergroup 13824695 2014-06-09 17:10 /user/spring/fileinfo/6876f250-010a-404a-b8c8-0ce1ee759206.avro
答案 0 :(得分:1)
默认情况下,DatasetRepositoryFactory 没有可用的 namespace,因此请显式设置 namespace。
public void setNamespace(java.lang.String namespace)(见文档)
要使用的命名空间。默认不设置命名空间(Kite SDK API 使用 “default”)。
@Bean
public DatasetRepositoryFactory datasetRepositoryFactory() {
DatasetRepositoryFactory datasetRepositoryFactory = new
DatasetRepositoryFactory();
datasetRepositoryFactory.setConf(hadoopConfiguration);
datasetRepositoryFactory.setBasePath("/tmp");
datasetRepositoryFactory.setNamespace("default");
return datasetRepositoryFactory;