Spring Batch-自定义作业-动态传递分区FileName

时间:2018-07-03 17:31:26

标签: java spring spring-batch

我正在尝试构建一个Spring批处理应用程序,该批处理作业是动态生成的(不是Spring托管的bean)并使用JobLauncher启动。作业是基于源文件和其他一些信息(例如目标存储等)构建的。基于这些详细信息,我必须使用相应的读取器/写入器来构建作业。

我能够成功构建和启动同步以及多线程作业。我正在尝试扩大应用程序以使用Partition SPI处理大型文件。但是我找不到将正确的分区传递给步骤的方法。

在常规应用程序中,由于使用了@StepScope注解,Spring会为每个Step创建一个单独的读取器;后期绑定(@Value)则负责把StepExecution中的信息(例如filePath)传递给读取器。

不使用Step作用域就可以实现用例吗?

​class CustomJobBuilder {
    ​//JobInfo contains table name, source file etc...

    ​Job build(JobInfo jobInfo) throws Exception {
      return jobBuilderFactory
          .get(jobInfo.getName())
          .start(masterStep())
          .build();
    }


  private Step masterStep() throws Exception {
    Step importFileStep = importFileStep();
    return stepBuilderFactory
        .get("masterStep")
        .partitioner(importFileStep.getName(), partitioner())
        .step(importFileStep)
        .gridSize(6)
        .taskExecutor(new SimpleAsyncTaskExecutor())
        .build();
  }

  private MultiResourcePartitioner partitioner() throws IOException {
    MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
    partitioner.setKeyName(PARTITION_KEY_NAME);
    ResourcePatternResolver patternResolver = new PathMatchingResourcePatternResolver();
    partitioner.setResources(patternResolver.getResources(jobInfo.getFilePath())); //*.csv
    return partitioner;
  }

  private Step importFileStep() throws Exception {
    JdbcBatchItemWriter<Row> successRecordsWriter = dbWriter();
    FlatFileItemWriter<Row> failedRecordsWriter = errorWriter();
    return stepBuilderFactory
        .get("importFile")
        .<Row, Row>chunk(CHUNK_SIZE)
        .reader(csvReader(null))
        .processor(processor())
        .writer(writer(successRecordsWriter, failedRecordsWriter))
        .stream(failedRecordsWriter)
        .build();
  }

  //Problem here. Passing filePath to CSV Reader dynamically
  private ItemReader<Row> csvReader(@Value("#{stepExecutionContext['" + PARTITION_KEY_NAME + "']}") String filePath) {
    DefaultLineMapper<Row> lineMapper = new DefaultLineMapper<>();
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
    tokenizer.setNames(jobInfo.getColumns());
    lineMapper.setLineTokenizer(tokenizer);
    lineMapper.setFieldSetMapper(new CustomFieldSetMapper(jobInfo.getColumns()));
    lineMapper.afterPropertiesSet();

    FlatFileItemReader<Row> reader = new FlatFileItemReader<>();
    reader.setLinesToSkip(1);
    reader.setResource(new FileSystemResource(filePath));
    reader.setLineMapper(lineMapper);
    return reader;
  }
​}

​class CustomJobLauncher {

    JobParameters jobParameters = new JobParametersBuilder()
        .addString("id", UUID.randomUUID().toString())
        .toJobParameters();
    JobExecution jobExecution;
    try {
      CustomJobBuilder jobBuilder = new CustomJobBuilder();
      jobBuilder.setJobBuilderFactory(jobBuilderFactory);
      jobBuilder.setDataSource(getDataSource(objectDto.getDataStore()));
      jobBuilder.setStepBuilderFactory(stepBuilderFactory);

      jobExecution = jobLauncher.run(jobBuilder.build(jobInfo), jobParameters);
      jobExecution.getAllFailureExceptions().forEach(Throwable::printStackTrace);
    } catch (Exception e) {
      LOGGER.error("Failed", e);
    }
}

1 个答案:

答案 0 :(得分:0)

  

我通过模仿MessageChannelRemotePartitionHandler和StepExecutionRequestHandler解决了这个问题。

     

我没有依赖BeanFactoryStepLocator从beanFactory中获取步骤,而是在从属服务器上重新构建了该步骤并执行它。

     

您必须谨慎构造新的步骤,因为它在所有从属服务器上必须完全相同,否则会导致处理/写入不一致。

// PartitionHandler - partition method
/**
 * Splits the master step execution into partitions and sends one
 * {@code PartitionExecutionRequest} message per partition through the messaging gateway.
 *
 * <p>Results are then obtained either from the reply channel or by polling, depending
 * on {@code pollRepositoryForResults}.
 *
 * @param stepExecutionSplitter splitter used to create the partition step executions
 * @param masterStepExecution   the master step execution being partitioned
 * @return the step executions returned by {@code receiveReplies} or {@code pollReplies};
 *         an empty collection (never {@code null}) when the splitter produced no partitions
 * @throws Exception if splitting, sending or collecting results fails
 */
public Collection<StepExecution> handle(StepExecutionSplitter stepExecutionSplitter,
                                        final StepExecution masterStepExecution) throws Exception {

  final Set<StepExecution> split = stepExecutionSplitter.split(masterStepExecution, gridSize);

  if (CollectionUtils.isEmpty(split)) {
    // Nothing to dispatch: return an empty collection instead of null so that
    // callers can iterate over the result without a null check.
    return java.util.Collections.<StepExecution>emptySet();
  }

  // Sequence number of the request within this batch of partition requests.
  int count = 0;

  for (StepExecution stepExecution : split) {
    Message<PartitionExecutionRequest> request = createMessage(count++, split.size(),
        new PartitionExecutionRequest(stepExecution.getJobExecutionId(), stepExecution.getId(), RequestContextProvider.getRequestInfo(), jobInfo, object),
        replyChannel);
    if (logger.isDebugEnabled()) {
      logger.debug("Sending request: " + request);
    }
    messagingGateway.send(request);
  }

  if (!pollRepositoryForResults) {
    return receiveReplies(replyChannel);
  }
  else {
    return pollReplies(masterStepExecution, split);
  }
}

//On the slave
@MessageEndpoint
public class PartitionExecutionRequestHandler {

  private static final Logger LOGGER = LoggerFactory.getLogger(PartitionExecutionRequestHandler.class);
  private BatchBeanProvider batchBeanProvider;

  public void setBatchBeanProvider(BatchBeanProvider batchBeanProvider) {
    this.batchBeanProvider = batchBeanProvider;
  }


  @ServiceActivator
  public StepExecution handle(PartitionExecutionRequest request) {
    StepExecution stepExecution = null;
    try {
      before(request);
      Long jobExecutionId = request.getJobExecutionId();
      Long stepExecutionId = request.getStepExecutionId();
      stepExecution = batchBeanProvider.getJobExplorer().getStepExecution(jobExecutionId, stepExecutionId);
      if (stepExecution == null) {
        throw new NoSuchStepException("No StepExecution could be located for this request: " + request);
      }
      try {
        CustomJobCreator jobCreator = new CustomJobCreator(batchBeanProvider, request.getJobInfo(), request.getObject());
        jobCreator.afterPropertiesSet();
        ResourcePatternResolver patternResolver = new PathMatchingResourcePatternResolver();
        Resource resource = patternResolver.getResource(stepExecution.getExecutionContext().getString(CustomJobCreator.PARTITION_KEY_NAME));
        Step step = jobCreator.partitionStep(resource.getFile().getAbsolutePath());
        step.execute(stepExecution);
      } catch (JobInterruptedException e) {
        stepExecution.setStatus(BatchStatus.STOPPED);
        // The receiver should update the stepExecution in repository
      } catch (Throwable e) {
        stepExecution.addFailureException(e);
        stepExecution.setStatus(BatchStatus.FAILED);
        // The receiver should update the stepExecution in repository
      }
    }
    return stepExecution;
  }
}