在查找重复任务中删除逗号

时间:2019-01-20 09:49:28

标签: replace duplicates

我一直在使用下面的脚本,该脚本是从这里的某处获取的,它查看单元格中的单词,如果有重复,则将它们放在下一列中,效果很好。

我的问题是:如果单词后面紧跟着逗号,脚本就识别不出它是重复的单词,例如:

“大红色大,绿色大”应该在下一列中显示重复的“大”,但是由于逗号的存在,只显示了其中的 2 个。我不想从单元格中删除逗号,只希望脚本在运行比较时忽略它们。

import java.io.FileOutputStream;

import org.apache.poi.xwpf.usermodel.*;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.jsoup.select.NodeVisitor;
import org.jsoup.select.NodeTraversor;

/**
 * Converts the {@code <p>} elements of an HTML string into paragraphs of a
 * Word (.docx) document and writes that document to disk.
 *
 * <p>Inside a paragraph the tags {@code <i>}, {@code <b>}, {@code <u>},
 * {@code <br>} and {@code <font size=".." color="#RRGGBB">} are translated into
 * run formatting; every other tag is ignored and only its text is kept.
 *
 * <p>Limitation: closing a {@code <font>} element resets size and colour to the
 * defaults, so the settings of an enclosing {@code <font>} are not restored.
 */
public class HTMLtoDOCX {

 /** Font size applied to runs that are not inside a {@code <font>} element. */
 private static final int DEFAULT_FONT_SIZE = 11;

 /** Run colour (RRGGBB hex) applied to runs that are not inside a {@code <font>} element. */
 private static final String DEFAULT_FONT_COLOR = "000000";

 private XWPFDocument document;

 /**
  * Builds a Word document from the given HTML and writes it to {@code docxPath}.
  *
  * @param html     HTML source; only its {@code <p>} elements are converted
  * @param docxPath path of the .docx file to create or overwrite
  * @throws Exception if the HTML cannot be processed or the file cannot be written
  */
 public HTMLtoDOCX(String html, String docxPath) throws Exception {

  // try-with-resources closes the document (and the stream below) even when
  // parsing or writing fails part-way through.
  try (XWPFDocument doc = new XWPFDocument()) {
   this.document = doc;

   Document htmlDocument = Jsoup.parse(html);
   for (Element htmlParagraph : htmlDocument.select("p")) {

    System.out.println(htmlParagraph);

    createParagraphFromHTML(doc.createParagraph(), htmlParagraph);
   }

   try (FileOutputStream out = new FileOutputStream(docxPath)) {
    doc.write(out);
   }
  }

 }

 /**
  * Fills {@code paragraph} with runs mirroring the text and inline formatting
  * found in {@code htmlParagraph}.
  *
  * @param paragraph     target Word paragraph
  * @param htmlParagraph source HTML element whose node tree is traversed
  */
 void createParagraphFromHTML(XWPFParagraph paragraph, Element htmlParagraph) {

  ParagraphNodeVisitor nodeVisitor = new ParagraphNodeVisitor(paragraph);
  NodeTraversor.traverse(nodeVisitor, htmlParagraph);

 }

 /**
  * Node visitor that tracks the inline formatting currently in effect and
  * emits one run per text node. It uses no state of the enclosing instance,
  * hence it is a static nested class.
  */
 private static final class ParagraphNodeVisitor implements NodeVisitor {

  private final XWPFParagraph paragraph;
  private XWPFRun run;
  private boolean isItalic;
  private boolean isBold;
  private boolean isUnderlined;
  private int fontSize = DEFAULT_FONT_SIZE;
  private String fontColor = DEFAULT_FONT_COLOR;

  ParagraphNodeVisitor(XWPFParagraph paragraph) {
   this.paragraph = paragraph;
   this.run = paragraph.createRun();
  }

  /**
   * Called when a node is entered: writes text nodes into the current run and
   * switches formatting on for the recognised tags.
   */
  @Override
  public void head(Node node, int depth) {
   String nodeName = node.nodeName();

   System.out.println("Start "+nodeName+": " + node);

   if ("#text".equals(nodeName)) {
    run.setText(((TextNode) node).text());
    // The text is stored in the run that was formatted by the preceding
    // head/tail call; whatever follows needs a fresh run.
    run = paragraph.createRun();
   } else if ("i".equals(nodeName)) {
    isItalic = true;
   } else if ("b".equals(nodeName)) {
    isBold = true;
   } else if ("u".equals(nodeName)) {
    isUnderlined = true;
   } else if ("br".equals(nodeName)) {
    run.addBreak();
   } else if ("font".equals(nodeName)) {
    fontColor = parseFontColor(node.attr("color"));
    fontSize = parseFontSize(node.attr("size"));
   }
   applyFormatting();
  }

  /**
   * Called when a node is left: switches the formatting of the closed tag off
   * again and re-applies the resulting state to the current run.
   */
  @Override
  public void tail(Node node, int depth) {
   String nodeName = node.nodeName();

   System.out.println("End "+nodeName);

   if ("i".equals(nodeName)) {
    isItalic = false;
   } else if ("b".equals(nodeName)) {
    isBold = false;
   } else if ("u".equals(nodeName)) {
    isUnderlined = false;
   } else if ("font".equals(nodeName)) {
    fontColor = DEFAULT_FONT_COLOR;
    fontSize = DEFAULT_FONT_SIZE;
   }
   applyFormatting();
  }

  /** Applies the formatting state tracked by this visitor to the current run. */
  private void applyFormatting() {
   run.setItalic(isItalic);
   run.setBold(isBold);
   run.setUnderline(isUnderlined ? UnderlinePatterns.SINGLE : UnderlinePatterns.NONE);
   run.setColor(fontColor);
   run.setFontSize(fontSize);
  }

  /**
   * Turns a {@code color} attribute into an RRGGBB hex string.
   *
   * @param color attribute value, with or without a leading {@code #}; may be empty
   * @return the six hex digits, or the default colour when the value is empty
   *         or not a six-digit hex colour (e.g. a named colour such as "red")
   */
  private static String parseFontColor(String color) {
   String hex = color.trim();
   if (hex.startsWith("#")) {
    hex = hex.substring(1);
   }
   return hex.matches("[0-9A-Fa-f]{6}") ? hex : DEFAULT_FONT_COLOR;
  }

  /**
   * Turns a {@code size} attribute into a font size.
   *
   * @param size attribute value; may be empty
   * @return the parsed positive integer, or the default size when the value is
   *         empty, not an integer, or not positive
   */
  private static int parseFontSize(String size) {
   try {
    int parsed = Integer.parseInt(size.trim());
    return parsed > 0 ? parsed : DEFAULT_FONT_SIZE;
   } catch (NumberFormatException ignored) {
    // A malformed size attribute must not abort the whole conversion.
    return DEFAULT_FONT_SIZE;
   }
  }
 }

 /**
  * Demo entry point: converts a sample HTML snippet (including deliberately
  * mis-nested tags) into {@code ./CreateWordParagraphFromHTML.docx}.
  */
 public static void main(String[] args) throws Exception {

  String html = 
   "<p><font size='32' color='#0000FF'><b>First paragraph.</font></b><br/>Just like a heading</p>"
  +"<p>This is my text <i>which now is in italic <b>but also in bold</b> depending on its <u>importance</u></i>.<br/>Now a <b><i><u>new</u></i></b> line starts <i>within <b>the same</b> paragraph</i>.</p>"
  +"<p><b>Last <u>paragraph <i>comes</u> here</b> finally</i>.</p>"
  +"<p>But yet <u><i><b>another</i></u></b> paragraph having <i><font size='22' color='#FF0000'>special <u>font</u> settings</font></i>. Now default font again.</p>";

  // The constructor performs the whole conversion, so no reference is kept.
  new HTMLtoDOCX(html, "./CreateWordParagraphFromHTML.docx");

 }
}

我不了解 VBA,我尝试过类似下面的写法:   WordArr = Split(WS.Cells(i, 1).Value, " ")   WordArr = Replace(WordArr, ",", "")

但这会引发运行时错误 13(类型不匹配)

谁能指出我正确的方向

谢谢

0 个答案:

没有答案