给定一个包含<name> brain </name> <job> evil scientist </job>
等文本的文本文件,我需要提取 HTML 标签(<> 与 </>)
之间的信息,并获取对应的标签。我的方法不断抛出下标越界(index out of bounds)异常,我不确定该如何修复这个方法。
/**
 * Attempts to collect the text found between each {@code <tag>} / {@code </tag>} pair in
 * {@code html}, appending a newline after every extracted piece.
 *
 * <p>NOTE: this is the failing version the question is about. As written it throws
 * {@link StringIndexOutOfBoundsException} once the loop reaches a pass where the closing tag
 * is still present but the opening tag is not (see the comments inside the loop).
 *
 * @param html text to scan
 * @param tag  tag name without angle brackets
 * @return the extracted pieces, each followed by {@code "\n"}
 */
public static String extractTagContent(String html, String tag) {
String list = "";
// Keep looping while a closing tag is still present in the remaining text.
while (html.contains("</" + tag + ">")) {
// The start offset only skips "<" + tag (tag.length() + 1), so the ">" of the opening tag
// ends up inside the extracted text.
list += html.substring(html.indexOf("<" + tag + ">") + tag.length() + 1, html.indexOf("</" + tag + ">")) + "\n";
// Drops everything through the opening tag plus one more character, but leaves the closing
// tag in place. On a later pass indexOf("<tag>") can return -1 while the loop condition
// still holds, and the substring call above then receives a start index greater than its
// end index.
html = html.substring(html.indexOf("<" + tag + ">") + tag.length() + 3);
}
return list;
}
答案 0 :(得分:0)
修复了循环中的以下两行:
// The start offset is tag.length() + 2, i.e. the full length of "<tag>", so extraction begins
// right after the opening tag.
list += html.substring(html.indexOf("<" + tag + ">") + tag.length() + 2, html.indexOf("</" + tag + ">")) + "\n";
// Cut the processed element out of html: keep the text before the opening tag and the text
// after the closing tag ("</tag>" is tag.length() + 3 characters long).
html = html.substring(0, html.indexOf("<" + tag + ">")) + html.substring(html.indexOf("</" + tag + ">") + tag.length() + 3, html.length());
完整代码:
/**
 * Extracts the text enclosed by every {@code <tag>} ... {@code </tag>} pair in {@code html}.
 *
 * <p>The input is scanned once from left to right. Each opening tag is paired with the first
 * closing tag that follows it; the text in between is appended to the result followed by a
 * newline. Closing tags that appear before any opening tag, and an opening tag that is never
 * closed, are ignored instead of causing a {@link StringIndexOutOfBoundsException}.
 *
 * @param html text to scan; must not be {@code null}
 * @param tag  tag name without angle brackets, e.g. {@code "name"}
 * @return the contents of all matched tags, each terminated by {@code "\n"}; an empty string
 *         when no complete pair is found
 */
public static String extractTagContent(String html, String tag) {
    final String openTag = "<" + tag + ">";
    final String closeTag = "</" + tag + ">";
    final StringBuilder contents = new StringBuilder();

    // Advance a cursor through the input rather than rebuilding the string on every match.
    int searchFrom = 0;
    while (true) {
        final int openIndex = html.indexOf(openTag, searchFrom);
        if (openIndex < 0) {
            break; // no further opening tag
        }
        final int contentStart = openIndex + openTag.length();
        // Only look for the closing tag after the opening one, so a stray closing tag
        // earlier in the text can never produce an end index smaller than the start index.
        final int closeIndex = html.indexOf(closeTag, contentStart);
        if (closeIndex < 0) {
            break; // opening tag is never closed
        }
        contents.append(html, contentStart, closeIndex).append('\n');
        searchFrom = closeIndex + closeTag.length();
    }
    return contents.toString();
}