我有以下文件,并使用迭代器块来解析文件中某些重复出现的节点/部分。我最初使用正则表达式来解析整个文件,但是当节点中缺少某些字段时,正则表达式就无法匹配。所以我尝试改用 yield 模式。文件格式和我正在使用的代码如下。我只想把文件中的 REPLICATE 节点作为单独的部分提取出来,这样就可以用键字符串获取其中的字段,并存储到对象集合中。我可以从第一个 REPLICATE 出现的位置开始解析,但无法在 REPLICATE 节点结束的地方停止。
文件格式:
X_HEADER
{
DATA_MANAGEMENT_FIELD_2 NA
DATA_MANAGEMENT_FIELD_3 NA
DATA_MANAGEMENT_FIELD_4 NA
SYSTEM_SOFTWARE_VERSION NA
}
Y_HEADER
{
DATA_MANAGEMENT_FIELD_2 NA
DATA_MANAGEMENT_FIELD_3 NA
DATA_MANAGEMENT_FIELD_4 NA
SYSTEM_SOFTWARE_VERSION NA
}
COMPLETION
{
NUMBER 877
VERSION 4
CALIBRATION_VERSION 1
CONFIGURATION_ID 877
}
REPLICATE
{
REPLICATE_ID 1985
ASSAY_NUMBER 656
ASSAY_VERSION 4
ASSAY_STATUS Research
DILUTION_ID 1
}
REPLICATE
{
REPLICATE_ID 1985
ASSAY_NUMBER 656
ASSAY_VERSION 4
ASSAY_STATUS Research
}
代码:
// Question's attempt: reads the file at `path` line by line inside an iterator
// block and yields dictionaries to the caller.
// NOTE(review): as written, nothing is ever stored in `current`, so every
// dictionary this method yields is empty.
static IEnumerable<IDictionary<string, string>> ReadParts(string path)
{
using (var reader = File.OpenText(path))
{
var current = new Dictionary<string, string>();
string line;
while ((line = reader.ReadLine()) != null)
{
// Blank lines carry no information; skip them.
if (string.IsNullOrWhiteSpace(line)) continue;
// A line beginning with "REPLICATE" hands out the dictionary built so far
// and starts a fresh one.
// NOTE(review): this prefix test also matches field lines such as
// "REPLICATE_ID 1985", not only the section header.
if (line.StartsWith("REPLICATE"))
{
yield return current;
current = new Dictionary<string, string>();
}
else
{
// NOTE(review): the split result is never used, so no key/value pair
// is added to `current`.
var parts = line.Split('\t');
}
// NOTE(review): this check sits inside the while loop, so it runs once per
// line rather than once after the last line has been read.
if (current.Count > 0) yield return current;
}
}
}
// Enumerates every dictionary produced by ReadParts for the given file.
// The loop body is a placeholder: no work is done with each part yet.
public static void parseFile(string fileName)
{
foreach (var part in ReadParts(fileName))
{
// part["FIELD1"] will retrieve certain values from the REPLICATE part here
}
}
答案 0 :(得分:3)
嗯,听起来你需要在遇到右花括号时“关闭”一个部分,并且只在那时才 yield return。例如:
/// <summary>
/// Lazily reads the file at <paramref name="path"/> and yields one dictionary of
/// name/value pairs each time a section named exactly "REPLICATE" is closed by "}".
/// </summary>
/// <param name="path">Path of the text file to parse.</param>
/// <returns>
/// A deferred sequence; the file is opened when enumeration starts and is closed
/// when enumeration finishes or the enumerator is disposed.
/// </returns>
/// <remarks>
/// Throws BadDataException when a brace, a section name or a name/value line
/// appears in a position that the section structure does not allow.
/// Name/value lines are expected to begin with a tab and to contain exactly one
/// further tab separating the name from the value.
/// </remarks>
static IEnumerable<IDictionary<string, string>> ReadParts(string path)
{
    using (var reader = File.OpenText(path))
    {
        // Parser state: the section header seen last, and the map being filled
        // once that section's opening brace has been read.
        string currentName = null;
        IDictionary<string, string> currentMap = null;

        // Must be declared before the loop condition assigns to it.
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            if (line == "{")
            {
                // An opening brace needs a pending section name and no open map.
                if (currentName == null || currentMap != null)
                {
                    throw new BadDataException("Open brace at wrong place");
                }
                currentMap = new Dictionary<string, string>();
            }
            else if (line == "}")
            {
                if (currentName == null || currentMap == null)
                {
                    throw new BadDataException("Closing brace at wrong place");
                }
                // Isolate the "REPLICATE-only" requirement to a single
                // line - if you ever need other bits, you can change this.
                if (currentName == "REPLICATE")
                {
                    yield return currentMap;
                }
                // Section finished: reset the state for the next header.
                currentName = null;
                currentMap = null;
            }
            else if (!line.StartsWith("\t"))
            {
                // A line without a leading tab is treated as a section header.
                if (currentName != null || currentMap != null)
                {
                    throw new BadDataException("Section name at wrong place");
                }
                currentName = line;
            }
            else
            {
                if (currentName == null || currentMap == null)
                {
                    throw new BadDataException("Name/value pair at wrong place");
                }
                // Drop the leading tab, then split the name from the value.
                var parts = line.Substring(1).Split('\t');
                if (parts.Length != 2)
                {
                    throw new BadDataException("Invalid name/value pair");
                }
                // Indexer assignment: a repeated name overwrites the earlier value.
                currentMap[parts[0]] = parts[1];
            }
        }
    }
}
说实话,这是一个相当难看的函数。我想我会把它放到它自己的类中(可能是嵌套类)来保存状态,并让每个处理分支成为单独的方法。实际上,这正是状态模式可能适用的一种情况 :)
答案 1 :(得分:2)
/// <summary>
/// Walks the reader line by line and lazily yields the parsed block that follows
/// every line starting with "REPLICATE".
/// </summary>
/// <param name="reader">Source of the lines to parse.</param>
/// <returns>One dictionary per block returned by ParseBlock.</returns>
private IEnumerable<IDictionary<string, string>> ParseFile(System.IO.TextReader reader)
{
    for (string current = reader.ReadLine(); current != null; )
    {
        bool startsReplicate = current.StartsWith("REPLICATE");

        // Always move past the line just inspected: it is either an
        // uninteresting line or the header of the block parsed below.
        current = reader.ReadLine();

        if (!startsReplicate)
        {
            continue;
        }

        // ParseBlock advances `current` through the ref parameter, so the
        // loop resumes on whatever line it leaves there.
        yield return ParseBlock(ref current, reader);
    }
}
/// <summary>
/// Parses one brace-delimited block. On entry <paramref name="token"/> must be
/// the "{" line; on exit it holds the line read after the closing "}".
/// </summary>
/// <param name="token">Current line; updated as lines are consumed.</param>
/// <param name="reader">Source of the following lines.</param>
/// <returns>The dictionary produced by ParseValues for the block body.</returns>
/// <exception cref="Exception">
/// The current line is not "{" on entry, or is not "}" after the values were read.
/// </exception>
private IDictionary<string, string> ParseBlock(ref string token, System.IO.TextReader reader)
{
    bool opensBlock = token == "{";
    if (!opensBlock)
    {
        throw new Exception("Missing opening brace.");
    }

    // Step onto the first line inside the braces and collect the values.
    token = reader.ReadLine();
    IDictionary<string, string> values = ParseValues(ref token, reader);

    bool closesBlock = token == "}";
    if (!closesBlock)
    {
        throw new Exception("Missing closing brace.");
    }

    // Consume the closing brace so the caller sees the line after the block.
    token = reader.ReadLine();
    return values;
}
/// <summary>
/// Reads tab-separated name/value lines, starting with <paramref name="token"/>,
/// until a "}" line or the end of the input is reached.
/// </summary>
/// <param name="token">
/// Current line; on return it is the "}" line that stopped the loop, or null
/// when the input ended first.
/// </param>
/// <param name="reader">Source of the following lines.</param>
/// <returns>A dictionary mapping the first field of each line to the second.</returns>
/// <exception cref="Exception">A line has fewer than two tab-separated fields.</exception>
/// <exception cref="ArgumentException">The same name appears twice (thrown by Add).</exception>
private IDictionary<string, string> ParseValues(ref string token, System.IO.TextReader reader)
{
    IDictionary<string, string> result = new Dictionary<string, string>();

    // `&&` is the boolean conjunction; `and` is only valid inside patterns.
    while (token != null && token != "}")
    {
        var args = token.Split('\t');
        if (args.Length < 2)
        {
            throw new Exception("Invalid name/value pair: expected two tab-separated fields.");
        }

        // Fields beyond the second are ignored.
        result.Add(args[0], args[1]);
        token = reader.ReadLine();
    }

    return result;
}
答案 2 :(得分:1)
如果在 while 循环结束后添加 yield return current;,您将获得最后一个字典。
我认为最好把 '}' 作为当前块结束的标志来检查,然后把 yield return 放在那里。虽然您无法使用正则表达式解析整个文件,但可以使用正则表达式在行内搜索键值对。以下迭代器代码应该可以工作。它只返回 REPLICATE 块的字典。
// Matches a line that is a key optionally followed by whitespace and a value.
// The key is a run of word characters; the value is everything after the
// separating whitespace. The value part is optional, so a bare key still matches.
// The value uses an unescaped '.' (any character); an escaped "\." would only
// accept literal dots and reject ordinary values such as "1985".
private static readonly string partPattern = @"^(?<Key>\w*)(\s+(?<Value>.*))?$";
/// <summary>
/// Opens the file at <paramref name="path"/> and lazily yields one dictionary for
/// every line whose trimmed text is "REPLICATE", delegating the block body to
/// parseReplicateBlock.
/// </summary>
/// <param name="path">Path of the text file to read.</param>
/// <returns>A deferred sequence of dictionaries, one per REPLICATE block.</returns>
static IEnumerable<IDictionary<string, string>> ReadParts(string path)
{
    using (var input = File.OpenText(path))
    {
        for (string raw = input.ReadLine(); raw != null; raw = input.ReadLine())
        {
            // Whitespace-only lines are never section headers.
            bool isBlank = string.IsNullOrWhiteSpace(raw);

            // Only a header reading exactly "REPLICATE" starts a block; the
            // helper consumes the reader up to and including the closing brace.
            if (!isBlank && raw.Trim().CompareTo("REPLICATE") == 0)
            {
                yield return parseReplicateBlock(input);
            }
        }
    }
}
/// <summary>
/// Reads one brace-delimited block from <paramref name="reader"/> and returns the
/// key/value pairs found in it.
/// </summary>
/// <param name="reader">Reader positioned just after the block's header line.</param>
/// <returns>The dictionary built from the lines that matched partPattern.</returns>
/// <exception cref="ApplicationException">
/// The input ends before a closing brace is found (an opening-brace failure is
/// raised by VerifyOpening).
/// </exception>
private static IDictionary<string, string> parseReplicateBlock(StreamReader reader)
{
    // The block has to start with "{" before any pairs are read.
    VerifyOpening(reader);

    var fields = new Dictionary<string, string>();
    for (string raw = reader.ReadLine(); raw != null; raw = reader.ReadLine())
    {
        if (string.IsNullOrWhiteSpace(raw))
        {
            continue;
        }

        string text = raw.Trim();

        // The pattern exposes named groups, so the key and value are looked up
        // by name rather than by position.
        Match pair = Regex.Match(text, partPattern);
        Group key = pair.Groups["Key"];
        if (key.Length > 0)
        {
            fields.Add(key.Value, pair.Groups["Value"].Value);
            continue;
        }

        // Not a pair: the only other line acted upon is the closing brace.
        if (text.CompareTo("}") == 0)
        {
            return fields;
        }
    }

    // Input ran out while the block was still open.
    throw new ApplicationException("Missing closing brace");
}
/// <summary>
/// Consumes lines from <paramref name="reader"/> until the first one that is not
/// blank, and requires that line to be an opening brace.
/// </summary>
/// <param name="reader">Reader to advance past the opening brace.</param>
/// <exception cref="ApplicationException">
/// The first non-blank line is not "{", or the input ends before any non-blank line.
/// </exception>
private static void VerifyOpening(StreamReader reader)
{
    for (string text = reader.ReadLine(); text != null; text = reader.ReadLine())
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            continue;
        }

        // The first non-blank line decides the outcome either way.
        if (text.Trim().CompareTo("{") != 0)
        {
            throw new ApplicationException("Missing opening brace");
        }
        return;
    }

    // Reached the end of the input without seeing any non-blank line.
    throw new ApplicationException("Missing opening brace");
}
更新:我已确保正则表达式字符串涵盖没有值的情况。此外,所有组索引都已改为使用组名,以免日后修改正则表达式字符串时出现问题。