我必须读取一个文件,然后将其数据写入结果处理到另一个文件。这个过程花费了太多时间,所以我尝试以并行方式进行读/处理/写入。代码效果很好。但是有一个问题,当我在大约40GB程序停止工作后测试它处理非常大的文件(100GB)时。它不会抛出任何异常(“TimeoutException”除外)。我花了几天时间试图改变很多东西。我知道这不是方法或记忆。但我真的很困惑的原因和方法让它运作良好。
注意:我想在Code Review上发布此内容,但代码审核规则表示不发布问题,因此我将其发布在此处。
我使用代码的方式(它在背景工作者上)
BackgroundWorker worker = (BackgroundWorker)sender;
ReaderWriterMultiThread readerWriterMultiThread = null;
int bufferSize = 2 * 1024 * 1024;
int readerWriterMultiThreadPartsNumber = 10;
int sizeToReadInThisIteration = 0;
int oldprecentage = 0;
long fileDid = 0;
using (FileStream streamReader = new FileStream(fromAddress, FileMode.Open))
using (BinaryReader binaryReader = new BinaryReader(streamReader))
using (FileStream streamWriter = new FileStream(toAddress, FileMode.Open))
using (BinaryWriter binaryWriter = new BinaryWriter(streamWriter))
{
sizeToReadInThisIteration = bufferSize * readerWriterMultiThreadPartsNumber;
streamWriter.Seek(0, SeekOrigin.Begin);
while (streamWriter.Position < length)
{
if (worker.CancellationPending)
{
e.Cancel = true;
return;
}
//change sizeToReadInThisIteration if needs
if (streamWriter.Position + sizeToReadInThisIteration > length)
{ sizeToReadInThisIteration = Convert.ToInt32(length - streamWriter.Position); }
//new it
readerWriterMultiThread = new ReaderWriterMultiThread();
//read/do/write
readerWriterMultiThread.Start(binaryReader, binaryWriter, bufferSize, sizeToReadInThisIteration,
(ref byte[] bytes) => DoNothing(ref bytes));
//report process if needs
fileDid += sizeToReadInThisIteration;
if (((int)(fileDid * 100 / length)) > oldprecentage)
{
oldprecentage = (int)(fileDid * 100 / length);
worker.ReportProgress(oldprecentage);
}
}//while
}//using
DoNothing
方法是:
public void DoNothing(ref byte[] bufferToCode)
{ }
和ReaderWriterMultiThread
类是:(最初是代码使用的线程,但我更改了它以使用任务。)
public class ReaderWriterMultiThread
{
#region variables
//buffer(contain several part)
List<byte[]> buffer = new List<byte[]>();
//lock objects
private object bufferLockForRead = new object();
private object bufferLockForWrite = new object();
//indexes
int readIndex = 0;
int doReadIndex = 0;
int doWriteIndex = 0;
int writeIndex = 0;
//complete vars
int lastIndex = int.MaxValue;
bool readCompleted = false;
//waiting properties
private bool doIsWaiting = false;
private bool writerIsWaiting = false;
//error properties
private bool anyErrorHappend = false;
private string errorsMessage = string.Empty;
//proc delegate
public delegate void DelegateMethod(ref byte[] bytes);
//proc delegate instance
DelegateMethod delegateM;
//
#endregion variables
//==============================
#region methods
//
public void Start(BinaryReader binaryReader, BinaryWriter binaryWriter, int bufferPartsSize, int size, DelegateMethod delegateMethod)
{
//new delegate
delegateM = new DelegateMethod(delegateMethod);
//for wait all
Task[] tasks = new Task[3];
//run
var parentTask = Task.Factory.StartNew(() =>
{
tasks[0] = Task.Factory.StartNew(() =>
{
Writer(binaryWriter);
});
tasks[1] = Task.Factory.StartNew(() =>
{
Do();
});
tasks[2] = Task.Factory.StartNew(() =>
{
Reader(binaryReader, bufferPartsSize, size);
});
});
//wait
parentTask.Wait();
if (!Task.WaitAll(tasks, 10000))
{ throw new TimeoutException(); }
if (anyErrorHappend)
{ throw new Exception(errorsMessage); }
}
private void AddByReader(byte[] newBytes, bool completed)
{
try
{
lock (bufferLockForRead)
{
//add data to buffer
buffer.Add(newBytes);
//updare readIndex
readIndex++;
//if completed show it
if (completed)
{
readCompleted = true;
lastIndex = buffer.Count;//it uses as <lastIndex (so lastIndex = buffer.Count is ok)
}
//manage happend error
if (anyErrorHappend)
{
readCompleted = true;
lastIndex = doReadIndex + 1;
}
//if do is waiting pulse it
if (doIsWaiting)
{ Monitor.Pulse(bufferLockForRead); }
}
}
catch (Exception ex)
{ Debug.Assert(false, ex.ToString()); }
}
private byte[] GetByDo()
{
try
{
lock (bufferLockForRead)
{
//if data did not read already wait
if (doReadIndex == readIndex)
{
doIsWaiting = true;
Monitor.Wait(bufferLockForRead);
}
//do is not waiting now
doIsWaiting = false;
//in case of emergency
if (doReadIndex > readIndex)
{ return new byte[0]; }
//return
return buffer[doReadIndex++];
}
}
catch (Exception ex)
{
Debug.Assert(false, ex.ToString());
return new byte[0];
}
}
private void AddByDo(byte[] newBytes, string errorMessageFromDO)
{
try
{
lock (bufferLockForWrite)
{
//add data
buffer[doWriteIndex] = newBytes;
//update doWriteIndex
doWriteIndex++;
//error happend in Do
if (errorMessageFromDO.Length > 0)
{
anyErrorHappend = true;
errorsMessage += errorMessageFromDO;
lastIndex = -1;
Monitor.Pulse(bufferLockForWrite);
}
//if reader completed and writer is in wait state pulse it
if (readCompleted && writerIsWaiting)
{
Monitor.Pulse(bufferLockForWrite);
}
}
}
catch (Exception ex)
{ Debug.Assert(false, ex.ToString()); }
}
private byte[] GetByWriter()
{
try
{
lock (bufferLockForWrite)
{
//if data did not proccessed wait
if (writeIndex == doWriteIndex)
{
writerIsWaiting = true;
Monitor.Wait(bufferLockForWrite);
}
//writer is not waithing
writerIsWaiting = false;
//return
return buffer[writeIndex++];
}
}
catch (Exception ex)
{
Debug.Assert(false, ex.ToString());
return new byte[0];
}
}
private void Reader(BinaryReader binaryReader, int bufferPartSize, int sizeToRead)
{
try
{
//vars
bool completed = false;
int readedSize = 0;
byte[] readedBytes = new byte[0];
while (readedSize < sizeToRead && !anyErrorHappend)
{
//change bufferPartSize & completed if needs
if (readedSize + bufferPartSize >= sizeToRead)
{
bufferPartSize = sizeToRead - readedSize;
completed = true;
}
try
{
//read
readedBytes = binaryReader.ReadBytes(bufferPartSize);
}
catch (Exception ex)
{
Debug.Assert(false, ex.ToString());
//error happend
anyErrorHappend = true;
errorsMessage += ex.Message;
//for pulse Do() if it is waiting
byte[] amptyBytesArray = new byte[0];
AddByReader(amptyBytesArray, true);//it is better to do it instead change lastIndex here
break;
}
//add to buffer
AddByReader(readedBytes, completed);
//update readedSize
readedSize += bufferPartSize;
}
}
catch (Exception ex)
{ Debug.Assert(false, ex.ToString()); }
}
private void Writer(BinaryWriter binaryWriter)
{
try
{
//vars
byte[] bytesToWrite = new byte[0];//for put getted data in
for (int i = 0; i < lastIndex; i++)
{
//get data from buffer
bytesToWrite = GetByWriter();
try
{
//write
binaryWriter.Write(bytesToWrite);
}
catch (Exception ex)
{
Debug.Assert(false, ex.ToString());
lastIndex = -1;
anyErrorHappend = true;
errorsMessage = ex.Message;
break;
}
}
}
catch (Exception ex)
{ Debug.Assert(false, ex.ToString()); }
}
private void Do()
{
try
{
//vars
byte[] bytes = new byte[0];//for put readed data/result in
for (int i = 0; i < lastIndex; i++)
{
//get data from buffer
bytes = GetByDo();
try
{
//do
delegateM(ref bytes);
}
catch (Exception ex)
{
Debug.Assert(false, ex.ToString());
//add
AddByDo(new byte[0], "error: " + ex.Message);
break;
}
//add data to buffer
AddByDo(bytes, string.Empty);
}
}
catch (Exception ex)
{ Debug.Assert(false, ex.ToString()); }
}
//
#endregion methods
}
答案 0 :(得分:0)
你的代码在这里抛出异常:
if (!Task.WaitAll(tasks, 10000))
{ throw new TimeoutException(); }
这意味着您的一项任务需要10秒以上才能完成。
这可能是因为它需要更多时间。这也可能是因为您的某个任务因等待进入锁定而陷入困境。它也可能因为Monitor.Wait等待锁定而陷入困境。
您可以添加日志记录以查看其挂起的位置。此外,如果您的一个断言语句失败,它将阻止任务及时完成。
作为旁注,磁盘IO非常耗时,并且通常尝试并行化这样的IO操作通常不会有帮助,因为线程最终会相互争吵,争夺磁盘访问时间。磁盘最终需要来回旋转到多个位置以读取和写入数据,最终会产生实际减慢速度的净效应。如果您正在使用RAID,或者您正在从一个磁盘读取并将读取添加到队列中,并且您的写入线程正在从该队列读取并将该数据写入另一个磁盘,那么您可以加快速度。