无法从文件中读取。似乎过早地达到了EOF

时间:2010-11-20 17:30:32

标签: c++ fstream eof

我确信这个问题可以相对轻松地解决,但我很难找到问题。 我的代码只是读取文件中的所有单词,然后将每个单词,单词位置,句子的开头和结尾存储在一个数组中。该数组输出到另一个文本文件。

最后一句之前的所有信息我都能正常读取,但读到最后一句时程序就出错了。有什么想法吗?

/**
 *  Programmer: fryeguy
 *  Course: 
 *  Program: TxtCrawl for MicroSearch
 *
 *  Algorithm:
 *  TxtCrawl is the component of MicroSearch that reads text
 *  documents for search terms and stores them for
 *  indexing
 *
 *  1. Count words in doc, then initialize
 *     wordsFromDoc array to wordCount
 *  2. Initiate output file for writing.
 *  3. Open input file for reading words.
 *  4. Until reaching EOF:
 *     4.a. Set value for start "get pointer" in startSentence (.tellg()).
 *     4.b. Store value for end "get pointer" in endSentence (.tellg()).
 *     4.c. Reset "get pointer" to startSentence location.
 *     4.d. Until reaching endSentence, Read into the
 *          array theWord, wordPos, startSent, and endSent
 *  5. Write wordsFromDoc array to file
 *  6. When EOF is reached close the files.
 */

#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

/// Record kept for a single word pulled from the document.
struct wordProps
{
    std::string theWord;   ///< text of the word
    int         wordPos;   ///< position of the word
    int         startSent; ///< start point of the sentence holding the word
    int         endSent;   ///< end point of the sentence holding the word
};

void countWords(string, int&, int&);

int main()
{

    ifstream iFile; // file stream for reading in data
    ofstream oFile; // file stream for writing data

    string  iFileName = "TextFile2.txt";    // name of test file to read from
    string  oFileName = "OutputFile.txt";   // name of test file to write to
    string  aLine = "";                     // stores a line preceeding a newline character (\n)
    string  aWord = "";                     // stores words from doc for indexing
    int     charCount = 0;                  // count of characters in doc
    int     wordCount = 0;                  // count of words in doc
    int     aLineWordCount = 0;             // count of words in a single line being processed
    int     wordBegin = 0;                  // stores location of word in doc
    int     startSentence = 0;              // stores pointer value for start of sentence
    int     endSentence = 0;                // stores pointer value for end of sentence

    /**
     * 1. Count words in doc, then initialize
     *    wordsFromDoc array to wordCount
     */
    countWords(iFileName, charCount, wordCount);
    cout << "charCount: " << charCount << endl; // DEBUG CODE
    cout << "wordCount: " << wordCount << endl; // DEBUG CODE
    wordProps wordsFromDoc[wordCount];
    cout<< "length of array: " << (sizeof(wordsFromDoc) / sizeof(*wordsFromDoc)) << endl;  // DEBUG CODE

    /**
     * 2. Initiate output file for writing
     */
    oFile.open (oFileName.c_str()); // setup output file and write header
    oFile << setw(20) << left << "File Name: " << iFileName << endl;
    oFile << setw(20) << "---------------------------------------" << endl << endl;

    /**
     * 3. Open input file for reading words
     */
    iFile.open (iFileName.c_str());
    if (!iFile.is_open())
        cout << "No such file exists!" << endl;
    else
    {
        /**
         * 4. Until reaching EOF:
         */
        // I have been attempting different counting methods assuming the eof was being reached prematurely
        // The results really have not varied with this code
        // while (iFile.tellg() != charCount) 
        while (!iFile.eof())
        {
            //cout << "count: " << count << endl;
            /**
             * 4.a. Set value for start "get pointer" in startSentence (.tellg()).
             */
            startSentence = iFile.tellg();
            cout << "startSentence: " << startSentence << endl; // DEBUG CODE

            /**
             * 4.b. Store value for end "get pointer" in endSentence (.tellg()).
             */
            getline(iFile, aLine, '.');
            cout << aLine << endl; // DEBUG CODE
            endSentence = iFile.tellg();
            aLine.clear();
            cout << "endSentence: " << endSentence << endl; // DEBUG CODE

            if (!iFile.is_open())
            {
                cout << "The if, iFile.tellg(): " << iFile.tellg() << endl; // DEBUG CODE
                iFile.close();
                iFile.open (iFileName.c_str());
            }

            /**
             * 4.c. Reset "get pointer" to startSentence location.
             */
            iFile.seekg(startSentence);
            cout << "iFile.tellg(): " << iFile.tellg() << endl; // DEBUG CODE

            /**
             * 4.d. Until reaching endSentence, Read into the
             *      array theWord, wordPos, startSent, and endSent
             */

             // As the last line is about to be read there is an error of some sort.
             // My guess is that somehow I exceed the end of the file but my startSentence
             // and endSentence variables are pointing where I think they should.

            for ( ; iFile.tellg() < endSentence; aLineWordCount++)
            {
                wordsFromDoc[aLineWordCount].wordPos = iFile.tellg();
                cout << "wordPos: " << wordsFromDoc[aLineWordCount].wordPos << endl; // DEBUG CODE
                iFile >> wordsFromDoc[aLineWordCount].theWord;
                cout << "theWord: " << wordsFromDoc[aLineWordCount].theWord << endl; // DEBUG CODE
                wordsFromDoc[aLineWordCount].startSent = startSentence;
                cout << "startSent: " << wordsFromDoc[aLineWordCount].startSent << endl; // DEBUG CODE
                wordsFromDoc[aLineWordCount].endSent = endSentence;
                cout << "endSent: " << wordsFromDoc[aLineWordCount].endSent << endl << endl; // DEBUG CODE
                cout << "aLineWordCount: " << aLineWordCount << endl;
            } // end for

        } // end while !=iFile.eof

            // THIS section of code is never reached because of the hang up above.
            /**
             * 5. Write wordsFromDoc array to file
             */
            for (int count = 0; count < aLineWordCount; count++)
            {
                oFile << setw(20) << left
                << wordsFromDoc[count].theWord << " "
                << wordsFromDoc[count].wordPos << " "
                << wordsFromDoc[count].startSent << " "
                << wordsFromDoc[count].endSent << endl;
            }

    } // end else

    /**
     * 6. When EOF is reached close the files.
     */
    iFile.close();
    oFile.close();

// DEBUG CDODE for verifying results
//  for (int count = 0; count < wordCount; count++) {
//      cout << "theWord: " << wordsFromDoc[count].theWord << endl;
//      cout << "wordPos: " << wordsFromDoc[count].wordPos << endl;
//      cout << "startSent: " << wordsFromDoc[count].startSent << endl;
//      cout << "endSent: " << wordsFromDoc[count].endSent << endl << endl;
//  }

}

/**
 * Count the characters and the whitespace-separated words in a file.
 *
 * @param theFileName  path of the file to examine
 * @param charCount    incremented once for every character read
 * @param wordCount    incremented once for every word extracted
 *
 * Both counters are added to, not reset, so callers pass them in as 0.
 * If the file cannot be opened, "No such file exists!" is printed once
 * and both counters are left unchanged.
 */
void countWords(string theFileName, int &charCount, int &wordCount)
{
    // Read-only stream: a plain fstream asks for write access as well and
    // would fail to open a read-only file.
    ifstream inFile(theFileName.c_str());
    if (!inFile.is_open())
    {
        cout << "No such file exists!" << endl;
        return;
    }

    // count the chars
    char theChar = ' ';
    while (inFile.get(theChar))
        charCount++;

    // The first pass ended by hitting EOF; reset the state and rewind
    // before scanning the same file again.
    inFile.clear();
    inFile.seekg(0, ios::beg);

    // count the words -- only successful extractions are counted, so
    // trailing whitespace at the end of the file adds no phantom word
    string theWord;
    while (inFile >> theWord)
        wordCount++;
}

1 个答案:

答案 0 :(得分:1)

Istream

我查过了。istream 的 get 和 getline 都没有能够一次处理多个分隔符的重载版本 [1]。

其他人也遇到过同样的问题 [2]。逐字符 I/O 是最实用的解决方案;其他方案则需要自己编写现有 istream 方法的增强版本。

想法

  1. 一次性将整个文件读入内存。
  2. 删除换行符(任何CR或LF)。
  3. 在将文档写回磁盘时,在每个句末分隔符(句号等)之后放置一个统一的标记(LF 或 ETX '\003'),从而把文档拆分成以这些分隔符结尾的行。
  4. 现在可以像往常一样处理文件,只是用这个已知的标记代替句号作为分隔符。
  5. 删除包含重新分隔文档的临时文件。
  6. 一次性读入整个文档并不成问题,因为它最终反正都会全部进入内存;把存储的所有单词的字符串合在一起,就相当于整个文件。将重新分隔的文档写入磁盘后,即可释放这部分内存。

    备注

    [1] istream::get  [2] 使用 getline 处理多个分隔符(CodeGuru 上的讨论)