电子邮件内容分类

时间:2014-09-30 09:25:44

标签: c email-headers

我想把电子邮件的主题(Subject)字段内容写入一个文本文件,把其他标题字段写入第二个文本文件,最后把邮件正文写入第三个文本文件。我的代码可以提取只占一行的电子邮件字段,但如果字段内容跨越多行,就无法完整提取。(这是必需的,因为 Subject、To 等字段都可能有多行。)请帮帮我……我的代码如下:

程序名称:f2all.c

# include <stdio.h>
# include <string.h>

/*
 * Reads the e-mail file named by argv[1] line by line with getline() and
 * writes parts of each line to the output files named by argv[2] (subject),
 * argv[3] (other header fields) and argv[4] (message body).
 *
 * Returns 1 when fewer than four file arguments are given or the input file
 * cannot be opened, 0 otherwise.
 *
 * NOTE(review): this is the program as posted in the question; the problems
 * visible in the code are marked inline and deliberately left unfixed.
 */
int main (int argc, char **argv) {

    /* Four file names are required, although the usage text only names two. */
    if (argc < 5) {
        fprintf (stderr, "Error: insufficient input. Usage: %s input_file output_file\n",
             argv[0]);
        return 1;
    }

    /* NOTE(review): only ifp is checked for NULL below; the three output streams are not. */
    FILE *ifp = fopen(argv[1],"r");
    FILE *ofp1 = fopen(argv[2],"w");/* output file (e.g. f.txt) meant to hold the contents of the Subject field only */
    FILE *ofp2= fopen(argv[3],"w");/* output file (e.g. g.txt) meant to hold the contents of all other header fields */
    FILE *ofp3= fopen(argv[4],"w");/* output file (e.g. h.txt) meant to hold the message body; NOTE(review): nothing is ever written to it */

    char *buf = NULL;  /* NULL together with n == 0 lets getline allocate the line buffer */
    char *buf1 = NULL;   /* NOTE(review): never allocated, yet used as the strcpy destination below */
    ssize_t read = 0;
    size_t n = 0;
    char *ptr = NULL;

    if (ifp==NULL)    
    {    
        /* NOTE(review): returns without closing the output files opened above. */
        printf("\nFile cannot be opened\n");
        return 1;
    }
    else
    {
        /* Each iteration inspects exactly one physical line, so only the first
         * line of a multi-line (folded) header field can match a keyword. */
        while ((read = getline (&buf, &n, ifp)) != -1)
        {
            /* strstr finds the keyword anywhere in the line, not just at its start. */
            if (((ptr=strstr(buf,"Subject:")) != 0))
            {
                fprintf(ofp1,"%s",(ptr+8));      /* use (ptr + 8) to trim "Subject:" away */
            }
            if ((ptr=strstr(buf,"subject :")) != 0)
            {
                fprintf(ofp1,"%s",(ptr+9));         /* (ptr + 9) skips the nine characters of "subject :" */
            }

            /* "Date:", "From:" and "X-cc:" are all five characters long, hence the shared (ptr + 5). */
            if (((ptr=strstr(buf,"Date:")) != 0)||((ptr=strstr(buf,"From:")) != 0)||((ptr=strstr(buf,"X-cc:")) != 0))
            {
                fprintf(ofp2,"%s",(ptr+5));         
            }
            if ((ptr=strstr(buf,"X-To:")) != 0)
            {
                fprintf(ofp2,"%s",(ptr+5));                 
            }
            /* NOTE(review): this else belongs to the "X-To:" test only, so it runs
             * for every line without "X-To:", including lines already handled above. */
            else
            {
                /* NOTE(review): buf1 is still NULL here, so this strcpy writes through
                 * a null pointer (undefined behaviour); most likely the cause of the
                 * segmentation fault reported for this program. */
                strcpy(buf1,buf);
                fprintf(ofp1,"%s",buf1);  

            }   
        }
    }
    /* NOTE(review): free() is declared in <stdlib.h>, which this file does not
     * include; that is what the compiler warning about 'free' refers to. */
    if (buf)        /* free memory allocated by getline for buf */
        free (buf);
    fclose(ofp1);
    fclose(ofp2);
    fclose(ofp3);
    fclose(ifp);

    return 0;
}

我进行了编译,然后按如下方式运行程序:

princy@PRINCY:~/minipjt/SUBJECT$ cc f2all.c
f2all.c:在函数'main'中:
f2all.c:85:9:警告:内置函数'free'的不兼容隐式声明[默认启用]
princy@PRINCY:~/minipjt/SUBJECT$ ./a.out 8.txt f.txt g.txt h.txt
分段错误(核心转储)

1 个答案:

答案 0 :(得分:0)

您可以在扫描文件时建立上下文,然后根据该上下文打印到输出文件。否则,您的条件仅适用于每个标题条目的第一行。

从这个角度来看,保留一组输出文件指针是有意义的。

您的代码会把标头条目中的关键字去掉。这意味着在读取输出文件时上下文已经丢失:xy@example.com 究竟是发件人、收件人,还是首选的回复地址?

下面是基于您的示例的一个实现:

#define _GNU_SOURCE 1

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * Parser contexts. The three non-negative values are also used as indices
 * into the array of output streams; NONE means no context has been
 * established yet.
 */
enum {
    NONE    = -1,
    SUBJECT =  0,
    HEADER  =  1,
    BODY    =  2
};

/*
 * Check whether `line` starts with one of the header keywords in `kw`,
 * immediately followed by a colon.
 *
 *   line  NUL-terminated text to inspect; the match is anchored at line[0].
 *   kw    array of keyword strings (without the colon), terminated by NULL.
 *
 * Returns a pointer to the character right after the colon of the first
 * matching keyword, or NULL if no keyword matches. Matching is
 * case-sensitive (strncasecmp could be used for case-insensitive matching).
 */
const char *is_header(const char *line, const char **kw)
{
    for (; *kw != NULL; kw++) {
        /* size_t matches strlen's return type and strncmp's length parameter. */
        size_t len = strlen(*kw);

        /*
         * When strncmp reports equality, line holds at least len non-NUL
         * characters, so reading line[len] stays inside the string.
         */
        if (strncmp(line, *kw, len) == 0 && line[len] == ':') {
            return line + len + 1;
        }
    }

    return NULL;
}

/* Keyword list for the subject field; the trailing NULL terminates the list. */
const char *header_subject[] = { "Subject", NULL };

/*
 * Keyword list for the remaining header fields. Further keywords can be
 * added before the NULL entry, which must stay last.
 */
const char *header_other[] = {
    "From",
    "To",
    "Date",
    /* ... */
    NULL
};

/*
 * Split an e-mail message into three files.
 *
 *   argv[1]  input message
 *   argv[2]  receives the content of the subject field
 *   argv[3]  receives the content of the other recognised header fields
 *   argv[4]  receives the message body
 *
 * The input is scanned line by line. A line starting with a recognised
 * keyword switches the current context; every later line (for example the
 * continuation lines of a multi-line field) is written to the file of the
 * current context until the next keyword or the empty line that starts the
 * body.
 *
 * Returns 0 on success and 1 on a usage error, when a file cannot be
 * opened, when reading fails, or when an output file cannot be closed.
 */
int main(int argc, char **argv)
{
    if (argc < 5) {
        fprintf(stderr,
            "Error: insufficient input. "
            "Usage: %s input_file subject_file header_file body_file\n",
            argv[0]);
        return 1;
    }

    FILE *ifp = NULL;
    FILE *ofp[3] = { NULL, NULL, NULL };
    char *buf = NULL;       /* NULL with n == 0: getline allocates the buffer */
    size_t n = 0;
    int context = NONE;
    int rc = 1;

    ifp = fopen(argv[1], "r");
    if (ifp == NULL) {
        perror(argv[1]);
        goto cleanup;
    }

    /* ofp[SUBJECT], ofp[HEADER] and ofp[BODY] map to argv[2], argv[3], argv[4]. */
    for (int i = 0; i < 3; i++) {
        ofp[i] = fopen(argv[i + 2], "w");
        if (ofp[i] == NULL) {
            perror(argv[i + 2]);
            goto cleanup;
        }
    }

    while (getline(&buf, &n, ifp) != -1) {
        const char *line = buf;

        if (context != BODY) {
            /* Check for context if we are not already in the body */
            const char *p;

            /* Skip leading white space (e.g. of a continuation line) */
            while (*line == ' ' || *line == '\t') {
                line++;
            }

            if (*line == '\n' || *line == '\r') {
                context = BODY;     /* An empty line starts the body ... */
                continue;           /* ... but we don't print it. */
            }

            /*
             * Keywords are matched against buf, not line, so they are only
             * recognised at the very start of the physical line.
             */
            p = is_header(buf, header_subject);
            if (p) {
                line = p;
                while (*line == ' ' || *line == '\t') {
                    line++;
                }
                context = SUBJECT;
            }

            p = is_header(buf, header_other);
            if (p) {
                line = p;
                while (*line == ' ' || *line == '\t') {
                    line++;
                }
                context = HEADER;
            }

            /*
             * NOTE(review): a header line whose keyword is in neither list
             * keeps the previous context and is written to that file.
             */
        }

        /* Lines seen before any recognised keyword are dropped. */
        if (context != NONE) {
            fprintf(ofp[context], "%s", line);
        }
    }

    /* getline returns -1 for both end-of-file and errors; tell them apart. */
    if (ferror(ifp)) {
        perror(argv[1]);
        goto cleanup;
    }

    rc = 0;

cleanup:
    free(buf);
    for (int i = 0; i < 3; i++) {
        /* fclose flushes buffered output, so a failure here means lost data. */
        if (ofp[i] != NULL && fclose(ofp[i]) != 0) {
            perror(argv[i + 2]);
            rc = 1;
        }
    }
    if (ifp != NULL) {
        fclose(ifp);
    }

    return rc;
}