根据内容检测文件类型

时间:2015-06-17 12:53:02

标签: java

尝试以下方法:

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.spi.FileTypeDetector;
import org.apache.tika.Tika;
import org.apache.tika.mime.MimeTypes;

/**
 * Determines the MIME type of a file with Apache Tika.
 *
 * <p>Detection is attempted from the file name first and only falls back to
 * inspecting the file itself when the name does not yield a specific type.
 * Can also be run from the command line with a single file path argument.
 *
 * @author kiriti.k
 */
public class TikaFileTypeDetector {

    /** Tika facade used for all detection calls made by this instance. */
    private final Tika tika = new Tika();

    /** Creates a detector backed by a default {@link Tika} instance. */
    public TikaFileTypeDetector() {
        super();
    }

    /**
     * Probes the given file for its MIME type.
     *
     * @param path location of the file to examine
     * @return the detected MIME type, or {@code null} when neither the name
     *         nor the file-based detection produced anything more specific
     *         than {@code application/octet-stream}
     * @throws IOException if the file-based detection fails
     */
    public String probeContentType(Path path) throws IOException {
        // Try to detect based on the file name only for efficiency
        String fileNameDetect = tika.detect(path.toString());
        if (!fileNameDetect.equals(MimeTypes.OCTET_STREAM)) {
            return fileNameDetect;
        }

        // Then check the file content if necessary
        String fileContentDetect = tika.detect(path.toFile());
        if (!fileContentDetect.equals(MimeTypes.OCTET_STREAM)) {
            return fileContentDetect;
        }

        // Specification says to return null if we could not
        // conclusively determine the file type
        return null;
    }

    /**
     * Command-line entry point: prints the detected type of one file.
     *
     * @param args exactly one element, the path of the file to examine
     * @throws IOException if the file-based detection fails
     */
    public static void main(String[] args) throws IOException {
        // expects file path as the program argument
        if (args.length != 1) {
            printUsage();
            return;
        }

        Path path = Paths.get(args[0]);

        // Analyse the file - first based on file name for efficiency.
        // If that is inconclusive the detector analyses the file itself.
        TikaFileTypeDetector detector = new TikaFileTypeDetector();
        String contentType = detector.probeContentType(path);

        System.out.println("File is of type - " + contentType);
    }

    /** Prints a one-line usage message naming the expected argument. */
    public static void printUsage() {
        System.out.println("Usage: java -classpath ... "
                + TikaFileTypeDetector.class.getName()
                + " <file>");
    }
}

以上程序仅根据文件扩展名进行检测。如何先检查文件内容(MIME)再确定类型?我在 NetBeans 8.0.2 中使用 tika-app-1.8.jar。我遗漏了什么?

2 个答案:

答案 0 :(得分:3)

代码首先检查文件扩展名,如果能得到结果,就直接返回基于该扩展名的MIME类型。如果您希望它首先检查内容,只需交换这两段语句的顺序:

/**
 * Probes a file for its MIME type, consulting the file itself before
 * falling back to its name.
 *
 * @param path location of the file to examine
 * @return the detected MIME type, or {@code null} if both attempts only
 *         yielded the generic {@code application/octet-stream}
 * @throws IOException if the file-based detection fails
 */
public String probeContentType(Path path) throws IOException {
    // Content-based attempt goes first
    final String byContent = tika.detect(path.toFile());
    if (byContent.equals(MimeTypes.OCTET_STREAM)) {
        // Content was inconclusive, so fall back to the file name
        final String byName = tika.detect(path.toString());

        // Per the specification, null signals that the type could not
        // be conclusively determined
        return byName.equals(MimeTypes.OCTET_STREAM) ? null : byName;
    }
    return byContent;
}

请注意,这可能会对性能产生巨大影响。

答案 1 :(得分:0)