针对包含多个 include/import 的 XSD 验证 XML

时间:2013-01-26 12:05:58

标签: java xsd-validation

我需要针对XSD模式验证XML文件。该模式分散在多个XSD文件中(这些文件通过include和import指令相互引用)。

参考我在 Stack Overflow(SO)上找到的相关问题和答案,我写出了以下解决方案。

(请注意,以下代码只是一个快速开发的原型,而不是最终的解决方案。)

// Project root directory prefix. The value shown appears to be a placeholder and must be
// replaced with a real path; other paths are built by appending directly to it.
private static final String PROJECT_ROOT_DIR_PATH = "--- project-root-path ---";
// Schemas directory, appended directly to PROJECT_ROOT_DIR_PATH (no separator is inserted).
// The value shown appears to be a placeholder as well.
private static final String SCHEMAS_ROOT_PATH = "--- schemas root path ---";

/**
 * Parses the example XML file into a DOM tree and validates it against the root XSD.
 *
 * <p>The schema factory is given a {@code ResourceResolver} so that schema documents
 * referenced from the root XSD are loaded through it.
 *
 * <p>NOTE(review): if some {@code <import>} statements never reach the resolver, the
 * parser may be skipping imports for a namespace it has already loaded - verify against
 * the schema-loading features of the XML parser in use.
 *
 * @throws Exception if parsing, schema compilation or validation fails
 */
private void validate() throws Exception
{
    // Build a namespace-aware DOM parser and load the XML instance document.
    DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    domFactory.setNamespaceAware(true);

    File xmlFile = new File(PROJECT_ROOT_DIR_PATH + "src\\test\\resources\\example.xml");
    Document xmlDocument = domFactory.newDocumentBuilder().parse(xmlFile);

    // Directory prefix shared by the resolver and the root schema location.
    String schemasDir = PROJECT_ROOT_DIR_PATH + SCHEMAS_ROOT_PATH;

    // The resolver is responsible for locating the XSDs pulled in by include/import.
    SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
    schemaFactory.setResourceResolver(new ResourceResolver(schemasDir));

    File rootXsd = new File(schemasDir + "--- root-xsd-file-path\\root-schema.xsd ---");
    Schema compiledSchema = schemaFactory.newSchema(rootXsd);

    // Validate the already-parsed DOM tree against the compiled schema.
    compiledSchema.newValidator().validate(new DOMSource(xmlDocument));
}

ResourceResolver.java:

public class ResourceResolver implements LSResourceResolver 
{   
    @Override
    public LSInput resolveResource(String type, String namespaceURI,
        String publicId, String systemId, String baseURI) 
    {
        if (!"http://www.w3.org/2001/XMLSchema".equals(type))
        {
            throw new IllegalArgumentException(
                "Unexpected resource type [" + type + "]."
            );
        }

        if (systemId == null)
        {
            throw new IllegalArgumentException(
                "Unexpected resource system-id [" + systemId + "]."
            );
        }

        System.out.println("base-uri: " + baseURI);
        System.out.println("system-id: " + systemId);

        URI targetURI = getTargetURI(baseURI, systemId);
        System.out.println("target-uri: " + targetURI);

        System.out.println("---");

        Input input = null;

        try {
            input = new Input(baseURI, publicId, systemId, targetURI.toURL().openStream());
        } 
        catch (Exception ex)
        {
            throw new RuntimeException(
                "Could not open resource stream - " + ex.getMessage()
            );
        }

        return input;
    }

    private static URI getTargetURI(String baseURI, String relativePath)
    {
        URI targetURI = null;

        try {
            targetURI = (new URI(baseURI)).resolve(relativePath);
        }
        catch (URISyntaxException ex)
        {
            throw new RuntimeException(
                "Could not resolve target URI - " + ex.getMessage()
            );
        }

        return targetURI;
    }
}

Input.java:

/**
 * {@link LSInput} backed by a byte stream.
 *
 * <p>The content is exposed through {@link #getByteStream()} only, so the consuming
 * parser reads the raw bytes itself instead of receiving text that was already
 * decoded with the platform default charset. Character stream, string data and
 * encoding are not provided.
 */
public class Input implements LSInput
{
    private BufferedInputStream inputStream;

    private String baseURI;
    private String publicId;
    private String systemId;

    /**
     * @param baseURI  base URI reported through {@link #getBaseURI()}
     * @param publicId public identifier of the resource, may be {@code null}
     * @param sysId    system identifier of the resource
     * @param input    stream holding the resource content; it is wrapped in a buffer
     */
    public Input(String baseURI, String publicId, String sysId, InputStream input)
    {
        this.baseURI = baseURI;
        this.publicId = publicId;
        this.systemId = sysId;
        this.inputStream = new BufferedInputStream(input);
    }

    @Override
    public String getPublicId()
    {
        return publicId;
    }

    @Override
    public void setPublicId(String publicId)
    {
        this.publicId = publicId;
    }

    @Override
    public String getBaseURI()
    {
        return baseURI;
    }

    /** Returns the underlying stream; the content can therefore be read only once. */
    @Override
    public InputStream getByteStream()
    {
        return inputStream;
    }

    @Override
    public boolean getCertifiedText()
    {
        return false;
    }

    /** Always {@code null}: no character stream is provided. */
    @Override
    public Reader getCharacterStream()
    {
        return null;
    }

    /** Always {@code null}: no encoding is reported. */
    @Override
    public String getEncoding()
    {
        return null;
    }

    /**
     * Always {@code null}: the content is available via {@link #getByteStream()}.
     * Reading the stream into a string here would decode it with a guessed charset
     * and would leave the stream empty for any later read.
     */
    @Override
    public String getStringData()
    {
        return null;
    }

    @Override
    public void setBaseURI(String baseURI) {
        this.baseURI = baseURI;
    }

    /** Replaces the backing stream; {@code null} clears it. */
    @Override
    public void setByteStream(InputStream byteStream) {
        this.inputStream = (byteStream == null) ? null : new BufferedInputStream(byteStream);
    }

    /** Ignored: certified text is not supported. */
    @Override
    public void setCertifiedText(boolean certifiedText) {
    }

    /** Ignored: character streams are not supported. */
    @Override
    public void setCharacterStream(Reader characterStream) {
    }

    /** Ignored: no encoding is stored. */
    @Override
    public void setEncoding(String encoding) {
    }

    /** Ignored: string data is not supported. */
    @Override
    public void setStringData(String stringData) {
    }

    @Override
    public String getSystemId() {
        return systemId;
    }

    @Override
    public void setSystemId(String systemId) {
        this.systemId = systemId;
    }

    public BufferedInputStream getInputStream()
    {
        return inputStream;
    }

    public void setInputStream(BufferedInputStream inputStream)
    {
        this.inputStream = inputStream;
    }
}

从此解决方案生成的日志来看,它似乎确实是按深度优先的顺序处理include / import语句的。

但是,在某些地方,某些import语句会被忽略(即根本没有被处理)。

我找不出哪些语句会被处理、哪些语句不会被处理的规律。例如,在处理某个文件中的以下三行时,只有第一行和第三行被处理,尽管在我看来它们并没有什么区别。

<import namespace="schemas/src/x20130601" schemaLocation="../../x20130601/Personalnumber.xsd"/>
<import namespace="schemas/src/common/2008/01/03" schemaLocation="../../../contract/x20080103/Contractnumber.xsd"/>
<import namespace="schemas/src/20100504" schemaLocation="../../../system/x20100504/Contractidentification.xsd" />

忽略某些import语句会导致以下类型的异常。

Exception in thread "main" org.xml.sax.SAXParseException; systemId: file:/z:/--- project path ---/schemas//x20130504/Identification.xsd; lineNumber: 18; columnNumber: 61; src-resolve: Cannot resolve the name 'dat20080103:Contractnumber' to a(n) 'element declaration' component.

无法解析的组件的声明正位于被跳过的XSD文件中。


请告诉我应该从哪里排查错误,或者还需要补充哪些信息。

0 个答案:

没有答案