# Schema to model pipelines in the Bloomsbury XML CMS.
#
# A pipeline consists of at least one step, to be executed
# by the processor sequentially, in document order.
#
# In the case of a transform step, the output of one step
# is passed to the following step.
#
# Note that the failOnError attribute is NOT specified for the
# transform step, since a failed transformation produces output
# (if any at all) that is not well-formed and so cannot be
# passed on to the next step.
#
# The input into a pipeline is expected to be a single
# document. The output of the final (non-validation) step
# is one or more documents.
#
# The output location is implementation-defined.
#
# Author: Andrew Sales <andrew.sales@bloomsbury.com>
# Date: 20190924
# Version: 0.2
# Comments: 
# 20211206 [AS] added xml:base to steps, re CMS-306
# 20210325 [AS] adjusted types of metadata, re JIRA CMS-260
# 20210115 [AS] added XQuery step, re JIRA CMS-192
# 20190924 [AS] added type attribute re JIRA CMS-34


# Unprefixed element names in this schema belong to this namespace.
# (In RELAX NG compact syntax the default namespace is not applied to
# attribute names, so the attributes declared below are unqualified.)
default namespace = "http://cms.bloomsbury.com/pipeline"

# Document element: <pipeline>.
#   id   - required, unique identifier (xsd:ID)
#   name - required, an NCName
#   type - optional, either 'ingest' or 'export'
# Content: one or more steps, in document order.
start = element pipeline {
  attribute id   { xsd:ID },
  attribute name { xsd:NCName },
  attribute type {
    string 'ingest'
    | string 'export'
  }?,
  step+
}

# A step is exactly one of the supported step elements.
step =
  transform
  | validate
  | xquery

# Free-text label attribute; referenced (as optional) by each step element.
label = attribute label { text }
# xml:base attribute (a URI); referenced (as optional) by each step element.
xml-base = attribute xml:base { xsd:anyURI }

# Transform step.
#   href     - required URI
#   label    - optional
#   xml:base - optional
# Deliberately carries no failOnError attribute (see the note in the
# file header).
transform =
  element transform {
    attribute href { xsd:anyURI },
    label?,
    xml-base?
  }

# Validation step.
#   failOnError - optional boolean
#   href        - required URI
#   label       - optional
#   xml:base    - optional
validate =
  element validate {
    # whether to abort if validation errors occur
    attribute failOnError { xsd:boolean }?,
    attribute href { xsd:anyURI },
    label?,
    xml-base?
  }
    
# XQuery step.
#   href     - required URI
#   label    - optional
#   xml:base - optional
# Content: zero or more <requires> children.
xquery =
  element xquery {
    attribute href { xsd:anyURI },
    label?,
    xml-base?,
    requires*
  }
    
# Declares a metadata source that the containing XQuery step requires.
# The metadataType attribute is mandatory and must be one of the
# values enumerated by metadata-types.
requires =
  element requires {
    attribute metadataType { metadata-types }
  }

# Permitted kinds of source metadata.
metadata-types =
  string 'onix'            # ONIX feeds
  | string 'organisation'  # organisation metadata
  | string 'person'        # person metadata
  | string 'publisher'     # publisher metadata
  | string 'series'        # series metadata