<?xml version="1.0"?>
<xs:schema 
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:olac="http://www.language-archives.org/OLAC/1.0/">
<xs:import namespace="http://www.language-archives.org/OLAC/1.0/" schemaLocation="http://www.language-archives.org/OLAC/1.0/olac.xsd"/>

<xs:annotation>
  <xs:documentation>
    schema for IGT-XML
    Alexis Palmer, 3-11-2008
  </xs:documentation>
</xs:annotation>

<!-- Root element declaration: "text" is currently the only valid root
     element. The type reference must spell the complexType name exactly
     ("IGtext"), because QNames are case-sensitive. -->
<xs:element name="text" type="IGtext"/>

<!-- Global IDREF attribute declarations for cross-layer references.
     A top-level xs:attribute may not carry 'use'; whether the attribute is
     required is stated at the point of use, e.g.
       <xs:attribute ref="ph_ref" use="required"/>
     Note that these are attribute declarations, not types: they can be
     reused with ref="...", but type="..." must name a simple type such as
     xs:IDREF. -->
<xs:attribute name="tx_ref" type="xs:IDREF"/>
<xs:attribute name="ph_ref" type="xs:IDREF"/>
<xs:attribute name="wd_ref" type="xs:IDREF"/>
<xs:attribute name="mr_ref" type="xs:IDREF"/>
<!-- TODO: rewrite as keyrefs, with the corresponding IDs as keys? -->
<!-- Abbreviations used throughout (often, but not always): 'tx' for text,
     'ph' for phrase, 'wd' for word, 'mr' for morpheme, 'gl' for gloss,
     'tr' for translation. -->

<!-- Type of the "text" root element: a metadata section followed by the
     annotated body, plus identifying attributes. -->
<xs:complexType name="IGtext">
  <xs:sequence>
    <!-- Both children are mandatory: minOccurs and maxOccurs default to 1.
         ('use' is defined for attributes only, so it is not given here.) -->
    <xs:element name="metadata" type="IGTmetadata"/>
    <xs:element name="body" type="textBody"/>
  </xs:sequence>
  <xs:attribute name="tx_id" type="xs:ID" use="required"/>
  <!-- NOTE(review): confirm that the imported OLAC schema really defines a
       simple type named "olac:language". -->
  <xs:attribute name="lg" type="olac:language" use="required"/>
  <xs:attribute name="src_id" type="xs:string" use="required"/>
    <!-- "src_id" contains the ID assigned to the text in the original
         data; thus it should be of a derived, customized type -->
  <xs:attribute name="title" type="xs:string"/>
</xs:complexType>

<!-- Metadata section of a text: a single OLAC metadata record. -->
<xs:complexType name="IGTmetadata">
  <xs:sequence>
    <!-- A local element declaration needs either 'name' or 'ref'. The OLAC
         record is reused by reference to the imported global element.
         NOTE(review): assumes olac.xsd declares a global element
         "olac:olac"; confirm against the imported schema. -->
    <xs:element ref="olac:olac"/>
  </xs:sequence>
  <!-- 'type' must name a simple type; "tx_ref" is a global attribute
       declaration, not a type, so xs:IDREF is used directly.
       Presumably refers to the 'tx_id' of the enclosing text. -->
  <xs:attribute name="text_id" type="xs:IDREF"/>
</xs:complexType>

<xs:complexType name="textBody">
  <xs:annotation>
    <xs:documentation>
      Body of a text: the phrases, morphemes, gloss and translations
      layers, each occurring exactly once and in this fixed order.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="phrases"      type="IGTphrases"     minOccurs="1" maxOccurs="1"/>
    <xs:element name="morphemes"    type="IGTmorphemes"   minOccurs="1" maxOccurs="1"/>
    <xs:element name="gloss"        type="IGTglossline"   minOccurs="1" maxOccurs="1"/>
    <xs:element name="translations" type="IGTtranslation" minOccurs="1" maxOccurs="1"/>
    <!-- additional annotation layers belong here -->
  </xs:sequence>
</xs:complexType>

<xs:complexType name="IGTphrases">
  <xs:annotation>
    <xs:documentation>
      Phrase layer: a non-empty list of "phrase" elements.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="phrase" type="ph_phrase" minOccurs="1" maxOccurs="unbounded"/>
  </xs:sequence>
  <xs:attribute name="src_layer" type="xs:string" use="optional">
    <xs:annotation>
      <xs:documentation>
        Identifier of the annotation layer in the original text
        (e.g. '\tx' in our Q'anjob'al Shoebox data).
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
</xs:complexType>

<xs:complexType name="ph_phrase">
  <xs:annotation>
    <xs:documentation>
      A single phrase: its plain text followed by one or more words.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="plaintext" type="xs:string" minOccurs="1" maxOccurs="1"/>
    <xs:element name="word" type="IGTword" minOccurs="1" maxOccurs="unbounded"/>
  </xs:sequence>
  <xs:attribute name="ph_id" type="xs:ID" use="required"/>
  <xs:attribute name="src_ph_id" type="xs:string" use="optional">
    <xs:annotation>
      <xs:documentation>
        ID assigned to the individual phrase in the original data;
        likely should be a customized type.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
</xs:complexType>

<xs:complexType name="IGTword">
  <xs:annotation>
    <xs:documentation>
      A word: an element with no child content, identified by a required
      ID and optionally carrying a token in its "text" attribute.
    </xs:documentation>
  </xs:annotation>
  <xs:attribute name="wd_id" type="xs:ID" use="required"/>
  <xs:attribute name="text" type="xs:token" use="optional"/>
</xs:complexType>

<xs:complexType name="IGTmorphemes">
  <xs:annotation>
    <xs:documentation>
      Morpheme layer: a non-empty list of "phrase" elements, each holding
      the morphs of one phrase.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="phrase" type="mr_phrase" minOccurs="1" maxOccurs="unbounded"/>
    <!-- additional morphological analyses belong here,
         if e.g. we have multiple automatic analyzers
         each providing their proposed analyses -->
  </xs:sequence>
  <xs:attribute name="src_layer" type="xs:string" use="optional"/>
</xs:complexType>

<!-- Morpheme-layer phrase: one or more "morph" elements, plus an optional
     IDREF back to a phrase. -->
<xs:complexType name="mr_phrase">
  <xs:sequence>
    <xs:element name="morph" type="IGTmorph" maxOccurs="unbounded"/>
  </xs:sequence>
  <!-- 'type' must name a simple type; "ph_ref" is a global attribute
       declaration, not a type, so xs:IDREF is used directly.
       Presumably points at a phrase's 'ph_id'; IDREF alone only checks
       that some ID with this value exists in the document. -->
  <xs:attribute name="phrase_ref" type="xs:IDREF"/>
</xs:complexType>

<!-- A single morph: optional "type" labels, a required ID, a required
     IDREF to a word, and an optional token in "text". -->
<xs:complexType name="IGTmorph">
  <xs:sequence>
    <!-- morphs may optionally be labelled with additional
         information (e.g. affix type, derivational/inflectional) -->
    <xs:element name="type" type="xs:token" minOccurs="0" maxOccurs="unbounded"/>
  </xs:sequence>
  <xs:attribute name="morph_id" type="xs:ID" use="required"/>
  <!-- 'type' must name a simple type; "wd_ref" is a global attribute
       declaration, not a type, so xs:IDREF is used directly.
       Presumably points at a word's 'wd_id'. -->
  <xs:attribute name="word_ref" type="xs:IDREF" use="required"/>
  <xs:attribute name="text" type="xs:token"/>
</xs:complexType>

<xs:complexType name="IGTglossline">
  <xs:annotation>
    <xs:documentation>
      Gloss layer: a non-empty list of "phrase" elements, each holding
      the glosses of one phrase.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="phrase" type="gl_phrase" minOccurs="1" maxOccurs="unbounded"/>
    <!-- additional analyses belong here,
         if e.g. we have multiple automatic analyzers
         each providing their proposed analyses -->
  </xs:sequence>
  <xs:attribute name="src_layer" type="xs:string" use="optional"/>
</xs:complexType>

<!-- Gloss-layer phrase: one or more "gls" elements, plus an optional
     IDREF back to a phrase. -->
<xs:complexType name="gl_phrase">
  <xs:sequence>
    <xs:element name="gls" type="IGTgloss" maxOccurs="unbounded"/>
    <!-- this is one of the places I imagine GOLD could usefully
         be 'injected', by typing the glosses using the GOLD namespace -->
  </xs:sequence>
  <!-- 'type' must name a simple type; "ph_ref" is a global attribute
       declaration, not a type, so xs:IDREF is used directly.
       Presumably points at a phrase's 'ph_id'. -->
  <xs:attribute name="phrase_ref" type="xs:IDREF"/>
</xs:complexType>

<!-- A single gloss: an element with no child content, carrying an optional
     IDREF to a morph and an optional token in "text". -->
<xs:complexType name="IGTgloss">
  <!-- 'type' must name a simple type; "mr_ref" is a global attribute
       declaration, not a type, so xs:IDREF is used directly.
       Presumably points at a morph's 'morph_id'. -->
  <xs:attribute name="morph_ref" type="xs:IDREF"/>
  <xs:attribute name="text" type="xs:token"/>
</xs:complexType>

<xs:complexType name="IGTtranslation">
  <xs:annotation>
    <xs:documentation>
      Translation layer: a non-empty list of "phrase" elements.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="phrase" type="tr_phrase" minOccurs="1" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:complexType>

<!-- Translation-layer phrase: one or more "trans" elements, plus an
     optional IDREF back to a phrase. -->
<xs:complexType name="tr_phrase">
  <xs:sequence>
    <xs:element name="trans" type="IGTtrans" maxOccurs="unbounded"/>
  </xs:sequence>
  <!-- 'type' must name a simple type; "ph_ref" is a global attribute
       declaration, not a type, so xs:IDREF is used directly.
       Presumably points at a phrase's 'ph_id'. -->
  <xs:attribute name="phrase_ref" type="xs:IDREF"/>
</xs:complexType>

<xs:complexType name="IGTtrans">
  <xs:annotation>
    <xs:documentation>
      A single translation: exactly one "trans_text" string, with a
      required ID and language and an optional source-layer identifier.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="trans_text" type="xs:string" minOccurs="1" maxOccurs="1"/>
  </xs:sequence>
  <xs:attribute name="trans_id"  type="xs:ID"         use="required"/>
  <xs:attribute name="lg"        type="olac:language" use="required"/>
  <xs:attribute name="src_layer" type="xs:string"     use="optional"/>
</xs:complexType>

</xs:schema>


