# RELAX NG Compact schema for the <pdf2xml> document format:
# a root <pdf2xml> element holding <page> elements, each of which carries
# optional <fontspec> declarations and positioned <text> runs.
#
# Every definition sits on its own line on purpose: a '#' comment in RNC
# runs to the end of the physical line, so collapsing the file onto one
# line would comment out every definition following a comment.

# Root element: either empty, a single first page, or a first page followed
# by one or more further pages.
start = element pdf2xml { Firstpage? | (Firstpage, Page+) }

# The first page must declare at least one font before its text runs.
Firstpage = element page { PageAttributes, Fontspec+, Text* }

# Subsequent pages carry only text runs.
Page = element page { PageAttributes, Text* }

# Font declaration; Text refers to it through its integer id.
# NOTE(review): family is restricted to xsd:NCName, which rejects names
# containing spaces or '+' -- confirm that this matches the real input.
Fontspec =
  element fontspec {
    attribute color { text },
    attribute family { xsd:NCName },
    attribute id { xsd:integer },
    attribute size { xsd:integer }
  }

# A positioned run of text set in the font identified by @font.
Text =
  element text {
    attribute font { xsd:integer },
    BoundingBox,
    FreeText
  }

# Mixed content: character data interleaved with any number of inline
# <i>, <a href="..."> and <b> elements, which nest recursively.
# The '*' is required: without it, mixed { ... } demands exactly one inline
# element, so a plain-text <text> would be rejected.
FreeText =
  mixed {
    (element i { FreeText }
     | element a { FreeText, attribute href { xsd:anyURI } }
     | element b { FreeText })*
  }

# attributes
PageAttributes = BoundingBox, Number, Position
BoundingBox = Height, Left, Top, Width
Number = attribute number { xsd:integer }
Position = attribute position { xsd:NCName }
Height = attribute height { xsd:integer }
Left = attribute left { xsd:integer }
Top = attribute top { xsd:integer }
Width = attribute width { xsd:integer }