{- |
This module defines an internal (generic) representation for XML
documents including their DTDs.
History:
The original module was derived by hand from the XML specification,
following the grammar precisely. Then we simplified the types,
removing layers of indirection and redundancy, and generally making
things easier to work with. Then we allowed PEReferences to be
ubiquitous, by removing them from the types and resolving all
PE references at parse-time. Finally, we added a per-document
symbol table for GEReferences, and a whitespace-significance flag
for plaintext.
-}
module Text.XML.HaXml.Types
(
-- * A simple symbol table mapping strings (references) to values.
SymTab
-- ** Symbol table operations
, emptyST
, addST
, lookupST
-- * XML Types
-- ** The top-level document container
, Document(..)
-- ** The main document content
, Element(..)
, ElemTag(..)
, Content(..)
, Attribute
, AttValue(..)
, info
-- ** Administrative parts of the document
, Prolog(..)
, XMLDecl(..)
, Misc(..)
, ProcessingInstruction
, SDDecl
, VersionInfo
, Comment
, PITarget
-- ** The DTD
-- *** content model
, DocTypeDecl(..)
, MarkupDecl(..)
, ExtSubset(..)
, ExtSubsetDecl(..)
, ElementDecl(..)
, ContentSpec(..)
, CP(..)
, Modifier(..)
, Mixed(..)
-- *** attribute model
, AttListDecl(..)
, AttDef(..)
, AttType(..)
, TokenizedType(..)
, EnumeratedType(..)
, NotationType
, Enumeration
, DefaultDecl(..)
, FIXED(..)
-- *** conditional sections
, ConditionalSect(..)
, IncludeSect
, IgnoreSect
, Ignore(..)
, IgnoreSectContents(..)
-- ** References
, Reference(..)
, EntityRef
, CharRef
, PEReference
-- ** Entities
, EntityDecl(..)
, GEDecl(..)
, PEDecl(..)
, EntityDef(..)
, PEDef(..)
, ExternalID(..)
, NDataDecl(..)
, TextDecl(..)
, ExtParsedEnt(..)
, ExtPE(..)
, NotationDecl(..)
, PublicID(..)
, EncodingDecl(..)
, EntityValue(..)
, EV(..)
, PubidLiteral(..)
, SystemLiteral(..)
-- ** Basic value types
, Name
, Names
, NmToken
, NmTokens
, CharData
, CDSect
) where
{- A simple symbol table for storing macros whilst parsing. -}
type SymTab a = [(String,a)]
emptyST :: SymTab a
emptyST = []
addST :: String -> a -> SymTab a -> SymTab a
addST n v = ((n,v):)
lookupST :: String -> SymTab a -> Maybe a
lookupST = lookup
{- XML types start here -}
-- | The symbol table stored in a document holds all its general entity
-- reference definitions.
data Document i = Document Prolog (SymTab EntityDef) (Element i) [Misc]
deriving Eq
data Prolog = Prolog (Maybe XMLDecl) [Misc] (Maybe DocTypeDecl) [Misc]
deriving Eq
data XMLDecl = XMLDecl VersionInfo (Maybe EncodingDecl) (Maybe SDDecl)
deriving Eq
data Misc = Comment Comment
| PI ProcessingInstruction
deriving Eq
type ProcessingInstruction = (PITarget,String)
type SDDecl = Bool
type VersionInfo = String
type Comment = String
type PITarget = String
data DocTypeDecl = DTD Name (Maybe ExternalID) [MarkupDecl] deriving Eq
data MarkupDecl = Element ElementDecl
| AttList AttListDecl
| Entity EntityDecl
| Notation NotationDecl
| MarkupMisc Misc
deriving Eq
data ExtSubset = ExtSubset (Maybe TextDecl) [ExtSubsetDecl] deriving Eq
data ExtSubsetDecl = ExtMarkupDecl MarkupDecl
| ExtConditionalSect ConditionalSect
deriving Eq
data Element i = Elem Name [Attribute] [Content i] deriving Eq
-- intermediate for parsing
data ElemTag = ElemTag Name [Attribute]
type Attribute = (Name, AttValue)
data Content i = CElem (Element i) i
| CString Bool CharData i
-- ^ bool is whether whitespace is significant
| CRef Reference i
| CMisc Misc i
deriving Eq
info :: Content t -> t
info (CElem _ i) = i
info (CString _ _ i) = i
info (CRef _ i) = i
info (CMisc _ i) = i
instance Functor Document where
fmap f (Document p st e ms) = Document p st (fmap f e) ms
instance Functor Element where
fmap f (Elem t as cs) = Elem t as (map (fmap f) cs)
instance Functor Content where
fmap f (CElem e i) = CElem (fmap f e) (f i)
fmap f (CString b s i) = CString b s (f i)
fmap f (CRef r i) = CRef r (f i)
fmap f (CMisc m i) = CMisc m (f i)
data ElementDecl = ElementDecl Name ContentSpec deriving Eq
data ContentSpec = EMPTY
| ANY
| Mixed Mixed
| ContentSpec CP
deriving Eq
-- FIXME: What is TagName here? Seems to be in disagreement with XML spec.
data CP = TagName Name Modifier
| Choice [CP] Modifier
| Seq [CP] Modifier
deriving Eq
data Modifier = None -- ^ Just One
| Query -- ^ Zero Or One
| Star -- ^ Zero Or More
| Plus -- ^ One Or More
deriving Eq
data Mixed = PCDATA
| PCDATAplus [Name]
deriving Eq
data AttListDecl = AttListDecl Name [AttDef] deriving Eq
data AttDef = AttDef Name AttType DefaultDecl deriving Eq
data AttType = StringType
| TokenizedType TokenizedType
| EnumeratedType EnumeratedType
deriving Eq
data TokenizedType = ID
| IDREF
| IDREFS
| ENTITY
| ENTITIES
| NMTOKEN
| NMTOKENS
deriving Eq
data EnumeratedType = NotationType NotationType
| Enumeration Enumeration
deriving Eq
type NotationType = [Name] -- nonempty list
type Enumeration = [NmToken] -- nonempty list
data DefaultDecl = REQUIRED
| IMPLIED
| DefaultTo AttValue (Maybe FIXED)
deriving Eq
data FIXED = FIXED deriving Eq
data ConditionalSect = IncludeSect IncludeSect
| IgnoreSect IgnoreSect
deriving Eq
type IncludeSect = [ExtSubsetDecl]
type IgnoreSect = [IgnoreSectContents]
data Ignore = Ignore deriving Eq
data IgnoreSectContents = IgnoreSectContents Ignore [(IgnoreSectContents,Ignore)] deriving Eq
data Reference = RefEntity EntityRef
| RefChar CharRef
deriving (Eq,Show)
type EntityRef = Name
type CharRef = Int
type PEReference = Name
data EntityDecl = EntityGEDecl GEDecl
| EntityPEDecl PEDecl
deriving Eq
data GEDecl = GEDecl Name EntityDef deriving Eq
data PEDecl = PEDecl Name PEDef deriving Eq
data EntityDef = DefEntityValue EntityValue
| DefExternalID ExternalID (Maybe NDataDecl)
deriving Eq
data PEDef = PEDefEntityValue EntityValue
| PEDefExternalID ExternalID deriving (Eq,Show)
data ExternalID = SYSTEM SystemLiteral
| PUBLIC PubidLiteral SystemLiteral deriving (Eq,Show)
newtype NDataDecl = NDATA Name deriving Eq
data TextDecl = TextDecl (Maybe VersionInfo) EncodingDecl deriving Eq
data ExtParsedEnt i = ExtParsedEnt (Maybe TextDecl) (Content i) deriving Eq
data ExtPE = ExtPE (Maybe TextDecl) [ExtSubsetDecl] deriving Eq
data NotationDecl = NOTATION Name (Either ExternalID PublicID) deriving Eq
newtype PublicID = PUBLICID PubidLiteral deriving Eq
newtype EncodingDecl = EncodingDecl String deriving Eq
type Name = String -- non-empty string
type Names = [Name] -- non-empty list
type NmToken = String -- non-empty string
type NmTokens = [NmToken] -- non-empty list
data AttValue = AttValue [Either String Reference] deriving Eq
instance Show AttValue where
show (AttValue v) = concatMap decode v
where
decode (Left w) = w
decode (Right (RefEntity ent)) = "&"++ent++";"
decode (Right (RefChar cref)) = "&"++show cref++";"
data EntityValue = EntityValue [EV] deriving (Eq,Show)
data EV = EVString String
-- -- | EVPERef PEReference
| EVRef Reference deriving (Eq,Show)
newtype PubidLiteral = PubidLiteral String deriving (Eq,Show)
newtype SystemLiteral = SystemLiteral String deriving (Eq,Show)
type CharData = String
type CDSect = CharData
instance Eq ElemTag where
(ElemTag n _) == (ElemTag m _) = n==m