141 lines
4.0 KiB
Mathematica
141 lines
4.0 KiB
Mathematica
HDISVM01 ;BPFO/JRP - PARSE XML DOCUMENT USING SAX;12/20/2004
|
|
;;1.0;HEALTH DATA & INFORMATICS;;Feb 22, 2005
|
|
;
|
|
SAX(XMLARR,PRSARR) ;Parse XML document using SAX interface
|
|
; Input: XMLARR - Global array containing XML document (closed root)
|
|
; PRSARR - Array to output parsed XML document (closed root)
|
|
;Output: None
|
|
; @PRSARR@("ESUBS",ESN) = Element name
|
|
; @PRSARR@("EINDX",ElementName) = Subscript number (ESN)
|
|
; @PRSARR@("ASUBS",ESN,ASN) = Attribute name
|
|
; @PRSARR@("AINDX",ESN,AttributeName) = Subscript number (ASN)
|
|
; @PRSARR@("DATA",ESN,Repetition,"V") = Value of element
|
|
; @PRSARR@("DATA",ESN,Repetition,"A",ASN) = Value of attribute
|
|
; @PRSARR@("DATA",ESN1,Rep1,ESN2,Rep2,"V") = Value of child
|
|
; element
|
|
; @PRSARR@("DATA",ESN1,Rep1,ESN2,Rep2,"A",ASN) = Value of child
|
|
; attribute
|
|
; Notes : XMLARR must be a global array (i.e. no local arrays)
|
|
; : PRSARR is initialized (i.e KILLed) on input
|
|
Q:$G(XMLARR)=""
|
|
Q:'$D(@XMLARR)
|
|
Q:$G(PRSARR)=""
|
|
N HDICBK,SUBNUM,TAGNAME
|
|
N ESUBS,EINDX,ASUBS,AINDX,DATA
|
|
S ESUBS=$NA(@PRSARR@("ESUBS"))
|
|
S EINDX=$NA(@PRSARR@("EINDX"))
|
|
S ASUBS=$NA(@PRSARR@("ASUBS"))
|
|
S AINDX=$NA(@PRSARR@("AINDX"))
|
|
S DATA=$NA(@PRSARR@("DATA"))
|
|
;Set callbacks
|
|
S HDICBK("STARTDOCUMENT")="STRTDOC^HDISVM01"
|
|
S HDICBK("ENDDOCUMENT")="ENDDOC^HDISVM01"
|
|
S HDICBK("DOCTYPE")="DOCTYPE^HDISVM01"
|
|
S HDICBK("STARTELEMENT")="STRTLMNT^HDISVM01"
|
|
S HDICBK("ENDELEMENT")="ENDLMNT^HDISVM01"
|
|
S HDICBK("CHARACTERS")="CHARS^HDISVM01"
|
|
S HDICBK("PI")="PI^HDISVM01"
|
|
S HDICBK("EXTERNAL")="EXTERN^HDISVM01"
|
|
S HDICBK("NOTATION")="NOTATION^HDISVM01"
|
|
S HDICBK("COMMENT")="COMMENT^HDISVM01"
|
|
S HDICBK("ERROR")="ERROR^HDISVM01"
|
|
;Parse XML document using SAX
|
|
K @PRSARR
|
|
D EN^MXMLPRSE(XMLARR,.HDICBK,"")
|
|
Q
|
|
;
|
|
STRTDOC ;Start document
|
|
Q
|
|
;
|
|
ENDDOC ;End document
|
|
Q
|
|
;
|
|
DOCTYPE(ROOT,PUBID,SYSID) ;DOCTYPE declaration
|
|
Q
|
|
;
|
|
STRTLMNT(NAME,ATTRLIST) ;Start element
|
|
N ESN,REP,ATTR,ASN,TMPREF
|
|
;Determine element subscript number
|
|
S ESN=+$G(@EINDX@(NAME))
|
|
I 'ESN D
|
|
.S ESN=1+$O(@ESUBS@(""),-1)
|
|
.S @ESUBS@(ESN)=NAME
|
|
.S @EINDX@(NAME)=ESN
|
|
;Determine repetition number
|
|
S REP=1+$O(@DATA@(ESN,""),-1)
|
|
;Add element subscript number and repetition number to output array
|
|
S TMPREF=$$OREF^DILF(DATA)
|
|
S TMPREF=TMPREF_ESN_","_REP_","
|
|
S DATA=$$CREF^DILF(TMPREF)
|
|
;Store attributes
|
|
S ATTR=""
|
|
F S ATTR=$O(ATTRLIST(ATTR)) Q:ATTR="" D
|
|
.;Get attribute subscript number
|
|
.S ASN=+$G(@AINDX@(ESN,ATTR))
|
|
.I 'ASN D
|
|
..S ASN=1+$O(@ASUBS@(""),-1)
|
|
..S @ASUBS@(ESN,ASN)=ATTR
|
|
..S @AINDX@(ESN,ATTR)=ASN
|
|
.;Store value
|
|
.S @DATA@("A",ASN)=ATTRLIST(ATTR)
|
|
Q
|
|
;
|
|
ENDLMNT(NAME) ;End element
|
|
N TMPREF,SUBCNT,SUBCHK
|
|
;Remove element subscript number and repition number from output array
|
|
S TMPREF=$$OREF^DILF(DATA)
|
|
S SUBCNT=$L(TMPREF,",")
|
|
S SUBCHK=SUBCNT-3
|
|
I SUBCHK>0 S TMPREF=$P(TMPREF,",",1,SUBCHK)_","
|
|
I SUBCHK<1 S TMPREF=$P(TMPREF,"(",1)_"("
|
|
S DATA=$$CREF^DILF(TMPREF)
|
|
Q
|
|
;
|
|
CHARS(TEXT) ;Non-markup content
|
|
;Store element value
|
|
S @DATA@("V")=TEXT
|
|
Q
|
|
;
|
|
PI(TARGET,TEXT) ;Processing instruction
|
|
Q
|
|
;
|
|
EXTERN(SYSID,PUBID,GLOBAL) ;External entity reference
|
|
Q
|
|
;
|
|
NOTATION(NAME,SYSID,PUBID) ;Notation declaration
|
|
Q
|
|
;
|
|
COMMENT(TEXT) ;Comment
|
|
Q
|
|
;
|
|
ERROR(ERR) ;Error
|
|
Q
|
|
;
|
|
UNESC(TEXT) ;Convert escaped characters
|
|
;Assumes TEXT is not corrupt
|
|
N ESCBEG,ESCEND,ESCTXT,ESCCHAR,OUTPUT
|
|
S TEXT=$G(TEXT)
|
|
I TEXT="" Q TEXT
|
|
I TEXT'["&" Q TEXT
|
|
;Do conversion
|
|
S OUTPUT=""
|
|
F Q:TEXT'["&" D
|
|
.;Find escaped character
|
|
.S ESCBEG=$F(TEXT,"&")
|
|
.S ESCEND=$F(TEXT,";",ESCBEG)
|
|
.S ESCTXT=$E(TEXT,ESCBEG,ESCEND-2)
|
|
.;Convert escaped character
|
|
.S ESCCHAR=""
|
|
.I ESCTXT="amp" S ESCCHAR="&"
|
|
.I ESCTXT="lt" S ESCCHAR="<"
|
|
.I ESCTXT="gt" S ESCCHAR=">"
|
|
.I ESCTXT="apos" S ESCCHAR="'"
|
|
.I ESCTXT="quot" S ESCCHAR=$C(34)
|
|
.;Replace escaped character with actual character
|
|
.S OUTPUT=OUTPUT_$E(TEXT,1,ESCBEG-2)_ESCCHAR
|
|
.;Continue processing rest of string
|
|
.S TEXT=$E(TEXT,ESCEND,$L(TEXT))
|
|
;Add on remainder of text
|
|
S OUTPUT=OUTPUT_TEXT
|
|
Q OUTPUT
|