sax.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647
  1. //[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
  2. //[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
  3. //[5] Name ::= NameStartChar (NameChar)*
  4. var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]///\u10000-\uEFFFF
  5. var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\\u00B7\\u0300-\\u036F\\u203F-\\u2040]");
  6. var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$');
  7. //var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
  8. //var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')
  9. //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
  10. //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
  11. var S_TAG = 0;//tag name offerring
  12. var S_ATTR = 1;//attr name offerring
  13. var S_ATTR_SPACE=2;//attr name end and space offer
  14. var S_EQ = 3;//=space?
  15. var S_ATTR_NOQUOT_VALUE = 4;//attr value(no quot value only)
  16. var S_ATTR_END = 5;//attr value end and no space(quot end)
  17. var S_TAG_SPACE = 6;//(attr value end || tag end ) && (space offer)
  18. var S_TAG_CLOSE = 7;//closed el<el />
  19. /**
  20. * Creates an error that will not be caught by XMLReader aka the SAX parser.
  21. *
  22. * @param {string} message
  23. * @param {any?} locator Optional, can provide details about the location in the source
  24. * @constructor
  25. */
  26. function ParseError(message, locator) {
  27. this.message = message
  28. this.locator = locator
  29. if(Error.captureStackTrace) Error.captureStackTrace(this, ParseError);
  30. }
  31. ParseError.prototype = new Error();
  32. ParseError.prototype.name = ParseError.name
  33. function XMLReader(){
  34. }
  35. XMLReader.prototype = {
  36. parse:function(source,defaultNSMap,entityMap){
  37. var domBuilder = this.domBuilder;
  38. domBuilder.startDocument();
  39. _copy(defaultNSMap ,defaultNSMap = {})
  40. parse(source,defaultNSMap,entityMap,
  41. domBuilder,this.errorHandler);
  42. domBuilder.endDocument();
  43. }
  44. }
  45. function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
  46. function fixedFromCharCode(code) {
  47. // String.prototype.fromCharCode does not supports
  48. // > 2 bytes unicode chars directly
  49. if (code > 0xffff) {
  50. code -= 0x10000;
  51. var surrogate1 = 0xd800 + (code >> 10)
  52. , surrogate2 = 0xdc00 + (code & 0x3ff);
  53. return String.fromCharCode(surrogate1, surrogate2);
  54. } else {
  55. return String.fromCharCode(code);
  56. }
  57. }
  58. function entityReplacer(a){
  59. var k = a.slice(1,-1);
  60. if(k in entityMap){
  61. return entityMap[k];
  62. }else if(k.charAt(0) === '#'){
  63. return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x')))
  64. }else{
  65. errorHandler.error('entity not found:'+a);
  66. return a;
  67. }
  68. }
  69. function appendText(end){//has some bugs
  70. if(end>start){
  71. var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer);
  72. locator&&position(start);
  73. domBuilder.characters(xt,0,end-start);
  74. start = end
  75. }
  76. }
  77. function position(p,m){
  78. while(p>=lineEnd && (m = linePattern.exec(source))){
  79. lineStart = m.index;
  80. lineEnd = lineStart + m[0].length;
  81. locator.lineNumber++;
  82. //console.log('line++:',locator,startPos,endPos)
  83. }
  84. locator.columnNumber = p-lineStart+1;
  85. }
  86. var lineStart = 0;
  87. var lineEnd = 0;
  88. var linePattern = /.*(?:\r\n?|\n)|.*$/g
  89. var locator = domBuilder.locator;
  90. var parseStack = [{currentNSMap:defaultNSMapCopy}]
  91. var closeMap = {};
  92. var start = 0;
  93. while(true){
  94. try{
  95. var tagStart = source.indexOf('<',start);
  96. if(tagStart<0){
  97. if(!source.substr(start).match(/^\s*$/)){
  98. var doc = domBuilder.doc;
  99. var text = doc.createTextNode(source.substr(start));
  100. doc.appendChild(text);
  101. domBuilder.currentElement = text;
  102. }
  103. return;
  104. }
  105. if(tagStart>start){
  106. appendText(tagStart);
  107. }
  108. switch(source.charAt(tagStart+1)){
  109. case '/':
  110. var end = source.indexOf('>',tagStart+3);
  111. var tagName = source.substring(tagStart+2,end);
  112. var config = parseStack.pop();
  113. if(end<0){
  114. tagName = source.substring(tagStart+2).replace(/[\s<].*/,'');
  115. //console.error('#@@@@@@'+tagName)
  116. errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName);
  117. end = tagStart+1+tagName.length;
  118. }else if(tagName.match(/\s</)){
  119. tagName = tagName.replace(/[\s<].*/,'');
  120. errorHandler.error("end tag name: "+tagName+' maybe not complete');
  121. end = tagStart+1+tagName.length;
  122. }
  123. //console.error(parseStack.length,parseStack)
  124. //console.error(config);
  125. var localNSMap = config.localNSMap;
  126. var endMatch = config.tagName == tagName;
  127. var endIgnoreCaseMach = endMatch || config.tagName&&config.tagName.toLowerCase() == tagName.toLowerCase()
  128. if(endIgnoreCaseMach){
  129. domBuilder.endElement(config.uri,config.localName,tagName);
  130. if(localNSMap){
  131. for(var prefix in localNSMap){
  132. domBuilder.endPrefixMapping(prefix) ;
  133. }
  134. }
  135. if(!endMatch){
  136. errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName ); // No known test case
  137. }
  138. }else{
  139. parseStack.push(config)
  140. }
  141. end++;
  142. break;
  143. // end elment
  144. case '?':// <?...?>
  145. locator&&position(tagStart);
  146. end = parseInstruction(source,tagStart,domBuilder);
  147. break;
  148. case '!':// <!doctype,<![CDATA,<!--
  149. locator&&position(tagStart);
  150. end = parseDCC(source,tagStart,domBuilder,errorHandler);
  151. break;
  152. default:
  153. locator&&position(tagStart);
  154. var el = new ElementAttributes();
  155. var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
  156. //elStartEnd
  157. var end = parseElementStartPart(source,tagStart,el,currentNSMap,entityReplacer,errorHandler);
  158. var len = el.length;
  159. if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
  160. el.closed = true;
  161. if(!entityMap.nbsp){
  162. errorHandler.warning('unclosed xml attribute');
  163. }
  164. }
  165. if(locator && len){
  166. var locator2 = copyLocator(locator,{});
  167. //try{//attribute position fixed
  168. for(var i = 0;i<len;i++){
  169. var a = el[i];
  170. position(a.offset);
  171. a.locator = copyLocator(locator,{});
  172. }
  173. //}catch(e){console.error('@@@@@'+e)}
  174. domBuilder.locator = locator2
  175. if(appendElement(el,domBuilder,currentNSMap)){
  176. parseStack.push(el)
  177. }
  178. domBuilder.locator = locator;
  179. }else{
  180. if(appendElement(el,domBuilder,currentNSMap)){
  181. parseStack.push(el)
  182. }
  183. }
  184. if(el.uri === 'http://www.w3.org/1999/xhtml' && !el.closed){
  185. end = parseHtmlSpecialContent(source,end,el.tagName,entityReplacer,domBuilder)
  186. }else{
  187. end++;
  188. }
  189. }
  190. }catch(e){
  191. if (e instanceof ParseError) {
  192. throw e;
  193. }
  194. errorHandler.error('element parse error: '+e)
  195. end = -1;
  196. }
  197. if(end>start){
  198. start = end;
  199. }else{
  200. //TODO: 这里有可能sax回退,有位置错误风险
  201. appendText(Math.max(tagStart,start)+1);
  202. }
  203. }
  204. }
  205. function copyLocator(f,t){
  206. t.lineNumber = f.lineNumber;
  207. t.columnNumber = f.columnNumber;
  208. return t;
  209. }
  210. /**
  211. * @see #appendElement(source,elStartEnd,el,selfClosed,entityReplacer,domBuilder,parseStack);
  212. * @return end of the elementStartPart(end of elementEndPart for selfClosed el)
  213. */
  214. function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,errorHandler){
  215. /**
  216. * @param {string} qname
  217. * @param {string} value
  218. * @param {number} startIndex
  219. */
  220. function addAttribute(qname, value, startIndex) {
  221. if (qname in el.attributeNames) errorHandler.fatalError('Attribute ' + qname + ' redefined')
  222. el.addValue(qname, value, startIndex)
  223. }
  224. var attrName;
  225. var value;
  226. var p = ++start;
  227. var s = S_TAG;//status
  228. while(true){
  229. var c = source.charAt(p);
  230. switch(c){
  231. case '=':
  232. if(s === S_ATTR){//attrName
  233. attrName = source.slice(start,p);
  234. s = S_EQ;
  235. }else if(s === S_ATTR_SPACE){
  236. s = S_EQ;
  237. }else{
  238. //fatalError: equal must after attrName or space after attrName
  239. throw new Error('attribute equal must after attrName'); // No known test case
  240. }
  241. break;
  242. case '\'':
  243. case '"':
  244. if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE
  245. ){//equal
  246. if(s === S_ATTR){
  247. errorHandler.warning('attribute value must after "="')
  248. attrName = source.slice(start,p)
  249. }
  250. start = p+1;
  251. p = source.indexOf(c,start)
  252. if(p>0){
  253. value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
  254. addAttribute(attrName, value, start-1);
  255. s = S_ATTR_END;
  256. }else{
  257. //fatalError: no end quot match
  258. throw new Error('attribute value no end \''+c+'\' match');
  259. }
  260. }else if(s == S_ATTR_NOQUOT_VALUE){
  261. value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
  262. //console.log(attrName,value,start,p)
  263. addAttribute(attrName, value, start);
  264. //console.dir(el)
  265. errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
  266. start = p+1;
  267. s = S_ATTR_END
  268. }else{
  269. //fatalError: no equal before
  270. throw new Error('attribute value must after "="'); // No known test case
  271. }
  272. break;
  273. case '/':
  274. switch(s){
  275. case S_TAG:
  276. el.setTagName(source.slice(start,p));
  277. case S_ATTR_END:
  278. case S_TAG_SPACE:
  279. case S_TAG_CLOSE:
  280. s =S_TAG_CLOSE;
  281. el.closed = true;
  282. case S_ATTR_NOQUOT_VALUE:
  283. case S_ATTR:
  284. case S_ATTR_SPACE:
  285. break;
  286. //case S_EQ:
  287. default:
  288. throw new Error("attribute invalid close char('/')") // No known test case
  289. }
  290. break;
  291. case ''://end document
  292. errorHandler.error('unexpected end of input');
  293. if(s == S_TAG){
  294. el.setTagName(source.slice(start,p));
  295. }
  296. return p;
  297. case '>':
  298. switch(s){
  299. case S_TAG:
  300. el.setTagName(source.slice(start,p));
  301. case S_ATTR_END:
  302. case S_TAG_SPACE:
  303. case S_TAG_CLOSE:
  304. break;//normal
  305. case S_ATTR_NOQUOT_VALUE://Compatible state
  306. case S_ATTR:
  307. value = source.slice(start,p);
  308. if(value.slice(-1) === '/'){
  309. el.closed = true;
  310. value = value.slice(0,-1)
  311. }
  312. case S_ATTR_SPACE:
  313. if(s === S_ATTR_SPACE){
  314. value = attrName;
  315. }
  316. if(s == S_ATTR_NOQUOT_VALUE){
  317. errorHandler.warning('attribute "'+value+'" missed quot(")!');
  318. addAttribute(attrName, value.replace(/&#?\w+;/g,entityReplacer), start)
  319. }else{
  320. if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !value.match(/^(?:disabled|checked|selected)$/i)){
  321. errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
  322. }
  323. addAttribute(value, value, start)
  324. }
  325. break;
  326. case S_EQ:
  327. throw new Error('attribute value missed!!');
  328. }
  329. // console.log(tagName,tagNamePattern,tagNamePattern.test(tagName))
  330. return p;
  331. /*xml space '\x20' | #x9 | #xD | #xA; */
  332. case '\u0080':
  333. c = ' ';
  334. default:
  335. if(c<= ' '){//space
  336. switch(s){
  337. case S_TAG:
  338. el.setTagName(source.slice(start,p));//tagName
  339. s = S_TAG_SPACE;
  340. break;
  341. case S_ATTR:
  342. attrName = source.slice(start,p)
  343. s = S_ATTR_SPACE;
  344. break;
  345. case S_ATTR_NOQUOT_VALUE:
  346. var value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
  347. errorHandler.warning('attribute "'+value+'" missed quot(")!!');
  348. addAttribute(attrName, value, start)
  349. case S_ATTR_END:
  350. s = S_TAG_SPACE;
  351. break;
  352. //case S_TAG_SPACE:
  353. //case S_EQ:
  354. //case S_ATTR_SPACE:
  355. // void();break;
  356. //case S_TAG_CLOSE:
  357. //ignore warning
  358. }
  359. }else{//not space
  360. //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
  361. //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
  362. switch(s){
  363. //case S_TAG:void();break;
  364. //case S_ATTR:void();break;
  365. //case S_ATTR_NOQUOT_VALUE:void();break;
  366. case S_ATTR_SPACE:
  367. var tagName = el.tagName;
  368. if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !attrName.match(/^(?:disabled|checked|selected)$/i)){
  369. errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
  370. }
  371. addAttribute(attrName, attrName, start);
  372. start = p;
  373. s = S_ATTR;
  374. break;
  375. case S_ATTR_END:
  376. errorHandler.warning('attribute space is required"'+attrName+'"!!')
  377. case S_TAG_SPACE:
  378. s = S_ATTR;
  379. start = p;
  380. break;
  381. case S_EQ:
  382. s = S_ATTR_NOQUOT_VALUE;
  383. start = p;
  384. break;
  385. case S_TAG_CLOSE:
  386. throw new Error("elements closed character '/' and '>' must be connected to");
  387. }
  388. }
  389. }//end outer switch
  390. //console.log('p++',p)
  391. p++;
  392. }
  393. }
  394. /**
  395. * @return true if has new namespace define
  396. */
  397. function appendElement(el,domBuilder,currentNSMap){
  398. var tagName = el.tagName;
  399. var localNSMap = null;
  400. //var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
  401. var i = el.length;
  402. while(i--){
  403. var a = el[i];
  404. var qName = a.qName;
  405. var value = a.value;
  406. var nsp = qName.indexOf(':');
  407. if(nsp>0){
  408. var prefix = a.prefix = qName.slice(0,nsp);
  409. var localName = qName.slice(nsp+1);
  410. var nsPrefix = prefix === 'xmlns' && localName
  411. }else{
  412. localName = qName;
  413. prefix = null
  414. nsPrefix = qName === 'xmlns' && ''
  415. }
  416. //can not set prefix,because prefix !== ''
  417. a.localName = localName ;
  418. //prefix == null for no ns prefix attribute
  419. if(nsPrefix !== false){//hack!!
  420. if(localNSMap == null){
  421. localNSMap = {}
  422. //console.log(currentNSMap,0)
  423. _copy(currentNSMap,currentNSMap={})
  424. //console.log(currentNSMap,1)
  425. }
  426. currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
  427. a.uri = 'http://www.w3.org/2000/xmlns/'
  428. domBuilder.startPrefixMapping(nsPrefix, value)
  429. }
  430. }
  431. var i = el.length;
  432. while(i--){
  433. a = el[i];
  434. var prefix = a.prefix;
  435. if(prefix){//no prefix attribute has no namespace
  436. if(prefix === 'xml'){
  437. a.uri = 'http://www.w3.org/XML/1998/namespace';
  438. }if(prefix !== 'xmlns'){
  439. a.uri = currentNSMap[prefix || '']
  440. //{console.log('###'+a.qName,domBuilder.locator.systemId+'',currentNSMap,a.uri)}
  441. }
  442. }
  443. }
  444. var nsp = tagName.indexOf(':');
  445. if(nsp>0){
  446. prefix = el.prefix = tagName.slice(0,nsp);
  447. localName = el.localName = tagName.slice(nsp+1);
  448. }else{
  449. prefix = null;//important!!
  450. localName = el.localName = tagName;
  451. }
  452. //no prefix element has default namespace
  453. var ns = el.uri = currentNSMap[prefix || ''];
  454. domBuilder.startElement(ns,localName,tagName,el);
  455. //endPrefixMapping and startPrefixMapping have not any help for dom builder
  456. //localNSMap = null
  457. if(el.closed){
  458. domBuilder.endElement(ns,localName,tagName);
  459. if(localNSMap){
  460. for(prefix in localNSMap){
  461. domBuilder.endPrefixMapping(prefix)
  462. }
  463. }
  464. }else{
  465. el.currentNSMap = currentNSMap;
  466. el.localNSMap = localNSMap;
  467. //parseStack.push(el);
  468. return true;
  469. }
  470. }
  471. function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){
  472. if(/^(?:script|textarea)$/i.test(tagName)){
  473. var elEndStart = source.indexOf('</'+tagName+'>',elStartEnd);
  474. var text = source.substring(elStartEnd+1,elEndStart);
  475. if(/[&<]/.test(text)){
  476. if(/^script$/i.test(tagName)){
  477. //if(!/\]\]>/.test(text)){
  478. //lexHandler.startCDATA();
  479. domBuilder.characters(text,0,text.length);
  480. //lexHandler.endCDATA();
  481. return elEndStart;
  482. //}
  483. }//}else{//text area
  484. text = text.replace(/&#?\w+;/g,entityReplacer);
  485. domBuilder.characters(text,0,text.length);
  486. return elEndStart;
  487. //}
  488. }
  489. }
  490. return elStartEnd+1;
  491. }
  492. function fixSelfClosed(source,elStartEnd,tagName,closeMap){
  493. //if(tagName in closeMap){
  494. var pos = closeMap[tagName];
  495. if(pos == null){
  496. //console.log(tagName)
  497. pos = source.lastIndexOf('</'+tagName+'>')
  498. if(pos<elStartEnd){//忘记闭合
  499. pos = source.lastIndexOf('</'+tagName)
  500. }
  501. closeMap[tagName] =pos
  502. }
  503. return pos<elStartEnd;
  504. //}
  505. }
  506. function _copy(source,target){
  507. for(var n in source){target[n] = source[n]}
  508. }
  509. function parseDCC(source,start,domBuilder,errorHandler){//sure start with '<!'
  510. var next= source.charAt(start+2)
  511. switch(next){
  512. case '-':
  513. if(source.charAt(start + 3) === '-'){
  514. var end = source.indexOf('-->',start+4);
  515. //append comment source.substring(4,end)//<!--
  516. if(end>start){
  517. domBuilder.comment(source,start+4,end-start-4);
  518. return end+3;
  519. }else{
  520. errorHandler.error("Unclosed comment");
  521. return -1;
  522. }
  523. }else{
  524. //error
  525. return -1;
  526. }
  527. default:
  528. if(source.substr(start+3,6) == 'CDATA['){
  529. var end = source.indexOf(']]>',start+9);
  530. domBuilder.startCDATA();
  531. domBuilder.characters(source,start+9,end-start-9);
  532. domBuilder.endCDATA()
  533. return end+3;
  534. }
  535. //<!DOCTYPE
  536. //startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
  537. var matchs = split(source,start);
  538. var len = matchs.length;
  539. if(len>1 && /!doctype/i.test(matchs[0][0])){
  540. var name = matchs[1][0];
  541. var pubid = false;
  542. var sysid = false;
  543. if(len>3){
  544. if(/^public$/i.test(matchs[2][0])){
  545. pubid = matchs[3][0];
  546. sysid = len>4 && matchs[4][0];
  547. }else if(/^system$/i.test(matchs[2][0])){
  548. sysid = matchs[3][0];
  549. }
  550. }
  551. var lastMatch = matchs[len-1]
  552. domBuilder.startDTD(name, pubid, sysid);
  553. domBuilder.endDTD();
  554. return lastMatch.index+lastMatch[0].length
  555. }
  556. }
  557. return -1;
  558. }
  559. function parseInstruction(source,start,domBuilder){
  560. var end = source.indexOf('?>',start);
  561. if(end){
  562. var match = source.substring(start,end).match(/^<\?(\S*)\s*([\s\S]*?)\s*$/);
  563. if(match){
  564. var len = match[0].length;
  565. domBuilder.processingInstruction(match[1], match[2]) ;
  566. return end+2;
  567. }else{//error
  568. return -1;
  569. }
  570. }
  571. return -1;
  572. }
  573. function ElementAttributes(){
  574. this.attributeNames = {}
  575. }
  576. ElementAttributes.prototype = {
  577. setTagName:function(tagName){
  578. if(!tagNamePattern.test(tagName)){
  579. throw new Error('invalid tagName:'+tagName)
  580. }
  581. this.tagName = tagName
  582. },
  583. addValue:function(qName, value, offset) {
  584. if(!tagNamePattern.test(qName)){
  585. throw new Error('invalid attribute:'+qName)
  586. }
  587. this.attributeNames[qName] = this.length;
  588. this[this.length++] = {qName:qName,value:value,offset:offset}
  589. },
  590. length:0,
  591. getLocalName:function(i){return this[i].localName},
  592. getLocator:function(i){return this[i].locator},
  593. getQName:function(i){return this[i].qName},
  594. getURI:function(i){return this[i].uri},
  595. getValue:function(i){return this[i].value}
  596. // ,getIndex:function(uri, localName)){
  597. // if(localName){
  598. //
  599. // }else{
  600. // var qName = uri
  601. // }
  602. // },
  603. // getValue:function(){return this.getValue(this.getIndex.apply(this,arguments))},
  604. // getType:function(uri,localName){}
  605. // getType:function(i){},
  606. }
  607. function split(source,start){
  608. var match;
  609. var buf = [];
  610. var reg = /'[^']+'|"[^"]+"|[^\s<>\/=]+=?|(\/?\s*>|<)/g;
  611. reg.lastIndex = start;
  612. reg.exec(source);//skip <
  613. while(match = reg.exec(source)){
  614. buf.push(match);
  615. if(match[1])return buf;
  616. }
  617. }
// Public API of this module: the SAX reader and the error type that the
// reader rethrows instead of reporting through its error handler.
exports.XMLReader = XMLReader;
exports.ParseError = ParseError;