|
1 |
| |
|
2 |
| |
|
3 |
| |
|
4 |
| |
|
5 |
| |
|
6 |
| |
|
7 |
| |
|
8 |
| |
|
9 |
| |
|
10 |
| |
|
11 |
| |
|
12 |
| |
|
13 |
| |
|
14 |
| |
|
15 |
| |
|
16 |
| |
|
17 |
| |
|
18 |
| |
|
19 |
| |
|
20 |
| package org.apache.xindice.xml.sax; |
|
21 |
| |
|
22 |
| import org.apache.commons.logging.Log; |
|
23 |
| import org.apache.commons.logging.LogFactory; |
|
24 |
| import org.apache.xindice.util.ByteArrayInput; |
|
25 |
| import org.apache.xindice.xml.SymbolTable; |
|
26 |
| import org.apache.xindice.xml.XMLCompressedInput; |
|
27 |
| import org.apache.xindice.xml.dom.DOMCompressor; |
|
28 |
| |
|
29 |
| import org.w3c.dom.Document; |
|
30 |
| import org.w3c.dom.Node; |
|
31 |
| import org.xml.sax.ContentHandler; |
|
32 |
| import org.xml.sax.DTDHandler; |
|
33 |
| import org.xml.sax.EntityResolver; |
|
34 |
| import org.xml.sax.ErrorHandler; |
|
35 |
| import org.xml.sax.InputSource; |
|
36 |
| import org.xml.sax.SAXException; |
|
37 |
| import org.xml.sax.SAXNotRecognizedException; |
|
38 |
| import org.xml.sax.SAXNotSupportedException; |
|
39 |
| import org.xml.sax.XMLReader; |
|
40 |
| import org.xml.sax.helpers.AttributesImpl; |
|
41 |
| |
|
42 |
| import java.io.IOException; |
|
43 |
| import java.util.HashMap; |
|
44 |
| import java.util.Map; |
|
45 |
| |
|
46 |
| |
|
47 |
| |
|
48 |
| |
|
49 |
| |
|
50 |
| |
|
51 |
| public final class SAXEventGenerator implements XMLReader { |
|
52 |
| |
|
53 |
| private static final Log log = LogFactory.getLog(SAXEventGenerator.class); |
|
54 |
| |
|
55 |
| private static final int XMLNS_MAP_INCREMENT = 5; |
|
56 |
| |
|
57 |
| |
|
58 |
| |
|
59 |
| |
|
60 |
| |
|
61 |
| |
|
62 |
| |
|
63 |
| public static final String SAX_NAMESPACES_FEATURE |
|
64 |
| = "http://xml.org/sax/features/namespaces"; |
|
65 |
| |
|
66 |
| |
|
67 |
| |
|
68 |
| |
|
69 |
| |
|
70 |
| |
|
71 |
| |
|
72 |
| public static final String SAX_NAMESPACE_PREFIXES_FEATURE |
|
73 |
| = "http://xml.org/sax/features/namespace-prefixes"; |
|
74 |
| |
|
75 |
| |
|
76 |
| |
|
77 |
| |
|
78 |
| |
|
79 |
| |
|
80 |
| |
|
81 |
| private boolean hasSaxNamespaces = true; |
|
82 |
| |
|
83 |
| |
|
84 |
| |
|
85 |
| |
|
86 |
| |
|
87 |
| |
|
88 |
| |
|
89 |
| private boolean hasSaxNamespacesPrefixes; |
|
90 |
| |
|
91 |
| private SymbolTable symbols; |
|
92 |
| private byte[] data; |
|
93 |
| private int pos; |
|
94 |
| private int len; |
|
95 |
| |
|
96 |
| private Map properties = new HashMap(); |
|
97 |
| private ContentHandler content; |
|
98 |
| private CompressionHandler comp; |
|
99 |
| private ErrorHandler errors; |
|
100 |
| private EntityResolver entities; |
|
101 |
| private DTDHandler dtd; |
|
102 |
| |
|
103 |
| private boolean interrupt; |
|
104 |
| |
|
105 |
| |
|
106 |
8
| public SAXEventGenerator(SymbolTable symbols, byte[] data) {
|
|
107 |
8
| this(symbols, data, 0, data.length);
|
|
108 |
| } |
|
109 |
| |
|
110 |
8
| public SAXEventGenerator(SymbolTable symbols, byte[] data, int pos, int len) {
|
|
111 |
8
| this.symbols = symbols;
|
|
112 |
8
| this.data = data;
|
|
113 |
8
| this.pos = pos;
|
|
114 |
8
| this.len = len;
|
|
115 |
| } |
|
116 |
| |
|
117 |
11110
| public SAXEventGenerator(SymbolTable symbols, Document doc) {
|
|
118 |
11110
| this.symbols = symbols != null ? symbols : new SymbolTable();
|
|
119 |
11110
| data = DOMCompressor.compress(doc, this.symbols);
|
|
120 |
11110
| pos = 0;
|
|
121 |
11110
| len = data.length;
|
|
122 |
| } |
|
123 |
| |
|
124 |
| |
|
125 |
| |
|
126 |
| |
|
127 |
| |
|
128 |
| |
|
129 |
| |
|
130 |
| |
|
131 |
| |
|
132 |
0
| public boolean getFeature(String name) throws SAXNotRecognizedException, SAXNotSupportedException {
|
|
133 |
| |
|
134 |
0
| if (SAX_NAMESPACES_FEATURE.equals(name)) {
|
|
135 |
| |
|
136 |
0
| return hasSaxNamespaces;
|
|
137 |
0
| } else if (SAX_NAMESPACE_PREFIXES_FEATURE.equals(name)) {
|
|
138 |
| |
|
139 |
0
| return hasSaxNamespacesPrefixes;
|
|
140 |
| } else { |
|
141 |
| |
|
142 |
0
| return false;
|
|
143 |
| } |
|
144 |
| } |
|
145 |
| |
|
146 |
| |
|
147 |
| |
|
148 |
| |
|
149 |
| |
|
150 |
| |
|
151 |
| |
|
152 |
| |
|
153 |
| |
|
154 |
16
| public void setFeature(String name, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
|
|
155 |
| |
|
156 |
16
| if (SAX_NAMESPACES_FEATURE.equals(name)) {
|
|
157 |
| |
|
158 |
8
| hasSaxNamespaces = value;
|
|
159 |
8
| } else if (SAX_NAMESPACE_PREFIXES_FEATURE.equals(name)) {
|
|
160 |
| |
|
161 |
8
| hasSaxNamespacesPrefixes = value;
|
|
162 |
| } |
|
163 |
| } |
|
164 |
| |
|
165 |
0
| public Object getProperty(String name) throws SAXNotRecognizedException, SAXNotSupportedException {
|
|
166 |
0
| return properties.get(name);
|
|
167 |
| } |
|
168 |
| |
|
169 |
11100
| public void setProperty(String name, Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
|
|
170 |
11100
| properties.put(name, value);
|
|
171 |
| |
|
172 |
11100
| if (name.equals(CompressionHandler.HANDLER)
|
|
173 |
| && value instanceof CompressionHandler) { |
|
174 |
11100
| comp = (CompressionHandler) value;
|
|
175 |
| } |
|
176 |
| } |
|
177 |
| |
|
178 |
0
| public void setEntityResolver(EntityResolver resolver) {
|
|
179 |
0
| entities = resolver;
|
|
180 |
| } |
|
181 |
| |
|
182 |
0
| public EntityResolver getEntityResolver() {
|
|
183 |
0
| return entities;
|
|
184 |
| } |
|
185 |
| |
|
186 |
0
| public void setDTDHandler(DTDHandler handler) {
|
|
187 |
0
| dtd = handler;
|
|
188 |
| } |
|
189 |
| |
|
190 |
0
| public DTDHandler getDTDHandler() {
|
|
191 |
0
| return dtd;
|
|
192 |
| } |
|
193 |
| |
|
194 |
11118
| public void setContentHandler(ContentHandler handler) {
|
|
195 |
11118
| content = handler;
|
|
196 |
| } |
|
197 |
| |
|
198 |
0
| public ContentHandler getContentHandler() {
|
|
199 |
0
| return content;
|
|
200 |
| } |
|
201 |
| |
|
202 |
0
| public void setErrorHandler(ErrorHandler handler) {
|
|
203 |
0
| errors = handler;
|
|
204 |
| } |
|
205 |
| |
|
206 |
0
| public ErrorHandler getErrorHandler() {
|
|
207 |
0
| return errors;
|
|
208 |
| } |
|
209 |
| |
|
210 |
0
| public void parse(InputSource input) throws IOException, SAXException {
|
|
211 |
| } |
|
212 |
| |
|
213 |
0
| public void parse(String systemId) throws IOException, SAXException {
|
|
214 |
| } |
|
215 |
| |
|
216 |
65918
| private String getLocalName(String qname) {
|
|
217 |
65918
| int idx = qname.indexOf(":");
|
|
218 |
65918
| if (idx != -1) {
|
|
219 |
254
| return qname.substring(idx + 1);
|
|
220 |
| } else { |
|
221 |
65664
| return qname;
|
|
222 |
| } |
|
223 |
| } |
|
224 |
| |
|
225 |
21866
| private boolean isNSAttr(final String qName) {
|
|
226 |
21866
| return "xmlns".equals(qName) || qName.startsWith("xmlns:");
|
|
227 |
| } |
|
228 |
| |
|
229 |
55170
| public boolean processContainer(boolean element, int pos, int len) throws IOException, SAXException {
|
|
230 |
55170
| ByteArrayInput bis = new ByteArrayInput(data, pos, len);
|
|
231 |
55170
| XMLCompressedInput in = new XMLCompressedInput(bis, symbols);
|
|
232 |
55170
| String elemName = null;
|
|
233 |
55170
| String localName = null;
|
|
234 |
55170
| String nsURI = null;
|
|
235 |
55170
| String[] mappedPrefixes = null;
|
|
236 |
55170
| int nsMapCount = 0;
|
|
237 |
| |
|
238 |
55170
| if (element) {
|
|
239 |
44052
| in.readSignature();
|
|
240 |
44052
| in.readContentSize();
|
|
241 |
| |
|
242 |
44052
| short elemSymbol = in.readShort();
|
|
243 |
44052
| elemName = symbols.getName(elemSymbol);
|
|
244 |
44052
| localName = getLocalName(elemName);
|
|
245 |
44052
| nsURI = symbols.getNamespaceURI(elemSymbol);
|
|
246 |
44052
| int attrCount = in.readAttributeCount();
|
|
247 |
44052
| AttributesImpl attrs = new AttributesImpl();
|
|
248 |
44052
| for (int i = 0; i < attrCount; i++) {
|
|
249 |
21866
| short symbol = in.readShort();
|
|
250 |
21866
| short strLen = in.readShort();
|
|
251 |
21866
| byte[] b = new byte[strLen];
|
|
252 |
21866
| in.read(b);
|
|
253 |
| |
|
254 |
21866
| String attrName = symbols.getName(symbol);
|
|
255 |
21866
| String attrURI = symbols.getNamespaceURI(symbol);
|
|
256 |
21866
| String lclName = getLocalName(attrName);
|
|
257 |
| |
|
258 |
21866
| String attrValue = new String(b, "UTF8");
|
|
259 |
| |
|
260 |
| |
|
261 |
21866
| if (isNSAttr(attrName)) {
|
|
262 |
| |
|
263 |
| |
|
264 |
84
| if (mappedPrefixes == null) {
|
|
265 |
24
| mappedPrefixes = new String[XMLNS_MAP_INCREMENT];
|
|
266 |
| |
|
267 |
| } |
|
268 |
| |
|
269 |
| |
|
270 |
84
| if (nsMapCount >= mappedPrefixes.length) {
|
|
271 |
10
| String[] newBuf = new String[mappedPrefixes.length + XMLNS_MAP_INCREMENT];
|
|
272 |
10
| System.arraycopy(mappedPrefixes, 0, newBuf, 0, mappedPrefixes.length);
|
|
273 |
10
| mappedPrefixes = newBuf;
|
|
274 |
| } |
|
275 |
| |
|
276 |
| |
|
277 |
| |
|
278 |
| |
|
279 |
84
| String prefix = ("xmlns".equals(attrName) ? "" : lclName);
|
|
280 |
| |
|
281 |
| |
|
282 |
| |
|
283 |
84
| content.startPrefixMapping(prefix, attrValue);
|
|
284 |
84
| mappedPrefixes[nsMapCount++] = prefix;
|
|
285 |
| |
|
286 |
84
| if (hasSaxNamespacesPrefixes) {
|
|
287 |
| |
|
288 |
0
| attrs.addAttribute("", "", attrName, "CDATA", attrValue);
|
|
289 |
| } |
|
290 |
| } else { |
|
291 |
| |
|
292 |
21782
| attrs.addAttribute(attrURI != null ? attrURI : "",
|
|
293 |
| lclName, attrName, "", attrValue); |
|
294 |
| } |
|
295 |
| } |
|
296 |
| |
|
297 |
44052
| if (comp != null) {
|
|
298 |
43888
| comp.symbolID(elemSymbol);
|
|
299 |
43888
| comp.dataLocation(pos, len);
|
|
300 |
| } |
|
301 |
44052
| content.startElement(nsURI != null ? nsURI : "", localName, elemName, attrs);
|
|
302 |
| } else { |
|
303 |
11118
| in.readInt();
|
|
304 |
| } |
|
305 |
| |
|
306 |
55170
| while (!interrupt && bis.available() > 0) {
|
|
307 |
109373
| pos = bis.getPos();
|
|
308 |
109373
| in.readSignature();
|
|
309 |
109373
| len = in.readContentSize();
|
|
310 |
109373
| if (len == 0) {
|
|
311 |
0
| len = 1;
|
|
312 |
| } |
|
313 |
| |
|
314 |
109373
| int type = in.getNodeType();
|
|
315 |
109373
| switch (type) {
|
|
316 |
| |
|
317 |
44052
| case Node.ELEMENT_NODE:
|
|
318 |
44052
| processContainer(true, pos, len);
|
|
319 |
44052
| break;
|
|
320 |
| |
|
321 |
65321
| case Node.TEXT_NODE:
|
|
322 |
0
| case Node.PROCESSING_INSTRUCTION_NODE:
|
|
323 |
0
| case Node.CDATA_SECTION_NODE:
|
|
324 |
0
| case Node.COMMENT_NODE:
|
|
325 |
| { |
|
326 |
65321
| ByteArrayInput tbis = new ByteArrayInput(data, pos, len);
|
|
327 |
65321
| XMLCompressedInput tin = new XMLCompressedInput(tbis, symbols);
|
|
328 |
| |
|
329 |
65321
| tin.readSignature();
|
|
330 |
65321
| if (type == Node.TEXT_NODE) {
|
|
331 |
65321
| tin.readContentSize();
|
|
332 |
| } else { |
|
333 |
0
| tin.readInt();
|
|
334 |
| } |
|
335 |
| |
|
336 |
65321
| byte[] buf = new byte[tbis.available()];
|
|
337 |
65321
| tin.read(buf);
|
|
338 |
| |
|
339 |
65321
| String value = new String(buf, "UTF-8");
|
|
340 |
| |
|
341 |
65321
| switch (type) {
|
|
342 |
| |
|
343 |
65321
| case Node.TEXT_NODE:
|
|
344 |
65321
| char[] c = value.toCharArray();
|
|
345 |
65321
| content.characters(c, 0, c.length);
|
|
346 |
65321
| break;
|
|
347 |
| |
|
348 |
0
| case Node.PROCESSING_INSTRUCTION_NODE:
|
|
349 |
0
| int i = value.indexOf(' ');
|
|
350 |
0
| content.processingInstruction(value.substring(0, i), value.substring(i + 1));
|
|
351 |
0
| break;
|
|
352 |
| |
|
353 |
0
| case Node.CDATA_SECTION_NODE:
|
|
354 |
0
| case Node.COMMENT_NODE:
|
|
355 |
| |
|
356 |
0
| break;
|
|
357 |
| |
|
358 |
0
| default:
|
|
359 |
0
| if (log.isWarnEnabled()) {
|
|
360 |
0
| log.warn("invalid type : " + type);
|
|
361 |
| } |
|
362 |
| } |
|
363 |
| |
|
364 |
65321
| break;
|
|
365 |
| } |
|
366 |
| |
|
367 |
0
| case Node.ENTITY_REFERENCE_NODE:
|
|
368 |
| |
|
369 |
0
| break;
|
|
370 |
| |
|
371 |
0
| case Node.NOTATION_NODE:
|
|
372 |
0
| break;
|
|
373 |
| |
|
374 |
0
| default:
|
|
375 |
0
| if (log.isWarnEnabled()) {
|
|
376 |
0
| log.warn("invalid node type : " + type);
|
|
377 |
| } |
|
378 |
| } |
|
379 |
| |
|
380 |
109373
| bis.setPos(pos);
|
|
381 |
109373
| bis.skip(len);
|
|
382 |
| } |
|
383 |
| |
|
384 |
55170
| if (element && !interrupt) {
|
|
385 |
| |
|
386 |
44052
| content.endElement(nsURI != null ? nsURI : "", localName, elemName);
|
|
387 |
| |
|
388 |
| |
|
389 |
44052
| for (int i = 0; i < nsMapCount; i++) {
|
|
390 |
84
| content.endPrefixMapping(mappedPrefixes[i]);
|
|
391 |
| } |
|
392 |
| } |
|
393 |
| |
|
394 |
55170
| return !interrupt;
|
|
395 |
| } |
|
396 |
| |
|
397 |
11118
| public boolean start() throws IOException, SAXException {
|
|
398 |
11118
| if (comp != null) {
|
|
399 |
11100
| comp.symbols(symbols);
|
|
400 |
11100
| comp.dataBytes(data);
|
|
401 |
| } |
|
402 |
11118
| content.startDocument();
|
|
403 |
11118
| boolean result = processContainer(false, pos, len);
|
|
404 |
11118
| if (result) {
|
|
405 |
11118
| content.endDocument();
|
|
406 |
| } |
|
407 |
11118
| return result;
|
|
408 |
| } |
|
409 |
| |
|
410 |
0
| public void stop() {
|
|
411 |
0
| interrupt = true;
|
|
412 |
| } |
|
413 |
| } |
|
414 |
| |