1 |
| |
2 |
| |
3 |
| |
4 |
| |
5 |
| |
6 |
| |
7 |
| |
8 |
| |
9 |
| |
10 |
| |
11 |
| |
12 |
| |
13 |
| |
14 |
| |
15 |
| |
16 |
| |
17 |
| |
18 |
| |
19 |
| |
20 |
| package org.apache.xindice.xml.sax; |
21 |
| |
22 |
| import org.apache.commons.logging.Log; |
23 |
| import org.apache.commons.logging.LogFactory; |
24 |
| import org.apache.xindice.util.ByteArrayInput; |
25 |
| import org.apache.xindice.xml.SymbolTable; |
26 |
| import org.apache.xindice.xml.XMLCompressedInput; |
27 |
| import org.apache.xindice.xml.dom.DOMCompressor; |
28 |
| |
29 |
| import org.w3c.dom.Document; |
30 |
| import org.w3c.dom.Node; |
31 |
| import org.xml.sax.ContentHandler; |
32 |
| import org.xml.sax.DTDHandler; |
33 |
| import org.xml.sax.EntityResolver; |
34 |
| import org.xml.sax.ErrorHandler; |
35 |
| import org.xml.sax.InputSource; |
36 |
| import org.xml.sax.SAXException; |
37 |
| import org.xml.sax.SAXNotRecognizedException; |
38 |
| import org.xml.sax.SAXNotSupportedException; |
39 |
| import org.xml.sax.XMLReader; |
40 |
| import org.xml.sax.helpers.AttributesImpl; |
41 |
| |
42 |
| import java.io.IOException; |
43 |
| import java.util.HashMap; |
44 |
| import java.util.Map; |
45 |
| |
46 |
| |
47 |
| |
48 |
| |
49 |
| |
50 |
| |
51 |
| public final class SAXEventGenerator implements XMLReader { |
52 |
| |
53 |
| private static final Log log = LogFactory.getLog(SAXEventGenerator.class); |
54 |
| |
55 |
| private static final int XMLNS_MAP_INCREMENT = 5; |
56 |
| |
57 |
| |
58 |
| |
59 |
| |
60 |
| |
61 |
| |
62 |
| |
63 |
| public static final String SAX_NAMESPACES_FEATURE |
64 |
| = "http://xml.org/sax/features/namespaces"; |
65 |
| |
66 |
| |
67 |
| |
68 |
| |
69 |
| |
70 |
| |
71 |
| |
72 |
| public static final String SAX_NAMESPACE_PREFIXES_FEATURE |
73 |
| = "http://xml.org/sax/features/namespace-prefixes"; |
74 |
| |
75 |
| |
76 |
| |
77 |
| |
78 |
| |
79 |
| |
80 |
| |
81 |
| private boolean hasSaxNamespaces = true; |
82 |
| |
83 |
| |
84 |
| |
85 |
| |
86 |
| |
87 |
| |
88 |
| |
89 |
| private boolean hasSaxNamespacesPrefixes; |
90 |
| |
91 |
| private SymbolTable symbols; |
92 |
| private byte[] data; |
93 |
| private int pos; |
94 |
| private int len; |
95 |
| |
96 |
| private Map properties = new HashMap(); |
97 |
| private ContentHandler content; |
98 |
| private CompressionHandler comp; |
99 |
| private ErrorHandler errors; |
100 |
| private EntityResolver entities; |
101 |
| private DTDHandler dtd; |
102 |
| |
103 |
| private boolean interrupt; |
104 |
| |
105 |
| |
106 |
8
| public SAXEventGenerator(SymbolTable symbols, byte[] data) {
|
107 |
8
| this(symbols, data, 0, data.length);
|
108 |
| } |
109 |
| |
110 |
8
| public SAXEventGenerator(SymbolTable symbols, byte[] data, int pos, int len) {
|
111 |
8
| this.symbols = symbols;
|
112 |
8
| this.data = data;
|
113 |
8
| this.pos = pos;
|
114 |
8
| this.len = len;
|
115 |
| } |
116 |
| |
117 |
11110
| public SAXEventGenerator(SymbolTable symbols, Document doc) {
|
118 |
11110
| this.symbols = symbols != null ? symbols : new SymbolTable();
|
119 |
11110
| data = DOMCompressor.compress(doc, this.symbols);
|
120 |
11110
| pos = 0;
|
121 |
11110
| len = data.length;
|
122 |
| } |
123 |
| |
124 |
| |
125 |
| |
126 |
| |
127 |
| |
128 |
| |
129 |
| |
130 |
| |
131 |
| |
132 |
0
| public boolean getFeature(String name) throws SAXNotRecognizedException, SAXNotSupportedException {
|
133 |
| |
134 |
0
| if (SAX_NAMESPACES_FEATURE.equals(name)) {
|
135 |
| |
136 |
0
| return hasSaxNamespaces;
|
137 |
0
| } else if (SAX_NAMESPACE_PREFIXES_FEATURE.equals(name)) {
|
138 |
| |
139 |
0
| return hasSaxNamespacesPrefixes;
|
140 |
| } else { |
141 |
| |
142 |
0
| return false;
|
143 |
| } |
144 |
| } |
145 |
| |
146 |
| |
147 |
| |
148 |
| |
149 |
| |
150 |
| |
151 |
| |
152 |
| |
153 |
| |
154 |
16
| public void setFeature(String name, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
|
155 |
| |
156 |
16
| if (SAX_NAMESPACES_FEATURE.equals(name)) {
|
157 |
| |
158 |
8
| hasSaxNamespaces = value;
|
159 |
8
| } else if (SAX_NAMESPACE_PREFIXES_FEATURE.equals(name)) {
|
160 |
| |
161 |
8
| hasSaxNamespacesPrefixes = value;
|
162 |
| } |
163 |
| } |
164 |
| |
165 |
0
| public Object getProperty(String name) throws SAXNotRecognizedException, SAXNotSupportedException {
|
166 |
0
| return properties.get(name);
|
167 |
| } |
168 |
| |
169 |
11100
| public void setProperty(String name, Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
|
170 |
11100
| properties.put(name, value);
|
171 |
| |
172 |
11100
| if (name.equals(CompressionHandler.HANDLER)
|
173 |
| && value instanceof CompressionHandler) { |
174 |
11100
| comp = (CompressionHandler) value;
|
175 |
| } |
176 |
| } |
177 |
| |
178 |
0
| public void setEntityResolver(EntityResolver resolver) {
|
179 |
0
| entities = resolver;
|
180 |
| } |
181 |
| |
182 |
0
| public EntityResolver getEntityResolver() {
|
183 |
0
| return entities;
|
184 |
| } |
185 |
| |
186 |
0
| public void setDTDHandler(DTDHandler handler) {
|
187 |
0
| dtd = handler;
|
188 |
| } |
189 |
| |
190 |
0
| public DTDHandler getDTDHandler() {
|
191 |
0
| return dtd;
|
192 |
| } |
193 |
| |
194 |
11118
| public void setContentHandler(ContentHandler handler) {
|
195 |
11118
| content = handler;
|
196 |
| } |
197 |
| |
198 |
0
| public ContentHandler getContentHandler() {
|
199 |
0
| return content;
|
200 |
| } |
201 |
| |
202 |
0
| public void setErrorHandler(ErrorHandler handler) {
|
203 |
0
| errors = handler;
|
204 |
| } |
205 |
| |
206 |
0
| public ErrorHandler getErrorHandler() {
|
207 |
0
| return errors;
|
208 |
| } |
209 |
| |
210 |
0
| public void parse(InputSource input) throws IOException, SAXException {
|
211 |
| } |
212 |
| |
213 |
0
| public void parse(String systemId) throws IOException, SAXException {
|
214 |
| } |
215 |
| |
216 |
65918
| private String getLocalName(String qname) {
|
217 |
65918
| int idx = qname.indexOf(":");
|
218 |
65918
| if (idx != -1) {
|
219 |
254
| return qname.substring(idx + 1);
|
220 |
| } else { |
221 |
65664
| return qname;
|
222 |
| } |
223 |
| } |
224 |
| |
225 |
21866
| private boolean isNSAttr(final String qName) {
|
226 |
21866
| return "xmlns".equals(qName) || qName.startsWith("xmlns:");
|
227 |
| } |
228 |
| |
229 |
55170
| public boolean processContainer(boolean element, int pos, int len) throws IOException, SAXException {
|
230 |
55170
| ByteArrayInput bis = new ByteArrayInput(data, pos, len);
|
231 |
55170
| XMLCompressedInput in = new XMLCompressedInput(bis, symbols);
|
232 |
55170
| String elemName = null;
|
233 |
55170
| String localName = null;
|
234 |
55170
| String nsURI = null;
|
235 |
55170
| String[] mappedPrefixes = null;
|
236 |
55170
| int nsMapCount = 0;
|
237 |
| |
238 |
55170
| if (element) {
|
239 |
44052
| in.readSignature();
|
240 |
44052
| in.readContentSize();
|
241 |
| |
242 |
44052
| short elemSymbol = in.readShort();
|
243 |
44052
| elemName = symbols.getName(elemSymbol);
|
244 |
44052
| localName = getLocalName(elemName);
|
245 |
44052
| nsURI = symbols.getNamespaceURI(elemSymbol);
|
246 |
44052
| int attrCount = in.readAttributeCount();
|
247 |
44052
| AttributesImpl attrs = new AttributesImpl();
|
248 |
44052
| for (int i = 0; i < attrCount; i++) {
|
249 |
21866
| short symbol = in.readShort();
|
250 |
21866
| short strLen = in.readShort();
|
251 |
21866
| byte[] b = new byte[strLen];
|
252 |
21866
| in.read(b);
|
253 |
| |
254 |
21866
| String attrName = symbols.getName(symbol);
|
255 |
21866
| String attrURI = symbols.getNamespaceURI(symbol);
|
256 |
21866
| String lclName = getLocalName(attrName);
|
257 |
| |
258 |
21866
| String attrValue = new String(b, "UTF8");
|
259 |
| |
260 |
| |
261 |
21866
| if (isNSAttr(attrName)) {
|
262 |
| |
263 |
| |
264 |
84
| if (mappedPrefixes == null) {
|
265 |
24
| mappedPrefixes = new String[XMLNS_MAP_INCREMENT];
|
266 |
| |
267 |
| } |
268 |
| |
269 |
| |
270 |
84
| if (nsMapCount >= mappedPrefixes.length) {
|
271 |
10
| String[] newBuf = new String[mappedPrefixes.length + XMLNS_MAP_INCREMENT];
|
272 |
10
| System.arraycopy(mappedPrefixes, 0, newBuf, 0, mappedPrefixes.length);
|
273 |
10
| mappedPrefixes = newBuf;
|
274 |
| } |
275 |
| |
276 |
| |
277 |
| |
278 |
| |
279 |
84
| String prefix = ("xmlns".equals(attrName) ? "" : lclName);
|
280 |
| |
281 |
| |
282 |
| |
283 |
84
| content.startPrefixMapping(prefix, attrValue);
|
284 |
84
| mappedPrefixes[nsMapCount++] = prefix;
|
285 |
| |
286 |
84
| if (hasSaxNamespacesPrefixes) {
|
287 |
| |
288 |
0
| attrs.addAttribute("", "", attrName, "CDATA", attrValue);
|
289 |
| } |
290 |
| } else { |
291 |
| |
292 |
21782
| attrs.addAttribute(attrURI != null ? attrURI : "",
|
293 |
| lclName, attrName, "", attrValue); |
294 |
| } |
295 |
| } |
296 |
| |
297 |
44052
| if (comp != null) {
|
298 |
43888
| comp.symbolID(elemSymbol);
|
299 |
43888
| comp.dataLocation(pos, len);
|
300 |
| } |
301 |
44052
| content.startElement(nsURI != null ? nsURI : "", localName, elemName, attrs);
|
302 |
| } else { |
303 |
11118
| in.readInt();
|
304 |
| } |
305 |
| |
306 |
55170
| while (!interrupt && bis.available() > 0) {
|
307 |
109373
| pos = bis.getPos();
|
308 |
109373
| in.readSignature();
|
309 |
109373
| len = in.readContentSize();
|
310 |
109373
| if (len == 0) {
|
311 |
0
| len = 1;
|
312 |
| } |
313 |
| |
314 |
109373
| int type = in.getNodeType();
|
315 |
109373
| switch (type) {
|
316 |
| |
317 |
44052
| case Node.ELEMENT_NODE:
|
318 |
44052
| processContainer(true, pos, len);
|
319 |
44052
| break;
|
320 |
| |
321 |
65321
| case Node.TEXT_NODE:
|
322 |
0
| case Node.PROCESSING_INSTRUCTION_NODE:
|
323 |
0
| case Node.CDATA_SECTION_NODE:
|
324 |
0
| case Node.COMMENT_NODE:
|
325 |
| { |
326 |
65321
| ByteArrayInput tbis = new ByteArrayInput(data, pos, len);
|
327 |
65321
| XMLCompressedInput tin = new XMLCompressedInput(tbis, symbols);
|
328 |
| |
329 |
65321
| tin.readSignature();
|
330 |
65321
| if (type == Node.TEXT_NODE) {
|
331 |
65321
| tin.readContentSize();
|
332 |
| } else { |
333 |
0
| tin.readInt();
|
334 |
| } |
335 |
| |
336 |
65321
| byte[] buf = new byte[tbis.available()];
|
337 |
65321
| tin.read(buf);
|
338 |
| |
339 |
65321
| String value = new String(buf, "UTF-8");
|
340 |
| |
341 |
65321
| switch (type) {
|
342 |
| |
343 |
65321
| case Node.TEXT_NODE:
|
344 |
65321
| char[] c = value.toCharArray();
|
345 |
65321
| content.characters(c, 0, c.length);
|
346 |
65321
| break;
|
347 |
| |
348 |
0
| case Node.PROCESSING_INSTRUCTION_NODE:
|
349 |
0
| int i = value.indexOf(' ');
|
350 |
0
| content.processingInstruction(value.substring(0, i), value.substring(i + 1));
|
351 |
0
| break;
|
352 |
| |
353 |
0
| case Node.CDATA_SECTION_NODE:
|
354 |
0
| case Node.COMMENT_NODE:
|
355 |
| |
356 |
0
| break;
|
357 |
| |
358 |
0
| default:
|
359 |
0
| if (log.isWarnEnabled()) {
|
360 |
0
| log.warn("invalid type : " + type);
|
361 |
| } |
362 |
| } |
363 |
| |
364 |
65321
| break;
|
365 |
| } |
366 |
| |
367 |
0
| case Node.ENTITY_REFERENCE_NODE:
|
368 |
| |
369 |
0
| break;
|
370 |
| |
371 |
0
| case Node.NOTATION_NODE:
|
372 |
0
| break;
|
373 |
| |
374 |
0
| default:
|
375 |
0
| if (log.isWarnEnabled()) {
|
376 |
0
| log.warn("invalid node type : " + type);
|
377 |
| } |
378 |
| } |
379 |
| |
380 |
109373
| bis.setPos(pos);
|
381 |
109373
| bis.skip(len);
|
382 |
| } |
383 |
| |
384 |
55170
| if (element && !interrupt) {
|
385 |
| |
386 |
44052
| content.endElement(nsURI != null ? nsURI : "", localName, elemName);
|
387 |
| |
388 |
| |
389 |
44052
| for (int i = 0; i < nsMapCount; i++) {
|
390 |
84
| content.endPrefixMapping(mappedPrefixes[i]);
|
391 |
| } |
392 |
| } |
393 |
| |
394 |
55170
| return !interrupt;
|
395 |
| } |
396 |
| |
397 |
11118
| public boolean start() throws IOException, SAXException {
|
398 |
11118
| if (comp != null) {
|
399 |
11100
| comp.symbols(symbols);
|
400 |
11100
| comp.dataBytes(data);
|
401 |
| } |
402 |
11118
| content.startDocument();
|
403 |
11118
| boolean result = processContainer(false, pos, len);
|
404 |
11118
| if (result) {
|
405 |
11118
| content.endDocument();
|
406 |
| } |
407 |
11118
| return result;
|
408 |
| } |
409 |
| |
410 |
0
| public void stop() {
|
411 |
0
| interrupt = true;
|
412 |
| } |
413 |
| } |
414 |
| |