506 lines
12 KiB
Java
506 lines
12 KiB
Java
|
/*
|
||
|
|
||
|
Copyright 2006 Rene Grothmann, modified by Eric Hakenholz
|
||
|
|
||
|
This file is part of C.a.R. software.
|
||
|
|
||
|
C.a.R. is a free software: you can redistribute it and/or modify
|
||
|
it under the terms of the GNU General Public License as published by
|
||
|
the Free Software Foundation, version 3 of the License.
|
||
|
|
||
|
C.a.R. is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License
|
||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
|
||
|
*/
|
||
|
|
||
|
|
||
|
package rene.util.xml;
|
||
|
|
||
|
import java.io.BufferedInputStream;
|
||
|
import java.io.BufferedReader;
|
||
|
import java.io.ByteArrayInputStream;
|
||
|
import java.io.FileInputStream;
|
||
|
import java.io.IOException;
|
||
|
import java.io.InputStream;
|
||
|
import java.io.InputStreamReader;
|
||
|
import java.io.UnsupportedEncodingException;
|
||
|
|
||
|
import rene.util.SimpleByteBuffer;
|
||
|
import rene.util.SimpleStringBuffer;
|
||
|
import rene.util.list.ListElement;
|
||
|
|
||
|
public class XmlReader {
|
||
|
/** Character source that {@link #read()} pulls lines from; may be null. */
BufferedReader In;

/** Scratch buffer shared by the scanFor/scanTagFor methods, which clear it on entry. */
SimpleStringBuffer buf = new SimpleStringBuffer(10000);

/**
 * Creates a reader that has no input attached yet.
 */
public XmlReader() {
    this((BufferedReader) null);
}

/**
 * Creates a reader on top of an already decoded character stream.
 *
 * @param in the reader to take the XML text from
 */
public XmlReader(final BufferedReader in) {
    this.In = in;
}
|
||
|
|
||
|
/**
 * Creates a reader for a raw byte stream. The whole stream is read into
 * memory and closed, the encoding named in the {@code <?xml ... ?>}
 * declaration is determined, and the bytes are then decoded with it.
 *
 * @param in the byte stream containing the XML document; it is closed
 *        by this constructor
 * @throws XmlReaderException if the stream cannot be read or no usable
 *         {@code <?xml ... ?>} declaration is found
 */
public XmlReader(final InputStream in) throws XmlReaderException {
    In = openDecoded(in);
}

/**
 * Attaches this reader to a new raw byte stream, exactly like
 * {@link #XmlReader(InputStream)} does for a fresh instance. Any partially
 * consumed line of the previous input is discarded on success.
 *
 * @param in the byte stream containing the XML document; it is closed
 *        by this method
 * @throws XmlReaderException if the stream cannot be read or no usable
 *         {@code <?xml ... ?>} declaration is found; the reader keeps
 *         its previous input in that case
 */
public void init(final InputStream in) throws XmlReaderException {
    In = openDecoded(in);
    // Do not serve left-over characters of the previous stream.
    Line = null;
    LinePos = 0;
}

/**
 * Reads all bytes of the stream, detects the declared encoding and returns
 * a reader that decodes the bytes accordingly. Every failure is reported
 * as an XmlReaderException carrying the text of the original exception.
 */
private static BufferedReader openDecoded(final InputStream in)
        throws XmlReaderException {
    try {
        final byte[] data = slurp(in);
        return decode(data, declaredEncoding(data));
    } catch (final Exception e) {
        throw new XmlReaderException(e.toString());
    }
}

/** Reads the stream to its end and closes it, even when reading fails. */
private static byte[] slurp(final InputStream in) throws Exception {
    final BufferedInputStream rin = new BufferedInputStream(in);
    try {
        final SimpleByteBuffer bytes = new SimpleByteBuffer(10000);
        for (int k = rin.read(); k >= 0; k = rin.read()) {
            bytes.append((byte) k);
        }
        return bytes.getByteArray();
    } finally {
        rin.close();
    }
}

/**
 * Finds the first {@code <?xml ... ?>} declaration in the data and returns
 * the upper-cased value of its encoding attribute, or null if the
 * declaration has none.
 */
private static String declaredEncoding(final byte[] data) throws Exception {
    // The declaration itself is plain ASCII, so probe with an ASCII
    // reader and fall back to the platform default if that is missing.
    final ByteArrayInputStream bin = new ByteArrayInputStream(data);
    XmlReader probe;
    try {
        probe = new XmlReader(new BufferedReader(new InputStreamReader(bin,
                "ASCII")));
    } catch (final UnsupportedEncodingException ex) {
        probe = new XmlReader(
                new BufferedReader(new InputStreamReader(bin)));
    }

    while (true) {
        // Advance to the next '<'.
        int c;
        do {
            c = probe.read();
            if (c == -1) {
                throw new Exception("<?xml> tag not found");
            }
        } while (c != '<');
        if (!probe.found("?xml")) {
            continue;
        }
        final String declaration = probe.scanFor("?>");
        if (declaration == null) {
            throw new Exception("<?xml> tag error");
        }
        return encodingOf(declaration);
    }
}

/**
 * Extracts the encoding attribute from the text of an XML declaration.
 * Both quote styles allowed by XML ({@code "..."} and {@code '...'}) are
 * accepted.
 *
 * @return the upper-cased encoding name, or null if none is declared
 */
private static String encodingOf(final String declaration) throws Exception {
    final String key = "encoding=";
    final int n = declaration.indexOf(key);
    if (n < 0) {
        return null;
    }
    final int open = n + key.length();
    if (open >= declaration.length()) {
        return null;
    }
    final char quote = declaration.charAt(open);
    if (quote != '\"' && quote != '\'') {
        return null;
    }
    final int close = declaration.indexOf(quote, open + 1);
    if (close < 0) {
        throw new Exception("Closing bracket missing");
    }
    // Fixed locale: the default one may map 'i' to a dotted capital I
    // (Turkish), which would turn "iso-8859-1" into an unknown name.
    final String encoding = declaration.substring(open + 1, close)
            .toUpperCase(java.util.Locale.ENGLISH);
    if (encoding.equals("UTF-8")) {
        return "UTF8"; // for IE5 !
    }
    return encoding;
}

/**
 * Opens a reader over the data using the given encoding; falls back to
 * ASCII and finally to the platform default if the encoding is unknown.
 */
private static BufferedReader decode(final byte[] data, final String encoding) {
    final ByteArrayInputStream bin = new ByteArrayInputStream(data);
    if (encoding != null) {
        try {
            return new BufferedReader(new InputStreamReader(bin, encoding));
        } catch (final UnsupportedEncodingException e) {
            try {
                return new BufferedReader(new InputStreamReader(bin,
                        "ASCII"));
            } catch (final UnsupportedEncodingException ex) {
                // fall through to the platform default below
            }
        }
    }
    return new BufferedReader(new InputStreamReader(bin));
}
|
||
|
|
||
|
/**
|
||
|
* Scan an xml file. This function reads, until <?xml is found. then it
|
||
|
* skips this declaration and scans the rest of the items.
|
||
|
*/
|
||
|
public XmlTree scan() throws XmlReaderException {
|
||
|
while (true) {
|
||
|
while (true) {
|
||
|
final int c = read();
|
||
|
if (c == -1)
|
||
|
return null;
|
||
|
if (c == '<')
|
||
|
break;
|
||
|
}
|
||
|
if (found("?xml")) {
|
||
|
final String s = scanFor("?>");
|
||
|
if (s == null)
|
||
|
return null;
|
||
|
final XmlTree t = new XmlTree(new XmlTagRoot());
|
||
|
t.addchild(new XmlTree(new XmlTagPI(s)));
|
||
|
scanContent(t);
|
||
|
return t;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public void scanContent(final XmlTree t) throws XmlReaderException { // System.out.println("Sanning for "+t.getTag().name()+" ("+
|
||
|
// t.getTag().countParams()+")");
|
||
|
while (true) {
|
||
|
String s = scanFor('<');
|
||
|
if (s == null) {
|
||
|
if (t.getTag() instanceof XmlTagRoot)
|
||
|
return;
|
||
|
exception("File ended surprisingly");
|
||
|
}
|
||
|
if (!empty(s)) {
|
||
|
t
|
||
|
.addchild(new XmlTree(new XmlTagText(XmlTranslator
|
||
|
.toText(s))));
|
||
|
}
|
||
|
if (found("!--")) {
|
||
|
s = scanFor("-->");
|
||
|
continue;
|
||
|
}
|
||
|
if (found("!")) {
|
||
|
s = scanTagFor('>');
|
||
|
continue;
|
||
|
}
|
||
|
if (found("?")) {
|
||
|
s = scanTagFor("?>");
|
||
|
t.addchild(new XmlTree(new XmlTagPI(s)));
|
||
|
continue;
|
||
|
}
|
||
|
s = scanTagFor('>');
|
||
|
if (s == null)
|
||
|
exception("> missing");
|
||
|
if (s.startsWith("/")) {
|
||
|
if (s.substring(1).equals(t.getTag().name()))
|
||
|
return;
|
||
|
else
|
||
|
exception("End tag without start tag");
|
||
|
}
|
||
|
if (s.endsWith("/")) {
|
||
|
t.addchild(new XmlTree(new XmlTag(s
|
||
|
.substring(0, s.length() - 1))));
|
||
|
} else {
|
||
|
final XmlTree t0 = new XmlTree(new XmlTag(s));
|
||
|
scanContent(t0);
|
||
|
t.addchild(t0);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public boolean empty(final String s) {
|
||
|
final int n = s.length();
|
||
|
for (int i = 0; i < n; i++) {
|
||
|
final char c = s.charAt(i);
|
||
|
if (c != ' ' && c != '\n' && c != '\t')
|
||
|
return false;
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Skip Blanks.
|
||
|
*
|
||
|
* @return Non-blank character or -1 for EOF.
|
||
|
*/
|
||
|
public int skipBlanks() throws XmlReaderException {
|
||
|
while (true) {
|
||
|
final int c = read();
|
||
|
if (c == ' ' || c == '\t' || c == '\n')
|
||
|
continue;
|
||
|
else
|
||
|
return c;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Scan for an end character.
|
||
|
*
|
||
|
* @return String between the current position and the end character, or
|
||
|
* null.
|
||
|
*/
|
||
|
public String scanFor(final char end) throws XmlReaderException {
|
||
|
buf.clear();
|
||
|
int c = read();
|
||
|
while (c != end) {
|
||
|
buf.append((char) c);
|
||
|
c = read();
|
||
|
if (c < 0)
|
||
|
return null;
|
||
|
}
|
||
|
return buf.toString();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Scan for a specific string.
|
||
|
*
|
||
|
* @return String between the current position and the string.
|
||
|
*/
|
||
|
public String scanFor(final String s) throws XmlReaderException {
|
||
|
buf.clear();
|
||
|
while (!found(s)) {
|
||
|
final int c = read();
|
||
|
if (c < 0)
|
||
|
return null;
|
||
|
buf.append((char) c);
|
||
|
}
|
||
|
for (int i = 0; i < s.length(); i++)
|
||
|
read();
|
||
|
return buf.toString();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Scan tag for an end character (interpreting " and ')
|
||
|
*
|
||
|
* @return String between the current position and the end character, or
|
||
|
* null.
|
||
|
*/
|
||
|
public String scanTagFor(final char end) throws XmlReaderException {
|
||
|
buf.clear();
|
||
|
int c = read();
|
||
|
while (c != end) {
|
||
|
if (c == '\"') {
|
||
|
buf.append((char) c);
|
||
|
while (true) {
|
||
|
c = read();
|
||
|
if (c < 0)
|
||
|
return null;
|
||
|
if (c == '\"')
|
||
|
break;
|
||
|
buf.append((char) c);
|
||
|
}
|
||
|
buf.append((char) c);
|
||
|
} else if (c == '\'') {
|
||
|
buf.append((char) c);
|
||
|
while (true) {
|
||
|
c = read();
|
||
|
if (c < 0)
|
||
|
return null;
|
||
|
if (c == '\'')
|
||
|
break;
|
||
|
buf.append((char) c);
|
||
|
}
|
||
|
buf.append((char) c);
|
||
|
} else
|
||
|
buf.append((char) c);
|
||
|
c = read();
|
||
|
if (c < 0)
|
||
|
return null;
|
||
|
}
|
||
|
return buf.toString();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Scan tag for a specific string (interpreting " and ')
|
||
|
*
|
||
|
* @return String between the current position and the string.
|
||
|
*/
|
||
|
public String scanTagFor(final String s) throws XmlReaderException {
|
||
|
buf.clear();
|
||
|
while (!found(s)) {
|
||
|
int c = read();
|
||
|
if (c < 0)
|
||
|
return null;
|
||
|
if (c == '\"') {
|
||
|
buf.append((char) c);
|
||
|
while (true) {
|
||
|
c = read();
|
||
|
if (c < 0)
|
||
|
return null;
|
||
|
if (c == '\"')
|
||
|
break;
|
||
|
buf.append((char) c);
|
||
|
}
|
||
|
buf.append((char) c);
|
||
|
} else if (c == '\'') {
|
||
|
buf.append((char) c);
|
||
|
while (true) {
|
||
|
c = read();
|
||
|
if (c < 0)
|
||
|
return null;
|
||
|
if (c == '\'')
|
||
|
break;
|
||
|
buf.append((char) c);
|
||
|
}
|
||
|
buf.append((char) c);
|
||
|
} else
|
||
|
buf.append((char) c);
|
||
|
}
|
||
|
for (int i = 0; i < s.length(); i++)
|
||
|
read();
|
||
|
return buf.toString();
|
||
|
}
|
||
|
|
||
|
String Line = null;
|
||
|
int LinePos;
|
||
|
|
||
|
public int read() throws XmlReaderException {
|
||
|
try {
|
||
|
if (Line == null) {
|
||
|
Line = In.readLine();
|
||
|
LinePos = 0;
|
||
|
// System.out.println("Read --> "+Line);
|
||
|
}
|
||
|
if (Line == null)
|
||
|
return -1;
|
||
|
if (LinePos >= Line.length()) {
|
||
|
Line = null;
|
||
|
return '\n';
|
||
|
}
|
||
|
return Line.charAt(LinePos++);
|
||
|
} catch (final Exception e) {
|
||
|
return -1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @return If the string is at the current line position.
|
||
|
*/
|
||
|
public boolean found(final String s) {
|
||
|
final int n = s.length();
|
||
|
if (LinePos + n > Line.length())
|
||
|
return false;
|
||
|
for (int i = 0; i < n; i++)
|
||
|
if (s.charAt(i) != Line.charAt(LinePos + i))
|
||
|
return false;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
public void exception(final String s) throws XmlReaderException {
|
||
|
throw new XmlReaderException(s, Line, LinePos);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* A test program.
|
||
|
*/
|
||
|
public static void main(final String args[]) {
|
||
|
try {
|
||
|
final BufferedReader in = new BufferedReader(new InputStreamReader(
|
||
|
new FileInputStream("rene\\util\\xml\\test.xml"), "UTF8"));
|
||
|
final XmlReader reader = new XmlReader(in);
|
||
|
final XmlTree tree = reader.scan();
|
||
|
in.close();
|
||
|
print(tree);
|
||
|
} catch (final XmlReaderException e) {
|
||
|
System.out.println(e.toString() + "\n" + e.getLine() + "\n"
|
||
|
+ "Position : " + e.getPos());
|
||
|
} catch (final IOException e) {
|
||
|
System.out.println(e);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public static void print(final XmlTree t) {
|
||
|
final XmlTag tag = t.getTag();
|
||
|
System.out.print("<" + tag.name());
|
||
|
for (int i = 0; i < tag.countParams(); i++) {
|
||
|
System.out.print(" " + tag.getParam(i) + "=\"" + tag.getValue(i)
|
||
|
+ "\"");
|
||
|
}
|
||
|
System.out.println(">");
|
||
|
ListElement el = t.children().first();
|
||
|
while (el != null) {
|
||
|
print((XmlTree) (el.content()));
|
||
|
el = el.next();
|
||
|
}
|
||
|
System.out.println("</" + tag.name() + ">");
|
||
|
}
|
||
|
|
||
|
public static boolean testXml(final String s) {
|
||
|
int i = 0;
|
||
|
while (i < s.length()) {
|
||
|
final char c = s.charAt(i);
|
||
|
if (c == '<')
|
||
|
break;
|
||
|
i++;
|
||
|
}
|
||
|
if (i >= s.length())
|
||
|
return false;
|
||
|
if (s.substring(i).startsWith("<?xml"))
|
||
|
return true;
|
||
|
return false;
|
||
|
}
|
||
|
}
|