Hi!
I am using JTidy to convert an HTML page into XML.
I don't want to save the XML to a file, and I want to use org.jdom.Document instead of org.w3c.dom.Document.
I tried converting the generated file using the code at:
http://www-106.ibm.com/developerworks/java/library/x-tipcdm.html and it works fine.
But that code converts from an XML file, not from an org.w3c.dom.Document.
Does anyone know how I can do that?
This is the code for reading the HTML:
import java.io.*;
import java.net.URL;
import org.w3c.dom.Document;
import org.w3c.tidy.Tidy;
/**
 * Downloads the page at a URL, parses it with JTidy into a DOM document and
 * lets JTidy write its output to a file.
 *
 * <p>JTidy is configured from {@code D:\jtidy.conf}; its diagnostics are
 * redirected to the error-output file given to the constructor. The class is
 * a {@link Runnable} so the download can be run on its own thread.
 */
public class LoadDOM implements Runnable {

    /** Address of the HTML page to read. */
    private final String url;

    /** Path of the file JTidy writes its output to. */
    private final String outFileName;

    /** Path of the file that receives JTidy's diagnostics. */
    private final String errOutFileName;

    /** Value passed to {@code Tidy.setXmlOut}. */
    private final boolean xmlOut;

    /**
     * Creates a loader for a single page.
     *
     * @param url            address of the HTML page to read
     * @param outFileName    path of the file JTidy writes its output to
     * @param errOutFileName path of the file that receives JTidy's diagnostics
     * @param xmlOut         value passed to {@code Tidy.setXmlOut}
     */
    public LoadDOM(String url, String outFileName,
                   String errOutFileName, boolean xmlOut) {
        this.url = url;
        this.outFileName = outFileName;
        this.errOutFileName = errOutFileName;
        this.xmlOut = xmlOut;
    }

    /**
     * Opens the URL, parses it with JTidy and reports where the output was
     * saved. An {@link IOException} is reported on standard error rather than
     * propagated, because {@link Runnable#run()} cannot throw checked
     * exceptions.
     */
    @Override
    public void run() {
        Tidy tidy = new Tidy();
        tidy.setConfigurationFromFile("D:\\jtidy.conf");
        tidy.setXmlOut(xmlOut);

        // try-with-resources closes the error log, the network stream and the
        // output file on every path; previously none of them was ever closed.
        try (PrintWriter errOut = new PrintWriter(new FileWriter(errOutFileName), true);
             InputStream in = new BufferedInputStream(new URL(url).openStream());
             OutputStream out = new FileOutputStream(outFileName)) {

            tidy.setErrout(errOut);

            // 'doc' is the org.w3c.dom.Document that the author wants to turn
            // into an org.jdom.Document; that conversion is not implemented here.
            // NOTE(review): JDOM's org.jdom.input.DOMBuilder looks like the
            // intended route for this - confirm against the JDOM documentation.
            Document doc = tidy.parseDOM(in, out);

            System.out.println("File saved to:" + outFileName);
        } catch (IOException ex) {
            // Say which page failed before dumping the stack trace.
            System.err.println("Failed to load " + url + ": " + ex);
            ex.printStackTrace();
        }
    }

    /**
     * Demo entry point: loads one hard-coded page on a separate thread.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        LoadDOM t1 = new LoadDOM("http://www.myPage.com",
                "D:\\myPage.xml",
                "D:\\myPage_error.txt", true);
        Thread th1 = new Thread(t1);
        th1.start();
    }
}