Search...
FAQs
Subscribe
Pie
FAQs
Recent topics
Flagged topics
Hot topics
Best topics
Search...
Search within Sockets and Internet Protocols
Search Coderanch
Advance search
Google search
Register / Login
Bookmark Topic
Watch Topic
New Topic
programming forums
Java
Mobile
Certification
Databases
Caching
Books
Engineering
Micro Controllers
OS
Languages
Paradigms
IDEs
Build Tools
Frameworks
Application Servers
Open Source
This Site
Careers
Other
Pie Elite
all forums
this forum made possible by our volunteer staff, including ...
Marshals:
Campbell Ritchie
Tim Cooke
paul wheaton
Jeanne Boyarsky
Ron McLeod
Sheriffs:
Paul Clapham
Liutauras Vilda
Devaka Cooray
Saloon Keepers:
Tim Holloway
Roland Mueller
Bartenders:
Forum:
Sockets and Internet Protocols
parsing data and storing in the xml
shan rast
Greenhorn
Posts: 7
posted 15 years ago
Number of slices to send:
Optional 'thank-you' note:
Send
////
<
import java.io.*; import java.net.*; import java.util.regex.*; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Result; import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.TransformerFactoryConfigurationError; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.w3c.dom.Attr; import org.w3c.dom.Comment; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.Text; import org.xml.sax.InputSource; import org.xml.sax.SAXException; public class Spider{ Document document; public static void main(String args[]){ try { URL url = new URL(&quot;http://www.csc.liv.ac.uk/teaching/modules/bscmodulesyr3.html&quot;); URLConnection urlConnection = url.openConnection(); DataInputStream dis = new DataInputStream(urlConnection.getInputStream()); String html= &quot;&quot;, tmp = &quot;&quot;; // read all HTML source from given URL while ((tmp = dis.readLine()) != null) { html += &quot; &quot;+tmp; } dis.close(); // replace all white spaces region with single space html = html.replaceAll(&quot;\\s+&quot;, &quot; &quot;); // build the pattern using regular expression Pattern p = Pattern.compile(&quot;&lt;td width=[^&gt;]*&gt;&quot;+&quot;.*?&lt;A HREF=\&quot;&quot;+&quot;([^\&quot;]+)&quot;+&quot;\&quot;[^&gt;]*&gt;&quot;+&quot;([^&lt;]+)&quot; + &quot;&amp;nbsp;.*?&amp;nbsp;&quot;+&quot;([^&lt;]+)&quot;+&quot;&lt;/A[^&gt;]*&gt;&quot;); // Match the pattern with given html source Matcher m = p.matcher(html); // Get all matches that matched my pattern while (m.find() == true){ // Print the first matched pattern System.out.println(m.group(1)); System.out.println(m.group(2)); System.out.println(m.group(3)); // String s=m.group(1); String s=&quot;http://www.csc.liv.ac.uk&quot;+m.group(1); //System.out.println(s); URL url1 = new URL(s); URLConnection urlConnection1 = url1.openConnection(); DataInputStream dis1 = new DataInputStream(urlConnection1.getInputStream()); String html1= &quot;&quot;, tmp1 = &quot;&quot;; // read all HTML source from given URL while ((tmp1 = dis1.readLine()) != null) { html1 += &quot; &quot;+tmp1; } dis1.close(); html1 = html1.replaceAll(&quot;\\s+&quot;, &quot; &quot;); Pattern p1 = Pattern.compile(&quot;&lt;h2[^&gt;]*&gt;&quot;+&quot;.*?&lt;a href=\&quot;&quot;+&quot;([^\&quot;]+)&quot;+&quot;\&quot;[^&gt;]*&gt;&quot;+&quot;([^&lt;]+)&quot;+&quot;&lt;/a[^&gt;]*&gt;&quot;); Matcher m1 = p1.matcher(html1); // Get all matches that matched my pattern while (m1.find() == true){ // Print the first matched pattern System.out.println(m1.group(2)); } //Modules mod=new Modules(m1.group(2),m.group(1),m.group(3),m.group(2)); BuildXmlUsingDom b=new BuildXmlUsingDom(m.group(1), m.group(2), m.group(3), m1.group(2));///ERROR HERE I need to parse data from html page and /////store it in xml my data is parsed and only what i need is the each time the loop works the value should go in .xml but its giving error IllegalState Exception ////No match found /////at java.util.regex.Matcher.group(Unknown Source) ///at Spider.main(Spider.java:91) //b.createModules(document, m.group(1), m.group(2), m.group(3), m1.group(2)); } }catch (Exception e) { e.printStackTrace(); } } } class Modules { private String courseurl; private String title; private String code; private String lecturer; public String getCourseurl() { return courseurl; } public void setCourseurl(String courseurl) { this.courseurl = courseurl; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getCode() { return code; } public void setCode(String code) { this.code = code; } public String getLecturer() { return lecturer; } public void setLecturer(String lecturer) { this.lecturer = lecturer; } public Modules(String lecturer1, String courseurl1, String title1,String code1) { lecturer = lecturer1; courseurl = courseurl1; title = title1; code=code1; } public String toString() { return lecturer + &quot;\t&quot; + courseurl + &quot;\t&quot; + title + &quot;\t&quot; + code; } } class BuildXmlUsingDom { private Document document; String a,b,c,d; public BuildXmlUsingDom(String l,String m,String n,String o) { DocumentBuilderFactory dBF = DocumentBuilderFactory.newInstance(); try { DocumentBuilder builder = dBF.newDocumentBuilder(); // java xml documentbuilder document = builder.newDocument(); } catch (ParserConfigurationException parserException) { parserException.printStackTrace(); } Element root = document.createElement(&quot;Modules&quot;); document.appendChild(root); // add comment to XML document Comment simpleComment = document.createComment(&quot;Modules details&quot;); root.appendChild(simpleComment); // add Employee child elements Node modulesNode = createModules(document,l,m,n,o); root.appendChild(modulesNode); // We can also add processing instructions and java xml cdata Sections as shown // write the XML document to disk try { // create DOMSource for source XML document Source xmlSource = new DOMSource(document); // create StreamResult for transformation result Result result = new StreamResult(new FileOutputStream( &quot;modules.xml&quot;)); // create TransformerFactory TransformerFactory transformerFactory = TransformerFactory .newInstance(); // create Transformer for transformation Transformer transformer = transformerFactory.newTransformer(); transformer.setOutputProperty(&quot;indent&quot;, &quot;yes&quot;); //Java XML Indent // transform and deliver content to client transformer.transform(xmlSource, result); } // handle exception creating TransformerFactory catch (TransformerFactoryConfigurationError factoryError) { System.err.println(&quot;Error creating &quot; + &quot;TransformerFactory&quot;); factoryError.printStackTrace(); } catch (TransformerException transformerError) { System.err.println(&quot;Error transforming document&quot;); transformerError.printStackTrace(); } catch (IOException ioException) { ioException.printStackTrace(); } } public Node createModules(Document document,String u,String c,String t,String l) { // create FirstName and LastName elements Element module=document.createElement(&quot;module&quot;); Element url = document.createElement(&quot;url&quot;); //Element code = document.createElement(&quot;code&quot;); Element title = document.createElement(&quot;title&quot;); Element lecturer = document.createElement(&quot;lecturer&quot;); // java xml text title.appendChild(document.createTextNode(t)); url.appendChild(document.createTextNode(u)); lecturer.appendChild(document.createTextNode(l)); // create employee element Element modules = document.createElement(&quot;Modules&quot;); // java xml attributes Attr moduleAttribute = document.createAttribute(&quot;name&quot;); moduleAttribute.setValue(c); // append attribute to contact element modules.setAttributeNode(moduleAttribute); modules.appendChild(module); module.appendChild(title); module.appendChild(lecturer); module.appendChild(url); return modules; } // Function to read DOM Tree from File /*public Document readingXMLFromFile() { DocumentBuilderFactory dBF = DocumentBuilderFactory.newInstance(); dBF.setIgnoringComments(true); // Ignore the comments present in the // XML File when reading the xml DocumentBuilder builder = null; try { builder = dBF.newDocumentBuilder(); } catch (ParserConfigurationException e) { e.printStackTrace(); } InputSource input = new InputSource(&quot;Employee.xml&quot;); Document doc = null; try { doc = builder.parse(input); } catch (SAXException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return doc; } /* * Employee.xml * * &lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt; &lt;Employees&gt; &lt;!--Employee * Details--&gt; &lt;Employee gender=&quot;M&quot;&gt; &lt;FirstName&gt;Shawn&lt;/FirstName&gt; * &lt;LastName&gt;Michaels&lt;/LastName&gt; &lt;/Employee&gt; &lt;/Employees&gt; * */ /*public ArrayList CreateListOfObjects(Document doc) { ArrayList&lt;Employees&gt; employeesList = new ArrayList&lt;Employees&gt;(); Element root = doc.getDocumentElement(); // Retrieve the list of Empolyee Nodes NodeList employeeList = root.getElementsByTagName(&quot;Employee&quot;); for (int i = 0; i &lt; employeeList.getLength(); i++) { Element employeeElement = (Element) employeeList.item(i); String gender = employeeElement.getAttribute(&quot;gender&quot;); // java xml nodelist NodeList firstNameList = employeeElement .getElementsByTagName(&quot;FirstName&quot;); Element firstNameElement = (Element) firstNameList.item(0); Text firstNameText = (Text) firstNameElement.getFirstChild(); String fName = firstNameText.getNodeValue(); NodeList lastNameList = employeeElement .getElementsByTagName(&quot;LastName&quot;); Element lastNameElement = (Element) lastNameList.item(0); Text lastNameText = (Text) lastNameElement.getFirstChild(); String lName = lastNameText.getNodeValue(); Modules mod1 = new Modules(lecturer1,course1,title); employeesList.add(emp); } return employeesList; }*/ }
>///////
Joe Ess
Bartender
Posts: 9626
16
I like...
posted 15 years ago
Number of slices to send:
Optional 'thank-you' note:
Send
Please do not post the
same question
more than once.
You would do well to look over our FAQ,
HowToAskQuestionsOnJavaRanch
. If I had not read your previous post, I would not know what you wanted from us, just posting a bunch of code.
[
How To Ask Questions On JavaRanch
]
If we don't do the shopping, we won't have anything for dinner. And I've invited this tiny ad:
Smokeless wood heat with a rocket mass heater
https://woodheat.net
Bookmark Topic
Watch Topic
New Topic
Boost this thread!
Similar Threads
create a xml from getting values from html forms
Implementing HttpSession in the Spring MVC
java.lang.ClassCastException: com.sun.org.apache.xerces.internal.dom.DocumentImpl
Special Characters
file downloading concept
Building a Better World in your Backyard by Paul Wheaton and Shawn Klassen-Koop
More...