目录

Java 抓取 Google 查询页面，获得 hit 数

目录

Java 抓取 Google 查询页面，获得 hit 数

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;

/**
 * Fetches a Google search-result page over HTTP and prints the number of hits
 * reported on it.
 *
 * <p>The hit count is located by looking for the Traditional-Chinese result
 * banner, i.e. the text between {@link #COUNT_PREFIX} and {@link #COUNT_SUFFIX}.
 */
public class Test {

    /** Text that precedes the hit count: U+7D04 U+6709 followed by a space. */
    private static final String COUNT_PREFIX = "\u7D04\u6709 ";

    /** Text that follows the hit count: a space followed by U+9805 U+7D50 U+679C. */
    private static final String COUNT_SUFFIX = " \u9805\u7D50\u679C";

    /**
     * Downloads the page and prints every hit count found, one per line.
     *
     * @param args {@code args[0]} is the URL of the search-result page to fetch
     */
    public static void main(String[] args) {
        // NOTE(review): the hard-coded URL literal of the original program is not
        // recoverable, so the URL is read from the command line instead.
        if (args == null || args.length == 0) {
            System.err.println("usage: java Test <search-result-url>");
            return;
        }

        HttpURLConnection connection = null;
        try {
            URL url = new URL(args[0]);
            String protocol = url.getProtocol();
            if (!"http".equals(protocol) && !"https".equals(protocol)) {
                // Guards the cast below, which only holds for HTTP(S) URLs.
                System.err.println("unsupported protocol: " + protocol);
                return;
            }

            connection = (HttpURLConnection) url.openConnection();
            connection.setConnectTimeout(12000);
            connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)");
            // The trailing wildcard is */* and the first entry is image/gif.
            connection.setRequestProperty("Accept", "image/gif, image/x-xbitmap, image/jpeg, */*");
            // NOTE(review): the body is read as plain text below without inflating;
            // confirm the server never answers with a deflate-encoded body.
            connection.setRequestProperty("Accept-Encoding", "deflate");
            // NOTE(review): the literal string "null" is kept from the original; confirm intent.
            connection.setRequestProperty("Accept-Charset", "null");
            connection.setRequestProperty("Connection", "Keep-Alive");
            connection.setDoInput(true);
            connection.setRequestMethod("GET");
            connection.connect();

            Charset charset = charsetOf(connection.getContentType());

            // try-with-resources closes both streams even when reading fails.
            // NOTE(review): as in the original, a.html is created (or truncated) but
            // nothing is written to it; confirm whether the page was meant to be saved.
            try (BufferedReader reader = new BufferedReader(
                         new InputStreamReader(connection.getInputStream(), charset));
                 BufferedWriter writer = new BufferedWriter(
                         new OutputStreamWriter(new FileOutputStream("a.html")))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    long count = parseHitCount(line);
                    if (count >= 0) {
                        System.out.println(count);
                    }
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Extracts the hit count from one line of the result page.
     *
     * <p>Thousands separators (commas) are dropped. A {@code long} is used because
     * hit counts can exceed the range of {@code int}.
     *
     * @param line a line of HTML, may be {@code null}
     * @return the hit count, or {@code -1} when the line holds no well-formed count
     */
    static long parseHitCount(String line) {
        if (line == null) {
            return -1L;
        }
        int prefixAt = line.indexOf(COUNT_PREFIX);
        if (prefixAt < 0) {
            return -1L;
        }
        int start = prefixAt + COUNT_PREFIX.length();
        // Search for the suffix only after the prefix so the range is never inverted.
        int end = line.indexOf(COUNT_SUFFIX, start);
        if (end < 0) {
            return -1L;
        }

        StringBuilder digits = new StringBuilder(end - start);
        for (int i = start; i < end; i++) {
            char c = line.charAt(i);
            if (c == ',') {
                continue;
            }
            if (c < '0' || c > '9') {
                return -1L;
            }
            digits.append(c);
        }
        if (digits.length() == 0) {
            return -1L;
        }
        try {
            return Long.parseLong(digits.toString());
        } catch (NumberFormatException ignored) {
            // More digits than a long can hold: treat as "no usable count".
            return -1L;
        }
    }

    /**
     * Picks the charset used to decode the response body.
     *
     * @param contentType value of the {@code Content-Type} header, may be {@code null}
     * @return the charset named by the header's {@code charset} parameter, or the
     *         platform default when the header names none or names an unknown one
     */
    static Charset charsetOf(String contentType) {
        final String key = "charset=";
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (!param.regionMatches(true, 0, key, 0, key.length())) {
                    continue;
                }
                String name = param.substring(key.length()).trim();
                if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }
                try {
                    return Charset.forName(name);
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name: fall back to the default.
                    break;
                }
            }
        }
        return Charset.defaultCharset();
    }
}