这里主要使用流来处理:先用 Java 模拟浏览器访问页面,获取页面的 HTML 内容,再用 jsoup 解析其中的 HTML 元素,
最后通过流把图片保存到本机。
package getpicture;import java.io.BufferedReader;import java.io.File;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.OutputStreamWriter;import java.net.HttpURLConnection;import java.net.URL;import java.text.SimpleDateFormat;import java.util.Date;import java.util.Scanner;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements; public class getPicture { public static void main(String[] args) { new Thread(new Spider()).start(); }} // 抓网页, 并分析出图片地址class Spider implements Runnable { private String firstUrl = "http://jandan.net/ooxx/page-"; //1111#comments private String connUrl = "#comments"; private int beginIndex = 1115; private String preHtml; //private String testPath="http://www.mop.com/#"; private String mSavePath; public Spider() {}; @Override public void run() { try { URL newURL = new URL(firstUrl + beginIndex + connUrl); //URL newURL = new URL(testPath); HttpURLConnection conn = (HttpURLConnection) newURL.openConnection(); conn.setRequestProperty("Connection","keep-alive"); conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36"); conn.setDoInput(true); conn.setDoOutput(true); OutputStreamWriter out = new OutputStreamWriter(conn.getOutputStream(),"utf-8"); out.flush(); out.close(); InputStream inputStream = conn.getInputStream(); BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "utf-8")); String line; //读取页面html元素 while ((line = reader.readLine()) != null) { preHtml+=line; } System.out.println(preHtml); //当页面访问成功时,解析页面元素,获取页面图片元素 if(conn.getResponseCode()==200){ Document doc=Jsoup.parse(preHtml); Elements elements = doc.select(".row img"); for(Element e : elements) { String imgSrc = e.attr("src"); new Thread(new DownloadImage(imgSrc)).start(); } } }catch(Exception e) { e.printStackTrace(); } }} 
/**
 * Downloads a single image into {@code E:\picture\<yyyyMMdd>\<file name>}, where the
 * date is the day the download runs and the file name is the last path segment of
 * the image address.
 */
class DownloadImage implements Runnable {
    private static final String SAVE_ROOT = "E:\\picture\\";
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36";

    private final String imageSrc;
    private String imageName;

    /**
     * @param imageSrc image address as found in the page; either protocol-relative
     *                 ({@code //host/path/name.jpg}) or a complete URL
     */
    public DownloadImage(String imageSrc) {
        this.imageSrc = imageSrc;
    }

    /**
     * Fetches the image and writes it to disk. A missing address or an address
     * without a file name is skipped; a failed download is reported on stderr and
     * the partially written file is removed. Nothing is thrown.
     */
    @Override
    public void run() {
        if (imageSrc == null || imageSrc.isEmpty()) {
            System.err.println("Skipping image: empty src");
            return;
        }

        // split drops trailing empty segments, so "/" yields an empty array.
        String[] splits = imageSrc.split("/");
        imageName = splits.length == 0 ? "" : splits[splits.length - 1];
        if (imageName.isEmpty()) {
            System.err.println("Skipping image without a file name: " + imageSrc);
            return;
        }

        // Use the formatted date for the folder name, not the formatter object itself.
        String day = new SimpleDateFormat("yyyyMMdd").format(new Date());
        File file = new File(SAVE_ROOT + day + "\\" + imageName);

        // Create the target directory when it does not exist yet.
        File dir = file.getParentFile();
        if (dir != null && !dir.exists()) {
            dir.mkdirs();
        }

        System.out.println("开始下载图片:" + imageName);

        // Set once the output file has been opened, so a failure before that point
        // never deletes a file left by an earlier successful run.
        boolean outputOpened = false;
        try {
            // Only protocol-relative addresses need a scheme prepended.
            URL imageUrl = new URL(imageSrc.startsWith("//") ? "http:" + imageSrc : imageSrc);
            HttpURLConnection conn = (HttpURLConnection) imageUrl.openConnection();
            conn.setRequestProperty("Connection", "keep-alive");
            conn.setRequestProperty("User-Agent", USER_AGENT);
            // A download sends no request body, so setDoOutput is not set.

            // Open the connection first: the file is only created once there is data to store.
            try (InputStream inputStream = conn.getInputStream();
                    FileOutputStream fos = new FileOutputStream(file)) {
                outputOpened = true;
                byte[] data = new byte[1024];
                int len;
                while ((len = inputStream.read(data)) != -1) {
                    fos.write(data, 0, len);
                }
            }
            System.out.println("下载完成:" + imageName);
        } catch (Exception e) {
            if (outputOpened && !file.delete()) {
                // Could not remove the partial file now; try again when the JVM exits.
                file.deleteOnExit();
            }
            System.err.println(" 这个图片下载不了哇!\n删除妹子" + imageName);
            System.err.println("  cause: " + e);
        }
    }
}