截圖效果:
源程序分別有四個文件:
CheckLinks.java
package org.qyclass;
import java.awt.*;
import javax.swing.*;
import java.net.*;
import java.io.*;
public class CheckLinks extends javax.swing.JFrame implements Runnable, ISpiderReportable {
public CheckLinks() {
//{{INIT_CONTROLS
setTitle("找到死鏈接");
getContentPane().setLayout(null);
setSize(405, 288);
setVisible(false);
label1.setText("輸入一個URL:");
getContentPane().add(label1);
label1.setBounds(12, 12, 84, 12);
begin.setText("Begin");
begin.setActionCommand("Begin");
getContentPane().add(begin);
begin.setBounds(12, 36, 84, 24);
getContentPane().add(url);
url.setBounds(108, 36, 288, 24);
errorScroll.setAutoscrolls(true);
errorScroll.setHorizontalScrollBarPolicy(javax.swing.ScrollPaneC*****tants.HORIZONTAL_SCROLLBAR_ALWAYS);
errorScroll.setVerticalScrollBarPolicy(javax.swing.ScrollPaneC*****tants.VERTICAL_SCROLLBAR_ALWAYS);
errorScroll.setOpaque(true);
getContentPane().add(errorScroll);
errorScroll.setBounds(12, 120, 384, 156);
errors.setEditable(false);
errorScroll.getViewport().add(errors);
errors.setBounds(0, 0, 366, 138);
current.setText("當前處理進度:");
getContentPane().add(current);
current.setBounds(12, 72, 384, 12);
goodLinksLabel.setText("正常鏈接:0");
getContentPane().add(goodLinksLabel);
goodLinksLabel.setBounds(12, 96, 192, 12);
badLinksLabel.setText("死鏈接:0");
getContentPane().add(badLinksLabel);
badLinksLabel.setBounds(216, 96, 96, 12);
//}}
//{{INIT_MENUS
//}}
//{{REGISTER_LISTENERS
SymAction lSymAction = new SymAction();
begin.addActionListener(lSymAction);
//}}
}
/**
*參數args未使用
*/
static public void main(String args[]) {
(new CheckLinks()).setVisible(true);
}
/**
*添加通知
*/
@Override
public void addNotify() {
//記錄窗口尺寸并調用父類的addNotify.
Dimension size = getSize();
super.addNotify();
if (frameSizeAdjusted) {
return;
}
frameSizeAdjusted = true;
//根據菜單欄等調整Frame尺寸
Insets insets = getInsets();
javax.swing.JMenuBar menuBar = getRootPane().getJMenuBar();
int menuBarHeight = 0;
if (menuBar != null) {
menuBarHeight = menuBar.getPreferredSize().height;
}
setSize(insets.left + insets.right + size.width, insets.top +
insets.bottom + size.height +
menuBarHeight);
}
boolean frameSizeAdjusted = false;
//{{
javax.swing.JLabel label1 = new javax.swing.JLabel();
javax.swing.JButton begin = new javax.swing.JButton();
javax.swing.JTextField url = new javax.swing.JTextField();
javax.swing.JScrollPane errorScroll = new javax.swing.JScrollPane();
/**
*存儲錯誤信息
*/
javax.swing.JTextArea errors = new javax.swing.JTextArea();
javax.swing.JLabel current = new javax.swing.JLabel();
javax.swing.JLabel goodLinksLabel = new javax.swing.JLabel();
javax.swing.JLabel badLinksLabel = new javax.swing.JLabel();
//}}
//{{
//
/**
*后臺蜘蛛線程
*/
protected Thread backgroundThread;
protected Spider spider;
protected URL base;
protected int badLinksCount = 0;
protected int goodLinksCount = 0;
/**
*用于分發事件的內部類
*/
class SymAction implements java.awt.event.ActionListener {
public void actionPerformed(java.awt.event.ActionEvent event) {
Object object = event.getSource();
if (object == begin) {
begin_actionPerformed(event);
}
}
}
/**
*當begin或cancel按鈕被點擊時調用
*
*參數event與按鈕相連
*/
void begin_actionPerformed(java.awt.event.ActionEvent event) {
if (backgroundThread == null) {
begin.setLabel("Cancel");
backgroundThread = new Thread(this);
backgroundThread.start();
goodLinksCount = 0;
badLinksCount = 0;
} else {
spider.cancel();
}
}
/**
*執行后臺線程操作
*/
public void run() {
try {
errors.setText("");
spider = new Spider(this);
spider.clear();
base = new URL(url.getText());
spider.addURL(base);
spider.begin();
Runnable doLater = new Runnable() {
public void run() {
begin.setText("Begin");
}
};
SwingUtilities.invokeLater(doLater);
backgroundThread = null;
} catch (MalformedURLException e) {
UpdateErrors err = new UpdateErrors();
err.msg = "錯誤地址。";
SwingUtilities.invokeLater(err);
}
}
/**
*當找到某一URL時由蜘蛛調用,在此驗證鏈接。
*
*參數base是找到鏈接時的頁面
*參數url是鏈接地址
*/
public boolean spiderFoundURL(URL base, URL url) {
UpdateCurrentStats cs = new UpdateCurrentStats();
cs.msg = url.toString();
SwingUtilities.invokeLater(cs);
if (!checkLink(url)) {
UpdateErrors err = new UpdateErrors();
err.msg = url + "(on page " + base + ")\n";
SwingUtilities.invokeLater(err);
badLinksCount++;
return false;
}
UpdateErrors err = new UpdateErrors();
err.msg = url.toString()+"\n";
SwingUtilities.invokeLater(err);
goodLinksCount++;
if (!url.getHost().equalsIgnoreCase(base.getHost())) {
return false;
} else {
return true;
}
}
/**
*當發現URL錯誤時調用
*
*參數url是導致錯誤的URL
*/
public void spiderURLError(URL url) {
}
/**
*由內部調用檢查鏈接是否有效
*
*參數url是被檢查的鏈接
*返回True表示鏈接正常有效
*/
protected boolean checkLink(URL url) {
try {
URLConnection connection = url.openConnection();
connection.connect();
return true;
} catch (IOException e) {
return false;
}
}
/**
*當蜘蛛找到電子郵件地址時調用
*
*參數email為找到的電子郵件地址
*/
public void spiderFoundEMail(String email) {
}
/**
*以線程安全方式更新錯誤信息的內部類
*/
class UpdateErrors implements Runnable {
public String msg;
public void run() {
errors.append(msg);
}
}
/**
*以線程安全方式更新當前狀態信息
*/
class UpdateCurrentStats implements Runnable {
public String msg;
public void run() {
current.setText("當前進度:" + msg);
goodLinksLabel.setText("正常鏈接:" + goodLinksCount);
badLinksLabel.setText("死鏈接:" + badLinksCount);
}
}
}
HTMLParse.java
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.qyclass;
import javax.swing.text.html.*;
/**
 * Thin {@link HTMLEditorKit} subclass whose only purpose is to widen the
 * visibility of {@code getParser()} so that code outside the editor kit can
 * obtain the HTML parser.
 */
public class HTMLParse extends HTMLEditorKit {

    /**
     * Returns the parser supplied by the superclass, unchanged.
     *
     * @return the parser obtained from {@code HTMLEditorKit.getParser()}
     */
    @Override
    public HTMLEditorKit.Parser getParser() {
        final HTMLEditorKit.Parser inherited = super.getParser();
        return inherited;
    }
}
復制代碼
ISpiderReportable.java
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.qyclass;
import java.net.*;
/**
 * Callbacks through which the spider reports what it encounters while
 * crawling.
 */
interface ISpiderReportable {

    /**
     * Reports a link found on a page.
     *
     * @param base the page on which the link was found
     * @param url  the resolved link target
     * @return {@code true} if the spider should add {@code url} to its workload
     */
    boolean spiderFoundURL(URL base, URL url);

    /**
     * Reports a URL whose retrieval failed with an I/O error.
     *
     * @param url the URL that could not be processed
     */
    void spiderURLError(URL url);

    /**
     * Reports an e-mail link.
     *
     * @param email the link value as found in the page
     */
    void spiderFoundEMail(String email);
}
復制代碼
Spider.java
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.qyclass;
import java.util.*;
import java.net.*;
import java.io.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
public class Spider {
/**
?? *導致錯誤的URL集合
&
nbsp;? */
protected Collection workloadError = new ArrayList(3);
/**
?? *等待區URL集合
?? */
protected Collection workloadWaiting = new ArrayList(3);
/**
?? *處理過的URL集合
?? */
protected Collection workloadProcessed = new ArrayList(3);
protected ISpiderReportable report;
/**
?? *表明處理過程是否應取消的標志
?? */
protected boolean cancel = false;
/**
?? *構造函數
?? *
?? *參數report為實現了ISpiderReportable接口的類
?? */
public Spider(ISpiderReportable report)
{
??? this.report = report;
}
/**
?? *獲取導致錯誤的URL
?? */
public Collection getWorkloadError()
{
??? return workloadError;
}
/**
?? *獲取在等待的URL
?? *應添加至少一個URL到此集合以啟動蜘蛛
?? */
public Collection getWorkloadWaiting()
{
??? return workloadWaiting;
}
/**
?? *獲取被處理過的URL
?? */
public Collection getWorkloadProcessed()
{
??? return workloadProcessed;
}
/**
?? *清空所有
?? */
public void clear()
{
??? getWorkloadError().clear();
??? getWorkloadWaiting().clear();
??? getWorkloadProcessed().clear();
}
/**
?? *設置一標志,使begin方法在完成之前返回
?? */
public void cancel()
{
??? cancel = true;
}
?
public void addURL(URL url)
{
??? if ( getWorkloadWaiting().contains(url) )
????? return;
??? if ( getWorkloadError().contains(url) )
????? return;
??? if ( getWorkloadProcessed().contains(url) )
????? return;
??? log("正添加到工作區:" + url );
??? getWorkloadWaiting().add(url);
}
?
public void processURL(URL url)
{
??? try {
????? log("正在處理:" + url );
????? //獲取URL的內容
????? URLConnection connection = url.openConnection();
????? if ( (connection.getContentType()!=null) &&
?????????? !connection.getContentType().toLowerCase().startsWith("text/") ) {
??????? getWorkloadWaiting().remove(url);
??????? getWorkloadProcessed().add(url);
??????? log("不會進行正理,因為類型為:" +
???????????? connection.getContentType() );
??????? return;
????? }
????? //讀取URL
????? InputStream is = connection.getInputStream();
????? Reader r = new InputStreamReader(is);
????? //解析URL
????? HTMLEditorKit.Parser parse = new HTMLParse().getParser();
????? parse.parse(r,new Parser(url),true);
??? } catch ( IOException e ) {
????? getWorkloadWaiting().remove(url);
????? getWorkloadError().add(url);
????? log("錯誤:" + url );
????? report.spiderURLError(url);
????? return;
??? }
??? //標記此URL已完成
??? getWorkloadWaiting().remove(url);
??? getWorkloadProcessed().add(url);
??? log("已完成:" + url );
? }
?
public void begin()
{
??? cancel = false;
??? while ( !getWorkloadWaiting().isEmpty() && !cancel ) {
????? Object list[] = getWorkloadWaiting().toArray();
????? for ( int i=0;(i<list.length)&&!cancel;i++ )
??????? processURL((URL)list[i]);
??? }
}
/**
*HTML解析器回調函數
*/
protected class Parser
extends HTMLEditorKit.ParserCallback {
??? protected URL base;
??? public Parser(URL base)
??? {
????? this.base = base;
??? }
??? public void handleSimpleTag(HTML.Tag t,
??????????????????????????????? MutableAttributeSet a,int pos)
??? {
????? String href = (String)a.getAttribute(HTML.Attribute.HREF);
????? if( (href==null) && (t==HTML.Tag.FRAME) )
??????? href = (String)a.getAttribute(HTML.Attribute.SRC);
????? if ( href==null )
????????? return;
????? int i = href.indexOf("#");
????? if ( i!=-1 )
??????? href = href.substring(0,i);
????? if ( href.toLowerCase().startsWith("mailto:") ) {
??????? report.spiderFoundEMail(href);
????? return;
????? }
????? handleLink(base,href);
??? }
??? public void handleStartTag(HTML.Tag t,
?????????????????????????????? MutableAttributeSet a,int pos)
??? {
????? handleSimpleTag(t,a,pos);//以同樣的方式處理
??? }
??? protected void handleLink(URL base,String str)
??? {
????? try {
??????? URL url = new URL(base,str);
??????? if ( report.spiderFoundURL(base,url) )
????????? addURL(url);
????? } catch ( MalformedURLException e ) {
??????? log("找到畸形URL:" + str );
????? }
??? }
}
/**
?? *由內部調用來記錄信息
?? *僅是把日志寫到標準輸出
?? *
?? *參數entry為寫到日志的信息
?? */
public void log(String entry)
{
??? System.out.println( (new Date()) + ":" + entry );
}
}
原帖地址:
http://www.phpjava.org/thread-81-1-1.html
本文來自: PJDN--php&Java論壇|技術交流社區,打造中國php&java開發者社區[www.phpjava.org]
更多文章、技術交流、商務合作、聯系博主
微信掃碼或搜索:z360901061

微信掃一掃加我為好友
QQ號聯系: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元
