public class Site extends Object
PageProcessor

| 限定符和类型 | 类和说明 |
|---|---|
static interface |
Site.HeaderConst
已过时。
|
| 构造器和说明 |
|---|
Site() |
| 限定符和类型 | 方法和说明 |
|---|---|
Site |
addCookie(String name,
String value)
Add a cookie with domain
getDomain() |
Site |
addCookie(String domain,
String name,
String value)
Add a cookie with specific domain.
|
Site |
addHeader(String key,
String value)
Put an Http header for downloader.
|
Site |
addStartRequest(Request startRequest)
已过时。
|
Site |
addStartUrl(String startUrl)
已过时。
|
Site |
enableHttpProxyPool() |
boolean |
equals(Object o) |
Set<Integer> |
getAcceptStatCode()
get acceptStatCode
|
Map<String,Map<String,String>> |
getAllCookies()
get cookies of all domains
|
String |
getCharset()
get charset set manually
|
Map<String,String> |
getCookies()
get cookies
|
int |
getCycleRetryTimes()
When cycleRetryTimes is more than 0, the request will be added back to the scheduler and the download will be tried again.
|
String |
getDomain()
get domain
|
Map<String,String> |
getHeaders() |
org.apache.http.HttpHost |
getHttpProxy() |
org.apache.http.HttpHost |
getHttpProxyFromPool() |
ProxyPool |
getHttpProxyPool() |
int |
getRetrySleepTime() |
int |
getRetryTimes()
Get the number of immediate retries when a download fails, 0 by default.
|
int |
getSleepTime()
Get the interval between the processing of two pages.
|
List<Request> |
getStartRequests() |
List<String> |
getStartUrls()
已过时。
|
int |
getTimeOut() |
String |
getUserAgent()
get user agent
|
int |
hashCode() |
boolean |
isUseGzip() |
static Site |
me()
Create a new Site.
|
void |
returnHttpProxyToPool(org.apache.http.HttpHost proxy,
int statusCode) |
Site |
setAcceptStatCode(Set<Integer> acceptStatCode)
Set acceptStatCode.
|
Site |
setCharset(String charset)
Set charset of page manually.
|
Site |
setCycleRetryTimes(int cycleRetryTimes)
Set the number of cycle retries when a download fails, 0 by default.
|
Site |
setDomain(String domain)
set the domain of site.
|
Site |
setHttpProxy(org.apache.http.HttpHost httpProxy)
set up httpProxy for this site
|
Site |
setHttpProxyPool(List<String[]> httpProxyList)
Set httpProxyPool, String[0]:ip, String[1]:port
|
Site |
setProxyReuseInterval(int reuseInterval) |
Site |
setRetrySleepTime(int retrySleepTime)
Set the retry sleep time when a download fails, 1000 by default.
|
Site |
setRetryTimes(int retryTimes)
Set the number of retries when a download fails, 0 by default.
|
Site |
setSleepTime(int sleepTime)
Set the interval between the processing of two pages.
|
Site |
setTimeOut(int timeOut)
set timeout for downloader in ms
|
Site |
setUseGzip(boolean useGzip)
Whether to use gzip.
|
Site |
setUserAgent(String userAgent)
set user agent
|
String |
toString() |
Task |
toTask() |
public static Site me()
public Site addCookie(String name, String value)
getDomain()
name - name
value - value
public Site addCookie(String domain, String name, String value)
domain - domain
name - name
value - value
public Site setUserAgent(String userAgent)
userAgent - userAgent
public Map<String,Map<String,String>> getAllCookies()
public String getUserAgent()
public String getDomain()
public Site setCharset(String charset)
charset - charset
public String getCharset()
public int getTimeOut()
public Site setTimeOut(int timeOut)
timeOut - timeOut
public Site setAcceptStatCode(Set<Integer> acceptStatCode)
acceptStatCode - acceptStatCode
@Deprecated public List<String> getStartUrls()
getStartRequests()
public Site addStartUrl(String startUrl)
Spider.addUrl(String...)
startUrl - startUrl
Spider.addUrl(String...)
public Site addStartRequest(Request startRequest)
Spider.addRequest(Request...)
startRequest - startRequest
Spider.addRequest(Request...)
public Site setSleepTime(int sleepTime)
sleepTime - sleepTime
public int getSleepTime()
public int getRetryTimes()
public Site addHeader(String key, String value)
Use addCookie(String, String) for cookie and setUserAgent(String) for user-agent.
key - key of the HTTP header; some key constants are defined in Site.HeaderConst
value - value of header
public Site setRetryTimes(int retryTimes)
retryTimes - retryTimes
public int getCycleRetryTimes()
public Site setCycleRetryTimes(int cycleRetryTimes)
cycleRetryTimes - cycleRetryTimes
public org.apache.http.HttpHost getHttpProxy()
public Site setHttpProxy(org.apache.http.HttpHost httpProxy)
httpProxy - httpProxy
public boolean isUseGzip()
public int getRetrySleepTime()
public Site setRetrySleepTime(int retrySleepTime)
retrySleepTime - retrySleepTime
public Site setUseGzip(boolean useGzip)
useGzip - useGzip
public Task toTask()
public Site setHttpProxyPool(List<String[]> httpProxyList)
httpProxyList - httpProxyList
public Site enableHttpProxyPool()
public ProxyPool getHttpProxyPool()
public org.apache.http.HttpHost getHttpProxyFromPool()
public void returnHttpProxyToPool(org.apache.http.HttpHost proxy,
int statusCode)
public Site setProxyReuseInterval(int reuseInterval)
Copyright © 2016. All rights reserved.