一般情況下,為了保證安全性,網站會定期更新登錄的細節,例如修改參數名、更新加密(散列)算法等。所以模擬登錄的代碼每隔一段時間肯定會失效,但是如果網站沒有進行大的更新的話,稍微改一改還是能用的。另外,碰到驗證碼的情況就更難辦了,雖然程序可以一定程度地識別驗證碼字符,但目前很難找到簡單且通用的驗證碼識別程序。
很多朋友有模擬登錄新浪微博抓取數據的需求,其實對於一般的微博數據獲取,如用戶信息、微博內容等,使用微博開放平臺API是更明智的選擇:速度更快,而且節省許多網頁處理的功夫。對於API沒有開放的數據,我們再採用模擬登錄的方法。不多說了,我們直接上代碼:(相關JAR文件地址:https://github.com/leepeng/files)
package cn.com.leepeng.test;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.Map;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import net.sf.json.JSONObject;
import weibo4j.util.CommonHttpProtocolRequestUtil;
/**
* 微博模擬登錄獲取AccessToken工具
*
* @author Alex.Lee
* @mail [email protected]
* @date 3rd Apr ,2018
*
*/
public class Weibo {
private static final String AUTH_URL = "https://api.weibo.com/oauth2/authorize?client_id={0}&redirect_uri={1}";
private static final String AUTH_TOKEN = "https://api.weibo.com/oauth2/access_token";
private String clientID;
private String redirectUri;
private String username;
private String password;
private String clientSecret;
private String code;
public Weibo() {
super();
}
public Weibo(String clientID, String redirectUri, String username, String password, String clientSecret) {
super();
this.clientID = clientID;
this.redirectUri = redirectUri;
this.username = username;
this.password = password;
this.clientSecret = clientSecret;
}
public String getClientID() {
return clientID;
}
public void setClientID(String clientID) {
this.clientID = clientID;
}
public String getRedirectUri() {
return redirectUri;
}
public void setRedirectUri(String redirectUri) {
this.redirectUri = redirectUri;
}
public String getClientSecret() {
return clientSecret;
}
public void setClientSecret(String clientSecret) {
this.clientSecret = clientSecret;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getUsername() {
return username;
}
public void setUsername(String username) {
this.username = username;
}
public String getPassword() {
return password;
}
public void setPassword(String password) {
this.password = password;
}
@Override
public String toString() {
return JSONObject.fromBean(this).toString();
}
public static class AccessTokenData {
String access_token;
String remind_in;
String expires_in;
String uid;
String isRealName;
public void setAccess_token(String access_token) {
this.access_token = access_token;
}
public void setRemind_in(String remind_in) {
this.remind_in = remind_in;
}
public void setExpires_in(String expires_in) {
this.expires_in = expires_in;
}
public void setUid(String uid) {
this.uid = uid;
}
public void setIsRealName(String isRealName) {
this.isRealName = isRealName;
}
public String getAccess_token() {
return access_token;
}
public String getRemind_in() {
return remind_in;
}
public String getExpires_in() {
return expires_in;
}
public String getUid() {
return uid;
}
public String getIsRealName() {
return isRealName;
}
@Override
public String toString() {
return JSONObject.fromBean(this).toString();
}
}
public AccessTokenData getAccessTokenData() throws Exception {
WebClient webClient = null;
String authUrl = null;
Weibo.AccessTokenData accessTokenData = null;
try {
webClient = getWebClient();
authUrl = MessageFormat.format(AUTH_URL, clientID, redirectUri);
HtmlPage page = (HtmlPage) webClient.getPage(authUrl);
HtmlForm form = page.getForms().get(0);
form.getInputByName("userId").setValueAttribute(this.username);
form.getInputByName("passwd").setValueAttribute(this.password);
page = (HtmlPage) form.getOneHtmlElementByAttribute("a", "action-type", "submit").click();// 登錄進入
webClient.waitForBackgroundJavaScript(1000);// 等待1秒
String baseURI = page.getBaseURI();
if (baseURI != null && baseURI.indexOf("code") > 0) {
String code = baseURI.substring(baseURI.indexOf("code") + 5);
this.setCode(code);
Map<string> params = new HashMap<>();/<string>
params.put("client_id", clientID);
params.put("client_secret", clientSecret);
params.put("grant_type", "authorization_code");
params.put("code", code);
params.put("redirect_uri", redirectUri);
accessTokenData = (Weibo.AccessTokenData) JSONObject.toBean(
JSONObject.fromString(CommonHttpProtocolRequestUtil.requestWithPost(AUTH_TOKEN, params)),
Weibo.AccessTokenData.class);
}
} catch (Exception e) {
throw e;
} finally {
webClient.close();
}
return accessTokenData;
}
public static WebClient getWebClient() {
WebClient webClient = new WebClient(BrowserVersion.CHROME);
webClient.getOptions().setJavaScriptEnabled(true); // 啟動JS
webClient.getOptions().setUseInsecureSSL(true);// 忽略ssl認證
webClient.getOptions().setCssEnabled(false);// 禁用Css,可避免自動二次請求CSS進行渲染
webClient.getOptions().setThrowExceptionOnScriptError(false);// 運行錯誤時,不拋出異常
webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
webClient.setAjaxController(new NicelyResynchronizingAjaxController());// 設置Ajax異步
return webClient;
}
public static void main(String[] args) throws Exception {
Weibo weibo = new Weibo("xxxx", "https://api.weibo.com/oauth2/default.html", "xxxxxx",
"xxxxx", "xxxxxxx");
AccessTokenData accessTokenData = weibo.getAccessTokenData();
System.out.println(accessTokenData);
}
}
閱讀更多 李鵬 的文章