主要思想:通过后台WebView载入指定网页,再提取出WebView中的内容
关键代码:
// Evaluate a script inside the loaded page that yields the full markup of the document.
string html = await webView.InvokeScriptAsync("eval", new[] { "document.documentElement.outerHTML;" });
有一个很简单的思路,
订阅WebView的NavigationCompleted事件,然后让WebView导航(Navigate)到指定的网址,事件触发时执行这行代码
除此之外,这里还有一个异步的方法,用到了TaskCompletionSource这个东西
首先,创建一个TaskCompletionSource:
// The task produces the page's HTML, so the result type is string.
TaskCompletionSource<string> completionSource = new TaskCompletionSource<string>();
因为返回的东西是string(html),所以泛型T设置成string
然后使用lambda的形式订阅NavigationCompleted事件:
// Completes the pending task with the page's HTML once the requested URI has finished loading.
// The leading /* n */ markers are the listing line numbers referred to by the surrounding text.
/*  1 */ webView.NavigationCompleted += async (sender, args) =>
/*  2 */ {
/*  3 */     if (args.Uri != uri)
/*  4 */         return;
/*  5 */     await Task.Delay(200);
/*  6 */     var html = await sender.InvokeScriptAsync("eval", new string[] { "document.documentElement.outerHTML;" });
/*  7 */     webView.NavigateToString("");
/*  8 */     webView = null;
/*  9 */     completionSource.SetResult(html);
/* 10 */ };
Line5的延迟200ms,是为了Navigation完成之后再给页面里的其他一些元素(比如一些js脚本)一些加载的时间(讲道理订阅事件里也应该写一个的)
Line7的导航到空是为了防止WebView里的东西继续运行从而导致一些灵异事件(尤其是一些带视频的网页,咳咳)
Line9,给Task设置个Result,await就会结束
最后:
// Hand the pending task to the caller; it finishes when a result or exception is set on the source.
return completionSource.Task;
封装成类:
/// <summary>
/// Loads a web page in a background <see cref="WebView"/> and extracts the HTML of the
/// loaded document. Offers an event-based instance API and a task-based static API.
/// </summary>
public class WebHelper
{
    /// <summary>Script evaluated inside the page to obtain the markup of the whole document.</summary>
    private const string OuterHtmlScript = "document.documentElement.outerHTML;";

    /// <summary>
    /// Pause (in milliseconds) between the end of navigation and reading the HTML, giving
    /// page scripts some time to run.
    /// </summary>
    private const int ScriptSettleDelayMs = 200;

    /// <summary>Payload of the <see cref="WebLoaded"/> event.</summary>
    public class WebLoadedArgs : EventArgs
    {
        /// <summary>True when the HTML was retrieved; false when loading failed.</summary>
        public bool Success { get; private set; }

        /// <summary>Status reported by the WebView for the navigation.</summary>
        public WebErrorStatus WebErrorStatus { get; private set; }

        /// <summary>HTML of the loaded document; only set when <see cref="Success"/> is true.</summary>
        public string Html { get; private set; }

        /// <summary>Creates a failure result.</summary>
        /// <param name="webErrorStatus">Error status of the failed navigation.</param>
        public WebLoadedArgs(WebErrorStatus webErrorStatus)
        {
            WebErrorStatus = webErrorStatus;
            Success = false;
        }

        /// <summary>Creates a success result.</summary>
        /// <param name="Html">HTML of the loaded document.</param>
        /// <param name="webErrorStatus">Status reported together with the completed navigation.</param>
        public WebLoadedArgs(string Html, WebErrorStatus webErrorStatus)
        {
            this.Html = Html;
            WebErrorStatus = webErrorStatus;
            Success = true;
        }
    }

    /// <summary>Address passed to the constructor.</summary>
    public string Url { get; private set; }

    /// <summary>Raised once when loading has either succeeded or failed.</summary>
    public event EventHandler<WebLoadedArgs> WebLoaded;

    private WebView webView;

    // Guards against raising WebLoaded more than once (e.g. when both the failed and the
    // completed notifications arrive for the same navigation).
    private bool reported;

    /// <summary>
    /// Starts loading <paramref name="Url"/> in a background WebView. The outcome is
    /// delivered through <see cref="WebLoaded"/>.
    /// </summary>
    /// <param name="Url">Absolute address of the page to load.</param>
    public WebHelper(string Url)
    {
        this.Url = Url;
        webView = new WebView(WebViewExecutionMode.SeparateThread);

        // Subscribe before navigating so that no notification can be missed.
        webView.NavigationCompleted += WebView_NavigationCompleted;
        webView.NavigationFailed += WebView_NavigationFailed;
        webView.Navigate(new Uri(Url));
    }

    private void WebView_NavigationFailed(object sender, WebViewNavigationFailedEventArgs e)
    {
        Report(new WebLoadedArgs(e.WebErrorStatus));
    }

    private async void WebView_NavigationCompleted(WebView sender, WebViewNavigationCompletedEventArgs args)
    {
        if (!args.IsSuccess)
        {
            Report(new WebLoadedArgs(args.WebErrorStatus));
            return;
        }

        string html;
        try
        {
            html = await sender.InvokeScriptAsync("eval", new string[] { OuterHtmlScript });
        }
        catch (Exception)
        {
            // This is an async void handler: an escaping exception could not be observed
            // by anyone, so a script failure is reported as a failed load instead.
            Report(new WebLoadedArgs(WebErrorStatus.Unknown));
            return;
        }

        Report(new WebLoadedArgs(html, args.WebErrorStatus));
    }

    // Releases the WebView and raises WebLoaded; only the first call has any effect.
    private void Report(WebLoadedArgs e)
    {
        if (reported)
        {
            return;
        }

        reported = true;

        if (webView != null)
        {
            webView.NavigationCompleted -= WebView_NavigationCompleted;
            webView.NavigationFailed -= WebView_NavigationFailed;

            // Navigate to empty content so the page (e.g. media) does not keep running.
            webView.NavigateToString("");
            webView = null;
        }

        WebLoaded?.Invoke(this, e);
    }

    /// <summary>
    /// Asynchronously loads a page and returns its HTML.
    /// </summary>
    /// <param name="Url">Absolute address of the page to load.</param>
    /// <param name="Timeout">Timeout in seconds.</param>
    /// <returns>A task producing the HTML of the loaded page.</returns>
    public static Task<string> LoadWebAsync(string Url, int Timeout)
    {
        return LoadWebAsync(Url, "", Timeout);
    }

    /// <summary>
    /// Asynchronously loads a page and returns its HTML.
    /// </summary>
    /// <param name="Url">Absolute address of the page to load.</param>
    /// <param name="Referer">
    /// Value for the Referer request header, used to get past hotlink checks.
    /// When null or empty, no Referer header is sent.
    /// </param>
    /// <param name="TimeOut">Timeout in seconds.</param>
    /// <returns>
    /// A task producing the HTML of the loaded page. The task faults with a
    /// <see cref="WebException"/> when navigation fails and with a
    /// <see cref="TimeoutException"/> when the timeout elapses first.
    /// </returns>
    public static Task<string> LoadWebAsync(string Url, string Referer, int TimeOut)
    {
        Uri uri = new Uri(Url);
        WebView webView = new WebView(WebViewExecutionMode.SeparateThread);
        TaskCompletionSource<string> completionSource = new TaskCompletionSource<string>();

        DispatcherTimer timer = new DispatcherTimer();
        timer.Interval = TimeSpan.FromSeconds(TimeOut);

        // Stops the timeout timer and blanks the WebView. Safe to call more than once.
        Action release = () =>
        {
            timer.Stop();
            if (webView != null)
            {
                // Navigate to empty content so the page (e.g. media) does not keep running.
                webView.NavigateToString("");
                webView = null;
            }
        };

        webView.NavigationCompleted += async (sender, args) =>
        {
            // NOTE(review): a redirect presumably makes args.Uri differ from the requested
            // uri, in which case only the timeout ends the task - confirm whether intended.
            if (args.Uri != uri || completionSource.Task.IsCompleted)
            {
                return;
            }

            if (!args.IsSuccess)
            {
                release();
                completionSource.TrySetException(new WebException("", (WebExceptionStatus)args.WebErrorStatus));
                return;
            }

            try
            {
                await Task.Delay(ScriptSettleDelayMs);
                if (completionSource.Task.IsCompleted)
                {
                    // Timed out or failed while waiting; the view has already been blanked.
                    return;
                }

                string html = await sender.InvokeScriptAsync("eval", new string[] { OuterHtmlScript });
                release();
                completionSource.TrySetResult(html);
            }
            catch (Exception ex)
            {
                // async void lambda: route the failure into the task instead of losing it.
                release();
                completionSource.TrySetException(ex);
            }
        };

        webView.NavigationFailed += (sender, args) =>
        {
            release();

            // NOTE(review): the WebErrorStatus value is carried over by a plain enum cast and
            // may not correspond to a named WebExceptionStatus member - kept for compatibility.
            completionSource.TrySetException(new WebException("", (WebExceptionStatus)args.WebErrorStatus));
        };

        timer.Tick += (sender, args) =>
        {
            release();
            completionSource.TrySetException(new TimeoutException());
        };

        HttpRequestMessage requestMessage = new HttpRequestMessage(HttpMethod.Get, uri);
        if (!string.IsNullOrEmpty(Referer))
        {
            requestMessage.Headers.Add("Referer", Referer);
        }

        // Handlers are attached above, so nothing can be missed once navigation starts.
        timer.Start();
        webView.NavigateWithHttpRequestMessage(requestMessage);

        return completionSource.Task;
    }
}
使用方法:
(事件订阅的方式)
// Event-based usage: create the helper, then subscribe to be told when loading has ended.
var webHelper = new WebHelper("http://www.baidu.com/");
webHelper.WebLoaded += WebHelper_WebLoaded;

// Receives the outcome of the load; the HTML is only read when loading succeeded.
private void WebHelper_WebLoaded(object sender, WebHelper.WebLoadedArgs e)
{
    if (!e.Success)
    {
        return;
    }

    string html = e.Html;
}
(异步的方式)
// Task-based usage: await the HTML directly; the second argument is the timeout in seconds.
string html = await WebHelper.LoadWebAsync(Url: "http://www.baidu.com", Timeout: 120);