分类信息网站如何优化,做算命类网站违法吗,邯郸之战,余姚有专业做网站的吗C#获取HTML源码
2024年03月23日记录
以前的那个从网上找到的方法，在一些网站上用不了，如17K，取出来的是乱码，要么就是一坨JS，好像是用JS又重新加载了什么的
using System;
using System.Collections.Generic;
using System.We…

C#获取HTML源码
2024年03月23日记录
以前的那个从网上找到的方法，在一些网站上用不了，如17K，取出来的是乱码，要么就是一坨JS，好像是用JS又重新加载了什么的
using System;
using System.Collections.Generic;
using System.Web;
using System.Net;
using System.IO;
using System.Text;
using System.Net.Security;
using System.Security.Authentication;
using System.Security.Cryptography.X509Certificates;

namespace Niunan.XiaoShuo.Util
{
    /// <summary>
    /// Basic HTTP helper class responsible for the low-level HTTP communication
    /// (blocking GET and POST built on <see cref="HttpWebRequest"/>).
    /// </summary>
    public class HttpService
    {
        /// <summary>
        /// Server certificate validation callback that accepts every certificate.
        /// </summary>
        /// <param name="sender">Unused.</param>
        /// <param name="certificate">Unused.</param>
        /// <param name="chain">Unused.</param>
        /// <param name="errors">Unused; policy errors are ignored.</param>
        /// <returns>Always <c>true</c>.</returns>
        public static bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
        {
            // Accept unconditionally, otherwise some sites cannot be opened.
            // NOTE(review): this disables TLS certificate validation; acceptable for a
            // scraper, but should not be used where the response must be trusted.
            return true;
        }

        /// <summary>
        /// Sends an HTTP POST and returns the trimmed response body decoded as UTF-8.
        /// </summary>
        /// <param name="xml">Request body; it is sent UTF-8 encoded.</param>
        /// <param name="url">Target URL.</param>
        /// <param name="isUseCert">
        /// Kept for interface compatibility; no client certificate is attached
        /// (the original was copied from a WeChat demo and does not use one here).
        /// </param>
        /// <param name="timeout">Request timeout in seconds.</param>
        /// <param name="contenttype">Content-Type header, e.g. application/x-www-form-urlencoded or text/xml.</param>
        /// <param name="Authorization">
        /// Value of the Authorization header; the header is omitted when this is null or empty
        /// (used for the Ronglian cloud communication API).
        /// </param>
        /// <returns>The response body with leading and trailing whitespace removed.</returns>
        public static string Post(string xml, string url, bool isUseCert, int timeout,
            string contenttype = "application/x-www-form-urlencoded", string Authorization = "")
        {
            HttpWebRequest request = null;
            try
            {
                PrepareServicePoint(url);

                request = (HttpWebRequest)WebRequest.Create(url);
                request.Method = "POST";
                request.Timeout = timeout * 1000;
                if (!string.IsNullOrEmpty(Authorization))
                {
                    request.Headers.Add(HttpRequestHeader.Authorization, Authorization);
                }

                // To route the request through a proxy, assign a WebProxy to request.Proxy here.

                // Content type and length of the POST payload.
                request.ContentType = contenttype;
                byte[] data = Encoding.UTF8.GetBytes(xml);
                request.ContentLength = data.Length;

                // Write the payload; the using block closes the stream even if Write throws.
                using (Stream reqStream = request.GetRequestStream())
                {
                    reqStream.Write(data, 0, data.Length);
                }

                return ReadResponseBody(request);
            }
            finally
            {
                // Exceptions propagate unchanged (no catch, so the stack trace is preserved).
                if (request != null)
                {
                    request.Abort();
                }
            }
        }

        /// <summary>
        /// Sends an HTTP GET and returns the trimmed response body decoded as UTF-8.
        /// </summary>
        /// <param name="url">URL to request.</param>
        /// <returns>The response body with leading and trailing whitespace removed.</returns>
        /// <exception cref="WebException">Thrown by the underlying request when it fails.</exception>
        public static string Get(string url)
        {
            HttpWebRequest request = null;
            try
            {
                PrepareServicePoint(url);

                request = (HttpWebRequest)WebRequest.Create(url);
                request.Method = "GET";

                // To route the request through a proxy, assign a WebProxy to request.Proxy here.

                return ReadResponseBody(request);
            }
            finally
            {
                if (request != null)
                {
                    request.Abort();
                }
            }
        }

        /// <summary>
        /// Applies the process-wide connection settings used by both Get and Post.
        /// </summary>
        private static void PrepareServicePoint(string url)
        {
            // Maximum number of concurrent connections.
            ServicePointManager.DefaultConnectionLimit = 200;

            // For https URLs install the accept-all certificate callback (process-wide setting).
            if (url.StartsWith("https", StringComparison.OrdinalIgnoreCase))
            {
                ServicePointManager.ServerCertificateValidationCallback =
                    new RemoteCertificateValidationCallback(CheckValidationResult);
            }
        }

        /// <summary>
        /// Executes the request and reads the whole response as trimmed UTF-8 text.
        /// </summary>
        private static string ReadResponseBody(HttpWebRequest request)
        {
            // Response and reader are disposed deterministically, so connections are
            // released without forcing a garbage collection.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                return reader.ReadToEnd().Trim();
            }
        }
    }
}
弄了一上午，到处问人、到处查，发现下面的代码可以用于17K网站：
// Fetch a page with HttpClient, letting the handler transparently decompress the body.
// `url` is expected to be defined by the surrounding code.
var handler = new HttpClientHandler()
{
    AutomaticDecompression = System.Net.DecompressionMethods.GZip | System.Net.DecompressionMethods.Deflate,
    UseCookies = false,
};
// NOTE: in long-running code create the HttpClient once and reuse it for all requests.
var httpClient = new HttpClient(handler);
var requestMessage = new HttpRequestMessage(HttpMethod.Get, url);
// Advertise only the encodings the handler above can decompress. Offering br/zstd
// allows the server to answer with a body that ReadAsStringAsync returns undecoded.
requestMessage.Headers.Add("Accept-encoding", "gzip, deflate");
var message = await httpClient.SendAsync(requestMessage);
var content = await message.Content.ReadAsStringAsync();
//后来发现这段代码前几次可以抓取到，然后又抓不到了……只能用下面的模拟浏览器打开网页抓取源代码了
后来又来了个更狠的：用PuppeteerSharp，相当于用代码来控制，让系统中的chrome浏览器打开一个网页，然后再来获取这个网页的源代码
using System;
using System.Threading.Tasks;
using PuppeteerSharp; // add the PuppeteerSharp package via NuGet

namespace ConsoleApp2
{
    internal class Program
    {
        /// <summary>
        /// Opens a page in a headless browser driven by PuppeteerSharp and prints the
        /// page's HTML to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            // Downloads the browser build that PuppeteerSharp provides; without this line,
            // ExecutablePath below must point to a locally installed browser.
            await new BrowserFetcher().DownloadAsync(BrowserTag.Stable);

            var browser = await Puppeteer.LaunchAsync(new LaunchOptions
            {
                // ExecutablePath = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
                Headless = true
            });

            try
            {
                var page = await browser.NewPageAsync();
                await page.GoToAsync("https://www.17k.com/book/554720.html");

                // Wait two seconds before reading the content, so scripts on the page can run.
                await page.WaitForTimeoutAsync(2000);

                string html = await page.GetContentAsync();
                Console.WriteLine(html);
            }
            finally
            {
                // Always shut the browser down, even when navigation or reading fails.
                await browser.CloseAsync();
            }
        }
    }
}
然后还有一个playwright的，也能实现操作浏览器打开网页的功能，用于自动化测试的。以前有记录过这个名字，不过一直没有时间看……主要是“懒”……
Installation | Playwright .NET