using System;
using System.Collections.Generic;
using System.Configuration;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using HtmlAgilityPack;
using Newtonsoft.Json;
using static System.Net.Mime.MediaTypeNames;

namespace FCS.Common
{
    /// <summary>
    /// Builds and validates a pool of HTTP proxies.
    /// Proxy candidates are scraped from the sites configured in <c>XmlConfig/UrlData.xml</c>
    /// (one <see cref="IPEntity"/> per site) and each candidate is validated by fetching a
    /// target URL through it.
    /// </summary>
    /// <remarks>
    /// All state is static and shared, so only one pool refresh / single-IP lookup should
    /// run at a time.
    /// NOTE(review): the generic type arguments in this file (<c>List&lt;string&gt;</c>,
    /// <c>Dictionary&lt;string, string&gt;</c>, <c>ConvertXMLToObject&lt;IPEntity&gt;</c>,
    /// <c>Task&lt;string&gt;</c>) were reconstructed from how the values are used - confirm
    /// them against <c>CommonHelper</c> and the callers.
    /// </remarks>
    public class IPHelper
    {
        /// <summary>Fixed delay that gives the scraper tasks time to queue validations before completion is checked.</summary>
        private const int InitialScrapeDelayMs = 300000;

        /// <summary>Interval between progress reports while waiting for validations to finish.</summary>
        private const int ProgressIntervalMs = 5000;

        /// <summary>Single-IP mode gives up once this many validations have finished.</summary>
        private const int SingleModeMaxFinished = 2000;

        /// <summary>Single-IP mode may give up after this long, once every launched validation has finished.</summary>
        private static readonly TimeSpan SingleModeTimeout = TimeSpan.FromMinutes(3);

        private const string UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36";

        /// <summary>Guards <see cref="list"/>, <see cref="listContent"/> and <see cref="finishcount"/>; also used for Wait/Pulse signalling.</summary>
        static readonly object locker = new object();

        /// <summary>Number of validations launched so far (updated with <see cref="Interlocked"/>).</summary>
        static int threadCount = 0;

        /// <summary>Number of validations that have finished (updated under <see cref="locker"/>).</summary>
        static int finishcount = 0;

        /// <summary>Proxies ("ip:port") that passed validation.</summary>
        private static List<string> list = new List<string>();

        /// <summary>Page content fetched through the validated proxies (single-IP mode only).</summary>
        private static List<string> listContent = new List<string>();

        /// <summary>Target URLs used to validate a proxy; one is picked at random per validation.</summary>
        private static List<string> urlList = new List<string>();

        /// <summary>Optional text the fetched page must contain for the proxy to be accepted.</summary>
        private static string title;

        /// <summary>True while in single-IP mode: workers stop as soon as one proxy has been validated.</summary>
        private static bool isSeleteOne = false;

        private static readonly Random random = new Random();

        private static string iPAgencyAddress = AppDomain.CurrentDomain.BaseDirectory + "/XmlConfig/IPAgency.txt";

        private static readonly string urlDataConfigFile = AppDomain.CurrentDomain.BaseDirectory + "/XmlConfig/UrlData.xml";

        /// <summary>Targets validated against when the caller supplies none.</summary>
        private static readonly string[] DefaultTargetUrls =
        {
            "http://fenxi.zgzcw.com/1766186/dxdb/zhishu?company_id=8&company=Bet365", // over/under odds - live
            "http://fenxi.zgzcw.com/1766186/bjop/zhishu?company_id=115&company=%E5%A8%81%E5%BB%89%E5%B8%8C%E5%B0%94", // European odds - live
            "http://fenxi.zgzcw.com/1766186/ypdb/zhishu?company_id=1&company=%E6%BE%B3%E9%97%A8", // Asian handicap odds - live
            "http://fenxi.zgzcw.com/2413445/dxdb5", // over/under odds - settled
            "http://fenxi.zgzcw.com/2413445/bjop", // European odds - settled
            "http://fenxi.zgzcw.com/2413445/ypdb", // Asian handicap odds - settled
            "http://saishi.zgzcw.com/soccer/team/25", // team profile
            "http://saishi.zgzcw.com/soccer", // competitions
            "http://saishi.zgzcw.com/soccer/league/36/2018-2019/sxpl", // upper/lower handicap
            "http://saishi.zgzcw.com/soccer/league/36/2018-2019/sxds", // upper/lower, odd/even
            "http://saishi.zgzcw.com/soccer/league/36/2018-2019/jqds", // total goals odd/even
            "http://saishi.zgzcw.com/soccer/league/36/2018-2019/ssb", // top scorers
            "http://saishi.zgzcw.com/soccer/league/36/2018-2019", // standings
            "http://saishi.zgzcw.com/soccer/league/36", // teams, groups, matches
            "http://cp.zgzcw.com/lottery/zucai/14csfc/index.jsp", // 14-match lottery
            "http://cp.zgzcw.com/lottery/zucai/6cbqc/index.jsp", // 6-match lottery
            "http://cp.zgzcw.com/lottery/zucai/4cjqc/index.jsp", // 4-match lottery
            "http://live.zgzcw.com/jz/", // "jingcai" lottery
            "http://live.zgzcw.com/bd/", // "beidan" lottery
            "http://live.zgzcw.com/sfc/", // football lottery
            "http://fenxi.zgzcw.com/2404696/zrtj", // line-ups
            "http://saishi.zgzcw.com/soccer/player/4369", // player
        };

        /// <summary>
        /// Form fields posted by <see cref="TestIpFormDta"/>. When non-null, validations use
        /// a multipart POST instead of a GET.
        /// NOTE(review): the value type was reconstructed as <c>string</c> - confirm against callers.
        /// </summary>
        public static Dictionary<string, string> formDataDict;

        /// <summary>
        /// Entry point: refreshes the proxy pools.
        /// </summary>
        /// <param name="ipList">Target URLs to validate proxies against; when null or empty a built-in list is used.</param>
        /// <param name="isIpAgency">
        /// When true, first refreshes the "agency" pool (written to <c>XmlConfig/IPAgency.txt</c>),
        /// validating against the proxy-list sites themselves.
        /// </param>
        public static void GetIp(IEnumerable<string> ipList = null, bool isIpAgency = true)
        {
            if (isIpAgency)
            {
                var urlDataList = CommonHelper.ConvertXMLToObject<IPEntity>(urlDataConfigFile, "UrlSetting");
                var sourceUrls = (from a in urlDataList select a.Url).ToList();
                // Agency pool: proxies able to reach the proxy-list sites.
                GetIPData(sourceUrls, "ip", iPAgencyAddress);
            }

            // Materialise once so a lazy sequence is not enumerated twice.
            var targets = ipList == null ? new List<string>() : ipList.ToList();
            if (targets.Count == 0)
            {
                targets = DefaultTargetUrls.ToList();
            }

            // Crawling pool (written to XmlConfig/IP.txt by default).
            GetIPData(targets);
        }

        /// <summary>
        /// Scrapes all configured proxy sources, validates every candidate and writes the
        /// distinct validated proxies as JSON to <paramref name="path"/>.
        /// </summary>
        /// <param name="_urlList">Validation targets; falls back to the <c>TestUrl</c> app setting when null or empty.</param>
        /// <param name="_title">Text the fetched page must contain (empty = no check).</param>
        /// <param name="path">Output file; defaults to <c>XmlConfig/IP.txt</c> under the base directory.</param>
        private static void GetIPData(List<string> _urlList, string _title = "", string path = "")
        {
            // Pool mode: a previous single-IP lookup must not make the workers stop early.
            isSeleteOne = false;
            urlList = ResolveTargetUrls(_urlList);
            title = _title;
            Trace.WriteLine("开始更新IP代理池!请稍后");

            StartScrapers();

            // Both counters start equal, so the completion check below is only meaningful
            // once the scrapers have had time to queue validations.
            Thread.Sleep(InitialScrapeDelayMs);

            List<string> validated;
            int validatedCount;
            lock (locker)
            {
                Stopwatch sinceReport = null;
                while (finishcount != Volatile.Read(ref threadCount))
                {
                    if (sinceReport == null || sinceReport.ElapsedMilliseconds >= ProgressIntervalMs)
                    {
                        int launched = Volatile.Read(ref threadCount);
                        Trace.WriteLine("已执行线程:" + finishcount + "||剩余线程:" + (launched - finishcount) + "可用IP数:" + list.Count);
                        sinceReport = Stopwatch.StartNew();
                    }

                    // Timed wait: releases the lock so workers can report completion, and
                    // re-checks periodically in case a pulse was missed.
                    Monitor.Wait(locker, ProgressIntervalMs);
                }

                validatedCount = list.Count;
                validated = list.Distinct().ToList();
            }

            Trace.WriteLine("IP代理池更新完毕!可用IP:" + validatedCount);
            if (path.IsEmpty())
            {
                path = AppDomain.CurrentDomain.BaseDirectory + "/XmlConfig/IP.txt";
            }

            if (validated.Count > 0)
            {
                CommonHelper.Write_IP(path, JsonConvert.SerializeObject(validated));
            }
        }

        /// <summary>
        /// Same as <see cref="GetIPDataBYOne"/>, but validates by POSTing <paramref name="formData"/>
        /// as multipart form data.
        /// </summary>
        /// <param name="_urlList">Validation targets.</param>
        /// <param name="formData">Form fields to post.</param>
        /// <param name="_title">Text the fetched page must contain (empty = no check).</param>
        /// <returns>The content fetched through the first working proxy, or an empty string.</returns>
        public static string GetIPDataBYOne_FormData(List<string> _urlList, Dictionary<string, string> formData, string _title = "")
        {
            InitIPHelper();
            formDataDict = formData;
            return GetIPDataBYOne(_urlList, _title, true);
        }

        /// <summary>
        /// Finds a single working proxy and returns the page content fetched through it.
        /// </summary>
        /// <param name="_urlList">Validation targets; falls back to the <c>TestUrl</c> app setting when null or empty.</param>
        /// <param name="_title">Text the fetched page must contain (empty = no check).</param>
        /// <param name="isFormData">True when the caller has already initialised state and set <see cref="formDataDict"/>.</param>
        /// <returns>
        /// The fetched content, or an empty string when no proxy was validated before the
        /// lookup gave up (more than 2000 validations finished, or more than 3 minutes
        /// elapsed with every launched validation finished).
        /// </returns>
        public static string GetIPDataBYOne(List<string> _urlList, string _title = "", bool isFormData = false)
        {
            if (!isFormData)
            {
                InitIPHelper();
            }

            isSeleteOne = true;
            urlList = ResolveTargetUrls(_urlList);
            title = _title;
            Trace.WriteLine("获取单个IP!请稍后");

            StartScrapers();

            var content = string.Empty;
            var elapsed = Stopwatch.StartNew();
            lock (locker)
            {
                while (true)
                {
                    if (list.Count >= 1)
                    {
                        // Guarded read: a worker started in pool mode may have added an IP
                        // without recording any content.
                        content = listContent.Count > 0 ? listContent[0] : string.Empty;
                        break;
                    }

                    bool allFinished = finishcount == Volatile.Read(ref threadCount);
                    if (finishcount > SingleModeMaxFinished || (elapsed.Elapsed > SingleModeTimeout && allFinished))
                    {
                        break;
                    }

                    // Workers pulse the lock when they finish; the timeout keeps the
                    // time-based exit condition evaluated without spinning a core.
                    Monitor.Wait(locker, 200);
                }
            }

            return content;
        }

        /// <summary>
        /// Resets all shared state (counters, result lists, targets, form data and the
        /// single-IP flag).
        /// </summary>
        public static void InitIPHelper()
        {
            lock (locker)
            {
                formDataDict = null;
                finishcount = 0;
                threadCount = 0;
                list = new List<string>();
                listContent = new List<string>();
                urlList = new List<string>();
                isSeleteOne = false;
            }
        }

        /// <summary>Returns the caller's targets, or a one-element list holding the <c>TestUrl</c> app setting.</summary>
        private static List<string> ResolveTargetUrls(List<string> requested)
        {
            if (requested != null && requested.Count > 0)
            {
                return requested;
            }

            var fallback = ConfigurationManager.AppSettings["TestUrl"];
            if (string.IsNullOrEmpty(fallback))
            {
                throw new ConfigurationErrorsException("No target URLs were supplied and the 'TestUrl' app setting is missing.");
            }

            return new List<string> { fallback };
        }

        /// <summary>Starts one background scraper task per proxy source configured in UrlData.xml.</summary>
        private static void StartScrapers()
        {
            var sources = CommonHelper.ConvertXMLToObject<IPEntity>(urlDataConfigFile, "UrlSetting");
            foreach (var source in sources)
            {
                var current = source;
                Task.Run(() =>
                {
                    try
                    {
                        GetIp(current);
                    }
                    catch (Exception ex)
                    {
                        // The task is fire-and-forget, so without this the failure would be lost.
                        Trace.WriteLine("Proxy source scrape failed: " + current.Url + "||" + ex.Message);
                    }
                });
            }
        }

        /// <summary>True when single-IP mode is active and a proxy has already been validated.</summary>
        private static bool ShouldStop()
        {
            if (!isSeleteOne)
            {
                return false;
            }

            lock (locker)
            {
                return list.Count > 0;
            }
        }

        /// <summary>Builds the URL of the zero-based <paramref name="pageIndex"/> of a proxy source.</summary>
        private static string BuildPageUrl(IPEntity model, int pageIndex)
        {
            if (model.Number == 1)
            {
                return model.Url;
            }

            if (model.Url.Contains("?"))
            {
                // Query-string paging: &<PageName>=<1-based page>
                return model.Url + "&" + model.PageName + "=" + (pageIndex + 1);
            }

            // Path paging: the first page is the bare URL, later pages are /2, /3, ...
            return model.Url + (pageIndex == 0 ? string.Empty : "/" + (pageIndex + 1));
        }

        /// <summary>
        /// Scrapes every page of one proxy source and launches a validation for each
        /// "ip:port" candidate found.
        /// </summary>
        /// <param name="model">Describes the source URL, paging and the XPath/cell layout of the proxy table.</param>
        private static void GetIp(IPEntity model)
        {
            if (model.Number <= 0)
            {
                return;
            }

            var parents = model.SelectParents.Split(',');
            var cellIndexes = model.SelectPath.Split(',');
            int ipCell = int.Parse(cellIndexes[0]);
            int portCell = int.Parse(cellIndexes[1]);
            int lastCell = Math.Max(ipCell, portCell);

            for (int page = 0; page < model.Number; page++)
            {
                var pageUri = new Uri(BuildPageUrl(model, page));

                // Retry through (possibly different) agency proxies until the page loads
                // and the container node is present.
                HtmlNode container = null;
                while (container == null)
                {
                    if (ShouldStop())
                    {
                        return;
                    }

                    var doc = new HtmlDocument();
                    doc.LoadHtml(TestIp(pageUri, CommonHelper.GetIp(iPAgencyAddress, false)));
                    if (string.IsNullOrEmpty(doc.DocumentNode.InnerHtml))
                    {
                        continue;
                    }

                    container = doc.DocumentNode.SelectSingleNode(parents[0]);
                }

                var rows = container.SelectNodes(parents[1]);
                if (rows == null)
                {
                    continue;
                }

                // Row 0 is skipped (the original code skipped it as well - presumably a header row).
                for (int r = 1; r < rows.Count; r++)
                {
                    var cells = rows[r].SelectNodes(model.SelectParent);
                    if (cells == null || cells.Count <= lastCell)
                    {
                        continue; // malformed row: no candidate to validate
                    }

                    // An anonymity filter based on model.AnonymityPath used to live here; it is disabled.
                    var ip = cells[ipCell].InnerText + ":" + cells[portCell].InnerText;
                    if (ShouldStop())
                    {
                        return;
                    }

                    // Count only validations that are actually launched, so that
                    // finishcount can always catch up with threadCount.
                    Interlocked.Increment(ref threadCount);
                    SetData(ip);
                }
            }
        }

        /// <summary>
        /// Fetches <paramref name="uri"/> with a GET request, optionally through a proxy.
        /// </summary>
        /// <param name="uri">Address to fetch.</param>
        /// <param name="proxy">Proxy address ("ip:port"), or null for a direct connection.</param>
        /// <returns>The response body decoded as UTF-8, or an empty string on any failure.</returns>
        public static string TestIp(Uri uri, string proxy = null)
        {
            try
            {
                var request = CreateRequest(uri, "GET", "text/html;charset=UTF-8", proxy);
                var pageSource = ReadResponseBody(request);
                request.Abort(); // kept from the original implementation
                return pageSource;
            }
            catch (Exception)
            {
                // Deliberately best-effort: candidate proxies fail routinely, and callers
                // treat an empty string as "this proxy did not work".
                return string.Empty;
            }
        }

        /// <summary>
        /// Posts <see cref="formDataDict"/> to <paramref name="uri"/> as multipart/form-data,
        /// optionally through a proxy.
        /// </summary>
        /// <param name="uri">Address to post to.</param>
        /// <param name="proxy">Proxy address ("ip:port"), or null for a direct connection.</param>
        /// <returns>
        /// The response body decoded as UTF-8, or an empty string on any failure
        /// (including <see cref="formDataDict"/> being null).
        /// </returns>
        public static string TestIpFormDta(string uri, string proxy = null)
        {
            try
            {
                // Snapshot: the static field can be reassigned by another call mid-request.
                var fields = formDataDict;
                if (fields == null)
                {
                    return string.Empty;
                }

                string boundary = "----------------------------" + DateTime.Now.Ticks.ToString("x");
                var request = CreateRequest(new Uri(uri), "POST", "multipart/form-data; boundary=" + boundary, proxy);

                byte[] payload = BuildMultipartBody(fields, boundary);
                request.ContentLength = payload.Length;
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(payload, 0, payload.Length);
                }

                var pageSource = ReadResponseBody(request);
                request.Abort(); // kept from the original implementation
                return pageSource;
            }
            catch (Exception)
            {
                // Deliberately best-effort, see TestIp.
                return string.Empty;
            }
        }

        /// <summary>Creates a request with the browser-like headers and timeouts shared by GET and POST.</summary>
        private static HttpWebRequest CreateRequest(Uri uri, string method, string contentType, string proxy)
        {
            var request = (HttpWebRequest)WebRequest.Create(uri);

            // Assign the proxy first: the ServicePoint depends on it, so the ServicePoint
            // settings below must be applied afterwards to take effect for this request.
            if (proxy != null)
            {
                request.Proxy = new WebProxy(proxy);
            }

            request.Method = method;
            request.Accept = "text/html, */*; q=0.01";
            request.ContentType = contentType;
            request.UserAgent = UserAgent; // present as a desktop Chrome browser
            request.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip,deflate");
            request.AllowAutoRedirect = false;
            request.AllowWriteStreamBuffering = false;
            request.KeepAlive = true;
            request.Timeout = 5000; // 5 s for the response to start
            request.ReadWriteTimeout = 10000;
            request.ServicePoint.Expect100Continue = false;
            request.ServicePoint.UseNagleAlgorithm = false;
            request.ServicePoint.ConnectionLimit = int.MaxValue;
            return request;
        }

        /// <summary>Sends the request and reads the body as UTF-8, undoing gzip/deflate content encoding.</summary>
        private static string ReadResponseBody(HttpWebRequest request)
        {
            using (var response = (HttpWebResponse)request.GetResponse())
            using (Stream raw = response.GetResponseStream())
            {
                if (raw == null)
                {
                    return string.Empty;
                }

                var encoding = response.ContentEncoding ?? string.Empty;
                Stream decoded = raw;
                if (encoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    decoded = new GZipStream(raw, CompressionMode.Decompress);
                }
                else if (encoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    decoded = new DeflateStream(raw, CompressionMode.Decompress);
                }

                using (var reader = new StreamReader(decoded, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        /// <summary>Serialises the fields as a multipart/form-data body, including the closing boundary.</summary>
        private static byte[] BuildMultipartBody(Dictionary<string, string> fields, string boundary)
        {
            using (var buffer = new MemoryStream())
            {
                foreach (var field in fields)
                {
                    // Every part starts with its own boundary line.
                    string part = "--" + boundary + "\r\nContent-Disposition:form-data;name=\"" + field.Key + "\"\r\n\r\n" + field.Value + "\r\n";
                    byte[] data = Encoding.UTF8.GetBytes(part);
                    buffer.Write(data, 0, data.Length);
                }

                // The final boundary carries a trailing "--".
                byte[] footer = Encoding.UTF8.GetBytes("--" + boundary + "--\r\n");
                buffer.Write(footer, 0, footer.Length);
                return buffer.ToArray();
            }
        }

        /// <summary>Picks one validation target at random from <paramref name="targets"/>.</summary>
        private static string PickRandomTarget(List<string> targets)
        {
            // Random is not thread-safe, and a new instance per call can repeat seeds.
            lock (random)
            {
                return targets[random.Next(0, targets.Count)];
            }
        }

        /// <summary>
        /// Validates one proxy on a background task: fetches a random target through it and,
        /// when the page looks valid, records the proxy (and, in single-IP mode, the content).
        /// Always increments <see cref="finishcount"/> and pulses <see cref="locker"/> when done.
        /// </summary>
        /// <param name="ip">Proxy candidate as "ip:port".</param>
        /// <returns>A task that always yields an empty string.</returns>
        private static async Task<string> SetData(string ip)
        {
            return await Task.Run(() =>
            {
                Thread.Sleep(100);
                try
                {
                    if (ShouldStop())
                    {
                        return string.Empty;
                    }

                    // Snapshots: the static fields can be reassigned while this runs.
                    var form = formDataDict;
                    var url = PickRandomTarget(urlList);
                    Trace.WriteLine("url:" + url);

                    var html = form != null ? TestIpFormDta(url, ip) : TestIp(new Uri(url), ip);
                    if (string.IsNullOrEmpty(html))
                    {
                        return string.Empty;
                    }

                    var doc = new HtmlDocument();
                    doc.LoadHtml(html);
                    var body = doc.DocumentNode.SelectNodes(".//body");
                    var lowered = html.ToLowerInvariant();

                    bool missingBody = form == null && (body == null || string.IsNullOrEmpty(body[0].InnerHtml));
                    bool forbidden = html.Contains("403") && lowered.Contains("forbidden");
                    bool notFound = html.Contains("404") && lowered.Contains("not found");
                    bool missingTitle = !string.IsNullOrEmpty(title) && !lowered.Contains(title);
                    if (missingBody || forbidden || notFound || missingTitle)
                    {
                        return string.Empty;
                    }

                    lock (locker)
                    {
                        // Checked under the lock so only the first validated proxy is
                        // recorded in single-IP mode.
                        if (isSeleteOne && list.Count > 0)
                        {
                            return string.Empty;
                        }

                        list.Add(ip);
                        if (isSeleteOne)
                        {
                            listContent.Add(html);
                        }
                    }

                    Trace.WriteLine("连接成功:" + ip);
                    Console.WriteLine(ip);
                }
                catch (Exception)
                {
                    // Deliberately best-effort: a failing candidate is simply not recorded.
                }
                finally
                {
                    lock (locker)
                    {
                        finishcount++;
                        Monitor.Pulse(locker); // wake the waiter so it re-checks completion
                    }
                }

                return string.Empty;
            });
        }
    }

    /// <summary>
    /// Describes one proxy-list site: where it is, how it is paged and where the proxy
    /// table and its cells are located.
    /// </summary>
    public class IPEntity
    {
        /// <summary>
        /// Comma-separated XPath expressions leading to the proxy rows: the first is
        /// evaluated with SelectSingleNode, the second (relative to it) with SelectNodes.
        /// </summary>
        public string SelectParents { get; set; }

        /// <summary>URL of the proxy-list page.</summary>
        public string Url { get; set; }

        /// <summary>XPath of the cells within a row (e.g. "td" or "th").</summary>
        public string SelectParent { get; set; } = "td";

        /// <summary>
        /// Comma-separated zero-based cell indexes: the first is the IP cell, the second
        /// is the cell appended after ":" (the port/host).
        /// </summary>
        public string SelectPath { get; set; }

        /// <summary>Number of pages to scrape.</summary>
        public int Number { get; set; }

        /// <summary>Index of the anonymity cell; -1 by default. The filter that used it is currently disabled.</summary>
        public int AnonymityPath { get; set; } = -1;

        /// <summary>Name of the query-string paging parameter, used when <see cref="Url"/> already contains a '?'.</summary>
        public string PageName { get; set; }
    }
}