|
|
@@ -17,7 +17,7 @@ namespace FCS.Common
|
|
|
{
|
|
|
public class IPHelper
|
|
|
{
|
|
|
-
|
|
|
+ private List<Task> taskList = new List<Task>();
|
|
|
static object locker = new object();
|
|
|
static int threadCount = 0;
|
|
|
static int finishcount = 0;
|
|
|
@@ -41,7 +41,7 @@ namespace FCS.Common
|
|
|
var list = from a in urlDataList
|
|
|
select a.Url;
|
|
|
//代理IP池
|
|
|
- GetIPData(list.ToList(), "ip", iPAgencyAddress);
|
|
|
+ new IPHelper().GetIPData(list.ToList(), "ip", iPAgencyAddress);
|
|
|
}
|
|
|
|
|
|
//爬取ip池
|
|
|
@@ -70,11 +70,11 @@ namespace FCS.Common
|
|
|
"http://fenxi.zgzcw.com/2404696/zrtj",//阵容
|
|
|
"http://saishi.zgzcw.com/soccer/player/4369",//球员
|
|
|
};
|
|
|
- GetIPData(ipList.ToList());
|
|
|
+ new IPHelper().GetIPData(ipList.ToList());
|
|
|
}
|
|
|
- private static void GetIPData(List<string> _urlList, string _title = "", string path = "")
|
|
|
+ private void GetIPData(List<string> _urlList, string _title = "", string path = "")
|
|
|
{
|
|
|
-
|
|
|
+ list = new List<string>();
|
|
|
if (_urlList.Count <= 0)
|
|
|
urlList.Add(ConfigurationManager.AppSettings["TestUrl"].ToString());
|
|
|
else
|
|
|
@@ -85,19 +85,10 @@ namespace FCS.Common
|
|
|
var urlData = CommonHelper.ConvertXMLToObject<IPEntity>(configFile, "UrlSetting");
|
|
|
urlData.ForEach(p =>
|
|
|
{
|
|
|
- Task.Run(() => GetIp(p));
|
|
|
-
|
|
|
+ taskList.Add(Task.Run(() => GetIp(p)));
|
|
|
});
|
|
|
Thread.Sleep(300000);
|
|
|
- lock (locker)
|
|
|
- {
|
|
|
- while (finishcount != threadCount)
|
|
|
- {
|
|
|
- Thread.Sleep(5000);
|
|
|
- Trace.WriteLine("已执行线程:" + finishcount + "||剩余线程:" + (threadCount - finishcount) + "可用IP数:" + list.Count);
|
|
|
- Monitor.Wait(locker);//等待
|
|
|
- }
|
|
|
- }
|
|
|
+ Task.WaitAll(taskList.ToArray());
|
|
|
|
|
|
Trace.WriteLine("IP代理池更新完毕!可用IP:" + list.Count);
|
|
|
if (path.IsEmpty())
|
|
|
@@ -105,75 +96,11 @@ namespace FCS.Common
|
|
|
if (list.Count > 0)
|
|
|
CommonHelper.Write_IP(path, JsonConvert.SerializeObject(list.Distinct().ToList()));
|
|
|
}
|
|
|
- public static string GetIPDataBYOne_FormData(List<string> _urlList, Dictionary<string, string> formData, string _title = "")
|
|
|
- {
|
|
|
- InitIPHelper();
|
|
|
- formDataDict = formData;
|
|
|
- return GetIPDataBYOne(_urlList, _title, true);
|
|
|
- }
|
|
|
- public static string GetIPDataBYOne(List<string> _urlList, string _title = "", bool isFormData = false)
|
|
|
- {
|
|
|
- if (!isFormData)
|
|
|
- InitIPHelper();
|
|
|
- isSeleteOne = true;
|
|
|
- if (_urlList.Count <= 0)
|
|
|
- urlList.Add(ConfigurationManager.AppSettings["TestUrl"].ToString());
|
|
|
- else
|
|
|
- urlList = _urlList;
|
|
|
- title = _title;
|
|
|
- Trace.WriteLine("获取单个IP!请稍后");
|
|
|
- string configFile = AppDomain.CurrentDomain.BaseDirectory + "/XmlConfig/UrlData.xml";
|
|
|
- var urlData = CommonHelper.ConvertXMLToObject<IPEntity>(configFile, "UrlSetting");
|
|
|
- urlData.ForEach(p =>
|
|
|
- {
|
|
|
- Task.Run(() => GetIp(p));
|
|
|
-
|
|
|
- });
|
|
|
- var content = string.Empty;
|
|
|
- dateNowMinute = DateTime.Now.Minute;
|
|
|
- while (true)
|
|
|
- {
|
|
|
-
|
|
|
- if (list.Count >= 1)
|
|
|
- {
|
|
|
- lock (locker)
|
|
|
- {
|
|
|
- content = listContent[0];
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- var minute = DateTime.Now.Minute;
|
|
|
- var poor = dateNowMinute > minute ? minute + 60 - dateNowMinute : minute - dateNowMinute;
|
|
|
- if (poor > 3 || finishcount > 2000)
|
|
|
- {
|
|
|
- if (finishcount == threadCount || finishcount > 2000)
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- }
|
|
|
- return content;
|
|
|
- }
|
|
|
-
|
|
|
- public static void InitIPHelper()
|
|
|
- {
|
|
|
- formDataDict = null;
|
|
|
- finishcount = 0;
|
|
|
- threadCount = 0;
|
|
|
- list = new List<string>();
|
|
|
- listContent = new List<string>();
|
|
|
- urlList = new List<string>();
|
|
|
- dateNowMinute = DateTime.Now.Minute;
|
|
|
- }
|
|
|
-
|
|
|
/// <summary>
|
|
|
/// 代理
|
|
|
/// </summary>
|
|
|
/// <param name="DATA"></param>
|
|
|
- private static void GetIp(IPEntity model)
|
|
|
+ private void GetIp(IPEntity model)
|
|
|
{
|
|
|
for (int i = 0; i < model.Number; i++)
|
|
|
{
|
|
|
@@ -190,11 +117,15 @@ namespace FCS.Common
|
|
|
if (isSeleteOne && list.Count > 0)
|
|
|
return;
|
|
|
doc.LoadHtml(TestIp(new Uri(_url), CommonHelper.GetIp(iPAgencyAddress, false)));
|
|
|
+ var number = 0;
|
|
|
while (string.IsNullOrEmpty(doc.DocumentNode.InnerHtml))
|
|
|
{
|
|
|
+ if (number >= 10)
|
|
|
+ break;
|
|
|
if (isSeleteOne && list.Count > 0)
|
|
|
- return;
|
|
|
+ break;
|
|
|
doc.LoadHtml(TestIp(new Uri(_url), CommonHelper.GetIp(iPAgencyAddress, false)));
|
|
|
+ number++;
|
|
|
}
|
|
|
|
|
|
var parents = model.SelectParents.Split(',').ToList();
|
|
|
@@ -208,16 +139,13 @@ namespace FCS.Common
|
|
|
}
|
|
|
var tr = trparents.SelectNodes(parents[1]);
|
|
|
|
|
|
- threadCount += tr.Count - 1;
|
|
|
+ // threadCount += tr.Count - 1;
|
|
|
foreach (HtmlNode item in tr)
|
|
|
{
|
|
|
if (tr.IndexOf(item) != 0)
|
|
|
{
|
|
|
|
|
|
var td = item.SelectNodes(model.SelectParent);
|
|
|
- //if (model.AnonymityPath != -1)
|
|
|
- // if (td[model.AnonymityPath].InnerText.IndexOf("匿") != -1)
|
|
|
- // continue;
|
|
|
var path = model.SelectPath.Split(',').ToList();
|
|
|
var ip = td[int.Parse(path[0])].InnerText + ":" + td[int.Parse(path[1])].InnerText;
|
|
|
if (isSeleteOne && list.Count > 0)
|
|
|
@@ -231,11 +159,11 @@ namespace FCS.Common
|
|
|
}
|
|
|
|
|
|
/// <summary>
|
|
|
- /// 异步创建爬虫
|
|
|
+ /// Http请求
|
|
|
/// </summary>
|
|
|
- /// <param name="uri">爬虫URL地址</param>
|
|
|
- /// <param name="proxy">代理服务器</param>
|
|
|
- /// <returns>网页源代码</returns>
|
|
|
+ /// <param name="uri"></param>
|
|
|
+ /// <param name="proxy"></param>
|
|
|
+ /// <returns></returns>
|
|
|
public static string TestIp(Uri uri, string proxy = null)
|
|
|
{
|
|
|
var pageSource = string.Empty;
|
|
|
@@ -313,6 +241,12 @@ namespace FCS.Common
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ /// <summary>
|
|
|
+ /// 表单Http请求
|
|
|
+ /// </summary>
|
|
|
+ /// <param name="uri"></param>
|
|
|
+ /// <param name="proxy"></param>
|
|
|
+ /// <returns></returns>
|
|
|
public static string TestIpFormDta(string uri, string proxy = null)
|
|
|
{
|
|
|
var pageSource = string.Empty;
|
|
|
@@ -418,15 +352,15 @@ namespace FCS.Common
|
|
|
/// </summary>
|
|
|
/// <param name="ip"></param>
|
|
|
/// <returns></returns>
|
|
|
- private static async Task<string> SetData(string ip)
|
|
|
+ private async void SetData(string ip)
|
|
|
{
|
|
|
- return await Task.Run(() =>
|
|
|
+ taskList.Add(Task.Run(() =>
|
|
|
{
|
|
|
Thread.Sleep(100);
|
|
|
try
|
|
|
{
|
|
|
if (isSeleteOne && list.Count > 0)
|
|
|
- { }
|
|
|
+ return;
|
|
|
else
|
|
|
{
|
|
|
var url = urlList[new Random().Next(0, urlList.Count)];
|
|
|
@@ -449,7 +383,7 @@ namespace FCS.Common
|
|
|
else
|
|
|
{
|
|
|
if (isSeleteOne && list.Count > 0)
|
|
|
- return string.Empty;
|
|
|
+ return;
|
|
|
lock (locker)
|
|
|
{
|
|
|
list.Add(ip);
|
|
|
@@ -466,19 +400,11 @@ namespace FCS.Common
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- catch (Exception)
|
|
|
- {
|
|
|
- }
|
|
|
finally
|
|
|
{
|
|
|
- lock (locker)
|
|
|
- {
|
|
|
- finishcount++;
|
|
|
- Monitor.Pulse(locker); //完成,通知等待队列,告知已完,执行下一个。
|
|
|
- }
|
|
|
+
|
|
|
}
|
|
|
- return string.Empty;
|
|
|
- });
|
|
|
+ }));
|
|
|
|
|
|
}
|
|
|
|