// B_TeamJob.cs
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using FCS.Common;
using FCS.Interface;
using FCS.Models;
using FCS.Models.DTO;
using FCS.Models.Entity;
using HtmlAgilityPack;
using Quartz;
  16. namespace FCS.Crawler.Basketball
  17. {
  18. /// <summary>
  19. /// 篮球球队抓取服务
  20. /// </summary>
  21. public class B_TeamJob : CommonJob, IJob
  22. {
  23. private static List<B_Team> AllTeamList = new List<B_Team>();
  24. private static List<B_Team> TeamList = new List<B_Team>();
  25. private static List<DataItemDetail> DateItem = new List<DataItemDetail>();
  26. public B_TeamJob()
  27. {
  28. log = new LogHelper();
  29. services = IOC.Resolve<IDTOpenCode>();
  30. }
  31. public void Execute(IJobExecutionContext context)
  32. {
  33. Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
  34. GetAll();
  35. }
  36. public void GetAll()
  37. {
  38. ThreadPool.SetMinThreads(10, 10);
  39. ThreadPool.SetMaxThreads(200, 200);
  40. var ds = services.Query<B_Grouping>("and Remark is not null", "", "distinct EventId,Season,Remark").ToList();
  41. AllTeamList = services.Query<B_Team>().ToList();
  42. //获取分组存取的url ds.Tables[0].Rows.Count
  43. foreach (var item1 in ds)
  44. {
  45. Task.Run(() =>
  46. {
  47. List<string> urlList = new List<string>();
  48. var url = item1.Remark.ToString().Replace("saiAll.do", "1/teamStatistics.do");
  49. urlList.Add(url);
  50. url = item1.Remark.ToString().Replace("saiAll.do", "2/teamStatistics.do");
  51. urlList.Add(url);
  52. url = item1.Remark.ToString().Replace("saiAll.do", "3/teamStatistics.do");
  53. urlList.Add(url);
  54. foreach (var urlItem in urlList)
  55. {
  56. Task.Run(() =>
  57. {
  58. HtmlDocument doc = CommonHelper.GetHtml(urlItem);
  59. //获取到球队链接a list
  60. var TongJiListData = doc.DocumentNode.SelectNodes("//*[@class='paiming']/table/tr/td/a");
  61. //循环赛季
  62. if (TongJiListData != null)
  63. {
  64. foreach (var item in TongJiListData)
  65. {
  66. var sjurl = item.Attributes.SingleOrDefault(a => a.Name.Equals("href")).Value;
  67. var teamName = item.InnerText.Trim().Replace("\n", "").Replace("\r", "").Replace("\t", "");
  68. var eventId = item1.EventId;
  69. Task.Run(() =>
  70. {
  71. GetTeamHtml(sjurl, teamName, eventId);
  72. });
  73. }
  74. }
  75. });
  76. }
  77. });
  78. }
  79. int maxWorkerThreads, workerThreads;
  80. int maxportThreads, portThreads;
  81. while (true)
  82. {
  83. /*
  84. GetAvailableThreads():检索由 GetMaxThreads 返回的线程池线程的最大数目和当前活动数目之间的差值。
  85. 而GetMaxThreads 检索可以同时处于活动状态的线程池请求的数目。
  86. 通过最大数目减可用数目就可以得到当前活动线程的数目,如果为零,那就说明没有活动线程,说明所有线程运行完毕。
  87. */
  88. ThreadPool.GetMaxThreads(out maxWorkerThreads, out maxportThreads);
  89. ThreadPool.GetAvailableThreads(out workerThreads, out portThreads);
  90. Thread.Sleep(1000);
  91. Trace.WriteLine("正在执行任务的线程数" + (maxWorkerThreads - workerThreads));
  92. if (maxWorkerThreads - workerThreads == 0)
  93. {
  94. Console.WriteLine("Thread Finished!");
  95. break;
  96. }
  97. }
  98. try
  99. {
  100. if (TeamList != null)
  101. {
  102. var addList = new List<B_Team>();
  103. foreach (var item in TeamList)
  104. {
  105. if (AllTeamList.Count() == 0&& addList.Count()==0)
  106. {
  107. addList.Add(item);
  108. }
  109. else
  110. {
  111. if (AllTeamList.Where(o => o.Name == item.Name).Count() == 0 && addList.Where(o => o.Name == item.Name).Count() == 0)
  112. {
  113. addList.Add(item);
  114. }
  115. }
  116. }
  117. services.SqlBulkCopyAdd<B_Team>(addList);
  118. }
  119. }
  120. catch (Exception ex)
  121. {
  122. throw;
  123. }
  124. }
  125. /// <summary>
  126. /// 解析球队信息
  127. /// </summary>
  128. /// <param name="url"></param>
  129. /// <returns></returns>
  130. public bool GetTeamHtml(string url, string TeamName, string eventId)
  131. {
  132. HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(new HtmlParameterDTO {
  133. Url=url,
  134. Title = "球队详情"
  135. });
  136. if (doc == null || doc.DocumentNode.InnerText.Contains("球队还没有资料"))
  137. {
  138. B_Team b_Team1 = new B_Team();
  139. b_Team1.Id = Guid.NewGuid().ToString();
  140. b_Team1.Name = TeamName;
  141. b_Team1.EventId = eventId;
  142. TeamList.Add(b_Team1);
  143. return false;
  144. }
  145. //球队图片
  146. var teamImg = doc.DocumentNode.SelectNodes("//*[@class='xq_img']/img");
  147. //球队的基本信息
  148. var teamData = doc.DocumentNode.SelectNodes("//*[@class='div_qdxq']/span/em");
  149. B_Team b_Team2 = new B_Team();
  150. b_Team2.Id = Guid.NewGuid().ToString();
  151. b_Team2.Name = TeamName;
  152. b_Team2.Remark = url;
  153. b_Team2.EventId = eventId;
  154. b_Team2.LogoImage = teamImg.FirstOrDefault().Attributes.SingleOrDefault(a => a.Name.Equals("src")).Value;
  155. b_Team2.Describe = "";
  156. if (teamData != null)
  157. {
  158. var dataList = teamData.ToList();
  159. b_Team2.City = dataList.Count() > 8 ? dataList[7].InnerText.ToString() : "";
  160. b_Team2.Year = dataList.Count() > 14 ? (dataList[13].InnerText.ToString() == "" ? "" : dataList[13].InnerText.ToString()) : "";
  161. b_Team2.Venues = dataList.Count() > 16 ? dataList[15].InnerText.ToString() : "";
  162. b_Team2.Coach = dataList.Count() > 12 ? dataList[11].InnerText.ToString() : "";
  163. }
  164. TeamList.Add(b_Team2);
  165. return false;
  166. }
  167. #region SQL语句
  168. /// <summary>
  169. /// 获取分组信息
  170. /// </summary>
  171. private static string GetAllGroupingUrl = @"select distinct EventId,Season,Remark from F_Grouping where Remark is not null";
  172. #endregion
  173. }
  174. }