// B_TeamJob.cs (7.3 KB)
  1. using FCS.Common;
  2. using FCS.Interface;
  3. using FCS.Models;
  4. using FCS.Models.DTO;
  5. using FCS.Models.Entity;
  6. using HtmlAgilityPack;
  7. using Quartz;
  8. using System;
  9. using System.Collections.Generic;
  10. using System.Data;
  11. using System.Diagnostics;
  12. using System.Linq;
  13. using System.Text;
  14. using System.Threading;
  15. using System.Threading.Tasks;
  16. namespace FCS.Crawler.Basketball
  17. {
  18. /// <summary>
  19. /// 篮球球队抓取服务
  20. /// </summary>
  21. public class B_TeamJob : CommonJob, IJob
  22. {
  23. private static List<B_Team> AllTeamList = new List<B_Team>();
  24. private static List<B_Team> TeamList = new List<B_Team>();
  25. private static List<DataItemDetail> DateItem = new List<DataItemDetail>();
  26. public B_TeamJob()
  27. {
  28. log = new LogHelper();
  29. services = IOC.Resolve<IDTOpenCode>();
  30. }
  31. public void Execute(IJobExecutionContext context)
  32. {
  33. Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
  34. GetAll();
  35. }
  36. public void GetAll()
  37. {
  38. var ds = services.Query<B_Grouping>("and Remark is not null", "", "distinct EventId,Season,Remark").ToList();
  39. AllTeamList = services.Query<B_Team>().ToList();
  40. //获取分组存取的url ds.Tables[0].Rows.Count
  41. foreach (var item1 in ds)
  42. {
  43. Task.Run(() =>
  44. {
  45. List<string> urlList = new List<string>();
  46. var url = item1.Remark.ToString().Replace("saiAll.do", "1/teamStatistics.do");
  47. urlList.Add(url);
  48. url = item1.Remark.ToString().Replace("saiAll.do", "2/teamStatistics.do");
  49. urlList.Add(url);
  50. url = item1.Remark.ToString().Replace("saiAll.do", "3/teamStatistics.do");
  51. urlList.Add(url);
  52. foreach (var urlItem in urlList)
  53. {
  54. Task.Run(() =>
  55. {
  56. HtmlDocument doc = CommonHelper.GetHtml(urlItem);
  57. //获取到球队链接a list
  58. var TongJiListData = doc.DocumentNode.SelectNodes("//*[@class='paiming']/table/tr/td/a");
  59. //循环赛季
  60. if (TongJiListData != null)
  61. {
  62. foreach (var item in TongJiListData)
  63. {
  64. var sjurl = item.Attributes.SingleOrDefault(a => a.Name.Equals("href")).Value;
  65. var teamName = item.InnerText.Trim().Replace("\n", "").Replace("\r", "").Replace("\t", "");
  66. var eventId = item1.EventId;
  67. Task.Run(() =>
  68. {
  69. GetTeamHtml(sjurl, teamName, eventId);
  70. });
  71. }
  72. }
  73. });
  74. }
  75. });
  76. }
  77. int maxWorkerThreads, workerThreads;
  78. int maxportThreads, portThreads;
  79. while (true)
  80. {
  81. /*
  82. GetAvailableThreads():检索由 GetMaxThreads 返回的线程池线程的最大数目和当前活动数目之间的差值。
  83. 而GetMaxThreads 检索可以同时处于活动状态的线程池请求的数目。
  84. 通过最大数目减可用数目就可以得到当前活动线程的数目,如果为零,那就说明没有活动线程,说明所有线程运行完毕。
  85. */
  86. ThreadPool.GetMaxThreads(out maxWorkerThreads, out maxportThreads);
  87. ThreadPool.GetAvailableThreads(out workerThreads, out portThreads);
  88. Thread.Sleep(1000);
  89. Trace.WriteLine("正在执行任务的线程数" + (maxWorkerThreads - workerThreads));
  90. if (maxWorkerThreads - workerThreads == 0)
  91. {
  92. Console.WriteLine("Thread Finished!");
  93. break;
  94. }
  95. }
  96. try
  97. {
  98. if (TeamList != null)
  99. {
  100. var addList = new List<B_Team>();
  101. foreach (var item in TeamList)
  102. {
  103. if (AllTeamList.Count() == 0&& addList.Count()==0)
  104. {
  105. addList.Add(item);
  106. }
  107. else
  108. {
  109. if (AllTeamList.Where(o => o.Name == item.Name).Count() == 0 && addList.Where(o => o.Name == item.Name).Count() == 0)
  110. {
  111. addList.Add(item);
  112. }
  113. }
  114. }
  115. services.SqlBulkCopyAdd<B_Team>(addList);
  116. }
  117. }
  118. catch (Exception ex)
  119. {
  120. throw;
  121. }
  122. }
  123. /// <summary>
  124. /// 解析球队信息
  125. /// </summary>
  126. /// <param name="url"></param>
  127. /// <returns></returns>
  128. public bool GetTeamHtml(string url, string TeamName, string eventId)
  129. {
  130. HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(new HtmlParameterDTO {
  131. Url=url,
  132. Title = "球队详情"
  133. });
  134. if (doc == null || doc.DocumentNode.InnerText.Contains("球队还没有资料"))
  135. {
  136. B_Team b_Team1 = new B_Team();
  137. b_Team1.Id = Guid.NewGuid().ToString();
  138. b_Team1.Name = TeamName;
  139. b_Team1.EventId = eventId;
  140. TeamList.Add(b_Team1);
  141. return false;
  142. }
  143. //球队图片
  144. var teamImg = doc.DocumentNode.SelectNodes("//*[@class='xq_img']/img");
  145. //球队的基本信息
  146. var teamData = doc.DocumentNode.SelectNodes("//*[@class='div_qdxq']/span/em");
  147. B_Team b_Team2 = new B_Team();
  148. b_Team2.Id = Guid.NewGuid().ToString();
  149. b_Team2.Name = TeamName;
  150. b_Team2.Remark = url;
  151. b_Team2.EventId = eventId;
  152. b_Team2.LogoImage = teamImg.FirstOrDefault().Attributes.SingleOrDefault(a => a.Name.Equals("src")).Value;
  153. b_Team2.Describe = "";
  154. if (teamData != null)
  155. {
  156. var dataList = teamData.ToList();
  157. b_Team2.City = dataList.Count() > 8 ? dataList[7].InnerText.ToString() : "";
  158. b_Team2.Year = dataList.Count() > 14 ? (dataList[13].InnerText.ToString() == "" ? "" : dataList[13].InnerText.ToString()) : "";
  159. b_Team2.Venues = dataList.Count() > 16 ? dataList[15].InnerText.ToString() : "";
  160. b_Team2.Coach = dataList.Count() > 12 ? dataList[11].InnerText.ToString() : "";
  161. }
  162. TeamList.Add(b_Team2);
  163. return false;
  164. }
  165. #region SQL语句
  166. /// <summary>
  167. /// 获取分组信息
  168. /// </summary>
  169. private static string GetAllGroupingUrl = @"select distinct EventId,Season,Remark from F_Grouping where Remark is not null";
  170. #endregion
  171. }
  172. }