B_TeamJob.cs 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. using FCS.Common;
  2. using FCS.Interface;
  3. using FCS.Models;
  4. using FCS.Models.DTO;
  5. using FCS.Models.Entity;
  6. using HtmlAgilityPack;
  7. using Quartz;
  8. using System;
  9. using System.Collections.Generic;
  10. using System.Data;
  11. using System.Diagnostics;
  12. using System.Linq;
  13. using System.Text;
  14. using System.Threading;
  15. using System.Threading.Tasks;
  16. namespace FCS.Crawler.Basketball
  17. {
  /// <summary>
  /// Basketball team crawling job.
  /// For every grouping row that has a Remark URL it downloads three team-statistics
  /// pages, follows each team link found there, and bulk-inserts the teams whose
  /// name is not already present in the B_Team table.
  /// </summary>
  public class B_TeamJob : CommonJob, IJob
  {
      /// <summary>
      /// Guards every access to <see cref="TeamList"/>, which is filled concurrently
      /// from thread-pool tasks.
      /// </summary>
      private static readonly object TeamListLock = new object();

      /// <summary>
      /// Teams collected by <see cref="GetTeamHtml"/> during the current run.
      /// Static, therefore shared by all instances; only touch it under <see cref="TeamListLock"/>.
      /// </summary>
      private static List<B_Team> TeamList = new List<B_Team>();

      // Not referenced anywhere inside this class; kept untouched.
      private static List<DataItemDetail> DateItem = new List<DataItemDetail>();

      /// <summary>
      /// Replacements applied to the "saiAll.do" part of a grouping URL to obtain
      /// the team-statistics pages that are crawled.
      /// </summary>
      private static readonly string[] StatisticsPages =
      {
          "1/teamStatistics.do",
          "2/teamStatistics.do",
          "3/teamStatistics.do"
      };

      /// <summary>
      /// Creates the job and resolves the logging and data-access helpers.
      /// </summary>
      public B_TeamJob()
      {
          log = new LogHelper();
          services = IOC.Resolve<IDTOpenCode>();
      }

      /// <summary>
      /// Quartz entry point: reads the job configuration from the job data map and runs the crawl.
      /// </summary>
      /// <param name="context">Execution context supplied by the scheduler.</param>
      public void Execute(IJobExecutionContext context)
      {
          Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
          GetAll();
      }

      /// <summary>
      /// Crawls all team-statistics pages, waits until every page and every team detail
      /// request has finished, then bulk-inserts the teams whose name is not stored yet.
      /// </summary>
      public void GetAll()
      {
          var groupings = services.Query<B_Grouping>("and Remark is not null", "", "distinct EventId,Season,Remark").ToList();
          var existingTeams = services.Query<B_Team>().ToList();

          // The list is static: start every run empty so it cannot grow without bound
          // across scheduled executions.
          lock (TeamListLock)
          {
              TeamList.Clear();
          }

          // One task per statistics page. Each page task returns the team tasks it started,
          // so that every one of them can be awaited below.
          var pageTasks = new List<Task<List<Task>>>();
          foreach (var grouping in groupings)
          {
              var groupingUrl = grouping.Remark.ToString();
              var eventId = grouping.EventId;
              foreach (var page in StatisticsPages)
              {
                  var pageUrl = groupingUrl.Replace("saiAll.do", page);
                  pageTasks.Add(Task.Run<List<Task>>(() => CrawlStatisticsPage(pageUrl, eventId)));
              }
          }

          // Both helpers catch their own exceptions, so these waits do not throw for crawl failures.
          Task.WaitAll(pageTasks.Cast<Task>().ToArray());
          Task.WaitAll(pageTasks.SelectMany(t => t.Result).ToArray());
          Trace.WriteLine("获取B_Team 完结");

          List<B_Team> crawledTeams;
          lock (TeamListLock)
          {
              crawledTeams = new List<B_Team>(TeamList);
          }

          // Keep the first occurrence of every name that is neither stored already
          // nor queued for insertion in this run.
          var knownNames = new HashSet<string>();
          foreach (var existing in existingTeams)
          {
              knownNames.Add(existing.Name);
          }

          var addList = new List<B_Team>();
          foreach (var team in crawledTeams)
          {
              if (knownNames.Add(team.Name))
              {
                  addList.Add(team);
              }
          }

          services.SqlBulkCopyAdd<B_Team>(addList);
      }

      /// <summary>
      /// Downloads one team-statistics page and starts a detail crawl for every team link on it.
      /// Failures are traced and swallowed so that one bad page cannot abort the whole run.
      /// </summary>
      /// <param name="pageUrl">URL of the team-statistics page.</param>
      /// <param name="eventId">Event id stored on every team found on the page.</param>
      /// <returns>The team tasks that were started (possibly empty, never null).</returns>
      private List<Task> CrawlStatisticsPage(string pageUrl, string eventId)
      {
          var teamTasks = new List<Task>();
          try
          {
              HtmlDocument doc = CommonHelper.GetHtml(pageUrl);
              if (doc == null)
              {
                  return teamTasks;
              }

              // Anchor elements linking to the team pages; SelectNodes yields null when nothing matches.
              var teamLinks = doc.DocumentNode.SelectNodes("//*[@class='paiming']/table/tr/td/a");
              if (teamLinks == null)
              {
                  return teamTasks;
              }

              foreach (var link in teamLinks)
              {
                  var teamUrl = GetAttributeValueOrNull(link, "href");
                  if (teamUrl == null)
                  {
                      Trace.WriteLine("B_TeamJob: team link without href skipped on " + pageUrl);
                      continue;
                  }

                  var teamName = link.InnerText.Trim().Replace("\n", "").Replace("\r", "").Replace("\t", "");
                  teamTasks.Add(Task.Run(() => CrawlTeam(teamUrl, teamName, eventId)));
              }
          }
          catch (Exception ex)
          {
              Trace.WriteLine("B_TeamJob: failed to crawl statistics page " + pageUrl + ": " + ex);
          }

          return teamTasks;
      }

      /// <summary>
      /// Runs <see cref="GetTeamHtml"/> for one team and traces, instead of propagating, any failure.
      /// </summary>
      private void CrawlTeam(string teamUrl, string teamName, string eventId)
      {
          try
          {
              GetTeamHtml(teamUrl, teamName, eventId);
          }
          catch (Exception ex)
          {
              Trace.WriteLine("B_TeamJob: failed to crawl team " + teamName + " (" + teamUrl + "): " + ex);
          }
      }

      /// <summary>
      /// Returns the value of the first attribute with the given name, or null when the node
      /// is null or has no such attribute.
      /// </summary>
      private static string GetAttributeValueOrNull(HtmlNode node, string attributeName)
      {
          if (node == null)
          {
              return null;
          }

          var attribute = node.Attributes.FirstOrDefault(a => a.Name.Equals(attributeName));
          return attribute == null ? null : attribute.Value;
      }

      /// <summary>
      /// Downloads and parses a team detail page and adds the resulting team to the
      /// list of teams collected during this run.
      /// </summary>
      /// <param name="url">URL of the team detail page.</param>
      /// <param name="TeamName">Team name taken from the statistics page.</param>
      /// <param name="eventId">Id of the event the team belongs to.</param>
      /// <returns>
      /// true when the detail page was available and parsed; false when the page could not be
      /// loaded or reports that it has no data, in which case only id, name and event id are recorded.
      /// </returns>
      public bool GetTeamHtml(string url, string TeamName, string eventId)
      {
          HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(new HtmlParameterDTO
          {
              Url = url,
              Title = "球队详情"
          });

          B_Team team = new B_Team();
          team.Id = Guid.NewGuid().ToString();
          team.Name = TeamName;
          team.EventId = eventId;

          if (doc == null || doc.DocumentNode.InnerText.Contains("球队还没有资料"))
          {
              AddTeam(team);
              return false;
          }

          // Team logo; SelectNodes yields null when the page has no logo element.
          var logoNodes = doc.DocumentNode.SelectNodes("//*[@class='xq_img']/img");
          // Basic team facts.
          var factNodes = doc.DocumentNode.SelectNodes("//*[@class='div_qdxq']/span/em");

          team.Remark = url;
          // Falls back to an empty string when the logo element or its src attribute is missing.
          team.LogoImage = GetAttributeValueOrNull(logoNodes == null ? null : logoNodes.FirstOrDefault(), "src") ?? "";
          team.Describe = "";

          if (factNodes != null)
          {
              var facts = factNodes.ToList();
              // NOTE(review): each guard demands one element more than the index needs
              // (e.g. Count > 8 for index 7). Kept as in the original - confirm whether intended.
              team.City = facts.Count > 8 ? facts[7].InnerText : "";
              team.Year = facts.Count > 14 ? facts[13].InnerText : "";
              team.Venues = facts.Count > 16 ? facts[15].InnerText : "";
              team.Coach = facts.Count > 12 ? facts[11].InnerText : "";
          }

          AddTeam(team);
          return true;
      }

      /// <summary>
      /// Appends a team to the shared list under the lock, because callers run on
      /// several thread-pool threads at once.
      /// </summary>
      private static void AddTeam(B_Team team)
      {
          lock (TeamListLock)
          {
              TeamList.Add(team);
          }
      }

      #region SQL statements
      /// <summary>
      /// Query for the grouping URLs. Not referenced anywhere inside this class.
      /// </summary>
      private static string GetAllGroupingUrl = @"select distinct EventId,Season,Remark from F_Grouping where Remark is not null";
      #endregion
  }
  170. }