B_PlayerJob.cs 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. using System;
  2. using System.Collections.Generic;
  3. using FCS.Common;
  4. using FCS.Crawler.Tools;
  5. using FCS.Interface;
  6. using FCS.Models;
  7. using HtmlAgilityPack;
  8. using Newtonsoft.Json;
  9. using Quartz;
  10. using System.Data;
  11. using System.Linq;
  12. using System.Text;
  13. using System.Text.RegularExpressions;
  14. using System.Threading;
  15. using System.Threading.Tasks;
  16. using System.Diagnostics;
  17. using FCS.Models.Entity;
  18. using FCS.Models.DTO;
  19. namespace FCS.Crawler.Basketball
  20. {
  /// <summary>
  /// Quartz job that crawls basketball player rosters: for every team that has a
  /// stored roster URL it downloads the roster page, follows each player link and
  /// bulk-inserts the players that are not stored yet.
  /// </summary>
  public class B_PlayerJob : CommonJob, IJob
  {
      /// <summary>
      /// Players scraped during the current run. Filled concurrently by
      /// <see cref="GetPlayerInfo"/>; writes are serialized through <see cref="playersLock"/>.
      /// </summary>
      public List<B_Players> players = new List<B_Players>();

      /// <summary>
      /// Players already stored, loaded once at the start of <see cref="GetAll"/>.
      /// Only read while the crawl tasks are running.
      /// </summary>
      public List<B_Players> allplayers = new List<B_Players>();

      // Dedicated monitor for 'players'; List<T> is not safe for concurrent Add calls.
      private readonly object playersLock = new object();

      /// <summary>
      /// Creates the job and resolves the logger and the data service it uses.
      /// </summary>
      public B_PlayerJob()
      {
          log = new LogHelper();
          services = IOC.Resolve<IDTOpenCode>();
      }

      /// <summary>
      /// Quartz entry point: reads the job configuration from the job data map and
      /// runs the full crawl.
      /// </summary>
      /// <param name="context">Execution context supplied by the scheduler.</param>
      public void Execute(IJobExecutionContext context)
      {
          Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
          GetAll();
      }

      /// <summary>
      /// Crawls the roster page of every team whose Remark (roster URL) is set, in
      /// parallel, then bulk-inserts every scraped player whose ChineseName is not
      /// stored yet and has not already been queued in this run.
      /// </summary>
      public void GetAll()
      {
          var teams = services.Query<B_Team>("and Remark is not null").ToList();
          allplayers = services.Query<B_Players>().ToList();

          // One pool task per team. Waiting on real Task objects (instead of the former
          // async-void lambda plus a hand-rolled counter) guarantees that a failing team
          // can neither take the process down nor leave this method waiting forever.
          Task[] crawlTasks = teams
              .Select(team => Task.Run(() => CrawlTeam(team)))
              .ToArray();
          Task.WaitAll(crawlTasks);

          Trace.WriteLine("获取B_Players 完结");

          if (players == null)
          {
              return;
          }

          // Names that are already stored; HashSet.Add doubles as the "already queued
          // in this run" check, using the same ordinal equality as the == operator.
          var knownNames = new HashSet<string>(allplayers.Select(o => o.ChineseName));
          var addList = new List<B_Players>();
          foreach (var item in players)
          {
              if (knownNames.Add(item.ChineseName))
              {
                  addList.Add(item);
              }
          }

          services.SqlBulkCopyAdd<B_Players>(addList);
      }

      /// <summary>
      /// Crawls a single team and records (rather than propagates) any failure, so one
      /// broken page does not discard the players collected from the other teams.
      /// </summary>
      /// <param name="team">Team whose Remark holds the roster page URL.</param>
      private void CrawlTeam(B_Team team)
      {
          try
          {
              GetPlayerListHtml(
                  Convert.ToString(team.Remark),
                  Convert.ToString(team.Id),
                  Convert.ToString(team.Name));
          }
          catch (Exception ex)
          {
              Trace.WriteLine("B_PlayerJob: crawling team " + Convert.ToString(team.Id) + " failed: " + ex);
          }
      }

      /// <summary>
      /// Downloads a team roster page and scrapes every listed player that is not
      /// already stored.
      /// </summary>
      /// <param name="url">URL of the team roster page.</param>
      /// <param name="teamId">Id of the team; copied onto every scraped player.</param>
      /// <param name="teamName">Team name (currently not used by the parser).</param>
      /// <returns>
      /// <c>true</c> when the roster table was found and its rows were processed;
      /// <c>false</c> when the page could not be loaded or contains no roster table.
      /// </returns>
      public bool GetPlayerListHtml(string url, string teamId, string teamName)
      {
          HtmlParameterDTO dtomodel = new HtmlParameterDTO();
          dtomodel.Url = url;
          HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(dtomodel);
          if (doc == null)
          {
              return false;
          }

          string pageHtml = doc.DocumentNode.InnerHtml;
          if (pageHtml == "Termination" || pageHtml == "")
          {
              return false;
          }

          // SelectNodes returns null (not an empty collection) when nothing matches.
          var tables = doc.DocumentNode.SelectNodes("//*[@class='paiming1']/table");
          if (tables == null)
          {
              return false;
          }

          // The roster table is the one whose text carries both column captions.
          var rosterTable = tables.FirstOrDefault(o => o.InnerText.Contains("球员") && o.InnerText.Contains("身高"));
          if (rosterTable == null)
          {
              return false;
          }

          var rows = rosterTable.SelectNodes(".//tr");
          if (rows == null || rows.Count == 0)
          {
              return false;
          }

          foreach (var row in rows)
          {
              // Skip the header row.
              if (row.InnerHtml.Contains("<th"))
              {
                  continue;
              }

              var cells = row.SelectNodes(".//td");
              if (cells == null)
              {
                  continue;
              }

              // The cell holding the link to the player's detail page.
              var linkCell = cells.FirstOrDefault(o => o.InnerHtml.Contains("href"));
              if (linkCell == null || linkCell.InnerHtml == "")
              {
                  continue;
              }

              var anchor = linkCell.SelectSingleNode(".//a");
              var href = anchor == null ? null : anchor.Attributes["href"];
              if (href == null)
              {
                  continue;
              }

              string playerurl = href.Value;
              string playerName = linkCell.InnerText.Trim();
              string nbaVeteran = cells.Last().InnerText.Trim(); // last column
              string number = cells.First().InnerText.Trim();    // first column

              // A stored name contained in the scraped name counts as "already known".
              // Null/empty stored names are ignored: null would throw inside Contains,
              // and an empty string would match every player.
              bool alreadyStored = allplayers.Any(o =>
                  !string.IsNullOrEmpty(o.ChineseName) && playerName.Contains(o.ChineseName));
              if (!alreadyStored)
              {
                  GetPlayerInfo(playerurl, playerName, nbaVeteran, number, teamId);
              }
          }

          return true;
      }

      /// <summary>
      /// Downloads a player's detail page, builds a <see cref="B_Players"/> record from
      /// it and queues the record in <see cref="players"/>.
      /// </summary>
      /// <param name="url">URL of the player's detail page; also stored as Remark.</param>
      /// <param name="name">Player name as shown on the roster page.</param>
      /// <param name="NBAVeteran">Text of the last roster column.</param>
      /// <param name="Num">Text of the first roster column.</param>
      /// <param name="TeamId">Id of the team the player belongs to.</param>
      /// <returns>
      /// The id generated for the queued player, or an empty string when the page could
      /// not be loaded or does not have the expected layout.
      /// </returns>
      public string GetPlayerInfo(string url, string name, string NBAVeteran, string Num, string TeamId)
      {
          HtmlParameterDTO dtomodel = new HtmlParameterDTO();
          dtomodel.Url = url;
          dtomodel.IsCheckEmpty = false;
          HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(dtomodel);
          if (doc == null)
          {
              return "";
          }

          string pageHtml = doc.DocumentNode.InnerHtml;
          if (pageHtml == "Termination" || pageHtml == "")
          {
              return "";
          }

          var detailNodes = doc.DocumentNode.SelectNodes("//*[@class='div_qyxq']/span/em");
          // Entries 3, 5, 7, 9 and 11 are read below, so at least 12 are required.
          if (detailNodes == null || detailNodes.Count < 12)
          {
              return "";
          }

          var playerData = detailNodes.ToList();

          // Player picture; optional, a missing image must not drop the player.
          var playerImg = doc.DocumentNode.SelectNodes("//*[@class='xq_img1']/img");
          var imgNode = playerImg == null ? null : playerImg.FirstOrDefault();
          var srcAttribute = imgNode == null ? null : imgNode.Attributes["src"];

          string playerId = Guid.NewGuid().ToString();

          B_Players b_Players = new B_Players();
          b_Players.Id = playerId;
          b_Players.ChineseName = name;
          b_Players.TeamId = TeamId;
          b_Players.EnglishName = playerData[3].InnerText;
          b_Players.Stature = playerData[5].InnerText;
          b_Players.Birthday = playerData[7].InnerText;
          b_Players.Position = playerData[9].InnerText;
          b_Players.Nationality = playerData[11].InnerText;
          b_Players.NBAVeteran = NBAVeteran;
          b_Players.Number = Num;
          b_Players.Remark = url;
          b_Players.LogoImage = srcAttribute == null ? string.Empty : srcAttribute.Value;

          // Called from several crawl tasks at once; serialize the shared list update.
          lock (playersLock)
          {
              players.Add(b_Players);
          }

          return playerId;
      }

      #region SQL statements
      // Not referenced by the code in this class; kept unchanged.
      private static string GetAllTeamUrl = @"select Id,Name, Remark from B_Team where Remark is not null";
      #endregion
  }
  189. }