B_PlayerJob.cs 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. using System;
  2. using System.Collections.Generic;
  3. using FCS.Common;
  4. using FCS.Crawler.Tools;
  5. using FCS.Interface;
  6. using FCS.Models;
  7. using HtmlAgilityPack;
  8. using Newtonsoft.Json;
  9. using Quartz;
  10. using System.Data;
  11. using System.Linq;
  12. using System.Text;
  13. using System.Text.RegularExpressions;
  14. using System.Threading;
  15. using System.Threading.Tasks;
  16. using System.Diagnostics;
  17. using FCS.Models.Entity;
  18. using FCS.Models.DTO;
  19. namespace FCS.Crawler.Basketball
  20. {
  /// <summary>
  /// Basketball player crawl job. For every team that has a source URL stored in
  /// <c>Remark</c>, downloads the team page, follows each player link that is not
  /// already known, parses the player detail page and finally stores the newly
  /// discovered players.
  /// </summary>
  public class B_PlayerJob : CommonJob, IJob
  {
      // Guards 'players': GetPlayerInfo is reached from several thread-pool tasks
      // at the same time (see GetAll) and List<T> does not support concurrent writers.
      private readonly object playersLock = new object();

      // GetPlayerInfo reads detail fields up to index 11, so at least 12 entries are required.
      private const int RequiredDetailFieldCount = 12;

      /// <summary>Players parsed during the current run; candidates for insertion.</summary>
      public List<B_Players> players = new List<B_Players>();

      /// <summary>Players already stored; loaded at the start of <see cref="GetAll"/>.</summary>
      public List<B_Players> allplayers = new List<B_Players>();

      /// <summary>
      /// Creates the job and resolves its logger and data service.
      /// </summary>
      public B_PlayerJob()
      {
          log = new LogHelper();
          services = IOC.Resolve<IDTOpenCode>();
      }

      /// <summary>
      /// Quartz entry point: reads the job configuration from the job data map and runs the crawl.
      /// </summary>
      /// <param name="context">Execution context supplied by the scheduler.</param>
      public void Execute(IJobExecutionContext context)
      {
          Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
          GetAll();
      }

      /// <summary>
      /// Crawls the roster page of every team in parallel, waits for all crawls to finish,
      /// then stores the players whose Chinese name is not already known.
      /// </summary>
      public void GetAll()
      {
          var teams = services.Query<B_Team>("and Remark is not null").ToList();
          allplayers = services.Query<B_Players>().ToList();

          // One task per team. The tasks are tracked explicitly so completion can be
          // awaited deterministically instead of polling thread-pool counters, which are
          // process-wide and also reflect unrelated work.
          var crawlTasks = new List<Task>();
          foreach (var team in teams)
          {
              var url = team.Remark.ToString();
              var teamId = team.Id.ToString();
              var teamName = team.Name.ToString();
              crawlTasks.Add(Task.Run(() =>
              {
                  try
                  {
                      GetPlayerListHtml(url, teamId, teamName);
                  }
                  catch (Exception ex)
                  {
                      // Best effort: one broken team page must not prevent the others from being saved.
                      Trace.WriteLine("B_PlayerJob: failed to crawl team " + teamName + " (" + url + "): " + ex);
                  }
              }));
          }

          Task.WaitAll(crawlTasks.ToArray());
          Console.WriteLine("Thread Finished!");

          if (players == null)
          {
              return;
          }

          List<B_Players> parsedPlayers;
          lock (playersLock)
          {
              parsedPlayers = players.ToList();
          }

          // A player is new when its Chinese name is neither stored already nor queued earlier in this run.
          var knownNames = new HashSet<string>(allplayers.Select(p => p.ChineseName));
          var addList = new List<B_Players>();
          foreach (var candidate in parsedPlayers)
          {
              if (knownNames.Add(candidate.ChineseName))
              {
                  addList.Add(candidate);
              }
          }

          if (addList.Count > 0)
          {
              services.SqlBulkCopyAdd<B_Players>(addList);
          }
      }

      /// <summary>
      /// Downloads a team page, locates the roster table and parses the detail page of
      /// every listed player that is not already known.
      /// </summary>
      /// <param name="url">Team page URL.</param>
      /// <param name="teamId">Id of the team; stored on every parsed player.</param>
      /// <param name="teamName">Team name (not used by the parsing itself).</param>
      /// <returns>
      /// <c>true</c> when the roster table was found and its rows were processed;
      /// <c>false</c> when the page could not be loaded or contains no roster table.
      /// </returns>
      public bool GetPlayerListHtml(string url, string teamId, string teamName)
      {
          var request = new HtmlParameterDTO();
          request.Url = url;
          HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(request);
          if (doc == null)
          {
              return false;
          }

          // "Termination" is what the original code treats as a failed download
          // (presumably a sentinel produced by CommonHelper; confirm there).
          var pageHtml = doc.DocumentNode.InnerHtml;
          if (pageHtml == "Termination" || pageHtml == "")
          {
              return false;
          }

          // SelectNodes returns null (not an empty collection) when nothing matches.
          var tables = doc.DocumentNode.SelectNodes("//*[@class='paiming1']/table");
          if (tables == null)
          {
              return false;
          }

          // The roster table is the one whose text mentions both "player" and "height".
          var rosterTable = tables.FirstOrDefault(t => t.InnerText.Contains("球员") && t.InnerText.Contains("身高"));
          if (rosterTable == null)
          {
              return false;
          }

          var rosterDoc = new HtmlDocument();
          rosterDoc.LoadHtml(rosterTable.InnerHtml);
          var rows = rosterDoc.DocumentNode.SelectNodes("//tr");
          if (rows == null || rows.Count == 0)
          {
              return false;
          }

          foreach (var row in rows)
          {
              // Skip header rows.
              if (row.InnerHtml.Contains("<th"))
              {
                  continue;
              }

              var rowDoc = new HtmlDocument();
              rowDoc.LoadHtml(row.InnerHtml);
              var cells = rowDoc.DocumentNode.SelectNodes("//td");
              if (cells == null || cells.Count == 0)
              {
                  continue;
              }

              var linkCell = cells.FirstOrDefault(c => c.InnerHtml.Contains("href"));
              if (linkCell == null || linkCell.InnerHtml == "")
              {
                  continue;
              }

              var linkDoc = new HtmlDocument();
              linkDoc.LoadHtml(linkCell.InnerHtml);
              var anchor = linkDoc.DocumentNode.SelectSingleNode("//a");
              if (anchor == null)
              {
                  continue;
              }

              var playerUrl = anchor.GetAttributeValue("href", string.Empty);
              if (string.IsNullOrEmpty(playerUrl))
              {
                  continue;
              }

              var playerName = linkDoc.DocumentNode.InnerText.Trim();
              // The last cell is stored as NBAVeteran and the first cell as the player's number.
              var nbaVeteran = cells.Last().InnerText.Trim();
              var number = cells.First().InnerText.Trim();

              // Stored rows without a name are ignored: Contains(null) throws and
              // Contains("") is always true, which would suppress every player.
              var alreadyKnown = allplayers.Any(o => !string.IsNullOrEmpty(o.ChineseName) && playerName.Contains(o.ChineseName));
              if (!alreadyKnown)
              {
                  GetPlayerInfo(playerUrl, playerName, nbaVeteran, number, teamId);
              }
          }

          return true;
      }

      /// <summary>
      /// Downloads and parses a player detail page and queues the resulting player in
      /// <see cref="players"/>.
      /// </summary>
      /// <param name="url">Player detail page URL; also stored as the player's Remark.</param>
      /// <param name="name">Player's Chinese name as shown on the roster.</param>
      /// <param name="NBAVeteran">Text of the last roster cell for this player.</param>
      /// <param name="Num">Text of the first roster cell for this player.</param>
      /// <param name="TeamId">Id of the team the player belongs to.</param>
      /// <returns>
      /// The generated id of the queued player, or an empty string when the page could
      /// not be loaded or does not have the expected layout.
      /// </returns>
      public string GetPlayerInfo(string url, string name, string NBAVeteran, string Num, string TeamId)
      {
          var request = new HtmlParameterDTO();
          request.Url = url;
          request.IsCheckEmpty = false;
          HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(request);
          if (doc == null)
          {
              return "";
          }

          var pageHtml = doc.DocumentNode.InnerHtml;
          if (pageHtml == "Termination" || pageHtml == "")
          {
              return "";
          }

          // Presumably the <em> elements alternate label/value, which is why only the
          // odd indexes 3..11 are read; confirm against the live page layout.
          var detailFields = doc.DocumentNode.SelectNodes("//*[@class='div_qyxq']/span/em");
          if (detailFields == null || detailFields.Count < RequiredDetailFieldCount)
          {
              return "";
          }

          // The portrait is optional: a missing image must not discard the player.
          var portrait = doc.DocumentNode.SelectSingleNode("//*[@class='xq_img1']/img");
          var logoImage = portrait == null ? string.Empty : portrait.GetAttributeValue("src", string.Empty);

          B_Players player = new B_Players();
          player.Id = Guid.NewGuid().ToString();
          player.ChineseName = name;
          player.TeamId = TeamId;
          player.EnglishName = detailFields[3].InnerText;
          player.Stature = detailFields[5].InnerText;
          player.Birthday = detailFields[7].InnerText;
          player.Position = detailFields[9].InnerText;
          player.Nationality = detailFields[11].InnerText;
          player.NBAVeteran = NBAVeteran;
          player.Number = Num;
          player.Remark = url;
          player.LogoImage = logoImage;

          lock (playersLock)
          {
              players.Add(player);
          }

          return player.Id;
      }

      #region SQL statements
      // NOTE(review): not referenced anywhere in this class; GetAll queries through services.Query instead.
      private static string GetAllTeamUrl = @"select Id,Name, Remark from B_Team where Remark is not null";
      #endregion
  }
  191. }