// FootBallTeamsJob.cs (15 KB)
  using System;
  using System.Collections.Concurrent;
  using System.Collections.Generic;
  using System.Data;
  using System.Diagnostics;
  using System.Linq;
  using System.Threading;
  using System.Threading.Tasks;
  using FCS.Common;
  using FCS.Interface;
  using FCS.Models;
  using HtmlAgilityPack;
  using Quartz;
  namespace FCS.Crawler.ZCLotteryTeam
  {
      /// <summary>
      /// Quartz job that crawls football team information from saishi.zgzcw.com
      /// and stores teams that are not in the database yet.
      /// </summary>
      /// <remarks>
      /// The crawl fans out over thread-pool tasks: one per event, one per season page.
      /// Every task started by this instance is recorded in <c>pendingTasks</c> so that
      /// <see cref="GetAll"/> can wait for the exact set of tasks it owns.
      /// </remarks>
      public class FootBallTeamsJob : CommonJob, IJob
      {
          // Guards teamList and qhyyteamList, which are read and written by many pool threads.
          private static readonly object teamLock = new object();

          // Teams collected from regular events during the current run.
          private static readonly List<F_Team> teamList = new List<F_Team>();

          // Teams collected from pages whose text contains "球会友谊"; their detail pages are not parsed yet.
          private static readonly List<F_Team> qhyyteamList = new List<F_Team>();

          // Teams already known to the service layer, loaded at the start of GetAll.
          private static List<F_Team> allTeamList = new List<F_Team>();

          // Every task started by this job instance; drained by WaitForPendingTasks.
          private readonly ConcurrentQueue<Task> pendingTasks = new ConcurrentQueue<Task>();

          public FootBallTeamsJob()
          {
              log = new LogHelper();
              services = IOC.Resolve<IDTOpenCode>();
          }

          /// <summary>
          /// Quartz entry point: reads the job configuration from the data map and runs the crawl.
          /// </summary>
          /// <param name="context">Execution context supplied by the scheduler.</param>
          public void Execute(IJobExecutionContext context)
          {
              Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
              GetAll();
          }

          /// <summary>
          /// Crawls every event returned by <c>GetAllEventUrl</c>, waits for all crawl tasks
          /// to finish and then stores the teams that are not in the database yet.
          /// </summary>
          public void GetAll()
          {
              ThreadPool.SetMinThreads(10, 10);
              ThreadPool.SetMaxThreads(200, 200);

              // The collections are static, so results of a previous run must not leak into this one.
              lock (teamLock)
              {
                  teamList.Clear();
                  qhyyteamList.Clear();
              }

              allTeamList = services.GetTeamList() ?? new List<F_Team>();

              var ds = SqlHelper.ExecuteDataset(CommandType.Text, GetAllEventUrl);
              if (ds == null || ds.Tables.Count == 0)
              {
                  return;
              }

              // Each row holds the relative URL (Remark), id and name of one event.
              foreach (DataRow row in ds.Tables[0].Rows)
              {
                  var url = row["Remark"].ToString();
                  var eventId = row["Id"].ToString();
                  var eventName = row["Name"].ToString();
                  RunTracked(() => CrawlEvent(url, eventId, eventName));
              }

              WaitForPendingTasks();
              SaveNewTeams();
          }

          /// <summary>
          /// Starts a background crawl of the team list of one season page.
          /// </summary>
          /// <param name="url">Absolute URL of the season page.</param>
          /// <param name="eventId">Id of the event the season belongs to.</param>
          /// <param name="eventName">Name of the event (currently not used by the crawl).</param>
          /// <param name="IsQHYY">
          /// True when the event page contains "球会友谊"; such teams are only collected
          /// into a separate list and their detail pages are not fetched.
          /// </param>
          /// <returns>
          /// False when <paramref name="url"/> is empty or contains "wwaattssuunn";
          /// otherwise true, meaning the crawl has been started (not that it has finished).
          /// </returns>
          public bool GetEventHtml(string url, string eventId, string eventName, bool IsQHYY)
          {
              if (string.IsNullOrEmpty(url) || url.Contains("wwaattssuunn"))
              {
                  return false;
              }

              RunTracked(() => CrawlSeason(url, eventId, IsQHYY));
              return true;
          }

          /// <summary>
          /// Fetches a team's detail page and stores the parsed record in the collected team list.
          /// </summary>
          /// <param name="url">URL of the team detail page.</param>
          /// <param name="TeamName">Team name; used as the key in the collected list.</param>
          /// <param name="eventId">Id of the event the team was found in.</param>
          /// <returns>
          /// True when the page was parsed and the detailed record stored; false when the page
          /// could not be loaded, has no introduction block, or reports that the team has no data.
          /// In the false case the team is still registered by name if it was not already.
          /// </returns>
          public bool GetTeamHtml(string url, string TeamName, string eventId)
          {
              HtmlDocument doc = null;
              HtmlNode introNode = null;

              // One initial attempt plus up to three retries, as the page sometimes comes back incomplete.
              for (int attempt = 0; attempt <= 3; attempt++)
              {
                  doc = CommonHelper.GetHtml(url, "足彩");
                  if (doc == null)
                  {
                      continue;
                  }

                  // A page that says the team has no data will not improve on retry.
                  if (doc.DocumentNode.InnerText.Contains("球队还没有资料"))
                  {
                      break;
                  }

                  introNode = FirstNode(doc.DocumentNode, "//*[@class='introduceDiv']");
                  if (introNode != null)
                  {
                      break;
                  }
              }

              if (introNode == null)
              {
                  // Keep at most one name-only record per team.
                  TryRegisterTeam(teamList, new F_Team
                  {
                      Id = Guid.NewGuid().ToString(),
                      Name = TeamName
                  });
                  return false;
              }

              F_Team f_Team = new F_Team();
              f_Team.Id = Guid.NewGuid().ToString();
              f_Team.Name = TeamName;
              f_Team.EventId = eventId;
              f_Team.Describe = introNode.InnerHtml.Trim();
              f_Team.Remark = url;
              // Team logo.
              f_Team.LogoImage = ReadAttribute(FirstNode(doc.DocumentNode, "//*[@class='star_dl']/dt/img"), "src");

              // Basic team facts, one "label:value" row per <dd>.
              var infoRows = doc.DocumentNode.SelectNodes("//*[@class='star_dl']/dd");
              if (infoRows != null)
              {
                  f_Team.Address = ReadField(infoRows, "国家").Trim();
                  f_Team.SetUpDateTime = ReadField(infoRows, "球队成立");
                  f_Team.Coach = ReadField(infoRows, "教练").Trim();
                  f_Team.CityName = ReadField(infoRows, "城市").Trim();
                  f_Team.Venue = ReadField(infoRows, "球场").Trim();
                  f_Team.Website = ReadWebsite(infoRows);
              }

              ReplaceTeam(f_Team);
              return true;
          }

          /// <summary>
          /// Normalises a loosely formatted date string.
          /// </summary>
          /// <param name="time">Raw date text.</param>
          /// <returns>
          /// The text (with ":" replaced by "-") when it parses as a date; the text padded with
          /// "-01-01" or "-01" when it has one or two "-"-separated parts; otherwise "1500-01-01".
          /// </returns>
          public string realTime(string time)
          {
              // NOTE(review): both Replace calls use the same character as written here;
              // presumably one of them targeted a full-width colon - confirm against the original file.
              time = time.Replace(":", "-").Replace(":", "-");

              DateTime parsed;
              if (DateTime.TryParse(time, out parsed))
              {
                  return time;
              }

              int partCount = time.Split('-').Length;
              if (partCount == 1)
              {
                  return time + "-01-01";
              }

              if (partCount == 2)
              {
                  return time + "-01";
              }

              return "1500-01-01";
          }

          /// <summary>
          /// Starts <paramref name="work"/> on the thread pool and records the task so that
          /// <see cref="WaitForPendingTasks"/> can wait for it.
          /// </summary>
          private void RunTracked(Action work)
          {
              pendingTasks.Enqueue(Task.Run(work));
          }

          /// <summary>
          /// Blocks until every tracked task, including tasks started by other tracked tasks,
          /// has finished. Failures are traced and do not stop the wait.
          /// </summary>
          private void WaitForPendingTasks()
          {
              // A task enqueues its children before it completes, so once the queue is empty
              // after waiting on each dequeued task, no crawl work is left.
              Task pending;
              while (pendingTasks.TryDequeue(out pending))
              {
                  try
                  {
                      pending.Wait();
                  }
                  catch (AggregateException ex)
                  {
                      // The crawl is best effort: one failed page must not abort the whole run.
                      Trace.WriteLine("Crawl task failed: " + ex.Flatten());
                  }
              }

              Console.WriteLine("Thread Finished!");
          }

          /// <summary>
          /// Stores every collected team whose name is not in the database yet, once per name.
          /// </summary>
          private void SaveNewTeams()
          {
              List<F_Team> collected;
              lock (teamLock)
              {
                  collected = teamList.Concat(qhyyteamList).ToList();
              }

              // Seeded with the names already stored; Add returns false for a name seen before.
              var knownNames = new HashSet<string>(allTeamList.Select(o => o.Name));
              foreach (var team in collected)
              {
                  if (knownNames.Add(team.Name))
                  {
                      services.AddTeam(FCSLottery.F_Team, team);
                  }
              }
              // TODO: parse the detail pages of the "球会友谊" teams.
          }

          /// <summary>
          /// Loads one event page and starts a season crawl for every season link on it.
          /// </summary>
          private void CrawlEvent(string url, string eventId, string eventName)
          {
              var mainUrl = "http://saishi.zgzcw.com" + url;
              Stopwatch sw = new Stopwatch();
              Trace.WriteLine("开始:" + mainUrl);
              sw.Start();

              HtmlDocument doc = CommonHelper.GetHtml(mainUrl, "足彩");
              if (doc == null)
              {
                  return;
              }

              var seasonLinks = doc.DocumentNode.SelectNodes("//*[@class='select_options']/a");
              if (seasonLinks == null)
              {
                  return;
              }

              // The flag depends on the event page only, so it is computed once for all seasons.
              bool isFriendly = doc.DocumentNode.InnerText.Contains("球会友谊");
              foreach (var link in seasonLinks)
              {
                  var sjurl = ReadAttribute(link, "href");
                  if (sjurl == "")
                  {
                      continue;
                  }

                  // NOTE(review): this Replace is a no-op as written; presumably it normalised
                  // a different dash character - confirm against the original file.
                  sjurl = sjurl.Replace("-", "-");
                  GetEventHtml(sjurl, eventId, eventName, isFriendly);
              }

              sw.Stop();
              Trace.WriteLine(Thread.CurrentThread.ManagedThreadId.ToString() + "||" + sw.ElapsedTicks);
          }

          /// <summary>
          /// Loads one season page and collects every team listed in its team-list panels.
          /// </summary>
          private void CrawlSeason(string url, string eventId, bool isFriendly)
          {
              HtmlDocument doc = CommonHelper.GetHtml(url, "足彩");
              if (doc == null)
              {
                  return;
              }

              // Side panels of the page; only the one titled "球队列表" holds the teams.
              var panels = doc.DocumentNode.SelectNodes("//*[@class='tongji_list']");
              if (panels == null)
              {
                  return;
              }

              foreach (var panel in panels)
              {
                  if (!panel.InnerText.Contains("球队列表") || panel.InnerHtml == null)
                  {
                      continue;
                  }

                  HtmlDocument teamHtml = new HtmlDocument();
                  teamHtml.LoadHtml(panel.InnerHtml);
                  var teams = teamHtml.DocumentNode.SelectNodes("//ul/a");
                  if (teams == null)
                  {
                      continue;
                  }

                  foreach (var teamNode in teams)
                  {
                      CollectTeam(teamNode, url, eventId, isFriendly);
                  }
              }
          }

          /// <summary>
          /// Registers the team behind one team-list link and, for regular events,
          /// fetches its detail page the first time the team is seen.
          /// </summary>
          private void CollectTeam(HtmlNode teamNode, string seasonUrl, string eventId, bool isFriendly)
          {
              if (string.IsNullOrEmpty(teamNode.InnerHtml))
              {
                  return;
              }

              // NOTE(review): this Replace is a no-op as written - confirm against the original file.
              var teamhref = ReadAttribute(teamNode, "href").Replace("-", "-");
              var teamName = ReadAttribute(teamNode, "title");

              var team = new F_Team
              {
                  Id = Guid.NewGuid().ToString(),
                  Name = teamName,
                  EventId = eventId,
                  // "球会友谊" teams remember their own page for later parsing; others remember the season page.
                  Remark = isFriendly ? teamhref : seasonUrl
              };

              bool added = TryRegisterTeam(isFriendly ? qhyyteamList : teamList, team);

              // The same team shows up in many seasons; fetching its page once is enough.
              if (!isFriendly && added && teamhref != "")
              {
                  GetTeamHtml(teamhref, teamName, eventId);
              }
          }

          /// <summary>
          /// Adds <paramref name="team"/> to <paramref name="target"/> unless a team with the
          /// same trimmed name is already present. Returns true when the team was added.
          /// </summary>
          private static bool TryRegisterTeam(List<F_Team> target, F_Team team)
          {
              var name = NormalizeName(team.Name);
              lock (teamLock)
              {
                  if (target.Any(o => NormalizeName(o.Name) == name))
                  {
                      return false;
                  }

                  target.Add(team);
                  return true;
              }
          }

          /// <summary>
          /// Stores <paramref name="team"/> in the regular team list, replacing any record
          /// with the same trimmed name so each team is kept once.
          /// </summary>
          private static void ReplaceTeam(F_Team team)
          {
              var name = NormalizeName(team.Name);
              lock (teamLock)
              {
                  teamList.RemoveAll(o => NormalizeName(o.Name) == name);
                  teamList.Add(team);
              }
          }

          /// <summary>Returns the trimmed name, or an empty string for null.</summary>
          private static string NormalizeName(string name)
          {
              return (name ?? "").Trim();
          }

          /// <summary>Returns the first node matching <paramref name="xpath"/>, or null.</summary>
          private static HtmlNode FirstNode(HtmlNode root, string xpath)
          {
              var nodes = root.SelectNodes(xpath);
              return nodes == null ? null : nodes.FirstOrDefault();
          }

          /// <summary>
          /// Returns the value of the named attribute, or an empty string when the node
          /// is null or has no such attribute.
          /// </summary>
          private static string ReadAttribute(HtmlNode node, string name)
          {
              if (node == null)
              {
                  return "";
              }

              var attribute = node.Attributes.FirstOrDefault(a => a.Name.Equals(name));
              return attribute == null ? "" : attribute.Value;
          }

          /// <summary>
          /// Returns the text after the first ':' of the first row whose text contains
          /// <paramref name="label"/>, or an empty string when there is no such row or value.
          /// </summary>
          private static string ReadField(IEnumerable<HtmlNode> rows, string label)
          {
              var row = rows.FirstOrDefault(o => o.InnerText.Contains(label));
              if (row == null)
              {
                  return "";
              }

              // NOTE(review): splits on the colon exactly as written in this file; if the page
              // uses a full-width colon this finds no value - confirm against the original file.
              var parts = row.InnerText.Replace("\t", "").Split(':');
              return parts.Length > 1 ? parts[1] : "";
          }

          /// <summary>
          /// Returns the href of the link inside the "官网" row, or an empty string when missing.
          /// </summary>
          private static string ReadWebsite(IEnumerable<HtmlNode> rows)
          {
              var row = rows.FirstOrDefault(o => o.InnerText.Contains("官网"));
              if (row == null)
              {
                  return "";
              }

              HtmlDocument websitedoc = new HtmlDocument();
              websitedoc.LoadHtml(row.InnerHtml);
              return ReadAttribute(FirstNode(websitedoc.DocumentNode, "//var/a"), "href");
          }

          #region SQL statements
          private static readonly string GetAllEventUrl = @"select Id,Name, Remark from F_Events where Remark is not null ";
          #endregion
      }
  }