FootBallTeamsJob.cs 15 KB


  1. using FCS.Common;
  2. using FCS.Interface;
  3. using FCS.Models;
  4. using HtmlAgilityPack;
  5. using Quartz;
  6. using System;
  7. using System.Collections.Generic;
  8. using System.Data;
  9. using System.Diagnostics;
  10. using System.Linq;
  11. using System.Threading;
  12. using System.Threading.Tasks;
  13. namespace FCS.Crawler.ZCLotteryTeam
  14. {
  /// <summary>
  /// Crawls football team information.
  /// </summary>
  18. public class FootBallTeamsJob : CommonJob, IJob
  19. {
  // Teams collected from regular competition pages during the current run.
  private List<F_Team> teamList = new List<F_Team>();

  // Teams collected from "球会友谊" (club friendly) pages; held separately from teamList.
  private List<F_Team> qhyyteamList = new List<F_Team>();

  // Result of services.GetTeamList(); consulted before saving so known names are skipped.
  private List<F_Team> allTeamList = new List<F_Team>();

  // Assigned in the constructor; no live code in this class reads it.
  private F_Team g;

  /// <summary>
  /// Creates the job and initialises its logger, its data service and the <c>g</c> instance.
  /// </summary>
  public FootBallTeamsJob()
  {
      // log and services are not declared in this file; presumably inherited from CommonJob.
      log = new LogHelper();
      services = IOC.Resolve<IDTOpenCode>();
      g = new F_Team();
  }
  /// <summary>
  /// Quartz entry point: reads the job configuration from the job data map and
  /// then runs the full team crawl.
  /// </summary>
  /// <param name="context">Execution context supplied by the scheduler.</param>
  public void Execute(IJobExecutionContext context)
  {
      var jobDataMap = context.JobDetail.JobDataMap;
      Config = CommonHelper.GetConfigFromDataMap(jobDataMap);

      GetAll();
  }
  /// <summary>
  /// Crawls the season pages of every event that has a Remark URL, waits until the
  /// crawl has finished and then saves every collected team whose name is not
  /// already known.
  /// </summary>
  public void GetAll()
  {
      CommonHelper.LogBD(typeof(FootBallTeamsJob), () =>
      {
          allTeamList = services.GetTeamList();
          var events = services.Query<F_Events>("and Remark is not null").ToList();
          if (events.Count > 0)
          {
              // One task per event. The tasks are collected so that they can be
              // awaited; previously they were fire-and-forget and spawned a second
              // nested task, so nothing guaranteed they had even started before the
              // completion check below ran.
              var crawlTasks = new List<Task>();
              foreach (var footballEvent in events)
              {
                  var url = footballEvent.Remark.ToString();
                  var eventId = footballEvent.Id.ToString();
                  var eventName = footballEvent.Name.ToString();
                  crawlTasks.Add(Task.Run(() => CrawlEventSeasons(url, eventId, eventName)));
              }

              // CrawlEventSeasons handles its own exceptions, so this does not throw
              // for a failed page.
              Task.WaitAll(crawlTasks.ToArray());

              // The original completion gate is kept (its exact semantics live in
              // CommonHelper and are not visible here), but it is polled with a
              // pause instead of spinning a CPU core.
              const int pollIntervalMilliseconds = 100;
              while (!CommonHelper.ThreadsFinsh())
              {
                  Thread.Sleep(pollIntervalMilliseconds);
              }

              SaveNewTeams();
          }
      });
  }

  /// <summary>
  /// Downloads the main page of one event and crawls every season linked from it.
  /// Failures are written to the trace output instead of being thrown, so one bad
  /// page cannot stop the crawl of the other events or seasons.
  /// </summary>
  private void CrawlEventSeasons(string url, string eventId, string eventName)
  {
      var mainUrl = "http://saishi.zgzcw.com" + url;
      try
      {
          HtmlDocument doc = CommonHelper.GetHtml(mainUrl, new Dictionary<string, string>(), "足彩", "", 10000, 100);
          if (doc == null)
          {
              return;
          }

          var seasonLinks = doc.DocumentNode.SelectNodes("//*[@class='select_options']/a");

          // Pages mentioning "球会友谊" (club friendlies) are skipped entirely, exactly
          // as before. Because of this guard the IsQHYY argument passed to
          // GetEventHtml was always false, so it is now passed as a literal.
          if (seasonLinks == null || doc.DocumentNode.InnerText.Contains("球会友谊"))
          {
              return;
          }

          // Loop over the seasons.
          foreach (var seasonLink in seasonLinks)
          {
              var hrefAttribute = seasonLink.Attributes.SingleOrDefault(a => a.Name.Equals("href"));
              if (hrefAttribute == null || hrefAttribute.Value == null)
              {
                  // A link without href used to raise a NullReferenceException.
                  continue;
              }

              // NOTE(review): as written this replaces "-" with itself; presumably one
              // side was a different (e.g. full-width) dash in the original file - confirm.
              var seasonUrl = hrefAttribute.Value.Replace("-", "-");
              try
              {
                  GetEventHtml(seasonUrl, eventId, eventName, false);
              }
              catch (Exception ex)
              {
                  Trace.TraceError("FootBallTeamsJob: failed to crawl season page '{0}' of event '{1}': {2}", seasonUrl, eventName, ex);
              }
          }
      }
      catch (Exception ex)
      {
          Trace.TraceError("FootBallTeamsJob: failed to crawl event '{0}' ({1}): {2}", eventName, mainUrl, ex);
      }
  }

  /// <summary>
  /// Saves every collected team whose name is neither in allTeamList nor already
  /// saved earlier in this call. A failing insert is traced and skipped so the
  /// remaining teams are still saved.
  /// </summary>
  private void SaveNewTeams()
  {
      if (allTeamList == null)
      {
          // Without the list of stored teams nothing can be de-duplicated. The
          // original code also saved nothing here (the exception was swallowed).
          Trace.TraceError("FootBallTeamsJob: the existing team list is unavailable; collected teams were not saved.");
          return;
      }

      var knownNames = new HashSet<string>(allTeamList.Select(o => o.Name));

      var candidates = new List<F_Team>();
      lock (teamList)
      {
          candidates.AddRange(teamList);
      }
      // TODO: parse the club-friendly (球会友谊) teams.
      candidates.AddRange(qhyyteamList);

      foreach (var team in candidates)
      {
          // Add returns false when the name is already known, which also filters
          // duplicate names inside the collected lists themselves.
          if (!knownNames.Add(team.Name))
          {
              continue;
          }

          try
          {
              services.AddTeam(FCSLottery.F_Team, team);
          }
          catch (Exception ex)
          {
              Trace.TraceError("FootBallTeamsJob: failed to save team '{0}': {1}", team.Name, ex);
          }
      }
  }
  /// <summary>
  /// Downloads one event/season page and records every team linked from its
  /// "球队列表" (team list) panel.
  /// </summary>
  /// <param name="url">URL of the page; passed unchanged to CommonHelper.GetHtml.</param>
  /// <param name="eventId">Identifier stored in EventId of every team created here.</param>
  /// <param name="eventName">Name of the event. Not used by this method.</param>
  /// <param name="IsQHYY">
  /// True to only collect the teams into qhyyteamList (with the team link as Remark);
  /// false to collect them into teamList and call GetTeamHtml for each team link.
  /// </param>
  /// <returns>
  /// False when the URL is null, contains "wwaattssuunn" or the page could not be
  /// downloaded; otherwise true, even when no team was found.
  /// </returns>
  public bool GetEventHtml(string url, string eventId, string eventName, bool IsQHYY)
  {
      if (url == null || url.Contains("wwaattssuunn"))
      {
          return false;
      }

      HtmlDocument doc = CommonHelper.GetHtml(url, new Dictionary<string, string>(), "足彩", "", 10000, 100);
      if (doc == null)
      {
          // Other code in this class also treats a null document as possible.
          return false;
      }

      // The statistics panels on the page.
      var panels = doc.DocumentNode.SelectNodes("//*[@class='tongji_list']");
      if (panels == null)
      {
          return true;
      }

      foreach (var panel in panels)
      {
          if (!panel.InnerText.Contains("球队列表") || panel.InnerHtml == null)
          {
              continue;
          }

          // Team list of the current event.
          HtmlDocument teamHtml = new HtmlDocument();
          teamHtml.LoadHtml(panel.InnerHtml);
          var teamLinks = teamHtml.DocumentNode.SelectNodes("//ul/a");
          if (teamLinks == null)
          {
              continue;
          }

          foreach (var teamLink in teamLinks)
          {
              if (string.IsNullOrEmpty(teamLink.InnerHtml))
              {
                  continue;
              }

              // NOTE(review): as written this replaces "-" with itself; presumably one
              // side was a different (e.g. full-width) dash in the original file - confirm.
              var teamHref = ReadLinkAttribute(teamLink, "href").Replace("-", "-");
              var teamName = ReadLinkAttribute(teamLink, "title");

              if (IsQHYY)
              {
                  // The club-friendly event has too many teams, so they are only
                  // collected here and parsed later, after being stored.
                  // This list is now guarded by a lock like teamList; it was
                  // previously mutated without one.
                  lock (qhyyteamList)
                  {
                      if (!ContainsTeamNamed(qhyyteamList, teamName))
                      {
                          qhyyteamList.Add(CreateTeamPlaceholder(teamName, eventId, teamHref));
                      }
                  }
                  continue;
              }

              lock (teamList)
              {
                  if (!ContainsTeamNamed(teamList, teamName))
                  {
                      teamList.Add(CreateTeamPlaceholder(teamName, eventId, url));
                  }
              }

              if (teamHref != "")
              {
                  // NOTE(review): GetTeamHtml returns false straight away when teamList
                  // already holds an exact name match, which is normally the case right
                  // after the insert above - so the detail page is practically never
                  // downloaded. Kept as is; confirm whether that is intended.
                  try
                  {
                      GetTeamHtml(teamHref, teamName, eventId);
                  }
                  catch (Exception ex)
                  {
                      // One broken team page must not abort the remaining teams.
                      Trace.TraceError("FootBallTeamsJob: failed to read team page '{0}' ({1}): {2}", teamName, teamHref, ex);
                  }
              }
          }
      }

      return true;
  }

  /// <summary>Returns the value of the named attribute, or "" when it is absent.</summary>
  private static string ReadLinkAttribute(HtmlNode node, string attributeName)
  {
      var attribute = node.Attributes.SingleOrDefault(a => a.Name.Equals(attributeName));
      return attribute == null ? "" : (attribute.Value ?? "");
  }

  /// <summary>True when the list holds a team whose trimmed name equals the trimmed name given.</summary>
  private static bool ContainsTeamNamed(List<F_Team> teams, string teamName)
  {
      var wanted = (teamName ?? "").Trim();
      return teams.Any(o => (o.Name ?? "").Trim() == wanted);
  }

  /// <summary>Creates a team entry that carries only an id, name, event id and remark.</summary>
  private static F_Team CreateTeamPlaceholder(string teamName, string eventId, string remark)
  {
      F_Team placeholder = new F_Team();
      placeholder.Id = Guid.NewGuid().ToString();
      placeholder.Name = teamName;
      placeholder.EventId = eventId;
      placeholder.Remark = remark;
      return placeholder;
  }
  /// <summary>
  /// Reads the detail page of one team and adds the team to teamList.
  /// </summary>
  /// <param name="url">URL of the team detail page; passed unchanged to CommonHelper.GetHtml.</param>
  /// <param name="TeamName">Name of the team; also the key used to detect duplicates.</param>
  /// <param name="eventId">Identifier stored in EventId of the detailed team entry.</param>
  /// <returns>
  /// True when a detailed entry was added. False when teamList already holds a team
  /// with this exact name, when the page reports that it has no data (a name-only
  /// entry is added in that case) or when the page could not be read.
  /// </returns>
  public bool GetTeamHtml(string url, string TeamName, string eventId)
  {
      lock (teamList)
      {
          if (teamList.Any(o => o.Name == TeamName))
          {
              return false;
          }
      }

      // One initial download plus up to three retries, as before. A null document
      // is now retried too (it used to raise a NullReferenceException here), and a
      // page that already says "no data" is no longer downloaded again.
      const int maxRetries = 3;
      HtmlDocument doc = CommonHelper.GetHtml(url, new Dictionary<string, string>(), "足彩", "", 10000, 100);
      for (int attempt = 0; attempt < maxRetries && TeamPageNeedsRetry(doc); attempt++)
      {
          doc = CommonHelper.GetHtml(url, new Dictionary<string, string>(), "足彩", "", 10000, 100);
      }

      if (doc == null || doc.DocumentNode.InnerText.Contains("球队还没有资料"))
      {
          F_Team nameOnly = new F_Team();
          nameOnly.Id = Guid.NewGuid().ToString();
          nameOnly.Name = TeamName;
          AddTeamIfAbsent(nameOnly);
          return false;
      }

      // Team introduction.
      var introNodes = doc.DocumentNode.SelectNodes("//*[@class='introduceDiv']");
      if (introNodes == null)
      {
          // Still no introduction after all retries; this used to end in an exception.
          Trace.TraceWarning("FootBallTeamsJob: team page '{0}' ({1}) has no introduction block; skipped.", TeamName, url);
          return false;
      }

      // Team logo.
      var logoNodes = doc.DocumentNode.SelectNodes("//*[@class='star_dl']/dt/img");
      // Basic team information.
      var infoNodes = doc.DocumentNode.SelectNodes("//*[@class='star_dl']/dd");

      F_Team f_Team = new F_Team();
      f_Team.Id = Guid.NewGuid().ToString();
      f_Team.Name = TeamName;
      f_Team.EventId = eventId;
      f_Team.Describe = introNodes[0].InnerHtml.Trim();
      f_Team.Remark = url;

      if (logoNodes != null)
      {
          var srcAttribute = logoNodes[0].Attributes.SingleOrDefault(a => a.Name.Equals("src"));
          if (srcAttribute != null)
          {
              f_Team.LogoImage = srcAttribute.Value;
          }
      }

      if (infoNodes != null)
      {
          // A field that is missing from the page now yields "" instead of an exception.
          f_Team.Address = ReadTeamField(infoNodes, "国家", true);
          // The founding date is stored untrimmed, as before.
          f_Team.SetUpDateTime = ReadTeamField(infoNodes, "球队成立", false);
          f_Team.Coach = ReadTeamField(infoNodes, "教练", true);
          f_Team.CityName = ReadTeamField(infoNodes, "城市", true);
          f_Team.Venue = ReadTeamField(infoNodes, "球场", true);

          var website = ReadTeamWebsite(infoNodes);
          if (website != null)
          {
              f_Team.Website = website;
          }
      }

      return AddTeamIfAbsent(f_Team);
  }

  /// <summary>
  /// True when the page should be downloaded again: it is null, or it has no
  /// introduction block and does not carry the "no data" message either.
  /// </summary>
  private static bool TeamPageNeedsRetry(HtmlDocument doc)
  {
      if (doc == null)
      {
          return true;
      }
      if (doc.DocumentNode.InnerText.Contains("球队还没有资料"))
      {
          return false;
      }
      return doc.DocumentNode.SelectNodes("//*[@class='introduceDiv']") == null;
  }

  /// <summary>
  /// Adds the team to teamList under the list's lock unless a team with the same
  /// name was added in the meantime; returns whether the team was added.
  /// </summary>
  private bool AddTeamIfAbsent(F_Team team)
  {
      lock (teamList)
      {
          if (teamList.Any(o => o.Name == team.Name))
          {
              return false;
          }
          teamList.Add(team);
          return true;
      }
  }

  /// <summary>
  /// Finds the first info node whose text contains the label and returns the part
  /// after the first ':' (tabs removed), or "" when the node or the value is missing.
  /// </summary>
  private static string ReadTeamField(IEnumerable<HtmlNode> infoNodes, string label, bool trim)
  {
      var node = infoNodes.FirstOrDefault(o => o.InnerText.Contains(label));
      if (node == null)
      {
          return "";
      }

      var parts = node.InnerText.Replace("\t", "").Split(':');
      if (parts.Length <= 1)
      {
          return "";
      }
      return trim ? parts[1].Trim() : parts[1];
  }

  /// <summary>
  /// Returns the href of the first link inside the "官网" (official site) info node,
  /// or null when the node, the link or the attribute is missing.
  /// </summary>
  private static string ReadTeamWebsite(IEnumerable<HtmlNode> infoNodes)
  {
      var node = infoNodes.FirstOrDefault(o => o.InnerText.Contains("官网"));
      if (node == null)
      {
          return null;
      }

      HtmlDocument websitedoc = new HtmlDocument();
      websitedoc.LoadHtml(node.InnerHtml);
      var links = websitedoc.DocumentNode.SelectNodes("//var/a");
      if (links == null)
      {
          return null;
      }

      var hrefAttribute = links[0].Attributes.SingleOrDefault(a => a.Name.Equals("href"));
      return hrefAttribute == null ? null : hrefAttribute.Value;
  }
  /// <summary>
  /// Normalises a founding-date text into a string that DateTime.TryParse accepts.
  /// </summary>
  /// <param name="time">Date text, for example "1990", "1990-05" or "1990:05:06".</param>
  /// <returns>
  /// The text itself (with ':' replaced by '-') when it already parses as a date;
  /// the text padded with "-01-01" or "-01" when it has one or two '-'-separated
  /// parts and the padded text parses; otherwise "1500-01-01".
  /// </returns>
  public string realTime(string time)
  {
      const string fallback = "1500-01-01";

      // Null, empty or blank input used to throw or to produce "-01-01".
      if (string.IsNullOrWhiteSpace(time))
      {
          return fallback;
      }

      // NOTE(review): the second Replace repeats the first as written; presumably one
      // of them targeted a full-width colon in the original file - confirm.
      time = time.Replace(":", "-").Replace(":", "-");

      DateTime parsed;
      if (DateTime.TryParse(time, out parsed))
      {
          return time;
      }

      var parts = time.Split('-');
      string padded = null;
      if (parts.Length == 1)
      {
          padded = time + "-01-01";
      }
      else if (parts.Length == 2)
      {
          padded = time + "-01";
      }

      // The padded text is validated too, so garbage such as "abc" no longer comes
      // back as "abc-01-01".
      if (padded != null && DateTime.TryParse(padded, out parsed))
      {
          return padded;
      }

      return fallback;
  }
  #region SQL statements
  // Selects Id, Name and Remark of the events that have a Remark value.
  // No code in this class references this field.
  private static string GetAllEventUrl = "select Id,Name, Remark from F_Events where Remark is not null ";
  #endregion
  297. }
  298. }