FootBallTeamsJob.cs 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. using FCS.Common;
  2. using FCS.Interface;
  3. using FCS.Models;
  4. using HtmlAgilityPack;
  5. using Quartz;
  6. using System;
  7. using System.Collections.Generic;
  8. using System.Data;
  9. using System.Diagnostics;
  10. using System.Linq;
  11. using System.Threading;
  12. using System.Threading.Tasks;
  13. namespace FCS.Crawler.ZCLotteryTeam
  14. {
  15. /// <summary>
  16. /// 抓取球队信息
  17. /// </summary>
  18. public class FootBallTeamsJob : CommonJob, IJob
  19. {
  /// <summary>
  /// Teams collected from regular events. The list instance is also the lock target
  /// that guards access to it, so it must never be reassigned.
  /// </summary>
  private static readonly List<F_Team> teamList = new List<F_Team>();

  /// <summary>
  /// Teams collected when an event is crawled with the "球会友谊" (club friendly) flag set.
  /// </summary>
  private static readonly List<F_Team> qhyyteamList = new List<F_Team>();

  /// <summary>
  /// Teams already stored, as returned by <c>services.GetTeamList()</c>; reloaded by
  /// <c>GetAll</c> on every run.
  /// </summary>
  private static List<F_Team> allTeamList = new List<F_Team>();

  /// <summary>
  /// Instance created in the constructor; no active code in this class reads it.
  /// </summary>
  private readonly F_Team g;
  /// <summary>
  /// Creates the job: sets up the logger, resolves the data service from the IOC
  /// container and allocates the helper <c>F_Team</c> instance.
  /// </summary>
  public FootBallTeamsJob()
      : base()
  {
      log = new LogHelper();
      services = IOC.Resolve<IDTOpenCode>();
      this.g = new F_Team();
  }
  /// <summary>
  /// Quartz entry point: reads the job configuration from the job's data map and
  /// then runs the full team crawl.
  /// </summary>
  /// <param name="context">Execution context supplied by the scheduler.</param>
  public void Execute(IJobExecutionContext context)
  {
      var jobData = context.JobDetail.JobDataMap;
      Config = CommonHelper.GetConfigFromDataMap(jobData);
      this.GetAll();
  }
  /// <summary>
  /// Crawls the season pages of every stored event that has a page URL, then stores
  /// each collected team whose name is not already present in the team table.
  /// </summary>
  /// <remarks>
  /// One background task is started per event and this method blocks until all of
  /// them have finished. A failure while crawling one event or one season is written
  /// to the trace output and does not stop the remaining work.
  /// </remarks>
  public void GetAll()
  {
      allTeamList = services.GetTeamList();
      var events = services.Query<F_Events>("and Remark is not null").ToList();
      if (events.Count == 0)
      {
          return;
      }

      var crawlTasks = new List<Task>(events.Count);
      foreach (var ev in events)
      {
          // Copy into locals so that every task captures its own values.
          var url = ev.Remark.ToString();
          var eventId = ev.Id.ToString();
          var eventName = ev.Name.ToString();
          crawlTasks.Add(Task.Run(() => CrawlEvent(url, eventId, eventName)));
      }

      // Wait on the tasks themselves instead of spinning on a global "threads finished" probe.
      Task.WaitAll(crawlTasks.ToArray());

      SaveNewTeams();
  }

  /// <summary>
  /// Downloads the landing page of one event and crawls every season linked from it.
  /// Never throws: errors are traced so that the other events keep running.
  /// </summary>
  private void CrawlEvent(string url, string eventId, string eventName)
  {
      try
      {
          var mainUrl = "http://saishi.zgzcw.com" + url;
          HtmlDocument doc = CommonHelper.GetHtml(mainUrl, new Dictionary<string, string>(), "足彩", "", 10000, 100);
          if (doc == null)
          {
              return;
          }

          var seasonLinks = doc.DocumentNode.SelectNodes("//*[@class='select_options']/a");
          var isClubFriendly = doc.DocumentNode.InnerText.Contains("球会友谊");
          // Pages mentioning "球会友谊" (club friendly) are skipped entirely, as before.
          if (seasonLinks == null || isClubFriendly)
          {
              return;
          }

          foreach (var link in seasonLinks)
          {
              var hrefAttribute = link.Attributes.FirstOrDefault(a => a.Name.Equals("href"));
              if (hrefAttribute == null || string.IsNullOrEmpty(hrefAttribute.Value))
              {
                  continue;
              }

              // NOTE(review): as written this replaces "-" with itself (a no-op). The first
              // argument may originally have been a full-width dash or an HTML entity that
              // was lost in transit - confirm against the repository before removing.
              var sjurl = hrefAttribute.Value.Replace("-", "-");
              try
              {
                  // isClubFriendly is always false here because of the early return above.
                  GetEventHtml(sjurl, eventId, eventName, isClubFriendly);
              }
              catch (Exception ex)
              {
                  // One broken season page must not abort the remaining seasons.
                  Trace.WriteLine("FootBallTeamsJob: season crawl failed for " + sjurl + ": " + ex);
              }
          }
      }
      catch (Exception ex)
      {
          Trace.WriteLine("FootBallTeamsJob: event crawl failed for " + url + ": " + ex);
      }
  }

  /// <summary>
  /// Stores every collected team whose name is not yet known, regular teams first and
  /// club-friendly teams second. A name is stored at most once per run.
  /// </summary>
  private void SaveNewTeams()
  {
      var knownNames = new HashSet<string>();
      if (allTeamList != null)
      {
          foreach (var stored in allTeamList)
          {
              knownNames.Add(stored.Name);
          }
      }

      SaveUnknownTeams(teamList, knownNames);
      // TODO: parse the club-friendly ("球会友谊") teams.
      SaveUnknownTeams(qhyyteamList, knownNames);
  }

  /// <summary>
  /// Inserts the teams of <paramref name="crawled"/> whose name is not in
  /// <paramref name="knownNames"/>, adding each inserted name to the set.
  /// </summary>
  private void SaveUnknownTeams(List<F_Team> crawled, HashSet<string> knownNames)
  {
      if (crawled == null)
      {
          return;
      }

      List<F_Team> snapshot;
      lock (crawled)
      {
          snapshot = crawled.ToList();
      }

      foreach (var team in snapshot)
      {
          // HashSet.Add returns false when the name is already present.
          if (knownNames.Add(team.Name))
          {
              services.AddTeam(FCSLottery.F_Team, team);
          }
      }
  }
  /// <summary>
  /// Crawls one season page of an event and records every team listed in its
  /// "球队列表" (team list) section.
  /// </summary>
  /// <remarks>
  /// For regular events the team's profile page is downloaded first and the team is
  /// registered afterwards. Registering a placeholder before the download made the
  /// profile lookup return immediately, so details were practically never fetched.
  /// Two tasks may still download the same profile concurrently; only one entry per
  /// trimmed team name is kept.
  /// </remarks>
  /// <param name="url">URL of the season page, passed unchanged to the downloader.</param>
  /// <param name="eventId">Id of the event the teams are recorded under.</param>
  /// <param name="eventName">Name of the event; not used by this method.</param>
  /// <param name="IsQHYY">
  /// True for a "球会友谊" (club friendly) event: teams are only queued in the separate
  /// club-friendly list and no profile page is downloaded.
  /// </param>
  /// <returns>
  /// False when the URL is empty, carries the skip marker, or the page could not be
  /// downloaded; otherwise true.
  /// </returns>
  public bool GetEventHtml(string url, string eventId, string eventName, bool IsQHYY)
  {
      // URLs containing this marker are skipped; the reason is not visible in this file.
      if (string.IsNullOrEmpty(url) || url.Contains("wwaattssuunn"))
      {
          return false;
      }

      HtmlDocument doc = CommonHelper.GetHtml(url, new Dictionary<string, string>(), "足彩", "", 10000, 100);
      if (doc == null)
      {
          return false;
      }

      // Side-bar sections of the page; only the one titled "球队列表" holds the teams.
      var sections = doc.DocumentNode.SelectNodes("//*[@class='tongji_list']");
      if (sections == null)
      {
          return true;
      }

      foreach (var section in sections)
      {
          if (!section.InnerText.Contains("球队列表") || section.InnerHtml == null)
          {
              continue;
          }

          var teamHtml = new HtmlDocument();
          teamHtml.LoadHtml(section.InnerHtml);
          var anchors = teamHtml.DocumentNode.SelectNodes("//ul/a");
          if (anchors == null)
          {
              continue;
          }

          foreach (var anchor in anchors)
          {
              if (string.IsNullOrEmpty(anchor.InnerHtml))
              {
                  continue;
              }

              // NOTE(review): Replace("-", "-") is a no-op as written. The first argument may
              // originally have been a full-width dash or an HTML entity - confirm before removing.
              var teamhref = AttributeValueOrEmpty(anchor, "href").Replace("-", "-");
              var teamName = AttributeValueOrEmpty(anchor, "title");

              if (IsQHYY)
              {
                  // Club friendlies list too many teams; keep only the link for later parsing.
                  RegisterTeamIfAbsent(qhyyteamList, new F_Team
                  {
                      Id = Guid.NewGuid().ToString(),
                      Name = teamName,
                      EventId = eventId,
                      Remark = teamhref
                  });
                  continue;
              }

              if (IsTeamKnown(teamName))
              {
                  continue;
              }

              F_Team team = null;
              if (teamhref != "")
              {
                  try
                  {
                      team = LoadTeamProfile(teamhref, teamName, eventId);
                  }
                  catch (Exception ex)
                  {
                      // A broken profile page must not abort the rest of the list.
                      Trace.WriteLine("FootBallTeamsJob: team page failed for " + teamhref + ": " + ex);
                  }
              }

              if (team == null)
              {
                  // No usable profile: keep a placeholder that points back at the season page.
                  team = new F_Team
                  {
                      Id = Guid.NewGuid().ToString(),
                      Name = teamName,
                      EventId = eventId,
                      Remark = url
                  };
              }

              RegisterTeamIfAbsent(teamList, team);
          }
      }

      return true;
  }

  /// <summary>
  /// Downloads a team's profile page and registers the team in the crawl list.
  /// </summary>
  /// <param name="url">URL of the team profile page.</param>
  /// <param name="TeamName">Team name; compared after trimming.</param>
  /// <param name="eventId">Id of the event the team is recorded under.</param>
  /// <returns>
  /// True when the profile was parsed and the team was newly registered. False when
  /// the team was already registered, or when no profile was available (in that case
  /// a placeholder carrying only id and name is registered).
  /// </returns>
  public bool GetTeamHtml(string url, string TeamName, string eventId)
  {
      if (IsTeamKnown(TeamName))
      {
          return false;
      }

      var profile = LoadTeamProfile(url, TeamName, eventId);
      if (profile == null)
      {
          RegisterTeamIfAbsent(teamList, new F_Team
          {
              Id = Guid.NewGuid().ToString(),
              Name = TeamName
          });
          return false;
      }

      return RegisterTeamIfAbsent(teamList, profile);
  }

  /// <summary>
  /// Downloads and parses a team profile page without touching the shared lists.
  /// Returns null when the page cannot be downloaded, reports "球队还没有资料"
  /// (no data for this team yet), or has no introduction block after the retries.
  /// </summary>
  private static F_Team LoadTeamProfile(string url, string teamName, string eventId)
  {
      HtmlDocument doc = null;
      HtmlNode introNode = null;

      // One initial download plus at most three retries while the introduction block is missing.
      for (var attempt = 0; attempt <= 3 && introNode == null; attempt++)
      {
          doc = CommonHelper.GetHtml(url, new Dictionary<string, string>(), "足彩", "", 10000, 100);
          if (doc == null)
          {
              continue;
          }

          if (doc.DocumentNode.InnerText.Contains("球队还没有资料"))
          {
              return null;
          }

          // SelectNodes yields null, not an empty collection, when nothing matches.
          var introNodes = doc.DocumentNode.SelectNodes("//*[@class='introduceDiv']");
          introNode = introNodes == null ? null : introNodes.FirstOrDefault();
      }

      if (introNode == null)
      {
          return null;
      }

      var team = new F_Team();
      team.Id = Guid.NewGuid().ToString();
      team.Name = teamName;
      team.EventId = eventId;
      team.Describe = introNode.InnerHtml.Trim();
      team.Remark = url;

      // Team logo.
      var logoNodes = doc.DocumentNode.SelectNodes("//*[@class='star_dl']/dt/img");
      team.LogoImage = logoNodes == null ? "" : AttributeValueOrEmpty(logoNodes.First(), "src");

      // Basic facts, one "label:value" cell per fact.
      var cells = doc.DocumentNode.SelectNodes("//*[@class='star_dl']/dd");
      if (cells != null)
      {
          team.Address = ReadLabelledValue(cells, "国家").Trim();
          team.SetUpDateTime = ReadLabelledValue(cells, "球队成立");
          team.Coach = ReadLabelledValue(cells, "教练").Trim();
          team.CityName = ReadLabelledValue(cells, "城市").Trim();
          team.Venue = ReadLabelledValue(cells, "球场").Trim();
          team.Website = ReadWebsiteUrl(cells);
      }

      return team;
  }

  /// <summary>
  /// Returns the text after the first colon of the first cell containing
  /// <paramref name="label"/>, or an empty string when the cell or the value is missing.
  /// </summary>
  private static string ReadLabelledValue(IEnumerable<HtmlNode> cells, string label)
  {
      var cell = cells.FirstOrDefault(o => o.InnerText.Contains(label));
      if (cell == null)
      {
          return "";
      }

      // Splits on the ASCII colon and on the full-width colon (U+FF1A).
      // NOTE(review): the full-width form is assumed because the pages are Chinese - confirm.
      var parts = cell.InnerText.Replace("\t", "").Split(':', '\uFF1A');
      return parts.Length > 1 ? parts[1] : "";
  }

  /// <summary>
  /// Returns the href of the first link inside the "官网" (official site) cell, or an
  /// empty string when the cell or the link is missing.
  /// </summary>
  private static string ReadWebsiteUrl(IEnumerable<HtmlNode> cells)
  {
      var cell = cells.FirstOrDefault(o => o.InnerText.Contains("官网"));
      if (cell == null)
      {
          return "";
      }

      var fragment = new HtmlDocument();
      fragment.LoadHtml(cell.InnerHtml);
      var links = fragment.DocumentNode.SelectNodes("//var/a");
      return links == null ? "" : AttributeValueOrEmpty(links.First(), "href");
  }

  /// <summary>Returns the value of the named attribute, or an empty string when it is absent.</summary>
  private static string AttributeValueOrEmpty(HtmlNode node, string name)
  {
      var attribute = node.Attributes.FirstOrDefault(a => a.Name.Equals(name));
      return attribute == null ? "" : attribute.Value;
  }

  /// <summary>Tells whether a team with the same trimmed name is already in the crawl list.</summary>
  private static bool IsTeamKnown(string teamName)
  {
      var key = (teamName ?? "").Trim();
      lock (teamList)
      {
          return teamList.Any(o => (o.Name ?? "").Trim() == key);
      }
  }

  /// <summary>
  /// Adds <paramref name="team"/> to <paramref name="registry"/> unless an entry with the
  /// same trimmed name exists. Check and insert happen under one lock on the list.
  /// </summary>
  /// <returns>True when the team was added.</returns>
  private static bool RegisterTeamIfAbsent(List<F_Team> registry, F_Team team)
  {
      var key = (team.Name ?? "").Trim();
      lock (registry)
      {
          if (registry.Any(o => (o.Name ?? "").Trim() == key))
          {
              return false;
          }

          registry.Add(team);
          return true;
      }
  }
  /// <summary>
  /// Normalises a loosely formatted date string (for example a bare year) into text
  /// that <c>DateTime.TryParse</c> accepts.
  /// </summary>
  /// <param name="time">
  /// Raw date text. ASCII colons and full-width colons (U+FF1A) are treated as date
  /// separators and replaced by "-".
  /// </param>
  /// <returns>
  /// The separator-normalised input when it already parses as a date; the input padded
  /// with "-01-01" (one segment) or "-01" (two segments) when that makes it parse;
  /// otherwise the fallback "1500-01-01". Null or blank input also yields the fallback.
  /// </returns>
  public string realTime(string time)
  {
      const string fallback = "1500-01-01";
      if (string.IsNullOrWhiteSpace(time))
      {
          return fallback;
      }

      // The original called Replace(":", "-") twice; the second call presumably targeted
      // the full-width colon, so both forms are handled here.
      time = time.Replace(":", "-").Replace("\uFF1A", "-");

      DateTime parsed;
      if (DateTime.TryParse(time, out parsed))
      {
          return time;
      }

      var segments = time.Split('-');
      string candidate;
      if (segments.Length == 1)
      {
          candidate = time + "-01-01";
      }
      else if (segments.Length == 2)
      {
          candidate = time + "-01";
      }
      else
      {
          return fallback;
      }

      // Padding garbage such as "abc" does not make it a date; never hand that back.
      return DateTime.TryParse(candidate, out parsed) ? candidate : fallback;
  }
  #region SQL statements
  /// <summary>
  /// Query selecting id, name and remark of every event whose remark is set.
  /// Not referenced by any code in this class.
  /// </summary>
  private static string GetAllEventUrl =
      "select Id,Name, Remark from F_Events where Remark is not null ";
  #endregion
  295. }
  296. }