B_GroupingJob.cs 9.7 KB


  1. using System;
  2. using System.Collections.Generic;
  3. using System.Configuration;
  4. using System.Linq;
  5. using System.Text;
  6. using System.Threading.Tasks;
  7. using FCS.Crawler.ZCLotteryMatchs;
  8. using FCS.Interface;
  9. using FCS.Common;
  10. using FCS.Models.Entity;
  11. using HtmlAgilityPack;
  12. using System.Threading;
  13. using Newtonsoft.Json;
  14. using System.Net;
  15. using System.Text.RegularExpressions;
  16. namespace FCS.Crawler.Basketball
  17. {
  18. public class B_GroupingJob : CommonJob
  19. {
  /// <summary>
  /// Id of the event whose groupings are currently being processed. It is assigned by
  /// <see cref="Click_NBA"/> and <see cref="Click_CBA"/> and copied onto every model by <see cref="GetModel"/>.
  /// </summary>
  private string eventId = "";

  /// <summary>
  /// Entry point of the job; simply runs <see cref="Click_NBA"/>.
  /// </summary>
  public void Click() => Click_NBA();
  25. #region NBA
  /// <summary>
  /// Crawls the NBA grouping structure (parent groups and their child groups) for every season
  /// returned by <see cref="GetNBASeason"/> and bulk-inserts the groupings that are not stored yet.
  /// </summary>
  /// <remarks>
  /// Season pages are downloaded and parsed in parallel. Each task works on its own document and
  /// merges its groupings into the shared list under a lock, because <see cref="List{T}"/> is not
  /// safe for concurrent writes.
  /// </remarks>
  public void Click_NBA()
  {
      // Landing page of the season spanning last year and this year, e.g. ".../1/17-18/saiAll.do".
      var now = DateTime.Now;
      var url = "http://lanqiu.zgzcw.com/1/{0}/saiAll.do".FormatMe(
          now.AddYears(-1).Year.ToString().Substring(2) + "-" + now.Year.ToString().Substring(2));

      // Indexing [0] throws when no event carries the configured name.
      var eventName = ConfigurationManager.AppSettings["NBAEventName"];
      eventId = new B_EventsJob().GetEventList().Where(p => p.Name == eventName).ToList()[0].Id;

      var doc = CommonHelper.GetHtml(url, "足彩"); // load the landing page
      var season = GetNBASeason(doc.DocumentNode).ToList();

      var result = new List<B_Grouping>();
      foreach (var model in season)
      {
          taskList.Add(Task.Run(() =>
          {
              // Reuse the landing page only for the season it belongs to; every other season gets
              // its own document so that tasks never overwrite each other's page.
              var seasonDoc = model.Remark == url ? doc : CommonHelper.GetHtml(model.Remark, "足彩");
              var groups = ParseNbaSeasonGroups(seasonDoc.DocumentNode, model);
              lock (result)
              {
                  result.AddRange(groups);
              }
          }));
      }
      Task.WaitAll(taskList.ToArray());

      var groupList = services.Query<B_Grouping>(" AND EventId='{0}'".FormatMe(eventId)).ToList();
      services.SqlBulkCopyAdd(SelectNewNbaGroupings(result, groupList));
  }

  /// <summary>
  /// Extracts the parent groups and their child groups from one season page.
  /// </summary>
  /// <param name="root">Root node of the season page.</param>
  /// <param name="model">Season descriptor; its <c>Remark</c> and <c>Season</c> are copied onto every grouping.</param>
  /// <returns>The groupings found on the page; empty when the expected group container is missing.</returns>
  private List<B_Grouping> ParseNbaSeasonGroups(HtmlNode root, B_Grouping model)
  {
      var groups = new List<B_Grouping>();

      // HtmlAgilityPack returns null (not an empty collection) when an XPath matches nothing.
      var parentGroup = root.SelectSingleNode(".//div[@attr='saiAll']")?.SelectNodes("./span");
      if (parentGroup == null)
      {
          return groups;
      }

      var parentSort = 0;
      foreach (var item in parentGroup)
      {
          parentSort++; // 1-based position of the span among its siblings

          var title = item.SelectSingleNode("./a");
          if (title == null)
          {
              continue; // a span without a link carries no group name
          }

          // The container of the child groups has the span's id with "Title" removed and "s" appended.
          var childId = item.Attributes["Id"].Value.Replace("Title", "") + "s";
          var childGroup_Div = root.SelectSingleNode(".//div[@id='{0}']".FormatMe(childId))
              ?.SelectSingleNode(".//div[@class='xq']");

          var id = CommonHelper.GetGuid().ToString();
          groups.Add(GetModel(new B_Grouping
          {
              Id = id,
              Name = title.InnerText.Trim(),
              Sort = parentSort,
              Remark = model.Remark,
              Season = model.Season,
              // "-1" marks a parent group that has no child-group container.
              Extended1 = childGroup_Div == null ? "-1" : string.Empty,
          }));

          var childGroup = childGroup_Div?.SelectNodes(".//a");
          if (childGroup == null)
          {
              continue;
          }

          var childSort = 0;
          foreach (var link in childGroup)
          {
              childSort++;
              groups.Add(GetModel(new B_Grouping
              {
                  Name = link.InnerText.Trim(),
                  Sort = childSort,
                  ParentId = id,
                  Remark = model.Remark,
                  Season = model.Season,
                  Extended1 = link.Attributes["attr"].Value
              }));
          }
      }

      return groups;
  }

  /// <summary>
  /// Determines which crawled groupings still have to be inserted.
  /// </summary>
  /// <remarks>
  /// Groups are matched by trimmed name within the same event; the season is not part of the comparison.
  /// </remarks>
  /// <param name="crawled">Groupings parsed from the site; child entries may get their <c>ParentId</c> rewritten.</param>
  /// <param name="stored">Groupings already stored for the event.</param>
  /// <returns>New parent groups with all of their children, followed by new children of already stored parents.</returns>
  private static List<B_Grouping> SelectNewNbaGroupings(List<B_Grouping> crawled, List<B_Grouping> stored)
  {
      var data = new List<B_Grouping>();
      var storedRoots = stored.Where(g => string.IsNullOrEmpty(g.ParentId)).ToList();

      // New parent groups, each followed by its child groups.
      foreach (var parent in crawled.Where(g => string.IsNullOrEmpty(g.ParentId)))
      {
          var name = parent.Name.Trim();
          if (storedRoots.Any(r => r.EventId == parent.EventId && r.Name.Trim() == name))
          {
              continue;
          }

          data.Add(parent);
          data.AddRange(crawled.Where(c => c.ParentId == parent.Id));
      }

      // New child groups under parents that are already stored.
      foreach (var storedParent in storedRoots)
      {
          // Children currently stored for this parent.
          var oldChildren = stored
              .Where(s => (s.ParentId.IsEmpty() ? string.Empty : s.ParentId.Trim()) == storedParent.Id
                          && s.EventId == storedParent.EventId)
              .ToList();

          // Children crawled for the parent of the same name ("0" matches nothing when there is none).
          var crawledParent = crawled.FirstOrDefault(s => s.Name.Trim() == storedParent.Name.Trim()
                                                          && s.EventId == storedParent.EventId
                                                          && string.IsNullOrEmpty(s.ParentId));
          var crawledParentId = crawledParent != null ? crawledParent.Id : "0";
          var newChildren = crawled.Where(s => s.ParentId == crawledParentId).ToList();

          // NOTE(review): children are only compared when the counts differ, so a renamed child
          // is not detected while the number of children stays the same.
          if (newChildren.Count == 0 || oldChildren.Count == newChildren.Count)
          {
              continue;
          }

          var oldNames = new HashSet<string>(oldChildren.Select(c => c.Name.Trim()));
          foreach (var child in newChildren.Where(c => !oldNames.Contains(c.Name.Trim())))
          {
              child.ParentId = storedParent.Id; // attach to the stored parent instead of the crawled one
              data.Add(child);
          }
      }

      return data;
  }
  /// <summary>
  /// Completes a grouping before it is collected: assigns a fresh id when none is set and stamps
  /// the creation time and the current event id.
  /// </summary>
  /// <param name="model">The grouping to complete; it is modified in place.</param>
  /// <returns>The same <paramref name="model"/> instance.</returns>
  private B_Grouping GetModel(B_Grouping model)
  {
      var needsId = model.Id.IsEmpty();
      if (needsId)
      {
          model.Id = CommonHelper.GetGuid().ToString();
      }

      model.CreateDateTime = DateTime.Now;
      model.EventId = eventId;

      return model;
  }
  /// <summary>
  /// Lists the NBA seasons that need to be crawled, read from the <c>selectSeason</c> drop-down
  /// of the given page.
  /// </summary>
  /// <remarks>
  /// A season is returned when no grouping is stored for it under the current event, or when it
  /// involves the current calendar year. The sequence is evaluated lazily, so the stored seasons
  /// are queried when the caller starts enumerating.
  /// </remarks>
  /// <param name="doc">Root node of a page that contains the season drop-down.</param>
  /// <returns>
  /// One <see cref="B_Grouping"/> per season with only <c>Season</c> (the option value) and
  /// <c>Remark</c> (the URL of that season's page) set; empty when the drop-down is missing.
  /// </returns>
  private IEnumerable<B_Grouping> GetNBASeason(HtmlNode doc)
  {
      // HtmlAgilityPack returns null (not an empty collection) when an XPath matches nothing.
      var options = doc.SelectSingleNode(".//select[@id='selectSeason']")?.SelectNodes(".//option");
      if (options == null)
      {
          yield break;
      }

      var storedSeasons = new HashSet<string>(
          services.Query<B_Grouping>(" AND EventId='{0}'".FormatMe(eventId)).Select(g => g.Season));

      var year = DateTime.Now.Year;
      var fullYear = year.ToString();
      var shortYear = (year % 100).ToString("00");

      foreach (var option in options)
      {
          var value = option.Attributes["value"]?.Value;
          if (string.IsNullOrEmpty(value))
          {
              continue;
          }

          // The season values appear to use two-digit years ("17-18", the same shape as the landing
          // URL built in Click_NBA), which a four-digit year can never match. Accept both forms;
          // the short form is compared per part so that it cannot match inside a longer number.
          var involvesCurrentYear = value.Contains(fullYear) || value.Split('-').Contains(shortYear);
          if (storedSeasons.Contains(value) && !involvesCurrentYear)
          {
              continue;
          }

          yield return new B_Grouping
          {
              Remark = "http://lanqiu.zgzcw.com/1/" + value + "/saiAll.do",
              Season = value
          };
      }
  }
  143. #endregion
  144. #region CBA
  /// <summary>
  /// Work in progress: for every CBA season returned by <see cref="GetCBASeason"/>, locates the
  /// season's <c>jsData</c> script on nba.nowscore.com and downloads it. The downloaded payload
  /// is not parsed or stored yet.
  /// </summary>
  public void Click_CBA()
  {
      // Example data scripts:
      //   http://nba.nowscore.com/jsData/matchResult/17-18/l5_3.js?version=2018111310           preseason
      //   http://nba.nowscore.com/jsData/matchResult/17-18/l5_1_2018_2.js?version=2018111310    regular season
      //   http://nba.nowscore.com/jsData/matchResult/17-18/l5_2.js?version=2018111310           playoffs

      // Indexing [0] throws when no event carries the configured name.
      var eventName = ConfigurationManager.AppSettings["CBAEventName"];
      eventId = new B_EventsJob().GetEventList().Where(p => p.Name == eventName).ToList()[0].Id;

      var seasonList = GetCBASeason().OrderByDescending(p => p.Season).ToList();
      for (var i = 0; i < seasonList.Count; i++)
      {
          var season = seasonList[i].Season;

          // The first season in descending order is read from Normal.aspx, all others from Playoffs.aspx.
          var pageUrl = i == 0
              ? $"http://nba.nowscore.com/cn/Normal.aspx?SclassID=5&matchSeason={season}"
              : $"http://nba.nowscore.com/cn/Playoffs.aspx?SclassID=5&matchSeason={season}";
          var doc = CommonHelper.GetHtmlHtmlDocument(new Models.DTO.HtmlParameterDTO { Url = pageUrl, IsWebClient = true });

          var scriptSrc = FindCbaSeasonScript(doc.DocumentNode.SelectNodes(".//script"), ToShortSeason(season));
          if (scriptSrc == null)
          {
              // No data script on the page: skip the season instead of requesting a malformed URL.
              continue;
          }

          // TODO: parse the payload and build the groupings; for now it is only downloaded.
          CommonHelper.GetHtmlString(new Models.DTO.HtmlParameterDTO { Url = "http://nba.nowscore.com" + scriptSrc, IsWebClient = true });
      }
  }

  /// <summary>
  /// Returns the <c>src</c> of the first script that points into <c>jsData</c> for the given season.
  /// </summary>
  /// <param name="scripts">Script nodes of the page; may be null when the page has none.</param>
  /// <param name="shortSeason">Season in the short form that appears in the script path.</param>
  /// <returns>The matching <c>src</c> value, or null when there is none.</returns>
  private static string FindCbaSeasonScript(IEnumerable<HtmlNode> scripts, string shortSeason)
  {
      if (scripts == null)
      {
          return null;
      }

      foreach (var script in scripts)
      {
          var src = script.Attributes["src"]?.Value;
          if (!string.IsNullOrEmpty(src) && src.Contains("jsData") && src.Contains(shortSeason))
          {
              return src;
          }
      }

      return null;
  }

  /// <summary>
  /// Shortens every year of a season label to its last two digits, e.g. "2019-2020" becomes "19-20".
  /// </summary>
  /// <remarks>
  /// Removing every "20" from the label gives the same result for "2017-2018" but turns
  /// "2019-2020" into "19-", which is why each part is shortened separately.
  /// </remarks>
  /// <param name="season">Season label whose years are separated by '-'.</param>
  /// <returns>The label with each part reduced to at most two characters.</returns>
  private static string ToShortSeason(string season)
  {
      var parts = season.Split('-')
          .Select(part => part.Length > 2 ? part.Substring(part.Length - 2) : part);
      return string.Join("-", parts);
  }
  /// <summary>
  /// Lists the CBA seasons that need to be crawled, read from the season index script
  /// <c>sea5.js</c> on nba.nowscore.com.
  /// </summary>
  /// <remarks>
  /// A season is returned when no grouping is stored for it under the current event, or when its
  /// label contains the current calendar year. The sequence is evaluated lazily, so the download
  /// and the query run when the caller starts enumerating.
  /// </remarks>
  /// <returns>
  /// One <see cref="B_Grouping"/> per season with <c>Season</c> set to the first element and
  /// <c>Remark</c> to the second element of the script entry; empty when the script cannot be read.
  /// </returns>
  private IEnumerable<B_Grouping> GetCBASeason()
  {
      var url = "http://nba.nowscore.com/jsData/LeagueSeason/sea5.js";
      var html = CommonHelper.GetHtmlString(new Models.DTO.HtmlParameterDTO { Url = url, IsWebClient = true });

      // The JSON array is taken from the text that follows the first '=', with ';' stripped.
      // NOTE(review): presumably the script is a single "name = [[...], ...];" assignment - confirm.
      var parts = (html ?? string.Empty).Split('=');
      if (parts.Length < 2)
      {
          yield break;
      }

      var data = JsonConvert.DeserializeObject<string[][]>(parts[1].Replace(";", ""));
      if (data == null)
      {
          yield break;
      }

      var storedSeasons = new HashSet<string>(
          services.Query<B_Grouping>(" AND EventId='{0}'".FormatMe(eventId)).Select(g => g.Season));
      var currentYear = DateTime.Now.Year.ToString();

      foreach (var item in data)
      {
          // Skip entries that do not carry both values.
          if (item == null || item.Length < 2 || item[0] == null)
          {
              continue;
          }

          var season = item[0];
          if (storedSeasons.Contains(season) && !season.Contains(currentYear))
          {
              continue;
          }

          yield return new B_Grouping
          {
              Season = season,
              Remark = item[1],
          };
      }
  }
  200. #endregion
  201. }
  202. }