// KL123SkillJob.cs
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using HtmlAgilityPack;
  6. using Quartz;
  7. using SCC.Common;
  8. using SCC.Interface;
  9. using SCC.Models;
  10. namespace SCC.Crawler.LotterySkill
  11. {
  /// <summary>
  /// Quartz job that crawls KL123 lottery-skill articles from the main site's
  /// list pages and passes every successfully parsed article to the data service.
  /// </summary>
  public class KL123SkillJob : IJob
  {
      /// <summary>
      /// Creates the job and resolves its logger, data service and e-mail service.
      /// </summary>
      public KL123SkillJob()
      {
          log = new LogHelper();
          services = IOC.Resolve<IDTOpenCode>();
          email = IOC.Resolve<IEmail>();
      }

      /// <summary>
      /// Job entry point: loads the crawl configuration from the job data map
      /// and then crawls the main site.
      /// </summary>
      /// <param name="context">Execution context supplied by the scheduler.</param>
      public void Execute(IJobExecutionContext context)
      {
          Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
          DoMainUrl();
      }

      /// <summary>
      /// Crawls every configured list page and submits each parsed article to the
      /// data service, writing an info log entry for each article the service accepts.
      /// </summary>
      private void DoMainUrl()
      {
          foreach (string listUrl in GetMainUrl(Config))
          {
              foreach (LotterySkillModel skill in GetOpenListFromMainUrl(listUrl))
              {
                  if (services.LotterySkillModel(currentLottery, skill))
                  {
                      log.Info(GetType(), CommonHelper.GetJobMainLogInfo(Config, skill.Title));
                  }
              }
          }
      }

      /// <summary>
      /// Builds the list-page URLs to crawl by formatting the configured URL
      /// template with page numbers 1..N.
      /// </summary>
      /// <param name="config">Crawl configuration providing <c>MainUrl</c> and <c>MainUrlPages</c>.</param>
      /// <returns>Distinct page URLs in ascending page order.</returns>
      private List<string> GetMainUrl(SCCConfig config)
      {
          string template = config.MainUrl;
          // A non-positive page count falls back to a single page.
          int pages = config.MainUrlPages > 0 ? config.MainUrlPages : 1;

          var pageUrls = new List<string>();
          var seen = new HashSet<string>();
          for (int page = 1; page <= pages; page++)
          {
              string pageUrl = string.Format(template, page);
              // A template without a placeholder yields the same URL for every page.
              if (seen.Add(pageUrl))
              {
                  pageUrls.Add(pageUrl);
              }
          }

          return pageUrls;
      }

      /// <summary>
      /// Downloads one list page, extracts the article links from it and fetches
      /// every linked article.
      /// </summary>
      /// <param name="mainUrl">Absolute URL of the list page.</param>
      /// <returns>
      /// The articles that could be parsed. Never <c>null</c>; empty when the page
      /// cannot be downloaded, contains no matching links, or an error occurs.
      /// </returns>
      private List<LotterySkillModel> GetOpenListFromMainUrl(string mainUrl)
      {
          var result = new List<LotterySkillModel>();
          try
          {
              // Validation only: a malformed page URL throws here and is logged by the catch below.
              new Uri(mainUrl);

              string html = NetHelper.GetUrlResponse(mainUrl, Encoding.UTF8);
              if (html == null)
              {
                  return result;
              }

              var doc = new HtmlDocument();
              doc.LoadHtml(html);

              // All <a> elements directly under the list items of the article list.
              HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes("//*[@class='art-list']/ul/li/a");
              if (anchors == null)
              {
                  return result;
              }

              var seen = new HashSet<string>();
              foreach (HtmlNode anchor in anchors)
              {
                  // GetAttributeValue tolerates anchors with a missing or duplicated href.
                  string href = anchor.GetAttributeValue("href", string.Empty);
                  if (string.IsNullOrWhiteSpace(href))
                  {
                      continue;
                  }

                  string detailUrl = BuildDetailUrl(href.Trim());
                  // Fetch each article only once, in first-seen order.
                  if (!seen.Add(detailUrl))
                  {
                      continue;
                  }

                  LotterySkillModel skill = GetSkillModel(detailUrl);
                  // Articles that failed to download or parse are skipped so that
                  // blank records are never handed to the data service.
                  if (skill != null)
                  {
                      result.Add(skill);
                  }
              }
          }
          catch (Exception ex)
          {
              LogCrawlError(ex.Message);
          }

          return result;
      }

      /// <summary>
      /// Downloads a single article page and converts it into a skill record.
      /// </summary>
      /// <param name="url">Absolute URL of the article page.</param>
      /// <returns>
      /// The populated record, or <c>null</c> when the page cannot be downloaded
      /// or does not contain the expected title and content nodes.
      /// </returns>
      private LotterySkillModel GetSkillModel(string url)
      {
          try
          {
              string html = NetHelper.GetUrlResponse(url, Encoding.UTF8);
              if (html == null)
              {
                  return null;
              }

              var doc = new HtmlDocument();
              doc.LoadHtml(html);

              // The class name is spelled 'artile' in the selector; kept exactly as is
              // (presumably it matches the site's markup - confirm against a live page).
              HtmlNode article = doc.DocumentNode.SelectSingleNode("//*[@class='artile']");
              if (article == null)
              {
                  LogCrawlError("未找到文章节点, url: " + url);
                  return null;
              }

              // The title is the first direct <h1> child; the body is the second direct <div> child.
              HtmlNode titleNode = article.ChildNodes.FirstOrDefault(node => node.Name == "h1");
              HtmlNode contentNode = article.ChildNodes.Where(node => node.Name == "div").Skip(1).FirstOrDefault();
              if (titleNode == null || contentNode == null)
              {
                  LogCrawlError("文章缺少标题或正文节点, url: " + url);
                  return null;
              }

              return new LotterySkillModel
              {
                  Title = titleNode.InnerText.Trim(),
                  Author = "cn55128",
                  Content = contentNode.InnerHtml.Trim(),
                  IsDelete = false,
                  SourceUrl = url,
                  TypeId = lotterySkillType,
                  TypeName = lotterySkillType.GetEnumDescription()
              };
          }
          catch (Exception ex)
          {
              LogCrawlError(ex.Message);
              return null;
          }
      }

      /// <summary>
      /// Turns a link taken from a list page into the URL to fetch.
      /// </summary>
      /// <param name="href">Non-empty href value of an article link.</param>
      /// <returns>
      /// The href itself when it is already an absolute http(s) URL; otherwise
      /// <see cref="Host"/> followed by the href.
      /// </returns>
      private string BuildDetailUrl(string href)
      {
          bool isAbsolute = href.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                            || href.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

          // Relative links keep the plain concatenation so the resulting SourceUrl
          // values stay identical to those produced before this change.
          return isAbsolute ? href : Host + href;
      }

      /// <summary>
      /// Writes a crawl failure to the error log, prefixed with the configured
      /// area and the current lottery.
      /// </summary>
      /// <param name="detail">Failure description, typically an exception message.</param>
      private void LogCrawlError(string detail)
      {
          log.Error(GetType(),
              string.Format("【{0}】通过主抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, detail));
      }

      #region Attribute

      /// <summary>
      /// Base address that relative article links are appended to.
      /// </summary>
      public string Host = "http://www.55125.cn/";

      /// <summary>
      /// Crawl configuration; assigned at the start of every execution.
      /// </summary>
      private SCCConfig Config;

      /// <summary>
      /// Logger.
      /// </summary>
      private readonly LogHelper log;

      /// <summary>
      /// Data service that receives the crawled articles.
      /// </summary>
      private readonly IDTOpenCode services;

      /// <summary>
      /// Lottery identifier passed to the data service and used in log prefixes.
      /// </summary>
      private SCCLottery currentLottery => SCCLottery.LotterySkill;

      /// <summary>
      /// Skill category assigned to every crawled article (KL123).
      /// </summary>
      private readonly LotterySkillType lotterySkillType = LotterySkillType.KL123;

      /// <summary>
      /// E-mail service. Resolved in the constructor but not otherwise used by this class.
      /// </summary>
      private readonly IEmail email;

      #endregion
  }
  203. }