// QXCSkillJob.cs 7.0 KB
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using HtmlAgilityPack;
  6. using Quartz;
  7. using SCC.Common;
  8. using SCC.Interface;
  9. using SCC.Models;
  namespace SCC.Crawler.LotterySkill
  {
      /// <summary>
      /// Quartz job that crawls QXC lottery "skill" articles from the configured main site
      /// and hands every successfully parsed article to the data service.
      /// </summary>
      public class QXCSkillJob : IJob
      {
          /// <summary>
          /// Creates the job, instantiating the logger and resolving the data service and
          /// e-mail service from the IOC container.
          /// </summary>
          public QXCSkillJob()
          {
              log = new LogHelper();
              services = IOC.Resolve<IDTOpenCode>();
              email = IOC.Resolve<IEmail>();
          }

          /// <summary>
          /// Job entry point: reads the job configuration from the job data map and runs the crawl.
          /// </summary>
          /// <param name="context">Execution context supplied by the scheduler.</param>
          public void Execute(IJobExecutionContext context)
          {
              Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
              DoMainUrl();
          }

          /// <summary>
          /// Crawls every configured listing page and passes each parsed article to the data
          /// service; a success line is logged whenever the service call returns true.
          /// </summary>
          private void DoMainUrl()
          {
              foreach (string url in GetMainUrl(Config))
              {
                  foreach (LotterySkillModel skill in GetOpenListFromMainUrl(url))
                  {
                      if (services.LotterySkillModel(currentLottery, skill))
                      {
                          // Success log.
                          log.Info(GetType(), CommonHelper.GetJobMainLogInfo(Config, skill.Title));
                          isGetData = true;
                      }
                  }
              }
          }

          /// <summary>
          /// Builds the list of listing-page addresses by formatting <c>MainUrl</c> with the
          /// page numbers 1..<c>MainUrlPages</c> (at least one page is always produced).
          /// </summary>
          /// <param name="config">Job configuration holding the URL template and page count.</param>
          /// <returns>
          /// The distinct formatted addresses in page order; empty when no URL template is configured.
          /// </returns>
          private List<string> GetMainUrl(SCCConfig config)
          {
              var urlList = new List<string>();
              string template = config.MainUrl;
              if (string.IsNullOrWhiteSpace(template))
              {
                  // string.Format would throw on a null template; report the misconfiguration instead.
                  LogCrawlError("MainUrl is not configured");
                  return urlList;
              }

              int pages = config.MainUrlPages > 0 ? config.MainUrlPages : 1;
              for (int page = 1; page <= pages; page++)
              {
                  string formatted = string.Format(template, page);
                  // A template without a placeholder yields the same address for every page.
                  if (!urlList.Contains(formatted))
                  {
                      urlList.Add(formatted);
                  }
              }

              return urlList;
          }

          /// <summary>
          /// Downloads one listing page, collects the article links found under the
          /// <c>art-list</c> element and loads each linked article.
          /// </summary>
          /// <param name="mainUrl">Absolute address of the listing page.</param>
          /// <returns>
          /// The articles that could be downloaded and parsed. Articles that fail are skipped,
          /// so the list never contains blank placeholder models. Errors are logged, not thrown.
          /// </returns>
          private List<LotterySkillModel> GetOpenListFromMainUrl(string mainUrl)
          {
              var result = new List<LotterySkillModel>();
              try
              {
                  // Constructing the Uri validates the address up front; a malformed
                  // value throws here and is logged by the catch block below.
                  new Uri(mainUrl);

                  var htmlResource = NetHelper.GetUrlResponse(mainUrl, Encoding.UTF8);
                  if (htmlResource == null)
                  {
                      return result;
                  }

                  var doc = new HtmlDocument();
                  doc.LoadHtml(htmlResource);

                  // All anchors inside the list items of the article list.
                  HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes("//*[@class='art-list']/ul/li/a");
                  if (anchors == null)
                  {
                      return result;
                  }

                  // The set gives cheap duplicate detection; the list keeps document order.
                  var seen = new HashSet<string>();
                  var articleUrls = new List<string>();
                  foreach (HtmlNode anchor in anchors)
                  {
                      // FirstOrDefault (not SingleOrDefault): a malformed anchor carrying a duplicate
                      // href attribute must not abort the whole page with an exception.
                      HtmlAttribute attr = anchor.Attributes.FirstOrDefault(a => a.Name.Equals("href"));
                      if (attr == null || string.IsNullOrWhiteSpace(attr.Value))
                      {
                          continue;
                      }

                      // NOTE(review): plain concatenation assumes site-relative hrefs; Host ends with '/',
                      // so an href starting with '/' yields a double slash. Left unchanged because the
                      // resulting string is stored as SourceUrl - confirm before normalising.
                      string href = Host + attr.Value;
                      if (seen.Add(href))
                      {
                          articleUrls.Add(href);
                      }
                  }

                  foreach (string articleUrl in articleUrls)
                  {
                      LotterySkillModel skill = GetSkillModel(articleUrl);
                      if (skill != null)
                      {
                          result.Add(skill);
                      }
                  }
              }
              catch (Exception ex)
              {
                  LogCrawlError(ex.Message);
              }

              return result;
          }

          /// <summary>
          /// Downloads a single article page and maps it to a <see cref="LotterySkillModel"/>.
          /// </summary>
          /// <param name="url">Absolute address of the article page.</param>
          /// <returns>
          /// The populated model, or <c>null</c> when the page could not be downloaded or does not
          /// have the expected structure (a heading plus at least two child div elements).
          /// </returns>
          private LotterySkillModel GetSkillModel(string url)
          {
              try
              {
                  var htmlResource = NetHelper.GetUrlResponse(url, Encoding.UTF8);
                  if (htmlResource == null)
                  {
                      return null;
                  }

                  var doc = new HtmlDocument();
                  doc.LoadHtml(htmlResource);

                  // Article container; the class name "artile" is spelled exactly as the selector
                  // in the original code - presumably it mirrors the site's markup.
                  HtmlNode article = doc.DocumentNode.SelectSingleNode("//*[@class='artile']");
                  if (article == null)
                  {
                      LogCrawlError("article container not found: " + url);
                      return null;
                  }

                  HtmlNode heading = article.ChildNodes.FirstOrDefault(node => node.Name == "h1");
                  List<HtmlNode> sections = article.ChildNodes.Where(node => node.Name == "div").ToList();
                  if (heading == null || sections.Count < 2)
                  {
                      LogCrawlError("unexpected article layout: " + url);
                      return null;
                  }

                  return new LotterySkillModel
                  {
                      Title = heading.InnerText.Trim(),
                      Author = "cn55128",
                      // The second child div is the one used as the article body.
                      Content = sections[1].InnerHtml.Trim(),
                      IsDelete = false,
                      SourceUrl = url,
                      TypeId = lotterySkillType,
                      TypeName = lotterySkillType.GetEnumDescription()
                  };
              }
              catch (Exception ex)
              {
                  LogCrawlError(ex.Message);
                  return null;
              }
          }

          /// <summary>
          /// Writes a crawl error to the log using the job's standard message format.
          /// </summary>
          /// <param name="message">Error detail inserted into the log line.</param>
          private void LogCrawlError(string message)
          {
              log.Error(GetType(),
                  string.Format("【{0}】通过主抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, message));
          }

          #region Attribute
          /// <summary>
          /// Host address that is prefixed to the links found on a listing page.
          /// </summary>
          public string Host = "http://www.55125.cn/";
          /// <summary>
          /// Job configuration, loaded at the start of <see cref="Execute"/>.
          /// </summary>
          private SCCConfig Config;
          /// <summary>
          /// Latest item crawled today (not referenced elsewhere in this class).
          /// </summary>
          private LotterySkillModel LatestItem = null;
          /// <summary>
          /// Items that failed to be crawled today (not referenced elsewhere in this class).
          /// </summary>
          private List<string> FailedQiHaoList = null;
          /// <summary>
          /// Logger.
          /// </summary>
          private readonly LogHelper log;
          /// <summary>
          /// Data service that receives the crawled articles.
          /// </summary>
          private readonly IDTOpenCode services;
          /// <summary>
          /// Lottery kind passed to the data service and included in log messages.
          /// </summary>
          private SCCLottery currentLottery => SCCLottery.LotterySkill;
          /// <summary>
          /// Skill category assigned to every crawled article (QXC).
          /// </summary>
          private LotterySkillType lotterySkillType = LotterySkillType.QXC;
          /// <summary>
          /// E-mail service resolved in the constructor.
          /// </summary>
          private readonly IEmail email;
          /// <summary>
          /// Set to true once at least one article has been accepted by the data service in this run.
          /// </summary>
          private bool isGetData = false;
          #endregion
      }
  }