// QTNewsJob.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
using Quartz;
using SCC.Common;
using SCC.Interface;
using SCC.Models;
  10. namespace SCC.Crawler.LotteryNews
  11. {
  12. public class QTNewsJob : IJob
  13. {
  14. /// <summary>
  15. /// 构造函数
  16. /// </summary>
  17. public QTNewsJob()
  18. {
  19. log = new LogHelper();
  20. services = IOC.Resolve<IDTOpenCode>();
  21. email = IOC.Resolve<IEmail>();
  22. }
  23. /// <summary>
  24. /// 执行入口
  25. /// </summary>
  26. /// <param name="context"></param>
  27. public void Execute(IJobExecutionContext context)
  28. {
  29. Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
  30. DoMainUrl();
  31. }
  32. /// <summary>
  33. /// 执行主站技巧
  34. /// </summary>
  35. private void DoMainUrl()
  36. {
  37. List<string> urls = GetMainUrl(Config);
  38. #pragma warning disable CS0219 // 变量“lotterySkill”已被赋值,但从未使用过它的值
  39. LotteryNewsModel lotterySkill = null;
  40. #pragma warning restore CS0219 // 变量“lotterySkill”已被赋值,但从未使用过它的值
  41. foreach (string url in urls)
  42. {
  43. List<LotteryNewsModel> res = GetOpenListFromMainUrl(url);
  44. foreach (var LotteryNewsModel in res)
  45. {
  46. if (LotteryNewsModel.Content != null)
  47. {
  48. if (services.LotteryNewsModel(currentLottery, LotteryNewsModel))
  49. {
  50. //Do Success Log
  51. log.Info(GetType(), CommonHelper.GetJobMainLogInfo(Config, LotteryNewsModel.Title));
  52. isGetData = true;
  53. }
  54. }
  55. }
  56. }
  57. }
  58. /// <summary>
  59. /// 组装主站爬取地址
  60. /// </summary>
  61. /// <param name="config"></param>
  62. /// <returns></returns>
  63. private List<string> GetMainUrl(SCCConfig config)
  64. {
  65. List<string> urlList = new List<string>();
  66. string url = config.MainUrl;
  67. int pages = config.MainUrlPages > 0 ? config.MainUrlPages : 1;
  68. for (int i = 1; i <= pages; i++)
  69. {
  70. string res;
  71. if (i == 1)
  72. {
  73. res = "http://www.zhcw.com/xinwen/caizhongxinwen-qt/";
  74. }
  75. else
  76. {
  77. res = string.Format(url, i);
  78. }
  79. if (!urlList.Contains(res))
  80. {
  81. urlList.Add(res);
  82. }
  83. }
  84. return urlList;
  85. }
  86. /// <summary>
  87. /// 爬取主站技巧列表
  88. /// </summary>
  89. /// <param name="mainUrl"></param>
  90. /// <returns></returns>
  91. private List<LotteryNewsModel> GetOpenListFromMainUrl(string mainUrl)
  92. {
  93. var result = new List<LotteryNewsModel>();
  94. try
  95. {
  96. var url = new Uri(mainUrl);
  97. var htmlResource = NetHelper.GetUrlResponse(mainUrl, Encoding.GetEncoding("utf-8"));
  98. if (htmlResource == null) return result;
  99. HtmlDocument doc = new HtmlDocument();
  100. doc.LoadHtml(htmlResource);
  101. //获取li下面所有a标签
  102. HtmlNodeCollection nodeList = doc.DocumentNode.SelectNodes("//*[@class='Nleftbox']/ul/li/span/a");
  103. if (nodeList == null) return result;
  104. List<string> urls = new List<string>();
  105. //遍历a标签
  106. foreach (HtmlNode node in nodeList)
  107. {
  108. HtmlAttribute attr = node.Attributes.SingleOrDefault(a => a.Name.Equals("href"));
  109. if (attr != null)
  110. {
  111. string href = Host + attr.Value;
  112. //去重
  113. if (!urls.Contains(href))
  114. {
  115. urls.Add(href);
  116. }
  117. }
  118. }
  119. foreach (var url1 in urls)
  120. {
  121. var LotterySkill = GetSkillModel(url1);
  122. result.Add(LotterySkill);
  123. }
  124. }
  125. catch (Exception ex)
  126. {
  127. log.Error(GetType(),
  128. string.Format("【{0}】通过主抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, ex.Message));
  129. }
  130. return result;
  131. }
  132. /// <summary>
  133. /// 根据主站url获取技巧详情
  134. /// </summary>
  135. /// <param name="url"></param>
  136. /// <returns></returns>
  137. private LotteryNewsModel GetSkillModel(string url)
  138. {
  139. LotteryNewsModel lotterySkill = new LotteryNewsModel();
  140. try
  141. {
  142. var htmlResource = NetHelper.GetUrlResponse(url, Encoding.GetEncoding("utf-8"));
  143. if (htmlResource == null) return lotterySkill;
  144. HtmlDocument doc = new HtmlDocument();
  145. doc.LoadHtml(htmlResource);
  146. //获取li下面所有a标签
  147. var div = doc.DocumentNode.SelectSingleNode("//*[@class='news_content']");
  148. var Title = div.ChildNodes.Where(node => node.Name == "h2").ToList();
  149. var div1 = div.ChildNodes.Where(node => node.Name == "div").ToList();
  150. string txt = div1[2].InnerHtml.Trim();
  151. var Content = txt.Replace("<img src=\"", " <img src=\"http://www.zhcw.com").Replace("中彩网讯", "").Replace("中彩网综合报道", "综合报道").Replace("中彩网", "");
  152. if (Content == "")
  153. {
  154. Content = null;
  155. }
  156. lotterySkill.Title = Title[0].InnerText.Trim();
  157. lotterySkill.Author = "cn55128";
  158. lotterySkill.Content = Content;
  159. lotterySkill.IsDelete = false;
  160. lotterySkill.SourceUrl = url.ToString();
  161. lotterySkill.TypeID = lotterySkillType;
  162. lotterySkill.TypeName = lotterySkillType.GetEnumDescription();
  163. }
  164. catch (Exception ex)
  165. {
  166. log.Error(GetType(),
  167. string.Format("【{0}】通过主抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, ex.Message));
  168. }
  169. return lotterySkill;
  170. }
  171. #region Attribute
  172. /// <summary>
  173. /// 主机地址
  174. /// </summary>
  175. public string Host = "http://www.zhcw.com";
  176. /// <summary>
  177. /// 配置信息
  178. /// </summary>
  179. private SCCConfig Config;
  180. #pragma warning disable CS0414 // 字段“QTNewsJob.LatestItem”已被赋值,但从未使用过它的值
  181. /// <summary>
  182. /// 当天抓取的最新一期开奖记录
  183. /// </summary>
  184. private LotteryNewsModel LatestItem = null;
  185. #pragma warning restore CS0414 // 字段“QTNewsJob.LatestItem”已被赋值,但从未使用过它的值
  186. #pragma warning disable CS0414 // 字段“QTNewsJob.FailedQiHaoList”已被赋值,但从未使用过它的值
  187. /// <summary>
  188. /// 当天抓取失败列表
  189. /// </summary>
  190. private List<string> FailedQiHaoList = null;
  191. #pragma warning restore CS0414 // 字段“QTNewsJob.FailedQiHaoList”已被赋值,但从未使用过它的值
  192. /// <summary>
  193. /// 日志对象
  194. /// </summary>
  195. private readonly LogHelper log;
  196. /// <summary>
  197. /// 数据服务
  198. /// </summary>
  199. private readonly IDTOpenCode services;
  200. /// <summary>
  201. /// 当前彩种
  202. /// </summary>
  203. private SCCLottery currentLottery => SCCLottery.LotteryNews;
  204. /// <summary>
  205. /// 福彩3D技巧
  206. /// </summary>
  207. private LotteryNewsType lotterySkillType = LotteryNewsType.QTNews;
  208. /// <summary>
  209. /// 邮件接口
  210. /// </summary>
  211. private IEmail email;
  212. #pragma warning disable CS0414 // 字段“QTNewsJob.isGetData”已被赋值,但从未使用过它的值
  213. /// <summary>
  214. /// 是否本次运行抓取到开奖数据
  215. /// </summary>
  216. private bool isGetData = false;
  217. #pragma warning restore CS0414 // 字段“QTNewsJob.isGetData”已被赋值,但从未使用过它的值
  218. #endregion
  219. }
  220. }