// PL3SkillJob.cs
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using HtmlAgilityPack;
  6. using Quartz;
  7. using SCC.Common;
  8. using SCC.Interface;
  9. using SCC.Models;
  namespace SCC.Crawler.LotterySkill
  {
      /// <summary>
      /// Quartz job that crawls lottery "skill" articles of type <see cref="LotterySkillType.PL3"/>
      /// from a main site and a backup site and hands each article to the data service.
      /// </summary>
      public class PL3SkillJob : IJob
      {
          /// <summary>
          /// Error-log template used by the main-site crawl ({0} = area + lottery, {1} = detail).
          /// </summary>
          private const string MainErrorFormat = "【{0}】通过主抓取开奖列表时发生错误,错误信息【{1}】";

          /// <summary>
          /// Error-log template used by the backup-site crawl ({0} = area + lottery, {1} = detail).
          /// The original code reused the main-site text here, which made backup failures
          /// indistinguishable from main-site failures in the log.
          /// </summary>
          private const string BackErrorFormat = "【{0}】通过副抓取开奖列表时发生错误,错误信息【{1}】";

          /// <summary>
          /// Creates the job and resolves its logger, data service and e-mail service.
          /// </summary>
          public PL3SkillJob()
          {
              log = new LogHelper();
              services = IOC.Resolve<IDTOpenCode>();
              email = IOC.Resolve<IEmail>();
          }

          /// <summary>
          /// Job entry point: loads the configuration from the job data map, then crawls
          /// the backup site followed by the main site.
          /// </summary>
          /// <param name="context">Quartz execution context carrying the job data map.</param>
          public void Execute(IJobExecutionContext context)
          {
              Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
              DoBackUrl();
              DoMainUrl();
          }

          /// <summary>
          /// Crawls every configured main-site list page and stores the articles found.
          /// </summary>
          private void DoMainUrl()
          {
              SaveSkills(GetMainUrl(Config), GetOpenListFromMainUrl);
          }

          /// <summary>
          /// Crawls every configured backup-site list page and stores the articles found.
          /// </summary>
          private void DoBackUrl()
          {
              SaveSkills(GetBackUrl(Config), GetOpenListFromBackUrl);
          }

          /// <summary>
          /// Fetches the articles of each list page and passes them to the data service,
          /// logging every article the service reports as stored.
          /// </summary>
          /// <param name="listUrls">List-page URLs to crawl, in order.</param>
          /// <param name="fetchSkills">Crawler that turns one list-page URL into articles.</param>
          private void SaveSkills(IEnumerable<string> listUrls, Func<string, List<LotterySkillModel>> fetchSkills)
          {
              foreach (string listUrl in listUrls)
              {
                  foreach (LotterySkillModel skill in fetchSkills(listUrl))
                  {
                      if (!services.LotterySkillModel(currentLottery, skill))
                      {
                          continue;
                      }

                      // NOTE(review): the "main" log helper is used for both sites, as in the original.
                      log.Info(GetType(), CommonHelper.GetJobMainLogInfo(Config, skill.Title));
                      isGetData = true;
                  }
              }
          }

          /// <summary>
          /// Builds the main-site list-page URLs from the configuration.
          /// </summary>
          /// <param name="config">Job configuration providing MainUrl and MainUrlPages.</param>
          /// <returns>Distinct page URLs in page order; empty when no URL template is configured.</returns>
          private List<string> GetMainUrl(SCCConfig config)
          {
              return BuildPageUrls(config.MainUrl, config.MainUrlPages);
          }

          /// <summary>
          /// Builds the backup-site list-page URLs from the configuration.
          /// </summary>
          /// <param name="config">Job configuration providing BackUrl and BackUrlPages.</param>
          /// <returns>Distinct page URLs in page order; empty when no URL template is configured.</returns>
          private List<string> GetBackUrl(SCCConfig config)
          {
              // Fix: the page count was previously validated against MainUrlPages.
              return BuildPageUrls(config.BackUrl, config.BackUrlPages);
          }

          /// <summary>
          /// Expands a URL template into one URL per page (pages are numbered from 1).
          /// </summary>
          /// <param name="urlTemplate">Composite format string; the page number is argument {0}.</param>
          /// <param name="configuredPages">Configured page count; values below 1 are treated as 1.</param>
          /// <returns>Distinct URLs in page order; empty when the template is null or blank.</returns>
          private static List<string> BuildPageUrls(string urlTemplate, int configuredPages)
          {
              var pageUrls = new List<string>();
              if (string.IsNullOrWhiteSpace(urlTemplate))
              {
                  // A site without a configured URL is skipped instead of throwing out of Execute.
                  return pageUrls;
              }

              int pages = configuredPages > 0 ? configuredPages : 1;
              var seen = new HashSet<string>();
              for (int page = 1; page <= pages; page++)
              {
                  string pageUrl = string.Format(urlTemplate, page);

                  // A template without a {0} placeholder yields the same URL for every page.
                  if (seen.Add(pageUrl))
                  {
                      pageUrls.Add(pageUrl);
                  }
              }

              return pageUrls;
          }

          /// <summary>
          /// Crawls one main-site list page and loads every article it links to.
          /// </summary>
          /// <param name="mainUrl">Absolute URL of the list page.</param>
          /// <returns>Successfully parsed articles; empty on failure.</returns>
          private List<LotterySkillModel> GetOpenListFromMainUrl(string mainUrl)
          {
              return CrawlSkillList(mainUrl, "//*[@class='art-list']/ul/li/a", Host, GetSkillModel, MainErrorFormat);
          }

          /// <summary>
          /// Crawls one backup-site list page and loads every article it links to.
          /// </summary>
          /// <param name="backUrl">Absolute URL of the list page.</param>
          /// <returns>Successfully parsed articles; empty on failure.</returns>
          private List<LotterySkillModel> GetOpenListFromBackUrl(string backUrl)
          {
              return CrawlSkillList(backUrl, "//*[@class='listContainer']/ul/li/a", HostBackUrl, GetSkillModelBackUrl, BackErrorFormat);
          }

          /// <summary>
          /// Downloads a list page, collects the distinct article links selected by
          /// <paramref name="linkXPath"/> and fetches each article.
          /// </summary>
          /// <param name="listUrl">Absolute URL of the list page.</param>
          /// <param name="linkXPath">XPath selecting the article anchor elements.</param>
          /// <param name="hostPrefix">Prefix prepended to every href found on the page.</param>
          /// <param name="fetchDetail">Loads one article; returns null when it cannot be parsed.</param>
          /// <param name="errorFormat">Log template for failures ({0} = lottery, {1} = detail).</param>
          /// <returns>Successfully parsed articles; possibly partial or empty when an error occurs.</returns>
          private List<LotterySkillModel> CrawlSkillList(
              string listUrl,
              string linkXPath,
              string hostPrefix,
              Func<string, LotterySkillModel> fetchDetail,
              string errorFormat)
          {
              var result = new List<LotterySkillModel>();
              try
              {
                  // Constructed only for validation: throws for a null or non-absolute URL,
                  // which is logged below exactly as the original code did.
                  new Uri(listUrl);

                  string html = NetHelper.GetUrlResponse(listUrl, Encoding.UTF8);
                  if (html == null)
                  {
                      return result;
                  }

                  var doc = new HtmlDocument();
                  doc.LoadHtml(html);

                  HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes(linkXPath);
                  if (anchors == null)
                  {
                      return result;
                  }

                  var articleUrls = new List<string>();
                  var seen = new HashSet<string>();
                  foreach (HtmlNode anchor in anchors)
                  {
                      // FirstOrDefault instead of SingleOrDefault: a tag with a duplicated
                      // href attribute must not abort the whole page.
                      HtmlAttribute hrefAttr = anchor.Attributes.FirstOrDefault(a => a.Name.Equals("href"));
                      if (hrefAttr == null)
                      {
                          continue;
                      }

                      // NOTE(review): plain concatenation is kept so stored SourceUrl values stay
                      // unchanged; it assumes hrefs are site-relative - confirm against the live pages.
                      string articleUrl = hostPrefix + hrefAttr.Value;
                      if (seen.Add(articleUrl))
                      {
                          articleUrls.Add(articleUrl);
                      }
                  }

                  foreach (string articleUrl in articleUrls)
                  {
                      LotterySkillModel skill = fetchDetail(articleUrl);

                      // Articles that could not be downloaded or parsed are skipped so that
                      // blank records never reach the data service.
                      if (skill != null)
                      {
                          result.Add(skill);
                      }
                  }
              }
              catch (Exception ex)
              {
                  LogCrawlError(errorFormat, ex.Message);
              }

              return result;
          }

          /// <summary>
          /// Loads one backup-site article.
          /// </summary>
          /// <param name="url">Absolute URL of the article page.</param>
          /// <returns>The parsed article, or null when it cannot be downloaded or parsed.</returns>
          private LotterySkillModel GetSkillModelBackUrl(string url)
          {
              return ParseSkillDetail(url, "//*[@class='article mb-10']", "h5", 2, BackErrorFormat);
          }

          /// <summary>
          /// Loads one main-site article.
          /// </summary>
          /// <param name="url">Absolute URL of the article page.</param>
          /// <returns>The parsed article, or null when it cannot be downloaded or parsed.</returns>
          private LotterySkillModel GetSkillModel(string url)
          {
              // NOTE(review): 'artile' is kept verbatim from the original selector; it looks like
              // a typo of 'article' but may be the class name the site really uses - confirm.
              return ParseSkillDetail(url, "//*[@class='artile']", "h1", 1, MainErrorFormat);
          }

          /// <summary>
          /// Downloads an article page and extracts its title and content.
          /// </summary>
          /// <param name="url">Absolute URL of the article page; stored as SourceUrl.</param>
          /// <param name="articleXPath">XPath selecting the article container element.</param>
          /// <param name="titleTag">Name of the container's child element holding the title.</param>
          /// <param name="contentDivIndex">Zero-based index, among the container's div children, of the content div.</param>
          /// <param name="errorFormat">Log template for failures ({0} = lottery, {1} = detail).</param>
          /// <returns>The parsed article, or null when the page is unavailable or has an unexpected structure.</returns>
          private LotterySkillModel ParseSkillDetail(
              string url,
              string articleXPath,
              string titleTag,
              int contentDivIndex,
              string errorFormat)
          {
              try
              {
                  string html = NetHelper.GetUrlResponse(url, Encoding.UTF8);
                  if (html == null)
                  {
                      return null;
                  }

                  var doc = new HtmlDocument();
                  doc.LoadHtml(html);

                  // Explicit structure checks replace the NullReferenceException /
                  // ArgumentOutOfRangeException the original relied on.
                  HtmlNode article = doc.DocumentNode.SelectSingleNode(articleXPath);
                  HtmlNode titleNode = article?.ChildNodes.FirstOrDefault(n => n.Name == titleTag);
                  HtmlNode contentNode = article?.ChildNodes
                      .Where(n => n.Name == "div")
                      .ElementAtOrDefault(contentDivIndex);
                  if (titleNode == null || contentNode == null)
                  {
                      LogCrawlError(errorFormat, "未找到标题或正文节点:" + url);
                      return null;
                  }

                  return new LotterySkillModel
                  {
                      Title = titleNode.InnerText.Trim(),
                      Author = "cn55128",
                      Content = contentNode.InnerHtml.Trim(),
                      IsDelete = false,
                      SourceUrl = url,
                      TypeId = lotterySkillType,
                      TypeName = lotterySkillType.GetEnumDescription()
                  };
              }
              catch (Exception ex)
              {
                  LogCrawlError(errorFormat, ex.Message);
                  return null;
              }
          }

          /// <summary>
          /// Writes a crawl failure to the error log using the given template.
          /// </summary>
          /// <param name="errorFormat">Template with {0} = area + lottery and {1} = detail.</param>
          /// <param name="detail">Failure description, usually the exception message.</param>
          private void LogCrawlError(string errorFormat, string detail)
          {
              log.Error(GetType(), string.Format(errorFormat, Config.Area + currentLottery, detail));
          }

          #region Attribute

          /// <summary>
          /// Main-site host, prepended to hrefs found on main-site list pages.
          /// </summary>
          public string Host = "http://www.55125.cn/";

          /// <summary>
          /// Backup-site host, prepended to hrefs found on backup-site list pages.
          /// </summary>
          public string HostBackUrl = "https://www.cz89.com/";

          /// <summary>
          /// Job configuration, loaded from the job data map at the start of each execution.
          /// </summary>
          private SCCConfig Config;

          /// <summary>
          /// Logger.
          /// </summary>
          private readonly LogHelper log;

          /// <summary>
          /// Data service that receives the crawled articles.
          /// </summary>
          private readonly IDTOpenCode services;

          /// <summary>
          /// Lottery category passed to the data service and used in log messages.
          /// </summary>
          private SCCLottery currentLottery => SCCLottery.LotterySkill;

          /// <summary>
          /// Skill type assigned to every crawled article (PL3).
          /// </summary>
          private readonly LotterySkillType lotterySkillType = LotterySkillType.PL3;

          /// <summary>
          /// E-mail service; resolved in the constructor but not used by this class.
          /// </summary>
          private readonly IEmail email;

          /// <summary>
          /// Set to true once the data service reports an article as stored; not read by this class.
          /// </summary>
          private bool isGetData = false;

          #endregion
      }
  }