SSqSkillJob.cs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using HtmlAgilityPack;
  6. using Quartz;
  7. using SCC.Common;
  8. using SCC.Interface;
  9. using SCC.Models;
  namespace SCC.Crawler.LotterySkill
  {
      /// <summary>
      /// Quartz job that crawls SSQ lottery skill articles from a main site and a
      /// backup site and hands every successfully parsed article to the
      /// <see cref="IDTOpenCode"/> data service.
      /// </summary>
      public class SSqSkillJob : IJob
      {
          /// <summary>
          /// Log format used for main-site failures ({0} = area + lottery, {1} = error text).
          /// </summary>
          private const string MainErrorFormat = "【{0}】通过主抓取开奖列表时发生错误,错误信息【{1}】";

          /// <summary>
          /// Log format used for backup-site failures ({0} = area + lottery, {1} = error text).
          /// </summary>
          private const string BackErrorFormat = "【{0}】通过副抓取开奖列表时发生错误,错误信息【{1}】";

          /// <summary>
          /// Creates the job and resolves its logger, data service and e-mail service.
          /// </summary>
          public SSqSkillJob()
          {
              log = new LogHelper();
              services = IOC.Resolve<IDTOpenCode>();
              email = IOC.Resolve<IEmail>();
          }

          /// <summary>
          /// Job entry point: loads the configuration from the job data map, then
          /// crawls the backup site followed by the main site.
          /// </summary>
          /// <param name="context">Quartz execution context carrying the job data map.</param>
          public void Execute(IJobExecutionContext context)
          {
              Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
              DoBackUrl();
              DoMainUrl();
          }

          /// <summary>
          /// Crawls every configured main-site list page and stores the articles found.
          /// </summary>
          private void DoMainUrl()
          {
              CrawlAndStore(GetMainUrl(Config), GetOpenListFromMainUrl);
          }

          /// <summary>
          /// Crawls every configured backup-site list page and stores the articles found.
          /// </summary>
          private void DoBackUrl()
          {
              CrawlAndStore(GetBackUrl(Config), GetOpenListFromBackUrl);
          }

          /// <summary>
          /// Fetches the articles behind each list URL and passes them to the data service.
          /// </summary>
          /// <param name="listUrls">List-page URLs to crawl.</param>
          /// <param name="fetchArticles">Site-specific function turning a list URL into parsed articles.</param>
          private void CrawlAndStore(IEnumerable<string> listUrls, Func<string, List<LotterySkillModel>> fetchArticles)
          {
              foreach (string listUrl in listUrls)
              {
                  foreach (LotterySkillModel article in fetchArticles(listUrl))
                  {
                      // A true result is treated as "stored successfully" and logged as such.
                      if (services.LotterySkillModel(currentLottery, article))
                      {
                          log.Info(GetType(), CommonHelper.GetJobMainLogInfo(Config, article.Title));
                          isGetData = true;
                      }
                  }
              }
          }

          /// <summary>
          /// Builds the main-site list-page URLs from the configuration.
          /// </summary>
          /// <param name="config">Job configuration providing MainUrl and MainUrlPages.</param>
          /// <returns>Distinct list-page URLs; empty when no main URL is configured.</returns>
          private List<string> GetMainUrl(SCCConfig config)
          {
              return BuildPagedUrls(config.MainUrl, config.MainUrlPages);
          }

          /// <summary>
          /// Builds the backup-site list-page URLs from the configuration.
          /// </summary>
          /// <param name="config">Job configuration providing BackUrl and BackUrlPages.</param>
          /// <returns>Distinct list-page URLs; empty when no backup URL is configured.</returns>
          private List<string> GetBackUrl(SCCConfig config)
          {
              // The page count must come from BackUrlPages alone; the previous code
              // tested MainUrlPages here, which could yield zero or too few pages.
              return BuildPagedUrls(config.BackUrl, config.BackUrlPages);
          }

          /// <summary>
          /// Expands a URL template into one URL per page (pages are numbered from 1).
          /// </summary>
          /// <param name="urlTemplate">Composite format string; {0} is replaced by the page number.</param>
          /// <param name="configuredPages">Configured page count; values below 1 are treated as 1.</param>
          /// <returns>Distinct URLs in page order; empty when the template is null or blank.</returns>
          private static List<string> BuildPagedUrls(string urlTemplate, int configuredPages)
          {
              var pageUrls = new List<string>();
              if (string.IsNullOrWhiteSpace(urlTemplate))
              {
                  // An unconfigured site is skipped instead of aborting the whole job.
                  return pageUrls;
              }

              int pages = configuredPages > 0 ? configuredPages : 1;
              for (int page = 1; page <= pages; page++)
              {
                  string pageUrl = string.Format(urlTemplate, page);
                  // A template without a {0} placeholder yields the same URL for every page.
                  if (!pageUrls.Contains(pageUrl))
                  {
                      pageUrls.Add(pageUrl);
                  }
              }

              return pageUrls;
          }

          /// <summary>
          /// Crawls one main-site list page and loads every article it links to.
          /// </summary>
          /// <param name="mainUrl">URL of the main-site list page.</param>
          /// <returns>Successfully parsed articles; empty on any failure.</returns>
          private List<LotterySkillModel> GetOpenListFromMainUrl(string mainUrl)
          {
              return CrawlArticleList(mainUrl, "//*[@class='art-list']/ul/li/a", Host, GetSkillModel, MainErrorFormat);
          }

          /// <summary>
          /// Crawls one backup-site list page and loads every article it links to.
          /// </summary>
          /// <param name="backUrl">URL of the backup-site list page.</param>
          /// <returns>Successfully parsed articles; empty on any failure.</returns>
          private List<LotterySkillModel> GetOpenListFromBackUrl(string backUrl)
          {
              return CrawlArticleList(backUrl, "//*[@class='listContainer']/ul/li/a", HostBackUrl, GetSkillModelBackUrl, BackErrorFormat);
          }

          /// <summary>
          /// Downloads a list page, collects the distinct article links selected by
          /// <paramref name="anchorXPath"/> and loads each linked article.
          /// </summary>
          /// <param name="listUrl">Absolute URL of the list page.</param>
          /// <param name="anchorXPath">XPath selecting the article anchor elements.</param>
          /// <param name="hostPrefix">Prefix prepended to every href value.</param>
          /// <param name="loadArticle">Loads a single article; returns null when it cannot be parsed.</param>
          /// <param name="errorFormat">Log format for failures ({0} = area + lottery, {1} = error text).</param>
          /// <returns>The articles that could be parsed, in page order; never null.</returns>
          private List<LotterySkillModel> CrawlArticleList(string listUrl, string anchorXPath, string hostPrefix,
              Func<string, LotterySkillModel> loadArticle, string errorFormat)
          {
              var articles = new List<LotterySkillModel>();
              try
              {
                  Uri parsedListUrl;
                  if (!Uri.TryCreate(listUrl, UriKind.Absolute, out parsedListUrl))
                  {
                      log.Error(GetType(), string.Format(errorFormat, Config.Area + currentLottery, "无效的地址:" + listUrl));
                      return articles;
                  }

                  var html = NetHelper.GetUrlResponse(listUrl, Encoding.GetEncoding("utf-8"));
                  if (html == null)
                  {
                      return articles;
                  }

                  var doc = new HtmlDocument();
                  doc.LoadHtml(html);

                  HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes(anchorXPath);
                  if (anchors == null)
                  {
                      return articles;
                  }

                  var seenUrls = new HashSet<string>();
                  foreach (HtmlNode anchor in anchors)
                  {
                      // FirstOrDefault tolerates a malformed anchor with duplicate href
                      // attributes instead of throwing and abandoning the whole page.
                      HtmlAttribute hrefAttribute = anchor.Attributes.FirstOrDefault(a => a.Name.Equals("href"));
                      if (hrefAttribute == null)
                      {
                          continue;
                      }

                      // NOTE(review): plain concatenation assumes site-relative hrefs; kept as is
                      // so the stored SourceUrl values do not change - confirm before normalizing.
                      string articleUrl = hostPrefix + hrefAttribute.Value;
                      if (!seenUrls.Add(articleUrl))
                      {
                          continue;
                      }

                      LotterySkillModel article = loadArticle(articleUrl);
                      if (article != null)
                      {
                          articles.Add(article);
                      }
                  }
              }
              catch (Exception ex)
              {
                  log.Error(GetType(), string.Format(errorFormat, Config.Area + currentLottery, ex.Message));
              }

              return articles;
          }

          /// <summary>
          /// Loads and parses one backup-site article.
          /// </summary>
          /// <param name="url">Absolute URL of the article page.</param>
          /// <returns>The parsed article, or null when the page could not be downloaded or parsed.</returns>
          private LotterySkillModel GetSkillModelBackUrl(string url)
          {
              return ParseArticle(url, "//*[@class='article mb-10']", "h5", 2, BackErrorFormat);
          }

          /// <summary>
          /// Loads and parses one main-site article.
          /// </summary>
          /// <param name="url">Absolute URL of the article page.</param>
          /// <returns>The parsed article, or null when the page could not be downloaded or parsed.</returns>
          private LotterySkillModel GetSkillModel(string url)
          {
              return ParseArticle(url, "//*[@class='artile']", "h1", 1, MainErrorFormat);
          }

          /// <summary>
          /// Downloads an article page and extracts its title and content.
          /// </summary>
          /// <param name="url">Absolute URL of the article page.</param>
          /// <param name="containerXPath">XPath selecting the article container element.</param>
          /// <param name="titleTag">Name of the container's child element that holds the title.</param>
          /// <param name="contentDivIndex">Zero-based index, among the container's child divs, of the content div.</param>
          /// <param name="errorFormat">Log format for failures ({0} = area + lottery, {1} = error text).</param>
          /// <returns>
          /// The populated model, or null when the page is unavailable or lacks the expected
          /// structure, so that callers never store an empty article.
          /// </returns>
          private LotterySkillModel ParseArticle(string url, string containerXPath, string titleTag,
              int contentDivIndex, string errorFormat)
          {
              try
              {
                  var html = NetHelper.GetUrlResponse(url, Encoding.GetEncoding("utf-8"));
                  if (html == null)
                  {
                      return null;
                  }

                  var doc = new HtmlDocument();
                  doc.LoadHtml(html);

                  HtmlNode container = doc.DocumentNode.SelectSingleNode(containerXPath);
                  HtmlNode titleNode = container == null
                      ? null
                      : container.ChildNodes.FirstOrDefault(node => node.Name == titleTag);
                  HtmlNode contentNode = container == null
                      ? null
                      : container.ChildNodes.Where(node => node.Name == "div").ElementAtOrDefault(contentDivIndex);

                  if (titleNode == null || contentNode == null)
                  {
                      log.Error(GetType(),
                          string.Format(errorFormat, Config.Area + currentLottery, "页面结构不符合预期,地址:" + url));
                      return null;
                  }

                  return new LotterySkillModel
                  {
                      Title = titleNode.InnerText.Trim(),
                      Author = "cn55128",
                      Content = contentNode.InnerHtml.Trim(),
                      IsDelete = false,
                      SourceUrl = url,
                      TypeId = lotterySkillType,
                      TypeName = lotterySkillType.GetEnumDescription()
                  };
              }
              catch (Exception ex)
              {
                  log.Error(GetType(), string.Format(errorFormat, Config.Area + currentLottery, ex.Message));
                  return null;
              }
          }

          #region Attribute
          /// <summary>
          /// Prefix prepended to article hrefs found on the main site.
          /// </summary>
          public string Host = "http://www.55125.cn/";

          /// <summary>
          /// Prefix prepended to article hrefs found on the backup site.
          /// </summary>
          public string HostBackUrl = "https://www.cz89.com/";

          /// <summary>
          /// Job configuration, loaded from the job data map at the start of Execute.
          /// </summary>
          private SCCConfig Config;

          /// <summary>
          /// Logger.
          /// </summary>
          private readonly LogHelper log;

          /// <summary>
          /// Data service that receives the crawled articles.
          /// </summary>
          private readonly IDTOpenCode services;

          /// <summary>
          /// Lottery identifier passed to the data service and used in log messages.
          /// </summary>
          private SCCLottery currentLottery => SCCLottery.LotterySkill;

          /// <summary>
          /// Skill category assigned to every crawled article (SSQ).
          /// </summary>
          private LotterySkillType lotterySkillType = LotterySkillType.SSQ;

          /// <summary>
          /// E-mail service; resolved in the constructor but not used by this class.
          /// </summary>
          private IEmail email;

          /// <summary>
          /// Set to true once at least one article was stored during this run.
          /// </summary>
          private bool isGetData = false;
          #endregion
      }
  }