FC3DSkillJob.cs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Runtime.InteropServices;
  5. using System.Text;
  6. using HtmlAgilityPack;
  7. using Quartz;
  8. using SCC.Common;
  9. using SCC.Interface;
  10. using SCC.Models;
  namespace SCC.Crawler.LotterySkill
  {
      /// <summary>
      /// Quartz job that crawls FC3D lottery-skill articles from a main site and a
      /// backup site and passes every successfully parsed article to the data service.
      /// </summary>
      public class FC3DSkillJob : IJob
      {
          /// <summary>
          /// Label inserted into error log messages for the main site.
          /// </summary>
          private const string MainSiteLabel = "主";

          /// <summary>
          /// Label inserted into error log messages for the backup site.
          /// </summary>
          private const string BackSiteLabel = "副";

          /// <summary>
          /// Creates the job, its logger, and resolves the data and e-mail services from the IOC container.
          /// </summary>
          public FC3DSkillJob()
          {
              log = new LogHelper();
              services = IOC.Resolve<IDTOpenCode>();
              email = IOC.Resolve<IEmail>();
          }

          /// <summary>
          /// Job entry point: loads the configuration from the job data map, then crawls
          /// the backup site followed by the main site.
          /// </summary>
          /// <param name="context">Quartz execution context carrying the job data map.</param>
          public void Execute(IJobExecutionContext context)
          {
              Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
              DoBackUrl();
              DoMainUrl();
          }

          /// <summary>
          /// Crawls every configured main-site list page and stores the articles found on it.
          /// </summary>
          private void DoMainUrl()
          {
              foreach (string url in GetMainUrl(Config))
              {
                  SaveSkills(GetOpenListFromMainUrl(url));
              }
          }

          /// <summary>
          /// Crawls every configured backup-site list page and stores the articles found on it.
          /// </summary>
          private void DoBackUrl()
          {
              foreach (string url in GetBackUrl(Config))
              {
                  SaveSkills(GetOpenListFromBackUrl(url));
              }
          }

          /// <summary>
          /// Hands each article to the data service and writes a success log entry for
          /// every article the service reports as accepted.
          /// </summary>
          /// <param name="skills">Parsed articles; never contains null entries.</param>
          private void SaveSkills(IEnumerable<LotterySkillModel> skills)
          {
              foreach (LotterySkillModel skill in skills)
              {
                  if (services.LotterySkillModel(currentLottery, skill))
                  {
                      log.Info(GetType(), CommonHelper.GetJobMainLogInfo(Config, skill.Title));
                      isGetData = true;
                  }
              }
          }

          /// <summary>
          /// Builds the list-page URLs of the main site.
          /// </summary>
          /// <param name="config">Job configuration providing <c>MainUrl</c> and <c>MainUrlPages</c>.</param>
          /// <returns>Distinct page URLs in page order; empty when no main URL is configured.</returns>
          private List<string> GetMainUrl(SCCConfig config)
          {
              return BuildPageUrls(config.MainUrl, config.MainUrlPages);
          }

          /// <summary>
          /// Builds the list-page URLs of the backup site.
          /// </summary>
          /// <param name="config">Job configuration providing <c>BackUrl</c> and <c>BackUrlPages</c>.</param>
          /// <returns>Distinct page URLs in page order; empty when no backup URL is configured.</returns>
          private List<string> GetBackUrl(SCCConfig config)
          {
              // The page count is validated against BackUrlPages itself; the previous code
              // tested MainUrlPages, which could yield zero pages for the backup site.
              return BuildPageUrls(config.BackUrl, config.BackUrlPages);
          }

          /// <summary>
          /// Expands a URL template into one URL per page number, starting at page 1.
          /// </summary>
          /// <param name="urlTemplate">Composite format string; <c>{0}</c> receives the page number.</param>
          /// <param name="configuredPages">Configured page count; values below 1 are treated as 1.</param>
          /// <returns>Distinct URLs in page order; empty when the template is null or blank.</returns>
          private static List<string> BuildPageUrls(string urlTemplate, int configuredPages)
          {
              List<string> pageUrls = new List<string>();
              if (string.IsNullOrWhiteSpace(urlTemplate))
              {
                  return pageUrls;
              }

              int pages = configuredPages > 0 ? configuredPages : 1;
              for (int page = 1; page <= pages; page++)
              {
                  string pageUrl = string.Format(urlTemplate, page);
                  // A template without a placeholder produces the same URL for every page.
                  if (!pageUrls.Contains(pageUrl))
                  {
                      pageUrls.Add(pageUrl);
                  }
              }
              return pageUrls;
          }

          /// <summary>
          /// Crawls one main-site list page and loads every article linked from it.
          /// </summary>
          /// <param name="mainUrl">Absolute URL of the list page.</param>
          /// <returns>Successfully parsed articles; empty on any failure.</returns>
          private List<LotterySkillModel> GetOpenListFromMainUrl(string mainUrl)
          {
              return CrawlSkillList(mainUrl, "//*[@class='art-list']/ul/li/a", Host, GetSkillModel, MainSiteLabel);
          }

          /// <summary>
          /// Crawls one backup-site list page and loads every article linked from it.
          /// </summary>
          /// <param name="backUrl">Absolute URL of the list page.</param>
          /// <returns>Successfully parsed articles; empty on any failure.</returns>
          private List<LotterySkillModel> GetOpenListFromBackUrl(string backUrl)
          {
              return CrawlSkillList(backUrl, "//*[@class='listContainer']/ul/li/a", HostBackUrl, GetSkillModelBackUrl, BackSiteLabel);
          }

          /// <summary>
          /// Downloads a list page, collects the distinct article links selected by
          /// <paramref name="linkXPath"/> and loads each article through <paramref name="loadDetail"/>.
          /// </summary>
          /// <param name="listUrl">Absolute URL of the list page.</param>
          /// <param name="linkXPath">XPath selecting the anchor elements of the article links.</param>
          /// <param name="host">Prefix prepended to every href value.</param>
          /// <param name="loadDetail">Loads one article; returns null when the article cannot be parsed.</param>
          /// <param name="siteLabel">Site label used in error log messages.</param>
          /// <returns>Successfully parsed articles; empty on any failure.</returns>
          private List<LotterySkillModel> CrawlSkillList(string listUrl, string linkXPath, string host,
              Func<string, LotterySkillModel> loadDetail, string siteLabel)
          {
              List<LotterySkillModel> result = new List<LotterySkillModel>();
              try
              {
                  // Reject malformed or relative URLs before issuing a request.
                  Uri parsedListUrl;
                  if (!Uri.TryCreate(listUrl, UriKind.Absolute, out parsedListUrl))
                  {
                      LogCrawlError(siteLabel, "invalid list url: " + listUrl);
                      return result;
                  }

                  string html = NetHelper.GetUrlResponse(listUrl, Encoding.UTF8);
                  if (html == null)
                  {
                      return result;
                  }

                  HtmlDocument doc = new HtmlDocument();
                  doc.LoadHtml(html);

                  // SelectNodes yields null (not an empty collection) when nothing matches.
                  HtmlNodeCollection links = doc.DocumentNode.SelectNodes(linkXPath);
                  if (links == null)
                  {
                      return result;
                  }

                  // Collect distinct article URLs while keeping their order on the page.
                  List<string> articleUrls = new List<string>();
                  HashSet<string> seen = new HashSet<string>();
                  foreach (HtmlNode link in links)
                  {
                      HtmlAttribute href = link.Attributes.FirstOrDefault(a => a.Name.Equals("href"));
                      if (href == null || string.IsNullOrWhiteSpace(href.Value))
                      {
                          continue;
                      }

                      string articleUrl = host + href.Value;
                      if (seen.Add(articleUrl))
                      {
                          articleUrls.Add(articleUrl);
                      }
                  }

                  foreach (string target in articleUrls)
                  {
                      LotterySkillModel skill = loadDetail(target);
                      // Articles that failed to download or parse are skipped instead of
                      // being stored as empty records.
                      if (skill != null)
                      {
                          result.Add(skill);
                      }
                  }
              }
              catch (Exception ex)
              {
                  LogCrawlError(siteLabel, ex.Message);
              }
              return result;
          }

          /// <summary>
          /// Loads one article from the backup site.
          /// </summary>
          /// <param name="url">Absolute URL of the article page.</param>
          /// <returns>The parsed article, or null when it cannot be downloaded or parsed.</returns>
          private LotterySkillModel GetSkillModelBackUrl(string url)
          {
              return ParseSkillDetail(url, "//*[@class='article mb-10']", "h5", 2, BackSiteLabel);
          }

          /// <summary>
          /// Loads one article from the main site.
          /// </summary>
          /// <param name="url">Absolute URL of the article page.</param>
          /// <returns>The parsed article, or null when it cannot be downloaded or parsed.</returns>
          private LotterySkillModel GetSkillModel(string url)
          {
              // NOTE(review): the class name 'artile' is kept exactly as it was; presumably it
              // matches the site's own markup - confirm against a live page.
              return ParseSkillDetail(url, "//*[@class='artile']", "h1", 1, MainSiteLabel);
          }

          /// <summary>
          /// Downloads an article page and extracts its title and content.
          /// </summary>
          /// <param name="url">Absolute URL of the article page.</param>
          /// <param name="containerXPath">XPath of the element wrapping the article.</param>
          /// <param name="titleTag">Tag name of the container's direct child holding the title.</param>
          /// <param name="contentDivIndex">Zero-based index, among the container's direct div children, of the content div.</param>
          /// <param name="siteLabel">Site label used in error log messages.</param>
          /// <returns>The parsed article, or null when the page is unavailable or its markup is not as expected.</returns>
          private LotterySkillModel ParseSkillDetail(string url, string containerXPath, string titleTag,
              int contentDivIndex, string siteLabel)
          {
              try
              {
                  string html = NetHelper.GetUrlResponse(url, Encoding.UTF8);
                  if (html == null)
                  {
                      LogCrawlError(siteLabel, "empty response: " + url);
                      return null;
                  }

                  HtmlDocument doc = new HtmlDocument();
                  doc.LoadHtml(html);

                  HtmlNode container = doc.DocumentNode.SelectSingleNode(containerXPath);
                  if (container == null)
                  {
                      LogCrawlError(siteLabel, "article container not found: " + url);
                      return null;
                  }

                  HtmlNode titleNode = container.ChildNodes.FirstOrDefault(node => node.Name == titleTag);
                  List<HtmlNode> divs = container.ChildNodes.Where(node => node.Name == "div").ToList();
                  if (titleNode == null || divs.Count <= contentDivIndex)
                  {
                      LogCrawlError(siteLabel, "article title or content not found: " + url);
                      return null;
                  }

                  return new LotterySkillModel
                  {
                      Title = titleNode.InnerText.Trim(),
                      Author = "cn55128",
                      Content = divs[contentDivIndex].InnerHtml.Trim(),
                      IsDelete = false,
                      SourceUrl = url,
                      TypeId = lotterySkillType,
                      TypeName = lotterySkillType.GetEnumDescription()
                  };
              }
              catch (Exception ex)
              {
                  LogCrawlError(siteLabel, ex.Message);
                  return null;
              }
          }

          /// <summary>
          /// Writes a crawl error to the log using the message layout shared by all crawl steps.
          /// </summary>
          /// <param name="siteLabel">Site label inserted into the message.</param>
          /// <param name="message">Error detail.</param>
          private void LogCrawlError(string siteLabel, string message)
          {
              log.Error(GetType(),
                  string.Format("【{0}】通过{1}抓取开奖列表时发生错误,错误信息【{2}】", Config.Area + currentLottery, siteLabel, message));
          }

          #region Attribute
          /// <summary>
          /// Prefix prepended to article links found on the main site.
          /// </summary>
          public string Host = "http://www.55125.cn/";
          /// <summary>
          /// Prefix prepended to article links found on the backup site.
          /// </summary>
          public string HostBackUrl = "https://www.cz89.com/";
          /// <summary>
          /// Job configuration, loaded from the job data map at the start of every execution.
          /// </summary>
          private SCCConfig Config;
          /// <summary>
          /// Logger.
          /// </summary>
          private readonly LogHelper log;
          /// <summary>
          /// Data service that receives the crawled articles.
          /// </summary>
          private readonly IDTOpenCode services;
          /// <summary>
          /// Lottery category passed to the data service and used in log messages.
          /// </summary>
          private SCCLottery currentLottery => SCCLottery.LotterySkill;
          /// <summary>
          /// Skill type assigned to every crawled article (FC3D).
          /// </summary>
          private LotterySkillType lotterySkillType = LotterySkillType.FC3D;
          /// <summary>
          /// E-mail service resolved in the constructor; not used within this class.
          /// </summary>
          private IEmail email;
          /// <summary>
          /// Set to true once the data service has accepted at least one article during
          /// this run; not read within this class.
          /// </summary>
          private bool isGetData = false;
          #endregion
      }
  }