// JSTC7WSJob.cs
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Text.RegularExpressions;
  6. using CP.Model;
  7. using HtmlAgilityPack;
  8. using Newtonsoft.Json;
  9. using Quartz;
  10. using SCC.Common;
  11. using SCC.Crawler.Tools;
  12. using SCC.Interface;
  13. using SCC.Models;
  14. namespace SCC.Crawler.DT
  15. {
  16. /// <summary>
  17. /// 江苏体彩7位数
  18. /// </summary>
  19. [DisallowConcurrentExecution]
  20. [PersistJobDataAfterExecution]
  21. public class JSTC7WSJob : IJob
  22. {
  /// <summary>
  /// Creates the job instance: allocates the log helper and resolves the
  /// <see cref="IEmail"/> implementation from the IOC container.
  /// </summary>
  public JSTC7WSJob()
  {
      this.log = new LogHelper();
      this.email = IOC.Resolve<IEmail>();
  }
  /// <summary>
  /// Job entry point. Loads the configuration from the job data map, restores the
  /// record saved under the "LatestItem" key, runs the crawl passes and finally
  /// writes the latest record back under the same key.
  /// </summary>
  /// <param name="context">Job execution context.</param>
  public void Execute(IJobExecutionContext context)
  {
      var jobData = context.JobDetail.JobDataMap;
      Config = CommonHelper.GetConfigFromDataMap(jobData);

      // Dates on the configured skip list (preset holidays) have no draw.
      if (Config.SkipDate.Contains(CommonHelper.SCCSysDateTime.ToString("yyyyMMdd")))
      {
          return;
      }

      LatestItem = jobData["LatestItem"] as Tcjs7wsLongInfo;
      try
      {
          // Nothing saved yet (the service has just started): seed a placeholder record.
          if (LatestItem == null)
          {
              LatestItem = BuildYearStartItem();
          }

          // Catch-up pass. Original note: once the program date has moved to the next day,
          // the configuration is checked to see whether yesterday had a draw.
          isGetData = false;
          if (CommonHelper.CheckDTIsNeedGetData(Config))
          {
              CrawlMainThenBackup();
          }

          // Fall back to the placeholder when the issue number does not start with the
          // two-digit current year.
          var issueText = LatestItem.qi.ToString();
          if (!issueText.StartsWith(CommonHelper.SCCSysDateTime.ToString("yy")))
          {
              LatestItem = BuildYearStartItem();
          }

          // Same-day pass on draw days.
          // NOTE(review): the original comment said crawling starts after 8 PM, but the
          // condition only requires the hour to be greater than 0 - confirm which is intended.
          if (CommonHelper.CheckTodayIsOpenDay(Config))
          {
              if (CommonHelper.SCCSysDateTime.Hour > 0)
              {
                  CrawlMainThenBackup();
              }
          }
      }
      catch (Exception ex)
      {
          var lotteryLabel = Config.Area + currentLottery;
          log.Error(typeof(JSTC7WSJob),
              string.Format("【{0}】抓取时发生错误,错误信息【{1}】", lotteryLabel, ex.Message));
      }

      // Save the latest record for the next run.
      jobData["LatestItem"] = LatestItem;
  }

  /// <summary>
  /// Builds the placeholder record used when no usable latest record exists: the issue
  /// number comes from CommonHelper.GenerateQiHaoYYQQQ(0) and the date is 1 January of
  /// the current system year.
  /// </summary>
  private Tcjs7wsLongInfo BuildYearStartItem()
  {
      return new Tcjs7wsLongInfo
      {
          qi = CommonHelper.GenerateQiHaoYYQQQ(0),
          date = new DateTime(CommonHelper.SCCSysDateTime.Year, 1, 1)
      };
  }

  /// <summary>
  /// Runs the main-site crawl followed by the backup-site crawl.
  /// </summary>
  private void CrawlMainThenBackup()
  {
      DoMainUrl();
      DoBackUrl();
  }
  /// <summary>
  /// Crawls draw results from the main site (Config.MainUrl; per the original note,
  /// the Jiangsu Sports Lottery site) and stores every crawled record whose issue
  /// number is at or after the latest stored one.
  /// Updates LatestItem and sets isGetData for each record stored.
  /// </summary>
  private void DoMainUrl()
  {
      if (string.IsNullOrEmpty(Config.MainUrl)) return;

      var openList = GetOpenListFromMainUrl(Config.MainUrl);
      if (openList == null || openList.Count == 0) return; // nothing crawled

      // Newest issue number that was crawled.
      var newestQiHao = Convert.ToInt32(openList.OrderByDescending(m => m.qi).First().qi.ToString());

      // Latest issue known to the database. When storage returns nothing (e.g. an empty
      // table) keep the current LatestItem instead of dereferencing null.
      var storedLatest = Tcjs7wsData.GetLastOne();
      if (storedLatest != null)
      {
          LatestItem = storedLatest;
      }

      var startQiNum = Convert.ToInt32(LatestItem.qi.ToString());
      if (startQiNum > newestQiHao) return; // nothing new

      // Select the records in [startQiNum, newestQiHao] in ascending issue order, keeping
      // the first record per issue number. This replaces walking every integer in the
      // range and scanning the whole list for each one.
      var pending = openList
          .Select(item => new { Item = item, QiHao = Convert.ToInt32(item.qi.ToString()) })
          .Where(entry => entry.QiHao >= startQiNum && entry.QiHao <= newestQiHao)
          .GroupBy(entry => entry.QiHao)
          .OrderBy(sameIssue => sameIssue.Key)
          .Select(sameIssue => sameIssue.First());

      foreach (var entry in pending)
      {
          // Store the record.
          entry.Item.addtime = DateTime.Now;
          Tcjs7wsData.Add(entry.Item);

          // Success log.
          log.Info(typeof(JSTC7WSJob), CommonHelper.GetJobMainLogInfo(Config, entry.QiHao.ToString()));
          LatestItem = entry.Item;
          isGetData = true;
      }
  }
  /// <summary>
  /// Downloads the draw list from the main site page by page (HTTP POST with a
  /// "current_page" form field) and converts each JSON entry under "items" into a record.
  /// Paging stops at the first entry whose "num" starts with last year's two-digit
  /// prefix, or when a page contains no entries.
  /// </summary>
  /// <param name="mainUrl">Address of the main site list endpoint.</param>
  /// <returns>
  /// The records collected so far; on an error the exception is logged and whatever
  /// was collected before the failure is returned.
  /// </returns>
  private List<Tcjs7wsLongInfo> GetOpenListFromMainUrl(string mainUrl)
  {
      var result = new List<Tcjs7wsLongInfo>();
      try
      {
          var resourceUrl = new Uri(mainUrl);
          // Two-digit prefix of the previous year, used as the stop marker.
          var lastYear = (DateTime.Now.Year - 1).ToString().Substring(2);
          const string postData = "current_page={0}&all_count=0&num=";
          var pageIndex = 1;
          var isLoop = true;
          while (isLoop)
          {
              var htmlResource = NetHelper.GetUrlResponse(resourceUrl.AbsoluteUri, "POST",
                  string.Format(postData, pageIndex), Encoding.UTF8);
              var jsonData = htmlResource.JsonToEntity<dynamic>();
              var dataList = jsonData["items"];

              var itemsOnPage = 0;
              foreach (var data in dataList)
              {
                  itemsOnPage++;
                  if (data["num"].Value.StartsWith(lastYear))
                  {
                      isLoop = false;
                      break;
                  }

                  // Inserts dashes at positions 4 and 6 of "date_publish"
                  // (presumably an 8-digit yyyyMMdd value - TODO confirm).
                  string openTime = data["date_publish"].Value.Insert(6, "-").Insert(4, "-");
                  string detailUrl = string.Format(
                      "http://www.js-lottery.com/Article/news/group_id/3/article_id/{0}.html",
                      data["article_id"].Value);

                  // Issue numbers shorter than 7 characters get a "20" prefix.
                  string qihao = data["num"].Value;
                  qihao = qihao.Length < 7 ? $"20{qihao}" : qihao;

                  var model = new Tcjs7wsLongInfo
                  {
                      qi = Convert.ToInt32(qihao),
                      n1 = Convert.ToInt32(data["one"].Value),
                      n2 = Convert.ToInt32(data["two"].Value),
                      n3 = Convert.ToInt32(data["three"].Value),
                      n4 = Convert.ToInt32(data["four"].Value),
                      n5 = Convert.ToInt32(data["five"].Value),
                      n6 = Convert.ToInt32(data["six"].Value),
                      n7 = Convert.ToInt32(data["seven"].Value),
                      date = Convert.ToDateTime(openTime),
                  };
                  OptimizeMainModel(ref model, detailUrl);
                  result.Add(model);
              }

              // A page without entries means the list is exhausted. Without this check the
              // loop would keep requesting further pages forever when no last-year entry
              // is ever encountered.
              if (itemsOnPage == 0) break;

              pageIndex++;
          }
      }
      catch (Exception ex)
      {
          log.Error(typeof(JSTC7WSJob),
              string.Format("【{0}】通过主站点抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, ex.Message));
      }
      return result;
  }
  /// <summary>
  /// Completes a backup-site draw record with the sales amount and the prize
  /// counts/amounts read from the cells of its table row, and serializes the
  /// three prize levels into <c>winbonus</c>.
  /// Errors are logged and swallowed, leaving the record partially filled.
  /// </summary>
  /// <param name="model">Record to complete; its fields are set in place.</param>
  /// <param name="tr">Table row of the draw; cells 2 to 8 are read.</param>
  private void OptimizeBackModel(ref Tcjs7wsLongInfo model, HtmlNode tr)
  {
      try
      {
          var tds = tr.ChildNodes.Where(w => w.Name == "td").ToList();

          // Sales amount with thousands separators and the currency unit removed.
          var xiaoshoue = tds[2].InnerText.Trim().Replace(",", "").Replace("元", "");
          // No jackpot value is read from the row, so the field is set to an empty string.
          var jiangchi = "";

          // Special prize
          model.zj = tds[3].InnerText.Trim();
          model.jo = tds[4].InnerText;
          // First prize
          model.zj1 = tds[5].InnerText.Trim();
          model.jo1 = tds[6].InnerText;
          // Second prize
          model.zj2 = tds[7].InnerText.Trim();
          model.jo2 = tds[8].InnerText;

          model.nextmoney = jiangchi;
          model.tzmoney = xiaoshoue;

          var list = new List<Winbonus>
          {
              new Winbonus { item = "特等奖", wincount = model.zj, winmoney = model.jo },
              new Winbonus { item = "一等奖", wincount = model.zj1, winmoney = model.jo1 },
              new Winbonus { item = "二等奖", wincount = model.zj2, winmoney = model.jo2 }
          };
          model.winbonus = JsonConvert.SerializeObject(list);
      }
      catch (Exception ex)
      {
          // This method serves the backup site; the message previously named the main site.
          log.Error(typeof(JSTC7WSJob),
              string.Format("【{0}】通过备用站点优化开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, ex.Message));
      }
  }
  /// <summary>
  /// Completes a main-site draw record from its detail page: prize counts/amounts from
  /// the second table of the page, the sales amount matched by regular expression in the
  /// page text, and the jackpot taken from the last paragraph next to that table.
  /// Errors are logged and swallowed, leaving the record partially filled.
  /// </summary>
  /// <param name="model">Record to complete; its fields are set in place.</param>
  /// <param name="detailUrl">Address of the draw's detail page.</param>
  private void OptimizeMainModel(ref Tcjs7wsLongInfo model, string detailUrl)
  {
      try
      {
          var htmlResource = NetHelper.GetUrlResponse(detailUrl);
          if (string.IsNullOrEmpty(htmlResource)) return;

          var doc = new HtmlDocument();
          doc.LoadHtml(htmlResource);
          var tables = doc.DocumentNode.SelectNodes("//table");
          var prizeTable = tables != null && tables.Count > 1 ? tables[1] : null;
          if (prizeTable != null)
          {
              FillPrizeLevels(model, prizeTable);
          }

          // Sales amount. Original note: issues up to 2016/182 use the first wording,
          // issues from 2016/183 on use the second.
          // NOTE(review): the parentheses around "区、市" are unescaped here, so they form
          // capture group 1 and "$1" would yield that text rather than the amount -
          // confirm whether literal (possibly full-width) parentheses were intended.
          var salesMatch = Regex.Match(htmlResource, @"本省(区、市)销售额:([\s\S]*?)元");
          if (!salesMatch.Success)
          {
              salesMatch = Regex.Match(htmlResource, @"本期销售金额:([\s\S]*?)元");
          }
          if (salesMatch.Success)
          {
              model.tzmoney = salesMatch.Result("$1");
          }

          // The jackpot is looked up relative to the prize table; without the table there
          // is nothing to read (previously this dereferenced a missing table).
          if (prizeTable == null) return;
          var paragraphs = prizeTable.ParentNode.ChildNodes
              .Where(n => string.Equals(n.Name, "p", StringComparison.OrdinalIgnoreCase))
              .ToList();
          if (paragraphs.Count == 0) return;

          var potString = paragraphs[paragraphs.Count - 1].InnerHtml;
          var potMatch = Regex.Match(potString, @"<br>([\s\S]*?)元");
          if (!potMatch.Success) return;

          var potValue = potMatch.Result("$1").Replace("&nbsp;", string.Empty);
          // Keep only the text after the last line break, including a break at position 0.
          var lastBreak = potValue.LastIndexOf("<br>", StringComparison.Ordinal);
          if (lastBreak >= 0)
          {
              potValue = potValue.Substring(lastBreak + 4);
          }

          // The jackpot always goes to nextmoney. Previously the no-line-break case wrote
          // it to tzmoney and thereby overwrote the sales amount.
          model.nextmoney = potValue;
      }
      catch (Exception ex)
      {
          log.Error(typeof(JSTC7WSJob),
              string.Format("【{0}】通过主站点优化开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, ex.Message));
      }
  }

  /// <summary>
  /// Reads the prize rows (special prize to fifth prize) of the prize table into the
  /// record and serializes all six levels into <c>winbonus</c>.
  /// </summary>
  private static void FillPrizeLevels(Tcjs7wsLongInfo model, HtmlNode prizeTable)
  {
      // Rows sit under <tbody> when the page has one, otherwise directly under <table>.
      var tbody = prizeTable.ChildNodes
          .FirstOrDefault(n => string.Equals(n.Name, "tbody", StringComparison.OrdinalIgnoreCase));
      var rows = (tbody ?? prizeTable).ChildNodes
          .Where(n => string.Equals(n.Name, "tr", StringComparison.OrdinalIgnoreCase))
          .ToList();

      var sawPrizeRow = false;
      foreach (var row in rows)
      {
          var tds = row.ChildNodes
              .Where(n => string.Equals(n.Name, "td", StringComparison.OrdinalIgnoreCase))
              .ToList();
          // Rows without name/count/amount cells (headers, spacers) carry no prize data.
          if (tds.Count < 3) continue;
          sawPrizeRow = true;

          var winCount = tds[1].InnerText.Replace(",", string.Empty).Replace("注", string.Empty);
          var winMoney = tds[2].InnerText.Replace("元", string.Empty).Replace("--", "0").Replace(",", "").Trim();
          switch (tds[0].InnerText.Trim())
          {
              case "特等奖":
                  model.zj = winCount;
                  model.jo = winMoney;
                  break;
              case "一等奖":
                  model.zj1 = winCount;
                  model.jo1 = winMoney;
                  break;
              case "二等奖":
                  model.zj2 = winCount;
                  model.jo2 = winMoney;
                  break;
              case "三等奖":
                  model.zj3 = winCount;
                  model.jo3 = winMoney;
                  break;
              case "四等奖":
                  model.zj4 = winCount;
                  model.jo4 = winMoney;
                  break;
              case "五等奖":
                  model.zj5 = winCount;
                  model.jo5 = winMoney;
                  break;
          }
      }
      if (!sawPrizeRow) return;

      // Serialized once after all rows are read instead of once per row.
      var list = new List<Winbonus>
      {
          new Winbonus { item = "特等奖", wincount = model.zj, winmoney = model.jo },
          new Winbonus { item = "一等奖", wincount = model.zj1, winmoney = model.jo1 },
          new Winbonus { item = "二等奖", wincount = model.zj2, winmoney = model.jo2 },
          new Winbonus { item = "三等奖", wincount = model.zj3, winmoney = model.jo3 },
          new Winbonus { item = "四等奖", wincount = model.zj4, winmoney = model.jo4 },
          new Winbonus { item = "五等奖", wincount = model.zj5, winmoney = model.jo5 }
      };
      model.winbonus = JsonConvert.SerializeObject(list);
  }
  /// <summary>
  /// Crawls draw results from the backup site (Config.BackUrl; per the original note,
  /// Baidu Lecai) and stores every crawled record whose issue number is at or after
  /// the current LatestItem.
  /// Updates LatestItem and sets isGetData for each record stored.
  /// </summary>
  private void DoBackUrl()
  {
      if (string.IsNullOrEmpty(Config.BackUrl)) return;

      var openList = GetOpenListFromBackUrl();
      if (openList == null || openList.Count == 0) return; // nothing crawled

      // Newest issue number that was crawled.
      var newestQiHao = Convert.ToInt32(openList.OrderByDescending(m => m.qi).First().qi.ToString());

      // Unlike the main-site pass, the start point is the in-memory LatestItem;
      // the database is not re-read here.
      var startQiNum = Convert.ToInt32(LatestItem.qi.ToString());
      if (startQiNum > newestQiHao) return; // nothing new

      // Select the records in [startQiNum, newestQiHao] in ascending issue order, keeping
      // the first record per issue number. This replaces walking every integer in the
      // range and scanning the whole list for each one.
      var pending = openList
          .Select(item => new { Item = item, QiHao = Convert.ToInt32(item.qi.ToString()) })
          .Where(entry => entry.QiHao >= startQiNum && entry.QiHao <= newestQiHao)
          .GroupBy(entry => entry.QiHao)
          .OrderBy(sameIssue => sameIssue.Key)
          .Select(sameIssue => sameIssue.First());

      foreach (var entry in pending)
      {
          // Store the record.
          entry.Item.addtime = DateTime.Now;
          Tcjs7wsData.Add(entry.Item);

          // Success log.
          log.Info(typeof(JSTC7WSJob), CommonHelper.GetJobBackLogInfo(Config, entry.QiHao.ToString()));
          LatestItem = entry.Item;
          isGetData = true;
      }
  }
  /// <summary>
  /// Downloads the backup site page (Config.BackUrl) and converts the rows of its first
  /// table into draw records. The first two rows are treated as headers; hidden rows,
  /// rows with fewer than 14 cells, rows without a link in the 14th cell and rows
  /// without seven number elements are skipped.
  /// </summary>
  /// <returns>
  /// The records collected so far; on an error the exception is logged and whatever
  /// was collected before the failure is returned.
  /// </returns>
  private List<Tcjs7wsLongInfo> GetOpenListFromBackUrl()
  {
      var result = new List<Tcjs7wsLongInfo>();
      try
      {
          var htmlResource = NetHelper.GetUrlResponse(Config.BackUrl, Encoding.GetEncoding("utf-8"));
          if (htmlResource == null) return result;

          var doc = new HtmlDocument();
          doc.LoadHtml(htmlResource);
          var table = doc.DocumentNode.SelectSingleNode("//table");
          if (table == null) return result;

          var trs = table.ChildNodes.Where(node => node.Name == "tr").ToList();
          for (var i = 2; i < trs.Count; i++) // the first two rows are headers
          {
              var row = trs[i];
              var trstyle = row.Attributes["style"];
              if (trstyle != null && trstyle.Value == "display:none") continue;

              var tds = row.ChildNodes.Where(node => node.Name == "td").ToList();
              if (tds.Count < 14) continue;
              if (!tds[13].ChildNodes.Any(n => n.Name == "a")) continue;
              if (tds[2].ChildNodes.Count == 0) continue;

              // Seven numbers are read below, so seven <em> elements are required
              // (the guard used to allow six and then indexed past the end).
              var opencodeNode = tds[2].ChildNodes.Where(n => n.Name.ToLower() == "em").ToList();
              if (opencodeNode.Count < 7) continue;

              // Only the first 10 characters of the cell are parsed as the date; a shorter
              // cell cannot hold one and must not abort the remaining rows.
              var dateText = tds[1].InnerText;
              if (dateText.Length < 10) continue;

              var model = new Tcjs7wsLongInfo
              {
                  qi = Convert.ToInt32(tds[0].InnerText.Trim()),
                  date = Convert.ToDateTime(dateText.Substring(0, 10)),
                  n1 = Convert.ToInt32(opencodeNode[0].InnerText.Trim()),
                  n2 = Convert.ToInt32(opencodeNode[1].InnerText.Trim()),
                  n3 = Convert.ToInt32(opencodeNode[2].InnerText.Trim()),
                  n4 = Convert.ToInt32(opencodeNode[3].InnerText.Trim()),
                  n5 = Convert.ToInt32(opencodeNode[4].InnerText.Trim()),
                  n6 = Convert.ToInt32(opencodeNode[5].InnerText.Trim()),
                  n7 = Convert.ToInt32(opencodeNode[6].InnerText.Trim())
              };
              OptimizeBackModel(ref model, row);
              result.Add(model);
          }
      }
      catch (Exception ex)
      {
          // Single handler; the former inner handler logged the main-site message here.
          log.Error(typeof(JSTC7WSJob),
              string.Format("【{0}】通过备用站点抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, ex.Message));
      }
      return result;
  }
  #region Attribute
  /// <summary>
  /// Crawler configuration; assigned at the start of every Execute call.
  /// </summary>
  private SCCConfig Config;

  /// <summary>
  /// Latest draw record crawled so far; persisted in the job data map between runs.
  /// </summary>
  private Tcjs7wsLongInfo LatestItem;

  #pragma warning disable CS0414 // The field 'JSTC7WSJob.FailedQiHaoList' is assigned but its value is never used
  /// <summary>
  /// Issue numbers that failed to be crawled today (per the original comment);
  /// never read in this class.
  /// </summary>
  private List<string> FailedQiHaoList = null;
  #pragma warning restore CS0414 // The field 'JSTC7WSJob.FailedQiHaoList' is assigned but its value is never used

  /// <summary>
  /// Log helper.
  /// </summary>
  private readonly LogHelper log;

  /// <summary>
  /// Lottery handled by this job.
  /// </summary>
  private SCCLottery currentLottery
  {
      get { return SCCLottery.JiangSuTC7WS; }
  }

  /// <summary>
  /// E-mail service resolved in the constructor.
  /// </summary>
  private IEmail email;

  #pragma warning disable CS0414 // The field 'JSTC7WSJob.isGetData' is assigned but its value is never used
  /// <summary>
  /// Whether the current run stored any draw data.
  /// </summary>
  private bool isGetData;
  #pragma warning restore CS0414 // The field 'JSTC7WSJob.isGetData' is assigned but its value is never used
  #endregion
  496. }
  497. }