using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Quartz;
using SCC.Common;
using SCC.Models;
using SCC.Interface;
using Newtonsoft.Json;
using HtmlAgilityPack;

namespace SCC.Crawler.DT
{
    /// <summary>
    /// Data crawler job for the Jiangsu Sports Lottery "7 digits" game (江苏体彩7位数).
    /// Draw lists are read from the configured main URL first and from the configured
    /// backup URL second; every newly found draw is enriched with prize details and
    /// handed to <see cref="IDTOpenCode"/> for storage.
    /// </summary>
    [DisallowConcurrentExecution]
    [PersistJobDataAfterExecution]
    public class JSTC7WSJob : IJob
    {
        #region Constants

        /// <summary>
        /// Prize tier names in the order they are written into the detail string.
        /// The main-site detail table is matched against these names.
        /// </summary>
        private static readonly string[] PrizeNames = { "特等奖", "一等奖", "二等奖", "三等奖", "四等奖", "五等奖" };

        /// <summary>
        /// Layout of <c>OpenCode7DTModel.Detail</c>: sales, jackpot, then count|amount per tier.
        /// </summary>
        private const string DetailFormat = "{0},{1}^特等奖|{2}|{3},一等奖|{4}|{5},二等奖|{6}|{7},三等奖|{8}|{9},四等奖|{10}|{11},五等奖|{12}|{13}";

        /// <summary>
        /// Form body posted to the main site; {0} is the 1-based page index.
        /// </summary>
        private const string MainPostDataFormat = "current_page={0}&all_count=0&num=";

        /// <summary>
        /// Detail page on the main site; {0} is the article id taken from the list response.
        /// </summary>
        private const string MainDetailUrlFormat = "http://www.js-lottery.com/Article/news/group_id/3/article_id/{0}.html";

        /// <summary>
        /// Detail page on the backup site; {0} is the term, {1} a random cache-busting number.
        /// </summary>
        private const string BackDetailUrlFormat = "http://baidu.lecai.com/lottery/draw/view/533/{0}?r={1}";

        /// <summary>
        /// Safety valve for main-site paging: never request more than this many pages per crawl.
        /// </summary>
        private const int MaxMainPages = 500;

        /// <summary>
        /// Sales figure as published for term 2016-182 and earlier (per the original author's note).
        /// </summary>
        private static readonly Regex ProvinceSalesRegex = new Regex(@"本省(区、市)销售额:([\s\S]*?)元");

        /// <summary>
        /// Sales figure as published for term 2016-183 and later (per the original author's note).
        /// </summary>
        private static readonly Regex TermSalesRegex = new Regex(@"本期销售金额:([\s\S]*?)元");

        /// <summary>
        /// Jackpot figure inside the last paragraph next to the prize table.
        /// </summary>
        private static readonly Regex JackpotRegex = new Regex(@"<br>([\s\S]*?)元");

        /// <summary>
        /// JSON payload embedded in the backup site's detail page script.
        /// </summary>
        private static readonly Regex PhaseDataRegex = new Regex(@"var phaseData = ([\s\S]*?);");

        #endregion

        /// <summary>
        /// Creates the job and resolves its logger, data service and e-mail service.
        /// </summary>
        public JSTC7WSJob()
        {
            log = new LogHelper();
            services = IOC.Resolve<IDTOpenCode>();
            email = IOC.Resolve<IEmail>();
        }

        /// <summary>
        /// Job entry point.
        /// </summary>
        /// <param name="context">Quartz execution context; its job data map carries the configuration
        /// and the "LatestItem" entry that is persisted between runs.</param>
        public void Execute(IJobExecutionContext context)
        {
            Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);

            // Pre-configured skip dates (holidays without a draw): do nothing.
            if (Config.SkipDate.Contains(CommonHelper.SCCSysDateTime.ToString("yyyyMMdd")))
            {
                return;
            }

            LatestItem = context.JobDetail.JobDataMap["LatestItem"] as OpenCode7DTModel;
            try
            {
                // First run after service start: seed the latest item from storage.
                if (LatestItem == null)
                {
                    LatestItem = services.GetOpenCode7DTLastItem(currentLottery);
                    if (LatestItem == null)
                    {
                        // Storage holds no draw at all yet.
                        LatestItem = CreateYearStartItem();
                    }
                }

                // On the day after a draw day, verify (per configuration) that yesterday's draw was stored.
                isGetData = false;
                if (CommonHelper.CheckDTIsNeedGetData(Config))
                {
                    CheckingOpenDayTheLotteryData();
                }

                // Year rollover: the stored term no longer carries the current two-digit year.
                if (!LatestItem.Term.ToString().StartsWith(CommonHelper.SCCSysDateTime.ToString("yy")))
                {
                    LatestItem = CreateYearStartItem();
                }

                // Crawl only on a draw day and only once the hour is past 12.
                // NOTE(review): the original comment said "after 8 pm", which does not match
                // the `Hour > 12` condition - confirm the intended cut-off.
                if (CommonHelper.CheckTodayIsOpenDay(Config) && CommonHelper.SCCSysDateTime.Hour > 12)
                {
                    DoTodayJobByMainUrl();
                    DoTodayJobByBackUrl();
                }

                if (isGetData)
                {
                    TrendChartHelper.GenerateJSTC7WSTrendChart(log);
                }
            }
            catch (Exception ex)
            {
                log.Error(typeof(JSTC7WSJob), string.Format("【{0}】抓取时发生错误,错误信息【{1}】", Config.Area + Config.LotteryName, ex.Message));
            }

            // Persist the newest term for the next run.
            context.JobDetail.JobDataMap["LatestItem"] = LatestItem;
        }

        /// <summary>
        /// Builds the placeholder item used when no draw of the current year is known yet:
        /// term number zero of the year and an open time of 1 January.
        /// </summary>
        private static OpenCode7DTModel CreateYearStartItem()
        {
            return new OpenCode7DTModel()
            {
                Term = CommonHelper.GenerateQiHaoYYQQQ(0),
                OpenTime = new DateTime(CommonHelper.SCCSysDateTime.Year, 1, 1)
            };
        }

        /// <summary>
        /// Self-check: retries terms recorded as failed, and queues an e-mail reminder for
        /// every term that is still missing, including yesterday's draw if it was not stored.
        /// </summary>
        private void CheckingOpenDayTheLotteryData()
        {
            // Terms whose crawl failed, as reported by the data service.
            FailedQiHaoList = services.GetFailedYYQQQList(currentLottery);
            if (FailedQiHaoList.Count > 0)
            {
                DoYesterdayFailedListByMainUrl();
                DoYesterdayFailedListByBackUrl();

                // Whatever is still in the list goes to the e-mail queue for the administrator.
                foreach (var fQiHao in FailedQiHaoList)
                {
                    if (email.AddEmail(Config.Area + Config.LotteryName, fQiHao, CommonHelper.GenerateDTOpenTime(Config)))
                    {
                        log.Error(typeof(JSTC7WSJob), CommonHelper.GetJobLogError(Config, fQiHao));
                    }
                }
            }

            if (LatestIsFromYesterday())
            {
                return;
            }

            // Yesterday's draw is not stored yet: try once more, then fall back to an e-mail reminder.
            DoTodayJobByMainUrl();
            DoTodayJobByBackUrl();
            if (!LatestIsFromYesterday())
            {
                var openQiHao = (LatestItem.Term + 1).ToString();
                if (email.AddEmail(Config.Area + Config.LotteryName, openQiHao, CommonHelper.GenerateDTOpenTime(Config)))
                {
                    log.Error(typeof(JSTC7WSJob), CommonHelper.GetJobLogError(Config, openQiHao));
                }
            }
        }

        /// <summary>
        /// True when the open date of <see cref="LatestItem"/> equals yesterday's system date.
        /// </summary>
        private bool LatestIsFromYesterday()
        {
            return LatestItem.OpenTime.ToString("yyyyMMdd") == CommonHelper.SCCSysDateTime.AddDays(-1).ToString("yyyyMMdd");
        }

        /// <summary>
        /// Crawls new draws from the main site (Jiangsu Sports Lottery website).
        /// Does nothing when no main URL is configured.
        /// </summary>
        private void DoTodayJobByMainUrl()
        {
            if (string.IsNullOrEmpty(Config.MainUrl))
            {
                return;
            }

            StoreNewerTerms(GetOpenListFromMainUrl(Config.MainUrl), true);
        }

        /// <summary>
        /// Retries every term of the failed list against the main site (Jiangsu Sports Lottery website).
        /// </summary>
        private void DoYesterdayFailedListByMainUrl()
        {
            if (!string.IsNullOrEmpty(Config.MainUrl) && FailedQiHaoList.Count > 0)
            {
                RetryFailedTerms(GetOpenListFromMainUrl(Config.MainUrl), true);
            }
        }

        /// <summary>
        /// Crawls new draws from the backup site (Baidu Lecai).
        /// Does nothing when no backup URL is configured.
        /// </summary>
        private void DoTodayJobByBackUrl()
        {
            if (string.IsNullOrEmpty(Config.BackUrl))
            {
                return;
            }

            StoreNewerTerms(GetOpenListFromBackUrl(Config.BackUrl), false);
        }

        /// <summary>
        /// Retries every term of the failed list against the backup site (Baidu Lecai).
        /// </summary>
        private void DoYesterdayFailedListByBackUrl()
        {
            if (!string.IsNullOrEmpty(Config.BackUrl) && FailedQiHaoList.Count > 0)
            {
                RetryFailedTerms(GetOpenListFromBackUrl(Config.BackUrl), false);
            }
        }

        /// <summary>
        /// Stores every term between the one after <see cref="LatestItem"/> and the newest term in
        /// <paramref name="openList"/>. The term text is treated as a two-character year prefix
        /// followed by a sequence number padded to three digits.
        /// </summary>
        /// <param name="openList">Crawled draws, newest first.</param>
        /// <param name="fromMainSite">True for the main site, false for the backup site.</param>
        private void StoreNewerTerms(List<OpenCode7DTModel> openList, bool fromMainSite)
        {
            if (openList.Count == 0)
            {
                return; // nothing crawled
            }

            var newestTermText = openList.First().Term.ToString();
            var firstSeq = Convert.ToInt32(LatestItem.Term.ToString().Substring(2)) + 1;
            var lastSeq = Convert.ToInt32(newestTermText.Substring(2));
            if (firstSeq > lastSeq)
            {
                return; // nothing newer than what is already stored
            }

            // Every item that replaces LatestItem below has a term starting with this same prefix.
            var yearPrefix = LatestItem.Term.ToString().Substring(0, 2);
            for (var seq = firstSeq; seq <= lastSeq; seq++)
            {
                var termText = yearPrefix + seq.ToString().PadLeft(3, '0');
                var candidate = openList.FirstOrDefault(item => item.Term.ToString() == termText);
                if (candidate == null
                    || !OptimizeModel(ref candidate, fromMainSite)
                    || !services.AddDTOpen7Code(currentLottery, candidate))
                {
                    continue;
                }

                LogStored(termText, fromMainSite);
                LatestItem = candidate;
                isGetData = true;
            }
        }

        /// <summary>
        /// Looks up each term of <see cref="FailedQiHaoList"/> in <paramref name="openList"/>, stores the
        /// ones that are found and removes them from the failed list.
        /// </summary>
        /// <param name="openList">Crawled draws.</param>
        /// <param name="fromMainSite">True for the main site, false for the backup site.</param>
        private void RetryFailedTerms(List<OpenCode7DTModel> openList, bool fromMainSite)
        {
            if (openList.Count == 0)
            {
                return; // nothing crawled
            }

            var recovered = new List<string>();
            foreach (string failedQiHao in FailedQiHaoList)
            {
                var failedTerm = failedQiHao;
                var candidate = openList.FirstOrDefault(item => item.Term.ToString() == failedTerm);
                if (candidate == null
                    || !OptimizeModel(ref candidate, fromMainSite)
                    || !services.AddDTOpen7Code(currentLottery, candidate))
                {
                    continue;
                }

                LogStored(failedQiHao, fromMainSite);
                if (candidate.Term > LatestItem.Term)
                {
                    LatestItem = candidate;
                }

                isGetData = true;
                recovered.Add(failedQiHao);
            }

            // Removal happens after the loop because the failed list is being enumerated above.
            foreach (var term in recovered)
            {
                FailedQiHaoList.Remove(term);
            }
        }

        /// <summary>
        /// Dispatches to the detail parser of the site the draw came from.
        /// </summary>
        private bool OptimizeModel(ref OpenCode7DTModel model, bool fromMainSite)
        {
            return fromMainSite ? OptimizeMainModel(ref model) : OptimizeBackModel(ref model);
        }

        /// <summary>
        /// Writes the success log entry for a stored term, using the log text of the originating site.
        /// </summary>
        private void LogStored(string termText, bool fromMainSite)
        {
            if (fromMainSite)
            {
                log.Info(typeof(JSTC7WSJob), CommonHelper.GetJobMainLogInfo(Config, termText));
            }
            else
            {
                log.Info(typeof(JSTC7WSJob), CommonHelper.GetJobBackLogInfo(Config, termText));
            }
        }

        /// <summary>
        /// Reads the draw list from the main site, page by page, until a term of the previous
        /// year is reached.
        /// </summary>
        /// <param name="mainUrl">Main site address.</param>
        /// <returns>Draws sorted by term, newest first. When an error occurs the items collected
        /// so far are returned unsorted and the error is logged.</returns>
        private List<OpenCode7DTModel> GetOpenListFromMainUrl(string mainUrl)
        {
            var result = new List<OpenCode7DTModel>();
            try
            {
                var resourceUrl = new Uri(mainUrl);
                // NOTE(review): this uses DateTime.Now while the rest of the class uses
                // CommonHelper.SCCSysDateTime - confirm whether that is intentional.
                var lastYear = (DateTime.Now.Year - 1).ToString().Substring(2);
                var reachedLastYear = false;

                for (var pageIndex = 1; !reachedLastYear && pageIndex <= MaxMainPages; pageIndex++)
                {
                    string responseText = NetHelper.GetUrlResponse(resourceUrl.AbsoluteUri, "POST", string.Format(MainPostDataFormat, pageIndex), Encoding.UTF8);
                    var jsonData = JsonConvert.DeserializeObject<dynamic>(responseText);
                    var itemsOnPage = 0;

                    foreach (var data in jsonData["items"])
                    {
                        if (data["num"].Value.StartsWith(lastYear))
                        {
                            reachedLastYear = true;
                            break;
                        }

                        itemsOnPage++;
                        // "yyyyMMdd" -> "yyyy-MM-dd" so that Convert.ToDateTime can read it.
                        string openTime = data["date_publish"].Value.Insert(6, "-").Insert(4, "-");
                        result.Add(new OpenCode7DTModel()
                        {
                            Term = Convert.ToInt32(data["num"].Value),
                            OpenCode1 = Convert.ToInt32(data["one"].Value),
                            OpenCode2 = Convert.ToInt32(data["two"].Value),
                            OpenCode3 = Convert.ToInt32(data["three"].Value),
                            OpenCode4 = Convert.ToInt32(data["four"].Value),
                            OpenCode5 = Convert.ToInt32(data["five"].Value),
                            OpenCode6 = Convert.ToInt32(data["six"].Value),
                            OpenCode7 = Convert.ToInt32(data["seven"].Value),
                            OpenTime = Convert.ToDateTime(openTime),
                            DetailUrl = string.Format(MainDetailUrlFormat, data["article_id"].Value)
                        });
                    }

                    // A page without items means the list is exhausted. Without this check the
                    // loop would keep requesting pages forever when no previous-year term exists.
                    if (itemsOnPage == 0)
                    {
                        break;
                    }
                }

                result = result.OrderByDescending(S => S.Term).ToList();
            }
            catch (Exception ex)
            {
                log.Error(typeof(JSTC7WSJob), string.Format("【{0}】通过主站点抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + Config.LotteryName, ex.Message));
            }

            return result;
        }

        /// <summary>
        /// Completes a main-site draw with sales, jackpot and per-tier prize information
        /// parsed from its detail page.
        /// </summary>
        /// <param name="model">Draw to complete; its <c>DetailUrl</c> is fetched and its <c>Detail</c> is set.</param>
        /// <returns>True when the detail page was parsed; false when the page is empty, has fewer
        /// than two tables, or parsing failed (the error is logged).</returns>
        private bool OptimizeMainModel(ref OpenCode7DTModel model)
        {
            try
            {
                var htmlResource = NetHelper.GetUrlResponse(model.DetailUrl);
                if (string.IsNullOrEmpty(htmlResource))
                {
                    return false;
                }

                var doc = new HtmlDocument();
                doc.LoadHtml(htmlResource);
                var tables = doc.DocumentNode.SelectNodes("//table");
                if (tables == null || tables.Count <= 1)
                {
                    return false;
                }

                var counts = new int[PrizeNames.Length];
                var amounts = new decimal[PrizeNames.Length];
                decimal sales = 0, jackpot = 0;

                // The second table of the page holds one row per prize tier.
                var prizeTable = tables[1];
                var rows = prizeTable.ChildNodes.Where(N => N.Name.ToLower() == "tbody").First()
                                     .ChildNodes.Where(N => N.Name.ToLower() == "tr").ToList();
                foreach (var row in rows)
                {
                    var cells = row.ChildNodes.Where(N => N.Name.ToLower() == "td").ToList();
                    if (cells.Count == 0)
                    {
                        continue; // e.g. a header row built from <th>; nothing to read
                    }

                    var tier = Array.IndexOf(PrizeNames, cells[0].InnerText);
                    if (tier < 0)
                    {
                        continue; // not a prize tier row
                    }

                    counts[tier] = Convert.ToInt32(cells[1].InnerText.Replace(",", string.Empty).Replace("注", string.Empty));
                    amounts[tier] = Convert.ToDecimal(cells[2].InnerText.Replace("元", string.Empty));
                }

                // Sales: try the older wording first, then the newer one.
                var salesMatch = ProvinceSalesRegex.Match(htmlResource);
                if (!salesMatch.Success)
                {
                    salesMatch = TermSalesRegex.Match(htmlResource);
                }
                if (salesMatch.Success)
                {
                    sales = Convert.ToDecimal(salesMatch.Result("$1"));
                }

                // Jackpot: taken from the last <p> sibling of the prize table.
                var paragraphs = prizeTable.ParentNode.ChildNodes.Where(N => N.Name.ToLower() == "p").ToList();
                var jackpotMatch = JackpotRegex.Match(paragraphs.Last().InnerHtml);
                if (jackpotMatch.Success)
                {
                    var potValue = jackpotMatch.Result("$1").Replace(" ", string.Empty);
                    // When the capture still contains a <br>, the figure is the text after it.
                    jackpot = potValue.Contains("<br>")
                        ? Convert.ToDecimal(potValue.Substring(potValue.IndexOf("<br>") + 4))
                        : Convert.ToDecimal(potValue);
                }

                model.Detail = BuildDetail(sales, jackpot, counts, amounts);
                return true;
            }
            catch (Exception ex)
            {
                log.Error(typeof(JSTC7WSJob), string.Format("【{0}】通过主站点优化开奖列表时发生错误,错误信息【{1}】", Config.Area + Config.LotteryName, ex.Message));
            }

            return false;
        }

        /// <summary>
        /// Reads the draw list from the backup site.
        /// </summary>
        /// <param name="backUrl">Backup site address.</param>
        /// <returns>Draws sorted by term, newest first; empty when the page is blank or has no
        /// table body. When an error occurs the items collected so far are returned unsorted and
        /// the error is logged.</returns>
        private List<OpenCode7DTModel> GetOpenListFromBackUrl(string backUrl)
        {
            var result = new List<OpenCode7DTModel>();
            try
            {
                // The random query value only varies the request URL.
                var requestUrl = string.Format("{0}?r={1}", backUrl, new Random().Next(1000, 9999));
                var htmlResource = NetHelper.GetBaiDuLeCaiResponse(requestUrl);
                if (string.IsNullOrWhiteSpace(htmlResource))
                {
                    return result;
                }

                var doc = new HtmlDocument();
                doc.LoadHtml(htmlResource);
                var tbody = doc.DocumentNode.SelectSingleNode("//tbody");
                if (tbody == null)
                {
                    return result;
                }

                foreach (var row in tbody.ChildNodes.Where(node => node.Name == "tr").ToList())
                {
                    // Hidden rows are skipped.
                    var style = row.Attributes["style"];
                    if (style != null && style.Value == "display:none")
                    {
                        continue;
                    }

                    var cells = row.ChildNodes.Where(node => node.Name == "td").ToList();
                    if (cells.Count < 4)
                    {
                        continue;
                    }

                    var model = new OpenCode7DTModel();
                    model.OpenTime = Convert.ToDateTime(cells[0].InnerText);
                    model.Term = Convert.ToInt64(cells[1].ChildNodes.Where(n => n.Name == "a").ToList()[0].InnerText.Trim());

                    if (cells[2].ChildNodes.Count == 0)
                    {
                        continue;
                    }

                    // The seven drawn digits are <span> elements under the first child of the third cell.
                    var digits = cells[2].ChildNodes[0].ChildNodes.Where(n => n.Name == "span").ToList();
                    if (digits.Count < 7)
                    {
                        continue;
                    }

                    model.OpenCode1 = Convert.ToInt32(digits[0].InnerText.Trim());
                    model.OpenCode2 = Convert.ToInt32(digits[1].InnerText.Trim());
                    model.OpenCode3 = Convert.ToInt32(digits[2].InnerText.Trim());
                    model.OpenCode4 = Convert.ToInt32(digits[3].InnerText.Trim());
                    model.OpenCode5 = Convert.ToInt32(digits[4].InnerText.Trim());
                    model.OpenCode6 = Convert.ToInt32(digits[5].InnerText.Trim());
                    model.OpenCode7 = Convert.ToInt32(digits[6].InnerText.Trim());
                    result.Add(model);
                }

                result = result.OrderByDescending(S => S.Term).ToList();
            }
            catch (Exception ex)
            {
                log.Error(typeof(JSTC7WSJob), string.Format("【{0}】通过备用站点抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + Config.LotteryName, ex.Message));
            }

            return result;
        }

        /// <summary>
        /// Completes a backup-site draw with sales, jackpot and per-tier prize information
        /// taken from the JSON embedded in its detail page.
        /// </summary>
        /// <param name="model">Draw to complete; its <c>Term</c> selects the page and the JSON entry.</param>
        /// <returns>True when the embedded JSON was found; false when it is missing or parsing
        /// failed (the error is logged).</returns>
        private bool OptimizeBackModel(ref OpenCode7DTModel model)
        {
            var url = string.Format(BackDetailUrlFormat, model.Term, new Random().Next(1000, 9999));
            try
            {
                var htmlResource = NetHelper.GetBaiDuLeCaiResponse(url);
                var phaseMatch = PhaseDataRegex.Match(htmlResource);
                if (!phaseMatch.Success)
                {
                    return false;
                }

                var phaseData = JsonConvert.DeserializeObject<dynamic>(phaseMatch.Result("$1"));
                var data = phaseData[model.Term.ToString()];
                if (data != null)
                {
                    var counts = new int[PrizeNames.Length];
                    var amounts = new decimal[PrizeNames.Length];
                    decimal jackpot = Convert.ToDecimal(data["formatPoolAmount"]);
                    decimal sales = Convert.ToDecimal(data["formatSaleAmount"]);

                    // Tiers are keyed "prize1" .. "prize6" in the same order as PrizeNames.
                    for (var tier = 0; tier < PrizeNames.Length; tier++)
                    {
                        var prize = data["list"]["prize" + (tier + 1)];
                        counts[tier] = Convert.ToInt32(prize["bet"].Value.Replace(",", string.Empty).Replace("注", string.Empty));
                        amounts[tier] = Convert.ToDecimal(prize["prize"].Value.Replace(",", string.Empty).Replace("元", string.Empty));
                    }

                    model.Detail = BuildDetail(sales, jackpot, counts, amounts);
                }

                // NOTE(review): success is reported even when the JSON has no entry for this term,
                // in which case Detail stays unset - presumably deliberate; confirm.
                return true;
            }
            catch (Exception ex)
            {
                log.Error(typeof(JSTC7WSJob), string.Format("【{0}】通过备用站点优化开奖列表时发生错误,错误信息【{1}】", Config.Area + Config.LotteryName, ex.Message));
            }

            return false;
        }

        /// <summary>
        /// Formats the detail string from the sales figure, the jackpot and the six prize tiers.
        /// </summary>
        /// <param name="sales">Sales amount.</param>
        /// <param name="jackpot">Jackpot amount.</param>
        /// <param name="counts">Winning bet count per tier, indexed like <see cref="PrizeNames"/>.</param>
        /// <param name="amounts">Prize amount per tier, indexed like <see cref="PrizeNames"/>.</param>
        private static string BuildDetail(decimal sales, decimal jackpot, int[] counts, decimal[] amounts)
        {
            return string.Format(DetailFormat,
                sales, jackpot,
                counts[0], amounts[0],
                counts[1], amounts[1],
                counts[2], amounts[2],
                counts[3], amounts[3],
                counts[4], amounts[4],
                counts[5], amounts[5]);
        }

        #region Attribute

        /// <summary>
        /// Job configuration, read from the job data map on every execution.
        /// </summary>
        private SCCConfig Config = null;

        /// <summary>
        /// Newest draw known to this job; persisted in the job data map between executions.
        /// </summary>
        private OpenCode7DTModel LatestItem = null;

        /// <summary>
        /// Terms whose crawl failed, as returned by the data service.
        /// </summary>
        private List<string> FailedQiHaoList = null;

        /// <summary>
        /// Logger.
        /// </summary>
        private readonly LogHelper log;

        /// <summary>
        /// Data service used to read and store draws.
        /// </summary>
        private readonly IDTOpenCode services;

        /// <summary>
        /// Lottery handled by this job.
        /// </summary>
        private SCCLottery currentLottery
        {
            get { return SCCLottery.JSTC7WS; }
        }

        /// <summary>
        /// E-mail service used to queue reminders for missing draws.
        /// </summary>
        private readonly IEmail email;

        /// <summary>
        /// Whether the current execution stored at least one draw.
        /// </summary>
        private bool isGetData = false;

        #endregion
    }
}