using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Quartz;
using SCC.Common;
using SCC.Models;
using SCC.Interface;
using Newtonsoft.Json;
using HtmlAgilityPack;
namespace SCC.Crawler.DT
{
/// <summary>
/// Data crawling job
/// Zhejiang Sports Lottery 6+1
/// </summary>
[DisallowConcurrentExecution]
[PersistJobDataAfterExecution]
public class ZJTC6J1Job : IJob
{
/// <summary>
/// Constructor: creates the log helper and obtains the data service and the e-mail
/// service from the IOC container.
/// </summary>
public ZJTC6J1Job()
{
log = new LogHelper();
// NOTE(review): both Resolve calls appear without a type argument here; presumably the
// generic arguments (matching the field types IDTOpenCode / IEmail) were lost — confirm.
services = IOC.Resolve();
email = IOC.Resolve();
}
/// <summary>
/// Job entry point: loads the configuration and the last known draw, runs the catch-up
/// check for earlier draws when required, crawls today's draw from the main and backup
/// sites, and finally stores the newest draw record back into the job data map.
/// </summary>
/// <param name="context">Job execution context</param>
public void Execute(IJobExecutionContext context)
{
    var dataMap = context.JobDetail.JobDataMap;
    Config = CommonHelper.GetConfigFromDataMap(dataMap);

    // Dates listed in the configuration as skip dates (e.g. holidays) have no draw.
    var todayStamp = CommonHelper.SCCSysDateTime.ToString("yyyyMMdd");
    if (Config.SkipDate.Contains(todayStamp))
    {
        return;
    }

    LatestItem = dataMap["LatestItem"] as OpenCode7DTModel;
    try
    {
        if (LatestItem == null)
        {
            // Nothing carried over in the data map: take the last record known to the data
            // service, or a placeholder when the service has no record either.
            LatestItem = services.GetOpenCode7DTLastItem(currentLottery);
            if (LatestItem == null)
            {
                LatestItem = CreateYearStartItem();
            }
        }

        // Catch-up check for draws that should already have been stored.
        isGetData = false;
        if (CommonHelper.CheckDTIsNeedGetData(Config))
        {
            CheckingOpenDayTheLotteryData();
        }

        // Year rollover: the term no longer starts with the current two-digit year,
        // so restart from the placeholder of the new year.
        var latestTermText = LatestItem.Term.ToString();
        var yearPrefix = CommonHelper.SCCSysDateTime.ToString("yy");
        if (!latestTermText.StartsWith(yearPrefix))
        {
            LatestItem = CreateYearStartItem();
        }

        // Crawl only on draw days and once the system hour is past 12.
        // NOTE(review): the original comment said "after 8 pm" while the code checks
        // Hour > 12 — confirm which one is intended.
        var isDrawDay = CommonHelper.CheckTodayIsOpenDay(Config);
        if (isDrawDay && CommonHelper.SCCSysDateTime.Hour > 12)
        {
            DoTodayJobByMainUrl();
            DoTodayJobByBackUrl();
        }

        if (isGetData)
        {
            TrendChartHelper.GenerateZJTC6J1TrendChart(log);
        }
    }
    catch (Exception ex)
    {
        log.Error(typeof(ZJTC6J1Job), string.Format("【{0}】抓取时发生错误,错误信息【{1}】", Config.Area + Config.LotteryName, ex.Message));
    }

    // Keep the newest record for the next run.
    dataMap["LatestItem"] = LatestItem;
}

/// <summary>
/// Builds the placeholder record used when no draw of the current year is known yet:
/// term sequence 0 and an open time of January 1st of the current system year.
/// </summary>
/// <returns>The placeholder record</returns>
private OpenCode7DTModel CreateYearStartItem()
{
    return new OpenCode7DTModel()
    {
        Term = CommonHelper.GenerateQiHaoYYQQQ(0),
        OpenTime = new DateTime(CommonHelper.SCCSysDateTime.Year, 1, 1)
    };
}
/// <summary>
/// Self-check for draws that were not crawled: retries the terms reported as failed by the
/// data service, queues an e-mail alert for every term that is still missing, and makes one
/// more attempt for yesterday's draw when the latest record is not dated yesterday.
/// </summary>
private void CheckingOpenDayTheLotteryData()
{
    // Terms the data service reports as failed.
    FailedQiHaoList = services.GetFailedYYQQQList(currentLottery);
    if (FailedQiHaoList.Count > 0)
    {
        DoYesterdayFailedListByMainUrl();
        DoYesterdayFailedListByBackUrl();

        // Whatever is left in the list could not be recovered from either site.
        foreach (var stillMissing in FailedQiHaoList)
        {
            QueueMissingTermAlert(stillMissing);
        }
    }

    if (WasLatestDrawYesterday())
    {
        return;
    }

    // The latest record is not from yesterday: crawl once more before raising an alert.
    DoTodayJobByMainUrl();
    DoTodayJobByBackUrl();
    if (WasLatestDrawYesterday())
    {
        return;
    }

    var expectedTerm = (LatestItem.Term + 1).ToString();
    QueueMissingTermAlert(expectedTerm);
}

/// <summary>
/// Tells whether the open time of LatestItem falls on the day before the current system date.
/// </summary>
/// <returns>True when both dates format to the same yyyyMMdd value</returns>
private bool WasLatestDrawYesterday()
{
    var latestDay = LatestItem.OpenTime.ToString("yyyyMMdd");
    var yesterday = CommonHelper.SCCSysDateTime.AddDays(-1).ToString("yyyyMMdd");
    return latestDay == yesterday;
}

/// <summary>
/// Hands a missing term to the e-mail service and writes an error log entry when the
/// service reports that the entry was added.
/// </summary>
/// <param name="qiHao">Term that could not be crawled</param>
private void QueueMissingTermAlert(string qiHao)
{
    var added = email.AddEmail(Config.Area + Config.LotteryName, qiHao, CommonHelper.GenerateDTOpenTime(Config));
    if (added)
    {
        log.Error(typeof(ZJTC6J1Job), CommonHelper.GetJobLogError(Config, qiHao));
    }
}
/// <summary>
/// Crawls new draws from the main site (Zhejiang sports lottery site): every term after
/// LatestItem up to the newest listed term is completed with its detail page and saved.
/// </summary>
private void DoTodayJobByMainUrl()
{
    if (string.IsNullOrEmpty(Config.MainUrl))
    {
        return;
    }

    var openList = GetOpenListFromMainUrl(Config.MainUrl);
    if (openList.Count == 0)
    {
        return; // nothing was parsed from the list page
    }

    // A term reads YYQQQ: a two-digit year followed by the sequence number within the year.
    var newestTermText = openList.First().Term.ToString();
    var latestTermText = LatestItem.Term.ToString();
    var firstSequence = Convert.ToInt32(latestTermText.Substring(2)) + 1;
    var lastSequence = Convert.ToInt32(newestTermText.Substring(2));
    if (firstSequence > lastSequence)
    {
        return; // the list holds nothing newer than what is already stored
    }

    // The year prefix stays the same for the whole loop: every stored item matches a term
    // that was built from this very prefix.
    var yearPrefix = latestTermText.Substring(0, 2);
    foreach (var sequence in Enumerable.Range(firstSequence, lastSequence - firstSequence + 1))
    {
        var wantedTerm = yearPrefix + sequence.ToString().PadLeft(3, '0');
        var candidate = openList.FirstOrDefault(R => R.Term.ToString() == wantedTerm);
        if (candidate == null)
        {
            continue;
        }
        if (!OptimizeMainModel(ref candidate))
        {
            continue;
        }
        if (!services.AddDTOpen7Code(currentLottery, candidate))
        {
            continue;
        }

        // Stored successfully.
        log.Info(typeof(ZJTC6J1Job), CommonHelper.GetJobMainLogInfo(Config, wantedTerm));
        LatestItem = candidate;
        isGetData = true;
    }
}
/// <summary>
/// Retries every term of the failed list against the main site (Zhejiang sports lottery
/// site). Terms that are found, completed and saved are removed from the failed list.
/// </summary>
private void DoYesterdayFailedListByMainUrl()
{
    if (string.IsNullOrEmpty(Config.MainUrl) || FailedQiHaoList.Count <= 0)
    {
        return;
    }

    var openList = GetOpenListFromMainUrl(Config.MainUrl);
    if (openList.Count == 0)
    {
        return; // nothing was parsed from the list page
    }

    // Recovered terms are collected first; the failed list cannot be changed while it is
    // being enumerated.
    var recovered = new List<string>();
    foreach (string missingTerm in FailedQiHaoList)
    {
        var candidate = openList.FirstOrDefault(R => R.Term.ToString() == missingTerm);
        if (candidate == null)
        {
            continue;
        }
        if (!OptimizeMainModel(ref candidate))
        {
            continue;
        }
        if (!services.AddDTOpen7Code(currentLottery, candidate))
        {
            continue;
        }

        // Stored successfully.
        log.Info(typeof(ZJTC6J1Job), CommonHelper.GetJobMainLogInfo(Config, missingTerm));
        if (candidate.Term > LatestItem.Term)
        {
            LatestItem = candidate;
        }
        isGetData = true;
        recovered.Add(missingTerm);
    }

    recovered.ForEach(term => FailedQiHaoList.Remove(term));
}
/// <summary>
/// Downloads the draw list page of the main site and parses its first table into draw
/// records. Rows that lack the expected cells, the seven number spans or a quoted detail
/// link are skipped; parsing stops at the first row whose term starts with last year's
/// two-digit prefix.
/// </summary>
/// <param name="mainUrl">Main site list address; the query string is appended here</param>
/// <returns>
/// The parsed records ordered by term, newest first. Download or parse errors are logged
/// and whatever was parsed up to that point is returned (possibly an empty list).
/// </returns>
private List<OpenCode7DTModel> GetOpenListFromMainUrl(string mainUrl)
{
    var result = new List<OpenCode7DTModel>();
    try
    {
        var requestUrl = mainUrl + "?flag=1&expect=200&page=1";
        var resourceUrl = new Uri(requestUrl);
        var htmlResource = NetHelper.GetUrlResponse(requestUrl, Encoding.GetEncoding("gb2312"));
        if (string.IsNullOrWhiteSpace(htmlResource)) return result;

        var doc = new HtmlDocument();
        doc.LoadHtml(htmlResource);
        var table = doc.DocumentNode.SelectSingleNode("//table");
        if (table == null) return result;

        var trs = table.ChildNodes.Where(n => string.Equals(n.Name, "tr", StringComparison.OrdinalIgnoreCase)).ToList();
        var lastYear = (CommonHelper.SCCSysDateTime.Year - 1).ToString().Substring(2);
        for (var i = 1; i < trs.Count; i++) // row 0 is skipped (header row)
        {
            var tds = trs[i].ChildNodes.Where(n => string.Equals(n.Name, "td", StringComparison.OrdinalIgnoreCase)).ToList();
            if (tds.Count < 5) continue;

            // Trimmed so that surrounding whitespace cannot hide a previous-year term.
            var termText = tds[1].InnerText.Trim();
            if (termText.StartsWith(lastYear, StringComparison.Ordinal)) break;

            var spans = tds[3].ChildNodes.Where(n => string.Equals(n.Name, "span", StringComparison.OrdinalIgnoreCase)).ToList();
            if (spans.Count != 7) continue;

            // The detail address is the text between the first two single quotes of the
            // link's href. A row without such a link is skipped instead of throwing, so one
            // malformed row no longer aborts the parsing of all remaining rows.
            var anchor = tds[4].ChildNodes.FirstOrDefault(n => string.Equals(n.Name, "a", StringComparison.OrdinalIgnoreCase));
            if (anchor == null) continue;
            var hrefAttribute = anchor.Attributes["href"];
            if (hrefAttribute == null || hrefAttribute.Value == null) continue;
            var hrefValue = hrefAttribute.Value;
            var openQuote = hrefValue.IndexOf('\'');
            var closeQuote = openQuote < 0 ? -1 : hrefValue.IndexOf('\'', openQuote + 1);
            if (closeQuote < 0) continue;
            var detailUrl = hrefValue.Substring(openQuote + 1, closeQuote - openQuote - 1);

            result.Add(new OpenCode7DTModel()
            {
                Term = Convert.ToInt32(termText),
                OpenCode1 = Convert.ToInt32(spans[0].InnerText.Trim()),
                OpenCode2 = Convert.ToInt32(spans[1].InnerText.Trim()),
                OpenCode3 = Convert.ToInt32(spans[2].InnerText.Trim()),
                OpenCode4 = Convert.ToInt32(spans[3].InnerText.Trim()),
                OpenCode5 = Convert.ToInt32(spans[4].InnerText.Trim()),
                OpenCode6 = Convert.ToInt32(spans[5].InnerText.Trim()),
                OpenCode7 = Convert.ToInt32(spans[6].InnerText.Trim()),
                OpenTime = Convert.ToDateTime(tds[0].InnerText),
                // Resolve the (possibly relative) detail address against the list page.
                DetailUrl = new Uri(resourceUrl, detailUrl).AbsoluteUri
            });
        }
    }
    catch (Exception ex)
    {
        log.Error(typeof(ZJTC6J1Job), string.Format("【{0}】通过主站点抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + Config.LotteryName, ex.Message));
    }

    // Sorted outside the try block: callers take First() as the newest term, which must
    // also hold for a partial list returned after an error.
    return result.OrderByDescending(S => S.Term).ToList();
}
/// <summary>
/// Completes a main-site draw record with the prize details of its detail page: the number
/// of winners and the prize amount of the six prize levels, the sales amount and the
/// jackpot amount, all written to <c>model.Detail</c> as one formatted string.
/// </summary>
/// <param name="model">Record whose DetailUrl is downloaded and whose Detail is filled</param>
/// <returns>
/// True when the detail page was downloaded and parsed; false when the page is empty, the
/// element with id "result" or its tbody is missing, or an error occurred (errors are logged).
/// </returns>
private bool OptimizeMainModel(ref OpenCode7DTModel model)
{
    try
    {
        var htmlResource = NetHelper.GetUrlResponse(model.DetailUrl, Encoding.GetEncoding("gb2312"));
        if (string.IsNullOrEmpty(htmlResource)) return false;

        var doc = new HtmlDocument();
        doc.LoadHtml(htmlResource);
        var table = doc.GetElementbyId("result");
        if (table == null) return false;
        // ".//tbody" keeps the search inside the result table; a leading "//" is an absolute
        // path and would return the first tbody of the whole document.
        var tbody = table.SelectSingleNode(".//tbody");
        if (tbody == null) return false;
        var trs = tbody.ChildNodes.Where(n => string.Equals(n.Name, "tr", StringComparison.OrdinalIgnoreCase)).ToList();

        // Prize level captions in output order; the position in this array is the index
        // into the two result arrays below. Levels that are not found stay at 0.
        string[] levelNames = { "特等奖", "一等奖", "二等奖", "三等奖", "四等奖", "五等奖" };
        var levelNums = new int[levelNames.Length];
        var levelMoneys = new decimal[levelNames.Length];
        for (var i = 1; i < trs.Count; i++) // row 0 is the header row
        {
            var tds = trs[i].ChildNodes.Where(n => string.Equals(n.Name, "td", StringComparison.OrdinalIgnoreCase)).ToList();
            if (tds.Count < 3) continue;
            var levelIndex = Array.IndexOf(levelNames, tds[0].InnerText);
            if (levelIndex < 0) continue; // not a prize level row
            levelNums[levelIndex] = Convert.ToInt32(tds[1].InnerText.Replace("注", string.Empty).Replace(",", string.Empty).Trim());
            levelMoneys[levelIndex] = Convert.ToDecimal(tds[2].InnerText.Replace("元", string.Empty).Trim());
        }

        // Sales and jackpot are taken from the raw page text; both stay 0 when not found.
        decimal salesAmount = 0, jackpotAmount = 0;
        var salesMatch = new Regex(@"本期销售金额:([\s\S]*?)元").Match(htmlResource);
        if (salesMatch.Success)
        {
            salesAmount = Convert.ToDecimal(salesMatch.Result("$1"));
        }
        var jackpotMatch = new Regex(@"([\d,.]*?)元奖金滚入下期奖池").Match(htmlResource);
        if (jackpotMatch.Success)
        {
            jackpotAmount = Convert.ToDecimal(jackpotMatch.Result("$1"));
        }

        model.Detail = string.Format("{0},{1}^特等奖|{2}|{3},一等奖|{4}|{5},二等奖|{6}|{7},三等奖|{8}|{9},四等奖|{10}|{11},五等奖|{12}|{13}",
            salesAmount, jackpotAmount, levelNums[0], levelMoneys[0], levelNums[1], levelMoneys[1], levelNums[2], levelMoneys[2],
            levelNums[3], levelMoneys[3], levelNums[4], levelMoneys[4], levelNums[5], levelMoneys[5]);
        return true;
    }
    catch (Exception ex)
    {
        log.Error(typeof(ZJTC6J1Job), string.Format("【{0}】通过主站点优化开奖列表时发生错误,错误信息【{1}】", Config.Area + Config.LotteryName, ex.Message));
    }
    return false;
}
/// <summary>
/// Crawls new draws from the backup site: every term after LatestItem up to the newest
/// listed term is completed with its detail page and saved.
/// </summary>
private void DoTodayJobByBackUrl()
{
    if (string.IsNullOrEmpty(Config.BackUrl))
    {
        return;
    }

    var openList = GetOpenListFromBackUrl(Config.BackUrl);
    if (openList.Count == 0)
    {
        return; // nothing was parsed from the list page
    }

    // A term reads YYQQQ: a two-digit year followed by the sequence number within the year.
    var newestTermText = openList.First().Term.ToString();
    var latestTermText = LatestItem.Term.ToString();
    var firstSequence = Convert.ToInt32(latestTermText.Substring(2)) + 1;
    var lastSequence = Convert.ToInt32(newestTermText.Substring(2));
    if (firstSequence > lastSequence)
    {
        return; // the list holds nothing newer than what is already stored
    }

    // The year prefix stays the same for the whole loop: every stored item matches a term
    // that was built from this very prefix.
    var yearPrefix = latestTermText.Substring(0, 2);
    foreach (var sequence in Enumerable.Range(firstSequence, lastSequence - firstSequence + 1))
    {
        var wantedTerm = yearPrefix + sequence.ToString().PadLeft(3, '0');
        var candidate = openList.FirstOrDefault(R => R.Term.ToString() == wantedTerm);
        if (candidate == null)
        {
            continue;
        }
        if (!OptimizeBackModel(ref candidate))
        {
            continue;
        }
        if (!services.AddDTOpen7Code(currentLottery, candidate))
        {
            continue;
        }

        // Stored successfully.
        log.Info(typeof(ZJTC6J1Job), CommonHelper.GetJobBackLogInfo(Config, wantedTerm));
        LatestItem = candidate;
        isGetData = true;
    }
}
/// <summary>
/// Retries every term of the failed list against the backup site. Terms that are found,
/// completed and saved are removed from the failed list.
/// </summary>
private void DoYesterdayFailedListByBackUrl()
{
    if (string.IsNullOrEmpty(Config.BackUrl) || FailedQiHaoList.Count <= 0)
    {
        return;
    }

    var openList = GetOpenListFromBackUrl(Config.BackUrl);
    if (openList.Count == 0)
    {
        return; // nothing was parsed from the list page
    }

    // Recovered terms are collected first; the failed list cannot be changed while it is
    // being enumerated.
    var recovered = new List<string>();
    foreach (string missingTerm in FailedQiHaoList)
    {
        var candidate = openList.FirstOrDefault(R => R.Term.ToString() == missingTerm);
        if (candidate == null)
        {
            continue;
        }
        if (!OptimizeBackModel(ref candidate))
        {
            continue;
        }
        if (!services.AddDTOpen7Code(currentLottery, candidate))
        {
            continue;
        }

        // Stored successfully.
        log.Info(typeof(ZJTC6J1Job), CommonHelper.GetJobBackLogInfo(Config, missingTerm));
        if (candidate.Term > LatestItem.Term)
        {
            LatestItem = candidate;
        }
        recovered.Add(missingTerm);
        isGetData = true;
    }

    recovered.ForEach(term => FailedQiHaoList.Remove(term));
}
/// <summary>
/// Downloads the draw list page of the backup site and parses its first table into draw
/// records. Hidden rows ("display:none"), rows with fewer than 11 cells and rows with fewer
/// than seven number nodes are skipped; parsing stops at the first row whose term starts
/// with last year's two-digit prefix.
/// </summary>
/// <param name="backUrl">Backup site list address; also the prefix of each detail address</param>
/// <returns>
/// The parsed records ordered by term, newest first. On a download or parse error the error
/// is logged and the records collected so far are returned as they are.
/// </returns>
private List<OpenCode7DTModel> GetOpenListFromBackUrl(string backUrl)
{
    var records = new List<OpenCode7DTModel>();
    try
    {
        var html = NetHelper.GetUrlResponse(backUrl);
        if (string.IsNullOrWhiteSpace(html))
        {
            return records;
        }

        var document = new HtmlDocument();
        document.LoadHtml(html);
        var table = document.DocumentNode.SelectSingleNode("//table");
        if (table == null)
        {
            return records;
        }

        var rows = table.ChildNodes.Where(node => node.Name.ToLower() == "tr").ToList();
        var lastYear = (CommonHelper.SCCSysDateTime.Year - 1).ToString().Substring(2);
        foreach (var row in rows)
        {
            var rowStyle = row.Attributes["style"];
            if (rowStyle != null && rowStyle.Value == "display:none")
            {
                continue;
            }

            var cells = row.ChildNodes.Where(node => node.Name.ToLower() == "td").ToList();
            if (cells.Count < 11)
            {
                continue;
            }
            if (cells[0].InnerText.Trim().StartsWith(lastYear))
            {
                break;
            }

            // Open time and term are converted before the number nodes are counted, in the
            // same order as before, so a malformed value still ends the parsing here.
            var openTime = Convert.ToDateTime(cells[9].InnerText.Trim());
            var term = Convert.ToInt64(cells[0].InnerText.Trim());
            var balls = cells[1].ChildNodes.Where(n => n.Name.ToLower() == "i").ToList();
            if (balls.Count < 7)
            {
                continue;
            }

            var codes = balls.Take(7).Select(ball => Convert.ToInt32(ball.InnerText.Trim())).ToArray();
            var model = new OpenCode7DTModel();
            model.OpenTime = openTime;
            model.Term = term;
            model.OpenCode1 = codes[0];
            model.OpenCode2 = codes[1];
            model.OpenCode3 = codes[2];
            model.OpenCode4 = codes[3];
            model.OpenCode5 = codes[4];
            model.OpenCode6 = codes[5];
            model.OpenCode7 = codes[6];
            model.DetailUrl = backUrl + model.Term + "/";
            records.Add(model);
        }

        records = records.OrderByDescending(S => S.Term).ToList();
    }
    catch (Exception ex)
    {
        log.Error(typeof(ZJTC6J1Job), string.Format("【{0}】通过备用站点抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + Config.LotteryName, ex.Message));
    }
    return records;
}
/// <summary>
/// Completes a backup-site draw record with the prize details of its detail page: the
/// number of winners and the prize amount of the six prize levels, the sales amount and the
/// jackpot amount, all written to <c>model.Detail</c> as one formatted string.
/// </summary>
/// <param name="model">Record whose DetailUrl is downloaded and whose Detail is filled</param>
/// <returns>
/// True when the detail page was downloaded and parsed; false when the page is empty, holds
/// no table, or an error occurred (errors are logged).
/// </returns>
private bool OptimizeBackModel(ref OpenCode7DTModel model)
{
    try
    {
        var htmlResource = NetHelper.GetUrlResponse(model.DetailUrl);
        if (string.IsNullOrEmpty(htmlResource)) return false;

        var doc = new HtmlDocument();
        doc.LoadHtml(htmlResource);
        // SelectNodes returns null (not an empty collection) when nothing matches, so the
        // null check is required before Count can be read.
        var tables = doc.DocumentNode.SelectNodes("//table");
        if (tables == null || tables.Count < 1) return false;
        var trs = tables[0].ChildNodes.Where(n => string.Equals(n.Name, "tr", StringComparison.OrdinalIgnoreCase)).ToList();

        // Prize level captions in output order; the position in this array is the index
        // into the two result arrays below. Levels that are not found stay at 0.
        string[] levelNames = { "特等奖", "一等奖", "二等奖", "三等奖", "四等奖", "五等奖" };
        var levelNums = new int[levelNames.Length];
        var levelMoneys = new decimal[levelNames.Length];
        for (var i = 1; i < trs.Count; i++) // row 0 is the header row
        {
            var tds = trs[i].ChildNodes.Where(n => string.Equals(n.Name, "td", StringComparison.OrdinalIgnoreCase)).ToList();
            if (tds.Count < 4) continue;
            var levelIndex = Array.IndexOf(levelNames, tds[1].InnerText);
            if (levelIndex < 0) continue; // not a prize level row
            levelNums[levelIndex] = Convert.ToInt32(tds[2].InnerText.Replace("注", string.Empty).Replace(",", string.Empty).Trim());
            // "--" in the amount cell is stored as 0.
            var moneyText = tds[3].InnerText.Replace("元", string.Empty).Trim();
            levelMoneys[levelIndex] = Convert.ToDecimal(moneyText == "--" ? "0" : moneyText);
        }

        // Sales and jackpot are taken from the raw page text; both stay 0 when not found.
        decimal salesAmount = 0, jackpotAmount = 0;
        var salesMatch = new Regex(@"投注总额:([\d,.]*?) 元").Match(htmlResource);
        if (salesMatch.Success)
        {
            salesAmount = Convert.ToDecimal(salesMatch.Result("$1"));
        }
        var jackpotMatch = new Regex(@"奖池资金累计金额:([\d,.]*?) 元").Match(htmlResource);
        if (jackpotMatch.Success)
        {
            jackpotAmount = Convert.ToDecimal(jackpotMatch.Result("$1"));
        }

        model.Detail = string.Format("{0},{1}^特等奖|{2}|{3},一等奖|{4}|{5},二等奖|{6}|{7},三等奖|{8}|{9},四等奖|{10}|{11},五等奖|{12}|{13}",
            salesAmount, jackpotAmount, levelNums[0], levelMoneys[0], levelNums[1], levelMoneys[1], levelNums[2], levelMoneys[2],
            levelNums[3], levelMoneys[3], levelNums[4], levelMoneys[4], levelNums[5], levelMoneys[5]);
        return true;
    }
    catch (Exception ex)
    {
        // The message names the backup site, in line with the backup list parser; it used
        // to say "main site" here.
        log.Error(typeof(ZJTC6J1Job), string.Format("【{0}】通过备用站点优化开奖列表时发生错误,错误信息【{1}】", Config.Area + Config.LotteryName, ex.Message));
    }
    return false;
}
#region Attribute
/// <summary>
/// Job configuration, read from the job data map at the start of Execute
/// </summary>
private SCCConfig Config = null;
/// <summary>
/// Newest draw record known to the job; restored from and saved back to the job data map
/// under the key "LatestItem"
/// </summary>
private OpenCode7DTModel LatestItem = null;
/// <summary>
/// Terms whose crawl failed, as returned by the data service; recovered terms are removed
/// </summary>
// NOTE(review): the generic type argument appears to be missing here; the list is
// enumerated as strings elsewhere in this class — confirm against the original file.
private List FailedQiHaoList = null;
/// <summary>
/// Log helper
/// </summary>
private LogHelper log = null;
/// <summary>
/// Data service
/// </summary>
private IDTOpenCode services = null;
/// <summary>
/// Lottery handled by this job (always SCCLottery.ZJTC6J1)
/// </summary>
private SCCLottery currentLottery
{
get
{
return SCCLottery.ZJTC6J1;
}
}
/// <summary>
/// E-mail service used to queue alerts for terms that could not be crawled
/// </summary>
private IEmail email = null;
/// <summary>
/// Whether draw data was stored during the current run; reset to false in Execute
/// </summary>
private bool isGetData = false;
#endregion
}
}