using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlAgilityPack;
using Quartz;
using SCC.Common;
using SCC.Interface;
using SCC.Models;
namespace SCC.Crawler.LotterySkill
{
/// <summary>
/// Quartz job that crawls lottery-skill articles of type <c>LotterySkillType.SHSSC</c>
/// from the configured main site and passes each parsed article to the data service.
/// </summary>
public class SHSSCSkillJob : IJob
{
    /// <summary>
    /// Creates the job and resolves the logger, the data service and the e-mail service.
    /// </summary>
    public SHSSCSkillJob()
    {
        log = new LogHelper();
        services = IOC.Resolve<IDTOpenCode>();
        email = IOC.Resolve<IEmail>();
    }

    /// <summary>
    /// Job entry point: reads the crawler configuration from the job data map
    /// and runs the main-site crawl.
    /// </summary>
    /// <param name="context">Quartz execution context that carries the job data map.</param>
    public void Execute(IJobExecutionContext context)
    {
        Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
        DoMainUrl();
    }

    /// <summary>
    /// Crawls every configured list page and submits each parsed article to the data service.
    /// </summary>
    private void DoMainUrl()
    {
        foreach (string listUrl in GetMainUrl(Config))
        {
            foreach (LotterySkillModel skill in GetOpenListFromMainUrl(listUrl))
            {
                // Only log (and flag the run) when the service reports success.
                if (services.LotterySkillModel(currentLottery, skill))
                {
                    log.Info(GetType(), CommonHelper.GetJobMainLogInfo(Config, skill.Title));
                    isGetData = true;
                }
            }
        }
    }

    /// <summary>
    /// Builds the list-page URLs by formatting <c>config.MainUrl</c> with the page numbers
    /// 1..<c>config.MainUrlPages</c> (at least one page).
    /// </summary>
    /// <param name="config">Crawler configuration supplying the URL template and page count.</param>
    /// <returns>Distinct page URLs in page order; empty when no URL template is configured.</returns>
    private List<string> GetMainUrl(SCCConfig config)
    {
        var urlList = new List<string>();
        string template = config.MainUrl;
        if (string.IsNullOrWhiteSpace(template))
        {
            // string.Format(null, ...) would throw out of the job; log and crawl nothing instead.
            LogCrawlError("未配置主站地址(MainUrl)");
            return urlList;
        }

        int pages = config.MainUrlPages > 0 ? config.MainUrlPages : 1;
        // A template without a "{0}" placeholder yields the same URL for every page,
        // so duplicates are dropped while keeping first-seen order.
        var seen = new HashSet<string>();
        for (int page = 1; page <= pages; page++)
        {
            string pageUrl = string.Format(template, page);
            if (seen.Add(pageUrl))
            {
                urlList.Add(pageUrl);
            }
        }
        return urlList;
    }

    /// <summary>
    /// Downloads one list page, collects the article links found under the
    /// <c>art-list</c> element and fetches the detail of every distinct link.
    /// </summary>
    /// <param name="mainUrl">Absolute URL of the list page.</param>
    /// <returns>
    /// The successfully parsed articles; empty when the page cannot be downloaded,
    /// contains no matching links, or an error occurs (errors are logged, not thrown).
    /// </returns>
    private List<LotterySkillModel> GetOpenListFromMainUrl(string mainUrl)
    {
        var result = new List<LotterySkillModel>();
        try
        {
            // The original constructed a Uri only to reject malformed page URLs;
            // the same validation is kept, but explicit and without an exception.
            Uri pageUri;
            if (!Uri.TryCreate(mainUrl, UriKind.Absolute, out pageUri))
            {
                LogCrawlError("无效的主站地址:" + mainUrl);
                return result;
            }

            var html = NetHelper.GetUrlResponse(mainUrl, Encoding.UTF8);
            if (html == null)
            {
                return result;
            }

            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            // All <a> elements directly inside the list items of the article list.
            HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes("//*[@class='art-list']/ul/li/a");
            if (anchors == null)
            {
                return result;
            }

            var detailUrls = new List<string>();
            var seen = new HashSet<string>();
            foreach (HtmlNode anchor in anchors)
            {
                // GetAttributeValue does not throw on a duplicated href attribute
                // (SingleOrDefault did, which aborted the whole page).
                string href = anchor.GetAttributeValue("href", string.Empty);
                if (string.IsNullOrWhiteSpace(href))
                {
                    continue;
                }

                // Relative links are prefixed with Host exactly as before; links that are
                // already absolute are used as-is instead of being corrupted by the prefix.
                // NOTE(review): Host ends with '/', so a root-relative href yields "//" —
                // left unchanged because previously produced URLs may be relied on; confirm.
                string detailUrl = IsAbsoluteHttpUrl(href) ? href : Host + href;
                if (seen.Add(detailUrl))
                {
                    detailUrls.Add(detailUrl);
                }
            }

            foreach (string detailUrl in detailUrls)
            {
                LotterySkillModel skill = GetSkillModel(detailUrl);
                // Pages that could not be fetched or parsed are skipped rather than
                // passed on as blank records.
                if (skill != null)
                {
                    result.Add(skill);
                }
            }
        }
        catch (Exception ex)
        {
            LogCrawlError(ex.Message);
        }
        return result;
    }

    /// <summary>
    /// Downloads one article page and maps it to a <see cref="LotterySkillModel"/>.
    /// </summary>
    /// <param name="url">Absolute URL of the article page.</param>
    /// <returns>
    /// The populated model, or <c>null</c> when the page cannot be downloaded or does not
    /// have the expected structure (errors are logged, not thrown).
    /// </returns>
    private LotterySkillModel GetSkillModel(string url)
    {
        try
        {
            var html = NetHelper.GetUrlResponse(url, Encoding.UTF8);
            if (html == null)
            {
                return null;
            }

            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            // 'artile' (sic) is the class name the original code queries;
            // presumably it matches the site's markup — verify before "fixing" the spelling.
            HtmlNode article = doc.DocumentNode.SelectSingleNode("//*[@class='artile']");
            if (article == null)
            {
                LogCrawlError("未找到文章节点:" + url);
                return null;
            }

            HtmlNode heading = article.ChildNodes.FirstOrDefault(node => node.Name == "h1");
            List<HtmlNode> sections = article.ChildNodes.Where(node => node.Name == "div").ToList();
            // The content is read from the second direct <div> child, so at least two are required.
            if (heading == null || sections.Count < 2)
            {
                LogCrawlError("页面结构不符合预期:" + url);
                return null;
            }

            return new LotterySkillModel
            {
                Title = heading.InnerText.Trim(),
                // NOTE(review): author tag "cn55128" differs from the host "55125" — confirm intended.
                Author = "cn55128",
                Content = sections[1].InnerHtml.Trim(),
                IsDelete = false,
                SourceUrl = url,
                TypeId = lotterySkillType,
                TypeName = lotterySkillType.GetEnumDescription()
            };
        }
        catch (Exception ex)
        {
            LogCrawlError(ex.Message);
            return null;
        }
    }

    /// <summary>
    /// Returns true when <paramref name="href"/> already starts with an http/https scheme.
    /// </summary>
    private static bool IsAbsoluteHttpUrl(string href)
    {
        return href.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            || href.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Writes a crawl error to the log using the message format shared by all crawl steps.
    /// </summary>
    /// <param name="detail">Error detail inserted into the message.</param>
    private void LogCrawlError(string detail)
    {
        log.Error(GetType(),
            string.Format("【{0}】通过主抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, detail));
    }

    #region Attribute
    /// <summary>
    /// Host prefix prepended to relative article links.
    /// </summary>
    public string Host = "http://www.55125.cn/";

    /// <summary>
    /// Crawler configuration, loaded from the job data map in <see cref="Execute"/>.
    /// </summary>
    private SCCConfig Config;

    /// <summary>
    /// Latest item crawled today. Not used within this class.
    /// </summary>
    private LotterySkillModel LatestItem = null;

    /// <summary>
    /// Items that failed to crawl today. Not used within this class.
    /// NOTE(review): element type assumed to be string — confirm against sibling jobs.
    /// </summary>
    private List<string> FailedQiHaoList = null;

    /// <summary>
    /// Logger.
    /// </summary>
    private readonly LogHelper log;

    /// <summary>
    /// Data service that receives the crawled articles.
    /// </summary>
    private readonly IDTOpenCode services;

    /// <summary>
    /// Lottery category passed to the data service.
    /// </summary>
    private SCCLottery currentLottery => SCCLottery.LotterySkill;

    /// <summary>
    /// Skill type assigned to every crawled article (SHSSC).
    /// </summary>
    private readonly LotterySkillType lotterySkillType = LotterySkillType.SHSSC;

    /// <summary>
    /// E-mail service. Resolved in the constructor; not used within this class.
    /// </summary>
    private readonly IEmail email;

    /// <summary>
    /// Set to true once at least one article was accepted by the data service in this run.
    /// Not read within this class.
    /// </summary>
    private bool isGetData = false;
    #endregion
}
}