// QTNewsJob is a Quartz IJob. Execute loads an SCCConfig from the job data map; DoMainUrl then crawls the list pages under http://www.zhcw.com, builds a LotteryNewsModel per linked article, and passes each model whose Content is not null to services.LotteryNewsModel.
// NOTE(review): this copy of the file looks damaged. Line breaks are collapsed, generic type arguments and XML doc tags appear to have been stripped (for example "List urls" and "IOC.Resolve()"), and GetSkillModel breaks off inside txt.Replace(" so the remainder of that method and the #region that pairs with the final #endregion are missing. Restore the file from version control before changing code; only comment text is edited here.
// NOTE(review): in the visible code the local lotterySkill in DoMainUrl and the Uri built in GetOpenListFromMainUrl are never used, and the fields LatestItem, FailedQiHaoList and lotterySkillType are never referenced.
using System; using System.Collections.Generic; using System.Linq; using System.Text; using HtmlAgilityPack; using Quartz; using SCC.Common; using SCC.Interface; using SCC.Models; namespace SCC.Crawler.LotteryNews { public class QTNewsJob : IJob { /// /// Constructor: creates the LogHelper and resolves the data service and the e-mail interface through IOC. /// public QTNewsJob() { log = new LogHelper(); services = IOC.Resolve(); email = IOC.Resolve(); } /// /// Job entry point: reads the configuration from the job data map, then runs DoMainUrl. /// context: Quartz execution context whose JobDetail.JobDataMap supplies the configuration. /// public void Execute(IJobExecutionContext context) { Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap); DoMainUrl(); } /// /// Crawls every list page and passes each article whose Content is not null to services.LotteryNewsModel. When that call returns true the title is logged and isGetData is set. /// private void DoMainUrl() { List urls = GetMainUrl(Config); LotteryNewsModel lotterySkill = null; foreach (string url in urls) { List res = GetOpenListFromMainUrl(url); foreach (var LotteryNewsModel in res) { if (LotteryNewsModel.Content != null) { if (services.LotteryNewsModel(currentLottery, LotteryNewsModel)) { //Do Success Log log.Info(GetType(), CommonHelper.GetJobMainLogInfo(Config, LotteryNewsModel.Title)); isGetData = true; } } } } } /// /// Builds the de-duplicated list of list-page URLs to crawl. /// config: supplies MainUrl (the format string for pages 2 and up) and MainUrlPages (treated as 1 when not positive). /// Returns the URLs in page order. Page 1 is always the hard-coded address. /// private List GetMainUrl(SCCConfig config) { List urlList = new List(); string url = config.MainUrl; int pages = config.MainUrlPages > 0 ? 
config.MainUrlPages : 1; for (int i = 1; i <= pages; i++) { string res; if (i == 1) { res = "http://www.zhcw.com/xinwen/caizhongxinwen-qt/"; } else { res = string.Format(url, i); } if (!urlList.Contains(res)) { urlList.Add(res); } } return urlList; } /// /// Downloads one list page and builds a model for every distinct article link found on it. /// mainUrl: address of the list page. /// Returns the models collected. The list is empty when the download yields null or no link matches. An exception is logged and whatever was collected before it is returned. /// private List GetOpenListFromMainUrl(string mainUrl) { var result = new List(); try { var url = new Uri(mainUrl); var htmlResource = NetHelper.GetUrlResponse(mainUrl, Encoding.GetEncoding("utf-8")); if (htmlResource == null) return result; HtmlDocument doc = new HtmlDocument(); doc.LoadHtml(htmlResource); //Get all a tags under the li elements HtmlNodeCollection nodeList = doc.DocumentNode.SelectNodes("//*[@class='Nleftbox']/ul/li/span/a"); if (nodeList == null) return result; List urls = new List(); //Walk the a tags foreach (HtmlNode node in nodeList) { HtmlAttribute attr = node.Attributes.SingleOrDefault(a => a.Name.Equals("href")); if (attr != null) { string href = Host + attr.Value; //De-duplicate if (!urls.Contains(href)) { urls.Add(href); } } } foreach (var url1 in urls) { var LotterySkill = GetSkillModel(url1); result.Add(LotterySkill); } } catch (Exception ex) { log.Error(GetType(), string.Format("【{0}】通过主抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, ex.Message)); } return result; } /// /// Downloads one article page, selects the news_content element, and collects its h2 children and the inner HTML of its third div child. /// url: address of the article page. /// NOTE(review): the remainder of this method, including its return statement, is missing from this copy. /// private LotteryNewsModel GetSkillModel(string url) { LotteryNewsModel lotterySkill = new LotteryNewsModel(); try { var htmlResource = NetHelper.GetUrlResponse(url, Encoding.GetEncoding("utf-8")); if (htmlResource == null) return lotterySkill; HtmlDocument doc = new HtmlDocument(); doc.LoadHtml(htmlResource); //Get the news_content element var div = doc.DocumentNode.SelectSingleNode("//*[@class='news_content']"); var Title = div.ChildNodes.Where(node => node.Name == "h2").ToList(); var div1 = div.ChildNodes.Where(node => node.Name == "div").ToList(); string txt = div1[2].InnerHtml.Trim(); var Content = txt.Replace(" /// Host address prepended to every article href /// public string Host = "http://www.zhcw.com"; /// /// Configuration loaded in Execute /// private SCCConfig Config; /// /// 
当天抓取的最新一期开奖记录 /// (English for the preceding fragment, which continues the comment opened on the previous line: latest draw record fetched today) /// private LotteryNewsModel LatestItem = null; /// /// List of items that failed to be fetched today /// private List FailedQiHaoList = null; /// /// Logger /// private readonly LogHelper log; /// /// Data service /// private readonly IDTOpenCode services; /// /// Current lottery type, always SCCLottery.LotteryNews /// private SCCLottery currentLottery => SCCLottery.LotteryNews; /// /// News type constant, LotteryNewsType.QTNews /// private LotteryNewsType lotterySkillType = LotteryNewsType.QTNews; /// /// E-mail interface, resolved in the constructor /// private IEmail email; /// /// Whether this run fetched data. Set to true in DoMainUrl after services.LotteryNewsModel returns true. /// private bool isGetData = false; #endregion } }