using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlAgilityPack;
using Quartz;
using SCC.Common;
using SCC.Interface;
using SCC.Models;

namespace SCC.Crawler.LotterySkill
{
    /// <summary>
    /// Quartz job that crawls the article list pages configured in <see cref="SCCConfig.MainUrl"/>,
    /// downloads every linked article and hands each one to the data service as a
    /// <see cref="LotterySkillModel"/> of type <see cref="LotterySkillType.SHSSC"/>.
    /// </summary>
    public class SHSSCSkillJob : IJob
    {
        /// <summary>
        /// Creates the job and resolves its collaborators (logger, data service, e-mail service).
        /// </summary>
        public SHSSCSkillJob()
        {
            log = new LogHelper();
            services = IOC.Resolve<IDTOpenCode>();
            email = IOC.Resolve<IEmail>();
        }

        /// <summary>
        /// Job entry point: reads the crawler configuration from the job data map and runs the crawl.
        /// </summary>
        /// <param name="context">Quartz execution context carrying the job data map.</param>
        public void Execute(IJobExecutionContext context)
        {
            Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
            DoMainUrl();
        }

        /// <summary>
        /// Crawls every configured list page and stores each article that was parsed successfully.
        /// </summary>
        private void DoMainUrl()
        {
            List<string> urls = GetMainUrl(Config);
            foreach (string url in urls)
            {
                List<LotterySkillModel> res = GetOpenListFromMainUrl(url);
                foreach (var lotterySkillModel in res)
                {
                    if (services.LotterySkillModel(currentLottery, lotterySkillModel))
                    {
                        // Log only the articles the data service reports as stored.
                        log.Info(GetType(), CommonHelper.GetJobMainLogInfo(Config, lotterySkillModel.Title));
                        isGetData = true;
                    }
                }
            }
        }

        /// <summary>
        /// Builds the list-page URLs by formatting <see cref="SCCConfig.MainUrl"/> with the page
        /// numbers 1..<see cref="SCCConfig.MainUrlPages"/>.
        /// </summary>
        /// <param name="config">Crawler configuration.</param>
        /// <returns>Distinct list-page URLs in page order.</returns>
        private List<string> GetMainUrl(SCCConfig config)
        {
            List<string> urlList = new List<string>();
            string url = config.MainUrl;
            // A non-positive page count is treated as a single page.
            int pages = config.MainUrlPages > 0 ? config.MainUrlPages : 1;
            for (int i = 1; i <= pages; i++)
            {
                string res = string.Format(url, i);
                // When MainUrl has no placeholder every page formats to the same URL; keep it once.
                if (!urlList.Contains(res))
                {
                    urlList.Add(res);
                }
            }
            return urlList;
        }

        /// <summary>
        /// Downloads one list page, collects the article links under the <c>art-list</c> element
        /// and fetches every linked article.
        /// </summary>
        /// <param name="mainUrl">Absolute URL of the list page.</param>
        /// <returns>
        /// The articles that were parsed successfully; empty when the page could not be
        /// downloaded, contains no links, or an error occurred (errors are logged).
        /// </returns>
        private List<LotterySkillModel> GetOpenListFromMainUrl(string mainUrl)
        {
            var result = new List<LotterySkillModel>();
            try
            {
                // Constructed only to validate mainUrl: a null or malformed URL throws here
                // and is logged by the catch below before any request is made.
                var pageUri = new Uri(mainUrl);

                var htmlResource = NetHelper.GetUrlResponse(mainUrl, Encoding.UTF8);
                if (htmlResource == null)
                {
                    return result;
                }

                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(htmlResource);

                // All <a> elements directly inside the list items.
                HtmlNodeCollection nodeList = doc.DocumentNode.SelectNodes("//*[@class='art-list']/ul/li/a");
                if (nodeList == null)
                {
                    return result;
                }

                // De-duplicate while preserving document order.
                var seen = new HashSet<string>();
                var detailUrls = new List<string>();
                foreach (HtmlNode node in nodeList)
                {
                    // FirstOrDefault: a malformed anchor with a repeated href must not abort the whole page.
                    HtmlAttribute attr = node.Attributes.FirstOrDefault(a => a.Name.Equals("href"));
                    if (attr == null || string.IsNullOrWhiteSpace(attr.Value))
                    {
                        // An empty href would resolve to the site root, which is not an article.
                        continue;
                    }

                    string href = Host + attr.Value;
                    if (seen.Add(href))
                    {
                        detailUrls.Add(href);
                    }
                }

                foreach (string detailUrl in detailUrls)
                {
                    LotterySkillModel skill = GetSkillModel(detailUrl);
                    // Articles that failed to download or parse are skipped instead of being stored empty.
                    if (skill != null)
                    {
                        result.Add(skill);
                    }
                }
            }
            catch (Exception ex)
            {
                LogCrawlError(ex.Message);
            }
            return result;
        }

        /// <summary>
        /// Downloads a single article page and maps it to a <see cref="LotterySkillModel"/>.
        /// </summary>
        /// <param name="url">Absolute URL of the article page.</param>
        /// <returns>
        /// The populated model, or <c>null</c> when the page could not be downloaded or does not
        /// have the expected structure (the reason is logged).
        /// </returns>
        private LotterySkillModel GetSkillModel(string url)
        {
            try
            {
                var htmlResource = NetHelper.GetUrlResponse(url, Encoding.UTF8);
                if (htmlResource == null)
                {
                    return null;
                }

                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(htmlResource);

                // NOTE(review): 'artile' is kept exactly as in the original selector; presumably it
                // matches the class name used by the site's markup — confirm before "fixing" it.
                HtmlNode article = doc.DocumentNode.SelectSingleNode("//*[@class='artile']");
                if (article == null)
                {
                    LogCrawlError("页面中未找到文章节点,地址:" + url);
                    return null;
                }

                // Title is the first <h1> child; the content is the second <div> child.
                HtmlNode titleNode = article.ChildNodes.FirstOrDefault(node => node.Name == "h1");
                HtmlNode contentNode = article.ChildNodes.Where(node => node.Name == "div").Skip(1).FirstOrDefault();
                if (titleNode == null || contentNode == null)
                {
                    LogCrawlError("文章页缺少标题或正文节点,地址:" + url);
                    return null;
                }

                return new LotterySkillModel
                {
                    Title = titleNode.InnerText.Trim(),
                    Author = "cn55128",
                    Content = contentNode.InnerHtml.Trim(),
                    IsDelete = false,
                    SourceUrl = url,
                    TypeId = lotterySkillType,
                    TypeName = lotterySkillType.GetEnumDescription()
                };
            }
            catch (Exception ex)
            {
                LogCrawlError(ex.Message);
                return null;
            }
        }

        /// <summary>
        /// Writes a crawl failure to the error log using the job's standard message format.
        /// </summary>
        /// <param name="message">Failure detail to embed in the log entry.</param>
        private void LogCrawlError(string message)
        {
            log.Error(GetType(), string.Format("【{0}】通过主抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, message));
        }

        #region Attribute

        /// <summary>
        /// Site root that is prepended to the href of every article link.
        /// </summary>
        public string Host = "http://www.55125.cn/";

        /// <summary>
        /// Crawler configuration, loaded from the job data map in <see cref="Execute"/>.
        /// </summary>
        private SCCConfig Config;

        /// <summary>
        /// Latest item crawled today. Not used by this job.
        /// </summary>
        private LotterySkillModel LatestItem = null;

        /// <summary>
        /// Items that failed to crawl today. Not used by this job.
        /// NOTE(review): the element type was not recoverable from the source; string is assumed — confirm.
        /// </summary>
        private List<string> FailedQiHaoList = null;

        /// <summary>
        /// Logger.
        /// </summary>
        private readonly LogHelper log;

        /// <summary>
        /// Data service used to store crawled articles.
        /// </summary>
        private readonly IDTOpenCode services;

        /// <summary>
        /// Lottery identifier passed to the data service and used in log messages.
        /// </summary>
        private SCCLottery currentLottery => SCCLottery.LotterySkill;

        /// <summary>
        /// Skill category stamped on every crawled article.
        /// </summary>
        private LotterySkillType lotterySkillType = LotterySkillType.SHSSC;

        /// <summary>
        /// E-mail service. Resolved in the constructor; not used by this job.
        /// </summary>
        private IEmail email;

        /// <summary>
        /// Set to true once at least one article has been stored during this run.
        /// </summary>
        private bool isGetData = false;

        #endregion
    }
}