using System;
using System.Collections.Generic;
using FCS.Common;
using FCS.Crawler.Tools;
using FCS.Interface;
using FCS.Models;
using HtmlAgilityPack;
using Newtonsoft.Json;
using Quartz;
using System.Data;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace FCS.Crawler.ZCLotteryMatchs
{
/// <summary>
/// Quartz job that crawls football (soccer) competitions.
/// </summary>
public class FootballMatchJob : CommonJob, IJob
{
// Areas stored by GetCOntryMatch; GetAreaMatch searches this list by name to obtain an AreaId.
// NOTE(review): static and only ever appended to in the code visible here, so entries
// accumulate across job runs — confirm this is intended.
private static List f_Areas = new List();
/// <summary>
/// Creates the job and sets up the logger and the service object used by the crawl methods.
/// </summary>
public FootballMatchJob()
{
// log and services are not declared in this class — presumably inherited from CommonJob; verify.
log = new LogHelper();
// NOTE(review): Resolve appears here without a type argument — confirm against the original source.
services = IOC.Resolve();
}
/// <summary>
/// Quartz entry point: reads the job configuration from the job data map, then runs the crawl.
/// </summary>
/// <param name="context">Execution context handed in by the scheduler.</param>
public void Execute(IJobExecutionContext context)
{
    var dataMap = context.JobDetail.JobDataMap;
    Config = CommonHelper.GetConfigFromDataMap(dataMap);

    GetAll();
}
/// <summary>
/// Crawls each main-site index page: the per-country competitions first,
/// then the continental competitions.
/// </summary>
public void GetAll()
{
    string[] indexPages = { "http://saishi.zgzcw.com/soccer/" };

    for (int i = 0; i < indexPages.Length; i++)
    {
        string page = indexPages[i];

        // Country crawl goes first: it fills f_Areas, which the continental crawl reads.
        GetCOntryMatch(page);

        // Continental competitions of each area.
        GetAreaMatch(page);
    }
}
/// <summary>
/// Crawls the continental competitions (Champions League and the like) listed in the
/// "mbcon mbr fl" panels of the index page and stores each one via <c>services.AddEvents</c>.
/// </summary>
/// <param name="mainUrl">Address of the competition index page.</param>
/// <remarks>
/// Area ids are looked up by name in <c>f_Areas</c>, which <c>GetCOntryMatch</c> fills, so that
/// method has to run first; an area name that is not in the list yields an empty AreaId.
/// Missing logo or link attributes are stored as empty strings instead of throwing.
/// </remarks>
private void GetAreaMatch(string mainUrl)
{
    var htmlResource = NetHelper.GetUrlResponse(mainUrl, Encoding.GetEncoding("utf-8"));
    if (string.IsNullOrEmpty(htmlResource)) return;

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(htmlResource);

    // Data items describing the football event types.
    var enumData = services.GetDataItem(DataItemDetailEnum.FootBallMatchType);

    // One panel per area. SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection areaHtml = doc.DocumentNode.SelectNodes("//*[@class='mbcon mbr fl']");
    if (areaHtml == null) return;

    foreach (var areaItem in areaHtml)
    {
        if (string.IsNullOrEmpty(areaItem.InnerText) || areaItem.InnerText.Contains("世界排名"))
        {
            continue;
        }

        HtmlDocument areaDoc = new HtmlDocument();
        areaDoc.LoadHtml(areaItem.InnerHtml);

        // Area title; a panel without a span is skipped rather than dereferencing null.
        HtmlNode areaNode = areaDoc.DocumentNode.SelectSingleNode("//span");
        if (areaNode == null) continue;
        var area = areaNode.InnerText.Trim();

        HtmlNodeCollection matchHtml = areaDoc.DocumentNode.SelectNodes("//div/a");
        if (matchHtml == null) continue;

        // Both lookups depend only on the area, so resolve them once per panel.
        var areaId = f_Areas.Where(o => o.Name == area).Select(o => o.Id).FirstOrDefault() ?? "";

        // Name of the data item that carries the event type for this area (null = leave unset).
        string eventTypeName;
        switch (area)
        {
            case "欧洲赛事":
            case "亚洲赛事":
            case "美洲赛事":
            case "非洲赛事":
                eventTypeName = area;
                break;
            case "澳洲赛事":
                // The site says "澳洲"; the data item is named "大洋洲".
                eventTypeName = "大洋洲赛事";
                break;
            default:
                eventTypeName = null;
                break;
        }

        string eventType = null;
        if (eventTypeName != null)
        {
            var typeIds = enumData.Where(o => o.Name == eventTypeName).Select(o => o.Id).ToList();
            if (typeIds.Count == 0)
            {
                // Previously an opaque index-out-of-range crash; report it and move on.
                log.Error(GetType(),
                    string.Format("No FootBallMatchType data item named '{0}'; skipping area '{1}'.", eventTypeName, area));
                continue;
            }
            eventType = typeIds[0].ToString();
        }

        foreach (var matchitem in matchHtml)
        {
            HtmlDocument imgDoc = new HtmlDocument();
            imgDoc.LoadHtml(matchitem.InnerHtml);
            // First logo image inside the link, or null when the link has none.
            HtmlNode imgNode = imgDoc.DocumentNode.SelectSingleNode("//div/img");

            F_Events events = new F_Events();
            events.Id = Guid.NewGuid().ToString();
            events.Name = matchitem.InnerText.Trim();
            events.AreaId = areaId;
            events.LogoImage = imgNode == null ? "" : imgNode.GetAttributeValue("src", "");
            if (eventType != null)
            {
                events.EventType = eventType;
            }
            // The competition's link target is kept in Remark.
            events.Remark = matchitem.GetAttributeValue("href", "");
            services.AddEvents(FCSLottery.F_Events, events);
        }
    }
}
/// <summary>
/// Crawls the "mbcon mbl fl" panels of the index page: stores each area, each country below it
/// and every competition listed for that country. Entries of the "杯赛赛事" area are stored
/// directly as events typed "国际赛事" instead of as countries.
/// </summary>
/// <param name="mainUrl">Address of the competition index page.</param>
/// <remarks>
/// Every stored area is also appended to <c>f_Areas</c> for later lookup by <c>GetAreaMatch</c>.
/// Any exception is logged and ends the crawl of this page; nothing is rethrown.
/// Missing logo or link attributes are stored as empty strings instead of aborting the crawl.
/// </remarks>
private void GetCOntryMatch(string mainUrl)
{
    try
    {
        var htmlResource = NetHelper.GetUrlResponse(mainUrl, Encoding.GetEncoding("utf-8"));
        if (string.IsNullOrEmpty(htmlResource)) return;

        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(htmlResource);

        // Data items describing the football event types.
        var enumData = services.GetDataItem(DataItemDetailEnum.FootBallMatchType);

        // One panel per area. SelectNodes returns null (not an empty collection) when nothing matches.
        HtmlNodeCollection areaHtml = doc.DocumentNode.SelectNodes("//*[@class='mbcon mbl fl']");
        if (areaHtml == null) return;

        foreach (var areaItem in areaHtml)
        {
            if (string.IsNullOrEmpty(areaItem.InnerText))
            {
                continue;
            }

            HtmlDocument areaDoc = new HtmlDocument();
            areaDoc.LoadHtml(areaItem.InnerHtml);

            // Area title; evaluated once instead of running the same XPath three times.
            HtmlNode areaNode = areaDoc.DocumentNode.SelectSingleNode("//span");
            if (areaNode == null)
            {
                continue;
            }

            // Store the area and remember it for GetAreaMatch.
            F_Area areaData = new F_Area();
            areaData.Id = Guid.NewGuid().ToString();
            areaData.Name = areaNode.InnerText.Trim();
            services.AddArea(FCSLottery.F_Area, areaData);
            f_Areas.Add(areaData);

            bool isCupArea = areaData.Name == "杯赛赛事";

            // Name of the data item that carries the event type for this area (null = leave unset).
            string eventTypeName;
            switch (areaData.Name)
            {
                case "欧洲赛事":
                case "亚洲赛事":
                case "美洲赛事":
                case "非洲赛事":
                    eventTypeName = areaData.Name;
                    break;
                case "澳洲赛事":
                    // The site says "澳洲"; the data item is named "大洋洲".
                    eventTypeName = "大洋洲赛事";
                    break;
                case "杯赛赛事":
                    eventTypeName = "国际赛事";
                    break;
                default:
                    eventTypeName = null;
                    break;
            }

            // The event type depends only on the area, so resolve it once here.
            string eventType = null;
            if (eventTypeName != null)
            {
                var typeIds = enumData.Where(o => o.Name == eventTypeName).Select(o => o.Id).ToList();
                if (typeIds.Count == 0)
                {
                    // Previously an opaque index-out-of-range error that aborted the whole crawl.
                    log.Error(GetType(),
                        string.Format("No FootBallMatchType data item named '{0}'; skipping area '{1}'.", eventTypeName, areaData.Name));
                    continue;
                }
                eventType = typeIds[0].ToString();
            }

            // One div per country (or per cup competition in the cup area).
            HtmlNodeCollection countryHtml = areaDoc.DocumentNode.SelectNodes("//*[@class='ls']/div");
            if (countryHtml == null)
            {
                continue;
            }

            foreach (var countryItem in countryHtml)
            {
                // Check for content before parsing it.
                if (string.IsNullOrEmpty(countryItem.InnerHtml))
                {
                    continue;
                }

                HtmlDocument countryDoc = new HtmlDocument();
                countryDoc.LoadHtml(countryItem.InnerHtml);

                HtmlNode countryNode = countryDoc.DocumentNode.SelectSingleNode("//div");
                if (countryNode == null)
                {
                    continue;
                }
                var country = countryNode.InnerText.Trim();

                if (isCupArea)
                {
                    // In the cup area each entry is itself a competition, not a country.
                    HtmlNode link = countryDoc.DocumentNode.SelectSingleNode("//a");
                    HtmlNode logo = countryDoc.DocumentNode.SelectSingleNode("//a/img");

                    F_Events cupEvent = new F_Events();
                    cupEvent.Id = Guid.NewGuid().ToString();
                    cupEvent.Name = country;
                    cupEvent.AreaId = areaData.Id;
                    cupEvent.EventType = eventType;
                    cupEvent.Remark = link == null ? "" : link.GetAttributeValue("href", "");
                    cupEvent.LogoImage = logo == null ? "" : logo.GetAttributeValue("src", "");
                    services.AddEvents(FCSLottery.F_Events, cupEvent);
                    continue;
                }

                // Store the country as a child of the area.
                F_Area countryData = new F_Area();
                countryData.Id = Guid.NewGuid().ToString();
                countryData.Name = country;
                countryData.ParentId = areaData.Id;
                services.AddArea(FCSLottery.F_Area, countryData);

                // Competitions of this country.
                HtmlNodeCollection matchHtml = countryDoc.DocumentNode.SelectNodes("//*[@class='kuang']/a");
                if (matchHtml == null)
                {
                    continue;
                }

                foreach (var matchItem in matchHtml)
                {
                    if (string.IsNullOrEmpty(matchItem.InnerHtml))
                    {
                        continue;
                    }

                    HtmlDocument imgDoc = new HtmlDocument();
                    imgDoc.LoadHtml(matchItem.InnerHtml);
                    // First logo image inside the link, or null when the link has none.
                    HtmlNode imgNode = imgDoc.DocumentNode.SelectSingleNode("//div/img");

                    // Competition name, link target (kept in Remark) and logo.
                    F_Events events = new F_Events();
                    events.Id = Guid.NewGuid().ToString();
                    events.Name = matchItem.InnerText.Trim();
                    events.AreaId = countryData.Id;
                    events.Remark = matchItem.GetAttributeValue("href", "");
                    events.LogoImage = imgNode == null ? "" : imgNode.GetAttributeValue("src", "");
                    if (eventType != null)
                    {
                        events.EventType = eventType;
                    }
                    services.AddEvents(FCSLottery.F_Events, events);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // NOTE(review): the message text and currentNews both say "篮球分析" (basketball analysis)
        // although this job crawls football competitions — left unchanged; confirm the wording.
        log.Error(GetType(),
            string.Format("【{0}】通过主抓取篮球分析时发生错误,错误信息【{1}】", Config.Area + currentNews, ex.Message));
    }
}
/// <summary>
/// Builds the list of main-site page addresses to crawl.
/// </summary>
/// <param name="config">Job configuration; its MainUrl and MainUrlPages values are read.</param>
/// <returns>The page addresses in page order, without duplicates.</returns>
// NOTE(review): no caller of this method is visible in this file.
private List GetMainUrl(FCSConfig config)
{
List urlList = new List();
string url = config.MainUrl;
// At least one page is always produced, even when MainUrlPages is zero or negative.
int pages = config.MainUrlPages > 0 ? config.MainUrlPages : 1;
for (int i = 1; i <= pages; i++)
{
string res;
if (i == 1)
{
// The first page is always this fixed address; config.MainUrl is not used for it.
res = "http://saishi.zgzcw.com/soccer";
}
else
{
// presumably MainUrl holds a {0} placeholder for the page number — verify against the configuration.
// NOTE(review): string.Format throws if MainUrl is null, which is only reached when MainUrlPages > 1.
res = string.Format(url, i);
}
if (!urlList.Contains(res))
{
urlList.Add(res);
}
}
return urlList;
}
/// <summary>
/// Strips HTML markup from <paramref name="html"/> and returns the cleaned text.
/// </summary>
/// <param name="html">Text to clean.</param>
/// <returns>The text after the replacements below, cut off before "本文来源" when that marker is present.</returns>
// NOTE(review): in this copy of the file the first pattern literal is cut off and the
// declarations of regex2..regex9 are missing, so the method does not compile as shown —
// restore them from version control rather than guessing the patterns.
// NOTE(review): Substring(0, index - 1) also drops the character just before "本文来源" and
// throws when the marker is at index 0 — confirm whether index was intended.
// NOTE(review): the result of html.Replace("网易体育", "彩吧足球") is discarded, so that
// replacement has no effect (string.Replace returns a new string).
public static string NoHTML(string html) // remove HTML markup
{
Regex regex1 =
new Regex(@"标记
html = regex2.Replace(html, ""); //过滤href=javascript: () 属性
html = regex3.Replace(html, " _disibledevent="); //过滤其它控件的on...事件
html = regex4.Replace(html, ""); //过滤iframe
html = regex5.Replace(html, ""); //过滤frameset
html = regex6.Replace(html, ""); //过滤frameset
html = regex9.Replace(html, "");
html = Regex.Replace(html, "[\f\n\r\t\v]", ""); //过滤回车换行制表符
int index = html.IndexOf("本文来源");//删除文本来源及责任编辑
if (index != -1)
{
html = html.Substring(0, index - 1);
}
html.Replace("网易体育", "彩吧足球");
return html;
}
#region 初始化信息
/// <summary>
/// News type appended to Config.Area in this job's error log message.
/// </summary>
// NOTE(review): resolves to 篮球分析 (basketball analysis) although this job crawls football
// competitions — looks copied from another job; confirm.
private NewsTypeEnum currentNews => NewsTypeEnum.篮球分析;
#endregion 初始化信息
#region SQL语句
/// <summary>
/// Query template that selects the first row of a table by item name:
/// {0} is the table name, {1} the ItemName value.
/// </summary>
// NOTE(review): not referenced anywhere in the visible code. Both values are spliced into the
// SQL text through format placeholders, so it must never be used with untrusted input —
// prefer a parameterized query.
private static string GetLotterySqlByTableName = @"SELECT TOP 1 [ID],[ItemId],[ItemName] FROM [dbo].[{0}] where [ItemName]='{1}' ";//WHERE [IsChecked] = 1 AND [IsPassed] = 1
#endregion
}
}