123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336 |
- using FCS.Common;
- using FCS.Interface;
- using FCS.Models;
- using HtmlAgilityPack;
- using Quartz;
- using System;
- using System.Collections.Generic;
- using System.Data;
- using System.Diagnostics;
- using System.Linq;
- using System.Threading;
- using System.Threading.Tasks;
- namespace FCS.Crawler.ZCLotteryTeam
- {
- /// <summary>
- /// 抓取球队信息
- /// </summary>
- public class FootBallTeamsJob : CommonJob, IJob
- {
- private static List<F_Team> teamList = new List<F_Team>();
- private static List<F_Team> qhyyteamList = new List<F_Team>();
- private static List<F_Team> allTeamList = new List<F_Team>();
- F_Team g;
/// <summary>
/// Creates the job and wires up the collaborators it needs while crawling.
/// </summary>
public FootBallTeamsJob()
{
    // log and services are not declared in this class; presumably they are
    // inherited from CommonJob - confirm against the base class.
    log = new LogHelper();
    services = IOC.Resolve<IDTOpenCode>();

    // g is only referenced by the commented-out Monitor-based wait logic in GetAll.
    g = new F_Team();
}
/// <summary>
/// Quartz entry point: reads the job configuration from the job's data map
/// and then runs the full team crawl.
/// </summary>
/// <param name="context">Execution context supplied by the scheduler.</param>
public void Execute(IJobExecutionContext context)
{
    var jobData = context.JobDetail.JobDataMap;
    Config = CommonHelper.GetConfigFromDataMap(jobData);

    GetAll();
}
/// <summary>
/// Crawls every event that has a Remark URL, collects the teams listed on the
/// event's season pages, and stores each collected team whose name is not yet
/// present in the list returned by <c>services.GetTeamList()</c>.
/// </summary>
public void GetAll()
{
    allTeamList = services.GetTeamList();
    var events = services.Query<F_Events>("and Remark is not null").ToList();
    if (events.Count == 0)
    {
        return;
    }

    // One background task per event. The tasks are kept so that completion can
    // be awaited explicitly instead of being fired and forgotten.
    var crawlTasks = new List<Task>(events.Count);
    foreach (var evt in events)
    {
        var url = evt.Remark.ToString();
        var eventId = evt.Id.ToString();
        var eventName = evt.Name.ToString();
        crawlTasks.Add(Task.Run(() => CrawlEventSeasons(url, eventId, eventName)));
    }

    // CrawlEventSeasons handles its own exceptions, so this only blocks until
    // every event has been processed.
    Task.WaitAll(crawlTasks.ToArray());

    // CommonHelper.ThreadsFinsh is defined elsewhere; the original code waited
    // on it, so it is still honoured here - but with a pause between polls
    // rather than a tight loop that pins a CPU core.
    const int pollIntervalMs = 100;
    while (!CommonHelper.ThreadsFinsh())
    {
        Thread.Sleep(pollIntervalMs);
    }

    // A set gives O(1) "is this name already stored?" checks and, because every
    // inserted name is added to it, prevents the same name being inserted twice
    // (within one list or across the two lists).
    var knownNames = new HashSet<string>();
    if (allTeamList != null)
    {
        foreach (var existing in allTeamList)
        {
            knownNames.Add(existing.Name);
        }
    }

    SaveUnknownTeams(teamList, knownNames);
    // TODO: parse the club-friendly (球会友谊) teams; for now they are stored as collected.
    SaveUnknownTeams(qhyyteamList, knownNames);
}

/// <summary>
/// Downloads one event's landing page and crawls each season linked from it.
/// Failures are written to the trace log and never propagate to the caller.
/// </summary>
private void CrawlEventSeasons(string url, string eventId, string eventName)
{
    var mainUrl = "http://saishi.zgzcw.com" + url;
    try
    {
        HtmlDocument doc = CommonHelper.GetHtml(mainUrl, new Dictionary<string, string>(), "足彩", "", 10000, 100);
        if (doc == null)
        {
            return;
        }

        var seasonLinks = doc.DocumentNode.SelectNodes("//*[@class='select_options']/a");
        bool isClubFriendly = doc.DocumentNode.InnerText.Contains("球会友谊");

        // Club-friendly events are skipped entirely, exactly as before.
        if (seasonLinks == null || isClubFriendly)
        {
            return;
        }

        foreach (var link in seasonLinks)
        {
            var hrefAttribute = link.Attributes.FirstOrDefault(a => a.Name.Equals("href"));
            if (hrefAttribute == null || string.IsNullOrEmpty(hrefAttribute.Value))
            {
                continue;
            }

            // NOTE(review): as written this replacement is a no-op; possibly a
            // full-width dash was intended as the search string - confirm.
            var seasonUrl = hrefAttribute.Value.Replace("-", "-");
            try
            {
                // isClubFriendly is always false at this point (see the guard above).
                GetEventHtml(seasonUrl, eventId, eventName, isClubFriendly);
            }
            catch (Exception ex)
            {
                // One broken season page must not stop the remaining seasons.
                Trace.WriteLine("FootBallTeamsJob: failed to crawl season " + seasonUrl + ": " + ex);
            }
        }
    }
    catch (Exception ex)
    {
        Trace.WriteLine("FootBallTeamsJob: failed to crawl event " + mainUrl + ": " + ex);
    }
}

/// <summary>
/// Stores every team from <paramref name="scraped"/> whose name is not yet in
/// <paramref name="knownNames"/>, recording each stored name in the set.
/// </summary>
private void SaveUnknownTeams(List<F_Team> scraped, HashSet<string> knownNames)
{
    if (scraped == null)
    {
        return;
    }

    // Copy under the same lock the crawl code uses so enumeration cannot
    // collide with a late writer.
    List<F_Team> snapshot;
    lock (scraped)
    {
        snapshot = scraped.ToList();
    }

    foreach (var team in snapshot)
    {
        // HashSet.Add returns false when the name is already known.
        if (knownNames.Add(team.Name))
        {
            services.AddTeam(FCSLottery.F_Team, team);
        }
    }
}
/// <summary>
/// Downloads one season page of an event and records every team listed in the
/// page's "球队列表" panel.
/// </summary>
/// <param name="url">URL of the season page to download.</param>
/// <param name="eventId">Id of the owning event; stored on each recorded team.</param>
/// <param name="eventName">Name of the owning event; not used by this method.</param>
/// <param name="IsQHYY">
/// When true the teams are only collected into the separate club-friendly list
/// (there are too many of them to resolve here) and no detail page is requested.
/// </param>
/// <returns>
/// false when the URL contains "wwaattssuunn" or the page could not be
/// downloaded; otherwise true.
/// </returns>
public bool GetEventHtml(string url, string eventId, string eventName, bool IsQHYY)
{
    if (url.Contains("wwaattssuunn"))
    {
        return false;
    }

    HtmlDocument doc = CommonHelper.GetHtml(url, new Dictionary<string, string>(), "足彩", "", 10000, 100);
    if (doc == null)
    {
        return false;
    }

    // The statistics panels in the left-hand column.
    var panels = doc.DocumentNode.SelectNodes("//*[@class='tongji_list']");
    if (panels == null)
    {
        return true;
    }

    foreach (var panel in panels)
    {
        if (!panel.InnerText.Contains("球队列表") || panel.InnerHtml == null)
        {
            continue;
        }

        // Re-parse the panel on its own so the XPath below only sees its links.
        HtmlDocument teamHtml = new HtmlDocument();
        teamHtml.LoadHtml(panel.InnerHtml);
        var teams = teamHtml.DocumentNode.SelectNodes("//ul/a");
        if (teams == null)
        {
            continue;
        }

        foreach (var teamitem in teams)
        {
            if (string.IsNullOrEmpty(teamitem.InnerHtml))
            {
                continue;
            }

            // NOTE(review): as written this replacement is a no-op; possibly a
            // full-width dash was intended as the search string - confirm.
            var teamhref = ReadAnchorAttribute(teamitem, "href").Replace("-", "-");
            var teamName = ReadAnchorAttribute(teamitem, "title");

            if (IsQHYY)
            {
                // This method runs on several tasks at once, so the shared list
                // is guarded just like teamList below.
                lock (qhyyteamList)
                {
                    RegisterTeamIfNew(qhyyteamList, teamName, eventId, teamhref);
                }
                continue;
            }

            lock (teamList)
            {
                RegisterTeamIfNew(teamList, teamName, eventId, url);
            }

            if (teamhref != "")
            {
                // NOTE(review): the placeholder registered above already carries
                // this team's name, and GetTeamHtml returns early when it finds
                // that name in teamList, so this call appears to almost never
                // reach the download - confirm whether detail fetching is meant
                // to be disabled.
                GetTeamHtml(teamhref, teamName, eventId);
            }
        }
    }

    return true;
}

/// <summary>
/// Returns the value of the named attribute of <paramref name="node"/>, or an
/// empty string when the attribute is absent.
/// </summary>
private static string ReadAnchorAttribute(HtmlNode node, string attributeName)
{
    var attribute = node.Attributes.FirstOrDefault(a => a.Name.Equals(attributeName));
    if (attribute == null || attribute.Value == null)
    {
        return "";
    }
    return attribute.Value;
}

/// <summary>
/// Appends a placeholder team to <paramref name="target"/> unless it already
/// holds a team with the same name (compared after trimming). The caller must
/// hold the lock that guards <paramref name="target"/>.
/// </summary>
private static void RegisterTeamIfNew(List<F_Team> target, string teamName, string eventId, string remark)
{
    var wanted = teamName.Trim();
    if (target.Any(o => (o.Name ?? "").Trim() == wanted))
    {
        return;
    }

    F_Team placeholder = new F_Team();
    placeholder.Id = Guid.NewGuid().ToString();
    placeholder.Name = teamName;
    placeholder.EventId = eventId;
    placeholder.Remark = remark;
    target.Add(placeholder);
}
/// <summary>
/// Downloads a team's detail page and records the team (logo, introduction and
/// basic facts) in the shared team list.
/// </summary>
/// <param name="url">URL of the team's detail page.</param>
/// <param name="TeamName">Team name; also the key used to detect duplicates.</param>
/// <param name="eventId">Id of the event the team was found under.</param>
/// <returns>
/// true when a fully populated team was added; false when the team was already
/// known, the page had no usable profile, or the page could not be loaded.
/// </returns>
public bool GetTeamHtml(string url, string TeamName, string eventId)
{
    lock (teamList)
    {
        if (teamList.Any(o => o.Name == TeamName))
        {
            return false;
        }
    }

    HtmlDocument doc = FetchTeamPage(url);

    // No page at all, or the site reports that it has no profile for this team:
    // remember the bare name so the team is not requested again.
    if (doc == null || doc.DocumentNode.InnerText.Contains("球队还没有资料"))
    {
        F_Team f_Team1 = new F_Team();
        f_Team1.Id = Guid.NewGuid().ToString();
        f_Team1.Name = TeamName;
        AddTeamIfAbsent(f_Team1);
        return false;
    }

    // The introduction block is what the retry loop waits for; if it is still
    // missing the page never loaded properly, so nothing is recorded.
    var introNodes = doc.DocumentNode.SelectNodes("//*[@class='introduceDiv']");
    var introNode = introNodes == null ? null : introNodes.FirstOrDefault();
    if (introNode == null)
    {
        return false;
    }

    F_Team f_Team = new F_Team();
    f_Team.Id = Guid.NewGuid().ToString();
    f_Team.Name = TeamName;
    f_Team.EventId = eventId;
    f_Team.Describe = (introNode.InnerHtml ?? "").Trim();
    f_Team.Remark = url;

    // Team logo.
    var teamImg = doc.DocumentNode.SelectNodes("//*[@class='star_dl']/dt/img");
    if (teamImg != null)
    {
        f_Team.LogoImage = ReadTeamAttribute(teamImg.FirstOrDefault(), "src");
    }

    // Basic facts, one <dd> per fact in "label:value" form.
    var factNodes = doc.DocumentNode.SelectNodes("//*[@class='star_dl']/dd");
    if (factNodes != null)
    {
        var teamData = factNodes.ToList();
        f_Team.Address = ReadTeamField(teamData, "国家", true);
        // The founding date is stored untrimmed, as before.
        f_Team.SetUpDateTime = ReadTeamField(teamData, "球队成立", false);
        f_Team.Coach = ReadTeamField(teamData, "教练", true);
        f_Team.CityName = ReadTeamField(teamData, "城市", true);
        f_Team.Venue = ReadTeamField(teamData, "球场", true);

        var websiteRow = teamData.FirstOrDefault(o => o.InnerText.Contains("官网"));
        if (websiteRow != null && websiteRow.InnerHtml != null)
        {
            HtmlDocument websitedoc = new HtmlDocument();
            websitedoc.LoadHtml(websiteRow.InnerHtml);
            var webData = websitedoc.DocumentNode.SelectNodes("//var/a");
            if (webData != null)
            {
                f_Team.Website = ReadTeamAttribute(webData.FirstOrDefault(), "href");
            }
        }
    }

    return AddTeamIfAbsent(f_Team);
}

/// <summary>
/// Downloads the team page, retrying up to three more times until the page
/// either contains the introduction block or states that no profile exists.
/// Returns the last downloaded document, which may be null.
/// </summary>
private static HtmlDocument FetchTeamPage(string url)
{
    const int maxAttempts = 4; // one initial request plus three retries
    HtmlDocument doc = null;
    for (int attempt = 0; attempt < maxAttempts; attempt++)
    {
        doc = CommonHelper.GetHtml(url, new Dictionary<string, string>(), "足彩", "", 10000, 100);
        if (doc == null)
        {
            continue;
        }

        bool hasIntro = doc.DocumentNode.SelectNodes("//*[@class='introduceDiv']") != null;
        bool hasNoProfile = doc.DocumentNode.InnerText.Contains("球队还没有资料");
        if (hasIntro || hasNoProfile)
        {
            break;
        }
    }
    return doc;
}

/// <summary>
/// Adds <paramref name="team"/> to the shared team list unless another task has
/// registered the same name in the meantime. Returns true when it was added.
/// </summary>
private static bool AddTeamIfAbsent(F_Team team)
{
    lock (teamList)
    {
        if (teamList.Any(o => o.Name == team.Name))
        {
            return false;
        }
        teamList.Add(team);
        return true;
    }
}

/// <summary>
/// Returns the value of the named attribute, or null when the node or the
/// attribute is missing.
/// </summary>
private static string ReadTeamAttribute(HtmlNode node, string attributeName)
{
    if (node == null)
    {
        return null;
    }
    var attribute = node.Attributes.FirstOrDefault(a => a.Name.Equals(attributeName));
    return attribute == null ? null : attribute.Value;
}

/// <summary>
/// Finds the first fact row whose text contains <paramref name="label"/> and
/// returns the part after the first ':' (tabs removed). Returns an empty string
/// when the row or the value is missing.
/// </summary>
private static string ReadTeamField(List<HtmlNode> rows, string label, bool trim)
{
    var row = rows.FirstOrDefault(o => o.InnerText.Contains(label));
    if (row == null)
    {
        return "";
    }

    var parts = row.InnerText.Replace("\t", "").Split(':');
    if (parts.Length < 2)
    {
        return "";
    }
    return trim ? parts[1].Trim() : parts[1];
}
/// <summary>
/// Normalises a scraped date string into a "yyyy-MM-dd"-like form.
/// </summary>
/// <param name="time">Raw date text; ':' separators are converted to '-'.</param>
/// <returns>
/// The normalised text when it already parses as a date; otherwise the text
/// padded with "-01-01" (one part) or "-01" (two parts); otherwise the
/// fallback "1500-01-01".
/// </returns>
public string realTime(string time)
{
    string normalized = time.Replace(":", "-");
    normalized = normalized.Replace(":", "-");

    DateTime parsed;
    bool alreadyValid = DateTime.TryParse(normalized, out parsed);
    if (alreadyValid)
    {
        return normalized;
    }

    // Pad a missing month and/or day depending on how many parts are present.
    switch (normalized.Split('-').Length)
    {
        case 1:
            return normalized + "-01-01";
        case 2:
            return normalized + "-01";
        default:
            return "1500-01-01";
    }
}
- #region SQL语句
- private static string GetAllEventUrl = @"select Id,Name, Remark from F_Events where Remark is not null ";
- #endregion
- }
- }
|