123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237 |
- using System;
- using System.Collections.Generic;
- using FCS.Common;
- using FCS.Crawler.Tools;
- using FCS.Interface;
- using FCS.Models;
- using HtmlAgilityPack;
- using Newtonsoft.Json;
- using Quartz;
- using System.Data;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- using System.Threading;
- using System.Threading.Tasks;
- using System.Diagnostics;
- using FCS.Models.DTO;
- namespace FCS.Crawler.ZCLotteryTeam
- {
/// <summary>
/// Quartz job that crawls the squad page of every team that has a stored page URL,
/// downloads the profile of each player that is not known yet and bulk-inserts the
/// collected players and player-team relations.
/// </summary>
public class FootballPlayerJob : CommonJob, IJob
{
    /// <summary>Root that relative player links are appended to.</summary>
    private const string SiteRoot = "http://saishi.zgzcw.com/";

    /// <summary>Pause, in milliseconds, between two polls of CommonHelper.ThreadsFinsh().</summary>
    private const int PollIntervalMs = 100;

    /// <summary>Player-team relations collected during the current run.</summary>
    public List<F_Relation> relations = new List<F_Relation>();

    /// <summary>Players collected during the current run.</summary>
    public List<F_Players> players = new List<F_Players>();

    /// <summary>
    /// List returned by services.GetPlayerList(); a player whose ChineseName is
    /// already in this list is skipped.
    /// </summary>
    public List<F_Players> allPlayer = new List<F_Players>();

    // Guards players, relations and _pendingTasks: List<T> is not safe for concurrent writers.
    private readonly object _collectLock = new object();

    // Every background task started by this job, so GetAll can wait for all of them.
    private readonly List<Task> _pendingTasks = new List<Task>();

    public FootballPlayerJob()
    {
        log = new LogHelper();
        services = IOC.Resolve<IDTOpenCode>();
    }

    /// <summary>
    /// Quartz entry point: reads the job configuration from the data map and runs the crawl.
    /// </summary>
    /// <param name="context">Execution context supplied by the scheduler.</param>
    public void Execute(IJobExecutionContext context)
    {
        Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
        GetAll();
    }

    /// <summary>
    /// Crawls every team whose Remark (page URL) is set, waits until all background
    /// work has finished and then bulk-inserts the collected players and relations.
    /// </summary>
    public void GetAll()
    {
        var teams = services.Query<F_Team>("and Remark is not null").ToList();
        allPlayer = services.GetPlayerList();

        if (teams.Count > 0)
        {
            foreach (var team in teams)
            {
                // Copy into locals so each task captures its own values.
                var url = team.Remark.ToString();
                var teamId = team.Id.ToString();
                var teamName = team.Name.ToString();
                RunTracked(() =>
                {
                    GetPlayerListHtml(url, teamId, teamName);
                });
            }

            // Deterministic wait: covers the per-team tasks and the per-position
            // tasks they start, so nothing is still appending when we insert.
            WaitForPendingTasks();

            // Kept from the original implementation; ThreadsFinsh() is an opaque helper
            // (presumably reports whether crawler worker threads are idle - TODO confirm).
            // Sleeping between polls avoids pinning a CPU core.
            while (!CommonHelper.ThreadsFinsh())
            {
                Thread.Sleep(PollIntervalMs);
            }
        }

        // Any exception from the bulk insert propagates to the caller unchanged.
        services.SqlBulkCopyAdd<F_Players>(players);
        services.SqlBulkCopyAdd<F_Relation>(relations);
    }

    /// <summary>
    /// Downloads a team's squad page and starts one background task per position
    /// group that registers every player of that group that is not known yet.
    /// </summary>
    /// <param name="url">Absolute URL of the team's squad page.</param>
    /// <param name="teamId">Id of the team the players belong to.</param>
    /// <param name="teamName">Team name (not used by the current implementation).</param>
    /// <returns>
    /// false when the URL is blacklisted, the download was terminated or the page has
    /// no player groups; true once the background tasks have been started.
    /// </returns>
    public bool GetPlayerListHtml(string url, string teamId, string teamName)
    {
        if (url.Contains("wwaattssuunn"))
        {
            return false;
        }

        HtmlParameterDTO request = new HtmlParameterDTO();
        request.Url = url;
        request.Timeout = 10 * 1000;
        request.IsCheckEmpty = false;
        HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(request);
        if (doc.DocumentNode.InnerHtml == "Termination")
        {
            return false;
        }

        // One <dl> per position group: <dt> holds the position, <dd>/<a> the players.
        var groups = doc.DocumentNode.SelectNodes("//*[@class='mingd_right']/dl");
        if (groups == null || groups.Count == 0)
        {
            return false;
        }

        foreach (var group in groups)
        {
            // Re-parse the group on its own: an XPath starting with "//" is evaluated
            // from the document root, so it would otherwise match the whole page.
            HtmlDocument fragment = new HtmlDocument();
            fragment.LoadHtml(group.InnerHtml);
            var playerLinks = fragment.DocumentNode.SelectNodes("//dd/a");
            var headings = fragment.DocumentNode.SelectNodes("//dt");

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var position = "";
            if (headings != null && headings.Count > 0)
            {
                position = headings[0].InnerText.Replace("\t", "").Trim();
            }

            if (playerLinks == null)
            {
                continue;
            }

            var groupLinks = playerLinks;
            var groupPosition = position;
            RunTracked(() =>
            {
                foreach (var link in groupLinks)
                {
                    AddNewPlayer(link, teamId, groupPosition);
                }
            });
        }

        return true;
    }

    /// <summary>
    /// Downloads a player's profile page, records the player in <see cref="players"/>
    /// and returns the generated id together with the shirt number.
    /// </summary>
    /// <param name="url">Player link as found on the squad page (relative or absolute).</param>
    /// <param name="name">Player name shown on the squad page.</param>
    /// <returns>
    /// "id,shirtNumber" (the number part may be empty) when the player was recorded;
    /// an empty string when the download was terminated or the page has no profile data.
    /// </returns>
    public string GetPlayerInfo(string url, string name)
    {
        // Relative links are appended to the site root; an already absolute link is
        // used as is. The resolved URL is both downloaded and stored in Remark.
        var link = url ?? string.Empty;
        var isAbsolute = link.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            || link.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
        var mainUrl = isAbsolute ? link : SiteRoot + link;

        HtmlParameterDTO request = new HtmlParameterDTO();
        request.Url = mainUrl;
        request.Timeout = 10 * 1000;
        request.IsCheckEmpty = false;
        HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(request);
        if (doc.DocumentNode.InnerHtml == "Termination")
        {
            return "";
        }

        // Player picture, introduction and the rows of basic facts.
        var logoNodes = doc.DocumentNode.SelectNodes("//*[@class='star_dl']/dt/img");
        var introNodes = doc.DocumentNode.SelectNodes("//*[@class='introduceDiv']");
        var factNodes = doc.DocumentNode.SelectNodes("//*[@class='star_dl']/dd");
        if (factNodes == null || factNodes.Count == 0)
        {
            return "";
        }

        var introduction = introNodes == null || introNodes.Count == 0
            ? ""
            : introNodes[0].InnerHtml.Trim();

        // A profile without a picture is still recorded, with an empty logo.
        HtmlNode logoNode = logoNodes == null ? null : logoNodes.FirstOrDefault();
        var logoUrl = logoNode == null ? "" : logoNode.GetAttributeValue("src", "");

        var facts = factNodes.ToList();

        F_Players player = new F_Players();
        player.Id = Guid.NewGuid().ToString();
        player.ChineseName = name;
        player.Describe = introduction;
        player.Remark = mainUrl;
        player.LogoImage = logoUrl;
        player.Nationality = ExtractFact(facts, "国家");
        player.Weight = ExtractFact(facts, "体重");
        player.Stature = ExtractFact(facts, "身高");
        player.Birthday = ExtractFact(facts, "生日");
        var shirtNumber = ExtractFact(facts, "号码");

        // Called concurrently from several tasks, so the shared list must be locked.
        lock (_collectLock)
        {
            players.Add(player);
        }

        return player.Id + "," + shirtNumber;
    }

    /// <summary>
    /// Registers one player link of a position group: downloads the profile when the
    /// name is not in <see cref="allPlayer"/> and records the player-team relation.
    /// </summary>
    private void AddNewPlayer(HtmlNode link, string teamId, string position)
    {
        var playerUrl = link.GetAttributeValue("href", "");
        if (playerUrl == "")
        {
            return; // link without a target: nothing to download
        }

        var playerName = link.InnerText.Trim();
        if (allPlayer.Any(o => o.ChineseName == playerName))
        {
            return; // name already in allPlayer: no new player, no new relation
        }

        // An empty result means the profile could not be read.
        var playerInfo = GetPlayerInfo(playerUrl, playerName);
        if (playerInfo == "")
        {
            return;
        }

        var parts = playerInfo.Split(',');
        var playerId = parts[0];
        var shirtText = parts.Length > 1 ? parts[1] : "0";
        if (string.IsNullOrEmpty(playerId))
        {
            return;
        }

        // A missing or non-numeric shirt number is stored as 0 instead of throwing.
        int shirtNumber;
        if (!int.TryParse(shirtText, out shirtNumber))
        {
            shirtNumber = 0;
        }

        F_Relation relation = new F_Relation();
        relation.Id = Guid.NewGuid().ToString();
        relation.TeamId = teamId;
        relation.PlayerId = playerId;
        relation.PoloShirt = shirtNumber;
        relation.Position = position;
        relation.CreateDateTime = DateTime.Now;
        relation.ContractTime = DateTime.Now;

        lock (_collectLock)
        {
            relations.Add(relation);
        }
    }

    /// <summary>
    /// Returns the text after the first ':' of the first row whose text contains
    /// <paramref name="label"/>, or an empty string when there is no such row or value.
    /// </summary>
    private static string ExtractFact(List<HtmlNode> rows, string label)
    {
        HtmlNode row = rows.FirstOrDefault(o => o.InnerText.Contains(label));
        if (row == null)
        {
            return "";
        }

        string[] parts = row.InnerText.Replace("\t", "").Split(':');
        return parts.Length > 1 ? parts[1].Trim() : "";
    }

    /// <summary>
    /// Starts <paramref name="work"/> on the thread pool and remembers the task so
    /// that <see cref="WaitForPendingTasks"/> can wait for it.
    /// </summary>
    private void RunTracked(Action work)
    {
        Task task = Task.Run(() =>
        {
            try
            {
                work();
            }
            catch (Exception ex)
            {
                // The crawl is best effort: one failing page must not abort the job,
                // but the failure should at least be visible.
                Trace.WriteLine("FootballPlayerJob background task failed: " + ex);
            }
        });

        lock (_collectLock)
        {
            // Drop finished tasks so the list stays small when GetPlayerListHtml is
            // called directly and nobody drains it.
            _pendingTasks.RemoveAll(t => t.IsCompleted);
            _pendingTasks.Add(task);
        }
    }

    /// <summary>
    /// Blocks until every tracked task has finished, including tasks that were
    /// started by other tracked tasks while waiting.
    /// </summary>
    private void WaitForPendingTasks()
    {
        while (true)
        {
            Task[] snapshot;
            lock (_collectLock)
            {
                if (_pendingTasks.Count == 0)
                {
                    return;
                }

                snapshot = _pendingTasks.ToArray();
                _pendingTasks.Clear();
            }

            // Tracked tasks catch their own exceptions, so this does not throw for them.
            Task.WaitAll(snapshot);
        }
    }

    #region SQL statements
    // Not referenced inside this class; the team query goes through services.Query.
    private static string GetAllTeamUrl = @"select Id,Name, Remark from F_Team where Remark is not null";
    #endregion
}
- }
|