using System;
using System.Collections.Generic;
using FCS.Common;
using FCS.Crawler.Tools;
using FCS.Interface;
using FCS.Models;
using HtmlAgilityPack;
using Newtonsoft.Json;
using Quartz;
using System.Data;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Diagnostics;
using FCS.Models.Entity;
using FCS.Models.DTO;

namespace FCS.Crawler.Basketball
{
    /// <summary>
    /// Basketball player crawler job.
    /// For every team that has a page URL stored in <c>Remark</c>, downloads the team page,
    /// follows each roster link to the player detail page, and bulk-inserts the players
    /// whose Chinese name is not stored yet.
    /// </summary>
    public class B_PlayerJob : CommonJob, IJob
    {
        /// <summary>
        /// Players collected during the current run. Worker tasks append to this list
        /// concurrently, so every write goes through <see cref="playersLock"/>.
        /// </summary>
        public List<B_Players> players = new List<B_Players>();

        /// <summary>
        /// Players already stored, loaded once at the start of <see cref="GetAll"/>.
        /// Only read (never modified) while the worker tasks are running.
        /// </summary>
        public List<B_Players> allplayers = new List<B_Players>();

        /// <summary>Guards concurrent additions to <see cref="players"/>.</summary>
        private readonly object playersLock = new object();

        /// <summary>
        /// Number of <c>em</c> nodes the detail page must expose: the parser reads
        /// positions 3, 5, 7, 9 and 11, so at least 12 nodes are required.
        /// </summary>
        private const int RequiredDetailFieldCount = 12;

        /// <summary>
        /// Creates the job and resolves the logger and data service it works with.
        /// </summary>
        public B_PlayerJob()
        {
            log = new LogHelper();
            services = IOC.Resolve<IDTOpenCode>();
        }

        /// <summary>
        /// Quartz entry point: reads the job configuration from the data map and runs the crawl.
        /// </summary>
        /// <param name="context">Quartz execution context carrying the job data map.</param>
        public void Execute(IJobExecutionContext context)
        {
            Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
            GetAll();
        }

        /// <summary>
        /// Crawls the roster of every team in parallel, waits for all crawls to finish,
        /// then bulk-inserts the players whose Chinese name is neither stored already
        /// nor duplicated inside this run.
        /// </summary>
        public void GetAll()
        {
            var teams = services.Query<B_Team>("and Remark is not null").ToList();
            allplayers = services.Query<B_Players>().ToList();

            // One task per team. The Task handles are kept so that completion can be awaited
            // deterministically instead of polling the shared thread pool, which other work
            // may also be using.
            var crawlTasks = new List<Task>();
            foreach (var item in teams)
            {
                // Copy into locals so each closure captures its own values.
                var url = item.Remark.ToString();
                var TeamId = item.Id.ToString();
                var TeamName = item.Name.ToString();

                crawlTasks.Add(Task.Run(() =>
                {
                    try
                    {
                        GetPlayerListHtml(url, TeamId, TeamName);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a failure on one team must not abort the others.
                        Trace.WriteLine("GetPlayerListHtml failed for " + url + ": " + ex);
                    }
                }));
            }

            Task.WaitAll(crawlTasks.ToArray());
            Console.WriteLine("Thread Finished!");

            if (players == null)
            {
                return;
            }

            // Keep the first occurrence of every Chinese name that is not stored yet.
            // HashSet.Add returns false for names already seen, which covers both the
            // stored players and duplicates collected within this run.
            var knownNames = new HashSet<string>(allplayers.Select(o => o.ChineseName));
            var addList = new List<B_Players>();
            foreach (var item in players)
            {
                if (knownNames.Add(item.ChineseName))
                {
                    addList.Add(item);
                }
            }

            if (addList.Count > 0)
            {
                services.SqlBulkCopyAdd<B_Players>(addList);
            }
        }

        /// <summary>
        /// Downloads a team page, locates the roster table and fetches the detail page of
        /// every listed player that is not stored yet.
        /// </summary>
        /// <param name="url">Team page URL.</param>
        /// <param name="teamId">Id of the team; assigned to every player found.</param>
        /// <param name="teamName">Team name (currently unused by the parser).</param>
        /// <returns>
        /// <c>true</c> when the roster table was found and its rows were processed;
        /// <c>false</c> when the page could not be loaded or contains no roster table.
        /// </returns>
        public bool GetPlayerListHtml(string url, string teamId, string teamName)
        {
            HtmlParameterDTO dtomodel = new HtmlParameterDTO();
            dtomodel.Url = url;
            HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(dtomodel);
            // NOTE(review): "Termination" looks like the failure marker produced by
            // CommonHelper.GetHtmlHtmlDocument - confirm against that helper.
            if (doc == null || doc.DocumentNode.InnerHtml == "Termination" || doc.DocumentNode.InnerHtml == "")
            {
                return false;
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var tables = doc.DocumentNode.SelectNodes("//*[@class='paiming1']/table");
            if (tables == null)
            {
                return false;
            }

            // The roster table is the one whose text contains both header captions.
            var rosterTable = tables.FirstOrDefault(o => o.InnerText.Contains("球员") && o.InnerText.Contains("身高"));
            if (rosterTable == null)
            {
                return false;
            }

            HtmlDocument playerDoc = new HtmlDocument();
            playerDoc.LoadHtml(rosterTable.InnerHtml);
            var playerLIst = playerDoc.DocumentNode.SelectNodes("//tr");
            if (playerLIst == null || playerLIst.Count == 0)
            {
                return false;
            }

            foreach (var playitem in playerLIst)
            {
                // Skip the header row.
                if (playitem.InnerHtml.Contains("<th"))
                {
                    continue;
                }

                HtmlDocument trdoc = new HtmlDocument();
                trdoc.LoadHtml(playitem.InnerHtml);
                var cells = trdoc.DocumentNode.SelectNodes("//td");
                if (cells == null)
                {
                    continue;
                }

                // The cell holding the link to the player detail page.
                var linkCell = cells.FirstOrDefault(o => o.InnerHtml.Contains("href"));
                if (linkCell == null || linkCell.InnerHtml == "")
                {
                    continue;
                }

                HtmlDocument hrefDoc = new HtmlDocument();
                hrefDoc.LoadHtml(linkCell.InnerHtml);
                var anchors = hrefDoc.DocumentNode.SelectNodes("//a");
                var anchor = anchors == null ? null : anchors.FirstOrDefault();
                if (anchor == null)
                {
                    continue;
                }

                var hrefAttribute = anchor.Attributes.FirstOrDefault(a => a.Name.Equals("href"));
                if (hrefAttribute == null)
                {
                    continue;
                }

                var playerurl = hrefAttribute.Value;
                var playerName = hrefDoc.DocumentNode.InnerText.Trim();
                // The last cell of the row is stored as NBAVeteran, the first one as Number.
                var NBAVeteran = cells.Last().InnerText.Trim();
                var Num = cells.First().InnerText.Trim();

                // Stored players without a name are ignored here: a null name would make
                // Contains throw and an empty name would match every player.
                bool alreadyStored = allplayers.Any(o => !string.IsNullOrEmpty(o.ChineseName) && playerName.Contains(o.ChineseName));
                if (!alreadyStored)
                {
                    GetPlayerInfo(playerurl, playerName, NBAVeteran, Num, teamId);
                }
            }

            return true;
        }

        /// <summary>
        /// Downloads a player detail page, builds a <c>B_Players</c> record from it and
        /// appends the record to <see cref="players"/>.
        /// </summary>
        /// <param name="url">Player detail page URL; also stored in the record's <c>Remark</c>.</param>
        /// <param name="name">Chinese name of the player as shown in the roster.</param>
        /// <param name="NBAVeteran">Text of the last roster cell for this player.</param>
        /// <param name="Num">Text of the first roster cell for this player.</param>
        /// <param name="TeamId">Id of the team the player belongs to.</param>
        /// <returns>
        /// The id generated for the new record, or an empty string when the page could not
        /// be loaded or does not contain the expected detail fields.
        /// </returns>
        public string GetPlayerInfo(string url, string name, string NBAVeteran, string Num, string TeamId)
        {
            HtmlParameterDTO dtomodel = new HtmlParameterDTO();
            dtomodel.Url = url;
            dtomodel.IsCheckEmpty = false;
            HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(dtomodel);
            if (doc == null || doc.DocumentNode.InnerHtml == "Termination" || doc.DocumentNode.InnerHtml == "")
            {
                return "";
            }

            var playerData = doc.DocumentNode.SelectNodes("//*[@class='div_qyxq']/span/em");
            if (playerData == null || playerData.Count < RequiredDetailFieldCount)
            {
                // Unexpected layout: the positional reads below would go out of range.
                return "";
            }

            // Player picture; a missing picture is stored as an empty string
            // rather than failing the whole record.
            var playerImg = doc.DocumentNode.SelectNodes("//*[@class='xq_img1']/img");
            var imageNode = playerImg == null ? null : playerImg.FirstOrDefault();
            var srcAttribute = imageNode == null ? null : imageNode.Attributes.FirstOrDefault(a => a.Name.Equals("src"));

            B_Players b_Players = new B_Players();
            b_Players.Id = Guid.NewGuid().ToString();
            b_Players.ChineseName = name;
            b_Players.TeamId = TeamId;
            // The values sit at the odd positions 3..11 of the em list.
            // NOTE(review): presumably the even positions hold the captions - confirm against the page.
            b_Players.EnglishName = playerData[3].InnerText.ToString();
            b_Players.Stature = playerData[5].InnerText.ToString();
            b_Players.Birthday = playerData[7].InnerText.ToString();
            b_Players.Position = playerData[9].InnerText.ToString();
            b_Players.Nationality = playerData[11].InnerText.ToString();
            b_Players.NBAVeteran = NBAVeteran;
            b_Players.Number = Num;
            b_Players.Remark = url;
            b_Players.LogoImage = srcAttribute == null ? "" : srcAttribute.Value;

            // This method runs on several worker tasks at once and List<T> does not
            // support concurrent writers.
            lock (playersLock)
            {
                players.Add(b_Players);
            }

            return b_Players.Id;
        }

        #region SQL statements
        // Not referenced anywhere in this class; kept as is.
        private static string GetAllTeamUrl = @"select Id,Name, Remark from B_Team where Remark is not null";
        #endregion
    }
}