123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205 |
- using System;
- using System.Collections.Generic;
- using FCS.Common;
- using FCS.Crawler.Tools;
- using FCS.Interface;
- using FCS.Models;
- using HtmlAgilityPack;
- using Newtonsoft.Json;
- using Quartz;
- using System.Data;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- using System.Threading;
- using System.Threading.Tasks;
- using System.Diagnostics;
- using FCS.Models.Entity;
- using FCS.Models.DTO;
namespace FCS.Crawler.Basketball
{
    /// <summary>
    /// Quartz job that crawls basketball team pages and stores the players
    /// that are not yet present in the database.
    /// </summary>
    public class B_PlayerJob : CommonJob, IJob
    {
        /// <summary>Players scraped during the current run (filled by <see cref="GetPlayerInfo"/>).</summary>
        public List<B_Players> players = new List<B_Players>();

        /// <summary>Players already stored, loaded at the start of <see cref="GetAll"/>.</summary>
        public List<B_Players> allplayers = new List<B_Players>();

        // Guards `players`, which is appended to from several worker tasks at once.
        private readonly object _playersGate = new object();

        /// <summary>
        /// Creates the job and resolves the logger and the data service.
        /// </summary>
        public B_PlayerJob()
        {
            log = new LogHelper();
            services = IOC.Resolve<IDTOpenCode>();
        }

        /// <summary>
        /// Quartz entry point: reads the job configuration from the job data map
        /// and runs the crawl.
        /// </summary>
        /// <param name="context">Execution context supplied by the scheduler.</param>
        public void Execute(IJobExecutionContext context)
        {
            Config = CommonHelper.GetConfigFromDataMap(context.JobDetail.JobDataMap);
            GetAll();
        }

        /// <summary>
        /// Crawls the player list of every team that has a URL in <c>Remark</c>,
        /// waits for all crawls to finish, then bulk-inserts the players whose
        /// <c>ChineseName</c> is neither stored already nor repeated in this run.
        /// </summary>
        public void GetAll()
        {
            var teams = services.Query<B_Team>("and Remark is not null").ToList();
            allplayers = services.Query<B_Players>().ToList();

            // One task per team. The tasks are tracked so that completion can be
            // awaited directly instead of polling the process-wide thread pool,
            // which is shared with unrelated work.
            var crawlTasks = new List<Task>();
            foreach (var item in teams)
            {
                var url = item.Remark.ToString();
                var TeamId = item.Id.ToString();
                var TeamName = item.Name.ToString();
                crawlTasks.Add(Task.Run(() =>
                {
                    try
                    {
                        GetPlayerListHtml(url, TeamId, TeamName);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a failure on one team must not abort the others.
                        Trace.WriteLine("B_PlayerJob: failed to crawl team " + TeamId + " (" + url + "): " + ex);
                    }
                }));
            }

            Task.WaitAll(crawlTasks.ToArray());
            Console.WriteLine("Thread Finished!");

            if (players == null)
            {
                return;
            }

            // Seed the set with the stored names; HashSet.Add returns false for a
            // name that is already known, which also removes duplicates inside
            // this run. Comparison is ordinal, the same as string ==.
            var knownNames = new HashSet<string>(allplayers.Select(o => o.ChineseName));
            var addList = new List<B_Players>();
            foreach (var item in players)
            {
                if (knownNames.Add(item.ChineseName))
                {
                    addList.Add(item);
                }
            }

            if (addList.Count > 0)
            {
                services.SqlBulkCopyAdd<B_Players>(addList);
            }
        }

        /// <summary>
        /// Downloads a team page, finds the roster table and calls
        /// <see cref="GetPlayerInfo"/> for every listed player whose name does not
        /// contain the name of an already stored player.
        /// </summary>
        /// <param name="url">URL of the team page.</param>
        /// <param name="teamId">Id of the team, passed on to the parsed players.</param>
        /// <param name="teamName">Name of the team (currently not used).</param>
        /// <returns>
        /// <c>true</c> when a roster table was found and its rows were processed;
        /// <c>false</c> when the page could not be loaded or holds no roster.
        /// </returns>
        public bool GetPlayerListHtml(string url, string teamId, string teamName)
        {
            HtmlParameterDTO dtomodel = new HtmlParameterDTO();
            dtomodel.Url = url;
            HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(dtomodel);
            if (doc == null || doc.DocumentNode.InnerHtml == "Termination" || doc.DocumentNode.InnerHtml == "")
            {
                return false;
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var tables = doc.DocumentNode.SelectNodes("//*[@class='paiming1']/table");
            if (tables == null)
            {
                return false;
            }

            // The roster table is recognised by its "player" and "height" header texts.
            var rosterTable = tables.FirstOrDefault(o => o.InnerText.Contains("球员") && o.InnerText.Contains("身高"));
            if (rosterTable == null)
            {
                return false;
            }

            HtmlDocument playerDoc = new HtmlDocument();
            playerDoc.LoadHtml(rosterTable.InnerHtml);
            var rows = playerDoc.DocumentNode.SelectNodes("//tr");
            if (rows == null || rows.Count == 0)
            {
                return false;
            }

            foreach (var row in rows)
            {
                // Skip the header row.
                if (row.InnerHtml.Contains("<th"))
                {
                    continue;
                }

                HtmlDocument trdoc = new HtmlDocument();
                trdoc.LoadHtml(row.InnerHtml);
                var cells = trdoc.DocumentNode.SelectNodes("//td");
                if (cells == null || cells.Count == 0)
                {
                    continue;
                }

                // The cell that carries the link to the player's detail page.
                var linkCell = cells.FirstOrDefault(o => o.InnerHtml.Contains("href"));
                if (linkCell == null || linkCell.InnerHtml == "")
                {
                    continue;
                }

                HtmlDocument hrefDoc = new HtmlDocument();
                hrefDoc.LoadHtml(linkCell.InnerHtml);
                var anchor = hrefDoc.DocumentNode.SelectSingleNode("//a");
                if (anchor == null)
                {
                    continue;
                }

                var playerurl = anchor.GetAttributeValue("href", "");
                if (string.IsNullOrEmpty(playerurl))
                {
                    continue;
                }

                var playerName = hrefDoc.DocumentNode.InnerText.Trim();
                // The last cell is stored as NBAVeteran, the first cell as Number.
                var NBAVeteran = cells[cells.Count - 1].InnerText.Trim();
                var Num = cells[0].InnerText.Trim();

                // Stored players with a null or empty name are ignored here:
                // Contains(null) throws and Contains("") matches every name.
                bool alreadyStored = allplayers.Any(o => !string.IsNullOrEmpty(o.ChineseName) && playerName.Contains(o.ChineseName));
                if (!alreadyStored)
                {
                    GetPlayerInfo(playerurl, playerName, NBAVeteran, Num, teamId);
                }
            }

            return true;
        }

        /// <summary>
        /// Downloads a player's detail page, builds a <see cref="B_Players"/> record
        /// from it and appends the record to <see cref="players"/>.
        /// </summary>
        /// <param name="url">URL of the player's detail page; also stored as Remark.</param>
        /// <param name="name">Player name, stored as ChineseName.</param>
        /// <param name="NBAVeteran">Text of the last roster cell, stored as NBAVeteran.</param>
        /// <param name="Num">Text of the first roster cell, stored as Number.</param>
        /// <param name="TeamId">Id of the team the player belongs to.</param>
        /// <returns>
        /// The generated id of the new record, or an empty string when the page
        /// could not be loaded or does not have the expected detail fields.
        /// </returns>
        public string GetPlayerInfo(string url, string name, string NBAVeteran, string Num, string TeamId)
        {
            // Highest index read from the detail list below.
            const int lastFieldIndex = 11;

            HtmlParameterDTO dtomodel = new HtmlParameterDTO();
            dtomodel.Url = url;
            dtomodel.IsCheckEmpty = false;
            HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(dtomodel);
            if (doc == null || doc.DocumentNode.InnerHtml == "Termination" || doc.DocumentNode.InnerHtml == "")
            {
                return "";
            }

            var playerData = doc.DocumentNode.SelectNodes("//*[@class='div_qyxq']/span/em");
            if (playerData == null || playerData.Count <= lastFieldIndex)
            {
                // Page layout differs from what the fixed indices below expect.
                return "";
            }

            // Player picture; may be missing on some pages.
            var playerImg = doc.DocumentNode.SelectSingleNode("//*[@class='xq_img1']/img");

            B_Players b_Players = new B_Players();
            b_Players.Id = Guid.NewGuid().ToString();
            b_Players.ChineseName = name;
            b_Players.TeamId = TeamId;
            // Values are read from the odd positions of the <em> list.
            // NOTE(review): presumably the even positions hold the labels - confirm against the page.
            b_Players.EnglishName = playerData[3].InnerText.ToString();
            b_Players.Stature = playerData[5].InnerText.ToString();
            b_Players.Birthday = playerData[7].InnerText.ToString();
            b_Players.Position = playerData[9].InnerText.ToString();
            b_Players.Nationality = playerData[11].InnerText.ToString();
            b_Players.NBAVeteran = NBAVeteran;
            b_Players.Number = Num;
            b_Players.Remark = url;
            // Falls back to an empty string when the page has no picture.
            b_Players.LogoImage = playerImg == null ? string.Empty : playerImg.GetAttributeValue("src", string.Empty);

            // List<T> is not safe for concurrent writers and this method runs on
            // several worker tasks at once.
            lock (_playersGate)
            {
                players.Add(b_Players);
            }

            return b_Players.Id;
        }

        #region SQL statements
        // Not referenced anywhere in this class; kept as-is.
        private static string GetAllTeamUrl = @"select Id,Name, Remark from B_Team where Remark is not null";
        #endregion
    }
}
|