using System;
using System.Collections.Generic;
using FCS.Common;
using FCS.Crawler.Tools;
using FCS.Interface;
using FCS.Models;
using HtmlAgilityPack;
using Newtonsoft.Json;
using Quartz;
using System.Data;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Diagnostics;
using FCS.Models.Entity;
using FCS.Models.DTO;
namespace FCS.Crawler.Basketball
{
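/// <summary>
/// Basketball player crawler: a Quartz job that reads team roster pages and stores players that are not yet known.
/// </summary>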
public class B_PlayerJob : CommonJob, IJob
{
// Players collected during the current run: GetPlayerInfo appends to this list and GetAll reads it when inserting.
// NOTE(review): `List` appears here without a type argument; the generic parameter looks lost in extraction — confirm against the real file.
public List players = new List();// player list (scraped this run)
// Players returned by services.Query() in GetAll; used to skip names that already exist.
public List allplayers = new List();// player list (already known)
/// <summary>
/// Creates the job and assigns the <c>log</c> and <c>services</c> members.
/// </summary>
/// <remarks>
/// Neither member is declared in this class; presumably both come from <c>CommonJob</c> — verify.
/// </remarks>
public B_PlayerJob()
{
log = new LogHelper();
// NOTE(review): Resolve() is called without a type argument; the generic parameter looks lost in extraction — confirm.
services = IOC.Resolve();
}
/// <summary>
/// Job entry point: loads the configuration from the job's data map and then runs the full player crawl.
/// </summary>
/// <param name="context">Execution context whose <c>JobDetail.JobDataMap</c> supplies the configuration.</param>
public void Execute(IJobExecutionContext context)
{
    var jobDataMap = context.JobDetail.JobDataMap;
    Config = CommonHelper.GetConfigFromDataMap(jobDataMap);

    GetAll();
}
/// <summary>
/// Crawls the roster page of every team that has a stored URL (<c>Remark</c>), waits for all
/// crawls to finish, then bulk-inserts the scraped players whose Chinese name is not already known.
/// </summary>
/// <remarks>
/// Completion is detected by waiting on the started tasks themselves. Polling the thread pool's
/// active-thread count is not a reliable signal: the counters are process-wide, so the loop can
/// exit before the workers have started or spin forever while unrelated pool work is running.
/// A failure in one team's crawl is traced and does not stop the other teams or the insert.
/// </remarks>
public void GetAll()
{
    ThreadPool.SetMinThreads(10, 10);
    ThreadPool.SetMaxThreads(200, 200);

    var ds = services.Query("and Remark is not null").ToList();
    allplayers = services.Query().ToList();

    // Start one crawl per team and keep the task handles so completion can be awaited directly.
    var pending = new List<Task>();
    foreach (var item in ds)
    {
        // Copy the values into locals so each closure captures its own team.
        var url = item.Remark.ToString();
        var TeamId = item.Id.ToString();
        var TeamName = item.Name.ToString();
        pending.Add(Task.Run(() =>
        {
            try
            {
                GetPlayerListHtml(url, TeamId, TeamName);
            }
            catch (Exception ex)
            {
                // Best effort per team: record the failure instead of leaving it unobserved.
                Trace.WriteLine("GetPlayerListHtml failed for " + url + ": " + ex);
            }
        }));
    }

    Task.WaitAll(pending.ToArray());
    Console.WriteLine("Thread Finished!");

    if (players == null)
    {
        return;
    }

    // Keep the first scraped player per Chinese name that is not already known.
    // HashSet.Add returns false for a name seen before, covering both the existing
    // players and duplicates inside this run in a single pass.
    var knownNames = new HashSet<string>(allplayers.Select(o => o.ChineseName));
    var addList = players.Where(p => knownNames.Add(p.ChineseName)).ToList();
    services.SqlBulkCopyAdd(addList);
}
/// <summary>
/// Reads a team's roster page and, for each player row whose name does not contain the
/// ChineseName of any entry in <c>allplayers</c>, calls <c>GetPlayerInfo</c> with the player's detail URL.
/// </summary>
/// <remarks>
/// Per row, the visible code takes the href of the first anchor as the detail URL, the anchor cell's
/// text as the player name, the text of the last td as <c>NBAVeteran</c> and the text of the first td as <c>Num</c>.
/// NOTE(review): the first statement inside the foreach looks truncated (probably lost in extraction):
/// it opens a string that is never closed, and <c>trdoc</c> / <c>trHtml</c> are used without a visible declaration.
/// Restore it from the real file before changing this method.
/// NOTE(review): LINQ <c>Where</c> never returns null, so the <c>playerhtml == null</c> guard cannot fire;
/// an empty match makes <c>FirstOrDefault()</c> return null and the following <c>.InnerHtml</c> throw. Confirm the intended handling.
/// </remarks>
/// <param name="url">Address of the team's roster page.</param>
/// <param name="teamId">Team identifier passed through to <c>GetPlayerInfo</c>.</param>
/// <param name="teamName">Team name; not used by the code visible here.</param>
/// <returns>Always <c>false</c>: every return statement in the visible code returns false.</returns>
public bool GetPlayerListHtml(string url, string teamId, string teamName)
{
HtmlParameterDTO dtomodel = new HtmlParameterDTO();
dtomodel.Url = url;
HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(dtomodel);
// "Termination" is presumably a sentinel produced by the fetch helper — verify against CommonHelper.
if (doc.DocumentNode.InnerHtml == "Termination" || doc.DocumentNode.InnerHtml == "")
{
return false;
}
// Get the player list: tables under class 'paiming1' whose text contains both "球员" (player) and "身高" (height).
var playerhtml = doc.DocumentNode.SelectNodes("//*[@class='paiming1']/table").ToList().Where(o => o.InnerText.Contains("球员") && o.InnerText.Contains("身高"));
if (playerhtml == null) return false;
// Re-parse the first matching table on its own so the "//tr" query only sees its rows.
HtmlDocument playerDoc = new HtmlDocument();
playerDoc.LoadHtml(playerhtml.FirstOrDefault().InnerHtml);
var playerLIst = playerDoc.DocumentNode.SelectNodes("//tr");
if (playerLIst == null || playerLIst.Count() == 0) return false;
// Walk the rows; rows without a usable link cell are skipped, and only players not matched in allplayers are fetched.
foreach (var playitem in playerLIst)
{
if (playitem.InnerHtml.Contains(" o.InnerHtml.Contains("href")).FirstOrDefault();
if (trHtml == null || trHtml.InnerHtml == "")
{
continue;
}
HtmlDocument hrefDoc = new HtmlDocument();
hrefDoc.LoadHtml(trHtml.InnerHtml);
var playerurl = hrefDoc.DocumentNode.SelectNodes("//a").FirstOrDefault().Attributes.SingleOrDefault(a => a.Name.Equals("href")).Value;
var playerName = hrefDoc.DocumentNode.InnerText.Trim();
var NBAVeteranHtml = trdoc.DocumentNode.SelectNodes("//td").ToList().Last();
var NBAVeteran = NBAVeteranHtml.InnerText.Trim();
var NumHtml = trdoc.DocumentNode.SelectNodes("//td").ToList().First();
var Num = NumHtml.InnerText.Trim();
var playerInfo = "";
if (allplayers.Where(o => playerName.Contains(o.ChineseName)).Count() == 0)
{
playerInfo = GetPlayerInfo(playerurl, playerName, NBAVeteran, Num, teamId);
}
}
return false;
}
// Guards appends to `players`: GetAll starts one Task.Run worker per team and each worker
// reaches GetPlayerInfo, so several threads can add at once and List is not safe for concurrent writers.
private readonly object playersSyncRoot = new object();
/// <summary>
/// Downloads a player's detail page, builds a <c>B_Players</c> record from it and appends the
/// record to <c>players</c>.
/// </summary>
/// <remarks>
/// A page that is empty, reports "Termination", lacks the detail fields or lacks the photo is
/// skipped (traced for the latter two) instead of throwing, so one malformed player page does not
/// abort the rest of the team's roster in the caller's loop. Nothing is added for a skipped page.
/// </remarks>
/// <param name="url">Address of the player's detail page; also stored as the record's <c>Remark</c>.</param>
/// <param name="name">Player name, stored as <c>ChineseName</c>.</param>
/// <param name="NBAVeteran">Value stored as <c>NBAVeteran</c>.</param>
/// <param name="Num">Value stored as <c>Number</c>.</param>
/// <param name="TeamId">Identifier of the owning team, stored as <c>TeamId</c>.</param>
/// <returns>Always an empty string; the generated id is not returned.</returns>
public string GetPlayerInfo(string url, string name, string NBAVeteran, string Num, string TeamId)
{
    // The detail fields are read from the <em> elements at odd positions 3..11, so 12 are needed.
    const int RequiredFieldCount = 12;

    HtmlParameterDTO dtomodel = new HtmlParameterDTO();
    dtomodel.Url = url;
    dtomodel.IsCheckEmpty = false;
    HtmlDocument doc = CommonHelper.GetHtmlHtmlDocument(dtomodel);

    var pageHtml = doc.DocumentNode.InnerHtml;
    if (pageHtml == "Termination" || pageHtml == "")
    {
        return "";
    }

    // SelectNodes yields null when nothing matches; query once and reuse the result.
    var detailNodes = doc.DocumentNode.SelectNodes("//*[@class='div_qyxq']/span/em");
    if (detailNodes == null)
    {
        return "";
    }
    var playerData = detailNodes.ToList();
    if (playerData.Count < RequiredFieldCount)
    {
        Trace.WriteLine("GetPlayerInfo skipped (only " + playerData.Count + " detail fields): " + url);
        return "";
    }

    // Player photo: every step can be missing on a malformed page, so check each one.
    var playerImg = doc.DocumentNode.SelectNodes("//*[@class='xq_img1']/img");
    var imgNode = playerImg == null ? null : playerImg.FirstOrDefault();
    var srcAttribute = imgNode == null ? null : imgNode.Attributes.FirstOrDefault(a => a.Name.Equals("src"));
    if (srcAttribute == null)
    {
        Trace.WriteLine("GetPlayerInfo skipped (no player image): " + url);
        return "";
    }

    B_Players b_Players = new B_Players
    {
        Id = Guid.NewGuid().ToString(),
        ChineseName = name,
        TeamId = TeamId,
        EnglishName = playerData[3].InnerText,
        Stature = playerData[5].InnerText,
        Birthday = playerData[7].InnerText,
        Position = playerData[9].InnerText,
        Nationality = playerData[11].InnerText,
        NBAVeteran = NBAVeteran,
        Number = Num,
        Remark = url,
        LogoImage = srcAttribute.Value
    };

    lock (playersSyncRoot)
    {
        players.Add(b_Players);
    }
    return "";
}
#region SQL statements
// Selects Id, Name and Remark of every B_Team row whose Remark is not null.
// Not referenced by any member of this class; readonly because it is never reassigned.
private static readonly string GetAllTeamUrl = @"select Id,Name, Remark from B_Team where Remark is not null";
#endregion
}
}
|