#StackBounty: #python #python-3.x Create football stats from a .csv file

Bounty: 50

I have collected a .csv file with some statistics about football games in the following format. Here is a sample .csv file.

Date,Home,Away,HomeShots,AwayShots,HomeBT,AwayBT,HomeCrosses,AwayCrosses,HomeCorners,AwayCorners,HomeGoals,AwayGoals,HomeXG,AwayXG

My code does the following:

  • Calculate a summary of statistics for the given subset of games,
  • Calculate a summary of statistics for the each individual team,
  • Filter games by date or by range of some stat, and
  • Print a summary as html or csv.

I have some questions about my code.

  1. How should I write unit tests for checking the correctness of functions that calculate stats?
  2. How to make a function that prints output to work with an arbitrary list of fields, instead of a particular one? Since I print a lot of fields, passing them one by one is tedious. Maybe I could create some common templates and pass one of them?
  3. Can I simplify calculate_team_stats()? Maybe it can be improved by using Counter() or some third party library.

Any or all other feedback is welcomed!

import csv
import datetime
from collections import namedtuple, defaultdict
from statistics import mean

FILENAME = 'epl 18_19 games.csv'

Game = namedtuple('Game', ['Date', 'Home', 'Away', 'HomeShots', 'AwayShots',
                           'HomeBT', 'AwayBT', 'HomeCrosses', 'AwayCrosses',
                           'HomeCorners', 'AwayCorners', 'HomeGoals',
                           'AwayGoals', 'HomeXG', 'AwayXG'])


def csv_to_list_of_games(filename=FILENAME):
    """
    Makes a list of Game from a csv file.
    """

    games = []
    with open(FILENAME) as f:
        csv_reader = csv.reader(f)
        next(csv_reader)
        for game in csv_reader:
            date = game[0].split('.')
            year = int(date[2])
            month = int(date[1])
            day = int(date[0])
            date_object = datetime.date(year, month, day)
            games.append(Game(date_object, *game[1:]))

    return games


def get_teams_list(games):
    """
    Makes a list of teams in the given list of games.
    """

    return list(set([game.Home for game in games] + [game.Away for game in games]))


def get_games_by_team(teamname, games):
    """
    Returns a list of Game featuring the given team.
    """

    return [game for game in games if game.Home == teamname or game.Away == teamname]


def calculate_team_stats(teams, games):
    """
    Calculates team stats for each team in the list.
    """

    team_stats = dict()
    for team in teams:
        team_stats[team] = defaultdict(int)
        team_stats[team]['HomeShotsFor'] = sum(int(game.HomeShots) for game in games if game.Home == team)
        team_stats[team]['HomeShotsAgainst'] = sum(int(game.AwayShots) for game in games if game.Home == team)
        team_stats[team]['HomeBoxTouchesFor'] = sum(int(game.HomeBT) for game in games if game.Home == team)
        team_stats[team]['HomeBoxTouchesAgainst'] = sum(int(game.AwayBT) for game in games if game.Home == team)
        team_stats[team]['HomeCrossesFor'] = sum(int(game.HomeCrosses) for game in games if game.Home == team)
        team_stats[team]['HomeCrossesAgainst'] = sum(int(game.AwayCrosses) for game in games if game.Home == team)
        team_stats[team]['HomeCornersFor'] = sum(int(game.HomeCorners) for game in games if game.Home == team)
        team_stats[team]['HomeCornersAgainst'] = sum(int(game.AwayCorners) for game in games if game.Home == team)
        team_stats[team]['HomeGoalsFor'] = sum(int(game.HomeGoals) for game in games if game.Home == team)
        team_stats[team]['HomeGoalsAgainst'] = sum(int(game.AwayGoals) for game in games if game.Home == team)
        team_stats[team]['HomeXGoalsFor'] = sum(float(game.HomeXG) for game in games if game.Home == team)
        team_stats[team]['HomeXGoalsAgainst'] = sum(float(game.AwayXG) for game in games if game.Home == team)
        team_stats[team]['HomeGames'] = sum(1 for game in games if game.Home == team)

        team_stats[team]['AwayShotsFor'] = sum(int(game.AwayShots) for game in games if game.Away == team)
        team_stats[team]['AwayShotsAgainst'] = sum(int(game.HomeShots) for game in games if game.Away == team)
        team_stats[team]['AwayBoxTouchesFor'] = sum(int(game.AwayBT) for game in games if game.Away == team)
        team_stats[team]['AwayBoxTouchesAgainst'] = sum(int(game.HomeBT) for game in games if game.Away == team)
        team_stats[team]['AwayCrossesFor'] = sum(int(game.AwayCrosses) for game in games if game.Away == team)
        team_stats[team]['AwayCrossesAgainst'] = sum(int(game.HomeCrosses) for game in games if game.Away == team)
        team_stats[team]['AwayCornersFor'] = sum(int(game.AwayCorners) for game in games if game.Away == team)
        team_stats[team]['AwayCornersAgainst'] = sum(int(game.HomeCorners) for game in games if game.Away == team)
        team_stats[team]['AwayGoalsFor'] = sum(int(game.AwayGoals) for game in games if game.Away == team)
        team_stats[team]['AwayGoalsAgainst'] = sum(int(game.HomeGoals) for game in games if game.Away == team)
        team_stats[team]['AwayXGoalsFor'] = sum(float(game.AwayXG) for game in games if game.Away == team)
        team_stats[team]['AwayXGoalsAgainst'] = sum(float(game.HomeXG) for game in games if game.Away == team)
        team_stats[team]['AwayGames'] = sum(1 for game in games if game.Away == team)

        team_stats[team]['ShotsFor'] += team_stats[team]['HomeShotsFor'] + team_stats[team]['AwayShotsFor']
        team_stats[team]['ShotsAgainst'] += team_stats[team]['HomeShotsAgainst'] + team_stats[team]['AwayShotsAgainst']
        team_stats[team]['CrossesFor'] += team_stats[team]['HomeCrossesFor'] + team_stats[team]['AwayCrossesFor']
        team_stats[team]['CrossesAgainst'] += team_stats[team]['HomeCrossesAgainst'] + team_stats[team]['AwayCrossesAgainst']
        team_stats[team]['BoxTouchesFor'] += team_stats[team]['HomeBoxTouchesFor'] + team_stats[team]['AwayBoxTouchesFor']
        team_stats[team]['BoxTouchesAgainst'] += team_stats[team]['HomeBoxTouchesAgainst'] + team_stats[team]['AwayBoxTouchesAgainst']
        team_stats[team]['CornersFor'] += team_stats[team]['HomeCornersFor'] + team_stats[team]['AwayCornersFor']
        team_stats[team]['CornersAgainst'] += team_stats[team]['HomeCornersAgainst'] + team_stats[team]['AwayCornersAgainst']

        team_stats[team]['GoalsFor'] += team_stats[team]['HomeGoalsFor'] + team_stats[team]['AwayGoalsFor']
        team_stats[team]['GoalsAgainst'] += team_stats[team]['HomeGoalsAgainst'] + team_stats[team]['AwayGoalsAgainst']
        team_stats[team]['XGoalsFor'] += team_stats[team]['HomeXGoalsFor'] + team_stats[team]['AwayXGoalsFor']
        team_stats[team]['XGoalsAgainst'] += team_stats[team]['HomeXGoalsAgainst'] + team_stats[team]['AwayXGoalsAgainst']
        team_stats[team]['Games'] += team_stats[team]['HomeGames'] + team_stats[team]['AwayGames']

        team_stats[team]['HomeShotsRatio'] = team_stats[team]['HomeShotsFor'] / (team_stats[team]['HomeShotsFor'] + team_stats[team]['HomeShotsAgainst'])
        team_stats[team]['AwayShotsRatio'] = team_stats[team]['AwayShotsFor'] / (team_stats[team]['AwayShotsFor'] + team_stats[team]['AwayShotsAgainst'])
        team_stats[team]['ShotsRatio'] = team_stats[team]['ShotsFor'] / (team_stats[team]['ShotsFor'] + team_stats[team]['ShotsAgainst'])

        team_stats[team]['HomeCrossesRatio'] = team_stats[team]['HomeCrossesFor'] / (team_stats[team]['HomeCrossesFor'] + team_stats[team]['HomeCrossesAgainst'])
        team_stats[team]['AwayCrossesRatio'] = team_stats[team]['AwayCrossesFor'] / (team_stats[team]['AwayCrossesFor'] + team_stats[team]['AwayCrossesAgainst'])
        team_stats[team]['CrossesRatio'] = team_stats[team]['CrossesFor'] / (team_stats[team]['CrossesFor'] + team_stats[team]['CrossesAgainst'])

        team_stats[team]['HomeBoxTouchesRatio'] = team_stats[team]['HomeBoxTouchesFor'] / (team_stats[team]['HomeBoxTouchesFor'] + team_stats[team]['HomeBoxTouchesAgainst'])
        team_stats[team]['AwayBoxTouchesRatio'] = team_stats[team]['AwayBoxTouchesFor'] / (team_stats[team]['AwayBoxTouchesFor'] + team_stats[team]['AwayBoxTouchesAgainst'])
        team_stats[team]['BoxTouchesRatio'] = team_stats[team]['BoxTouchesFor'] / (team_stats[team]['BoxTouchesFor'] + team_stats[team]['BoxTouchesAgainst'])

        team_stats[team]['HomeCornersRatio'] = team_stats[team]['HomeCornersFor'] / (team_stats[team]['HomeCornersFor'] + team_stats[team]['HomeCornersAgainst'])
        team_stats[team]['AwayCornersRatio'] = team_stats[team]['AwayCornersFor'] / (team_stats[team]['AwayCornersFor'] + team_stats[team]['AwayCornersAgainst'])
        team_stats[team]['CornersRatio'] = team_stats[team]['CornersFor'] / (team_stats[team]['CornersFor'] + team_stats[team]['CornersAgainst'])

        team_stats[team]['HomeGoalsRatio'] = team_stats[team]['HomeGoalsFor'] / (team_stats[team]['HomeGoalsFor'] + team_stats[team]['HomeGoalsAgainst'])
        team_stats[team]['AwayGoalsRatio'] = team_stats[team]['AwayGoalsFor'] / (team_stats[team]['AwayGoalsFor'] + team_stats[team]['AwayGoalsAgainst'])
        team_stats[team]['GoalsRatio'] = team_stats[team]['GoalsFor'] / (team_stats[team]['GoalsFor'] + team_stats[team]['GoalsAgainst'])

        team_stats[team]['HomeXGoalsRatio'] = team_stats[team]['HomeXGoalsFor'] / (team_stats[team]['HomeXGoalsFor'] + team_stats[team]['HomeXGoalsAgainst'])
        team_stats[team]['AwayXGoalsRatio'] = team_stats[team]['AwayXGoalsFor'] / (team_stats[team]['AwayXGoalsFor'] + team_stats[team]['AwayXGoalsAgainst'])
        team_stats[team]['XGoalsRatio'] = team_stats[team]['XGoalsFor'] / (team_stats[team]['XGoalsFor'] + team_stats[team]['XGoalsAgainst'])

        team_stats[team]['CornersTotalPg'] = (team_stats[team]['CornersFor'] + team_stats[team]['CornersAgainst']) / team_stats[team]['Games']

        team_stats[team]['HomeBoxTouchesTotal'] = (team_stats[team]['HomeBoxTouchesFor'] + team_stats[team]['HomeBoxTouchesAgainst'])
        team_stats[team]['AwayBoxTouchesTotal'] = (team_stats[team]['AwayBoxTouchesFor'] + team_stats[team]['AwayBoxTouchesAgainst'])

        team_stats[team]['HomeBoxTouchesTotalPg'] = team_stats[team]['HomeBoxTouchesTotal'] / team_stats[team]['HomeGames']
        team_stats[team]['AwayBoxTouchesTotalPg'] = team_stats[team]['AwayBoxTouchesTotal'] / team_stats[team]['AwayGames']

        team_stats[team]['BoxTouchesTotalPg'] = (team_stats[team]['HomeBoxTouchesTotal'] + team_stats[team]['AwayBoxTouchesTotal']) / team_stats[team]['Games']

    return team_stats


def print_team_stats_html(team_stats):
    """
    Prints a subset of team stats in HTML format.
    """

    headers = ['Team', 'HomeBoxTouchesRatio', 'AwayBoxTouchesRatio',
               'HomeBoxTouchesTotalPg', 'AwayBoxTouchesTotalPg',
               'HomeCornersRatio', 'AwayCornersRatio']

    print('<table border=1>')
    print('<tr>', end='')

    for header in headers:
        print('<th>{}</th>'.format(header), end='')
    print('</tr>')

    for key, value in sorted(team_stats.items()):
        print('<tr>')
        print('<td>{}</td>'.format(key))
        print('<td>{:.2f}</td>'.format(value['HomeBoxTouchesRatio']))
        print('<td>{:.2f}</td>'.format(value['AwayBoxTouchesRatio']))
        print('<td>{:.2f}</td>'.format(value['HomeBoxTouchesTotalPg']))
        print('<td>{:.2f}</td>'.format(value['AwayBoxTouchesTotalPg']))
        print('<td>{:.2f}</td>'.format(value['HomeCornersRatio']))
        print('<td>{:.2f}</td>'.format(value['AwayCornersRatio']))      
        print('</tr>')
    print('</table>')


def find_games_by_teams_stats(home_stat, away_stat, home_value, away_value, teams_stats, games, home_epsilon=0.05, away_epsilon=0.05):
    """
    Finds teams with home and away stat <= EPSILON <= and returns a list of games between those teams.
    """

    relevant_home_teams = []
    relevant_away_teams = []

    for team in teams_stats:
        if abs(teams_stats[team][home_stat] - home_value) <= home_epsilon:
            relevant_home_teams.append(team)
        if abs(teams_stats[team][away_stat] - away_value) <= away_epsilon:
            relevant_away_teams.append(team)

    return [game for game in games if game.Home in relevant_home_teams and game.Away in relevant_away_teams]


def calculate_sample_stats(games):
    """
    Calculates summary statistics for the given list of Game.
    """

    avg_home_corners = mean(int(game.HomeCorners) for game in games)
    avg_away_corners = mean(int(game.AwayCorners) for game in games)
    avg_home_bt = mean(int(game.HomeBT) for game in games)
    avg_away_bt = mean(int(game.AwayBT) for game in games)
    avg_home_goals = mean(int(game.HomeGoals) for game in games)
    avg_away_goals = mean(int(game.AwayGoals) for game in games)
    avg_home_xgoals = mean(float(game.HomeXG) for game in games)
    avg_away_xgoals = mean(float(game.AwayXG) for game in games)
    avg_home_bt_ratio = avg_home_bt / (avg_home_bt + avg_away_bt)
    avg_away_bt_ratio = avg_away_bt / (avg_home_bt + avg_away_bt)

    stats = {
        'games_count': len(games),
        'avg_home_corners': avg_home_corners,
        'avg_away_corners': avg_away_corners,
        'avg_home_bt': avg_home_bt,
        'avg_away_bt': avg_away_bt,
        'avg_home_goals': avg_home_goals,
        'avg_away_goals': avg_away_goals,
        'avg_home_xgoals': avg_home_xgoals,
        'avg_away_xgoals': avg_away_xgoals,
        'avg_home_bt_ratio': avg_home_bt_ratio,
        'avg_away_bt_ratio': avg_away_bt_ratio,
    }

    return stats


def print_sample_stats(stats):
    """
    Prints the statistical summary of the list of Game.
    """

    print(f'{stats["games_count"]} games have been found')
    print(f'Average home corners: {stats["avg_home_corners"]:.2f}')
    print(f'Average away corners: {stats["avg_away_corners"]:.2f}')
    print(f'Average home BoxTouches: {stats["avg_home_bt"]:.2f}')
    print(f'Average away BoxTouches: {stats["avg_away_bt"]:.2f}')
    print(f'Average home Goals: {stats["avg_home_goals"]:.2f}')
    print(f'Average away Goals: {stats["avg_away_goals"]:.2f}')
    print(f'Average home Xgoals: {stats["avg_home_xgoals"]:.2f}')
    print(f'Average away Xgoals: {stats["avg_away_xgoals"]:.2f}')
    print(f'Average home BoxTouches ratio: {stats["avg_home_bt_ratio"]:.3f}')
    print(f'Average away BoxTouches ratio: {stats["avg_away_bt_ratio"]:.3f}')


if __name__ == '__main__':
    games = csv_to_list_of_games(FILENAME)
    teams = get_teams_list(games)
    team_stats = calculate_team_stats(teams, games)
    relevant_games = find_games_by_teams_stats('HomeBoxTouchesRatio', 'AwayBoxTouchesRatio', 0.55, 0.45, team_stats, games, 0.03, 0.03)
    relevant_stats = calculate_sample_stats(relevant_games)
    print_sample_stats(relevant_stats)
    print()
    print(set(game.Home for game in relevant_games))
    print(set(game.Away for game in relevant_games))
    print()


Get this bounty!!!

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.