Source code for game_theory4e

"""Multiagent decision making: game theory and social choice (Chapter 18)."""

from collections import Counter, defaultdict
from itertools import combinations, permutations
from math import factorial

import numpy as np
from scipy.optimize import linprog


[docs] def dominates(payoff, s, t, strongly=True): """ [Section 18.2] True if, for the player whose payoff matrix is 'payoff' (rows = that player's own strategies, columns = the opponents' joint strategies), strategy s dominates strategy t. Strong domination requires s to be strictly better than t against every opponent strategy; weak domination requires s to be no worse everywhere and strictly better in at least one column. """ payoff = np.asarray(payoff, dtype=float) if strongly: return bool(np.all(payoff[s] > payoff[t])) return bool(np.all(payoff[s] >= payoff[t]) and np.any(payoff[s] > payoff[t]))
[docs] def dominant_strategy(payoff, strongly=True): """ [Section 18.2] Return the strategy (row index) that dominates all of the player's other strategies, or None if the player has no such dominant strategy. To analyze the column player of a game, pass the transpose of their payoff matrix so that their strategies become the rows. """ payoff = np.asarray(payoff, dtype=float) for s in range(payoff.shape[0]): if all(dominates(payoff, s, t, strongly) for t in range(payoff.shape[0]) if t != s): return s return None
[docs] def iterated_dominance(payoff1, payoff2, strongly=True): """ [Section 18.2] Iterated elimination of dominated strategies: since a rational player never plays a dominated strategy, we can repeatedly delete, for either player, any strategy that is dominated among the strategies still in play, until none remains. 'payoff1'/'payoff2' are the row and column player's payoff matrices. Returns the surviving (rows, cols) strategy indices of the original game. """ A, B = np.asarray(payoff1, dtype=float), np.asarray(payoff2, dtype=float) rows, cols = list(range(A.shape[0])), list(range(A.shape[1])) def find_dominated(payoff, own, opponent): """A strategy in 'own' that is dominated against the surviving 'opponent' strategies.""" for s in own: for t in own: if s != t: better, worse = payoff[t][opponent], payoff[s][opponent] if (np.all(better > worse) if strongly else np.all(better >= worse) and np.any(better > worse)): return s return None eliminated = True while eliminated: # the row player's payoff is A[row][col]; the column player's is B.T[col][row] row = find_dominated(A, rows, cols) if row is not None: rows.remove(row) col = find_dominated(B.T, cols, rows) if col is not None: cols.remove(col) eliminated = row is not None or col is not None return rows, cols
[docs] def pure_nash_equilibria(payoff1, payoff2): """ [Section 18.2] All pure-strategy Nash equilibria of a two-player game, given the payoff matrices payoff1[i][j] and payoff2[i][j] for the row and column player when the row player plays i and the column player plays j. A profile (i, j) is a Nash equilibrium when each player is playing a best response to the other, so neither can gain by deviating unilaterally. Returns a list of (i, j) profiles (possibly empty, e.g. for matching pennies). """ payoff1, payoff2 = np.asarray(payoff1, dtype=float), np.asarray(payoff2, dtype=float) m, n = payoff1.shape return [(i, j) for i in range(m) for j in range(n) # row player best-responds within column j and column player within row i if payoff1[i, j] >= payoff1[:, j].max() and payoff2[i, j] >= payoff2[i, :].max()]
[docs] def solve_zero_sum_game(payoff): """ [Section 18.2] Solve a two-player zero-sum game by linear programming. 'payoff' is the payoff to the row (maximizing) player; the column (minimizing) player receives its negation. By von Neumann's minimax theorem the game has a value v and optimal mixed strategies x, y such that x maximizes the row player's guaranteed payoff (for every column j, sum_i x_i payoff[i][j] >= v) and y minimizes the column player's guaranteed loss. Returns (value, row_strategy, col_strategy). """ payoff = np.asarray(payoff, dtype=float) m, n = payoff.shape # row player: maximize v s.t. for every column j, sum_i x_i payoff[i][j] >= v; # variables are [x_1, ..., x_m, v], and linprog minimizes, so we minimize -v res = linprog(c=np.append(np.zeros(m), -1), A_ub=np.column_stack([-payoff.T, np.ones(n)]), b_ub=np.zeros(n), A_eq=np.append(np.ones(m), 0).reshape(1, -1), b_eq=[1], bounds=[(0, None)] * m + [(None, None)]) value, row_strategy = res.x[m], res.x[:m] # column player: minimize w s.t. for every row i, sum_j payoff[i][j] y_j <= w res = linprog(c=np.append(np.zeros(n), 1), A_ub=np.column_stack([payoff, -np.ones(m)]), b_ub=np.zeros(m), A_eq=np.append(np.ones(n), 0).reshape(1, -1), b_eq=[1], bounds=[(0, None)] * n + [(None, None)]) col_strategy = res.x[:n] return value, row_strategy, col_strategy
# ______________________________________________________________________________ # 18.3 Cooperative Game Theory
[docs] def shapley_value(players, characteristic_function): """ [Section 18.3] The Shapley value of a cooperative game (players, v): a fair division of the grand coalition's value v(N) that pays each player the average, over all n! orderings of the players, of the marginal contribution mc_i(C) = v(C and {i}) - v(C) they make to the players preceding them. The 'characteristic_function' is a callable mapping a frozenset of players to its value. Returns a dict mapping each player to their Shapley value. """ players = list(players) phi = {i: 0.0 for i in players} for order in permutations(players): preceding = set() for i in order: phi[i] += (characteristic_function(frozenset(preceding | {i})) - characteristic_function(frozenset(preceding))) preceding.add(i) return {i: phi[i] / factorial(len(players)) for i in players}
[docs] def is_in_core(players, characteristic_function, payoff): """ [Section 18.3] True if 'payoff' (a dict mapping each player to their share) lies in the core of the cooperative game (players, v): it must distribute exactly the grand coalition's value (sum of shares = v(N)) and be immune to defection, i.e. for every coalition C the players in C receive at least v(C) (otherwise C would be better off on its own). An empty core means the grand coalition cannot form. """ players, v = list(players), characteristic_function # efficiency: the grand coalition's value is fully distributed if not np.isclose(sum(payoff[i] for i in players), v(frozenset(players))): return False # no coalition can object: x(C) >= v(C) for every coalition C return all(sum(payoff[i] for i in coalition) >= v(frozenset(coalition)) - 1e-9 for size in range(1, len(players)) for coalition in combinations(players, size))
# ______________________________________________________________________________ # 18.4 Making Collective Decisions
[docs] def plurality_winner(preferences): """ [Section 18.4] Winner under plurality voting: the candidate ranked first by the most voters. 'preferences' is a list of ballots, each a list of candidates ordered from most to least preferred. """ first_choices = Counter(ballot[0] for ballot in preferences) return max(first_choices, key=first_choices.get)
[docs] def borda_winner(preferences): """ [Section 18.4] Winner under the Borda count: with k candidates each voter awards k points to their top choice, k-1 to the next, down to 1 for the last; the candidate with the highest total score wins. """ scores = defaultdict(int) for ballot in preferences: for rank, candidate in enumerate(ballot): scores[candidate] += len(ballot) - rank return max(scores, key=scores.get)
[docs] def condorcet_winner(preferences): """ [Section 18.4] The Condorcet winner: the candidate that beats every other candidate in a pairwise majority comparison. Returns None when no such candidate exists (Condorcet's paradox), in which case majority preference is cyclic. """ candidates = list(preferences[0]) def beats(a, b): # a majority of voters rank a above b votes = sum(ballot.index(a) < ballot.index(b) for ballot in preferences) return votes > len(preferences) / 2 return next((a for a in candidates if all(a == b or beats(a, b) for b in candidates)), None)
[docs] def vickrey_auction(bids): """ [Section 18.4] Sealed-bid second-price (Vickrey) auction: the highest bidder wins but pays only the second-highest bid. 'bids' maps each bidder to their bid. Returns the (winner, price) pair. Because the winner does not pay their own bid, bidding one's true value is a dominant strategy (the mechanism is truth-revealing). """ ranked = sorted(bids.values(), reverse=True) winner = max(bids, key=bids.get) price = ranked[1] if len(ranked) > 1 else ranked[0] return winner, price
[docs] def contract_net(tasks, agents, bid, select=min): """ [Section 18.4.1] The contract net protocol for task allocation. For each task the manager broadcasts a task announcement; every agent submits a bid through the callable bid(agent, task), which returns a numeric bid or None when the agent cannot or will not perform the task. The manager then awards the task to the agent with the best bid ('select' = min for costs, max for values). Returns a dict mapping each task to an (agent, bid) award, or to None if nobody bid. """ allocation = {} for task in tasks: bids = {agent: bid(agent, task) for agent in agents} bids = {agent: b for agent, b in bids.items() if b is not None} allocation[task] = (select(bids, key=bids.get), select(bids.values())) if bids else None return allocation
[docs] def alternating_offers_bargaining(discount_a, discount_b): """ [Section 18.4.4] Rubinstein's alternating-offers bargaining over how to split a pie of size 1 between two impatient agents with discount factors discount_a and discount_b in [0, 1). In the unique subgame-perfect equilibrium the agent who makes the first offer (A) keeps a share (1 - discount_b) / (1 - discount_a * discount_b) and B receives the rest; the more patient an agent is (larger discount factor) the larger the share it secures. Returns the (share_a, share_b) pair. """ share_a = (1 - discount_b) / (1 - discount_a * discount_b) return share_a, 1 - share_a