围棋吧 关注:339,326贴子:10,286,650
  • 2回复贴,共1

katago 模仿人类参数5k

只看楼主收藏回复

# This is an example config for configuring KataGo to attempt to imitate a weaker human player.
# Running with this config requires giving a human SL model b18c384nbt-humanv0.bin.gz
# on the command line such as:
# ./katago gtp -config gtp_human5k_example.cfg -model your_favorite_normal_model_for_katago.bin.gz -human-model b18c384nbt-humanv0.bin.gz
# You can obtain the human model at https://github.com/lightvector/KataGo/releases/tag/v1.15.0
# Below, the most important parts of the config for human-like play are commented.
# See the original gtp_example for comments on other parameters.
# For another useful guide on human-style analysis, see here:
# https://github.com/lightvector/KataGo/blob/master/docs/Analysis_Engine.md#human-sl-analysis-guide
# It is ALSO possible to pass in simply '-model b18c384nbt-humanv0.bin.gz' and NOT
# pass in -human-model, i.e. use the human model as if it were KataGo's normal neural net.
# If you do that, you need to use a config more like the normal gtp_example.cfg, not this config!
# Keep in mind that if using the model normally, or if using -human-model but also altering
# parameters below to blend in some of KataGo's search, KataGo's play might NOT be very human-like,
# or might be human-like but play at a strength very different than the humanSLProfile.
# You can experiment, some of the comments below hopefully will help illustrate things too.
logDir = gtp_logs
logAllGTPCommunication = true
logSearchInfo = true
logSearchInfoForChosenMove = false
logToStderr = false
# Use these rules by default, but a GUI or GTP controller might override this.
rules = chinese
# When using -human-model, we only resign when far behind since a weaker player
# might continue to fight much longer than a strong bot normally would.
allowResignation = true
resignThreshold = -0.99
resignConsecTurns = 20
resignMinScoreDifference = 40
resignMinMovesPerBoardArea = 0.4
# Note: unless you change other parameters too, by default increasing visits won't do much.
# If humanSLChosenMoveProp = 1.0 AND humanSLChosenMovePiklLambda is a large number,
# then KataGo's normal search is ignored except for possibly choosing whether to pass/resign,
# so more visits will have no effect on play. Only the raw model is used for move selection.
# Still, having some visits is good for ensuring good pass/resign judgment, this is what the 40 visits
# are used for unless you change other parameters to make use of them for move choice too.
maxVisits = 40
numSearchThreads = 1
lagBuffer = 1.0
# Rough scale in seconds to randomly delay moving, so as not to respond instantly.
# Some moves will delay longer, some moves will delay a little less.
delayMoveScale = 2.0
delayMoveMax = 10.0
# ===========================================================================
# HUMAN SL PARAMETERS
# ===========================================================================
# The most important parameter for human-like play configuration!
# Choose the "profile" of players that the human SL model will imitate.
# Available options are:
# preaz_{RANK from 20k to 9d} - imitate player of given rank, before AlphaZero opening style became popular
# rank_{RANK from 20k to 9d} - imitate player of given rank, after human openings changed due to AlphaZero.
# preaz_{BRANK}_{WRANK} or rank_{BRANK}_{WRANK} - same, but imitate how black with the rank BRANK and white
# with the rank WRANK would play against each other, knowing that the other player is stronger/weaker than them.
# Warning: for rank differences > 9 ranks, or drastically mis-matched to the handicap used in the game,
# this may be out of distribution due to lack of training data and the model might not behave well! Experiment with care.
# proyear_{YEAR from 1800 to 2023} - imitate historical pros or insei from given year.
# NOTE: for pros or high-dan ranks the raw human SL model is NOT expected to match the *strength* of the players imitated
# because it does no search! Go AI reaching high-dan/pro ranks virtually always requires search.
# To increase strength and make use of the visits in the search, adjust values like humanSLChosenMovePiklLambda and temperature. See:
# https://github.com/lightvector/KataGo/blob/master/docs/Analysis_Engine.md#how-to-get-stronger-human-style-play
humanSLProfile = preaz_5k
# The probability that we should play a HUMAN-like move, rather than playing KataGo's move.
# Applies BEFORE temperature.
humanSLChosenMoveProp = 1.0
# If true, ignore the human SL model's choice of when to pass, and still use KataGo to determine that.
# The human SL model, in theory, is not guaranteed to be reliable at when to pass for all profiles,
# since e.g. some historical games omit passes.
humanSLChosenMoveIgnorePass = true
# By default humanSLChosenMovePiklLambda is a large number which effectively disables it.
# Setting it to a smaller number will "suppress" human-like moves that KataGo disapproves of.
# In particular, if set to, for example, 0.4 when KataGo judges a human SL move to lose 0.4 utility,
# it will substantially suppress the chance of playing that move (in particular, by a factor of exp(1)).
# Less-bad moves will also be suppressed, but not by as much, e.g. a move losing 0.2 would get lowered
# by a factor of exp(0.5).
# As configured lower down, utilities by default range from -1.0 (loss) to +1.0 (win), plus up to +/- 0.3 for score.
# WARNING: ONLY moves that KataGo actually searches will get suppressed! If a move is so bad that KataGo
# rejects it without searching it, it will NOT get suppressed.
# Therefore, to use humanSLChosenMovePiklLambda, it is STRONGLY recommended that you also use something
# like humanSLRootExploreProbWeightless to ensure most human moves including bad moves get searched,
# and ALSO use at least hundreds and ideally thousands of maxVisits, to ensure enough visits.
humanSLChosenMovePiklLambda = 100000000
# These parameters tell KataGo to use the human SL policy for exploration during search.
# Each of these specifies the probability that KataGo will perform PUCT using the Human SL policy to
# explore different moves, rather than using KataGo's normal policy, after a certain minimal number of visits.
# "Root": applies only at the root of the search
# "Pla": applies during non-root nodes of the search where it is katago's turn.
# "Opp": applies during non-root nodes of the search where it is the opponent's turn.
# "Weightless": search the move to evaluate it, but do NOT allow this visit to affect the parent's average utility.
# "Weightful": search the move to evaluate it, and DO allow this visit to affect the parent's average utility.
# For example, humanSLRootExploreProbWeightless = 0.5 would tell KataGo at the root of the search to spend
# 50% of its visits to judge different possible human moves, but NOT to use those visits for determining the
# value of the position (avoiding biasing the utility if some human SL moves are very bad).
# If you don't understand these well, ask for help or look up some online explainers for MCTS (Monte-Carlo Tree Search).
humanSLRootExploreProbWeightless = 0.0
humanSLRootExploreProbWeightful = 0.0
humanSLPlaExploreProbWeightless = 0.0
humanSLPlaExploreProbWeightful = 0.0
humanSLOppExploreProbWeightless = 0.0
humanSLOppExploreProbWeightful = 0.0
# When using the human SL policy for exploration during search, use this cPUCT.
# This only has an effect if at least one of humanSL{Root,Pla,Opp}ExploreProbWeight{less,ful} is nonzero.
humanSLCpuctExploration = 0.50
# Same as humanSLCpuctExploration, but NEVER diminishes its exploration no matter how many visits are used.
# Normally, PUCT will sharpen with visits and spend a diminishing proportion of visits on moves with lower utility.
# This is the coefficient for a term that does NOT diminish, i.e. if this is 0.2, then roughly moves within
# 0.2 utility (about 10% winrate) of the best move will forever continue getting a decent fraction of visits,
# smoothly falling off for greater utility differences.
# Note that in combination with Weightful exploration above, if used for Opp exploration, this could be used
# to model an opponent that will always have some chance to make small mistakes no matter how deep they search.
# If further this was increased to a very large value, it would model an opponent that always played according
# to the human SL raw policy. These might be interesting to experiment with for handicap play.
humanSLCpuctPermanent = 0.2
# ===========================================================================
# OTHER USEFUL PARAMETERS FOR HUMAN PLAY ADJUSTMENT
# ===========================================================================
# Choosing temperature near 1, and restricting it to only affect moves already below 1% chance,
# so that we sample close to the full range of human play.
# You can also reduce the temperature to settings more like the plain gtp_example.cfg.
# Then, rather than imitating a realistic human player, it will be more like imitating the
# *majority vote* of players at that rank. For example it would avoid a lot of blunders
# that players of that level would make, because even if players often blunder, the *majority vote*
# of players would be much less likely to select any given blunder that an individual player would.
chosenMoveTemperatureEarly = 0.85
chosenMoveTemperature = 0.70
chosenMoveTemperatureHalflife = 80
chosenMoveTemperatureOnlyBelowProb = 0.01 # temperature only starts to dampen moves below this
chosenMoveSubtract = 0
chosenMovePrune = 0
# Use a small NN cache to save memory since we're using very low visits anyways. You can increase
# these back to more like the plain gtp_example.cfg if you are doing more extensive searches to
# improve performance.
nnCacheSizePowerOfTwo = 17
nnMutexPoolSizePowerOfTwo = 14
# ===========================================================================
# PARAMETERS CHANGED FROM DEFAULT TO MAKE SURE HUMAN SL USAGE WORKS WELL
# ===========================================================================
# Make sure to take into account the recent moves in the game, don't ignore history.
# This will produce the best imitation/prediction, since humans definitely do play differently based on where the
# most recent moves in the game were, rather than coming fresh to the board position on every turn.
ignorePreRootHistory = false
analysisIgnorePreRootHistory = false
# Average 2 neural net samples at the root - ensures a bit smoother probabilities and results in
# 8 * 7 / 2 = 28 possible policies instead of 8 possibilities.
rootNumSymmetriesToSample = 2
# LCB improves strength for KataGo, but we disable it so it doesn't mess up move selection when blending human play.
useLcbForSelection = false
# We disable dynamicScoreUtilityFactor - the human SL model can make score predictions that are a bit swingy, so
# if we do want to do a search that blends human SL values in (TODO there isn't a way to do this anyways yet), using
# static score utility might be a bit more stable.
winLossUtilityFactor = 1.0
staticScoreUtilityFactor = 0.30
dynamicScoreUtilityFactor = 0.00
# Uncertainty improves strength for KataGo normally, but messes with the weights of playouts in complicated ways,
# so let's turn it off when doing human SL stuff.
useUncertainty = false
# Subtree value bias improves strength for KataGo normally, but messes with the values of nodes in complicated ways,
# so let's turn it off when doing human SL stuff.
subtreeValueBiasFactor = 0.0
# Noise pruning prunes out weight from moves that KataGo thinks are bad, but if we are doing human SL we might actively
# want to be playing or exploring and weighting "bad" but human-like moves. So disable this.
# Warning: when this is false, there is much less protection against the search severely misbehaving when you use too many threads.
# Make sure not to set numSearchThreads to be too large - at a minimum, keep at least a 20x buffer between
# the number of visits you use and the number of threads you use.
# (as an aside, ideally, you want to have visits be a sufficient factor larger than threads EVEN when
# useNoisePruning is true, this parameter just blunts the worst effects but doesn't entirely fix the badness).
useNoisePruning = false


IP属地:辽宁1楼2024-09-21 15:31回复
    katago 模仿人类参数9d
    # This is an example config for configuring KataGo to attempt to imitate a 9d player and then
    # further improving the strength (to possibly modestly superhuman) using KataGo's evaluations and search.
    # By contrast, gtp_human5k_example.cfg uses only the raw neural net. Raw neural nets like the
    # HumanSL net without search do NOT play accurately enough to reach top human levels.
    # If you were to take gtp_human5k_example.cfg and swap out preaz_5k -> preaz_9d,
    # despite *attempting* to imitate a 9d player, it would not actually achieve that strength.
    # Due to the use of search, this config plays significantly stronger, and depending on how
    # you adjust visits and parameters and what `-model` you use for KataGo's model, can probably
    # reach 9d strength or higher.
    # Using the search greatly reduces the "humanness" of the play in exchange for strength,
    # but still retains noticeable bias from the human SL policy and may still play somewhat
    # "human-style" openings and moves.
    # Running with this config requires giving a human SL model b18c384nbt-humanv0.bin.gz
    # on the command line such as:
    # ./katago gtp -config gtp_human9d_search_example.cfg -model your_favorite_normal_model_for_katago.bin.gz -human-model b18c384nbt-humanv0.bin.gz
    # You can obtain the human model at https://github.com/lightvector/KataGo/releases/tag/v1.15.0
    # You can compare the config parameters here with gtp_human5k_example.cfg and see the differences
    # and how they were achieved.
    # gtp_human5k_example.cfg also has a bit more explanation on many of the parameters.
    logDir = gtp_logs
    logAllGTPCommunication = true
    logSearchInfo = true
    logSearchInfoForChosenMove = false
    logToStderr = false
    rules = chinese
    # Adjusted resignation settings compared to gtp_human5k_example.cfg, to resign a bit more easily.
    allowResignation = true
    resignThreshold = -0.98 # was -0.99 in gtp_human5k_example.cfg
    resignConsecTurns = 10 # was 20 in gtp_human5k_example.cfg
    resignMinScoreDifference = 20 # was 40 in gtp_human5k_example.cfg
    resignMinMovesPerBoardArea = 0.40
    # maxVisits: With a config like this, more visits *will* make it stronger and fewer visits will make it weaker.
    # But strength will NOT scale indefinitely upward with visits unlike KataGo's normal config without human SL.
    # This is due to the strong bias of the human SL network.
    # You can reduce this number if you are on weaker hardware. It may reduce strength a bit but will still
    # provide a huge strength boost over using the humanSL network alone as in gtp_human5k_example.cfg
    # It's NOT recommended to reduce this below about 30-40 visits, since that will result in too few explorations of a variety of moves.
    # If you want to adjust strength, see humanSLChosenMovePiklLambda instead.
    maxVisits = 400 # 40 in gtp_human5k_example.cfg.
    # Having more than one thread speeds up search when visits are larger.
    # Make sure the number of threads is much smaller than the number of visits, however.
    numSearchThreads = 8 # 1 in gtp_human5k_example.cfg.
    lagBuffer = 1.0
    # Rough scale in seconds to randomly delay moving, so as not to respond instantly.
    # Some moves will delay longer, some moves will delay a little less.
    delayMoveScale = 2.0
    delayMoveMax = 10.0
    # Imitate human amateur 9d players (roughly based on ~KGS ranks)
    humanSLProfile = preaz_9d # was preaz_5k in gtp_human5k_example.cfg
    humanSLChosenMoveProp = 1.0
    humanSLChosenMoveIgnorePass = true
    # When a move starts to lose more than 0.08 utility (several percent winrate), downweight it.
    # Increase this number to reduce the strength and use the human SL policy more and KataGo's evaluations less.
    # Decrease this number a little more to improve strength even further and play less human-like.
    # (although below 0.02 you probably are better off going back to a normal KataGo config and scaling visits).
    # Since this uses KataGo's judgment, even at large values and with a weak-ranked humanSLProfile
    # this may still produce a very strong player.
    # Calibrating for a smaller (but still nonzero) strength gain will take experimentation.
    # In addition to increasing this value a lot, e.g. (0.2, 0.5, 1.0, 2.0,...)
    # you can also try using old/small KataGo nets (e.g. b6c96, b10c128), reducing visits (though reducing below about 30-40
    # is NOT recommended), or using the humanSLModel itself as the main "-model".
    humanSLChosenMovePiklLambda = 0.08 # was 100000000 in gtp_human5k_example.cfg.
    # Spend 80% of visits to explore humanSL moves to ensure they get evaluations to use with humanSLChosenMovePiklLambda
    humanSLRootExploreProbWeightless = 0.8 # was 0 in gtp_human5k_example.cfg.
    humanSLRootExploreProbWeightful = 0.0
    humanSLPlaExploreProbWeightless = 0.0
    humanSLPlaExploreProbWeightful = 0.0
    humanSLOppExploreProbWeightless = 0.0
    humanSLOppExploreProbWeightful = 0.0
    humanSLCpuctExploration = 0.50
    # Explore moves very widely according to the human SL policy even if they lose up to about 2.0 utility.
    # This is not the largest this value can go; you can increase it further to do even less filtering
    # based on winrates as far as move exploration goes.
    humanSLCpuctPermanent = 2.0 # was 0.2 in gtp_human5k_example.cfg.
    # Reduced temperature settings - reduce the chance of picking unlikely low policy or low-value moves.
    chosenMoveTemperatureEarly = 0.70 # was 0.85 in gtp_human5k_example.cfg.
    chosenMoveTemperature = 0.25 # was 0.70 in gtp_human5k_example.cfg.
    chosenMoveTemperatureHalflife = 30 # was 80 in gtp_human5k_example.cfg.
    chosenMoveTemperatureOnlyBelowProb = 1.0 # was 0.01 in gtp_human5k_example.cfg.
    chosenMoveSubtract = 0
    chosenMovePrune = 0
    # Since we're doing more search, increase size of neural net cache a bit for performance.
    nnCacheSizePowerOfTwo = 20 # was 17 in gtp_human5k_example.cfg.
    nnMutexPoolSizePowerOfTwo = 14
    ignorePreRootHistory = false
    analysisIgnorePreRootHistory = false
    rootNumSymmetriesToSample = 2
    useLcbForSelection = false
    # Make score utility count a bit more
    winLossUtilityFactor = 1.0
    staticScoreUtilityFactor = 0.5 # was 0.3 in gtp_human5k_example.cfg.
    dynamicScoreUtilityFactor = 0.5 # was 0.0 in gtp_human5k_example.cfg.
    useUncertainty = false
    subtreeValueBiasFactor = 0.0
    useNoisePruning = false


    IP属地:辽宁2楼2024-09-21 15:33
    回复
      哪些是关键参数例如调成2d


      IP属地:辽宁3楼2024-09-21 15:35
      回复