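# Chatbot configuration: GPT-2 model selection ([model]), response decoding
# parameters ([decoder]), and chat/GIF service credentials ([chatbot]).
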
[model]
# Path to the folder where the model files will be stored.
# If the path is relative, model.py must be called from the same directory.
data_folder = models

# Size of the GPT-2 model: either 'small' (117M) or 'medium' (345M).
# Select small for CPU or experimentation, and medium for GPU.
model_size = medium

# Name of the dataset the model was trained on: either 'multiref' (147M
# multi-turn dialogues from Reddit discussion threads) or 'dstc' (DSTC-7
# grounded dialogue generation challenge).
dataset = multiref

# True: load the model trained from scratch.
# False: load the model obtained by fine-tuning GPT-2.
from_scratch = False

# Set to True to avoid using CUDA even when it is available.
no_cuda = False

# Further increases quality by selecting the response that yields the lowest
# backward model loss.
# Keep in mind: this runs inference on another medium model, which further
# slows down the bot's responses.
# num_samples (in [decoder]) must be greater than 1 for this to work.
use_mmi = False

[decoder]
# Seed for the random number generators; fix the seed to reproduce results.
# By default there is no seed, so each turn should be unique.
# NOTE(review): the key is deliberately left without a value. This presumably
# relies on the loader accepting valueless keys -- confirm before changing it
# to "seed =" or assigning a number.
seed

# Float value controlling randomness in the Boltzmann distribution.
# Lower temperature results in less random completions. As the temperature
# approaches zero, the model becomes deterministic and repetitive.
# Higher temperature results in more random completions.
temperature = 0.6474

# Integer value controlling diversity. 1 means only 1 word is considered at
# each step (token), resulting in deterministic completions, while 40 means
# 40 words are considered at each step. 0 (default) is a special setting
# meaning no restrictions. 40 is generally a good value.
top_k = 40

# Like top_k, top_p constrains how random ("crazy") the output can get.
# NOTE(review): presumably a cumulative-probability (nucleus sampling)
# threshold where 0 disables the filter -- confirm against the decoder code.
top_p = 0

# The maximum number of tokens to be returned, including punctuation etc.
# Generation stops automatically if the end-of-sequence token is found earlier.
# Generation only rarely goes beyond 64 tokens.
max_length = 128

# Number of samples to generate.
# You will have to implement a strategy to choose one message from the
# generated list. For example, you can choose the most dissimilar message,
# or the longest one.
# Keep in mind: the higher the value, the slower the generation.
num_samples = 1

# The number of turns (one turn = a message and the response to it) the model
# should consider.
# Set to 0 to focus on the last message only. Set to -1 for unlimited context
# length.
max_turns_history = 1

[chatbot]

# Your Telegram bot token. See https://core.telegram.org/bots
# Note: this functionality is disabled.
# Replace the placeholder locally; do not commit a real token.
telegram_token = YOUR_TOKEN_HERE

# Your GIPHY API token. See https://developers.giphy.com/docs/api/
# Replace the placeholder locally; do not commit a real token.
giphy_token = YOUR_TOKEN_HERE

# Value from 0 to 10; results get weirder as the value goes up.
giphy_weirdness = 5