        # Depthwise 3D conv that downsamples the (T, H, W) token grid by a
        # factor of sr_ratio along each axis; LayerNorm is applied to the
        # reduced tokens before the key/value projection.
        self.sr = nn.Conv3d(
            dim,
            dim,
            kernel_size=(sr_ratio, sr_ratio, sr_ratio),
            stride=(sr_ratio, sr_ratio, sr_ratio),
            padding=(sr_ratio // 2, sr_ratio // 2, sr_ratio // 2),
            groups=dim)
        self.norm = nn.LayerNorm(dim)
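        # Shape illustration (assumed example, sr_ratio=2): a (B, C, 16, 28, 28)
        # token grid passes through self.sr as Conv3d(kernel=2, stride=2,
        # padding=1) and comes out (B, C, 9, 15, 15), so the number of
        # key/value tokens drops by roughly sr_ratio**3.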
    def forward(self, x, T, H, W):
        # x: (B, N, C) token sequence with N == T * H * W. The (T, H, W) grid
        # shape is assumed to be supplied by the caller, since the 3D
        # spatial-reduction conv below needs the unflattened layout.
        B, N, C = x.shape

        # Queries, split into heads: (B, num_heads, N, head_dim).
        q = self.q(x).reshape(B, N, self.num_heads,
                              C // self.num_heads).permute(0, 2, 1, 3)

        if self.sr_ratio > 1:
            # Downsample the token grid before projecting keys/values, which
            # shrinks the attention matrix by roughly sr_ratio**3.
            x_ = x.permute(0, 2, 1).reshape(B, C, T, H, W)
            x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)
            x_ = self.norm(x_)
        else:
            x_ = x

        # Fused key/value projection, split into k and v:
        # each (B, num_heads, N', head_dim).
        kv = self.kv(x_).reshape(B, -1, 2, self.num_heads,
                                 C // self.num_heads).permute(2, 0, 3, 1, 4)
        k, v = kv[0], kv[1]

        # Scaled dot-product attention over the (possibly reduced) tokens.
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = self.attn_drop(attn.softmax(dim=-1))

        # Weighted sum of values, heads merged back into the channel dim.
        out = (attn @ v).transpose(1, 2).reshape(B, N, C)
        out = self.proj(out)
        out = self.proj_drop(out)
        return out
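# A minimal usage sketch for the attention module above (illustration only; it
# assumes the enclosing class is named `Attention` and that its __init__ also
# defines self.q, self.kv, self.proj, self.attn_drop, self.proj_drop,
# self.num_heads, self.scale, and self.sr_ratio next to self.sr / self.norm):
#
#     attn = Attention(dim=64, num_heads=4, sr_ratio=2)
#     tokens = torch.randn(2, 9 * 14 * 14, 64)  # (B, N, C) with N = T * H * W
#     out = attn(tokens, T=9, H=14, W=14)       # -> shape (2, 9 * 14 * 14, 64)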
class Block(nn.Module):
    def __init__(self,
                 dim,
                 num_blocks,
                 mlp_ratios=(),
                 skip_lam=(),
                 drop=0.,
                 drop_path=0.,
                 init_values=None,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm,
                 depthwise=False):
        super().__init__()
        # Broadcast per-block hyperparameters: when fewer values than blocks
        # are given, tile them so every block gets one entry.
        if mlp_ratios and len(mlp_ratios) != num_blocks:
            mlp_ratios = multiples(num_blocks, *mlp_ratios)
        if skip_lam and len(skip_lam) != num_blocks:
            skip_lam = multiples(num_blocks, *skip_lam)
        assert all(rat > 0 for rat in mlp_ratios), 'Each ratio must be positive.'
        assert all(0 < lam <= 1 for lam in skip_lam) and 1 in skip_lam, (
            f'At least one lambda must be one when the number of blocks is {num_blocks}.')
        self.cls_token = None
        clip_model = os.environ.get('CLIP_VIT_MODEL')
        if clip_model in ('RN50', 'RN101', 'RN50x16', 'ViT-B/32', 'ViT-B/16'):
            logger.warning('Using CLIP ViT weights without cls token')
        else:
            # Learnable class token in the conventional (1, 1, dim) layout.
            self.cls_token = torch.nn.Parameter(torch.zeros(1, 1, dim))
            self.cls_token.requires_grad_(True)
        if init_values is not None:
            assert len(init_values) == num_blocks, (
                'Init values should be a list with one entry per block.')
    def forward(self, x):
        # (The per-block computation is elided in this excerpt; the module
        # returns the transformed tokens and an auxiliary output.)
        return out, out_aux
def multiples(n, *numbers):
    """Tile `numbers` so the result has length n (one entry per block)."""
    return list(numbers) * (n // len(numbers))
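# For example, multiples(6, 2, 4) returns [2, 4, 2, 4, 2, 4]: two mlp ratios
# tiled across six blocks so each block receives one value.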
class PatchEmbed(nn.Module):
    def __init__(self, n, pix_size, img_size, hparams):
        super().__init__()
        self.n = n
        self.pix_size = pix_size
        self.img_size = img_size
        self.hparams = hparams
        self.patch_embed = None
        if hparams.patch_embed_type != 'Conv':
            patch_size = hparams.patch_size
            patches_h = img_size // patch_size
            patches_w = img_size // patch_size
            num_patches = patches_h * patches_w
            if hparams.image_augmentation_type != 'none':
                # Expand the patch grid to leave room for augmentation crops.
                expansion = hparams.image_augmentation_patch_expansion
                patch_grid_h = img_size // patch_size + expansion
                patch_grid_w = img_size // patch_size + expansion
                num_patches = patch_grid_h * patch_grid_w
                assert num_patches >= min_patches_per_side ** 2, \
                    'Augmented image patches are smaller than minimum patches per side'
                assert patch_grid_h % patch_grid_w == 0, \
                    'Augmented image patch grid height must be divisible by width'
                # Effective per-patch size after expanding the grid.
                patch_size_h = img_size / patch_grid_h
                patch_size_w = img_size / patch_grid_w
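                # Worked example with assumed numbers: img_size=224 and
                # patch_size=16 give a 14x14 grid (196 patches); with an
                # augmentation patch expansion of 2 the grid becomes 16x16,
                # so num_patches = 256 and the effective patch size is
                # 224 / 16 = 14.0 pixels per side.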
            class ConvBnAct(nn.Sequential):
                """Conv -> BatchNorm -> activation for stage i."""

                def __init__(self, i, n):
                    super().__init__()
                    kernel_size = hparams.conv_kernel_sizes[i]
                    stride = hparams.conv_strides[i]
                    padding = (kernel_size - stride) // 2
                    self.add_module('conv', nn.Conv2d(
                        n, n, kernel_size, stride, padding=padding, bias=False))
                    self.add_module('norm', nn.BatchNorm2d(num_features=n))
                    self.add_module('act', get_activation_fn(hparams.act_types[i])())

            class ConvBnActDepthwise(nn.Sequential):
                """Stack of depthwise convs with norm/act/dropout between them."""

                def __init__(self, i, n):
                    super().__init__()
                    kernel_sizes = hparams.conv_kernel_sizes[i]
                    strides = hparams.conv_strides[i]
                    paddings = [(k - s) // 2 for k, s in zip(kernel_sizes, strides)]
                    activation = get_activation_fn(hparams.act_types[i])
                    dropouts = hparams.dropouts[i]
                    for j, (kernel_size, stride, padding, dropout) in enumerate(
                            zip(kernel_sizes, strides, paddings, dropouts)):
                        self.add_module(f'conv{j}', nn.Conv2d(
                            n, n, kernel_size, stride, padding=padding,
                            bias=False, groups=n))
                        # Norm and activation follow every conv except the last.
                        if j != len(kernel_sizes) - 1:
                            self.add_module(f'norm{j}', nn.BatchNorm2d(num_features=n))
                            self.add_module(f'act{j}', activation())
                        if dropout != 0.:
                            self.add_module(f'drop{j}', nn.Dropout(dropout))
        else:
            # Conv patch embedding supports only a fixed set of activations.
            supported = ('gelu', 'relu', 'leakyrelu', 'prelu', 'silu', 'selu', 'none')
            if hparams.activation_type not in supported:
                raise NotImplementedError(
                    f'{hparams.activation_type} activation not supported.')
    def forward(self, x):
        # (The embedding computation is elided in this excerpt.)
        return embedded, x.shape[-2]
def build_model(config):
    config = config.copy()
    config.setdefault('norm', None)  # default norm layer is None
    config.setdefault('act', None)  # default act layer is None
    model_dict = {}
    model_dict['model_name'] = 'PatchFormer'  # model name is PatchFormer
    model