ViTLite(
  (tokenizer): Tokenizer(
    (conv_layers): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 192, kernel_size=(4, 4), stride=(4, 4))
        (1): Identity()
        (2): Identity()
      )
    )
    (flattener): Flatten(start_dim=2, end_dim=3)
  )
  (classifier): TransformerClassifier(
    (dropout): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0): TransformerEncoderLayer(
        (pre_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (self_attn): Attention(
          (qkv): CirLinear(in_features=192, out_features=576, bias=False, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
          (attn_drop): Dropout(p=0.1, inplace=False)
          (proj): CirLinear(in_features=192, out_features=192, bias=True, fix_block_size=4, search_space=[1, 2, 4, 8, 16])
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (linear1): CirLinear(in_features=192, out_features=384, bias=True, fix_block_size=2, search_space=[1, 2, 4, 8, 16])
        (dropout1): Dropout(p=0.0, inplace=False)
        (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (linear2): CirLinear(in_features=384, out_features=192, bias=True, fix_block_size=2, search_space=[1, 2, 4, 8, 16])
        (dropout2): Dropout(p=0.0, inplace=False)
        (drop_path): Identity()
      )
      (1): TransformerEncoderLayer(
        (pre_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (self_attn): Attention(
          (qkv): CirLinear(in_features=192, out_features=576, bias=False, fix_block_size=4, search_space=[1, 2, 4, 8, 16])
          (attn_drop): Dropout(p=0.1, inplace=False)
          (proj): CirLinear(in_features=192, out_features=192, bias=True, fix_block_size=1, search_space=[1, 2, 4, 8, 16])
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (linear1): CirLinear(in_features=192, out_features=384, bias=True, fix_block_size=4, search_space=[1, 2, 4, 8, 16])
        (dropout1): Dropout(p=0.0, inplace=False)
        (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (linear2): CirLinear(in_features=384, out_features=192, bias=True, fix_block_size=8, search_space=[1, 2, 4, 8, 16])
        (dropout2): Dropout(p=0.0, inplace=False)
        (drop_path): DropPath()
      )
      (2): TransformerEncoderLayer(
        (pre_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (self_attn): Attention(
          (qkv): CirLinear(in_features=192, out_features=576, bias=False, fix_block_size=4, search_space=[1, 2, 4, 8, 16])
          (attn_drop): Dropout(p=0.1, inplace=False)
          (proj): CirLinear(in_features=192, out_features=192, bias=True, fix_block_size=2, search_space=[1, 2, 4, 8, 16])
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (linear1): CirLinear(in_features=192, out_features=384, bias=True, fix_block_size=4, search_space=[1, 2, 4, 8, 16])
        (dropout1): Dropout(p=0.0, inplace=False)
        (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (linear2): CirLinear(in_features=384, out_features=192, bias=True, fix_block_size=8, search_space=[1, 2, 4, 8, 16])
        (dropout2): Dropout(p=0.0, inplace=False)
        (drop_path): DropPath()
      )
      (3): TransformerEncoderLayer(
        (pre_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (self_attn): Attention(
          (qkv): CirLinear(in_features=192, out_features=576, bias=False, fix_block_size=4, search_space=[1, 2, 4, 8, 16])
          (attn_drop): Dropout(p=0.1, inplace=False)
          (proj): CirLinear(in_features=192, out_features=192, bias=True, fix_block_size=2, search_space=[1, 2, 4, 8, 16])
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (linear1): CirLinear(in_features=192, out_features=384, bias=True, fix_block_size=8, search_space=[1, 2, 4, 8, 16])
        (dropout1): Dropout(p=0.0, inplace=False)
        (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (linear2): CirLinear(in_features=384, out_features=192, bias=True, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
        (dropout2): Dropout(p=0.0, inplace=False)
        (drop_path): DropPath()
      )
      (4): TransformerEncoderLayer(
        (pre_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (self_attn): Attention(
          (qkv): CirLinear(in_features=192, out_features=576, bias=False, fix_block_size=4, search_space=[1, 2, 4, 8, 16])
          (attn_drop): Dropout(p=0.1, inplace=False)
          (proj): CirLinear(in_features=192, out_features=192, bias=True, fix_block_size=2, search_space=[1, 2, 4, 8, 16])
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (linear1): CirLinear(in_features=192, out_features=384, bias=True, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
        (dropout1): Dropout(p=0.0, inplace=False)
        (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (linear2): CirLinear(in_features=384, out_features=192, bias=True, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
        (dropout2): Dropout(p=0.0, inplace=False)
        (drop_path): DropPath()
      )
      (5): TransformerEncoderLayer(
        (pre_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (self_attn): Attention(
          (qkv): CirLinear(in_features=192, out_features=576, bias=False, fix_block_size=8, search_space=[1, 2, 4, 8, 16])
          (attn_drop): Dropout(p=0.1, inplace=False)
          (proj): CirLinear(in_features=192, out_features=192, bias=True, fix_block_size=2, search_space=[1, 2, 4, 8, 16])
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (linear1): CirLinear(in_features=192, out_features=384, bias=True, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
        (dropout1): Dropout(p=0.0, inplace=False)
        (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (linear2): CirLinear(in_features=384, out_features=192, bias=True, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
        (dropout2): Dropout(p=0.0, inplace=False)
        (drop_path): DropPath()
      )
      (6): TransformerEncoderLayer(
        (pre_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (self_attn): Attention(
          (qkv): CirLinear(in_features=192, out_features=576, bias=False, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
          (attn_drop): Dropout(p=0.1, inplace=False)
          (proj): CirLinear(in_features=192, out_features=192, bias=True, fix_block_size=4, search_space=[1, 2, 4, 8, 16])
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (linear1): CirLinear(in_features=192, out_features=384, bias=True, fix_block_size=8, search_space=[1, 2, 4, 8, 16])
        (dropout1): Dropout(p=0.0, inplace=False)
        (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (linear2): CirLinear(in_features=384, out_features=192, bias=True, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
        (dropout2): Dropout(p=0.0, inplace=False)
        (drop_path): DropPath()
      )
      (7): TransformerEncoderLayer(
        (pre_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (self_attn): Attention(
          (qkv): CirLinear(in_features=192, out_features=576, bias=False, fix_block_size=8, search_space=[1, 2, 4, 8, 16])
          (attn_drop): Dropout(p=0.1, inplace=False)
          (proj): CirLinear(in_features=192, out_features=192, bias=True, fix_block_size=4, search_space=[1, 2, 4, 8, 16])
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (linear1): CirLinear(in_features=192, out_features=384, bias=True, fix_block_size=4, search_space=[1, 2, 4, 8, 16])
        (dropout1): Dropout(p=0.0, inplace=False)
        (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (linear2): CirLinear(in_features=384, out_features=192, bias=True, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
        (dropout2): Dropout(p=0.0, inplace=False)
        (drop_path): DropPath()
      )
      (8): TransformerEncoderLayer(
        (pre_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (self_attn): Attention(
          (qkv): CirLinear(in_features=192, out_features=576, bias=False, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
          (attn_drop): Dropout(p=0.1, inplace=False)
          (proj): CirLinear(in_features=192, out_features=192, bias=True, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (linear1): CirLinear(in_features=192, out_features=384, bias=True, fix_block_size=8, search_space=[1, 2, 4, 8, 16])
        (dropout1): Dropout(p=0.0, inplace=False)
        (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
        (linear2): CirLinear(in_features=384, out_features=192, bias=True, fix_block_size=16, search_space=[1, 2, 4, 8, 16])
        (dropout2): Dropout(p=0.0, inplace=False)
        (drop_path): DropPath()
      )
    )
    (norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
    (fc): CirLinear(in_features=192, out_features=200, bias=True, fix_block_size=1, search_space=[1, 2, 4, 8])
  )
)