| // Code generated by command: go run salsa20_amd64_asm.go -out ../salsa20_amd64.s -pkg salsa. DO NOT EDIT. |
| |
| //go:build amd64 && !purego && gc |
| |
| // func salsa2020XORKeyStream(out *byte, in *byte, n uint64, nonce *byte, key *byte) |
| // Requires: SSE2 |
| TEXT ·salsa2020XORKeyStream(SB), $456-40 |
| // This needs up to 64 bytes at 360(R12); hence the non-obvious frame size. |
//
// Overview: XORs n bytes read from in with generated keystream and writes the
// result to out. While at least 256 bytes remain, four 64-byte blocks are
// computed at once (BYTESATLEAST256 / MAINLOOP1); the rest is handled one
// 64-byte block at a time (BYTESBETWEEN1AND255 / MAINLOOP2), with a trailing
// partial block bounced through a stack buffer.
//
// Scratch layout, relative to the 32-byte-aligned base kept in R12:
//     0..63    working state, four 16-byte rows
//    64..287   fourteen state words, each broadcast to all four lanes
//   288..303   low counter words for four consecutive blocks
//   304..319   high counter words for the same four blocks
//   320..351   two 16-byte spill slots used by MAINLOOP1
//   352..359   saved count of bytes still to process
//   360..423   64-byte bounce buffer for a trailing partial block
// R12 can sit up to 31 bytes above SP, so the frame has to cover 31+424 bytes.
//
// Load the arguments.
| MOVQ out+0(FP), DI |
| MOVQ in+8(FP), SI |
| MOVQ n+16(FP), DX |
| MOVQ nonce+24(FP), CX |
| MOVQ key+32(FP), R8 |
// R12 = SP rounded up to a multiple of 32; every MOVOA below is a 16-byte
// access at an offset from this base that is a multiple of 16.
| MOVQ SP, R12 |
| ADDQ $0x1f, R12 |
| ANDQ $-32, R12 |
// Shuffle registers: R9 = n, DX = nonce pointer, R10 = key pointer.
| MOVQ DX, R9 |
| MOVQ CX, DX |
| MOVQ R8, R10 |
// Unsigned "below or equal 0" is only true for n == 0: nothing to do.
| CMPQ R9, $0x00 |
| JBE DONE |
// Row 0 at (R12): key[20:24], key[0:4], nonce[0:4], key[16:20].
| MOVL 20(R10), CX |
| MOVL (R10), R8 |
| MOVL (DX), AX |
| MOVL 16(R10), R11 |
| MOVL CX, (R12) |
| MOVL R8, 4(R12) |
| MOVL AX, 8(R12) |
| MOVL R11, 12(R12) |
// Row 1 at 16(R12): nonce[8:12], key[24:28], key[4:8], nonce[4:8].
// The word at 16(R12) is later used as the low half of the block counter.
| MOVL 8(DX), CX |
| MOVL 24(R10), R8 |
| MOVL 4(R10), AX |
| MOVL 4(DX), R11 |
| MOVL CX, 16(R12) |
| MOVL R8, 20(R12) |
| MOVL AX, 24(R12) |
| MOVL R11, 28(R12) |
// Row 2 at 32(R12): key[12:16], nonce[12:16], key[28:32], key[8:12].
// The word at 36(R12) is later used as the high half of the block counter.
// DX (the nonce pointer) is overwritten here; the nonce is not read again.
| MOVL 12(DX), CX |
| MOVL 12(R10), DX |
| MOVL 28(R10), R8 |
| MOVL 8(R10), AX |
| MOVL DX, 32(R12) |
| MOVL CX, 36(R12) |
| MOVL R8, 40(R12) |
| MOVL AX, 44(R12) |
// Row 3 at 48(R12): four fixed 32-bit constants. Stored little-endian, their
// bytes spell "expa", "nd 3", "2-by", "te k".
| MOVQ $0x61707865, DX |
| MOVQ $0x3320646e, CX |
| MOVQ $0x79622d32, R8 |
| MOVQ $0x6b206574, AX |
| MOVL DX, 48(R12) |
| MOVL CX, 52(R12) |
| MOVL R8, 56(R12) |
| MOVL AX, 60(R12) |
// Fewer than 256 bytes: skip the four-blocks-at-a-time path entirely.
| CMPQ R9, $0x00000100 |
| JB BYTESBETWEEN1AND255 |
// Broadcast state words into 16-byte slots so that each XMM lane can work on
// a different block. PSHUFL with 0x00/0x55/0xaa/0xff replicates lane 0/1/2/3.
// Row 3 (constants): lanes 1,2,3,0 -> 64, 80, 96, 112(R12).
| MOVOA 48(R12), X0 |
| PSHUFL $0x55, X0, X1 |
| PSHUFL $0xaa, X0, X2 |
| PSHUFL $0xff, X0, X3 |
| PSHUFL $0x00, X0, X0 |
| MOVOA X1, 64(R12) |
| MOVOA X2, 80(R12) |
| MOVOA X3, 96(R12) |
| MOVOA X0, 112(R12) |
// Row 0: lanes 2,3,0,1 -> 128, 144, 160, 176(R12).
| MOVOA (R12), X0 |
| PSHUFL $0xaa, X0, X1 |
| PSHUFL $0xff, X0, X2 |
| PSHUFL $0x00, X0, X3 |
| PSHUFL $0x55, X0, X0 |
| MOVOA X1, 128(R12) |
| MOVOA X2, 144(R12) |
| MOVOA X3, 160(R12) |
| MOVOA X0, 176(R12) |
// Row 1: lanes 3,1,2 -> 192, 208, 224(R12). Lane 0 (counter low word) is
// skipped; it differs per block and is built at 288(R12) instead.
| MOVOA 16(R12), X0 |
| PSHUFL $0xff, X0, X1 |
| PSHUFL $0x55, X0, X2 |
| PSHUFL $0xaa, X0, X0 |
| MOVOA X1, 192(R12) |
| MOVOA X2, 208(R12) |
| MOVOA X0, 224(R12) |
// Row 2: lanes 0,2,3 -> 240, 256, 272(R12). Lane 1 (counter high word) is
// skipped; it differs per block and is built at 304(R12) instead.
| MOVOA 32(R12), X0 |
| PSHUFL $0x00, X0, X1 |
| PSHUFL $0xaa, X0, X2 |
| PSHUFL $0xff, X0, X0 |
| MOVOA X1, 240(R12) |
| MOVOA X2, 256(R12) |
| MOVOA X0, 272(R12) |
| |
// Four-block path: produces 256 bytes of output per pass.
| BYTESATLEAST256: |
// Assemble the 64-bit counter from its two state words and lay out
// counter+0..counter+3: low words at 288/292/296/300(R12), high words at
// 304/308/312/316(R12). Lane i therefore belongs to block i.
| MOVL 16(R12), DX |
| MOVL 36(R12), CX |
| MOVL DX, 288(R12) |
| MOVL CX, 304(R12) |
| SHLQ $0x20, CX |
| ADDQ CX, DX |
| ADDQ $0x01, DX |
| MOVQ DX, CX |
| SHRQ $0x20, CX |
| MOVL DX, 292(R12) |
| MOVL CX, 308(R12) |
| ADDQ $0x01, DX |
| MOVQ DX, CX |
| SHRQ $0x20, CX |
| MOVL DX, 296(R12) |
| MOVL CX, 312(R12) |
| ADDQ $0x01, DX |
| MOVQ DX, CX |
| SHRQ $0x20, CX |
| MOVL DX, 300(R12) |
| MOVL CX, 316(R12) |
// counter+4 goes back into the working state for the next pass / the tail.
| ADDQ $0x01, DX |
| MOVQ DX, CX |
| SHRQ $0x20, CX |
| MOVL DX, 16(R12) |
| MOVL CX, 36(R12) |
// R9 is reused as scratch while writing output, so park the byte count.
| MOVQ R9, 352(R12) |
// DX = round counter (20); MAINLOOP1 subtracts 2 per iteration.
| MOVQ $0x00000014, DX |
// Load all sixteen broadcast state vectors into X0..X15.
| MOVOA 64(R12), X0 |
| MOVOA 80(R12), X1 |
| MOVOA 96(R12), X2 |
| MOVOA 256(R12), X3 |
| MOVOA 272(R12), X4 |
| MOVOA 128(R12), X5 |
| MOVOA 144(R12), X6 |
| MOVOA 176(R12), X7 |
| MOVOA 192(R12), X8 |
| MOVOA 208(R12), X9 |
| MOVOA 224(R12), X10 |
| MOVOA 304(R12), X11 |
| MOVOA 112(R12), X12 |
| MOVOA 160(R12), X13 |
| MOVOA 240(R12), X14 |
| MOVOA 288(R12), X15 |
| |
// Every 7-instruction group below has the same shape:
//   t = a + b; c ^= t << k; c ^= t >> (32-k)
// i.e. c ^= rotate-left(a+b, k), with k taken from 7, 9, 13, 18.
// Sixteen state vectors plus two temporaries do not fit in X0..X15, so two
// state vectors are always parked at 320(R12) and 336(R12); which two are
// parked rotates through the loop body.
| MAINLOOP1: |
// Park X1 and X2; they serve as temporaries first.
| MOVOA X1, 320(R12) |
| MOVOA X2, 336(R12) |
| MOVOA X13, X1 |
| PADDL X12, X1 |
| MOVOA X1, X2 |
| PSLLL $0x07, X1 |
| PXOR X1, X14 |
| PSRLL $0x19, X2 |
| PXOR X2, X14 |
| MOVOA X7, X1 |
| PADDL X0, X1 |
| MOVOA X1, X2 |
| PSLLL $0x07, X1 |
| PXOR X1, X11 |
| PSRLL $0x19, X2 |
| PXOR X2, X11 |
| MOVOA X12, X1 |
| PADDL X14, X1 |
| MOVOA X1, X2 |
| PSLLL $0x09, X1 |
| PXOR X1, X15 |
| PSRLL $0x17, X2 |
| PXOR X2, X15 |
| MOVOA X0, X1 |
| PADDL X11, X1 |
| MOVOA X1, X2 |
| PSLLL $0x09, X1 |
| PXOR X1, X9 |
| PSRLL $0x17, X2 |
| PXOR X2, X9 |
| MOVOA X14, X1 |
| PADDL X15, X1 |
| MOVOA X1, X2 |
| PSLLL $0x0d, X1 |
| PXOR X1, X13 |
| PSRLL $0x13, X2 |
| PXOR X2, X13 |
| MOVOA X11, X1 |
| PADDL X9, X1 |
| MOVOA X1, X2 |
| PSLLL $0x0d, X1 |
| PXOR X1, X7 |
| PSRLL $0x13, X2 |
| PXOR X2, X7 |
| MOVOA X15, X1 |
| PADDL X13, X1 |
| MOVOA X1, X2 |
| PSLLL $0x12, X1 |
| PXOR X1, X12 |
| PSRLL $0x0e, X2 |
| PXOR X2, X12 |
// Swap through slot 320: reload the parked vector into X1, park X12.
| MOVOA 320(R12), X1 |
| MOVOA X12, 320(R12) |
| MOVOA X9, X2 |
| PADDL X7, X2 |
| MOVOA X2, X12 |
| PSLLL $0x12, X2 |
| PXOR X2, X0 |
| PSRLL $0x0e, X12 |
| PXOR X12, X0 |
| MOVOA X5, X2 |
| PADDL X1, X2 |
| MOVOA X2, X12 |
| PSLLL $0x07, X2 |
| PXOR X2, X3 |
| PSRLL $0x19, X12 |
| PXOR X12, X3 |
// Swap through slot 336: reload the parked vector into X2, park X0.
| MOVOA 336(R12), X2 |
| MOVOA X0, 336(R12) |
| MOVOA X6, X0 |
| PADDL X2, X0 |
| MOVOA X0, X12 |
| PSLLL $0x07, X0 |
| PXOR X0, X4 |
| PSRLL $0x19, X12 |
| PXOR X12, X4 |
| MOVOA X1, X0 |
| PADDL X3, X0 |
| MOVOA X0, X12 |
| PSLLL $0x09, X0 |
| PXOR X0, X10 |
| PSRLL $0x17, X12 |
| PXOR X12, X10 |
| MOVOA X2, X0 |
| PADDL X4, X0 |
| MOVOA X0, X12 |
| PSLLL $0x09, X0 |
| PXOR X0, X8 |
| PSRLL $0x17, X12 |
| PXOR X12, X8 |
| MOVOA X3, X0 |
| PADDL X10, X0 |
| MOVOA X0, X12 |
| PSLLL $0x0d, X0 |
| PXOR X0, X5 |
| PSRLL $0x13, X12 |
| PXOR X12, X5 |
| MOVOA X4, X0 |
| PADDL X8, X0 |
| MOVOA X0, X12 |
| PSLLL $0x0d, X0 |
| PXOR X0, X6 |
| PSRLL $0x13, X12 |
| PXOR X12, X6 |
| MOVOA X10, X0 |
| PADDL X5, X0 |
| MOVOA X0, X12 |
| PSLLL $0x12, X0 |
| PXOR X0, X1 |
| PSRLL $0x0e, X12 |
| PXOR X12, X1 |
// Swap through slot 320: reload the parked vector into X0, park X1.
| MOVOA 320(R12), X0 |
| MOVOA X1, 320(R12) |
| MOVOA X4, X1 |
| PADDL X0, X1 |
| MOVOA X1, X12 |
| PSLLL $0x07, X1 |
| PXOR X1, X7 |
| PSRLL $0x19, X12 |
| PXOR X12, X7 |
| MOVOA X8, X1 |
| PADDL X6, X1 |
| MOVOA X1, X12 |
| PSLLL $0x12, X1 |
| PXOR X1, X2 |
| PSRLL $0x0e, X12 |
| PXOR X12, X2 |
// Swap through slot 336: reload the parked vector into X12, park X2.
| MOVOA 336(R12), X12 |
| MOVOA X2, 336(R12) |
| MOVOA X14, X1 |
| PADDL X12, X1 |
| MOVOA X1, X2 |
| PSLLL $0x07, X1 |
| PXOR X1, X5 |
| PSRLL $0x19, X2 |
| PXOR X2, X5 |
| MOVOA X0, X1 |
| PADDL X7, X1 |
| MOVOA X1, X2 |
| PSLLL $0x09, X1 |
| PXOR X1, X10 |
| PSRLL $0x17, X2 |
| PXOR X2, X10 |
| MOVOA X12, X1 |
| PADDL X5, X1 |
| MOVOA X1, X2 |
| PSLLL $0x09, X1 |
| PXOR X1, X8 |
| PSRLL $0x17, X2 |
| PXOR X2, X8 |
| MOVOA X7, X1 |
| PADDL X10, X1 |
| MOVOA X1, X2 |
| PSLLL $0x0d, X1 |
| PXOR X1, X4 |
| PSRLL $0x13, X2 |
| PXOR X2, X4 |
| MOVOA X5, X1 |
| PADDL X8, X1 |
| MOVOA X1, X2 |
| PSLLL $0x0d, X1 |
| PXOR X1, X14 |
| PSRLL $0x13, X2 |
| PXOR X2, X14 |
| MOVOA X10, X1 |
| PADDL X4, X1 |
| MOVOA X1, X2 |
| PSLLL $0x12, X1 |
| PXOR X1, X0 |
| PSRLL $0x0e, X2 |
| PXOR X2, X0 |
// Swap through slot 320: reload the parked vector into X1, park X0.
| MOVOA 320(R12), X1 |
| MOVOA X0, 320(R12) |
| MOVOA X8, X0 |
| PADDL X14, X0 |
| MOVOA X0, X2 |
| PSLLL $0x12, X0 |
| PXOR X0, X12 |
| PSRLL $0x0e, X2 |
| PXOR X2, X12 |
| MOVOA X11, X0 |
| PADDL X1, X0 |
| MOVOA X0, X2 |
| PSLLL $0x07, X0 |
| PXOR X0, X6 |
| PSRLL $0x19, X2 |
| PXOR X2, X6 |
// Swap through slot 336: reload the parked vector into X2, park X12.
| MOVOA 336(R12), X2 |
| MOVOA X12, 336(R12) |
| MOVOA X3, X0 |
| PADDL X2, X0 |
| MOVOA X0, X12 |
| PSLLL $0x07, X0 |
| PXOR X0, X13 |
| PSRLL $0x19, X12 |
| PXOR X12, X13 |
| MOVOA X1, X0 |
| PADDL X6, X0 |
| MOVOA X0, X12 |
| PSLLL $0x09, X0 |
| PXOR X0, X15 |
| PSRLL $0x17, X12 |
| PXOR X12, X15 |
| MOVOA X2, X0 |
| PADDL X13, X0 |
| MOVOA X0, X12 |
| PSLLL $0x09, X0 |
| PXOR X0, X9 |
| PSRLL $0x17, X12 |
| PXOR X12, X9 |
| MOVOA X6, X0 |
| PADDL X15, X0 |
| MOVOA X0, X12 |
| PSLLL $0x0d, X0 |
| PXOR X0, X11 |
| PSRLL $0x13, X12 |
| PXOR X12, X11 |
| MOVOA X13, X0 |
| PADDL X9, X0 |
| MOVOA X0, X12 |
| PSLLL $0x0d, X0 |
| PXOR X0, X3 |
| PSRLL $0x13, X12 |
| PXOR X12, X3 |
| MOVOA X15, X0 |
| PADDL X11, X0 |
| MOVOA X0, X12 |
| PSLLL $0x12, X0 |
| PXOR X0, X1 |
| PSRLL $0x0e, X12 |
| PXOR X12, X1 |
| MOVOA X9, X0 |
| PADDL X3, X0 |
| MOVOA X0, X12 |
| PSLLL $0x12, X0 |
| PXOR X0, X2 |
| PSRLL $0x0e, X12 |
| PXOR X12, X2 |
// Reload both parked vectors so all sixteen are in registers again.
| MOVOA 320(R12), X12 |
| MOVOA 336(R12), X0 |
// Two rounds consumed; repeat while the counter is still above zero.
| SUBQ $0x02, DX |
| JA MAINLOOP1 |
// Add each word's pre-round value back in (from the broadcast slots), then
// emit the output. MOVD reads lane 0; PSHUFL $0x39 rotates the lanes down by
// one so the next MOVD sees the next block's word. Lane i is written at
// byte offset 64*i.
// State words 0..3 of each block (offsets 0, 4, 8, 12).
| PADDL 112(R12), X12 |
| PADDL 176(R12), X7 |
| PADDL 224(R12), X10 |
| PADDL 272(R12), X4 |
| MOVD X12, DX |
| MOVD X7, CX |
| MOVD X10, R8 |
| MOVD X4, R9 |
| PSHUFL $0x39, X12, X12 |
| PSHUFL $0x39, X7, X7 |
| PSHUFL $0x39, X10, X10 |
| PSHUFL $0x39, X4, X4 |
| XORL (SI), DX |
| XORL 4(SI), CX |
| XORL 8(SI), R8 |
| XORL 12(SI), R9 |
| MOVL DX, (DI) |
| MOVL CX, 4(DI) |
| MOVL R8, 8(DI) |
| MOVL R9, 12(DI) |
| MOVD X12, DX |
| MOVD X7, CX |
| MOVD X10, R8 |
| MOVD X4, R9 |
| PSHUFL $0x39, X12, X12 |
| PSHUFL $0x39, X7, X7 |
| PSHUFL $0x39, X10, X10 |
| PSHUFL $0x39, X4, X4 |
| XORL 64(SI), DX |
| XORL 68(SI), CX |
| XORL 72(SI), R8 |
| XORL 76(SI), R9 |
| MOVL DX, 64(DI) |
| MOVL CX, 68(DI) |
| MOVL R8, 72(DI) |
| MOVL R9, 76(DI) |
| MOVD X12, DX |
| MOVD X7, CX |
| MOVD X10, R8 |
| MOVD X4, R9 |
| PSHUFL $0x39, X12, X12 |
| PSHUFL $0x39, X7, X7 |
| PSHUFL $0x39, X10, X10 |
| PSHUFL $0x39, X4, X4 |
| XORL 128(SI), DX |
| XORL 132(SI), CX |
| XORL 136(SI), R8 |
| XORL 140(SI), R9 |
| MOVL DX, 128(DI) |
| MOVL CX, 132(DI) |
| MOVL R8, 136(DI) |
| MOVL R9, 140(DI) |
| MOVD X12, DX |
| MOVD X7, CX |
| MOVD X10, R8 |
| MOVD X4, R9 |
| XORL 192(SI), DX |
| XORL 196(SI), CX |
| XORL 200(SI), R8 |
| XORL 204(SI), R9 |
| MOVL DX, 192(DI) |
| MOVL CX, 196(DI) |
| MOVL R8, 200(DI) |
| MOVL R9, 204(DI) |
// State words 4..7 of each block (offsets 16, 20, 24, 28).
| PADDL 240(R12), X14 |
| PADDL 64(R12), X0 |
| PADDL 128(R12), X5 |
| PADDL 192(R12), X8 |
| MOVD X14, DX |
| MOVD X0, CX |
| MOVD X5, R8 |
| MOVD X8, R9 |
| PSHUFL $0x39, X14, X14 |
| PSHUFL $0x39, X0, X0 |
| PSHUFL $0x39, X5, X5 |
| PSHUFL $0x39, X8, X8 |
| XORL 16(SI), DX |
| XORL 20(SI), CX |
| XORL 24(SI), R8 |
| XORL 28(SI), R9 |
| MOVL DX, 16(DI) |
| MOVL CX, 20(DI) |
| MOVL R8, 24(DI) |
| MOVL R9, 28(DI) |
| MOVD X14, DX |
| MOVD X0, CX |
| MOVD X5, R8 |
| MOVD X8, R9 |
| PSHUFL $0x39, X14, X14 |
| PSHUFL $0x39, X0, X0 |
| PSHUFL $0x39, X5, X5 |
| PSHUFL $0x39, X8, X8 |
| XORL 80(SI), DX |
| XORL 84(SI), CX |
| XORL 88(SI), R8 |
| XORL 92(SI), R9 |
| MOVL DX, 80(DI) |
| MOVL CX, 84(DI) |
| MOVL R8, 88(DI) |
| MOVL R9, 92(DI) |
| MOVD X14, DX |
| MOVD X0, CX |
| MOVD X5, R8 |
| MOVD X8, R9 |
| PSHUFL $0x39, X14, X14 |
| PSHUFL $0x39, X0, X0 |
| PSHUFL $0x39, X5, X5 |
| PSHUFL $0x39, X8, X8 |
| XORL 144(SI), DX |
| XORL 148(SI), CX |
| XORL 152(SI), R8 |
| XORL 156(SI), R9 |
| MOVL DX, 144(DI) |
| MOVL CX, 148(DI) |
| MOVL R8, 152(DI) |
| MOVL R9, 156(DI) |
| MOVD X14, DX |
| MOVD X0, CX |
| MOVD X5, R8 |
| MOVD X8, R9 |
| XORL 208(SI), DX |
| XORL 212(SI), CX |
| XORL 216(SI), R8 |
| XORL 220(SI), R9 |
| MOVL DX, 208(DI) |
| MOVL CX, 212(DI) |
| MOVL R8, 216(DI) |
| MOVL R9, 220(DI) |
// State words 8..11 of each block (offsets 32, 36, 40, 44). Words 8 and 9
// are the per-block counter words built at 288(R12) and 304(R12).
| PADDL 288(R12), X15 |
| PADDL 304(R12), X11 |
| PADDL 80(R12), X1 |
| PADDL 144(R12), X6 |
| MOVD X15, DX |
| MOVD X11, CX |
| MOVD X1, R8 |
| MOVD X6, R9 |
| PSHUFL $0x39, X15, X15 |
| PSHUFL $0x39, X11, X11 |
| PSHUFL $0x39, X1, X1 |
| PSHUFL $0x39, X6, X6 |
| XORL 32(SI), DX |
| XORL 36(SI), CX |
| XORL 40(SI), R8 |
| XORL 44(SI), R9 |
| MOVL DX, 32(DI) |
| MOVL CX, 36(DI) |
| MOVL R8, 40(DI) |
| MOVL R9, 44(DI) |
| MOVD X15, DX |
| MOVD X11, CX |
| MOVD X1, R8 |
| MOVD X6, R9 |
| PSHUFL $0x39, X15, X15 |
| PSHUFL $0x39, X11, X11 |
| PSHUFL $0x39, X1, X1 |
| PSHUFL $0x39, X6, X6 |
| XORL 96(SI), DX |
| XORL 100(SI), CX |
| XORL 104(SI), R8 |
| XORL 108(SI), R9 |
| MOVL DX, 96(DI) |
| MOVL CX, 100(DI) |
| MOVL R8, 104(DI) |
| MOVL R9, 108(DI) |
| MOVD X15, DX |
| MOVD X11, CX |
| MOVD X1, R8 |
| MOVD X6, R9 |
| PSHUFL $0x39, X15, X15 |
| PSHUFL $0x39, X11, X11 |
| PSHUFL $0x39, X1, X1 |
| PSHUFL $0x39, X6, X6 |
| XORL 160(SI), DX |
| XORL 164(SI), CX |
| XORL 168(SI), R8 |
| XORL 172(SI), R9 |
| MOVL DX, 160(DI) |
| MOVL CX, 164(DI) |
| MOVL R8, 168(DI) |
| MOVL R9, 172(DI) |
| MOVD X15, DX |
| MOVD X11, CX |
| MOVD X1, R8 |
| MOVD X6, R9 |
| XORL 224(SI), DX |
| XORL 228(SI), CX |
| XORL 232(SI), R8 |
| XORL 236(SI), R9 |
| MOVL DX, 224(DI) |
| MOVL CX, 228(DI) |
| MOVL R8, 232(DI) |
| MOVL R9, 236(DI) |
// State words 12..15 of each block (offsets 48, 52, 56, 60).
| PADDL 160(R12), X13 |
| PADDL 208(R12), X9 |
| PADDL 256(R12), X3 |
| PADDL 96(R12), X2 |
| MOVD X13, DX |
| MOVD X9, CX |
| MOVD X3, R8 |
| MOVD X2, R9 |
| PSHUFL $0x39, X13, X13 |
| PSHUFL $0x39, X9, X9 |
| PSHUFL $0x39, X3, X3 |
| PSHUFL $0x39, X2, X2 |
| XORL 48(SI), DX |
| XORL 52(SI), CX |
| XORL 56(SI), R8 |
| XORL 60(SI), R9 |
| MOVL DX, 48(DI) |
| MOVL CX, 52(DI) |
| MOVL R8, 56(DI) |
| MOVL R9, 60(DI) |
| MOVD X13, DX |
| MOVD X9, CX |
| MOVD X3, R8 |
| MOVD X2, R9 |
| PSHUFL $0x39, X13, X13 |
| PSHUFL $0x39, X9, X9 |
| PSHUFL $0x39, X3, X3 |
| PSHUFL $0x39, X2, X2 |
| XORL 112(SI), DX |
| XORL 116(SI), CX |
| XORL 120(SI), R8 |
| XORL 124(SI), R9 |
| MOVL DX, 112(DI) |
| MOVL CX, 116(DI) |
| MOVL R8, 120(DI) |
| MOVL R9, 124(DI) |
| MOVD X13, DX |
| MOVD X9, CX |
| MOVD X3, R8 |
| MOVD X2, R9 |
| PSHUFL $0x39, X13, X13 |
| PSHUFL $0x39, X9, X9 |
| PSHUFL $0x39, X3, X3 |
| PSHUFL $0x39, X2, X2 |
| XORL 176(SI), DX |
| XORL 180(SI), CX |
| XORL 184(SI), R8 |
| XORL 188(SI), R9 |
| MOVL DX, 176(DI) |
| MOVL CX, 180(DI) |
| MOVL R8, 184(DI) |
| MOVL R9, 188(DI) |
| MOVD X13, DX |
| MOVD X9, CX |
| MOVD X3, R8 |
| MOVD X2, R9 |
| XORL 240(SI), DX |
| XORL 244(SI), CX |
| XORL 248(SI), R8 |
| XORL 252(SI), R9 |
| MOVL DX, 240(DI) |
| MOVL CX, 244(DI) |
| MOVL R8, 248(DI) |
| MOVL R9, 252(DI) |
// Restore the byte count, account for the 256 bytes just written, and
// advance both pointers.
| MOVQ 352(R12), R9 |
| SUBQ $0x00000100, R9 |
| ADDQ $0x00000100, SI |
| ADDQ $0x00000100, DI |
// Another full 256 bytes left: go again. Exactly zero left: finished.
// Otherwise fall through with 1..255 bytes remaining.
| CMPQ R9, $0x00000100 |
| JAE BYTESATLEAST256 |
| CMPQ R9, $0x00 |
| JBE DONE |
| |
// Single-block path: handles one 64-byte block (or a shorter final one).
| BYTESBETWEEN1AND255: |
// A full block can be read and written in place.
| CMPQ R9, $0x40 |
| JAE NOCOPY |
// Short final block: remember the real destination in DX, copy the R9 input
// bytes into the bounce buffer at 360(R12), and point both SI and DI at that
// buffer so the full-width 64-byte code below stays inside the frame.
| MOVQ DI, DX |
| LEAQ 360(R12), DI |
| MOVQ R9, CX |
| REP; MOVSB |
| LEAQ 360(R12), DI |
| LEAQ 360(R12), SI |
| |
| NOCOPY: |
// Park the byte count (R9 is scratch below) and load the four state rows.
| MOVQ R9, 352(R12) |
| MOVOA 48(R12), X0 |
| MOVOA (R12), X1 |
| MOVOA 16(R12), X2 |
| MOVOA 32(R12), X3 |
// X4 is pre-seeded with X1 for the first addition of the loop.
| MOVOA X1, X4 |
// CX = round counter (20); MAINLOOP2 subtracts 4 per iteration.
| MOVQ $0x00000014, CX |
| |
// Same add / rotate-left / xor steps as MAINLOOP1 (shift pairs 7/25, 9/23,
// 13/19, 18/14), but on whole rows of a single block. The interleaved
// PSHUFL $0x93 / $0x4e / $0x39 rotate a row's lanes by one, two and three
// positions to line the rows up for the following step.
| MAINLOOP2: |
| PADDL X0, X4 |
| MOVOA X0, X5 |
| MOVOA X4, X6 |
| PSLLL $0x07, X4 |
| PSRLL $0x19, X6 |
| PXOR X4, X3 |
| PXOR X6, X3 |
| PADDL X3, X5 |
| MOVOA X3, X4 |
| MOVOA X5, X6 |
| PSLLL $0x09, X5 |
| PSRLL $0x17, X6 |
| PXOR X5, X2 |
| PSHUFL $0x93, X3, X3 |
| PXOR X6, X2 |
| PADDL X2, X4 |
| MOVOA X2, X5 |
| MOVOA X4, X6 |
| PSLLL $0x0d, X4 |
| PSRLL $0x13, X6 |
| PXOR X4, X1 |
| PSHUFL $0x4e, X2, X2 |
| PXOR X6, X1 |
| PADDL X1, X5 |
| MOVOA X3, X4 |
| MOVOA X5, X6 |
| PSLLL $0x12, X5 |
| PSRLL $0x0e, X6 |
| PXOR X5, X0 |
| PSHUFL $0x39, X1, X1 |
| PXOR X6, X0 |
| PADDL X0, X4 |
| MOVOA X0, X5 |
| MOVOA X4, X6 |
| PSLLL $0x07, X4 |
| PSRLL $0x19, X6 |
| PXOR X4, X1 |
| PXOR X6, X1 |
| PADDL X1, X5 |
| MOVOA X1, X4 |
| MOVOA X5, X6 |
| PSLLL $0x09, X5 |
| PSRLL $0x17, X6 |
| PXOR X5, X2 |
| PSHUFL $0x93, X1, X1 |
| PXOR X6, X2 |
| PADDL X2, X4 |
| MOVOA X2, X5 |
| MOVOA X4, X6 |
| PSLLL $0x0d, X4 |
| PSRLL $0x13, X6 |
| PXOR X4, X3 |
| PSHUFL $0x4e, X2, X2 |
| PXOR X6, X3 |
| PADDL X3, X5 |
| MOVOA X1, X4 |
| MOVOA X5, X6 |
| PSLLL $0x12, X5 |
| PSRLL $0x0e, X6 |
| PXOR X5, X0 |
| PSHUFL $0x39, X3, X3 |
| PXOR X6, X0 |
| PADDL X0, X4 |
| MOVOA X0, X5 |
| MOVOA X4, X6 |
| PSLLL $0x07, X4 |
| PSRLL $0x19, X6 |
| PXOR X4, X3 |
| PXOR X6, X3 |
| PADDL X3, X5 |
| MOVOA X3, X4 |
| MOVOA X5, X6 |
| PSLLL $0x09, X5 |
| PSRLL $0x17, X6 |
| PXOR X5, X2 |
| PSHUFL $0x93, X3, X3 |
| PXOR X6, X2 |
| PADDL X2, X4 |
| MOVOA X2, X5 |
| MOVOA X4, X6 |
| PSLLL $0x0d, X4 |
| PSRLL $0x13, X6 |
| PXOR X4, X1 |
| PSHUFL $0x4e, X2, X2 |
| PXOR X6, X1 |
| PADDL X1, X5 |
| MOVOA X3, X4 |
| MOVOA X5, X6 |
| PSLLL $0x12, X5 |
| PSRLL $0x0e, X6 |
| PXOR X5, X0 |
| PSHUFL $0x39, X1, X1 |
| PXOR X6, X0 |
| PADDL X0, X4 |
| MOVOA X0, X5 |
| MOVOA X4, X6 |
| PSLLL $0x07, X4 |
| PSRLL $0x19, X6 |
| PXOR X4, X1 |
| PXOR X6, X1 |
| PADDL X1, X5 |
| MOVOA X1, X4 |
| MOVOA X5, X6 |
| PSLLL $0x09, X5 |
| PSRLL $0x17, X6 |
| PXOR X5, X2 |
| PSHUFL $0x93, X1, X1 |
| PXOR X6, X2 |
| PADDL X2, X4 |
| MOVOA X2, X5 |
| MOVOA X4, X6 |
| PSLLL $0x0d, X4 |
| PSRLL $0x13, X6 |
| PXOR X4, X3 |
| PSHUFL $0x4e, X2, X2 |
| PXOR X6, X3 |
// The JA at the bottom of the loop consumes the flags set by this SUBQ, so
// the vector instructions in between must leave the flags untouched.
| SUBQ $0x04, CX |
| PADDL X3, X5 |
| MOVOA X1, X4 |
| MOVOA X5, X6 |
| PSLLL $0x12, X5 |
// NOTE(review): X7 is zeroed here but no later instruction on the
// single-block path reads it.
| PXOR X7, X7 |
| PSRLL $0x0e, X6 |
| PXOR X5, X0 |
| PSHUFL $0x39, X3, X3 |
| PXOR X6, X0 |
| JA MAINLOOP2 |
// Add the pre-round rows back in, then emit one word per row at a time:
// MOVD reads lane 0 and PSHUFL $0x39 rotates the next lane into place. The
// rows hold the state along diagonals, hence the scattered offsets.
| PADDL 48(R12), X0 |
| PADDL (R12), X1 |
| PADDL 16(R12), X2 |
| PADDL 32(R12), X3 |
| MOVD X0, CX |
| MOVD X1, R8 |
| MOVD X2, R9 |
| MOVD X3, AX |
| PSHUFL $0x39, X0, X0 |
| PSHUFL $0x39, X1, X1 |
| PSHUFL $0x39, X2, X2 |
| PSHUFL $0x39, X3, X3 |
| XORL (SI), CX |
| XORL 48(SI), R8 |
| XORL 32(SI), R9 |
| XORL 16(SI), AX |
| MOVL CX, (DI) |
| MOVL R8, 48(DI) |
| MOVL R9, 32(DI) |
| MOVL AX, 16(DI) |
| MOVD X0, CX |
| MOVD X1, R8 |
| MOVD X2, R9 |
| MOVD X3, AX |
| PSHUFL $0x39, X0, X0 |
| PSHUFL $0x39, X1, X1 |
| PSHUFL $0x39, X2, X2 |
| PSHUFL $0x39, X3, X3 |
| XORL 20(SI), CX |
| XORL 4(SI), R8 |
| XORL 52(SI), R9 |
| XORL 36(SI), AX |
| MOVL CX, 20(DI) |
| MOVL R8, 4(DI) |
| MOVL R9, 52(DI) |
| MOVL AX, 36(DI) |
| MOVD X0, CX |
| MOVD X1, R8 |
| MOVD X2, R9 |
| MOVD X3, AX |
| PSHUFL $0x39, X0, X0 |
| PSHUFL $0x39, X1, X1 |
| PSHUFL $0x39, X2, X2 |
| PSHUFL $0x39, X3, X3 |
| XORL 40(SI), CX |
| XORL 24(SI), R8 |
| XORL 8(SI), R9 |
| XORL 56(SI), AX |
| MOVL CX, 40(DI) |
| MOVL R8, 24(DI) |
| MOVL R9, 8(DI) |
| MOVL AX, 56(DI) |
| MOVD X0, CX |
| MOVD X1, R8 |
| MOVD X2, R9 |
| MOVD X3, AX |
| XORL 60(SI), CX |
| XORL 44(SI), R8 |
| XORL 28(SI), R9 |
| XORL 12(SI), AX |
| MOVL CX, 60(DI) |
| MOVL R8, 44(DI) |
| MOVL R9, 28(DI) |
| MOVL AX, 12(DI) |
// Restore the byte count and advance the 64-bit block counter held in the
// state words at 16(R12) (low half) and 36(R12) (high half).
| MOVQ 352(R12), R9 |
| MOVL 16(R12), CX |
| MOVL 36(R12), R8 |
| ADDQ $0x01, CX |
| SHLQ $0x20, R8 |
| ADDQ R8, CX |
| MOVQ CX, R8 |
| SHRQ $0x20, R8 |
| MOVL CX, 16(R12) |
| MOVL R8, 36(R12) |
// More than 64 bytes were pending: continue. Exactly 64: finished.
// Fewer: the block was produced in the bounce buffer (DI), so copy just the
// R9 valid bytes to the real destination saved in DX.
| CMPQ R9, $0x40 |
| JA BYTESATLEAST65 |
| JAE BYTESATLEAST64 |
| MOVQ DI, SI |
| MOVQ DX, DI |
| MOVQ R9, CX |
| REP; MOVSB |
| |
| BYTESATLEAST64: |
| DONE: |
| RET |
| |
// Step past the 64 bytes just written and process the next block.
| BYTESATLEAST65: |
| SUBQ $0x40, R9 |
| ADDQ $0x40, DI |
| ADDQ $0x40, SI |
| JMP BYTESBETWEEN1AND255 |