@@ -17,7 +17,6 @@
#include <linux/linkage.h>
-
.section .rodata
.align 16
/*
@@ -67,19 +66,22 @@
#define CRC %ecx
#endif
-
-
.text
/**
- * Calculate crc32
- * BUF - buffer (16 bytes aligned)
- * LEN - sizeof buffer (16 bytes aligned), LEN should be grater than 63
- * CRC - initial crc32
- * return %eax crc32
- * uint crc32_pclmul_le_16(unsigned char const *buffer,
- * size_t len, uint crc32)
+ * crc32_pclmul_le_16 - Calculate CRC32 using x86 PCLMULQDQ instructions
+ * @buffer: address of data (32-bit %eax/64-bit %rdi, BUF macro);
+ * must be aligned to a multiple of 16
+ * @len: data size (32-bit %edx/64-bit %rsi, LEN macro);
+ * must be a multiple of 16 and greater than 63
+ * @crc32: initial CRC32 value (32-bit %ecx/64-bit %edx, CRC macro);
+ * only uses lower 32 bits
+ *
+ * This function supports both 32-bit and 64-bit CPUs.
+ * It requires data to be aligned and a minimum size.
+ *
+ * Return: (32-bit %eax/64-bit %rax) CRC32 value (in lower 32 bits)
+ * Prototype: asmlinkage u32 crc32_pclmul_le_16(const u8 *buffer, size_t len, u32 crc32);
*/
-
SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
movdqa (BUF), %xmm1
movdqa 0x10(BUF), %xmm2
@@ -70,22 +70,30 @@
.error "SMALL_ SIZE must be < 256"
.endif
-# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
-
.text
+/**
+ * crc_pcl - Calculate CRC32C using x86 CRC32 and PCLMULQDQ instructions
+ * @buffer: address of data (%rdi, bufp macro)
+ * @len: data size (%rsi, len macro)
+ * @crc_init: initial CRC32C value (%rdx, crc_init_arg macro);
+ * only using lower 32 bits
+ *
+ * This function supports 64-bit CPUs.
+ * It loops on 8-byte aligned QWORDs, but also supports unaligned
+ * addresses and all length values.
+ *
+ * Return: (%rax) CRC32C value (upper 32 bits zero)
+ * Prototype: asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len,
+ *            unsigned int crc_init);
+ */
SYM_FUNC_START(crc_pcl)
#define bufp rdi
-#define bufp_dw %edi
-#define bufp_w %di
-#define bufp_b %dil
#define bufptmp %rcx
#define block_0 %rcx
#define block_1 %rdx
#define block_2 %r11
#define len %rsi
#define len_dw %esi
-#define len_w %si
-#define len_b %sil
#define crc_init_arg %rdx
#define tmp %rbx
#define crc_init %r8
@@ -97,7 +105,7 @@ SYM_FUNC_START(crc_pcl)
pushq %rdi
pushq %rsi
- ## Move crc_init for Linux to a different
+ ## Move crc_init for Linux to a different register
mov crc_init_arg, crc_init
################################################################
@@ -216,7 +224,7 @@ LABEL crc_ %i
## 4) Combine three results:
################################################################
- lea (K_table-8)(%rip), %bufp # first entry is for idx 1
+ lea (K_table-8)(%rip), %bufp # first entry is for idx 1
shlq $3, %rax # rax *= 8
pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2
leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
@@ -326,10 +334,9 @@ JMPTBL_ENTRY %i
i=i+1
.endr
-
################################################################
## PCLMULQDQ tables
- ## Table is 128 entries x 2 words (8 bytes) each
+ ## Table is 128 entries x 8 bytes each
################################################################
.align 8
K_table:
@@ -52,8 +52,6 @@
#include <linux/linkage.h>
-.text
-
#define init_crc %edi
#define buf %rsi
#define len %rdx
@@ -89,11 +87,23 @@
xorps \src_reg, \dst_reg
.endm
-#
-# u16 crc_t10dif_pcl(u16 init_crc, const *u8 buf, size_t len);
-#
-# Assumes len >= 16.
-#
+.text
+/**
+ * crc_t10dif_pcl - Calculate CRC16 per T10 DIF (data integrity format)
+ * using x86 PCLMULQDQ instructions
+ * @init_crc: initial CRC16 value (%edi, init_crc macro);
+ * only uses lower 16 bits
+ * @buf: address of data (%rsi, buf macro);
+ * data buffer must be at least 16 bytes
+ * @len: data size (%rdx, len macro);
+ * must be >= 16
+ *
+ * This function supports 64-bit CPUs.
+ * It allows data to be at any offset.
+ *
+ * Return: (%rax) CRC16 value (upper 48 bits zero)
+ * Prototype: asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len);
+ */
SYM_FUNC_START(crc_t10dif_pcl)
movdqa .Lbswap_mask(%rip), BSWAP_MASK