| // SPDX-License-Identifier: GPL-2.0-or-later |
| |
| /* P9 gzip sample code for demonstrating the P9 NX hardware interface. |
| * Not intended for production use or for performance or compression |
| * ratio measurements. For simplicity of demonstration, this sample |
| * code compresses into fixed Huffman blocks only (Deflate btype=1) |
| * and has very simple memory management. Dynamic Huffman blocks |
| * (Deflate btype=2) are more involved as detailed in the user guide. |
| * Note also that /dev/crypto/gzip, VAS and skiboot support are |
| * required. |
| * |
| * Copyright 2020 IBM Corp. |
| * |
| * https://github.com/libnxz/power-gzip for zlib api and other utils |
| * |
| * Author: Bulent Abali <abali@us.ibm.com> |
| * |
| * Definitions of acronyms used here. See |
| * P9 NX Gzip Accelerator User's Manual for details: |
| * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf |
| * |
| * adler/crc: 32 bit checksums appended to stream tail |
| * ce: completion extension |
| * cpb: coprocessor parameter block (metadata) |
| * crb: coprocessor request block (command) |
| * csb: coprocessor status block (status) |
| * dht: dynamic huffman table |
| * dde: data descriptor element (address, length) |
| * ddl: list of ddes |
| * dh/fh: dynamic and fixed huffman types |
| * fc: coprocessor function code |
| * histlen: history/dictionary length |
| * history: sliding window of up to 32KB of data |
| * lzcount: Deflate LZ symbol counts |
| * rembytecnt: remaining byte count |
| * sfbt: source final block type; last block's type during decomp |
| * spbc: source processed byte count |
| * subc: source unprocessed bit count |
| * tebc: target ending bit count; valid bits in the last byte |
| * tpbc: target processed byte count |
| * vas: virtual accelerator switch; the user mode interface |
| */ |
| |
| #define _ISOC11_SOURCE // For aligned_alloc() |
| #define _DEFAULT_SOURCE // For endian.h |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <stdint.h> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <sys/time.h> |
| #include <sys/fcntl.h> |
| #include <sys/mman.h> |
| #include <endian.h> |
| #include <bits/endian.h> |
| #include <sys/ioctl.h> |
| #include <assert.h> |
| #include <errno.h> |
| #include <signal.h> |
| #include "utils.h" |
| #include "nxu.h" |
| #include "nx.h" |
| |
/* Suffix appended to the input file name to form the output file name */
#define FEXT ".nx.gz"
/* Size of the buffer holding the output file name, terminator included */
#define FNAME_MAX 1024

/*
 * sysfs entry holding the maximum request length; passed to
 * read_sysfs_file() by compress_file().
 */
#define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len"

/* Smaller of X and Y; note that each argument may be evaluated twice */
#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))

/*
 * Debug level and log stream; main() resets them to 0 and NULL.
 * NOTE(review): presumably consumed by the NXPRT debug macro - confirm
 * against the project headers.
 */
int nx_dbg;
FILE *nx_gzip_log;
| |
| /* |
| * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure. |
| */ |
| static int compress_fht_sample(char *src, uint32_t srclen, char *dst, |
| uint32_t dstlen, int with_count, |
| struct nx_gzip_crb_cpb_t *cmdp, void *handle) |
| { |
| uint32_t fc; |
| |
| assert(!!cmdp); |
| |
| put32(cmdp->crb, gzip_fc, 0); /* clear */ |
| fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT : |
| GZIP_FC_COMPRESS_RESUME_FHT; |
| putnn(cmdp->crb, gzip_fc, fc); |
| putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */ |
| memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb)); |
| |
| /* Section 6.6 programming notes; spbc may be in two different |
| * places depending on FC. |
| */ |
| if (!with_count) |
| put32(cmdp->cpb, out_spbc_comp, 0); |
| else |
| put32(cmdp->cpb, out_spbc_comp_with_count, 0); |
| |
| /* Figure 6-3 6-4; CSB location */ |
| put64(cmdp->crb, csb_address, 0); |
| put64(cmdp->crb, csb_address, |
| (uint64_t) &cmdp->crb.csb & csb_address_mask); |
| |
| /* Source direct dde (scatter-gather list) */ |
| clear_dde(cmdp->crb.source_dde); |
| putnn(cmdp->crb.source_dde, dde_count, 0); |
| put32(cmdp->crb.source_dde, ddebc, srclen); |
| put64(cmdp->crb.source_dde, ddead, (uint64_t) src); |
| |
| /* Target direct dde (scatter-gather list) */ |
| clear_dde(cmdp->crb.target_dde); |
| putnn(cmdp->crb.target_dde, dde_count, 0); |
| put32(cmdp->crb.target_dde, ddebc, dstlen); |
| put64(cmdp->crb.target_dde, ddead, (uint64_t) dst); |
| |
| /* Submit the crb, the job descriptor, to the accelerator */ |
| return nxu_submit_job(cmdp, handle); |
| } |
| |
/*
 * Write a minimal gzip member header (no file name, zero timestamp)
 * to buf and return its length in bytes. buf must have room for the
 * 10 header bytes.
 * Gzip specification at https://tools.ietf.org/html/rfc1952
 */
int gzip_header_blank(char *buf)
{
	static const unsigned char blank_hdr[] = {
		0x1f, 0x8b,		/* ID1, ID2 */
		0x08,			/* CM */
		0x00,			/* FLG */
		0x00, 0x00, 0x00, 0x00,	/* MTIME */
		0x04,			/* XFL 4=fastest */
		0x03,			/* OS UNIX */
	};

	memcpy(buf, blank_hdr, sizeof(blank_hdr));

	return (int) sizeof(blank_hdr);
}
| |
/*
 * Append an empty stored block (Z_SYNC_FLUSH as described in zlib.h)
 * so that the stream ends on a byte boundary.
 *
 * buf:   first byte past the compressed data produced so far
 * tebc:  number of valid bits in the last compressed byte, buf[-1];
 *        when it is not positive the block starts at buf[0] instead
 * final: bit 0 is used as the BFINAL bit of the appended block
 *
 * When tebc is positive, the unused high bits of buf[-1] are cleared
 * and that byte is rewritten with the start of the block header.
 * Returns the number of bytes added past the original buf: 5 when
 * tebc is 0 or greater than 5, otherwise 4.
 */
int append_sync_flush(char *buf, int tebc, int final)
{
	const int used_bits = tebc & 0x7;
	char *out = buf;
	uint64_t bits;
	int hdr_end;
	int nbytes;
	int i;

	if (tebc > 0) {
		/* Last byte is partially full; keep only its valid bits */
		out = buf - 1;
		*out &= (unsigned char) ((1 << tebc) - 1);
	} else {
		*out = 0;
	}

	/* Existing bits, then BFINAL; the two BTYPE bits stay zero */
	bits = *out;
	bits |= (0x1ULL & final) << used_bits;

	/* The 3 header bits end in the first or in the second byte */
	hdr_end = (used_bits + 3 <= 8) ? 8 : 16;

	/* Zero length block: LEN = 0x0000, NLEN = 0xFFFF */
	bits |= (0xFFFF0000ULL) << hdr_end;

	/* Emit header byte(s) plus the 4 LEN/NLEN bytes, low byte first */
	nbytes = (hdr_end + 32) / 8;
	for (i = 0; i < nbytes; i++) {
		out[i] = (unsigned char) (bits & 0xffULL);
		bits >>= 8;
	}

	if (tebc == 0 || tebc > 5)
		return 5;
	return 4;
}
| |
/*
 * Set or clear the BFINAL bit (bit 0) of the first byte at buf.
 * The caller must ensure that the Deflate block begins byte aligned
 * at buf. Any non-zero bfinal sets the bit; zero clears it.
 */
static void set_bfinal(void *buf, int bfinal)
{
	unsigned char *first = buf;

	*first = bfinal ? (*first | 0x01) : (*first & 0xfe);
}
| |
| int compress_file(int argc, char **argv, void *handle) |
| { |
| char *inbuf, *outbuf, *srcbuf, *dstbuf; |
| char outname[FNAME_MAX]; |
| uint32_t srclen, dstlen; |
| uint32_t flushlen, chunk; |
| size_t inlen, outlen, dsttotlen, srctotlen; |
| uint32_t crc, spbc, tpbc, tebc; |
| int lzcounts = 0; |
| int cc; |
| int num_hdr_bytes; |
| struct nx_gzip_crb_cpb_t *cmdp; |
| uint32_t pagelen = 65536; |
| int fault_tries = NX_MAX_FAULTS; |
| char buf[32]; |
| |
| cmdp = (void *)(uintptr_t) |
| aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t), |
| sizeof(struct nx_gzip_crb_cpb_t)); |
| |
| if (argc != 2) { |
| fprintf(stderr, "usage: %s <fname>\n", argv[0]); |
| exit(-1); |
| } |
| if (read_file_alloc(argv[1], &inbuf, &inlen)) |
| exit(-1); |
| fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen); |
| |
| /* Generous output buffer for header/trailer */ |
| outlen = 2 * inlen + 1024; |
| |
| assert(NULL != (outbuf = (char *)malloc(outlen))); |
| nxu_touch_pages(outbuf, outlen, pagelen, 1); |
| |
| /* |
| * On PowerVM, the hypervisor defines the maximum request buffer |
| * size is defined and this value is available via sysfs. |
| */ |
| if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) { |
| chunk = atoi(buf); |
| } else { |
| /* sysfs entry is not available on PowerNV */ |
| /* Compress piecemeal in smallish chunks */ |
| chunk = 1<<22; |
| } |
| |
| /* Write the gzip header to the stream */ |
| num_hdr_bytes = gzip_header_blank(outbuf); |
| dstbuf = outbuf + num_hdr_bytes; |
| outlen = outlen - num_hdr_bytes; |
| dsttotlen = num_hdr_bytes; |
| |
| srcbuf = inbuf; |
| srctotlen = 0; |
| |
| /* Init the CRB, the coprocessor request block */ |
| memset(&cmdp->crb, 0, sizeof(cmdp->crb)); |
| |
| /* Initial gzip crc32 */ |
| put32(cmdp->cpb, in_crc, 0); |
| |
| while (inlen > 0) { |
| |
| /* Submit chunk size source data per job */ |
| srclen = NX_MIN(chunk, inlen); |
| /* Supply large target in case data expands */ |
| dstlen = NX_MIN(2*srclen, outlen); |
| |
| /* Page faults are handled by the user code */ |
| |
| /* Fault-in pages; an improved code wouldn't touch so |
| * many pages but would try to estimate the |
| * compression ratio and adjust both the src and dst |
| * touch amounts. |
| */ |
| nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen, |
| 1); |
| nxu_touch_pages(srcbuf, srclen, pagelen, 0); |
| nxu_touch_pages(dstbuf, dstlen, pagelen, 1); |
| |
| cc = compress_fht_sample( |
| srcbuf, srclen, |
| dstbuf, dstlen, |
| lzcounts, cmdp, handle); |
| |
| if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC && |
| cc != ERR_NX_AT_FAULT) { |
| fprintf(stderr, "nx error: cc= %d\n", cc); |
| exit(-1); |
| } |
| |
| /* Page faults are handled by the user code */ |
| if (cc == ERR_NX_AT_FAULT) { |
| NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc)); |
| NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n", |
| fault_tries, |
| (unsigned long long) cmdp->crb.csb.fsaddr)); |
| fault_tries--; |
| if (fault_tries > 0) { |
| continue; |
| } else { |
| fprintf(stderr, "error: cannot progress; "); |
| fprintf(stderr, "too many faults\n"); |
| exit(-1); |
| } |
| } |
| |
| fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */ |
| |
| inlen = inlen - srclen; |
| srcbuf = srcbuf + srclen; |
| srctotlen = srctotlen + srclen; |
| |
| /* Two possible locations for spbc depending on the function |
| * code. |
| */ |
| spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) : |
| get32(cmdp->cpb, out_spbc_comp_with_count); |
| assert(spbc == srclen); |
| |
| /* Target byte count */ |
| tpbc = get32(cmdp->crb.csb, tpbc); |
| /* Target ending bit count */ |
| tebc = getnn(cmdp->cpb, out_tebc); |
| NXPRT(fprintf(stderr, "compressed chunk %d ", spbc)); |
| NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc)); |
| |
| if (inlen > 0) { /* More chunks to go */ |
| set_bfinal(dstbuf, 0); |
| dstbuf = dstbuf + tpbc; |
| dsttotlen = dsttotlen + tpbc; |
| outlen = outlen - tpbc; |
| /* Round up to the next byte with a flush |
| * block; do not set the BFINAqL bit. |
| */ |
| flushlen = append_sync_flush(dstbuf, tebc, 0); |
| dsttotlen = dsttotlen + flushlen; |
| outlen = outlen - flushlen; |
| dstbuf = dstbuf + flushlen; |
| NXPRT(fprintf(stderr, "added sync_flush %d bytes\n", |
| flushlen)); |
| } else { /* Done */ |
| /* Set the BFINAL bit of the last block per Deflate |
| * specification. |
| */ |
| set_bfinal(dstbuf, 1); |
| dstbuf = dstbuf + tpbc; |
| dsttotlen = dsttotlen + tpbc; |
| outlen = outlen - tpbc; |
| } |
| |
| /* Resuming crc32 for the next chunk */ |
| crc = get32(cmdp->cpb, out_crc); |
| put32(cmdp->cpb, in_crc, crc); |
| crc = be32toh(crc); |
| } |
| |
| /* Append crc32 and ISIZE to the end */ |
| memcpy(dstbuf, &crc, 4); |
| memcpy(dstbuf+4, &srctotlen, 4); |
| dsttotlen = dsttotlen + 8; |
| outlen = outlen - 8; |
| |
| assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT))); |
| strcpy(outname, argv[1]); |
| strcat(outname, FEXT); |
| if (write_file(outname, outbuf, dsttotlen)) { |
| fprintf(stderr, "write error: %s\n", outname); |
| exit(-1); |
| } |
| |
| fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen, |
| dsttotlen); |
| fprintf(stderr, "crc32 checksum = %08x\n", crc); |
| |
| if (inbuf != NULL) |
| free(inbuf); |
| |
| if (outbuf != NULL) |
| free(outbuf); |
| |
| return 0; |
| } |
| |
| int main(int argc, char **argv) |
| { |
| int rc; |
| struct sigaction act; |
| void *handle; |
| |
| nx_dbg = 0; |
| nx_gzip_log = NULL; |
| act.sa_handler = 0; |
| act.sa_sigaction = nxu_sigsegv_handler; |
| act.sa_flags = SA_SIGINFO; |
| act.sa_restorer = 0; |
| sigemptyset(&act.sa_mask); |
| sigaction(SIGSEGV, &act, NULL); |
| |
| handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0); |
| if (!handle) { |
| fprintf(stderr, "Unable to init NX, errno %d\n", errno); |
| exit(-1); |
| } |
| |
| rc = compress_file(argc, argv, handle); |
| |
| nx_function_end(handle); |
| |
| return rc; |
| } |