import os

import infra.basetest


class TestLlamaCpp(infra.basetest.BRTest):
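    # Build an aarch64 system with llama.cpp and its tools, boot it under
    # QEMU, download a small GGUF model from Hugging Face and check that
    # llama-cli answers a simple prompt.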
    config = """
BR2_aarch64=y
BR2_TOOLCHAIN_EXTERNAL=y
BR2_TOOLCHAIN_EXTERNAL_BOOTLIN=y
BR2_SYSTEM_DHCP="eth0"
BR2_LINUX_KERNEL=y
BR2_LINUX_KERNEL_CUSTOM_VERSION=y
BR2_LINUX_KERNEL_CUSTOM_VERSION_VALUE="6.12.55"
BR2_LINUX_KERNEL_USE_CUSTOM_CONFIG=y
BR2_LINUX_KERNEL_CUSTOM_CONFIG_FILE="board/qemu/aarch64-virt/linux.config"
BR2_PACKAGE_CA_CERTIFICATES=y
BR2_PACKAGE_OPENSSL=y
BR2_PACKAGE_LIBCURL=y
BR2_PACKAGE_LIBCURL_CURL=y
BR2_PACKAGE_LLAMA_CPP=y
BR2_PACKAGE_LLAMA_CPP_TOOLS=y
BR2_TARGET_GENERIC_GETTY_PORT="ttyAMA0"
BR2_TARGET_ROOTFS_EXT2=y
BR2_TARGET_ROOTFS_EXT2_SIZE="1024M"
# BR2_TARGET_ROOTFS_TAR is not set
"""

    def login(self):
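        # Boot the generated image in a QEMU aarch64 "virt" machine with
        # user-mode networking, so that the target can later download the
        # model from the network, then wait for the login prompt.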
        img = os.path.join(self.builddir, "images", "rootfs.ext2")
        kern = os.path.join(self.builddir, "images", "Image")
        self.emulator.boot(
            arch="aarch64",
            kernel=kern,
            kernel_cmdline=["root=/dev/vda", "console=ttyAMA0"],
            options=[
                "-M", "virt",
                "-cpu", "cortex-a57",
                "-smp", "4",
                "-m", "2G",
                "-drive", f"file={img},if=virtio,format=raw",
| "-net", "nic,model=virtio", |
| "-net", "user" |
| ] |
| ) |
| self.emulator.login() |

    def test_run(self):
        self.login()

        # Check the program can execute.
        self.assertRunOk("llama-cli --version")

        # We define a Hugging Face model to be downloaded.
        # We choose a relatively small model for testing.
        hf_model = "ggml-org/gemma-3-270m-it-GGUF"

        # We define a common-knowledge question to ask the model.
        prompt = "What is the capital of the United Kingdom?"

        # We define a keyword expected to be present in the answer.
        expected_answer = "london"

        # We set a few llama-cli options:
        # We don't want an interactive session.
        llama_opts = "--single-turn"
        llama_opts += " --no-display-prompt"
        # We set a fixed seed to reduce the variability of the test.
        llama_opts += " --seed 123456789"
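        # With --hf-repo, llama-cli downloads the GGUF model from Hugging
        # Face at run time, which is why libcurl and ca-certificates are
        # enabled in the configuration.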
        llama_opts += f" --hf-repo {hf_model}"
        llama_opts += f" --prompt '{prompt}'"

        # Run the command. We suppress all stderr output logs to get
        # only the answer. Remove the redirection for debugging.
        cmd = f"llama-cli {llama_opts} 2>/dev/null"
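        # Note: downloading the model and running inference on an emulated
        # CPU are slow, so the timeout is deliberately generous; the exact
        # value is an estimate and may need tuning.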
        out, ret = self.emulator.run(cmd, timeout=1800)
        self.assertEqual(ret, 0)
        out_str = "\n".join(out).lower()
        self.assertIn(expected_answer, out_str)