support/testing/tests/package/test_llama_cpp.py - buildroot - Git at Google

 import os

 import infra.basetest


 class TestLlamaCpp(infra.basetest.BRTest):
     config = """
         BR2_aarch64=y
         BR2_TOOLCHAIN_EXTERNAL=y
         BR2_TOOLCHAIN_EXTERNAL_BOOTLIN=y
         BR2_SYSTEM_DHCP="eth0"
         BR2_LINUX_KERNEL=y
         BR2_LINUX_KERNEL_CUSTOM_VERSION=y
         BR2_LINUX_KERNEL_CUSTOM_VERSION_VALUE="6.12.55"
         BR2_LINUX_KERNEL_USE_CUSTOM_CONFIG=y
         BR2_LINUX_KERNEL_CUSTOM_CONFIG_FILE="board/qemu/aarch64-virt/linux.config"
         BR2_PACKAGE_CA_CERTIFICATES=y
         BR2_PACKAGE_OPENSSL=y
         BR2_PACKAGE_LIBCURL=y
         BR2_PACKAGE_LIBCURL_CURL=y
         BR2_PACKAGE_LLAMA_CPP=y
         BR2_PACKAGE_LLAMA_CPP_TOOLS=y
         BR2_TARGET_GENERIC_GETTY_PORT="ttyAMA0"
         BR2_TARGET_ROOTFS_EXT2=y
         BR2_TARGET_ROOTFS_EXT2_SIZE="1024M"
         # BR2_TARGET_ROOTFS_TAR is not set
     """

     def login(self):
         img = os.path.join(self.builddir, "images", "rootfs.ext2")
         kern = os.path.join(self.builddir, "images", "Image")
         self.emulator.boot(
             arch="aarch64",
             kernel=kern,
             kernel_cmdline=["root=/dev/vda", "console=ttyAMA0"],
             options=[
                 "-M", "virt",
                 "-cpu", "cortex-a57",
                 "-smp", "4",
                 "-m", "2G",
                 "-drive", f"file={img},if=virtio,format=raw",
                 "-net", "nic,model=virtio",
                 "-net", "user"
             ]
         )
         self.emulator.login()

     def test_run(self):
         self.login()

         # Check the program can execute.
         self.assertRunOk("llama-cli --version")

         # We define a Hugging Face model to be downloaded.
         # We choose a relatively small model, for testing.
         hf_model = "ggml-org/gemma-3-270m-it-GGUF"

         # We define a common knowledge question to ask to the model.
         prompt = "What is the capital of the United Kingdom?"

         # We define an expected keyword, to be present in the answer.
         expected_answer = "london"

         # We set few llama-cli options:
         # We don't want an interactive session
         llama_opts = "--single-turn"
         llama_opts += " --no-display-prompt"
         # We set a fixed seed, to reduce variability of the test
         llama_opts += " --seed 123456789"
         llama_opts += f" --hf-repo {hf_model}"
         llama_opts += f" --prompt '{prompt}'"

         # Run the command. We suppress all stderr output logs to get
         # only the answer. Remove the redirection for debugging.
         cmd = f"llama-cli {llama_opts} 2>/dev/null"
         out, ret = self.emulator.run(cmd, timeout=60)
         self.assertEqual(ret, 0)
         out_str = "\n".join(out).lower()
         self.assertIn(expected_answer, out_str)
	import os

	import infra.basetest


	class TestLlamaCpp(infra.basetest.BRTest):
	config = """
	BR2_aarch64=y
	BR2_TOOLCHAIN_EXTERNAL=y
	BR2_TOOLCHAIN_EXTERNAL_BOOTLIN=y
	BR2_SYSTEM_DHCP="eth0"
	BR2_LINUX_KERNEL=y
	BR2_LINUX_KERNEL_CUSTOM_VERSION=y
	BR2_LINUX_KERNEL_CUSTOM_VERSION_VALUE="6.12.55"
	BR2_LINUX_KERNEL_USE_CUSTOM_CONFIG=y
	BR2_LINUX_KERNEL_CUSTOM_CONFIG_FILE="board/qemu/aarch64-virt/linux.config"
	BR2_PACKAGE_CA_CERTIFICATES=y
	BR2_PACKAGE_OPENSSL=y
	BR2_PACKAGE_LIBCURL=y
	BR2_PACKAGE_LIBCURL_CURL=y
	BR2_PACKAGE_LLAMA_CPP=y
	BR2_PACKAGE_LLAMA_CPP_TOOLS=y
	BR2_TARGET_GENERIC_GETTY_PORT="ttyAMA0"
	BR2_TARGET_ROOTFS_EXT2=y
	BR2_TARGET_ROOTFS_EXT2_SIZE="1024M"
	# BR2_TARGET_ROOTFS_TAR is not set
	"""

	def login(self):
	img = os.path.join(self.builddir, "images", "rootfs.ext2")
	kern = os.path.join(self.builddir, "images", "Image")
	self.emulator.boot(
	arch="aarch64",
	kernel=kern,
	kernel_cmdline=["root=/dev/vda", "console=ttyAMA0"],
	options=[
	"-M", "virt",
	"-cpu", "cortex-a57",
	"-smp", "4",
	"-m", "2G",
	"-drive", f"file={img},if=virtio,format=raw",
	"-net", "nic,model=virtio",
	"-net", "user"
	]
	)
	self.emulator.login()

	def test_run(self):
	self.login()

	# Check the program can execute.
	self.assertRunOk("llama-cli --version")

	# We define a Hugging Face model to be downloaded.
	# We choose a relatively small model, for testing.
	hf_model = "ggml-org/gemma-3-270m-it-GGUF"

	# We define a common knowledge question to ask to the model.
	prompt = "What is the capital of the United Kingdom?"

	# We define an expected keyword, to be present in the answer.
	expected_answer = "london"

	# We set few llama-cli options:
	# We don't want an interactive session
	llama_opts = "--single-turn"
	llama_opts += " --no-display-prompt"
	# We set a fixed seed, to reduce variability of the test
	llama_opts += " --seed 123456789"
	llama_opts += f" --hf-repo {hf_model}"
	llama_opts += f" --prompt '{prompt}'"

	# Run the command. We suppress all stderr output logs to get
	# only the answer. Remove the redirection for debugging.
	cmd = f"llama-cli {llama_opts} 2>/dev/null"
	out, ret = self.emulator.run(cmd, timeout=60)
	self.assertEqual(ret, 0)
	out_str = "\n".join(out).lower()
	self.assertIn(expected_answer, out_str)