Objective
- A local LLM as the AI, with no connection to the internet
- Ingesting personal documents from the NAS (strictly speaking retrieval/indexing, not actual fine-tuning)
- Using this information in the chatbot
To make use of the Nvidia GPU, only a Windows machine is currently an option here. Thanks to WSL, however, Ubuntu can run on top of it, so the preferred operating system can still be used.
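Before installing anything, it is worth checking that the GPU is actually visible from inside the WSL Ubuntu shell (the Windows Nvidia driver passes the card through to WSL); if nvidia-smi does not list it, the later CUDA build will not help either:
nvidia-smi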
There is a complete how-to at https://dev.to/docteurrs/installing-privategpt-on-wsl-with-gpu-support-1m2a that explains all the steps.
The actual source is: git clone https://github.com/zylon-ai/private-gpt
The only adjustments that were necessary (in my case):
sudo apt install nvidia-cuda-toolkit
CMAKE_ARGS='-DLLAMA_CUBLAS=OFF -DGGML_CUDA=ON' poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python
poetry install --extras ui --extras llms-llama-cpp --extras vector-stores-qdrant --extras embeddings-huggingface
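To check whether the rebuilt llama-cpp-python really was compiled with CUDA, a one-liner from inside the Poetry environment helps. This is a sketch under the assumption that the installed llama-cpp-python version exposes the llama_supports_gpu_offload binding:
poetry run python -c "from llama_cpp import llama_supports_gpu_offload; print(llama_supports_gpu_offload())"
If this prints False, the wheel was built without CUDA and the CMAKE_ARGS step above has to be repeated.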
After that, a source directory can be "watched", and the documents placed there ingested automatically, with:
make ingest /[source directory]/ -- --watch --log-file ./ingest.log
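The make target is only a thin wrapper: in the upstream repository it hands the arguments after -- to scripts/ingest_folder.py, so the same run can also be started directly:
poetry run python scripts/ingest_folder.py /[source directory]/ --watch --log-file ./ingest.log
In watch mode the script keeps running, picks up new or changed files, and logs its progress to the given file, so it can stay active alongside the chatbot.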
Office Support
Since the original does not support all Office file types, I made a few adjustments.
- poetry add python-pptx
- poetry add openpyxl
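A quick sanity check that both packages ended up in the project's virtual environment:
poetry run python -c "import pptx, openpyxl; print(pptx.__version__, openpyxl.__version__)"
python-pptx is what the PptxReader needs at runtime; openpyxl is imported by the patched helper below for the .xlsx handling.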
/private-gpt/private_gpt/components/ingest/ingest_helper.py
import logging
from pathlib import Path
from typing import List, Union

from openpyxl import load_workbook
from openpyxl.utils.exceptions import InvalidFileException

from llama_index.core.readers import StringIterableReader
from llama_index.core.readers.base import BaseReader
from llama_index.core.readers.json import JSONReader
from llama_index.core.schema import Document

logger = logging.getLogger(__name__)


# Inspired by the `llama_index.core.readers.file.base` module
def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
    try:
        from llama_index.readers.file.docs import DocxReader, PDFReader  # type: ignore
        from llama_index.readers.file.slides import PptxReader  # type: ignore
        from llama_index.readers.file.tabular import PandasCSVReader  # type: ignore
    except ImportError as e:
        raise ImportError("`llama-index-readers-file` package not found") from e
    return {
        ".pdf": PDFReader,
        ".docx": DocxReader,
        ".pptx": PptxReader,
        ".csv": PandasCSVReader,
    }


# Patching the default file reader to support other file types
FILE_READER_CLS = _try_loading_included_file_formats()
FILE_READER_CLS.update(
    {
        ".json": JSONReader,
    }
)


class IngestionHelper:
    """Helper class to transform a file or text into a list of documents."""

    @staticmethod
    def transform_file_into_documents(
        file_name: str, file_data: Union[str, Path]
    ) -> List[Document]:
        # Handle raw text
        if isinstance(file_data, str) and "\n" in file_data:
            logger.info("Detected raw text input; processing as text content.")
            return IngestionHelper._handle_raw_text(file_data)
        # Handle file paths
        if isinstance(file_data, (str, Path)):
            try:
                file_data_path = Path(file_data).resolve(strict=True)
                return IngestionHelper._load_file_to_documents(file_name, file_data_path)
            except FileNotFoundError:
                logger.error(f"File not found: {file_data}")
            except OSError as e:
                logger.error(f"Error accessing file: {file_data} - {e}")
        return []

    @staticmethod
    def _handle_raw_text(raw_text: str) -> List[Document]:
        """Handle cases where the input is raw text instead of a file."""
        string_reader = StringIterableReader()
        try:
            return string_reader.load_data([raw_text])
        except Exception as e:
            logger.error(f"Error processing raw text: {e}")
            return []

    @staticmethod
    def _load_file_to_documents(file_name: str, file_data: Path) -> List[Document]:
        logger.debug("Transforming file_name=%s into documents", file_name)
        if not file_data.is_file():
            logger.error(f"Path is not a file: {file_data}")
            return []
        extension = file_data.suffix.lower()
        reader_cls = FILE_READER_CLS.get(extension)
        if reader_cls is None:
            logger.debug(
                "No reader found for extension=%s, using default string reader",
                extension,
            )
            try:
                string_reader = StringIterableReader()
                return string_reader.load_data([file_data.read_text(encoding="utf-8")])
            except Exception as e:
                logger.error(f"Failed to read text file {file_data}: {e}")
                return []
        logger.debug("Specific reader found for extension=%s", extension)
        try:
            documents = reader_cls().load_data(file_data)
        except Exception as e:
            logger.error(f"Error reading file {file_data} with extension {extension}: {e}")
            documents = []
        # Sanitize NUL bytes in text which can't be stored in databases
        for i in range(len(documents)):
            documents[i].text = documents[i].text.replace("\u0000", "")
        return documents

    @staticmethod
    def _exclude_metadata(documents: List[Document]) -> None:
        logger.debug("Excluding metadata from count=%s documents", len(documents))
        for document in documents:
            document.metadata["doc_id"] = document.doc_id
            # We don't want the Embeddings search to receive this metadata
            document.excluded_embed_metadata_keys = ["doc_id"]
            # We don't want the LLM to receive these metadata in the context
            document.excluded_llm_metadata_keys = ["file_name", "doc_id", "page_label"]
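The listing above already imports load_workbook and InvalidFileException, but the matching .xlsx reader itself is not part of the excerpt. Here is a minimal sketch of how such a reader could be registered, reusing the BaseReader/Document interfaces from the rest of the file (the class name XlsxReader and the one-Document-per-worksheet layout are illustrative choices, not the original patch):

class XlsxReader(BaseReader):
    """Sketch: read every worksheet of an .xlsx file into one Document per sheet."""

    def load_data(self, file: Path, extra_info: Union[dict, None] = None) -> List[Document]:
        try:
            # read_only streams large workbooks; data_only returns computed cell values
            workbook = load_workbook(filename=file, read_only=True, data_only=True)
        except InvalidFileException as e:
            logger.error(f"Not a valid xlsx file {file}: {e}")
            return []
        documents = []
        for sheet in workbook.worksheets:
            # Join cell values row by row; empty cells become empty strings
            rows = [
                "\t".join("" if cell is None else str(cell) for cell in row)
                for row in sheet.iter_rows(values_only=True)
            ]
            documents.append(Document(text="\n".join(rows), metadata=extra_info or {}))
        workbook.close()
        return documents


FILE_READER_CLS[".xlsx"] = XlsxReader

With this mapping in place, _load_file_to_documents picks the reader for .xlsx files the same way it does for the other extensions.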