convert : fix big-endian conversion (#17431)

* Fix convert_hf_to_gguf.py script on s390x

Assume the model data being converted is originally little-endian.
Byteswap the data on s390x after reading it, so that values are in the correct
(native) representation for any transformations needed, such as calculating weight tensors.

Then byteswap the data back to little-endian before passing it to GGUFWriter;
GGUFWriter will in turn byteswap the data to big-endian if big-endian output is requested.

byteswap(inplace=True) calls don't work with lazy tensor and array wrappers.
Use a copying byteswap (inplace=False) instead to work around this behaviour.

* Make GGUFWriter accept tensors in native endianness instead of little-endian

With this change, if no byteswapping is actually needed, two redundant byteswaps can be omitted on s390x.

* Fix byteswapping in convert_hf_to_gguf.py for remote models
This commit is contained in:
Aleksei Nikiforov
2025-11-25 14:18:16 +01:00
committed by GitHub
parent 55ab25caf5
commit 05872ac885
2 changed files with 42 additions and 6 deletions
+9 -4
View File
@@ -4,6 +4,7 @@ import logging
import os
import shutil
import struct
import sys
import tempfile
from dataclasses import dataclass
from enum import Enum, auto
@@ -372,8 +373,10 @@ class GGUFWriter:
self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
raw_dtype: GGMLQuantizationType | None = None,
) -> None:
if self.endianess == GGUFEndian.BIG:
tensor.byteswap(inplace=True)
if (self.endianess == GGUFEndian.BIG and sys.byteorder != 'big') or \
(self.endianess == GGUFEndian.LITTLE and sys.byteorder != 'little'):
# Don't byteswap inplace since lazy copies cannot handle it
tensor = tensor.byteswap(inplace=False)
if self.use_temp_file and self.temp_file is None:
fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)
fp.seek(0)
@@ -399,8 +402,10 @@ class GGUFWriter:
raise ValueError(f'Expected output file to contain tensor info or weights, got {self.state}')
assert self.fout is not None
if self.endianess == GGUFEndian.BIG:
tensor.byteswap(inplace=True)
if (self.endianess == GGUFEndian.BIG and sys.byteorder != 'big') or \
(self.endianess == GGUFEndian.LITTLE and sys.byteorder != 'little'):
# Don't byteswap inplace since lazy copies cannot handle it
tensor = tensor.byteswap(inplace=False)
file_id = -1
for i, tensors in enumerate(self.tensors):