14
14
15
15
# Necessary to load the local gguf package: when running from a source
# checkout (and NO_LOCAL_GGUF is not set), prefer the sibling gguf-py
# package over any installed version.
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFReader, GGUFWriter, ReaderField, GGUFEndian, GGUFValueType, Keys  # noqa: E402
@@ -31,87 +30,6 @@ def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
31
30
return (host_endian , file_endian )
32
31
33
32
34
# For more information about what field.parts and field.data represent,
# please see the comments in the modify_gguf.py example.
def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
    """Print a human-readable dump of the file's key/value metadata and tensors.

    Fields whose name starts with Keys.General.FILE_MARK are treated as
    embedded binary payloads and are not printed verbatim.
    """
    host_endian, file_endian = get_file_host_endian(reader)
    print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.')
    print(f'\n* Dumping {len(reader.fields)} key/value pair(s)')
    for n, field in enumerate(reader.fields.values(), 1):
        # Render the field's type, wrapping array nesting in brackets.
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            nest_count = len(field.types) - 1
            pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
        else:
            pretty_type = str(field.types[-1].name)
        print(f'  {n:5}: {pretty_type:11} | {len(field.data):8} | {field.name}', end = '')
        # Only scalar (single-type) fields get an inline value preview.
        if len(field.types) == 1:
            curr_type = field.types[0]
            if curr_type == GGUFValueType.STRING:
                if not field.name[0] == Keys.General.FILE_MARK:
                    # Truncate long strings to 60 characters for readability.
                    print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '')
                else:
                    print(' = binary data', end = '')
            elif field.types[0] in reader.gguf_scalar_to_np:
                print(' = {0}'.format(field.parts[-1][0]), end = '')
        print()
    if args.no_tensors:
        return
    print(f'\n* Dumping {len(reader.tensors)} tensor(s)')
    for n, tensor in enumerate(reader.tensors, 1):
        # Pad the shape out to 4 dimensions so columns line up.
        prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
        print(f'  {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}')
67
def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
    """Write the file's metadata (and optionally tensor info) to stdout as JSON.

    Array values are included only when args.json_array is set; tensor
    entries are skipped when args.no_tensors is set.
    NOTE(review): the FILE_MARK branches store raw ``bytes`` values, which
    json.dump cannot serialize directly — confirm those fields are filtered
    out before this path is hit.
    """
    import json
    host_endian, file_endian = get_file_host_endian(reader)
    metadata: dict[str, Any] = {}
    tensors: dict[str, Any] = {}
    result = {
        "filename": args.input,
        "endian": file_endian,
        "metadata": metadata,
        "tensors": tensors,
    }
    for idx, field in enumerate(reader.fields.values()):
        curr: dict[str, Any] = {
            "index": idx,
            "type": field.types[0].name if field.types else 'UNKNOWN',
            "offset": field.offset,
        }
        metadata[field.name] = curr
        if field.types[:1] == [GGUFValueType.ARRAY]:
            curr["array_types"] = [t.name for t in field.types][1:]
            if not args.json_array:
                continue
            itype = field.types[-1]
            if itype == GGUFValueType.STRING:
                if not field.name[0] == Keys.General.FILE_MARK:
                    curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
                else:
                    curr["value"] = [bytes(field.parts[idx]) for idx in field.data]
            else:
                # Flatten each part's array into one list of scalars.
                curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
        elif field.types[0] == GGUFValueType.STRING:
            if not field.name[0] == Keys.General.FILE_MARK:
                curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
            else:
                curr["value"] = bytes(field.parts[-1])
        else:
            curr["value"] = field.parts[-1].tolist()[0]
    if not args.no_tensors:
        for idx, tensor in enumerate(reader.tensors):
            tensors[tensor.name] = {
                "index": idx,
                "shape": tensor.shape.tolist(),
                "type": tensor.tensor_type.name,
                "offset": tensor.field.offset,
            }
    json.dump(result, sys.stdout)
115
33
def get_byteorder (reader : GGUFReader ) -> GGUFEndian :
116
34
if np .uint32 (1 ) == np .uint32 (1 ).newbyteorder ("<" ):
117
35
# Host is little endian
@@ -215,9 +133,6 @@ def main() -> None:
215
133
parser .add_argument ("input" , type = str , help = "GGUF format model input filename" )
216
134
parser .add_argument ("output" , type = str , help = "GGUF format model output filename" )
217
135
parser .add_argument ("addfiles" , type = str , nargs = '+' , help = "add filenames ..." )
218
- parser .add_argument ("--no-tensors" , action = "store_true" , help = "Don't dump tensor metadata" )
219
- parser .add_argument ("--json" , action = "store_true" , help = "Produce JSON output" )
220
- parser .add_argument ("--json-array" , action = "store_true" , help = "Include full array values in JSON output (long)" )
221
136
parser .add_argument ("--verbose" , action = "store_true" , help = "Increase output verbosity" )
222
137
args = parser .parse_args (None if len (sys .argv ) > 1 else ["--help" ])
223
138
logging .basicConfig (level = logging .DEBUG if args .verbose else logging .INFO )
@@ -244,15 +159,6 @@ def main() -> None:
244
159
logger .info (f'* Adding: { key } = { path } ' )
245
160
copy_with_new_metadata (reader , writer , new_metadata )
246
161
247
- if args .json :
248
- dump_metadata_json (reader , args )
249
- else :
250
- dump_metadata (reader , args )
251
-
252
- logger .info (f'* Reading: { args .output } ' )
253
- reader = GGUFReader (args .output , 'r' )
254
- dump_metadata (reader , args )
255
-
256
162
257
163
# Script entry point: only run when executed directly, not on import.
if __name__ == '__main__':
    main()
0 commit comments