usr/src/cmd/sgs/common/leb128.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <stdio.h>
#include <dwarf.h>
#include <sys/types.h>
#include <sys/elf.h>

/*
 * Little Endian Base 128 (LEB128) numbers.
 * ----------------------------------------
 *
 * LEB128 is a scheme for encoding integers densely that exploits the
 * assumption that most integers are small in magnitude. (This encoding
 * is equally suitable whether the target machine architecture represents
 * data in big-endian or little-endian order.)
 *
 * Unsigned LEB128 numbers are encoded as follows: start at the low order
 * end of an unsigned integer and chop it into 7-bit chunks. Place each
 * chunk into the low order 7 bits of a byte. Typically, several of the
 * high order bytes will be zero; discard them. Emit the remaining bytes in
 * a stream, starting with the low order byte; set the high order bit on
 * each byte except the last emitted byte. The high bit of zero on the last
 * byte indicates to the decoder that it has encountered the last byte.
 * The integer zero is a special case, consisting of a single zero byte.
 *
 * Signed, 2s complement LEB128 numbers are encoded in a similar manner,
 * except that the criterion for discarding high order bytes is not whether
 * they are zero, but whether they consist entirely of sign extension bits.
 * Consider the 32-bit integer -2. The three high order bytes of the number
 * are sign extension, thus LEB128 would represent it as a single byte
 * containing the low order 7 bits, with the high order bit cleared to
 * indicate the end of the byte stream.
 *
 * Note that there is nothing within the LEB128 representation that
 * indicates whether an encoded number is signed or unsigned. The decoder
 * must know what type of number to expect.
 *
 * DWARF Exception Header Encoding
 * -------------------------------
 *
 * The DWARF Exception Header Encoding is used to describe the type of data
 * used in the .eh_frame_hdr section. The upper 4 bits indicate how the
 * value is to be applied. The lower 4 bits indicate the format of the data.
 *
 * DWARF Exception Header value format
 *
 * Name		Value Meaning
 * DW_EH_PE_omit	    0xff No value is present.
 * DW_EH_PE_absptr	    0x00 Value is a void*
 * DW_EH_PE_uleb128	    0x01 Unsigned value is encoded using the
 *				 Little Endian Base 128 (LEB128)
 * DW_EH_PE_udata2	    0x02 A 2 bytes unsigned value.
 * DW_EH_PE_udata4	    0x03 A 4 bytes unsigned value.
 * DW_EH_PE_udata8	    0x04 An 8 bytes unsigned value.
 * DW_EH_PE_signed          0x08 bit on for all signed encodings
 * DW_EH_PE_sleb128	    0x09 Signed value is encoded using the
 *				 Little Endian Base 128 (LEB128)
 * DW_EH_PE_sdata2	    0x0A A 2 bytes signed value.
 * DW_EH_PE_sdata4	    0x0B A 4 bytes signed value.
 * DW_EH_PE_sdata8	    0x0C An 8 bytes signed value.
 *
 * DWARF Exception Header application
 *
 * Name	    Value Meaning
 * DW_EH_PE_absptr	   0x00 Value is used with no modification.
 * DW_EH_PE_pcrel	   0x10 Value is relative to the location of itself
 * DW_EH_PE_textrel	   0x20
 * DW_EH_PE_datarel	   0x30 Value is relative to the beginning of the
 *				eh_frame_hdr segment ( segment type
 *			        PT_GNU_EH_FRAME )
 * DW_EH_PE_funcrel        0x40
 * DW_EH_PE_aligned        0x50 value is an aligned void*
 * DW_EH_PE_indirect       0x80 bit to signal indirection after relocation
 * DW_EH_PE_omit	   0xff No value is present.
 *
 */

/*
 * Decode an unsigned LEB128 number.
 *
 * entry:
 *	data - Base of data buffer containing encoded bytes
 *	dotp - Address of variable containing index within data
 *		at which the encoded number starts.
 *	len - Number of bytes available in data
 *	ret - Address of variable to receive the decoded value
 *
 * exit:
 *	On success, DW_SUCCESS is returned, *ret holds the decoded value,
 *	and *dotp has been advanced past the last byte of the encoding.
 *	If the encoding runs past the end of the buffer, DW_OVERFLOW is
 *	returned and neither *dotp nor *ret is modified.
 *
 * note:
 *	Payload bits that do not fit in 64 bits are discarded. The bytes
 *	carrying them are still consumed.
 */
dwarf_error_t
uleb_extract(unsigned char *data, uint64_t *dotp, size_t len, uint64_t *ret)
{
	uint64_t	dot = *dotp;
	uint64_t	res = 0;
	unsigned int	shift = 0;
	unsigned char	byte;

	do {
		/*
		 * Valid indexes are 0 through len - 1, so an index equal
		 * to len is already beyond the end of the buffer.
		 */
		if (dot >= len)
			return (DW_OVERFLOW);

		byte = data[dot++];

		/*
		 * Place the low 7 bits above those collected so far. The
		 * chunk is widened to 64 bits before shifting so that no
		 * bits are lost once shift reaches the width of an int,
		 * and the shift count is kept below 64, the largest count
		 * for which the operation is defined.
		 */
		if (shift < 64) {
			res |= (uint64_t)(byte & 0x7f) << shift;
			shift += 7;
		}

		/*
		 * The high bit is set on every byte except the last.
		 */
	} while ((byte & 0x80) != 0);

	*dotp = dot;
	*ret = res;
	return (DW_SUCCESS);
}

/*
 * Decode a signed (2s complement) LEB128 number.
 *
 * entry:
 *	data - Base of data buffer containing encoded bytes
 *	dotp - Address of variable containing index within data
 *		at which the encoded number starts.
 *	len - Number of bytes available in data
 *	ret - Address of variable to receive the decoded value
 *
 * exit:
 *	On success, DW_SUCCESS is returned, *ret holds the sign extended
 *	value, and *dotp has been advanced past the last byte of the
 *	encoding. If the encoding runs past the end of the buffer,
 *	DW_OVERFLOW is returned and neither *dotp nor *ret is modified.
 *
 * note:
 *	Payload bits that do not fit in 64 bits are discarded. The bytes
 *	carrying them are still consumed.
 */
dwarf_error_t
sleb_extract(unsigned char *data, uint64_t *dotp, size_t len, int64_t *ret)
{
	uint64_t	dot = *dotp;
	uint64_t	res = 0;
	unsigned int	shift = 0;
	unsigned char	byte;

	do {
		/*
		 * Valid indexes are 0 through len - 1, so an index equal
		 * to len is already beyond the end of the buffer.
		 */
		if (dot >= len)
			return (DW_OVERFLOW);

		byte = data[dot++];

		/*
		 * Place the low 7 bits above those collected so far. The
		 * value is accumulated as an unsigned 64-bit quantity so
		 * that no shift is ever applied to a signed operand, and
		 * the shift count is kept below 64.
		 */
		if (shift < 64) {
			res |= (uint64_t)(byte & 0x7f) << shift;
			shift += 7;
		}

		/*
		 * The high bit is set on every byte except the last.
		 */
	} while ((byte & 0x80) != 0);

	/*
	 * Make sure value is properly sign extended. The sign is bit 6
	 * of the final byte. When fewer than 64 bits were collected,
	 * replicate it through the remaining high order bits. When 64 or
	 * more were collected, the sign already sits in bit 63.
	 */
	if ((shift < 64) && ((byte & 0x40) != 0))
		res |= ~(uint64_t)0 << shift;

	*dotp = dot;
	*ret = (int64_t)res;
	return (DW_SUCCESS);
}

/*
 * Extract a DWARF encoded datum
 *
 * entry:
 *	data - Base of data buffer containing encoded bytes
 *	len - Number of bytes available in data
 *	dotp - Address of variable containing index within data
 *		at which the desired datum starts.
 *	ret - Address of variable to receive the extracted value
 *	ehe_flags - DWARF encoding
 *	eident - ELF header e_ident[] array for object being processed
 *	frame_hdr - Boolean, true if we're extracting from .eh_frame_hdr
 *	sh_base - Base address of ELF section containing desired datum
 *	sh_offset - Offset relative to sh_base of desired datum.
 *	dbase - The base address to which DW_EH_PE_datarel is relative
 *		(if frame_hdr is false)
 *
 * exit:
 *	DW_SUCCESS - *ret holds the value and *dotp has been advanced past
 *		the datum. For DW_EH_PE_omit, *ret is set to 0 and *dotp
 *		is left unchanged.
 *	DW_OVERFLOW - The datum extends past the end of the buffer. For
 *		fixed size formats, *dotp and *ret are not modified.
 *	DW_BAD_ENCODING - The format held in the low 4 bits of ehe_flags
 *		is not recognized. *ret is set to 0.
 *
 * note:
 *	LEB128 encoded values are returned exactly as decoded by
 *	uleb_extract()/sleb_extract(). The base address adjustment selected
 *	by the upper 4 bits of ehe_flags is applied to fixed size formats
 *	only.
 */
dwarf_error_t
dwarf_ehe_extract(unsigned char *data, size_t len, uint64_t *dotp,
    uint64_t *ret, uint_t ehe_flags, unsigned char *eident,
    boolean_t frame_hdr, uint64_t sh_base, uint64_t sh_offset,
    uint64_t dbase)
{
	uint64_t    dot = *dotp;
	uint64_t    result = 0;
	uint_t	    lsb;
	uint_t	    wordsize;
	uint_t	    fsize;
	uint_t	    cnt;
	uint_t	    padbits;

	lsb = (eident[EI_DATA] == ELFDATA2LSB) ? 1 : 0;
	wordsize = (eident[EI_CLASS] == ELFCLASS64) ? 8 : 4;

	/*
	 * DW_EH_PE_omit occupies the entire encoding byte, so it must be
	 * recognized before the format nibble is isolated below. Once
	 * masked with 0x0f the value can no longer compare equal to it.
	 */
	if (ehe_flags == DW_EH_PE_omit) {
		*ret = 0;
		return (DW_SUCCESS);
	}

	/*
	 * The lower 4 bits give the format of the data.
	 */
	switch (ehe_flags & 0x0f) {
	case DW_EH_PE_absptr:
		fsize = wordsize;
		break;
	case DW_EH_PE_udata8:
	case DW_EH_PE_sdata8:
		fsize = 8;
		break;
	case DW_EH_PE_udata4:
	case DW_EH_PE_sdata4:
		fsize = 4;
		break;
	case DW_EH_PE_udata2:
	case DW_EH_PE_sdata2:
		fsize = 2;
		break;
	case DW_EH_PE_uleb128:
		return (uleb_extract(data, dotp, len, ret));
	case DW_EH_PE_sleb128:
		return (sleb_extract(data, dotp, len, (int64_t *)ret));
	default:
		*ret = 0;
		return (DW_BAD_ENCODING);
	}

	/*
	 * All fsize bytes, at indexes dot through dot + fsize - 1, must
	 * lie within the buffer. Comparing against the space that remains
	 * avoids any possibility of dot + fsize wrapping.
	 */
	if ((dot > len) || (fsize > (len - dot)))
		return (DW_OVERFLOW);

	/*
	 * Extract unaligned data one byte at a time. For LSB formatted
	 * data the first byte is the least significant, for MSB formatted
	 * data it is the most significant.
	 */
	for (cnt = 0; cnt < fsize; cnt++, dot++) {
		uint_t	byteno = lsb ? cnt : (fsize - cnt - 1);

		result |= (uint64_t)data[dot] << (byteno * 8);
	}

	/*
	 * perform sign extension
	 *
	 * If the top bit of the fsize byte value is set, fill every bit
	 * above it. This is done on the unsigned value so that no signed
	 * operand is ever shifted.
	 */
	if ((ehe_flags & DW_EH_PE_signed) &&
	    (fsize < sizeof (uint64_t))) {
		uint64_t	signbit = (uint64_t)1 << ((fsize * 8) - 1);

		if ((result & signbit) != 0)
			result |= ~(uint64_t)0 << (fsize * 8);
	}

	/*
	 * If value is relative to a base address, adjust it
	 */
	switch (ehe_flags & 0xf0) {
	case DW_EH_PE_pcrel:
		result += sh_base + sh_offset;
		break;

	/*
	 * datarel is relative to .eh_frame_hdr if within .eh_frame,
	 * but GOT if not.
	 */
	case DW_EH_PE_datarel:
		if (frame_hdr)
			result += sh_base;
		else
			result += dbase;
		break;

	default:
		break;
	}

	/* Truncate the result to its specified size */
	padbits = (sizeof (uint64_t) - fsize) * 8;
	result = (result << padbits) >> padbits;

	*dotp = dot;
	*ret = result;
	return (DW_SUCCESS);
}