path: root/community/go/0001-runtime-use-entire-address-space-on-32-bit.patch
From 2017cb5f865afa822b2d2355eeb4d5555395143c Mon Sep 17 00:00:00 2001
From: Elias Naur <elias.naur@gmail.com>
Date: Wed, 9 Mar 2016 10:00:12 +0100
Subject: [PATCH] runtime: use entire address space on 32 bit

In issue #13992, Russ mentioned that the heap bitmap footprint was
halved but that the bitmap size calculation hadn't been updated. This
presents the opportunity to either halve the bitmap size or double
the addressable virtual space. This CL doubles the addressable virtual
space. On 32 bit this can be tweaked further to allow the bitmap to
cover the entire 4GB virtual address space, removing a failure mode if
the kernel hands out memory at too low an address.

First, fix the calculation and double _MaxArena32 to cover 4GB virtual
memory space with the same bitmap size (256 MB).
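
As a rough sanity check of that arithmetic (not part of the patch; the
names below are illustrative and only mirror the 32-bit runtime values):

    // Verify that a 2-bits-per-word heap bitmap covering the full
    // 4GB address space of a 32-bit platform needs 256 MB.
    package main

    import "fmt"

    func main() {
        const ptrSize = 4                  // bytes per word on 32 bit
        const arenaBytes = uint64(1) << 32 // entire 4GB address space
        words := arenaBytes / ptrSize      // 2^30 words
        bitmapBytes := words * 2 / 8       // 2 bits per word, 8 bits per byte
        fmt.Println(bitmapBytes>>20, "MB") // prints: 256 MB
    }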

Then, allow the fallback mode for the initial memory reservation
on 32 bit (or 64 bit with too little available virtual memory) to not
include space for the arena. mheap.sysAlloc will automatically reserve
additional space when the existing arena is full.

Finally, set arena_start to 0 in 32 bit mode, so that any address is
acceptable for subsequent (additional) reservations.

Before, the bitmap was always located just before arena_start, so
fix the two places relying on that assumption: Point the otherwise unused
mheap.bitmap to one byte after the end of the bitmap, and use it for
bitmap addressing instead of arena_start.
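
A minimal sketch of the new addressing, assuming a hypothetical helper
that mirrors heapBitsForAddr from the diff below (not a runtime API):

    // bitmapByteFor walks backwards from bitmapEnd (mheap.bitmap), which
    // now points one byte past the end of the bitmap, instead of
    // forwards from arena_start as before.
    func bitmapByteFor(addr, arenaStart, bitmapEnd uintptr) (bytePtr uintptr, pair uint32) {
        const ptrSize = 4 // 32-bit word size
        off := (addr - arenaStart) / ptrSize
        return bitmapEnd - off/4 - 1, uint32(off & 3)
    }

For example, the word at arenaStart+64 has off = 16, so its two bits
live in the byte at bitmapEnd-5, pair index 0.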

With arena_start set to 0 on 32 bit, the cgoInRange check is no longer a
sufficient check for Go pointers. Introduce and call inHeapOrStack to
check whether a pointer is to the Go heap or stack.

While we're here, remove sysReserveHigh which seems to be unused.

Fixes #13992

Change-Id: I592b513148a50b9d3967b5c5d94b86b3ec39acc2
Reviewed-on: https://go-review.googlesource.com/20471
Reviewed-by: Austin Clements <austin@google.com>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
(cherry picked from commit e6ec82067a9068c93db6e7041017060a1c863452)
---
 src/runtime/cgocall.go |  2 +-
 src/runtime/malloc.go  | 82 +++++++++++++++++++-------------------------------
 src/runtime/mbitmap.go |  4 +--
 src/runtime/mheap.go   | 24 ++++++++++++++-
 4 files changed, 57 insertions(+), 55 deletions(-)

diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
index fef8add..30c3a67 100644
--- a/src/runtime/cgocall.go
+++ b/src/runtime/cgocall.go
@@ -568,7 +568,7 @@ func cgoIsGoPointer(p unsafe.Pointer) bool {
 		return false
 	}
 
-	if cgoInRange(p, mheap_.arena_start, mheap_.arena_used) {
+	if inHeapOrStack(uintptr(p)) {
 		return true
 	}
 
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index b520c68..f3c6e88 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -173,7 +173,7 @@ const (
 // Page number (address>>pageShift)
 type pageID uintptr
 
-const _MaxArena32 = 2 << 30
+const _MaxArena32 = 1<<32 - 1
 
 // OS-defined helpers:
 //
@@ -229,8 +229,8 @@ func mallocinit() {
 	limit = 0
 
 	// Set up the allocation arena, a contiguous area of memory where
-	// allocated data will be found.  The arena begins with a bitmap large
-	// enough to hold 4 bits per allocated word.
+	// allocated data will be found. The arena begins with a bitmap large
+	// enough to hold 2 bits per allocated word.
 	if sys.PtrSize == 8 && (limit == 0 || limit > 1<<30) {
 		// On a 64-bit machine, allocate from a single contiguous reservation.
 		// 512 GB (MaxMem) should be big enough for now.
@@ -262,7 +262,7 @@ func mallocinit() {
 		// translation buffers, the user address space is limited to 39 bits
 		// On darwin/arm64, the address space is even smaller.
 		arenaSize := round(_MaxMem, _PageSize)
-		bitmapSize = arenaSize / (sys.PtrSize * 8 / 4)
+		bitmapSize = arenaSize / (sys.PtrSize * 8 / 2)
 		spansSize = arenaSize / _PageSize * sys.PtrSize
 		spansSize = round(spansSize, _PageSize)
 		for i := 0; i <= 0x7f; i++ {
@@ -287,32 +287,26 @@ func mallocinit() {
 		// with a giant virtual address space reservation.
 		// Instead we map the memory information bitmap
 		// immediately after the data segment, large enough
-		// to handle another 2GB of mappings (256 MB),
+		// to handle the entire 4GB address space (256 MB),
 		// along with a reservation for an initial arena.
 		// When that gets used up, we'll start asking the kernel
-		// for any memory anywhere and hope it's in the 2GB
-		// following the bitmap (presumably the executable begins
-		// near the bottom of memory, so we'll have to use up
-		// most of memory before the kernel resorts to giving out
-		// memory before the beginning of the text segment).
-		//
-		// Alternatively we could reserve 512 MB bitmap, enough
-		// for 4GB of mappings, and then accept any memory the
-		// kernel threw at us, but normally that's a waste of 512 MB
-		// of address space, which is probably too much in a 32-bit world.
+		// for any memory anywhere.
 
 		// If we fail to allocate, try again with a smaller arena.
 		// This is necessary on Android L where we share a process
 		// with ART, which reserves virtual memory aggressively.
+		// In the worst case, fall back to a 0-sized initial arena,
+		// in the hope that subsequent reservations will succeed.
 		arenaSizes := []uintptr{
 			512 << 20,
 			256 << 20,
 			128 << 20,
+			0,
 		}
 
 		for _, arenaSize := range arenaSizes {
-			bitmapSize = _MaxArena32 / (sys.PtrSize * 8 / 4)
-			spansSize = _MaxArena32 / _PageSize * sys.PtrSize
+			bitmapSize = (_MaxArena32 + 1) / (sys.PtrSize * 8 / 2)
+			spansSize = (_MaxArena32 + 1) / _PageSize * sys.PtrSize
 			if limit > 0 && arenaSize+bitmapSize+spansSize > limit {
 				bitmapSize = (limit / 9) &^ ((1 << _PageShift) - 1)
 				arenaSize = bitmapSize * 8
@@ -347,10 +341,16 @@ func mallocinit() {
 	p1 := round(p, _PageSize)
 
 	mheap_.spans = (**mspan)(unsafe.Pointer(p1))
-	mheap_.bitmap = p1 + spansSize
-	mheap_.arena_start = p1 + (spansSize + bitmapSize)
-	mheap_.arena_used = mheap_.arena_start
+	mheap_.bitmap = p1 + spansSize + bitmapSize
+	if sys.PtrSize == 4 {
+		// Set arena_start such that we can accept memory
+		// reservations located anywhere in the 4GB virtual space.
+		mheap_.arena_start = 0
+	} else {
+		mheap_.arena_start = p1 + (spansSize + bitmapSize)
+	}
 	mheap_.arena_end = p + pSize
+	mheap_.arena_used = p1 + (spansSize + bitmapSize)
 	mheap_.arena_reserved = reserved
 
 	if mheap_.arena_start&(_PageSize-1) != 0 {
@@ -364,36 +364,17 @@ func mallocinit() {
 	_g_.m.mcache = allocmcache()
 }
 
-// sysReserveHigh reserves space somewhere high in the address space.
-// sysReserve doesn't actually reserve the full amount requested on
-// 64-bit systems, because of problems with ulimit. Instead it checks
-// that it can get the first 64 kB and assumes it can grab the rest as
-// needed. This doesn't work well with the "let the kernel pick an address"
-// mode, so don't do that. Pick a high address instead.
-func sysReserveHigh(n uintptr, reserved *bool) unsafe.Pointer {
-	if sys.PtrSize == 4 {
-		return sysReserve(nil, n, reserved)
-	}
-
-	for i := 0; i <= 0x7f; i++ {
-		p := uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
-		*reserved = false
-		p = uintptr(sysReserve(unsafe.Pointer(p), n, reserved))
-		if p != 0 {
-			return unsafe.Pointer(p)
-		}
-	}
-
-	return sysReserve(nil, n, reserved)
-}
-
+// sysAlloc allocates the next n bytes from the heap arena. The
+// returned pointer is always _PageSize aligned and between
+// h.arena_start and h.arena_end. sysAlloc returns nil on failure.
+// There is no corresponding free function.
 func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
 	if n > h.arena_end-h.arena_used {
 		// We are in 32-bit mode, maybe we didn't use all possible address space yet.
 		// Reserve some more space.
 		p_size := round(n+_PageSize, 256<<20)
 		new_end := h.arena_end + p_size // Careful: can overflow
-		if h.arena_end <= new_end && new_end <= h.arena_start+_MaxArena32 {
+		if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxArena32 {
 			// TODO: It would be bad if part of the arena
 			// is reserved and part is not.
 			var reserved bool
@@ -404,7 +385,7 @@ func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
 			if p == h.arena_end {
 				h.arena_end = new_end
 				h.arena_reserved = reserved
-			} else if h.arena_start <= p && p+p_size <= h.arena_start+_MaxArena32 {
+			} else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxArena32 {
 				// Keep everything page-aligned.
 				// Our pages are bigger than hardware pages.
 				h.arena_end = p + p_size
@@ -441,23 +422,22 @@ func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
 	}
 
 	// If using 64-bit, our reservation is all we have.
-	if h.arena_end-h.arena_start >= _MaxArena32 {
+	if h.arena_end-h.arena_start > _MaxArena32 {
 		return nil
 	}
 
 	// On 32-bit, once the reservation is gone we can
-	// try to get memory at a location chosen by the OS
-	// and hope that it is in the range we allocated bitmap for.
+	// try to get memory at a location chosen by the OS.
 	p_size := round(n, _PageSize) + _PageSize
 	p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
 	if p == 0 {
 		return nil
 	}
 
-	if p < h.arena_start || uintptr(p)+p_size-h.arena_start >= _MaxArena32 {
+	if p < h.arena_start || uintptr(p)+p_size-h.arena_start > _MaxArena32 {
 		top := ^uintptr(0)
-		if top-h.arena_start > _MaxArena32 {
-			top = h.arena_start + _MaxArena32
+		if top-h.arena_start-1 > _MaxArena32 {
+			top = h.arena_start + _MaxArena32 + 1
 		}
 		print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n")
 		sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys)
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 336d4d8..9a5d83b 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -145,7 +145,7 @@ func (h *mheap) mapBits(arena_used uintptr) {
 		return
 	}
 
-	sysMap(unsafe.Pointer(h.arena_start-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
+	sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
 	h.bitmap_mapped = n
 }
 
@@ -166,7 +166,7 @@ type heapBits struct {
 func heapBitsForAddr(addr uintptr) heapBits {
 	// 2 bits per work, 4 pairs per byte, and a mask is hard coded.
 	off := (addr - mheap_.arena_start) / sys.PtrSize
-	return heapBits{(*uint8)(unsafe.Pointer(mheap_.arena_start - off/4 - 1)), uint32(off & 3)}
+	return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)}
 }
 
 // heapBitsForSpan returns the heapBits for the span base address base.
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index a153df0..1db6a49 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -46,7 +46,7 @@ type mheap struct {
 	nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
 
 	// range of addresses we might see in the heap
-	bitmap         uintptr
+	bitmap         uintptr // Points to one byte past the end of the bitmap
 	bitmap_mapped  uintptr
 	arena_start    uintptr
 	arena_used     uintptr // always mHeap_Map{Bits,Spans} before updating
@@ -217,6 +217,28 @@ func inheap(b uintptr) bool {
 	return true
 }
 
+// inHeapOrStack is a variant of inheap that returns true for pointers into stack spans.
+//go:nowritebarrier
+//go:nosplit
+func inHeapOrStack(b uintptr) bool {
+	if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
+		return false
+	}
+	// Not a beginning of a block, consult span table to find the block beginning.
+	s := h_spans[(b-mheap_.arena_start)>>_PageShift]
+	if s == nil || b < s.base() {
+		return false
+	}
+	switch s.state {
+	case mSpanInUse:
+		return b < s.limit
+	case _MSpanStack:
+		return b < s.base()+s.npages<<_PageShift
+	default:
+		return false
+	}
+}
+
 // TODO: spanOf and spanOfUnchecked are open-coded in a lot of places.
 // Use the functions instead.
 
-- 
2.8.2