static char sccsid[] = "@(#)09 1.29 src/bos/kernel/net/kern_malloc.c, sysnet, bos41J, 9511A_all 3/10/95 09:21:04";
/*
 * COMPONENT_NAME: SYSNET
 *
 * FUNCTIONS: aix_kmem_alloc
 *            aix_kmem_suballoc
 *            kmem_alloc
 *            kmem_free
 *            kmem_suballoc
 *            kmeminit
 *            kmeminit_thread
 *            malloc_loan
 *            malloc_thread
 *            net_free
 *            net_malloc
 *            threadtimer
 *
 *
 * ORIGINS: 26,27,85
 *
 *
 * (C) COPYRIGHT International Business Machines Corp. 1988,1993
 * All Rights Reserved
 * Licensed Materials - Property of IBM
 * US Government Users Restricted Rights - Use, duplication or
 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
 */
/*
 * (c) Copyright 1990, 1991, 1992, 1993 OPEN SOFTWARE FOUNDATION, INC.
 * ALL RIGHTS RESERVED
 */
/*
 * OSF/1 1.2
 */
/*
 * Copyright (c) 1987 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted provided
 * that: (1) source distributions retain this entire copyright notice and
 * comment, and (2) distributions including binaries display the following
 * acknowledgement: ``This product includes software developed by the
 * University of California, Berkeley and its contributors'' in the
 * documentation or other materials provided with the distribution and in
 * all advertising materials mentioning features or use of this software.
 * Neither the name of the University nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Base: kern_malloc.c 7.18 (Berkeley) 6/28/90
 */

void threadtimer();
void * aix_kmem_alloc();
void prime_the_buckets();
void * empty_bucket();
void * aix_kmem_suballoc();
#define kmem_alloc(map, size, cpu)	aix_kmem_alloc(map, size, cpu)
#define kmem_suballoc(map, minp, maxp, size, pageable) \
		aix_kmem_suballoc(minp, maxp, size)
#define kmem_free(map, addr, size)	aix_kmem_free(map, addr, size)

#define thread_set_timeout(tmo)	malloc_thread_set_timeout(tmo)

#define microtime(tv) { \
	struct timestruc_t ct; \
	\
	curtime(&ct); \
	(tv)->tv_sec = (int) ct.tv_sec; \
	(tv)->tv_usec = (int) ct.tv_nsec / 1000; \
}

#define EVENT_NETMALLOC	0x0912

#define MACH_ASSERT 1	/* enable error checking */

#define MAXINDX 14	/* WARNING! Must change if MAXALLOCSAVE changes */

#include "sys/types.h"
#include "sys/errids.h"
#include "sys/systm.h"
#include "sys/intr.h"
#include "sys/xmalloc.h"
#include "sys/sleep.h"
#include "sys/time.h"
#include "sys/syspest.h"
#include "sys/vmker.h"
#include "net/spl.h"
#include "net/net_globals.h"
#include "net/net_malloc.h"
#include "sys/errno.h"
#include "sys/trchkid.h"
#include "sys/systemcfg.h"

extern struct heap *pinned_heap;
extern struct heap *kernel_heap;
extern u_int mbclusters, mbufs;
extern int thewall, thewall_dflt;
int allocated;
extern struct xmem mclxmemd;
simple_lock_data_t alloc_lock;

#include "sys/param.h"
#include "sys/time.h"

#include "sys/atomic_op.h"	/* for fetch_and_add() */

/*
 * Reference:
 *   "Design of a General Purpose Memory Allocator for the 4.3BSD UNIX Kernel"
 *   Summer Usenix '88 proceedings pp 295-301.
 *
 * OSF enhanced to include
 *	Parallelization
 *	Asynchronous allocation and freeing of memory buckets
 *	Borrowing of memory for small allocations from larger buckets
 *	Garbage collection and freeing of "extra" memory
 *	Tunables
 * AIX enhancements include:
 *	For UP only:
 *	    Serialization done via disable_ints() and enable_ints().
 *	    Reduced granularity to:
 *		serialize the malloc_thread() code,
 *		serialize all bucket access,
 *		all usage structs, and all kmemstats stuff.
 *	Thewall enforcement (with errlogging when thewall is exceeded).
 *	Initial pin of memory based on actual memory size.
 *	Thewall initial value based on actual memory size.
 *
 */
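
/*
 * A rough sketch of how a caller uses this interface (M_TEMP below is
 * only a placeholder for whichever M_* type from net_malloc.h the
 * caller actually owns):
 *
 *	struct foo *fp;
 *
 *	fp = (struct foo *) net_malloc(sizeof(*fp), M_TEMP, M_NOWAIT);
 *	if (fp == NULL)
 *		return (ENOBUFS);
 *	...
 *	net_free((void *) fp, M_TEMP);
 *
 * An M_NOWAIT allocation can return NULL; the type argument only selects
 * which kmemstats[] entry is charged for the allocation, and the flags
 * argument says whether the caller is willing to block.
 */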

#ifdef _POWER_MP

/*
 * For SMP systems, disabling the current CPU is done explicitly via splhigh()
 * as was done by OSF.  Thus, these lock defines only grab the locks.
 */
#define MALLOCTHREAD_LOCK(s)	simple_lock(&bucket[0][0].kb_lock);
#define MALLOCTHREAD_UNLOCK(s)	simple_unlock(&bucket[0][0].kb_lock);
#define KBP_LOCK(kbp)		simple_lock(&kbp->kb_lock);
#define KBP_UNLOCK(kbp)		simple_unlock(&kbp->kb_lock);
#else
#define MALLOCTHREAD_LOCK(s)	s = disable_ints();
#define MALLOCTHREAD_UNLOCK(s)	enable_ints(s);
#define BUCKET_LOCK(s)		s = disable_ints();
#define BUCKET_UNLOCK(s)	enable_ints(s);
#define FAST_BUCKET_LOCK(s)	s = disable_ints();
#define FAST_BUCKET_UNLOCK(s)	enable_ints(s);
#endif

struct kmemstats kmemstats[M_LAST];
const char kmemnames[M_LAST][KMEMNAMSZ] = INITKMEMNAMES;

/*
 * bucket is a 2 dimensional array creating one bucket array structure
 * for each CPU on the system...
 */
struct kmembuckets bucket[MAXCPU][MINBUCKET + 16];
struct kmemusage *kmemusage;
void *kmembase, *kmemlimit;
void *kmemfreelater;
long wantkmemmap;
vm_map_t kmemmap;
thread_t kmemthread;

/*
 * Tuning parameters.
 *	kmempages	Size (in pages) of malloc map.  Configurable only
 *			at startup, unused after.
 *	kmemreserve	The malloc reserve is some number of pages which
 *			are held as some number of elements in the largest
 *			bucket.  This must not be larger than the bucket's
 *			highwater.  This is verified at startup, but not if
 *			changed later.
 *	kmemgcintvl	Frequency in seconds of "normal" garbage collection
 *			passes.  Settable to 0 to disable all gc's.
 *	kmemgcscale	Scaling factor for gc aggressiveness.  Any overage
 *			detected in gc is reduced linearly by this slope
 *			until it reaches 0, then constantly until done.
 */
int kmempages = 0;
int kmemreserve;	/* set in kmeminit() */
int kmemgcintvl = 2;
int kmemgcscale = 64;

/*
 * RESERVE is the number of 16K chunks to keep in each 16K bucket...
 */
#define RESERVE	((kmemreserve * PAGE_SIZE) / MAXALLOCSAVE)
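
/*
 * For example, with 4K pages, a 16K MAXALLOCSAVE and a kmemreserve of 16
 * pages (the 16meg case in kmeminit() below), RESERVE works out to
 * (16 * 4096) / 16384 = 4 chunks held back in each largest bucket.
 * (Illustrative values only.)
 */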

static void *malloc_loan(u_long, int, int);
static long pagindx, maxindx;

int nrpages;

/*
 * Below is code to help debug net_malloc() abusers.  These are kernel
 * modules and extensions who free memory twice, pass in bogus addresses,
 * muck with memory after freeing it, etc...
 *
 * The main check here is to verify at net_free() time, that a chunk
 * being freed hasn't already been freed.  This is non trivial for
 * chunks < 4K.  This is implemented by a 4 word bit array in a struct
 * parallel to the kmem usage struct.  The new struct, called police_usage,
 * gets allocated when a user turns on net_malloc_police via the no command.
 * This bit array has one position for each chunk that makes
 * up the page in question.  When a chunk from the page is allocated via
 * net_malloc(), the corresponding bit is reset in the bit array for that
 * page.  When the net_free() for that chunk happens, the bit array is
 * checked to ensure that the chunk is really allocated.  If not, then
 * we assert, else we turn on the appropriate bit.  We handle buckets
 * from 32B to 2K.
 */
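
/*
 * For example (assuming 4K pages and 32 bit ulongs): a 64 byte chunk at
 * offset 0x4c0 within its page is element 0x4c0 / 64 = 19, so it is
 * tracked by bit 19 % 32 = 19 of word 19 / 32 = 0 in pu_freebits.  The
 * smallest (32 byte) bucket yields 4096 / 32 = 128 elements, which
 * exactly fills the 4 word bit array below.
 */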

/*
 * net_malloc_police is a runtime boolean that enables/disables policing
 * net_malloc()/net_free() users and abusers.  Settable via the no command.
 */
int net_malloc_police_dflt = 0;
int net_malloc_police = 0;

/*
 * Part of policing the users is to keep a list of past malloc/frees.
 * Memory for police_events is allocated when net_malloc_police is turned
 * on via the no command.  The array consists of this struct:
 */
struct police_event {
	char	bogus;		/* Alignment is good. */
	char	type;		/* 'M'=net_malloc, 'F'=net_free */
	short	size;		/* Size being allocated */
	void	*addr;		/* Memory address of chunk 'o mem */
	void	*caller1;	/* caller of net_malloc/net_free */
	void	*caller2;	/* caller's caller */
};
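
/*
 * The array is used as a circular log: log_police_event() below stores
 * each record at police_index and wraps back to 0 once police_index
 * reaches police_event_size (at least 1024 entries).
 */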

struct police_event *police_events;
int police_index;
int police_event_size;

simple_lock_data_t police_lock;
#define POLICE_LOCK(s)		s = disable_lock(INTMAX, &police_lock);
#define POLICE_UNLOCK(s)	unlock_enable(s, &police_lock);

struct police_usage {
	ulong	pu_freebits[4];
};

#define btopup(addr) \
	(&police_usage[((char *)(addr) - (char *)kmembase) / PAGE_SIZE])

struct police_usage *police_usage;

/*
 * Assumes the bucket lock is held.
 */
void
log_police_event(type, size, addr, caller1, caller2)
	char type;
	short size;
	void *addr;
	void *caller1;
	void *caller2;
{
	if (type == 'M')
		TRCHKL4T(HKWD_NET_MALLOC | hkwd_net_malloc, size, addr, \
			 caller1, caller2);
	else
		TRCHKL4T(HKWD_NET_MALLOC | hkwd_net_free, size, addr, \
			 caller1, caller2);
	police_events[police_index].type = type;
	police_events[police_index].size = size;
	police_events[police_index].addr = addr;
	police_events[police_index].caller1 = caller1;
	police_events[police_index].caller2 = caller2;
	if (++police_index == police_event_size)
		police_index = 0;
}

/*
 * Net_malloc_police_station gets called when a user changes the
 * net_malloc_police no option.  The user calls the police station
 * to start or stop policing of the malloc users.  Get it?  ar ar...
 */
net_malloc_police_station(start_policing, nop)
	int *start_policing;
	struct netopt *nop;
{
	int size;
	int indx;
	int s;

	if (*start_policing) {

		/*
		 * If this is our first policing, then allocate all the
		 * needed memory.
		 */
		size = round_page(sizeof (struct police_usage) * kmempages);
		if (!police_usage) {
			if ((police_usage = (struct police_usage *)
			    xmalloc(size, PGSHIFT, pinned_heap)) == NULL)
				return(ENOBUFS);
		}
		if (!police_events) {

			/*
			 * The passed in value can affect the event array
			 * size, but keep a minimum of 1024.
			 */
			police_event_size = MAX(*start_policing, 1024);
			if ((police_events = (struct police_event *)
			    xmalloc(police_event_size *
			    sizeof(struct police_event), PGSHIFT,
			    pinned_heap)) == NULL)
				return(ENOBUFS);
		}
		lock_alloc(&police_lock, LOCK_ALLOC_PIN, KMEMSTAT_LOCK_FAMILY,
			   M_LAST+2);
		simple_lock_init(&police_lock);

		POLICE_LOCK(s);

		/* Clear the event trace array. */
		bzero(police_events, police_event_size *
		      sizeof(struct police_event));

		/* Mark all buffers as malloced... */
		bzero(police_usage, size);

		police_index = 0;
		POLICE_UNLOCK(s);
	}
	net_malloc_police = *start_policing;
	return(0);
}

/*
 * Police_net_malloc() does some sanity checking and marks the
 * police_usage free bits appropriately.  It also logs the event.
 *
 * Assumes the bucket lock for va is held!
 */
void
police_net_malloc(va, size, type)
	void *va;
	long size;
	int type;
{
	register struct kmemusage *kup;
	int s;


	POLICE_LOCK(s);
	if (kmembase == NULL)
		panic("malloc: not initialized");
	if (type <= 0 || type >= M_LAST)
		panic("malloc: bogus type");

	if (va != NULL) {
		struct police_usage *pup;
		struct kmembuckets *kbp;
		void *addr;

		kup = btokup(va);
		kbp = &bucket[kup->ku_cpu][kup->ku_indx];

		/*
		 * Assert that the next chunk on the free list is reasonable.
		 */
		addr = kbp->kb_next;
		assert((addr >= kmembase && addr < kmemlimit) || (addr == 0));

		pup = btopup(va);
		size = 1 << kup->ku_indx;
		if (size < PAGESIZE) {
			caddr_t paddr;
			int i;
			int elmnum;
			ulong bit;

			paddr = (caddr_t) ((int)va & ~(PAGESIZE-1));
			elmnum = ((caddr_t)va - paddr) / size;
			bit = elmnum % (sizeof(ulong)*NBPB);
			i = elmnum / (sizeof(ulong)*NBPB);
			pup->pu_freebits[i] &= ~(1<<bit);
		}
		log_police_event('M', size, va, getcaller2(), getcaller3());
	}
	POLICE_UNLOCK(s);
}

static const u_long addrmask[] = { 0x00000000,
	0x00000001, 0x00000003, 0x00000007, 0x0000000f,
	0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
	0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
	0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff
};

/*
 * Police_net_free() does some sanity checking and verifies via the
 * police_usage bit array that the chunk being freed hasn't already been
 * freed.  If it has, the assert fires; otherwise it sets the freebits.
 * Also logs the event.
 *
 * Assumes the bucket lock is held.
 */
void
police_net_free(addr, size, type)
	void *addr;
	long size;
	int type;
{
	register struct kmemusage *kup;
	long alloc;
	int s;

	POLICE_LOCK(s);
	log_police_event('F', size, addr, getcaller2(), getcaller3());

	kup = btokup(addr);

	if (type <= 0 || type >= M_LAST)
		panic("free: bogus type");
	if (kup->ku_indx < MINBUCKET || kup->ku_indx >= MINBUCKET+16)
		panic("free: bad indx");
	if (size >= PAGE_SIZE)
		alloc = addrmask[pagindx];
	else if (kup->ku_indx < sizeof addrmask / sizeof addrmask[0])
		alloc = addrmask[kup->ku_indx];
	else
		panic("free: humungous PAGE_SIZE");
	if (((u_long)addr & alloc) != 0) {
		printf("free: unaligned addr 0x%x, size %d, type %d, mask %x\n",
		       addr, size, type, alloc);
		panic("free: unaligned addr");
	}
	if (size < PAGESIZE) {
		struct police_usage *pup;
		caddr_t paddr;
		u_long bit;
		int elmnum, i;

		pup = btopup(addr);
		paddr = (caddr_t)((int)addr & ~(PAGESIZE-1));
		elmnum = ((caddr_t)addr - paddr) / size;
		bit = elmnum % (sizeof(ulong)*NBPB);
		i = elmnum / (sizeof(ulong)*NBPB);
		assert(!(pup->pu_freebits[i] & (1<<bit)));
		pup->pu_freebits[i] |= (1<<bit);
	}
	POLICE_UNLOCK(s);
}

/*
 * Allocate a block of memory
 */
void *
net_malloc( u_long size,
	    int type,
	    int flags)
{
	register struct kmembuckets *kbp;
	register struct kmemusage *kup;
	long indx;
	int s;
	void *va;
	register struct kmemstats *ksp = &kmemstats[type];
	int cpu;

	indx = BUCKETINDX(size);
#ifdef _POWER_MP
	s = splhigh();
#endif
	cpu = mycpu();
	kbp = &bucket[cpu][indx];

#ifdef _POWER_MP
	KBP_LOCK(kbp);
#else
	FAST_BUCKET_LOCK(s);
#endif
	if ((va = kbp->kb_next) == NULL) {
#ifdef _POWER_MP
		KBP_UNLOCK(kbp);
		splx(s);
#else
		FAST_BUCKET_UNLOCK(s);
#endif
		va = empty_bucket(size, indx, flags, ksp, cpu);
		if (net_malloc_police) {
			if (va) {
#ifdef _POWER_MP
				s = splhigh();
				KBP_LOCK(kbp);
#else
				FAST_BUCKET_LOCK(s);
#endif
				police_net_malloc(va, size, type);
#ifdef _POWER_MP
				KBP_UNLOCK(kbp);
				splx(s);
#else
				FAST_BUCKET_UNLOCK(s);
#endif
			}
		}
		return(va);
	}

	kbp->kb_next = *(void **)va;
	kup = btokup(va);
#ifdef DEBUG
	if (kup->ku_indx != indx)
		panic("net_malloc: wrong bucket");
	if (kup->ku_freecnt <= 0)
		panic("net_malloc: lost data");
	if (kup->ku_cpu != cpu)
		panic("net_malloc: bad usage struct");
#endif /* DEBUG */

	if (net_malloc_police) {
		police_net_malloc(va, size, type);
	}
	kup->ku_freecnt--;
	kbp->kb_totalfree--;
	kbp->kb_calls++;
	if (indx == MAXINDX && kbp->kb_totalfree < RESERVE)
		et_post(EVENT_NETMALLOC, kmemthread->t_tid);
#ifdef _POWER_MP
	KBP_UNLOCK(kbp);
#endif
	fetch_and_add(&ksp->ks_memuse, 1 << indx);
out:
	fetch_and_add(&ksp->ks_inuse, 1);
	fetch_and_add(&ksp->ks_calls, 1);
	if (ksp->ks_memuse > ksp->ks_maxused)
		ksp->ks_maxused = ksp->ks_memuse;

#ifdef _POWER_MP
	splx(s);
#else
	FAST_BUCKET_UNLOCK(s);
#endif
	return va;
}

void *
empty_bucket(size, indx, flags, ksp, cpu)
	u_long size;
	long indx;
	int flags;
	struct kmemstats *ksp;
	int cpu;
{
	register struct kmembuckets *kbp;
	register struct kmemusage *kup;
	long allocsize;
	int s, s1;
	void *va, *cp, *savedlist;

	kbp = &bucket[cpu][indx];
#ifdef _POWER_MP
	s = splhigh();
again:
	KBP_LOCK(kbp);
#else
again:
	BUCKET_LOCK(s);
#endif
	if (kbp->kb_next == NULL) {
#ifdef _POWER_MP
		KBP_UNLOCK(kbp);
#endif
		if (size > MAXALLOCSAVE)
			allocsize = round_page(size);
		else
			allocsize = 1 << indx;
		size = round_page(allocsize);
		if ((va = malloc_loan(size, flags, cpu)) == NULL) {

			/*
			 * Failed at least once.  Each time we fail, double
			 * kmemreserve until it reaches the high-water mark
			 * of the MAXALLOCSAVE bucket...
			 */
			kmemreserve = MIN(kmemreserve*2,
					  (bucket[cpu][maxindx].kb_highwat *
					   MAXALLOCSAVE / PAGE_SIZE));
			if (flags & M_NOWAIT) {
				if (allocsize <= MAXALLOCSAVE)
					STATS_ACTION(&kbp->kb_lock, kbp->kb_failed++);
				STATS_ACTION(&ksp->ks_lock, ksp->ks_failed++);
#ifdef _POWER_MP
				splx(s);
#else
				BUCKET_UNLOCK(s);
#endif
				return (0);
			}
#ifdef _POWER_MP
			splx(s);
#else
			BUCKET_UNLOCK(s);
#endif
			va = (void *)kmem_alloc(kmemmap, size, cpu);
#ifdef _POWER_MP
			s = splhigh();
#else
			BUCKET_LOCK(s);
#endif
			if (va == NULL) {
				STATS_ACTION(&ksp->ks_lock, ksp->ks_mapblocks++);
				MALLOCTHREAD_LOCK(s1);
				wantkmemmap |= (1 << indx);
				assert_wait_mesg((int)&wantkmemmap,
						 FALSE, "memory");
				MALLOCTHREAD_UNLOCK(s1);
#ifndef _POWER_MP
				BUCKET_UNLOCK(s);
#endif
				et_post(EVENT_NETMALLOC, kmemthread->t_tid);
				thread_block();
				goto again;
			}
		}
		kup = btokup(va);
		kup->ku_indx = indx;
		if (allocsize > MAXALLOCSAVE) {
			if ((size = allocsize / PAGE_SIZE) > USHRT_MAX)
				panic("malloc: allocation too large");
			kup->ku_pagecnt = size;
			fetch_and_add(&ksp->ks_memuse, allocsize);
			goto out;
		}
#ifdef _POWER_MP
		KBP_LOCK(kbp);
#endif
		kup->ku_freecnt = kbp->kb_elmpercl;
		kbp->kb_total += kbp->kb_elmpercl;
		kbp->kb_totalfree += kbp->kb_elmpercl;
		/*
		 * Just in case we blocked while allocating memory,
		 * and someone else also allocated memory for this
		 * bucket, don't assume the list is still empty.
		 */
		savedlist = kbp->kb_next;
		kbp->kb_next = (char *)va + size - allocsize;
		for (cp = kbp->kb_next; cp > va; cp = (char *)cp - allocsize)
			*(void **)cp = (char *)cp - allocsize;
		*(void **)cp = savedlist;
		/* Note: two locks held.  Safe, and more efficient. */
		MALLOCTHREAD_LOCK(s1);
		if (wantkmemmap & (1 << indx)) {
			wantkmemmap &= ~(1 << indx);
			MALLOCTHREAD_UNLOCK(s1);
#ifdef _POWER_MP
			KBP_UNLOCK(kbp);
#else
			BUCKET_UNLOCK(s);
#endif
			thread_wakeup((int)&wantkmemmap);
			goto again;
		}
		MALLOCTHREAD_UNLOCK(s1);
	}
	va = kbp->kb_next;

	kbp->kb_next = *(void **)va;
	kup = btokup(va);
#ifdef DEBUG
	if (kup->ku_indx != indx)
		panic("empty_bucket: wrong bucket");
	if (kup->ku_freecnt <= 0)
		panic("empty_bucket: lost data");
	if (kup->ku_cpu != cpu)
		panic("empty_bucket: bad usage struct");
#endif /* DEBUG */
	kup->ku_freecnt--;
	kbp->kb_totalfree--;
	kbp->kb_calls++;
	if (indx == maxindx && kbp->kb_totalfree < RESERVE)
		et_post(EVENT_NETMALLOC, kmemthread->t_tid);
#ifdef _POWER_MP
	KBP_UNLOCK(kbp);
#endif
	fetch_and_add(&ksp->ks_memuse, 1 << indx);
out:
	fetch_and_add(&ksp->ks_inuse, 1);
	fetch_and_add(&ksp->ks_calls, 1);
	if (ksp->ks_memuse > ksp->ks_maxused)
		ksp->ks_maxused = ksp->ks_memuse;
#ifdef _POWER_MP
	splx(s);
#else
	BUCKET_UNLOCK(s);
#endif
	return va;
}


/*
 * Free a block of memory allocated by net_malloc().
 */
void
net_free( void * addr,
	  int type)
{
	register struct kmembuckets *kbp;
	register struct kmemusage *kup;
	long indx, size;
	int s, wake;
	register struct kmemstats *ksp = &kmemstats[type];

	if (addr < kmembase || addr >= kmemlimit)
		panic("free: bad addr");
	kup = btokup(addr);
	indx = kup->ku_indx;
	size = 1 << indx;

#ifndef _POWER_MP
	FAST_BUCKET_LOCK(s);
#else
	s = splhigh();
#endif
	if (size > MAXALLOCSAVE) {
		large_free(addr);
		goto out;
	}
	kbp = &bucket[kup->ku_cpu][indx];
#ifdef _POWER_MP
	KBP_LOCK(kbp);
#endif

	if (net_malloc_police) {
		police_net_free(addr, size, type);
	}
	*(void **)addr = kbp->kb_next;
	kbp->kb_next = addr;
	kbp->kb_totalfree++;
	kup->ku_freecnt++;

	/*
	 * If we're dealing with a bucket < 4K, and there's at least
	 * a page worth of elements free, then try to coalesce...
	 */
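	/*
	 * For example, with 4K pages a 512 byte bucket has kb_elmpercl of 8;
	 * once all 8 elements from one page are free (and the bucket is over
	 * its highwater mark), coalesce() strips them from this free list
	 * and hands the page back to the page-size bucket.
	 */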
	if ((kup->ku_freecnt >= kbp->kb_elmpercl) &&
	    (kbp->kb_elmpercl > 1))
		coalesce(kup, kbp, indx, addr);
#ifdef _POWER_MP
	else
		KBP_UNLOCK(kbp);
#endif
out:
	fetch_and_add(&ksp->ks_memuse, -size);
	fetch_and_add(&ksp->ks_inuse, -1);
#ifndef _POWER_MP
	FAST_BUCKET_UNLOCK(s);
#endif
	if (wantkmemmap & (1 << indx))
		thread_wakeup((int)&wantkmemmap);
#ifdef _POWER_MP
	splx(s);
#endif
}

/*
 * MP: assume we're at splhigh()...
 */
large_free(addr)
	void * addr;
{
	long alloc;
	long size;
	int s1;
	register struct kmemusage *kup;

	kup = btokup(addr);
	size = kup->ku_pagecnt * PAGE_SIZE;
#if DEBUG
	for (alloc = kup->ku_pagecnt; --alloc >= 0; ) {
		if (alloc && kup[alloc].ku_indx >= 0)
			panic("free: overlap");
	}
#endif
	/*
	 * Cannot call kmem_free in interrupt context,
	 * so play it safe and let kmemthread do it.
	 *	kmem_free(kmemmap, (vm_offset_t)addr, size);
	 */
	*(int *)((void **)addr + 1) = size;
	MALLOCTHREAD_LOCK(s1);
	*(void **)addr = kmemfreelater;
	kmemfreelater = addr;
	MALLOCTHREAD_UNLOCK(s1);
	if (kmemthread)
		et_post(EVENT_NETMALLOC, kmemthread->t_tid);
}


/*
 * MP: assume the bucket lock for kbp is held.  It will be released before
 * returning from coalesce()...
 */
coalesce(kup, kbp, indx, addr)
	struct kmemusage *kup;
	struct kmembuckets *kbp;
	long indx;
	void * addr;
{

	if (kup->ku_freecnt > kbp->kb_elmpercl)
		panic("free: multiple frees");

	if (kbp->kb_totalfree > kbp->kb_highwat) {

		/*
		 * Bump small, free buckets up to pages.
		 */
		register void *q, *p = kbp->kb_next;
		register void **pp = &kbp->kb_next;
		register struct kmembuckets *pkbp =
				&bucket[kup->ku_cpu][pagindx];
		do {
			q = *(void **)p;
			if (btokup(p) == kup)
				*pp = q;
			else
				pp = (void **)p;
		} while (p = q);
		kbp->kb_couldfree++;
		kbp->kb_total -= kbp->kb_elmpercl;
		kbp->kb_totalfree -= kbp->kb_elmpercl;
		kup->ku_indx = indx = pagindx;
		kup->ku_freecnt = 1;
#ifdef _POWER_MP
		KBP_UNLOCK(kbp);
#endif
		/* Add page to page-size bucket */
		addr = (void *) trunc_page(addr);
#ifdef _POWER_MP
		KBP_LOCK(pkbp);
#endif
		*(void **)addr = pkbp->kb_next;
		pkbp->kb_next = addr;
		pkbp->kb_total++;
		pkbp->kb_totalfree++;
#ifdef _POWER_MP
		KBP_UNLOCK(pkbp);
#endif
	}
#ifdef _POWER_MP
	else
		KBP_UNLOCK(kbp);
#endif
}

/*
 * When a smaller bucket is empty, borrow from a larger one.
 * Called at splhigh, with size rounded to a page.  If
 * possible, don't loan out the reserve to allocations
 * which can wait.
 */
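
/*
 * For example (4K pages, 16K MAXALLOCSAVE): if the page bucket and the 8K
 * bucket are empty but the 16K bucket has a chunk, that chunk is pulled
 * off, its first page satisfies the request, and the remaining 12K is
 * pushed back onto the page bucket as three page-size elements.
 */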
static void *
malloc_loan(
	u_long size,
	int flags,
	int cpu)
{
	register struct kmembuckets *kbp;
	register struct kmemusage *kup;
	register long newsize;
	void *va = 0;

	kbp = &bucket[cpu][BUCKETINDX(size)];
	for (newsize = size; newsize <= MAXALLOCSAVE; newsize <<= 1) {
#ifdef _POWER_MP
		KBP_LOCK(kbp);
#endif
		if (!(flags & M_NOWAIT) && newsize >= MAXALLOCSAVE &&
		    kbp->kb_totalfree <= RESERVE) {
#ifdef _POWER_MP
			KBP_UNLOCK(kbp);
#endif
			return 0;
		}
		if (va = kbp->kb_next) {
			kbp->kb_next = *(void **)va;
			kbp->kb_total--;
			kbp->kb_totalfree--;
		}
#ifdef _POWER_MP
		KBP_UNLOCK(kbp);
#endif
		if (va)
			break;
		++kbp;
	}
	if (newsize >= MAXALLOCSAVE && kmemthread)
		et_post(EVENT_NETMALLOC, kmemthread->t_tid);

	if (va && newsize != size) {
		void *nva = (char *)va + size;
		/* For simplicity, toss on as pages */
		kbp = &bucket[cpu][pagindx];
#ifdef _POWER_MP
		KBP_LOCK(kbp);
#endif
		do {
			kup = btokup(nva);
			kup->ku_indx = pagindx;
			kup->ku_freecnt = 1;
			kbp->kb_total++;
			kbp->kb_totalfree++;
			*(void **)nva = kbp->kb_next;
			kbp->kb_next = nva;
			newsize -= PAGE_SIZE;
			nva = (char *)nva + PAGE_SIZE;
		} while (newsize != size);
#ifdef _POWER_MP
		KBP_UNLOCK(kbp);
#endif
	}
	return va;
}

/*
 * Service thread for malloc/free.
 */
static void
malloc_thread(void)
{
	struct kmembuckets *kbp, *mkbp;
	register struct kmemusage *kup;
	struct timeval now, lastgc;
	vm_offset_t addr;
	long indx;
	void *va;
	int s, i, n, tmo = 0;
	int s1;
	int cpu;

	spl0();
	microtime(&lastgc);
	for (;;) {
		if (tmo) {
			if (tmo > 0) thread_set_timeout(tmo);
			et_wait(EVENT_NETMALLOC, EVENT_NETMALLOC, EVENT_SHORT);
		}
		/*
		 * Zeroth job: perform delayed frees of large allocations.
		 */
#ifdef _POWER_MP
		s1 = splhigh();
#endif
		MALLOCTHREAD_LOCK(s1);
		va = kmemfreelater;
		kmemfreelater = 0;
		if (va && (i = wantkmemmap))
			wantkmemmap = 0;
		else
			i = 0;
		MALLOCTHREAD_UNLOCK(s1);
#ifdef _POWER_MP
		splx(s1);
#endif
		while (va) {
			addr = (vm_offset_t)va;
			indx = *(int *)((void **)va + 1);	/* size */
			va = *(void **)va;
			kmem_free(kmemmap, addr, indx);
		}
		if (i)
			thread_wakeup((int)&wantkmemmap);
		/*
		 * First job: minimize probability of malloc(M_NOWAIT)
		 * returning NULL by keeping its biggest bucket full.
		 * Note if MAXALLOCSAVE is much greater than PAGE_SIZE,
		 * fragmentation may result; there is a limit enforced
		 * in kmeminit() below which attempts to minimize this.
		 */

		for (cpu = 0; cpu < _system_configuration.ncpus; cpu++) {

			mkbp = &bucket[cpu][maxindx];
			i = RESERVE;	/* target */
#ifdef _POWER_MP
			s = splhigh();
			KBP_LOCK(mkbp);
#else
			BUCKET_LOCK(s);
#endif
			i -= mkbp->kb_totalfree;
#ifdef _POWER_MP
			KBP_UNLOCK(mkbp);
			splx(s);
#else
			BUCKET_UNLOCK(s);
#endif
			if (i > 0 || (i == 0 && wantkmemmap)) {
				if ((va = (void *)kmem_alloc(kmemmap,
				    MAXALLOCSAVE, cpu)) != NULL) {
					kup = btokup(va);
					kup->ku_indx = maxindx;
#ifdef _POWER_MP
					s = splhigh();
					KBP_LOCK(mkbp);
#else
					BUCKET_LOCK(s);
#endif
					kup->ku_freecnt = 1;
					mkbp->kb_total++;
					mkbp->kb_totalfree++;
					*(void **)va = mkbp->kb_next;
					mkbp->kb_next = va;
#ifdef _POWER_MP
					KBP_UNLOCK(mkbp);
#else
					BUCKET_UNLOCK(s);
#endif
					MALLOCTHREAD_LOCK(s1);
					i = wantkmemmap;
					wantkmemmap = 0;
					MALLOCTHREAD_UNLOCK(s1);
#ifdef _POWER_MP
					splx(s);
#endif
					if (i)
						thread_wakeup((int)&wantkmemmap);
					tmo = 0;	/* go around now */
					continue;
				}
				/*
				 * If the alloc fails, force a gc pass and go
				 * around in 1/2 sec if we fail to free anything.
				 */
				tmo = hz / 2;
			} else
				tmo = -1;
		}

		/*
		 * If timeout is -1, then check the interval since the last
		 * gc and either gc or go back to sleep.  Check for time
		 * warps.  If timeout < hz, then failure dictates some gc is
		 * necessary before retry.
		 */
		microtime(&now);
		if (tmo < 0) {
			if (kmemgcintvl <= 0)
				continue;
			tmo = kmemgcintvl - (now.tv_sec - lastgc.tv_sec);
			if (tmo > 0) {
				if (tmo > kmemgcintvl)	/* timewarp */
					tmo = kmemgcintvl;
				tmo *= hz;
				continue;
			}
			tmo = kmemgcintvl * hz;
		}
		lastgc = now;

		/*
		 * Second job: garbage collect pages scavenged in free().
		 * Scale aggressiveness in freeing memory to the amount
		 * of overage and settable scale.  Currently functional
		 * only if KMEMSTATS.
		 */
		for (cpu = 0; cpu < _system_configuration.ncpus; cpu++) {
			indx = maxindx;
			mkbp = &bucket[cpu][maxindx];
			kbp = mkbp;
			n = 0;
#ifdef _POWER_MP
			s = splhigh();
#else
			BUCKET_LOCK(s);
#endif
			do {
#ifdef _POWER_MP
				KBP_LOCK(kbp);
#endif
				i = (kbp->kb_totalfree - kbp->kb_highwat) *
				    (1 << (indx - pagindx));
				if (i > 0)
					n += i;
#ifdef _POWER_MP
				KBP_UNLOCK(kbp);
#endif
				kbp--;
				indx--;
			} while (indx >= pagindx);
#ifdef _POWER_MP
			splx(s);
#else
			BUCKET_UNLOCK(s);
#endif
			/*
			 * Do linear scaling above "scale" extra pages,
			 * constant scaling below "scale", nothing if none.
			 * Note smaller scale = higher aggressiveness.
			 * Typical value == 8 @ 2 sec gcintvl, yielding
			 * recovery from 50 page overage in ~1 minute.
			 * 100 big ethernet packets might cause this, e.g.
			 */
			if (n > 0 && (n /= kmemgcscale) == 0)
				n = 1;
			++indx;
			++kbp;
			while (n > 0 && indx <= maxindx) {
				va = 0;
#ifdef _POWER_MP
				s1 = splhigh();
				KBP_LOCK(kbp);
#else
				BUCKET_LOCK(s1);
#endif
				if (kbp->kb_totalfree > kbp->kb_highwat) {
					va = kbp->kb_next;
					kbp->kb_next = *(void **)va;
					kbp->kb_couldfree++;
					kbp->kb_total--;
					kbp->kb_totalfree--;
				}
				if (va) {
					n -= 1 << (indx - pagindx);	/* page count */
					if (tmo < hz)	/* want retry */
						tmo = 0;
					i = 1 << indx;
#if MACH_ASSERT
					kup = btokup(va);
					s = 1 << (indx - pagindx);	/* page count */
					while (--s >= 0) {
						if (s && kup[s].ku_indx >= 0)
							panic("free: gc overlap");
						kup[s].ku_indx = -1;
						kup[s].ku_freecnt = 0;
					}
#endif
#ifdef _POWER_MP
					KBP_UNLOCK(kbp);
					splx(s1);
#else
					BUCKET_UNLOCK(s1);
#endif
					kmem_free(kmemmap, (vm_offset_t)va, i);
				} else {
#ifdef _POWER_MP
					KBP_UNLOCK(kbp);
					splx(s1);
#else
					BUCKET_UNLOCK(s1);
#endif
					kbp++;
					indx++;
				}
			}
		}
	}
	/*NOTREACHED*/
}

#ifdef notyet
/*
 * Set limit for type.
 */
int
kmemsetlimit(
	int type,
	long limit)
{
	int s;
	register struct kmemstats *ksp = &kmemstats[type];

	if (type > 0 && type < M_LAST) {
		if (limit <= 0)
			limit = LONG_MAX;
#ifdef _POWER_MP
		s = splhigh();
		KSP_LOCK(ksp);
#else
		BUCKET_LOCK(s);
#endif
		ksp->ks_limit = limit;
#ifdef _POWER_MP
		KSP_UNLOCK(ksp);
		splx(s);
#else
		BUCKET_UNLOCK(s);
#endif
		thread_wakeup((int)ksp);
		return 1;
	}
	return 0;
}
#endif /* notyet */

/*
 * Initialize the kernel memory allocator.
 */
void
kmeminit(void)
{
	register long indx;
	vm_offset_t min, max;
	int cpu;

#if ((MAXALLOCSAVE & (MAXALLOCSAVE - 1)) != 0)
#error kmeminit: MAXALLOCSAVE not power of 2
#endif
#if (MAXALLOCSAVE > MINALLOCSIZE * 32768)	/* see BUCKETINDX macro */
#error kmeminit: MAXALLOCSAVE too big
#endif
#ifdef not
	if (MAXALLOCSAVE > 4 * PAGE_SIZE)
		panic("kmeminit: MAXALLOCSAVE/PAGE_SIZE too big");
	if (MAXALLOCSAVE < PAGE_SIZE)
		panic("kmeminit: MAXALLOCSAVE/PAGE_SIZE too small");
#endif

	nrpages = vmker.nrpages - vmker.badpages;
	kmempages = MIN( (nrpages / 2), (THEWALL_MAX / PAGE_SIZE));

	/* lite */
	if (nrpages <= 2048)		/* 8meg */
		kmemreserve = 4;
	else if (nrpages <= 4096)	/* 16meg */
		kmemreserve = 16;
	else
		kmemreserve = 24;	/* >16meg */

	lock_alloc(&alloc_lock, LOCK_ALLOC_PIN, KMEMSTAT_LOCK_FAMILY, M_LAST+1);
	simple_lock_init(&alloc_lock);

	/*
	 * Set the wall to be 1/4 of the number of pages we have mapped.
	 * Note since thewall is the number of 1K blocks that can be
	 * allocated and kmempages is 4K blocks, a simple assignment
	 * achieves the 1/4 goal.  Thus, thewall defaults to 1/8 of
	 * real memory (since kmempages is 1/2 of real mem, and thewall
	 * is 1/4 kmempages) or THEWALL_MAX, whichever is smaller.
	 */
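	/*
	 * For example, on a 64 meg machine with 4K pages: nrpages is about
	 * 16384, kmempages becomes 8192 (assuming THEWALL_MAX does not cap
	 * it), and thewall defaults to 8192 1K blocks, i.e. 8 meg, which is
	 * 1/8 of real memory.  (Illustrative numbers only.)
	 */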
	thewall_dflt = thewall = MIN(kmempages, (THEWALL_MAX / PAGE_SIZE));

	/*
	 * allocated tracks whether we've hit thewall.  It's a running total
	 * of memory xmalloced from the kernel heap.
	 */
	allocated = 0;

	if (kmemmap = (vm_map_t) kmem_suballoc(kernel_map, &min, &max,
	    (vm_size_t) (kmempages * PAGE_SIZE), FALSE)) {
		kmembase = (void *)min;
		kmemlimit = (void *)max;
	} else
		panic("kmeminit map");
	indx = round_page(sizeof (struct kmemusage) * kmempages);
	if ((kmemusage = (struct kmemusage *)kup_init(kmemmap, indx)) == NULL)
		panic("kmeminit structs");
	mclxmemd.aspace_id = XMEM_INVAL;
	xmemat(kmembase, CLUSTERSIZE*NBPG, &mclxmemd);
	mbclusters = mbufs = (int)kmembase;
	bzero(kmemusage, indx);

#if MACH_ASSERT
	for (indx = 0; indx < kmempages; indx++) {
		kmemusage[indx].ku_indx = -1;
		kmemusage[indx].ku_freecnt = -1;	/* -1 for aix_kmem_alloc */
		kmemusage[indx].ku_cpu = -1;
	}
#endif

	pagindx = BUCKETINDX(PAGE_SIZE);
	maxindx = BUCKETINDX(MAXALLOCSAVE);
	for (cpu = 0; cpu < _system_configuration.ncpus; cpu++) {
		for (indx = 0; indx < MINBUCKET + 16; indx++) {
			if ((bucket[cpu][indx].kb_elmpercl = PAGE_SIZE /
			    (1 << indx)) <= 1) {
				bucket[cpu][indx].kb_elmpercl = 1;
				bucket[cpu][indx].kb_highwat = 10;
				if (indx >= maxindx && RESERVE > 10)
					bucket[cpu][indx].kb_highwat = RESERVE;
			} else
				bucket[cpu][indx].kb_highwat =
					5 * bucket[cpu][indx].kb_elmpercl;
			lock_alloc(&(bucket[cpu][indx].kb_lock),
				   LOCK_ALLOC_PIN, BUCKET_LOCK_FAMILY,
				   (cpu + 1) * indx);
			simple_lock_init(&(bucket[cpu][indx].kb_lock));
		}

		/*
		 * Special case important sizes.  Scale to amount of memory.
		 */
		bucket[cpu][pagindx].kb_highwat = 5 * kmemreserve;
		bucket[cpu][BUCKETINDX(256)].kb_highwat = 16 * kmemreserve;
		bucket[cpu][maxindx].kb_highwat = MAX(10, kmemreserve);
	}

	for (indx = 0; indx < M_LAST; indx++) {
		kmemstats[indx].ks_limit = kmempages * PAGE_SIZE * 6 / 10;
		/* Round it off */
		kmemstats[indx].ks_limit += MINALLOCSIZE - 1;
		kmemstats[indx].ks_limit &= ~(MINALLOCSIZE - 1);
		lock_alloc(&kmemstats[indx].ks_lock, LOCK_ALLOC_PIN,
			   KMEMSTAT_LOCK_FAMILY, indx);
		simple_lock_init(&kmemstats[indx].ks_lock);
	}

	prime_the_buckets();

	/*
	 * If DEBUG, then call the malloc police station and start policing!
	 */
#ifdef DEBUG
	{
		int i = 1024;

		(void)net_malloc_police_station(&i, 0);
	}
#endif
}

void
prime_the_buckets()
{
	int num_nuggets, i;
	int cpu, s;
	struct kmembuckets *kbp, *mkbp;
	struct kmemusage *kup;
	void *va;

	/*
	 * Fill the biggest bucket of each bucket array with
	 * MIN(1/64 * real memory, MAX_INITIAL_PIN)
	 * bytes worth of bucket nuggets.
	 * For example if MAXALLOCSAVE is 16K, 1 cpu, and
	 * real mem is 16Meg, we need to net_malloc 16 nuggets.
	 */

	num_nuggets = MIN(nrpages * 64 / MAXALLOCSAVE, MAX_INITIAL_PIN);

	for (cpu = 0; cpu < _system_configuration.ncpus; cpu++) {
		mkbp = &bucket[cpu][maxindx];
		for (i = 0; i < num_nuggets; i++) {

			if ((va = (void *)kmem_alloc(kmemmap,
			    MAXALLOCSAVE, cpu)) != NULL) {
				kup = btokup(va);
				kup->ku_indx = maxindx;
#ifdef _POWER_MP
				s = splhigh();
				KBP_LOCK(mkbp);
#else
				BUCKET_LOCK(s);
#endif
				kup->ku_freecnt = 1;
				mkbp->kb_total++;
				mkbp->kb_totalfree++;
				*(void **)va = mkbp->kb_next;
				mkbp->kb_next = va;
#ifdef _POWER_MP
				KBP_UNLOCK(mkbp);
				splx(s);
#else
				BUCKET_UNLOCK(s);
#endif
			}
		}
	}
}

void
kmeminit_thread(int pri)
{
	extern task_t first_task;
	pid_t pid;

	/*
	 * Start the kmem thread at the specified priority.  Should
	 * be the same as the netisr threads.
	 */

	pid = creatp();
	assert(pid != -1);
	kmemthread = (PROCPTR(pid))->p_threadlist;
	initp(pid, malloc_thread, 0, 0, "netm");
	assert(setpri(pid, MBUF_RTPRI) != -1);
	if (kmemthread == NULL)
		panic("kmeminit_thread");
}


void *
aix_kmem_suballoc(minp, maxp, size)
	int *minp;
	int *maxp;
	u_int size;
{
	caddr_t area;

	area = (caddr_t)xmalloc(size, PGSHIFT, kernel_heap);
	if (area == (caddr_t)NULL)
		panic("aix_kmem_suballoc: xmalloc");
	*minp = (int)area;
	*maxp = (int)(area + size);
	return(area);
}

kup_init(map, size)
	caddr_t map;
	u_int size;
{
	return(xmalloc(size, PGSHIFT, pinned_heap));
}

struct err_rec0 alloc_log = { ERRID_LOW_MBUFS, "SYSNET" };
unsigned long tv_sec = 0;

void *
aix_kmem_alloc(map, size, cpu)
	caddr_t map;
	u_int size;
	int cpu;
{
	caddr_t addr;
	int i, j;
	int pages;
	int gotit;
	struct kmemusage *kup;
	int s;
	struct timestruc_t ctv;

	assert(csa->prev == NULL);
	if (csa->intpri != INTBASE) {
		assert(size <= MAXALLOCSAVE);
		return((caddr_t)NULL);
	}

	s = disable_lock(INTMAX, &alloc_lock);
	if (allocated + size > thewall*1024) {
		unlock_enable(s, &alloc_lock);
		/*
		 * Don't flood the errlog.  Log the message at most
		 * once every 30 minutes.
		 */
		(void) curtime(&ctv);
		if ( (tv_sec + (60*30)) < ctv.tv_sec) {
			(void) errsave(&alloc_log, sizeof(alloc_log));
			tv_sec = ctv.tv_sec;
		}
		return((caddr_t)NULL);
	}

	size = round_page(size);
	pages = size/PAGE_SIZE;
	addr = NULL;
	for (i = 0; i < kmempages - pages ; i++) {
		gotit = 1;
		for (j = 0; j < pages ; j++) {
			if (kmemusage[i+j].ku_freecnt != (u_short)-1) {
				gotit = 0;
				i += j;	/* skip over used or too small areas */
				break;
			}
		}
		if (gotit)
			break;
	}
	if (gotit) {
		addr = (caddr_t)((int)kmembase + (i * PAGE_SIZE));
		kup = btokup(addr);
		for (i = 0; i < pages ; i++) {
			kup->ku_freecnt = 0;

			/*
			 * Remember which cpu/bucket array we put this
			 * memory into so we can free it back to the
			 * same bucket array.  Otherwise we cannot
			 * coalesce easily.
			 */
			kup->ku_cpu = cpu;
			kup++;
		}
		allocated += size;
		unlock_enable(s, &alloc_lock);

		if (pin(addr, size)) {
			s = disable_lock(INTMAX, &alloc_lock);
			kup = btokup(addr);
			for (i = 0; i < size/PAGE_SIZE; i++) {
				kup->ku_freecnt = -1;
				kup++;
			}
			allocated -= size;
			unlock_enable(s, &alloc_lock);
			addr = 0;
		}
	} else
		unlock_enable(s, &alloc_lock);
	return(addr);
}

aix_kmem_free(map, addr, size)
	caddr_t map;
	caddr_t addr;
	u_int size;
{
	int i;
	struct kmemusage *kup;
	int s;

	s = disable_lock(INTMAX, &alloc_lock);
	size = round_page(size);
	allocated -= size;
	kup = btokup(addr);
	for (i = 0; i < size/PAGE_SIZE; i++) {
		kup->ku_freecnt = -1;
		kup++;
	}
	unlock_enable(s, &alloc_lock);
	unpin(addr, size);
}

static struct trb *threadtime;

void
threadtimer()
{
	et_post(EVENT_NETMALLOC, kmemthread->t_tid);
}

malloc_thread_set_timeout(tmo)
{
	if (threadtime == (struct trb *) NULL) {
		threadtime = talloc();
		threadtime->func = threadtimer;
		threadtime->func_data = (ulong) 0;
		threadtime->ipri = INTCLASS2;
	}
	threadtime->timeout.it_value.tv_sec = tmo/HZ;
	threadtime->timeout.it_value.tv_nsec = (tmo % HZ) * (1000000000 / HZ);
#ifndef _POWER_MP
	tstop(threadtime);
#else
	while (tstop(threadtime));
#endif
	tstart(threadtime);
}