summaryrefslogtreecommitdiffstats
path: root/libcutils/tests/memset_mips/test_memset.c
diff options
context:
space:
mode:
Diffstat (limited to 'libcutils/tests/memset_mips/test_memset.c')
-rw-r--r--libcutils/tests/memset_mips/test_memset.c235
1 files changed, 235 insertions, 0 deletions
diff --git a/libcutils/tests/memset_mips/test_memset.c b/libcutils/tests/memset_mips/test_memset.c
new file mode 100644
index 0000000..9705c65
--- /dev/null
+++ b/libcutils/tests/memset_mips/test_memset.c
@@ -0,0 +1,235 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <cutils/memory.h>
+#include <time.h>
+
+/*
+ * All systems must implement or emulate the rdhwr instruction to read
+ * the userlocal register. Systems that emulate also return teh count register
+ * when accessing register $2 so this should work on most systems
+ */
+#define USE_RDHWR
+
+#ifdef USE_RDHWR
+#define UNITS "cycles"
+#define SCALE 2 /* Most CPU's */
+static inline uint32_t
+get_count(void)
+{
+ uint32_t res;
+ asm volatile (".set push; .set mips32r2; rdhwr %[res],$2; .set pop" : [res] "=r" (res) : : "memory");
+ return res;
+}
+#else
+#define UNITS "ns"
+#define SCALE 1
+static inline uint32_t
+get_count(void)
+{
+ struct timespec now;
+ uint32_t res;
+ clock_gettime(CLOCK_REALTIME, &now);
+ res = (uint32_t)(now.tv_sec * 1000000000LL + now.tv_nsec);
+ // printf ("now=%d.%09d res=%d\n", (int)now.tv_sec, (int)now.tv_nsec, res);
+ return res;
+}
+#endif
+
+uint32_t overhead;
+void
+measure_overhead(void)
+{
+ int i;
+ uint32_t start, stop, delta;
+ for (i = 0; i < 32; i++) {
+ start = get_count();
+ stop = get_count();
+ delta = stop - start;
+ if (overhead == 0 || delta < overhead)
+ overhead = delta;
+ }
+ printf("overhead is %d"UNITS"\n", overhead);
+}
+
+uint32_t
+timeone(void (*fn)(), void *d, uint32_t val, uint32_t bytes)
+{
+ uint32_t start, stop, delta;
+ start = get_count();
+ (*fn)(d, val, bytes);
+ stop = get_count();
+ delta = stop - start - overhead;
+ // printf ("start=0x%08x stop=0x%08x delta=0x%08x\n", start, stop, delta);
+ return delta * SCALE;
+}
+
+/* define VERIFY to check that memset only touches the bytes it's supposed to */
+/*#define VERIFY*/
+
+/*
+ * Using a big arena means that memset will most likely miss in the cache
+ * NB Enabling verification effectively warms up the cache...
+ */
+#define ARENASIZE 0x1000000
+#ifdef VERIFY
+char arena[ARENASIZE+8]; /* Allow space for guard words */
+#else
+char arena[ARENASIZE];
+#endif
+
+void
+testone(char *tag, void (*fn)(), int trials, int minbytes, int maxbytes, int size, int threshold)
+{
+ int offset;
+ void *d;
+ void *p;
+ uint32_t v, notv = 0;
+ uint32_t n;
+ int i, units;
+ int totalunits = 0, totalbytes = 0, samples = 0;
+
+ /* Reset RNG to ensure each test uses same random values */
+ srand(0); /* FIXME should be able to use some other seed than 0 */
+
+ for (i = 0; i < trials; i++) {
+ n = minbytes + (rand() % (maxbytes-minbytes)); /* How many bytes to do */
+ offset = ((rand() % (ARENASIZE-n))); /* Where to start */
+
+#ifdef VERIFY
+ offset += 4; /* Allow space for guard word at beginning */
+#endif
+ v = rand();
+
+ /* Adjust alignment and sizes based on transfer size */
+ switch (size) {
+ case 1:
+ v &= 0xff;
+ notv = ~v & 0xff;
+ break;
+ case 2:
+ v &= 0xffff;
+ notv = ~v & 0xffff;
+ offset &= ~1;
+ n &= ~1;
+ break;
+ case 4:
+ notv = ~v;
+ offset &= ~3;
+ n &= ~3;
+ break;
+ }
+
+ d = &arena[offset];
+
+#ifdef VERIFY
+ /* Initialise the area and guard words */
+ for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) {
+ if (size == 1)
+ *(uint8_t *)p = notv;
+ else if (size == 2)
+ *(uint16_t *)p = notv;
+ else if (size == 4)
+ *(uint32_t *)p = notv;
+ }
+#endif
+ units = timeone(fn, d, v, n);
+#ifdef VERIFY
+ /* Check the area and guard words */
+ for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) {
+ uint32_t got = 0;
+ if (size == 1)
+ got = *(uint8_t *)p;
+ else if (size == 2)
+ got = *(uint16_t *)p;
+ else if (size == 4)
+ got = *(uint32_t *)p;
+ if (p < (void *)&arena[offset]) {
+ if (got != notv)
+ printf ("%s: verify failure: preguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, got, n);
+ }
+ else if (p < (void *)&arena[offset+n]) {
+ if (got != v)
+ printf ("%s: verify failure: arena:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n);
+ }
+ else {
+ if (got != notv)
+ printf ("%s: verify failure: postguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n);
+ }
+ }
+#endif
+
+ /* If the cycle count looks reasonable include it in the statistics */
+ if (units < threshold) {
+ totalbytes += n;
+ totalunits += units;
+ samples++;
+ }
+ }
+
+ printf("%s: samples=%d avglen=%d avg" UNITS "=%d bp"UNITS"=%g\n",
+ tag, samples, totalbytes/samples, totalunits/samples, (double)totalbytes/(double)totalunits);
+}
+
+extern void android_memset32_dumb(uint32_t* dst, uint32_t value, size_t size);
+extern void android_memset16_dumb(uint32_t* dst, uint16_t value, size_t size);
+extern void android_memset32_test(uint32_t* dst, uint32_t value, size_t size);
+extern void android_memset16_test(uint32_t* dst, uint16_t value, size_t size);
+extern void memset_cmips(void* dst, int value, size_t size);
+extern void memset_omips(void* dst, int value, size_t size);
+
+int
+main(int argc, char **argv)
+{
+ int i;
+ struct {
+ char *type;
+ int trials;
+ int minbytes, maxbytes;
+ } *pp, params[] = {
+ {"small", 10000, 0, 64},
+ {"medium", 10000, 64, 512},
+ {"large", 10000, 512, 1280},
+ {"varied", 10000, 0, 1280},
+ };
+#define NPARAMS (sizeof(params)/sizeof(params[0]))
+ struct {
+ char *name;
+ void (*fn)();
+ int size;
+ } *fp, functions[] = {
+ {"dmemset16", (void (*)())android_memset16_dumb, 2},
+ {"tmemset16", (void (*)())android_memset16_test, 2},
+ {"lmemset16", (void (*)())android_memset16, 2},
+
+ {"dmemset32", (void (*)())android_memset32_dumb, 4},
+ {"tmemset32", (void (*)())android_memset32_test, 4},
+ {"lmemset32", (void (*)())android_memset32, 4},
+
+ {"cmemset", (void (*)())memset_cmips, 1},
+ {"omemset", (void (*)())memset_omips, 1},
+ {"lmemset", (void (*)())memset, 1},
+ };
+#define NFUNCTIONS (sizeof(functions)/sizeof(functions[0]))
+ char tag[40];
+ int threshold;
+
+ measure_overhead();
+
+ /* Warm up the page cache */
+ memset(arena, 0xff, ARENASIZE); /* use 0xff now to avoid COW later */
+
+ for (fp = functions; fp < &functions[NFUNCTIONS]; fp++) {
+ (fp->fn)(arena, 0xffffffff, ARENASIZE); /* one call to get the code into Icache */
+ for (pp = params; pp < &params[NPARAMS]; pp++) {
+ sprintf(tag, "%10s: %7s %4d-%4d", fp->name, pp->type, pp->minbytes, pp->maxbytes);
+
+ /* Set the cycle threshold */
+ threshold = pp->maxbytes * 4 * 10; /* reasonable for cycles and ns */
+ testone(tag, fp->fn, pp->trials, pp->minbytes, pp->maxbytes, fp->size, threshold);
+ }
+ printf ("\n");
+ }
+
+ return 0;
+}