/*
 * $Id$
 * Simple tokenizer interface.
 */

#include "Python.h"
#include "tokenizer.h"

/* Symbol and value reported for the end-of-input token. */
static const char end_symbol[] = "(end)";

/*
 * token_list(source) -> list of (symbol, value) tuples
 *
 * Runs the tokenizer over the given source string and returns the
 * tokens as a list, suitable for use with a topdown parser.  Each item
 * is built with the "ss#" format from a symbol name and the token text
 * delimited by the tokenizer's start/end pointers:
 *
 *   "(name)", "(literal)" (numbers and strings), "(newline)",
 *   "(indent)", "(dedent)", "(operator)" (any other type <= OP) and
 *   "(end)", whose value is the text "(end)".
 *
 * When the tokenizer reports no start pointer for a token, the "s#"
 * format is handed a NULL string (documented to convert to None).
 *
 * Returns a new reference, or NULL with an exception set if the
 * arguments are invalid or an allocation fails.
 *
 * NOTE: a token type greater than OP (presumably the tokenizer's error
 * token -- confirm against token.h) ends the scan without raising; the
 * tokens collected so far are returned.
 *
 * FIXME: precalculate/reuse symbol names (lazily?)
 * FIXME: report tokenizer errors to the caller
 */
static PyObject*
token_list(PyObject* self, PyObject* args)
{
    struct tok_state* tok;
    PyObject* tokens;
    char* str;

    if (!PyArg_ParseTuple(args, "s:token_list", &str))
        return NULL;

    tok = PyTokenizer_FromString(str);
    if (tok == NULL) {
        /* make sure the caller never sees NULL without an exception */
        if (!PyErr_Occurred())
            PyErr_NoMemory();
        return NULL;
    }

    tokens = PyList_New(0);
    if (tokens == NULL) {
        PyTokenizer_Free(tok);
        return NULL;
    }

    for (;;) {
        char *a = NULL, *b = NULL;
        const char* symbol = NULL;
        const char* text;
        PyObject* item;
        int size;
        int status;
        int type;

        type = PyTokenizer_Get(tok, &a, &b);

        switch (type) {
        case ENDMARKER:
            symbol = end_symbol;
            break;
        case NAME:
            symbol = "(name)";
            break;
        case NUMBER:
        case STRING:
            symbol = "(literal)";
            break;
        case NEWLINE:
            symbol = "(newline)";
            break;
        case INDENT:
            symbol = "(indent)";
            break;
        case DEDENT:
            symbol = "(dedent)";
            break;
        default:
            if (type <= OP)
                symbol = "(operator)";
            break;
        }

        if (symbol == NULL)
            break; /* unrecognized token type: stop, keep what we have */

        if (type == ENDMARKER) {
            /* the end marker carries no source text; report the symbol */
            text = end_symbol;
            size = (int) (sizeof(end_symbol) - 1);
        } else {
            text = a;
            /* only subtract when both pointers are valid */
            size = (a != NULL && b != NULL) ? (int) (b - a) : 0;
        }

        /* "s#" reads an int length here (PY_SSIZE_T_CLEAN is not defined) */
        item = Py_BuildValue("ss#", symbol, text, size);
        if (item == NULL)
            goto error;

        /* PyList_Append takes its own reference; always drop ours */
        status = PyList_Append(tokens, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        if (type == ENDMARKER)
            break;
    }

    PyTokenizer_Free(tok);
    return tokens;

error:
    Py_DECREF(tokens);
    PyTokenizer_Free(tok);
    return NULL;
}

/* -------------------------------------------------------------------- */

/* Module-level functions exported by "pytoken". */
static PyMethodDef functions[] = {
    {"token_list", (PyCFunction) token_list, METH_VARARGS},
    {NULL, NULL} /* sentinel */
};

/* Module initialization entry point: registers the "pytoken" module. */
DL_EXPORT(void)
initpytoken(void)
{
    Py_InitModule("pytoken", functions);
}