erlang.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
  1. /*jshint unused:true, eqnull:true, curly:true, bitwise:true */
  2. /*jshint undef:true, latedef:true, trailing:true */
  3. /*global CodeMirror:true */
  4. // erlang mode.
  5. // tokenizer -> token types -> CodeMirror styles
  6. // tokenizer maintains a parse stack
  7. // indenter uses the parse stack
  8. // TODO indenter:
  9. // bit syntax
  10. // old guard/bif/conversion clashes (e.g. "float/1")
  11. // type/spec/opaque
  12. (function(mod) {
  13. if (typeof exports == "object" && typeof module == "object") // CommonJS
  14. mod(require("../../lib/codemirror"));
  15. else if (typeof define == "function" && define.amd) // AMD
  16. define(["../../lib/codemirror"], mod);
  17. else // Plain browser env
  18. mod(CodeMirror);
  19. })(function(CodeMirror) {
  20. "use strict";
  21. CodeMirror.defineMIME("text/x-erlang", "erlang");
  22. CodeMirror.defineMode("erlang", function(cmCfg) {
  23. "use strict";
  24. /////////////////////////////////////////////////////////////////////////////
  25. // constants
  // Module attributes that introduce type information; the tokenizer
  // reports these as "type" rather than "attribute".
  var typeWords = [
    "-type", "-spec", "-export_type", "-opaque"];

  // Reserved words.
  var keywordWords = [
    "after","begin","catch","case","cond","end","fun","if",
    "let","of","query","receive","try","when"];

  // Separators: the RE says which characters may take part in a separator,
  // the word list says which complete lexemes are separators.
  var separatorRE = /[\->,;]/;
  var separatorWords = [
    "->",";",","];

  // Operators that are spelled like atoms.
  var operatorAtomWords = [
    "and","andalso","band","bnot","bor","bsl","bsr","bxor",
    "div","not","or","orelse","rem","xor"];

  // Symbolic operators (same RE/word-list split as for separators).
  var operatorSymbolRE = /[\+\-\*\/<>=\|:!]/;
  var operatorSymbolWords = [
    "=","+","-","*","/",">",">=","<","=<","=:=","==","=/=","/=","||","<-","!"];

  // Opening and closing brackets, including the binary brackets << and >>.
  var openParenRE = /[<\(\[\{]/;
  var openParenWords = [
    "<<","(","[","{"];

  var closeParenRE = /[>\)\]\}]/;
  var closeParenWords = [
    "}","]",")",">>"];

  // Guard tests, including the old-style names without the "is_" prefix.
  var guardWords = [
    "is_atom","is_binary","is_bitstring","is_boolean","is_float",
    "is_function","is_integer","is_list","is_number","is_pid",
    "is_port","is_record","is_reference","is_tuple",
    "atom","binary","bitstring","boolean","function","integer","list",
    "number","pid","port","record","reference","tuple"];

  // Built-in functions. "concat_binary" was previously misspelled
  // "contact_binary", so that BIF was never recognised.
  var bifWords = [
    "abs","adler32","adler32_combine","alive","apply","atom_to_binary",
    "atom_to_list","binary_to_atom","binary_to_existing_atom",
    "binary_to_list","binary_to_term","bit_size","bitstring_to_list",
    "byte_size","check_process_code","concat_binary","crc32",
    "crc32_combine","date","decode_packet","delete_module",
    "disconnect_node","element","erase","exit","float","float_to_list",
    "garbage_collect","get","get_keys","group_leader","halt","hd",
    "integer_to_list","internal_bif","iolist_size","iolist_to_binary",
    "is_alive","is_atom","is_binary","is_bitstring","is_boolean",
    "is_float","is_function","is_integer","is_list","is_number","is_pid",
    "is_port","is_process_alive","is_record","is_reference","is_tuple",
    "length","link","list_to_atom","list_to_binary","list_to_bitstring",
    "list_to_existing_atom","list_to_float","list_to_integer",
    "list_to_pid","list_to_tuple","load_module","make_ref","module_loaded",
    "monitor_node","node","node_link","node_unlink","nodes","notalive",
    "now","open_port","pid_to_list","port_close","port_command",
    "port_connect","port_control","pre_loaded","process_flag",
    "process_info","processes","purge_module","put","register",
    "registered","round","self","setelement","size","spawn","spawn_link",
    "spawn_monitor","spawn_opt","split_binary","statistics",
    "term_to_binary","time","throw","tl","trunc","tuple_size",
    "tuple_to_list","unlink","unregister","whereis"];

  // upper case: [A-Z] [Ø-Þ] [À-Ö]
  // lower case: [a-z] [ß-ö] [ø-ÿ]
  // A single character that may continue an atom, variable, macro or
  // record name.
  var anumRE = /[\w@Ø-ÞÀ-Öß-öø-ÿ]/;

  // What may follow the backslash in a "$\..." character literal.
  var escapesRE =
    /[0-7]{1,3}|[bdefnrstv\\"']|\^[a-zA-Z]|x[0-9a-zA-Z]{2}|x{[0-9a-zA-Z]+}/;
  80. /////////////////////////////////////////////////////////////////////////////
  81. // tokenizer
  // Reads exactly one token from `stream` and returns the value produced by
  // rval() for it (rval also records the token on the parse stack).
  //
  //   stream - the CodeMirror string stream positioned at the token start
  //   state  - mode state; in_string / in_atom are updated here when a
  //            quoted string or atom is left open at the end of the line
  //
  // The branches are tried in order, so earlier classifications win.
  function tokenizer(stream,state) {
    // in multi-line string
    if (state.in_string) {
      state.in_string = (!doubleQuote(stream));
      return rval(state,stream,"string");
    }

    // in multi-line atom
    if (state.in_atom) {
      state.in_atom = (!singleQuote(stream));
      return rval(state,stream,"atom");
    }

    // whitespace
    if (stream.eatSpace()) {
      return rval(state,stream,"whitespace");
    }

    // attributes and type specs (only when the parse stack is empty)
    if (!peekToken(state) &&
        stream.match(/-\s*[a-zß-öø-ÿ][\wØ-ÞÀ-Öß-öø-ÿ]*/)) {
      if (is_member(stream.current(),typeWords)) {
        return rval(state,stream,"type");
      }else{
        return rval(state,stream,"attribute");
      }
    }

    var ch = stream.next();

    // comment
    if (ch == '%') {
      stream.skipToEnd();
      return rval(state,stream,"comment");
    }

    // colon
    if (ch == ":") {
      return rval(state,stream,"colon");
    }

    // macro
    if (ch == '?') {
      stream.eatSpace();
      stream.eatWhile(anumRE);
      return rval(state,stream,"macro");
    }

    // record
    if (ch == "#") {
      stream.eatSpace();
      stream.eatWhile(anumRE);
      return rval(state,stream,"record");
    }

    // dollar escape
    if (ch == "$") {
      if (stream.next() == "\\" && !stream.match(escapesRE)) {
        return rval(state,stream,"error");
      }
      return rval(state,stream,"number");
    }

    // dot
    if (ch == ".") {
      return rval(state,stream,"dot");
    }

    // quoted atom
    if (ch == '\'') {
      // the assignment records whether the atom is still open at end of line
      if (!(state.in_atom = (!singleQuote(stream)))) {
        if (stream.match(/\s*\/\s*[0-9]/,false)) {
          stream.match(/\s*\/\s*[0-9]/,true);
          return rval(state,stream,"fun");      // 'f'/0 style fun
        }
        if (stream.match(/\s*\(/,false) || stream.match(/\s*:/,false)) {
          return rval(state,stream,"function");
        }
      }
      return rval(state,stream,"atom");
    }

    // string
    if (ch == '"') {
      state.in_string = (!doubleQuote(stream));
      return rval(state,stream,"string");
    }

    // variable
    if (/[A-Z_Ø-ÞÀ-Ö]/.test(ch)) {
      stream.eatWhile(anumRE);
      return rval(state,stream,"variable");
    }

    // atom/keyword/BIF/function
    if (/[a-z_ß-öø-ÿ]/.test(ch)) {
      stream.eatWhile(anumRE);

      if (stream.match(/\s*\/\s*[0-9]/,false)) {
        stream.match(/\s*\/\s*[0-9]/,true);
        return rval(state,stream,"fun");      // f/0 style fun
      }

      var w = stream.current();

      if (is_member(w,keywordWords)) {
        return rval(state,stream,"keyword");
      }else if (is_member(w,operatorAtomWords)) {
        return rval(state,stream,"operator");
      }else if (stream.match(/\s*\(/,false)) {
        // 'put' and 'erlang:put' are bifs, 'foo:put' is not
        if (is_member(w,bifWords) &&
            ((peekToken(state).token != ":") ||
             (peekToken(state,2).token == "erlang"))) {
          return rval(state,stream,"builtin");
        }else if (is_member(w,guardWords)) {
          return rval(state,stream,"guard");
        }else{
          return rval(state,stream,"function");
        }
      }else if (lookahead(stream) == ":") {
        // module qualifier: "erlang:" is styled like a builtin
        if (w == "erlang") {
          return rval(state,stream,"builtin");
        } else {
          return rval(state,stream,"function");
        }
      }else if (is_member(w,["true","false"])) {
        return rval(state,stream,"boolean");
      }else{
        return rval(state,stream,"atom");
      }
    }

    // number
    var digitRE      = /[0-9]/;
    var radixRE      = /[0-9a-zA-Z]/;         // 36#zZ style int
    if (digitRE.test(ch)) {
      stream.eatWhile(digitRE);
      if (stream.eat('#')) {                // 36#aZ  style integer
        if (!stream.eatWhile(radixRE)) {
          stream.backUp(1);                 //"36#" - syntax error
        }
      } else if (stream.eat('.')) {       // float
        if (!stream.eatWhile(digitRE)) {
          stream.backUp(1);        // "3." - probably end of function
        } else {
          if (stream.eat(/[eE]/)) {        // float with exponent
            if (stream.eat(/[-+]/)) {
              if (!stream.eatWhile(digitRE)) {
                stream.backUp(2);            // "2e-" - syntax error
              }
            } else {
              if (!stream.eatWhile(digitRE)) {
                stream.backUp(1);            // "2e" - syntax error
              }
            }
          }
        }
      }
      return rval(state,stream,"number");   // normal integer
    }

    // open parens
    if (nongreedy(stream,openParenRE,openParenWords)) {
      return rval(state,stream,"open_paren");
    }

    // close parens
    if (nongreedy(stream,closeParenRE,closeParenWords)) {
      return rval(state,stream,"close_paren");
    }

    // separators
    if (greedy(stream,separatorRE,separatorWords)) {
      return rval(state,stream,"separator");
    }

    // operators
    if (greedy(stream,operatorSymbolRE,operatorSymbolWords)) {
      return rval(state,stream,"operator");
    }

    return rval(state,stream,null);
  }
  247. /////////////////////////////////////////////////////////////////////////////
  248. // utilities
  // Shortest-match lexer helper. Expects the caller to have consumed exactly
  // one character. If that character matches `re`, the stream is rewound and
  // re-read one character at a time; as soon as the text read so far is one
  // of `words` the function returns true with that text consumed. Otherwise
  // the stream is left with a single character consumed and false is returned.
  function nongreedy(stream,re,words) {
    var first = stream.current();
    if (first.length != 1 || !re.test(first)) {
      return false;
    }
    stream.backUp(1);
    for (;;) {
      if (!re.test(stream.peek())) {
        break;
      }
      stream.next();
      if (is_member(stream.current(),words)) {
        return true;
      }
    }
    // nothing matched: give back everything except the first character
    stream.backUp(stream.current().length-1);
    return false;
  }
  // Longest-match lexer helper. Expects the caller to have consumed exactly
  // one character. If that character matches `re`, every following character
  // matching `re` is consumed, then the stream is backed up one character at
  // a time until the consumed text is one of `words` (returns true). If no
  // prefix is a word, a single character is left consumed and false is
  // returned.
  function greedy(stream,re,words) {
    var first = stream.current();
    if (first.length != 1 || !re.test(first)) {
      return false;
    }
    while (re.test(stream.peek())) {
      stream.next();
    }
    for (var lexeme = stream.current();
         lexeme.length > 0;
         lexeme = stream.current()) {
      if (is_member(lexeme,words)) {
        return true;
      }
      stream.backUp(1);
    }
    // everything was given back; re-consume the first character
    stream.next();
    return false;
  }
  // Scans for the closing double quote of a string, with backslash as the
  // escape character. Returns whatever quote() returns.
  function doubleQuote(stream) {
    var closed = quote(stream, '"', '\\');
    return closed;
  }
  // Scans for the closing single quote of a quoted atom, with backslash as
  // the escape character. Returns whatever quote() returns.
  function singleQuote(stream) {
    var closed = quote(stream,'\'','\\');
    return closed;
  }
  // Consumes characters up to and including the next unescaped `quoteChar`.
  // A character following `escapeChar` is skipped without being examined.
  // Returns true if the closing quote was found on this line, false if the
  // end of the line was reached first.
  function quote(stream,quoteChar,escapeChar) {
    var closed = false;
    while (!closed && !stream.eol()) {
      var c = stream.next();
      if (c == quoteChar) {
        closed = true;
      }else if (c == escapeChar) {
        stream.next();
      }
    }
    return closed;
  }
  // Peeks (without consuming) past any whitespace and %-comments and
  // returns the first character found after them, or "" when the pattern
  // does not match.
  function lookahead(stream) {
    var found = stream.match(/([\n\s]+|%[^\n]*\n)*(.)/,false);
    if (!found) {
      return "";
    }
    // the last capture group holds the character of interest
    return found[found.length-1];
  }
  // True when `element` occurs in `list` (as decided by list.indexOf).
  function is_member(element,list) {
    var position = list.indexOf(element);
    return position > -1;
  }
  // Records the token that was just read on the parse stack, then maps the
  // erlang token type to the CodeMirror style name to return for it.
  // Types without an entry (and entries mapped to null) yield null.
  function rval(state,stream,type) {
    // erlang -> CodeMirror tag
    var styleOf = {
      "atom":        "atom",
      "attribute":   "attribute",
      "boolean":     "special",
      "builtin":     "builtin",
      "close_paren": null,
      "colon":       null,
      "comment":     "comment",
      "dot":         null,
      "error":       "error",
      "fun":         "meta",
      "function":    "tag",
      "guard":       "property",
      "keyword":     "keyword",
      "macro":       "variable-2",
      "number":      "number",
      "open_paren":  null,
      "operator":    "operator",
      "record":      "bracket",
      "separator":   null,
      "string":      "string",
      "type":        "def",
      "variable":    "variable"
    };

    // parse stack
    pushToken(state,realToken(type,stream));

    // own-property check so inherited names never count as token types
    if (Object.prototype.hasOwnProperty.call(styleOf,type)) {
      return styleOf[type];
    }
    return null;
  }
  // Builds a parse-stack entry from its four fields:
  // token text, column, indentation and token type.
  function aToken(tok,col,ind,typ) {
    var entry = {};
    entry.token = tok;
    entry.column = col;
    entry.indent = ind;
    entry.type = typ;
    return entry;
  }
  // Builds a parse-stack entry for the token currently held by `stream`:
  // its text, column and line indentation, tagged with `type`.
  function realToken(type,stream) {
    var text = stream.current();
    var col = stream.column();
    var ind = stream.indentation();
    return aToken(text,col,ind,type);
  }
  // Builds a synthetic parse-stack entry that does not come from the text:
  // `type` is used as both token text and token type, at column 0 with
  // indentation 0.
  function fakeToken(type) {
    var origin = 0;
    return aToken(type,origin,origin,type);
  }
  // Returns the entry `depth` positions from the top of state.tokenStack
  // (1 = top; a missing or falsy depth means 1), or false when the stack
  // holds fewer than `depth` entries.
  function peekToken(state,depth) {
    var stack = state.tokenStack;
    var back = depth || 1;
    return (stack.length < back) ? false : stack[stack.length-back];
  }
  // Adds `token` to state.tokenStack and lets maybe_drop_pre and
  // maybe_drop_post prune the stack. Comment and whitespace tokens are
  // ignored entirely.
  function pushToken(state,token) {
    var kind = token.type;
    if (kind == "comment" || kind == "whitespace") {
      return;
    }
    state.tokenStack = maybe_drop_pre(state.tokenStack,token);
    state.tokenStack = maybe_drop_post(state.tokenStack);
  }
  // Pushes `token` onto the stack `s` (in place) and returns `s`, with two
  // exceptions that only apply when `s` holds at least two entries:
  //  - a "dot" arriving on top of a "record" entry removes the record entry
  //    and is not pushed itself;
  //  - a "group" entry on top is replaced by `token`.
  function maybe_drop_pre(s,token) {
    var top = (s.length > 1) ? s[s.length-1] : null;
    var closesRecord = top !== null &&
                       top.type === "record" && token.type === "dot";
    if (top !== null && (closesRecord || top.type === "group")) {
      s.pop();
    }
    if (!closesRecord) {
      s.push(token);
    }
    return s;
  }
  // Prunes the parse stack `s` after a token has been pushed, based on the
  // token now on top. Returns the (possibly new) stack.
  //  - a "dot" on top empties the stack;
  //  - a "fun" typed token (f/0 style) directly above the keyword `fun`
  //    removes both;
  //  - closing brackets, separators and some keywords are handed to d(),
  //    which unwinds the stack to the matching opener.
  // An empty stack is returned unchanged, and the "fun" rule is only tried
  // when there is an entry below the top; previously a lone "fun" token
  // (e.g. a buffer starting with "foo/1") raised a TypeError here.
  function maybe_drop_post(s) {
    if (!s.length) {
      return s;
    }
    var last = s.length-1;

    if (s[last].type === "dot") {
      return [];
    }
    if (0 < last && s[last].type === "fun" && s[last-1].token === "fun") {
      return s.slice(0,last-1);
    }
    switch (s[last].token) {
      case "}":    return d(s,{g:["{"]});
      case "]":    return d(s,{i:["["]});
      case ")":    return d(s,{i:["("]});
      case ">>":   return d(s,{i:["<<"]});
      case "end":  return d(s,{i:["begin","case","fun","if","receive","try"]});
      case ",":    return d(s,{e:["begin","try","when","->",
                                  ",","(","[","{","<<"]});
      case "->":   return d(s,{r:["when"],
                               m:["try","if","case","receive"]});
      case ";":    return d(s,{E:["case","fun","if","receive","try","when"]});
      case "catch":return d(s,{e:["try"]});
      case "of":   return d(s,{e:["case"]});
      case "after":return d(s,{e:["receive","try"]});
      default:     return s;
    }
  }
  // Unwinds the parse stack to a matching opener.
  //
  // stack is a stack of Token objects; its last entry is the "top" token.
  // tt is an object {kind: tokens}: kind is a single letter and tokens is a
  // list of token strings. For every kind in turn, the stack is searched
  // downwards (starting just below the top) for an entry whose .token is in
  // tokens. On the first hit everything between the hit and the top is
  // dropped and, depending on kind, the result is:
  //   "m"  the hit and the top token are both kept
  //   "r"  the hit is removed, the top token is kept
  //   "i"  the hit and the top token are both removed
  //   "g"  like "i", but a fake "group" token is put on top
  //   "e"  the hit is kept, the top token is removed
  //   "E"  like "e"
  // Without any hit the stack is returned untouched, unless the last kind
  // in tt is "E", in which case an empty stack is returned.
  function d(stack,tt) {
    var top = stack.length-1;
    var kind;
    for (kind in tt) {
      var wanted = tt[kind];
      for (var pos = top-1; pos >= 0; pos--) {
        if (!is_member(stack[pos].token,wanted)) {
          continue;
        }
        var below = stack.slice(0,pos);
        if (kind === "m") {
          return below.concat(stack[pos]).concat(stack[top]);
        }
        if (kind === "r") {
          return below.concat(stack[top]);
        }
        if (kind === "i") {
          return below;
        }
        if (kind === "g") {
          return below.concat(fakeToken("group"));
        }
        if (kind === "E" || kind === "e") {
          return below.concat(stack[pos]);
        }
        // unknown kind: keep scanning, as before
      }
    }
    return (kind == "E" ? [] : stack);
  }
  435. /////////////////////////////////////////////////////////////////////////////
  436. // indenter
  // Computes the indentation (in columns) for a line whose text starts with
  // `textAfter`, from the parse stack in `state`. Returns CodeMirror.Pass
  // when inside a multi-line string/atom or when no opener is found for a
  // closing word/bracket. The rules are tried in order; the first one that
  // applies decides.
  function indenter(state,textAfter) {
    var unit = cmCfg.indentUnit;
    var wordAfter = wordafter(textAfter);
    var currT = peekToken(state,1);
    var prevT = peekToken(state,2);
    var commaLike = [",","|","||"];
    var t;

    if (state.in_string || state.in_atom) {
      return CodeMirror.Pass;
    }
    // fewer than two tokens on the stack
    if (!prevT) {
      return 0;
    }
    if (currT.token == "when") {
      return currT.column+unit;
    }
    if (wordAfter === "when" && prevT.type === "function") {
      return prevT.indent+unit;
    }
    if (wordAfter === "(" && currT.token === "fun") {
      return currT.column+3;
    }
    if (wordAfter === "catch") {
      // without an enclosing "try" the later rules still get their turn
      t = getToken(state,["try"]);
      if (t) {
        return t.column;
      }
    }
    if (is_member(wordAfter,["end","after","of"])) {
      t = getToken(state,["begin","case","fun","if","receive","try"]);
      return t ? t.column : CodeMirror.Pass;
    }
    if (is_member(wordAfter,closeParenWords)) {
      t = getToken(state,openParenWords);
      return t ? t.column : CodeMirror.Pass;
    }
    if (is_member(currT.token,commaLike) || is_member(wordAfter,commaLike)) {
      t = postcommaToken(state);
      return t ? t.column+t.token.length : unit;
    }
    if (currT.token == "->") {
      if (is_member(prevT.token, ["receive","case","if","try"])) {
        return prevT.column+unit+unit;
      }
      return prevT.column+unit;
    }
    if (is_member(currT.token,openParenWords)) {
      return currT.column+currT.token.length;
    }
    t = defaultToken(state);
    return truthy(t) ? t.column+unit : 0;
  }
  // Returns the lexeme `str` starts with if it is one the indenter cares
  // about (a comma, a lower-case word, a closing bracket, a run of "|", or
  // "("); otherwise returns "".
  function wordafter(str) {
    var found = str.match(/,|[a-z]+|\}|\]|\)|>>|\|+|\(/);
    // only a match at the very beginning of the text counts
    if (found !== null && found.index === 0) {
      return found[0];
    }
    return "";
  }
  // Ignoring the top of the parse stack, returns the nearest entry of type
  // "open_paren", or false when there is none.
  function postcommaToken(state) {
    var below = state.tokenStack.slice(0,-1);
    var idx = getTokenIndex(below,"type",["open_paren"]);
    var hit = below[idx];
    return truthy(hit) ? hit : false;
  }
  // Picks the parse-stack entry the fallback indentation rule is based on.
  // `stop` is the index of the topmost open_paren/separator/keyword entry
  // and `oper` the index of the topmost operator entry. Without a stop
  // entry the result is false; when an operator lies above the stop entry
  // the entry right after the stop entry is returned, otherwise the stop
  // entry itself.
  function defaultToken(state) {
    var objs = state.tokenStack;
    var stop = getTokenIndex(objs,"type",["open_paren","separator","keyword"]);
    var oper = getTokenIndex(objs,"type",["operator"]);
    if (!truthy(stop)) {
      return false;
    }
    return (truthy(oper) && stop < oper) ? objs[stop+1] : objs[stop];
  }
  // Returns the topmost parse-stack entry whose token text is one of
  // `tokens`, or false when there is none.
  function getToken(state,tokens) {
    var stack = state.tokenStack;
    var hit = stack[getTokenIndex(stack,"token",tokens)];
    if (truthy(hit)) {
      return hit;
    }
    return false;
  }
  // Searches `objs` from the end towards the start for an entry whose
  // property `propname` is one of `propvals`. Returns the index of the first
  // entry found that way, or false when nothing matches.
  function getTokenIndex(objs,propname,propvals) {
    var i = objs.length;
    while (i-- > 0) {
      if (is_member(objs[i][propname],propvals)) {
        return i;
      }
    }
    return false;
  }
  // True for every value except false, null and undefined. Unlike a plain
  // boolean test, 0 and "" count as truthy, which matters because 0 is a
  // valid stack index.
  function truthy(x) {
    return !(x === false || x == null);
  }
  515. /////////////////////////////////////////////////////////////////////////////
  516. // this object defines the mode
  517. return {
  518. startState:
  519. function() {
  520. return {tokenStack: [],
  521. in_string: false,
  522. in_atom: false};
  523. },
  524. token:
  525. function(stream, state) {
  526. return tokenizer(stream, state);
  527. },
  528. indent:
  529. function(state, textAfter) {
  530. return indenter(state,textAfter);
  531. },
  532. lineComment: "%"
  533. };
  534. });
  535. });