From fd4a48eb76ca6ac89c4b4d19544ec5c62aa10c85 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Thu, 22 Feb 2024 14:02:21 +0100 Subject: [PATCH 01/32] server/llm: first steps towards supporting ollama --- .../components/language-model-icon.tsx | 4 + .../frontend/components/ollama-avatar.tsx | 33 ++ src/packages/frontend/components/ollama.png | Bin 0 -> 18226 bytes .../components/openai/vendor-status-check.tsx | 8 + src/packages/pnpm-lock.yaml | 423 +++++++++++++++++- src/packages/server/llm/client.ts | 35 +- src/packages/server/llm/embeddings.ts | 2 +- src/packages/server/llm/index.ts | 100 +++-- src/packages/server/llm/ollama.ts | 56 +++ src/packages/server/package.json | 1 + src/packages/util/db-schema/openai.ts | 12 +- src/packages/util/db-schema/site-defaults.ts | 8 + .../util/db-schema/site-settings-extras.ts | 13 +- 13 files changed, 641 insertions(+), 54 deletions(-) create mode 100644 src/packages/frontend/components/ollama-avatar.tsx create mode 100644 src/packages/frontend/components/ollama.png create mode 100644 src/packages/server/llm/ollama.ts diff --git a/src/packages/frontend/components/language-model-icon.tsx b/src/packages/frontend/components/language-model-icon.tsx index 1c84bbf196..bef2211d43 100644 --- a/src/packages/frontend/components/language-model-icon.tsx +++ b/src/packages/frontend/components/language-model-icon.tsx @@ -5,6 +5,7 @@ import { unreachable } from "@cocalc/util/misc"; import AIAvatar from "./ai-avatar"; import GoogleGeminiLogo from "./google-gemini-avatar"; import GooglePalmLogo from "./google-palm-avatar"; +import OllamaAvatar from "./ollama-avatar"; import OpenAIAvatar from "./openai-avatar"; export function LanguageModelVendorAvatar( @@ -40,6 +41,9 @@ export function LanguageModelVendorAvatar( return fallback(); } } + case "ollama": + return ; + default: unreachable(vendor); return fallback(); diff --git a/src/packages/frontend/components/ollama-avatar.tsx b/src/packages/frontend/components/ollama-avatar.tsx new file mode 100644 index 0000000000..c9c33f93c1 --- /dev/null +++ b/src/packages/frontend/components/ollama-avatar.tsx @@ -0,0 +1,33 @@ +import { CSS } from "../app-framework"; +import ollamaPng from "./ollama.png"; + +export default function OllamaAvatar({ + size = 64, + style, +}: { + size: number; + style?: CSS; +}) { + // render the ollamaPng (a square png image with transparent background) with the given size and background color + + return ( +
+ +
+ ); +} diff --git a/src/packages/frontend/components/ollama.png b/src/packages/frontend/components/ollama.png new file mode 100644 index 0000000000000000000000000000000000000000..1f142c8d534e0165096cd7ae1387be9bf0e83068 GIT binary patch literal 18226 zcmeIaWl$vDvM!9fyVJP4H}3B4+PJ&ByE}us!(f99gElg_3^uq73@!r<3?J{i_c>?p zxcASA`0l^m5mjBaR_2qLPiC#C>dNRiH5FM@BqAgT2nbYpIVp|z@1DOl1eo{z_X?d= z2nYb1zm}e-hPe;G)!oI~&JhIg^m7FPK)!a?5D>oWl?D3QBz)=8Z>{krP)Fth1SrQ# zX#P(@5eYyyFT=Z{X%~?6CS-z!95!yQ_xDX9F z`b`uVY`R^{Ga!O2I&e7{~H$ zy$_O06#>gNuc_uUk=VlLp9_K8N`ItTQEIP(6!MyBlA6<60G|!2T|&BNj8S>sZpz5Z zZq9qJ&VFsp&QD&SrQa=5uDUdT4&08qIjiZbe7l(F2?@dv@fVG`C7f5SZw~qOA$Yqm zZ*)4VvdJ{#*^=>6u*q;!A>|^uRD1hTw_7?O1U^Jizh6FTzsvMx%REx9+u01pJev4t z|3wR-Me>owegT1x(&g6=1|)8Dqikm`-K~{rDyJh}pIeS`J}1%ijiY_u3NSdHpmViD z*<_~2{qw{0mukq4NtOh{K!_pJ7;ZUUfjPbV^CU=$db3%Y#Un$d3Y?i7O3?E@NcRQRnWesE3j5BEhw;+ zPv_e=Rj#bpiMe$ywK-HSuNb(LfBoQpQMvlv|FV1*Qx=MQs-`?iWGpwsM&&~2>Bj`D zM^pF0WPP``u3=CT58sdWTh`u-&fjDKAAA=eMOB2)@3>|@npLEOm~byYDveTw9#AB(OXEd8^#_+i&`A-(Urw-hT!t8-k`fBylsO8qAti~E@s_+ zYcD0++J24=s#8li#aEG z9^Vzuw!#G;AHRJWT-Tx}f03E2x^_aDBlx5*YFCG#(UfL}6?M5r3y`l2*^R=NEh1hf zHT~EQZ^8@`RlXd^bZjhQ!K3^mqTA#dSe!r?LDNPI^lDoAvS8Ys;s(c^w@_C6^~Q5N zvDI9QZR>n;b7QLZP^}Fi(B+ZMVYzegPj7=y``u$2M}dnUm=1tb{DD7*Iobk_4li|s zE9^d4@g2%ube=O0)lM_AFMXPE(RnKzo%ZkTSYv*%jg#I%C|C&;FOCqglWyNHZ?>8! z%2GH>XEw_eY($J$FiTT#ZqEwf{U%w5BbYJ0M0Ar1#UArDicKvhmKb&i-K(U-w;MI!r6HpO@}lf{ay-$$~Juv7w;)TUcO+<+qd>C{336b4e}3 z%)xTG`$*D!xzA*Cfnmdg!Ash@6Q6r1aHt#AfsmU#UMz5$w^s2;rw^0p-->p{fVk@? zfbdJW04oT7A05m<{?tf0ct6@s%G+)qk{PUykl2a)dyJ?Z-S6zu7sTHXZH~l911Lr5 z&^wBpXQ!i>d+{rK7)0v5dhiCD8nWXo6^!nWd&K%xAR z*^`XGr(lfI9XaCgZV9clb?1QgmP5^7Iq3{9{4B4!4Cv^UqjjI(c8x=8Eo62q8UHh{ z5!P%u14hcj2@lD=p7bC*XBLNq(~q5}VE-ONc59UFTIcLaWwRj6DH@Jfa@yw%YVt#B zP-=gqyaMKG-tDHPKn2V+2jbiwHh4qE+{(*p656&-HJF_*B|zjUHJ{6fzgv*H{gSJbX4A8*LX)B(eTehy9YHE>%vf z&qVJH^H6acdTM@EA;8n!_nfL9nr=@qH+W4q3~EEdJDkRzo7t?mj(~~WoREOJxCsB- zW~2JV!!G`GnYu~{Wyr6bx}6Os41&BVah)xc;BEP#^e=A7ES>s@BAHWg#dEGovas@_ z_MCjr=uE%u8rZI?yb4iJmvKhAjS6<{`DL#l67|gTr|Vg8Xi&t$mY^_((u#Y`P?Vl) z+C@+&VY86#B!RJ_kLsvEo)~dSybnI;LP1PcDBUPy>bC-L3dsl`C6d*$0xrDZf6=zj zL##-RV!E^IuAtiaYoHa2(3L0hns$=B(Tk!>O;Z`mJhUP3X4FBD%WPGzBi56_!rk`(dhSw||%yv8xW2bBzR* z2F?_on0{nR@G)ziVB|YM7{wa%=4u3s?Jz&OP<9YT zKoQs<;PsRM{8#;(ghJJHwJi9v($XElYMz(}>nmpN(@Jk*reo-@wf=CKxzyFwnc2`8 zn0T?#8BoEn{Q`n+u>Fu)(*l9PrNUK4de*9`X^;@&dA9&B_3VuCm|b8=l!&3R1(2dT z2v=95dI=H$2Tz`f4!t!}Bfrg3kg(Peos9Jb#*By~zJ|R_;nfk6HO;EXz(gReOtzzP zGc9nWw+(?W-uUN_zyp+%bB4$04>nm-dG zN5;{(qs77oEnwk0n<+d&#U7^c!iwKfvOl9xvud=}2+{0TNxD;0$ni_o;fW1Flpqm4 zSEe%ZLh6dIQ1z9b8B$Xk(s)Kq9Sx{K;d~+iL{&z=R5<>+`psbYE#SfRp7D!h(h>ttgH|IO<{ZInr$Vzw2B*gIH^2C~QkM);g^3>l@6K?%xE|(wf zIVdmkMGaD@nAxP#l5>c{?N=sNYENL~&t@tkUYckW2a7%o4LrwGzhy;@C2be5n6)7@ zF;YLl3dnj)d-+LtfeYJ&sl*IK)FYwlCTH4o<|QPGt>71+q>rGY3eA_^Nf7s2U9s(T2V2*VRZi5&cDm=&=Cfat!WqBsnz z3E7jzGZ@Dx4^`iQ0ra_`c5rJ>VPnMvmcSRn`v!DL@!@evls~q8G?uXBD5qwS6Huvr zHCWD2#Thil_3JU`k53kb_Ov3^zKn=Pp^e|nwDo~r*||d)`lgCb$xzG4x0I=#ODTz9HK*>d65dD zeD{N-nX%85Y$d!9Wc}X{ziiZo2WsU%MW|jA4T5{C6(@5-$HYf!%xhSa-$fT zuUz|xwDeicuXRBgnLX&+A|!_B4U~C@b(VcdHL#@JhBISv6b*BQyQbR3AEDA$T!CwU5OeX z`2I+?2%n5i6)SfX!qBfT8;Vhb^Zf{z8~H6Fp#c|*)JY*%qx_i-+qUPT0lpB!1!_G# zhZhiHj}vQ3c26kyX;wolzBa z+3d@D^_nVw4>6M7>rZ1iiQ;c&;jMJbRlx5$tr#Py>)V`k>HV=qpP&w)rDLU%y_*rk zau(oZB835PDmEMvD0GoI#IPcWg{t<$x+uhm(2c(QAeL;i&CG>;&0|JgV^uIi^=t0MGVj9YMRNLt$0yhch5)+(IM8U0*IWOFhM?8_VF{d zXOT*q<6Jo4{z+zEHxmMrqaZ+B@IhguO3~#y;cXzNUcbZ=vO}S>{n&P&Iz^LI@X_J% z$JPj+-L=v}keAv^m*ACjg21x)cwGOT1=x?y8fcJU7|KOc{#~ugC^U-7rH!IIv5fv} zbr+F^s2Z`BPbNrPPm=kzNRJfn-n{OQP0%kpejYx; 
zjimRP6P%QKX#bqYJur+^B(0|$J;YV%*c=$x^4&{l@PGf)21w~DSO~w4OeRR4_C1sv zeT7slKQVbTNU;G!`QfP>wHyInyH683g^;*uyEje>jq&pov&zVPSzy~KJ>(54=$@m2 zVzHNb81hvzcd)~g21j_+7{!4N1KfNLFE$2r_)FxT;Y8wM1HZ#nFbE<~y^{l2;i-Hk z`^KLrPh!?EAA?Gg-Xb}mDt~z5(5~a_srgA`%)}9283E#kYLY`+^*)e(Mc#pd9`a*; z8)}cvx^TkmKIaXuV%UrCBgN|a$7+n?%(5oYpQ(KGsnz2Q2S1Q<_#(+~V5AM5k~G1F z;r5d21Du*+eH1DbtEcXs$f5`Nfr6*HLF?bweLO~#p5`6`H@Hr7`2MUQ)Ybdil+RUu;1h ); + case "ollama": + return ( + <> + This Ollama based API endpoint does not have a status page. If you are + experiencing issues you have to check with the API service directly or + try again later. + + ); default: unreachable(vendor); } diff --git a/src/packages/pnpm-lock.yaml b/src/packages/pnpm-lock.yaml index c6a1b7496f..d975732d90 100644 --- a/src/packages/pnpm-lock.yaml +++ b/src/packages/pnpm-lock.yaml @@ -1339,6 +1339,9 @@ importers: '@isaacs/ttlcache': specifier: ^1.2.1 version: 1.2.1 + '@langchain/community': + specifier: ^0.0.32 + version: 0.0.32(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21) '@node-saml/passport-saml': specifier: ^4.0.4 version: 4.0.4 @@ -3979,6 +3982,307 @@ packages: - crypto dev: false + /@langchain/community@0.0.32(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21): + resolution: {integrity: sha512-jN4BxGKAmLbA87hqXH5Mx1IRMMVOgcn1TY1MLOVyBcBa12EvHFx8suogtXgA2ekfc8U8nIryVb1ftSupwUBv/A==} + engines: {node: '>=18'} + peerDependencies: + '@aws-crypto/sha256-js': ^5.0.0 + '@aws-sdk/client-bedrock-agent-runtime': ^3.485.0 + '@aws-sdk/client-bedrock-runtime': ^3.422.0 + '@aws-sdk/client-dynamodb': ^3.310.0 + '@aws-sdk/client-kendra': ^3.352.0 + '@aws-sdk/client-lambda': ^3.310.0 + '@aws-sdk/client-sagemaker-runtime': ^3.310.0 + '@aws-sdk/client-sfn': ^3.310.0 + '@aws-sdk/credential-provider-node': ^3.388.0 + '@azure/search-documents': ^12.0.0 + '@clickhouse/client': ^0.2.5 + '@cloudflare/ai': '*' + '@datastax/astra-db-ts': ^0.1.4 + '@elastic/elasticsearch': ^8.4.0 + '@getmetal/metal-sdk': '*' + '@getzep/zep-js': ^0.9.0 + '@gomomento/sdk': ^1.51.1 + '@gomomento/sdk-core': ^1.51.1 + '@google-ai/generativelanguage': ^0.2.1 + '@gradientai/nodejs-sdk': ^1.2.0 + '@huggingface/inference': ^2.6.4 + '@mozilla/readability': '*' + '@opensearch-project/opensearch': '*' + '@pinecone-database/pinecone': '*' + '@planetscale/database': ^1.8.0 + '@qdrant/js-client-rest': ^1.2.0 + '@raycast/api': ^1.55.2 + '@rockset/client': ^0.9.1 + '@smithy/eventstream-codec': ^2.0.5 + '@smithy/protocol-http': ^3.0.6 + '@smithy/signature-v4': ^2.0.10 + '@smithy/util-utf8': ^2.0.0 + '@supabase/postgrest-js': ^1.1.1 + '@supabase/supabase-js': ^2.10.0 + '@tensorflow-models/universal-sentence-encoder': '*' + '@tensorflow/tfjs-converter': '*' + '@tensorflow/tfjs-core': '*' + '@upstash/redis': ^1.20.6 + '@upstash/vector': ^1.0.2 + '@vercel/kv': ^0.2.3 + '@vercel/postgres': ^0.5.0 + '@writerai/writer-sdk': ^0.40.2 + '@xata.io/client': ^0.28.0 + '@xenova/transformers': ^2.5.4 + '@zilliz/milvus2-sdk-node': '>=2.2.7' + better-sqlite3: ^9.4.0 + cassandra-driver: ^4.7.2 + chromadb: '*' + closevector-common: 0.1.3 + closevector-node: 0.1.6 + closevector-web: 0.1.6 + cohere-ai: '*' + convex: ^1.3.1 + discord.js: ^14.14.1 + dria: ^0.0.3 + faiss-node: ^0.5.1 + firebase-admin: ^11.9.0 || ^12.0.0 + google-auth-library: ^8.9.0 + googleapis: ^126.0.1 + hnswlib-node: ^1.4.2 + html-to-text: ^9.0.5 + ioredis: ^5.3.2 + jsdom: '*' + llmonitor: ^0.5.9 + lodash: ^4.17.21 + lunary: ^0.6.11 + mongodb: '>=5.2.0' + mysql2: ^3.3.3 + 
neo4j-driver: '*' + node-llama-cpp: '*' + pg: ^8.11.0 + pg-copy-streams: ^6.0.5 + pickleparser: ^0.2.1 + portkey-ai: ^0.1.11 + redis: '*' + replicate: ^0.18.0 + typeorm: ^0.3.12 + typesense: ^1.5.3 + usearch: ^1.1.1 + vectordb: ^0.1.4 + voy-search: 0.6.2 + weaviate-ts-client: '*' + web-auth-library: ^1.0.3 + ws: ^8.14.2 + peerDependenciesMeta: + '@aws-crypto/sha256-js': + optional: true + '@aws-sdk/client-bedrock-agent-runtime': + optional: true + '@aws-sdk/client-bedrock-runtime': + optional: true + '@aws-sdk/client-dynamodb': + optional: true + '@aws-sdk/client-kendra': + optional: true + '@aws-sdk/client-lambda': + optional: true + '@aws-sdk/client-sagemaker-runtime': + optional: true + '@aws-sdk/client-sfn': + optional: true + '@aws-sdk/credential-provider-node': + optional: true + '@azure/search-documents': + optional: true + '@clickhouse/client': + optional: true + '@cloudflare/ai': + optional: true + '@datastax/astra-db-ts': + optional: true + '@elastic/elasticsearch': + optional: true + '@getmetal/metal-sdk': + optional: true + '@getzep/zep-js': + optional: true + '@gomomento/sdk': + optional: true + '@gomomento/sdk-core': + optional: true + '@google-ai/generativelanguage': + optional: true + '@gradientai/nodejs-sdk': + optional: true + '@huggingface/inference': + optional: true + '@mozilla/readability': + optional: true + '@opensearch-project/opensearch': + optional: true + '@pinecone-database/pinecone': + optional: true + '@planetscale/database': + optional: true + '@qdrant/js-client-rest': + optional: true + '@raycast/api': + optional: true + '@rockset/client': + optional: true + '@smithy/eventstream-codec': + optional: true + '@smithy/protocol-http': + optional: true + '@smithy/signature-v4': + optional: true + '@smithy/util-utf8': + optional: true + '@supabase/postgrest-js': + optional: true + '@supabase/supabase-js': + optional: true + '@tensorflow-models/universal-sentence-encoder': + optional: true + '@tensorflow/tfjs-converter': + optional: true + '@tensorflow/tfjs-core': + optional: true + '@upstash/redis': + optional: true + '@upstash/vector': + optional: true + '@vercel/kv': + optional: true + '@vercel/postgres': + optional: true + '@writerai/writer-sdk': + optional: true + '@xata.io/client': + optional: true + '@xenova/transformers': + optional: true + '@zilliz/milvus2-sdk-node': + optional: true + better-sqlite3: + optional: true + cassandra-driver: + optional: true + chromadb: + optional: true + closevector-common: + optional: true + closevector-node: + optional: true + closevector-web: + optional: true + cohere-ai: + optional: true + convex: + optional: true + discord.js: + optional: true + dria: + optional: true + faiss-node: + optional: true + firebase-admin: + optional: true + google-auth-library: + optional: true + googleapis: + optional: true + hnswlib-node: + optional: true + html-to-text: + optional: true + ioredis: + optional: true + jsdom: + optional: true + llmonitor: + optional: true + lodash: + optional: true + lunary: + optional: true + mongodb: + optional: true + mysql2: + optional: true + neo4j-driver: + optional: true + node-llama-cpp: + optional: true + pg: + optional: true + pg-copy-streams: + optional: true + pickleparser: + optional: true + portkey-ai: + optional: true + redis: + optional: true + replicate: + optional: true + typeorm: + optional: true + typesense: + optional: true + usearch: + optional: true + vectordb: + optional: true + voy-search: + optional: true + weaviate-ts-client: + optional: true + web-auth-library: + optional: true + 
ws: + optional: true + dependencies: + '@google-ai/generativelanguage': 1.1.0(encoding@0.1.13) + '@langchain/core': 0.1.32 + '@langchain/openai': 0.0.14(encoding@0.1.13) + flat: 5.0.2 + google-auth-library: 9.4.1(encoding@0.1.13) + langsmith: 0.1.3 + lodash: 4.17.21 + uuid: 9.0.1 + zod: 3.22.4 + transitivePeerDependencies: + - encoding + dev: false + + /@langchain/core@0.1.32: + resolution: {integrity: sha512-7b8wBQMej2QxaDDS0fCQa3/zrA2raTh1RBe2h1som7QxFpWJkHSxwVwdvGUotX9SopmsY99TK54sK0amfDvBBA==} + engines: {node: '>=18'} + dependencies: + ansi-styles: 5.2.0 + camelcase: 6.3.0 + decamelize: 1.2.0 + js-tiktoken: 1.0.10 + langsmith: 0.1.3 + ml-distance: 4.0.1 + p-queue: 6.6.2 + p-retry: 4.6.2 + uuid: 9.0.1 + zod: 3.22.4 + zod-to-json-schema: 3.22.4(zod@3.22.4) + dev: false + + /@langchain/openai@0.0.14(encoding@0.1.13): + resolution: {integrity: sha512-co6nRylPrLGY/C3JYxhHt6cxLq07P086O7K3QaZH7SFFErIN9wSzJonpvhZR07DEUq6eK6wKgh2ORxA/NcjSRQ==} + engines: {node: '>=18'} + dependencies: + '@langchain/core': 0.1.32 + js-tiktoken: 1.0.10 + openai: 4.27.0(encoding@0.1.13) + zod: 3.22.4 + zod-to-json-schema: 3.22.4(zod@3.22.4) + transitivePeerDependencies: + - encoding + dev: false + /@lumino/algorithm@1.9.2: resolution: {integrity: sha512-Z06lp/yuhz8CtIir3PNTGnuk7909eXt4ukJsCzChsGuot2l5Fbs96RJ/FOHgwCedaX74CtxPjXHXoszFbUA+4A==} dev: false @@ -5498,6 +5802,10 @@ packages: '@types/node': 18.18.13 dev: false + /@types/retry@0.12.0: + resolution: {integrity: sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==} + dev: false + /@types/sanitize-html@2.8.0: resolution: {integrity: sha512-Uih6caOm3DsBYnVGOYn0A9NoTNe1c4aPStmHC/YA2JrpP9kx//jzaRcIklFvSpvVQEcpl/ZCr4DgISSf/YxTvg==} dependencies: @@ -5552,6 +5860,10 @@ packages: /@types/uuid@8.3.4: resolution: {integrity: sha512-c/I8ZRb51j+pYGAu5CrFMRxqZ2ke4y2grEBO5AUjgSkSk+qT2Ea+OdWElz/OiMf5MNpn2b17kuVBwZLQJXzihw==} + /@types/uuid@9.0.8: + resolution: {integrity: sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==} + dev: false + /@types/xml-crypto@1.4.3: resolution: {integrity: sha512-pnvKYb7vUsUIMc+C6JM/j779YWQgOMcwjnqHJ9cdaWXwWEBE1hAqthzeszRx62V5RWMvS+XS9w9tXMOYyUc8zg==} dependencies: @@ -6143,7 +6455,6 @@ packages: /ansi-styles@5.2.0: resolution: {integrity: sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==} engines: {node: '>=10'} - dev: true /ansi-styles@6.2.1: resolution: {integrity: sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==} @@ -6751,6 +7062,10 @@ packages: resolution: {integrity: sha512-H0ea4Fd3lS1+sTEB2TgcLoK21lLhwEJzlQv3IN47pJS976Gx4zoWe0ak3q+uYh60ppQxg9F16Ri4tS1sfD4+jA==} dev: false + /binary-search@1.3.6: + resolution: {integrity: sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA==} + dev: false + /bindings@1.5.0: resolution: {integrity: sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==} dependencies: @@ -7018,7 +7333,6 @@ packages: /camelcase@6.3.0: resolution: {integrity: sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==} engines: {node: '>=10'} - dev: true /caniuse-lite@1.0.30001564: resolution: {integrity: sha512-DqAOf+rhof+6GVx1y+xzbFPeOumfQnhYzVnZD6LAXijR77yPtm9mfOcqOnT3mpnJiZVT+kwLAFnRlZcIz+c6bg==} @@ -7488,7 +7802,6 @@ packages: /commander@10.0.1: resolution: {integrity: 
sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==} engines: {node: '>=14'} - dev: true /commander@2.20.3: resolution: {integrity: sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==} @@ -7698,7 +8011,6 @@ packages: loose-envify: 1.4.0 object-assign: 4.1.1 dev: false - bundledDependencies: false /create-server@1.0.2: resolution: {integrity: sha512-hie+Kyero+jxt6dwKhLKtN23qSNiMn8mNIEjTjwzaZwH2y4tr4nYloeFrpadqV+ZqV9jQ15t3AKotaK8dOo45w==} @@ -9408,7 +9720,6 @@ packages: /flat@5.0.2: resolution: {integrity: sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==} hasBin: true - dev: true /flatted@3.2.7: resolution: {integrity: sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==} @@ -10726,6 +11037,10 @@ packages: is-decimal: 2.0.1 dev: false + /is-any-array@2.0.1: + resolution: {integrity: sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==} + dev: false + /is-arguments@1.1.1: resolution: {integrity: sha512-8Q7EARjzEnKpt/PCD7e1cgUS0a6X8u5tdSiMqXhojOdoV9TsMsiO+9VLC5vAmO8N7/GmXn7yjR8qnA6bVAEzfA==} engines: {node: '>= 0.4'} @@ -12238,6 +12553,12 @@ packages: resolution: {integrity: sha512-HvdH2LzI/EAZcUwA8+0nKNtWHqS+ZmijLA30RwZA0bo7ToCckjK5MkGhjED9KoRcXO6BaGI3I9UIzSA1FKFPOQ==} dev: false + /js-tiktoken@1.0.10: + resolution: {integrity: sha512-ZoSxbGjvGyMT13x6ACo9ebhDha/0FHdKA+OsQcMOWcm1Zs7r90Rhk5lhERLzji+3rA7EKpXCgwXcM5fF3DMpdA==} + dependencies: + base64-js: 1.5.1 + dev: false + /js-tokens@4.0.0: resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==} @@ -12509,6 +12830,17 @@ packages: resolution: {integrity: sha512-v4pxOBEQVN1WBTfB1crhTtxzNLZU9HPWgadlwzWKISJtt6Ku/CnpBrwVy+jFv8StjxsPfwPFzO0CMwdZLJ0/BA==} dev: false + /langsmith@0.1.3: + resolution: {integrity: sha512-kQMS3QySeU0Qt9A71d9trUXbeKn33HfxpRc7hRjSB967zcdTAngh66NcqYqBflD3nOL4FK6LKmvfb3vbNDEoPg==} + hasBin: true + dependencies: + '@types/uuid': 9.0.8 + commander: 10.0.1 + p-queue: 6.6.2 + p-retry: 4.6.2 + uuid: 9.0.1 + dev: false + /ldap-filter@0.3.3: resolution: {integrity: sha512-/tFkx5WIn4HuO+6w9lsfxq4FN3O+fDZeO9Mek8dCD8rTUpqzRa766BOBO7BcGkn3X86m5+cBm1/2S/Shzz7gMg==} engines: {node: '>=0.8'} @@ -13186,6 +13518,37 @@ packages: resolution: {integrity: sha512-bauHShmaxVQiEvlrAPWxSPn8spSL8gDVRl11r8vLT4r/KdnknLqtqwQbToZ2Oa8sJkExYY1z6/d+X7pNiqo4yg==} dev: true + /ml-array-mean@1.1.6: + resolution: {integrity: sha512-MIdf7Zc8HznwIisyiJGRH9tRigg3Yf4FldW8DxKxpCCv/g5CafTw0RRu51nojVEOXuCQC7DRVVu5c7XXO/5joQ==} + dependencies: + ml-array-sum: 1.1.6 + dev: false + + /ml-array-sum@1.1.6: + resolution: {integrity: sha512-29mAh2GwH7ZmiRnup4UyibQZB9+ZLyMShvt4cH4eTK+cL2oEMIZFnSyB3SS8MlsTh6q/w/yh48KmqLxmovN4Dw==} + dependencies: + is-any-array: 2.0.1 + dev: false + + /ml-distance-euclidean@2.0.0: + resolution: {integrity: sha512-yC9/2o8QF0A3m/0IXqCTXCzz2pNEzvmcE/9HFKOZGnTjatvBbsn4lWYJkxENkA4Ug2fnYl7PXQxnPi21sgMy/Q==} + dev: false + + /ml-distance@4.0.1: + resolution: {integrity: sha512-feZ5ziXs01zhyFUUUeZV5hwc0f5JW0Sh0ckU1koZe/wdVkJdGxcP06KNQuF0WBTj8FttQUzcvQcpcrOp/XrlEw==} + dependencies: + ml-array-mean: 1.1.6 + ml-distance-euclidean: 2.0.0 + ml-tree-similarity: 1.0.0 + dev: false + + /ml-tree-similarity@1.0.0: + resolution: {integrity: sha512-XJUyYqjSuUQkNQHMscr6tcjldsOoAekxADTplt40QKfwW6nd++1wHWV9AArl0Zvw/TIHgNaZZNvr8QGvE8wLRg==} + dependencies: + binary-search: 1.3.6 + 
num-sort: 2.1.0 + dev: false + /mocha@10.2.0: resolution: {integrity: sha512-IDY7fl/BecMwFHzoqF2sg/SHHANeBoMMXFlS9r0OXKDssYE1M5O43wUY/9BVPeIvfH2zmEbBfseqN9gBQZzXkg==} engines: {node: '>= 14.0.0'} @@ -13693,6 +14056,11 @@ packages: dependencies: boolbase: 1.0.0 + /num-sort@2.1.0: + resolution: {integrity: sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==} + engines: {node: '>=8'} + dev: false + /number-is-integer@1.0.1: resolution: {integrity: sha512-Dq3iuiFBkrbmuQjGFFF3zckXNCQoSD37/SdSbgcBailUx6knDvDwb5CympBgcoWHy36sfS12u74MHYkXyHq6bg==} engines: {node: '>=0.10.0'} @@ -13943,6 +14311,11 @@ packages: engines: {node: '>=0.10.0'} dev: false + /p-finally@1.0.0: + resolution: {integrity: sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==} + engines: {node: '>=4'} + dev: false + /p-limit@2.3.0: resolution: {integrity: sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==} engines: {node: '>=6'} @@ -13980,6 +14353,29 @@ packages: dependencies: aggregate-error: 3.1.0 + /p-queue@6.6.2: + resolution: {integrity: sha512-RwFpb72c/BhQLEXIZ5K2e+AhgNVmIejGlTgiB9MzZ0e93GRvqZ7uSi0dvRF7/XIXDeNkra2fNHBxTyPDGySpjQ==} + engines: {node: '>=8'} + dependencies: + eventemitter3: 4.0.7 + p-timeout: 3.2.0 + dev: false + + /p-retry@4.6.2: + resolution: {integrity: sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==} + engines: {node: '>=8'} + dependencies: + '@types/retry': 0.12.0 + retry: 0.13.1 + dev: false + + /p-timeout@3.2.0: + resolution: {integrity: sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==} + engines: {node: '>=8'} + dependencies: + p-finally: 1.0.0 + dev: false + /p-try@2.2.0: resolution: {integrity: sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==} engines: {node: '>=6'} @@ -16302,6 +16698,11 @@ packages: - supports-color dev: false + /retry@0.13.1: + resolution: {integrity: sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==} + engines: {node: '>= 4'} + dev: false + /reusify@1.0.4: resolution: {integrity: sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==} engines: {iojs: '>=1.0.0', node: '>=0.10.0'} @@ -19079,10 +19480,22 @@ packages: resolution: {integrity: sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==} dev: false + /zod-to-json-schema@3.22.4(zod@3.22.4): + resolution: {integrity: sha512-2Ed5dJ+n/O3cU383xSY28cuVi0BCQhF8nYqWU5paEpl7fVdqdAmiLdqLyfblbNdfOFwFfi/mqU4O1pwc60iBhQ==} + peerDependencies: + zod: ^3.22.4 + dependencies: + zod: 3.22.4 + dev: false + /zod@3.21.4: resolution: {integrity: sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==} dev: false + /zod@3.22.4: + resolution: {integrity: sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==} + dev: false + /zwitch@2.0.4: resolution: {integrity: sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==} dev: false diff --git a/src/packages/server/llm/client.ts b/src/packages/server/llm/client.ts index d6f2ba86c0..1f50dab0c0 100644 --- a/src/packages/server/llm/client.ts +++ b/src/packages/server/llm/client.ts @@ -11,12 +11,13 @@ import { getServerSettings } from "@cocalc/database/settings/server-settings"; import { 
LanguageModel, model2vendor } from "@cocalc/util/db-schema/openai"; import { unreachable } from "@cocalc/util/misc"; import { VertexAIClient } from "./vertex-ai-client"; +import { Ollama } from "@langchain/community/llms/ollama"; const log = getLogger("llm:client"); const clientCache: { [key: string]: OpenAI | VertexAIClient } = {}; -export default async function getClient( +export async function getClient( model?: LanguageModel, ): Promise { const vendor = model == null ? "openai" : model2vendor(model); @@ -56,8 +57,40 @@ export default async function getClient( clientCache[key] = vai; return vai; + case "ollama": + throw new Error("Use the getOllama function instead"); + default: unreachable(vendor); throw new Error(`unknown vendor: ${vendor}`); } } + +const ollamaCache: { [key: string]: Ollama } = {}; + +export async function getOllama(model: string) { + // model is the unique key in the ServerSettings.ollama_configuration mapping + if (ollamaCache[model]) { + return ollamaCache[model]; + } + + const settings = await getServerSettings(); + const config = settings.ollama_configuration?.[model]; + if (!config) { + throw new Error( + `Ollama model ${model} not configured – you have to create an entry {${model}: {url: "https://...", ...}} in the "Ollama Configuration" entry of the server settings`, + ); + } + + const baseUrl = config.url; + + if (!baseUrl) { + throw new Error(`The url of the Ollama model ${model} is not configured`); + } + + const keepAlive = config.keepAlive ?? -1; + + const client = new Ollama({ baseUrl, model, keepAlive }); + ollamaCache[model] = client; + return client; +} diff --git a/src/packages/server/llm/embeddings.ts b/src/packages/server/llm/embeddings.ts index 06ae65e3ac..0e805e4708 100644 --- a/src/packages/server/llm/embeddings.ts +++ b/src/packages/server/llm/embeddings.ts @@ -1,7 +1,7 @@ import { sha1, uuidsha1 } from "@cocalc/backend/sha1"; import { getClient as getDB } from "@cocalc/database/pool"; import * as qdrant from "@cocalc/database/qdrant"; -import getClient from "./client"; +import { getClient } from "./client"; import checkForAbuse from "./embeddings-abuse"; import { VertexAIClient } from "./vertex-ai-client"; diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts index 6708288c99..8f2800d53e 100644 --- a/src/packages/server/llm/index.ts +++ b/src/packages/server/llm/index.ts @@ -31,9 +31,10 @@ import { import { ChatOptions, ChatOutput, History } from "@cocalc/util/types/llm"; import { checkForAbuse } from "./abuse"; import { callChatGPTAPI } from "./call-chatgpt"; -import getClient from "./client"; +import { getClient } from "./client"; import { saveResponse } from "./save-response"; import { VertexAIClient } from "./vertex-ai-client"; +import { evaluateOllama } from "./ollama"; const log = getLogger("llm"); @@ -59,38 +60,6 @@ export async function evaluate(opts: ChatOptions): Promise { } } -async function evaluteCall({ - system, - history, - input, - client, - model, - maxTokens, - stream, -}) { - if (client instanceof VertexAIClient) { - return await evaluateVertexAI({ - system, - history, - input, - client, - maxTokens, - model, - stream, - }); - } - - return await evaluateOpenAI({ - system, - history, - input, - client, - model, - maxTokens, - stream, - }); -} - async function evaluateImpl({ input, system, @@ -104,7 +73,7 @@ async function evaluateImpl({ stream, maxTokens, }: ChatOptions): Promise { - log.debug("evaluate", { + log.debug("evaluateImpl", { input, history, system, @@ -124,15 +93,28 @@ async function 
evaluateImpl({ const client = await getClient(model); const { output, total_tokens, prompt_tokens, completion_tokens } = - await evaluteCall({ - system, - history, - input, - client, - model, - maxTokens, - stream, - }); + await (async () => { + if (model.startsWith("ollama-")) { + return await evaluateOllama({ + system, + history, + input, + model, + maxTokens, + stream, + }); + } else { + return await evaluteCall({ + system, + history, + input, + client, + model, + maxTokens, + stream, + }); + } + })(); log.debug("response: ", { output, total_tokens, prompt_tokens }); const total_time_s = (Date.now() - start) / 1000; @@ -192,6 +174,38 @@ async function evaluateImpl({ return output; } +async function evaluteCall({ + system, + history, + input, + client, + model, + maxTokens, + stream, +}) { + if (client instanceof VertexAIClient) { + return await evaluateVertexAI({ + system, + history, + input, + client, + maxTokens, + model, + stream, + }); + } + + return await evaluateOpenAI({ + system, + history, + input, + client, + model, + maxTokens, + stream, + }); +} + interface EvalVertexAIProps { client: VertexAIClient; system?: string; diff --git a/src/packages/server/llm/ollama.ts b/src/packages/server/llm/ollama.ts new file mode 100644 index 0000000000..91ad6317f2 --- /dev/null +++ b/src/packages/server/llm/ollama.ts @@ -0,0 +1,56 @@ +import getLogger from "@cocalc/backend/logger"; +import { ChatOutput, History } from "@cocalc/util/types/llm"; +import { getOllama } from "./client"; + +const log = getLogger("llm:ollama"); + +// subset of ChatOptions, but model is a string +interface OllamaOpts { + input: string; // new input that user types + system?: string; // extra setup that we add for relevance and context + history?: History; + model: string; // this must be ollama-[model] + stream?: (output?: string) => void; + maxTokens?: number; +} + +export async function evaluateOllama( + opts: Readonly, +): Promise { + if (!opts.model.startsWith("ollama-")) { + throw new Error(`model ${opts.model} not supported`); + } + const model = opts.model.slice("ollama-".length); + const { system, history, input, maxTokens, stream } = opts; + log.debug("evaluateOllama", { + input, + history, + system, + model, + stream: stream != null, + maxTokens, + }); + + const ollama = await getOllama(model); + + const chunks = await ollama.stream(input); + + let output = ""; + for await (const chunk of chunks) { + output += chunk; + opts.stream?.(chunk); + } + + // and an empty call when done + opts.stream?.(); + + const prompt_tokens = 10; + const completion_tokens = 10; + + return { + output, + total_tokens: prompt_tokens + completion_tokens, + completion_tokens, + prompt_tokens, + }; +} diff --git a/src/packages/server/package.json b/src/packages/server/package.json index 9f8d42b723..a167fa941a 100644 --- a/src/packages/server/package.json +++ b/src/packages/server/package.json @@ -46,6 +46,7 @@ "@google-cloud/monitoring": "^4.0.0", "@google/generative-ai": "^0.1.3", "@isaacs/ttlcache": "^1.2.1", + "@langchain/community": "^0.0.32", "@node-saml/passport-saml": "^4.0.4", "@passport-js/passport-twitter": "^1.0.8", "@passport-next/passport-google-oauth2": "^1.0.0", diff --git a/src/packages/util/db-schema/openai.ts b/src/packages/util/db-schema/openai.ts index 83181e061a..372f47be07 100644 --- a/src/packages/util/db-schema/openai.ts +++ b/src/packages/util/db-schema/openai.ts @@ -72,7 +72,7 @@ export type LanguageService = | "google-embedding-gecko-001" | "google-gemini-pro"; -const LANGUAGE_MODEL_VENDORS = ["openai", 
"google"] as const; +const LANGUAGE_MODEL_VENDORS = ["openai", "google", "ollama"] as const; export type Vendor = (typeof LANGUAGE_MODEL_VENDORS)[number]; // used e.g. for checking "account-id={string}" and other things like that @@ -122,6 +122,8 @@ export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo"; export function model2vendor(model: LanguageModel): Vendor { if (model.startsWith("gpt-")) { return "openai"; + } else if (model.startsWith("ollama-")) { + return "ollama"; } else { return "google"; } @@ -193,6 +195,8 @@ export function getVendorStatusCheckMD(vendor: Vendor): string { return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`; case "google": return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`; + case "ollama": + return `No status information for Ollama available – you have to check with the particular backend for the model.`; default: unreachable(vendor); } @@ -266,8 +270,10 @@ const LLM_COST: { [name in LanguageModel]: Cost } = { }, } as const; -export function isValidModel(model?: Model) { - return model != null && LLM_COST[model ?? ""] != null; +export function isValidModel(model?: string): boolean { + if (model == null) return false; + if (model.startsWith("ollama-")) return true; + return LLM_COST[model ?? ""] != null; } export function getMaxTokens(model?: Model): number { diff --git a/src/packages/util/db-schema/site-defaults.ts b/src/packages/util/db-schema/site-defaults.ts index 57a196bd73..b976ef9464 100644 --- a/src/packages/util/db-schema/site-defaults.ts +++ b/src/packages/util/db-schema/site-defaults.ts @@ -26,6 +26,7 @@ export type SiteSettingsKeys = | "policies" | "openai_enabled" | "google_vertexai_enabled" + | "ollama_enabled" | "neural_search_enabled" | "jupyter_api_enabled" | "organization_name" @@ -595,6 +596,13 @@ export const site_settings_conf: SiteSettings = { valid: only_booleans, to_val: to_bool, }, + ollama_enabled: { + name: "Ollama LLM UI", + desc: "Controls visibility of UI elements related to Ollama integration. To make this actually work, configure the list of API/model endpoints in the Ollama configuration.", + default: "no", + valid: only_booleans, + to_val: to_bool, + }, neural_search_enabled: { name: "OpenAI Neural Search UI", desc: "Controls visibility of UI elements related to Neural Search integration. 
You must **also set your OpenAI API key** below and fully configure the **Qdrant vector database** for neural search to work.", diff --git a/src/packages/util/db-schema/site-settings-extras.ts b/src/packages/util/db-schema/site-settings-extras.ts index 692c0942c1..b338ae5507 100644 --- a/src/packages/util/db-schema/site-settings-extras.ts +++ b/src/packages/util/db-schema/site-settings-extras.ts @@ -67,8 +67,9 @@ const pii_retention_display = (retention: string) => { const openai_enabled = (conf: SiteSettings) => to_bool(conf.openai_enabled); const vertexai_enabled = (conf: SiteSettings) => to_bool(conf.google_vertexai_enabled); +const ollama_enabled = (conf: SiteSettings) => to_bool(conf.ollama_enabled); const any_llm_enabled = (conf: SiteSettings) => - openai_enabled(conf) || vertexai_enabled(conf); + openai_enabled(conf) || vertexai_enabled(conf) || ollama_enabled(conf); const compute_servers_enabled = (conf: SiteSettings) => to_bool(conf.compute_servers_enabled); @@ -104,6 +105,7 @@ export type SiteSettingsExtrasKeys = | "openai_section" | "openai_api_key" | "google_vertexai_key" + | "ollama_configuration" | "qdrant_section" | "qdrant_api_key" | "qdrant_cluster_url" @@ -180,6 +182,15 @@ export const EXTRAS: SettingsExtras = { password: true, show: vertexai_enabled, }, + ollama_configuration: { + name: "Ollama Configuration", + desc: "This is the configuration for the Ollama LLM API endpoints.", + default: "", + multiline: 5, + show: ollama_enabled, + to_val: from_json, + valid: parsableJson, + }, qdrant_section: { name: "Qdrant Configuration", desc: "", From b6f4c87ef6c477021498198308e7e1a16912b9d4 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Fri, 23 Feb 2024 13:33:35 +0100 Subject: [PATCH 02/32] ollama: starting with configuration + frontend --- .../frontend/account/other-settings.tsx | 37 +++--- .../account/useLanguageModelSetting.tsx | 31 ++++- .../admin/site-settings/row-entry.tsx | 3 +- src/packages/frontend/chat/message.tsx | 2 +- .../codemirror/extensions/ai-formula.tsx | 29 ++++- src/packages/frontend/customize.tsx | 28 +++-- .../frame-editors/code-editor/actions.ts | 4 +- .../frame-editors/frame-tree/format-error.tsx | 14 ++- .../frame-editors/frame-tree/title-bar.tsx | 2 +- .../latex-editor/errors-and-warnings.tsx | 2 +- .../frame-editors/latex-editor/gutters.tsx | 9 +- .../{chatgpt => llm}/context.tsx | 0 .../{chatgpt => llm}/create-chat.ts | 0 .../{chatgpt => llm}/help-me-fix.tsx | 0 .../{chatgpt => llm}/model-switch.tsx | 29 ++++- .../{chatgpt => llm}/shorten-error.ts | 0 .../title-bar-button-tour.tsx | 0 .../{chatgpt => llm}/title-bar-button.tsx | 0 .../frame-editors/{chatgpt => llm}/types.ts | 0 .../frontend/jupyter/chatgpt/error.tsx | 3 +- .../frontend/jupyter/chatgpt/explain.tsx | 4 +- .../jupyter/insert-cell/ai-cell-generator.tsx | 4 +- .../page/home-page/ai-generate-jupyter.tsx | 6 +- src/packages/frontend/projects/store.ts | 22 +++- src/packages/frontend/sagews/chatgpt.ts | 12 +- src/packages/hub/servers/server-settings.ts | 12 +- src/packages/hub/webapp-configuration.ts | 50 ++++++-- .../llm/{call-chatgpt.ts => call-llm.ts} | 2 +- src/packages/server/llm/client.ts | 50 ++++++-- src/packages/server/llm/index.ts | 2 +- src/packages/util/db-schema/openai.ts | 51 +++++++-- .../util/db-schema/site-settings-extras.ts | 107 +++++++++++++++--- src/packages/util/types/llm.ts | 7 ++ 33 files changed, 405 insertions(+), 117 deletions(-) rename src/packages/frontend/frame-editors/{chatgpt => llm}/context.tsx (100%) rename src/packages/frontend/frame-editors/{chatgpt => 
llm}/create-chat.ts (100%) rename src/packages/frontend/frame-editors/{chatgpt => llm}/help-me-fix.tsx (100%) rename src/packages/frontend/frame-editors/{chatgpt => llm}/model-switch.tsx (80%) rename src/packages/frontend/frame-editors/{chatgpt => llm}/shorten-error.ts (100%) rename src/packages/frontend/frame-editors/{chatgpt => llm}/title-bar-button-tour.tsx (100%) rename src/packages/frontend/frame-editors/{chatgpt => llm}/title-bar-button.tsx (100%) rename src/packages/frontend/frame-editors/{chatgpt => llm}/types.ts (100%) rename src/packages/server/llm/{call-chatgpt.ts => call-llm.ts} (98%) diff --git a/src/packages/frontend/account/other-settings.tsx b/src/packages/frontend/account/other-settings.tsx index f1c0428156..e01d3ddbc9 100644 --- a/src/packages/frontend/account/other-settings.tsx +++ b/src/packages/frontend/account/other-settings.tsx @@ -378,28 +378,21 @@ export class OtherSettings extends Component { render_language_model(): Rendered { const projectsStore = redux.getStore("projects"); - const haveOpenAI = projectsStore.hasLanguageModelEnabled( - undefined, - undefined, - "openai", - ); - const haveGoogle = projectsStore.hasLanguageModelEnabled( - undefined, - undefined, - "google", - ); + const enabled = projectsStore.llmEnabledSummary(); + const ollama = redux.getStore("customize").get("ollama")?.toJS() ?? {}; const defaultModel = getValidLanguageModelName( this.props.other_settings.get(SETTINGS_LANGUAGE_MODEL_KEY), - { openai: haveOpenAI, google: haveGoogle }, + enabled, + Object.keys(ollama), ); const options: { value: string; display: JSX.Element }[] = []; for (const key of USER_SELECTABLE_LANGUAGE_MODELS) { const vendor = model2vendor(key); - if (vendor === "google" && !haveGoogle) continue; - if (vendor === "openai" && !haveOpenAI) continue; + if (vendor === "google" && !enabled.google) continue; + if (vendor === "openai" && !enabled.openai) continue; const txt = isFreeModel(key) ? " (free)" : ""; const display = ( @@ -410,6 +403,18 @@ export class OtherSettings extends Component { options.push({ value: key, display }); } + if (enabled.ollama) { + for (const key in ollama) { + const title = ollama[key].display ?? key; + const display = ( + <> + {title} (Ollama) + + ); + options.push({ value: key, display }); + } + } + return ( { redux.getStore("projects").clearOpenAICache(); }} > - Disable all AI integrations, e.g., - code generation or explanation buttons in Jupyter, @chatgpt - mentions, etc. + Disable all AI integrations, e.g., code + generation or explanation buttons in Jupyter, @chatgpt mentions, + etc. 
)} {this.render_language_model()} diff --git a/src/packages/frontend/account/useLanguageModelSetting.tsx b/src/packages/frontend/account/useLanguageModelSetting.tsx index 3c79ff3ef5..d6719d3627 100644 --- a/src/packages/frontend/account/useLanguageModelSetting.tsx +++ b/src/packages/frontend/account/useLanguageModelSetting.tsx @@ -2,26 +2,49 @@ import { redux, useMemo, useTypedRedux } from "@cocalc/frontend/app-framework"; import { LanguageModel, USER_SELECTABLE_LANGUAGE_MODELS, + fromOllamaModel, getValidLanguageModelName, + isOllamaLLM, } from "@cocalc/util/db-schema/openai"; export const SETTINGS_LANGUAGE_MODEL_KEY = "language_model"; export function useLanguageModelSetting(): [ - LanguageModel, - (llm: LanguageModel) => void, + LanguageModel | string, + (llm: LanguageModel | string) => void, ] { const other_settings = useTypedRedux("account", "other_settings"); + const ollama = useTypedRedux("customize", "ollama"); + const haveOpenAI = useTypedRedux("customize", "openai_enabled"); + const haveGoogle = useTypedRedux("customize", "google_vertexai_enabled"); + const haveOllama = useTypedRedux("customize", "ollama_enabled"); + + const filter = useMemo(() => { + const projectsStore = redux.getStore("projects"); + return projectsStore.llmEnabledSummary(); + }, [haveOpenAI, haveGoogle, haveOllama]); + const llm = useMemo(() => { - return getValidLanguageModelName(other_settings?.get("language_model")); + return getValidLanguageModelName( + other_settings?.get("language_model"), + filter, + Object.keys(ollama?.toJS() ?? {}), + ); }, [other_settings]); - function setLLM(llm: LanguageModel) { + function setLLM(llm: LanguageModel | string) { if (USER_SELECTABLE_LANGUAGE_MODELS.includes(llm as any)) { redux .getActions("account") .set_other_settings(SETTINGS_LANGUAGE_MODEL_KEY, llm); } + + // check if llm is a key in the ollama typedmap + if (isOllamaLLM(llm) && ollama?.get(fromOllamaModel(llm))) { + redux + .getActions("account") + .set_other_settings(SETTINGS_LANGUAGE_MODEL_KEY, llm); + } } return [llm, setLLM]; diff --git a/src/packages/frontend/admin/site-settings/row-entry.tsx b/src/packages/frontend/admin/site-settings/row-entry.tsx index aa4f3864fc..1597623bf0 100644 --- a/src/packages/frontend/admin/site-settings/row-entry.tsx +++ b/src/packages/frontend/admin/site-settings/row-entry.tsx @@ -120,7 +120,8 @@ export function RowEntry({ {displayed_val != null && ( {" "} - Interpreted as {displayed_val}.{" "} + {valid ? 
"Interpreted as" : "Invalid:"}{" "} + {displayed_val}.{" "} )} {valid != null && Array.isArray(valid) && ( diff --git a/src/packages/frontend/chat/message.tsx b/src/packages/frontend/chat/message.tsx index f60a3a75f7..6c51d776af 100644 --- a/src/packages/frontend/chat/message.tsx +++ b/src/packages/frontend/chat/message.tsx @@ -17,7 +17,7 @@ import { import { Gap, Icon, TimeAgo, Tip } from "@cocalc/frontend/components"; import MostlyStaticMarkdown from "@cocalc/frontend/editors/slate/mostly-static-markdown"; import { IS_TOUCH } from "@cocalc/frontend/feature"; -import { modelToName } from "@cocalc/frontend/frame-editors/chatgpt/model-switch"; +import { modelToName } from "@cocalc/frontend/frame-editors/llm/model-switch"; import { COLORS } from "@cocalc/util/theme"; import { ChatActions } from "./actions"; import { getUserName } from "./chat-log"; diff --git a/src/packages/frontend/codemirror/extensions/ai-formula.tsx b/src/packages/frontend/codemirror/extensions/ai-formula.tsx index 3c982090da..71fc33ce5e 100644 --- a/src/packages/frontend/codemirror/extensions/ai-formula.tsx +++ b/src/packages/frontend/codemirror/extensions/ai-formula.tsx @@ -1,23 +1,28 @@ import { Button, Divider, Input, Modal, Space } from "antd"; import { useLanguageModelSetting } from "@cocalc/frontend/account/useLanguageModelSetting"; -import { redux, useEffect, useState } from "@cocalc/frontend/app-framework"; +import { + redux, + useEffect, + useState, + useTypedRedux, +} from "@cocalc/frontend/app-framework"; import { HelpIcon, Markdown, Paragraph, - Title, Text, + Title, } from "@cocalc/frontend/components"; import { LanguageModelVendorAvatar } from "@cocalc/frontend/components/language-model-icon"; import ModelSwitch, { modelToName, -} from "@cocalc/frontend/frame-editors/chatgpt/model-switch"; +} from "@cocalc/frontend/frame-editors/llm/model-switch"; import { show_react_modal } from "@cocalc/frontend/misc"; +import track from "@cocalc/frontend/user-tracking"; import { webapp_client } from "@cocalc/frontend/webapp-client"; +import { isFreeModel, isLanguageModel } from "@cocalc/util/db-schema/openai"; import { unreachable } from "@cocalc/util/misc"; -import { isFreeModel } from "@cocalc/util/db-schema/openai"; -import track from "@cocalc/frontend/user-tracking"; type Mode = "tex" | "md"; @@ -47,6 +52,7 @@ function AiGenFormula({ mode, text = "", project_id, cb }: Props) { const [formula, setFormula] = useState(""); const [generating, setGenerating] = useState(false); const [error, setError] = useState(undefined); + const ollama = useTypedRedux("customize", "ollama"); const enabled = redux .getStore("projects") @@ -134,12 +140,23 @@ function AiGenFormula({ mode, text = "", project_id, cb }: Props) { } }, [text]); + function renderModel2Name(): string { + if (isLanguageModel(model)) { + return modelToName(model); + } + const om = ollama?.get(model); + if (om) { + return om.get("title") ?? `Ollama ${model}`; + } + return model; + } + function renderTitle() { return ( <> <LanguageModelVendorAvatar model={model} /> Generate LaTeX Formula - using {modelToName(model)} + using {renderModel2Name()} {enabled ? 
( <> diff --git a/src/packages/frontend/customize.tsx b/src/packages/frontend/customize.tsx index 9171a641a6..edfe62c02d 100644 --- a/src/packages/frontend/customize.tsx +++ b/src/packages/frontend/customize.tsx @@ -8,6 +8,7 @@ import { fromJS, List, Map } from "immutable"; import { join } from "path"; + import { Actions, rclass, @@ -22,13 +23,14 @@ import { import { A, build_date, + Gap, Loading, r_join, smc_git_rev, smc_version, - Gap, UNIT, } from "@cocalc/frontend/components"; +import { getGoogleCloudImages, getImages } from "@cocalc/frontend/compute/api"; import { appBasePath } from "@cocalc/frontend/customize/app-base-path"; import { callback2, retry_until_success } from "@cocalc/util/async-utils"; import { @@ -37,6 +39,10 @@ import { FALLBACK_SOFTWARE_ENV, } from "@cocalc/util/compute-images"; import { DEFAULT_COMPUTE_IMAGE } from "@cocalc/util/db-schema"; +import type { + GoogleCloudImages, + Images, +} from "@cocalc/util/db-schema/compute-servers"; import { KUCALC_COCALC_COM, KUCALC_DISABLED, @@ -44,16 +50,12 @@ import { site_settings_conf, } from "@cocalc/util/db-schema/site-defaults"; import { deep_copy, dict, YEAR } from "@cocalc/util/misc"; +import { reuseInFlight } from "@cocalc/util/reuse-in-flight"; import { sanitizeSoftwareEnv } from "@cocalc/util/sanitize-software-envs"; import * as theme from "@cocalc/util/theme"; +import { OllamaPublic } from "@cocalc/util/types/llm"; import { DefaultQuotaSetting, Upgrades } from "@cocalc/util/upgrades/quota"; export { TermsOfService } from "@cocalc/frontend/customize/terms-of-service"; -import type { - GoogleCloudImages, - Images, -} from "@cocalc/util/db-schema/compute-servers"; -import { getImages, getGoogleCloudImages } from "@cocalc/frontend/compute/api"; -import { reuseInFlight } from "@cocalc/util/reuse-in-flight"; // this sets UI modes for using a kubernetes based back-end // 'yes' (historic value) equals 'cocalc.com' @@ -93,6 +95,8 @@ export type SoftwareEnvironments = TypedMap<{ export interface CustomizeState { is_commercial: boolean; openai_enabled: boolean; + google_vertexai_enabled: boolean; + ollama_enabled: boolean; neural_search_enabled: boolean; datastore: boolean; ssh_gateway: boolean; @@ -148,6 +152,8 @@ export interface CustomizeState { compute_servers_dns?: string; compute_servers_images?: TypedMap | string | null; compute_servers_images_google?: TypedMap | string | null; + + ollama?: TypedMap<{ [key: string]: TypedMap }>; } export class CustomizeStore extends Store { @@ -238,10 +244,12 @@ async function init_customize() { registration, strategies, software = null, + ollama = null, // the derived public information } = customize; process_kucalc(configuration); process_software(software, configuration.is_cocalc_com); process_customize(configuration); // this sets _is_configured to true + process_ollama(ollama); const actions = redux.getActions("account"); // Which account creation strategies we support. 
actions.setState({ strategies }); @@ -251,6 +259,12 @@ async function init_customize() { init_customize(); +function process_ollama(ollama) { + if (ollama) { + actions.setState({ ollama: fromJS(ollama) }); + } +} + function process_kucalc(obj) { // TODO make this a to_val function in site_settings_conf.kucalc obj.kucalc = validate_kucalc(obj.kucalc); diff --git a/src/packages/frontend/frame-editors/code-editor/actions.ts b/src/packages/frontend/frame-editors/code-editor/actions.ts index 40026b4b1e..286cc8801b 100644 --- a/src/packages/frontend/frame-editors/code-editor/actions.ts +++ b/src/packages/frontend/frame-editors/code-editor/actions.ts @@ -64,8 +64,8 @@ import { len, uuid, } from "@cocalc/util/misc"; -import languageModelCreateChat, { Options } from "../chatgpt/create-chat"; -import type { Scope as LanguageModelScope } from "../chatgpt/types"; +import languageModelCreateChat, { Options } from "../llm/create-chat"; +import type { Scope as LanguageModelScope } from "../llm/types"; import { default_opts } from "../codemirror/cm-options"; import { print_code } from "../frame-tree/print-code"; import * as tree_ops from "../frame-tree/tree-ops"; diff --git a/src/packages/frontend/frame-editors/frame-tree/format-error.tsx b/src/packages/frontend/frame-editors/frame-tree/format-error.tsx index 949812e01d..fbc4d248d6 100644 --- a/src/packages/frontend/frame-editors/frame-tree/format-error.tsx +++ b/src/packages/frontend/frame-editors/frame-tree/format-error.tsx @@ -1,11 +1,12 @@ // A dismissable error message that appears when formatting code. -import { useMemo } from "react"; import { Alert, Button } from "antd"; +import { useMemo } from "react"; + +import { file_associations } from "@cocalc/frontend/file-associations"; import { useFrameContext } from "@cocalc/frontend/frame-editors/frame-tree/frame-context"; +import HelpMeFix from "@cocalc/frontend/frame-editors/llm/help-me-fix"; import { CodeMirrorStatic } from "@cocalc/frontend/jupyter/codemirror-static"; -import HelpMeFix from "@cocalc/frontend/frame-editors/chatgpt/help-me-fix"; -import { file_associations } from "@cocalc/frontend/file-associations"; interface Props { formatError: string; @@ -14,10 +15,13 @@ interface Props { export default function FormatError({ formatError, formatInput }: Props) { const { actions } = useFrameContext(); - const language = useMemo(() => actions?.languageModelGetLanguage(), [actions]); + const language = useMemo( + () => actions?.languageModelGetLanguage(), + [actions], + ); const mode = useMemo( () => file_associations[language]?.opts?.mode ?? 
language, - [language] + [language], ); if (actions == null) return null; diff --git a/src/packages/frontend/frame-editors/frame-tree/title-bar.tsx b/src/packages/frontend/frame-editors/frame-tree/title-bar.tsx index 9919bb49c0..c123495e3a 100644 --- a/src/packages/frontend/frame-editors/frame-tree/title-bar.tsx +++ b/src/packages/frontend/frame-editors/frame-tree/title-bar.tsx @@ -34,7 +34,7 @@ import { Actions } from "../code-editor/actions"; import { is_safari } from "../generic/browser"; import { SaveButton } from "./save-button"; import { ConnectionStatus, EditorDescription, EditorSpec } from "./types"; -import LanguageModelTitleBarButton from "../chatgpt/title-bar-button"; +import LanguageModelTitleBarButton from "../llm/title-bar-button"; import userTracking from "@cocalc/frontend/user-tracking"; import TitleBarTour from "./title-bar-tour"; import { IS_MOBILE } from "@cocalc/frontend/feature"; diff --git a/src/packages/frontend/frame-editors/latex-editor/errors-and-warnings.tsx b/src/packages/frontend/frame-editors/latex-editor/errors-and-warnings.tsx index 31a36ade7d..358e636608 100644 --- a/src/packages/frontend/frame-editors/latex-editor/errors-and-warnings.tsx +++ b/src/packages/frontend/frame-editors/latex-editor/errors-and-warnings.tsx @@ -18,7 +18,7 @@ import { useRedux, } from "@cocalc/frontend/app-framework"; import { Icon, IconName, Loading } from "@cocalc/frontend/components"; -import HelpMeFix from "@cocalc/frontend/frame-editors/chatgpt/help-me-fix"; +import HelpMeFix from "@cocalc/frontend/frame-editors/llm/help-me-fix"; import { capitalize, is_different, path_split } from "@cocalc/util/misc"; import { COLORS } from "@cocalc/util/theme"; import { EditorState } from "../frame-tree/types"; diff --git a/src/packages/frontend/frame-editors/latex-editor/gutters.tsx b/src/packages/frontend/frame-editors/latex-editor/gutters.tsx index ed00bcb300..e6b0eb8c7f 100644 --- a/src/packages/frontend/frame-editors/latex-editor/gutters.tsx +++ b/src/packages/frontend/frame-editors/latex-editor/gutters.tsx @@ -9,12 +9,13 @@ // one gets a gutter mark, with pref to errors. The main error log shows everything, so this should be OK. 
import { Popover } from "antd"; -import { capitalize } from "@cocalc/util/misc"; + import { Icon } from "@cocalc/frontend/components"; -import { SPEC, SpecItem } from "./errors-and-warnings"; -import { IProcessedLatexLog, Error } from "./latex-log-parser"; -import HelpMeFix from "@cocalc/frontend/frame-editors/chatgpt/help-me-fix"; +import HelpMeFix from "@cocalc/frontend/frame-editors/llm/help-me-fix"; +import { capitalize } from "@cocalc/util/misc"; import { Actions } from "../code-editor/actions"; +import { SPEC, SpecItem } from "./errors-and-warnings"; +import { Error, IProcessedLatexLog } from "./latex-log-parser"; export function update_gutters(opts: { log: IProcessedLatexLog; diff --git a/src/packages/frontend/frame-editors/chatgpt/context.tsx b/src/packages/frontend/frame-editors/llm/context.tsx similarity index 100% rename from src/packages/frontend/frame-editors/chatgpt/context.tsx rename to src/packages/frontend/frame-editors/llm/context.tsx diff --git a/src/packages/frontend/frame-editors/chatgpt/create-chat.ts b/src/packages/frontend/frame-editors/llm/create-chat.ts similarity index 100% rename from src/packages/frontend/frame-editors/chatgpt/create-chat.ts rename to src/packages/frontend/frame-editors/llm/create-chat.ts diff --git a/src/packages/frontend/frame-editors/chatgpt/help-me-fix.tsx b/src/packages/frontend/frame-editors/llm/help-me-fix.tsx similarity index 100% rename from src/packages/frontend/frame-editors/chatgpt/help-me-fix.tsx rename to src/packages/frontend/frame-editors/llm/help-me-fix.tsx diff --git a/src/packages/frontend/frame-editors/chatgpt/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx similarity index 80% rename from src/packages/frontend/frame-editors/chatgpt/model-switch.tsx rename to src/packages/frontend/frame-editors/llm/model-switch.tsx index bee9d20dae..a99d6f6621 100644 --- a/src/packages/frontend/frame-editors/chatgpt/model-switch.tsx +++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx @@ -1,6 +1,6 @@ import { Radio, Tooltip } from "antd"; -import { CSS, redux } from "@cocalc/frontend/app-framework"; +import { CSS, redux, useTypedRedux } from "@cocalc/frontend/app-framework"; import { DEFAULT_MODEL, LLM_USERNAMES, @@ -8,14 +8,15 @@ import { USER_SELECTABLE_LANGUAGE_MODELS, isFreeModel, model2service, + toOllamaModel, } from "@cocalc/util/db-schema/openai"; export { DEFAULT_MODEL }; export type { LanguageModel }; interface Props { - model: LanguageModel; - setModel: (model: LanguageModel) => void; + model: LanguageModel | string; + setModel: (model: LanguageModel | string) => void; size?; style?: CSS; project_id: string; @@ -45,6 +46,12 @@ export default function ModelSwitch({ undefined, "google", ); + const showOllama = projectsStore.hasLanguageModelEnabled( + project_id, + undefined, + "ollama", + ); + const ollama = useTypedRedux("customize", "ollama"); function renderLLMButton(btnModel: LanguageModel, title: string) { if (!USER_SELECTABLE_LANGUAGE_MODELS.includes(btnModel)) return; @@ -98,6 +105,21 @@ export default function ModelSwitch({ ); } + function renderOllama() { + if (!showOllama || !ollama) return null; + + return Object.entries(ollama.toJS()).map(([key, config]) => { + const title = config.display ?? 
`Ollama: ${key}`; + return ( + + {title} + + ); + }); + } + + console.log("model", model); + // all models selectable here must be in util/db-schema/openai::USER_SELECTABLE_LANGUAGE_MODELS return ( {renderOpenAI()} {renderGoogle()} + {renderOllama()} ); } diff --git a/src/packages/frontend/frame-editors/chatgpt/shorten-error.ts b/src/packages/frontend/frame-editors/llm/shorten-error.ts similarity index 100% rename from src/packages/frontend/frame-editors/chatgpt/shorten-error.ts rename to src/packages/frontend/frame-editors/llm/shorten-error.ts diff --git a/src/packages/frontend/frame-editors/chatgpt/title-bar-button-tour.tsx b/src/packages/frontend/frame-editors/llm/title-bar-button-tour.tsx similarity index 100% rename from src/packages/frontend/frame-editors/chatgpt/title-bar-button-tour.tsx rename to src/packages/frontend/frame-editors/llm/title-bar-button-tour.tsx diff --git a/src/packages/frontend/frame-editors/chatgpt/title-bar-button.tsx b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx similarity index 100% rename from src/packages/frontend/frame-editors/chatgpt/title-bar-button.tsx rename to src/packages/frontend/frame-editors/llm/title-bar-button.tsx diff --git a/src/packages/frontend/frame-editors/chatgpt/types.ts b/src/packages/frontend/frame-editors/llm/types.ts similarity index 100% rename from src/packages/frontend/frame-editors/chatgpt/types.ts rename to src/packages/frontend/frame-editors/llm/types.ts diff --git a/src/packages/frontend/jupyter/chatgpt/error.tsx b/src/packages/frontend/jupyter/chatgpt/error.tsx index 58866ab9a9..a459a11303 100644 --- a/src/packages/frontend/jupyter/chatgpt/error.tsx +++ b/src/packages/frontend/jupyter/chatgpt/error.tsx @@ -3,8 +3,9 @@ Use ChatGPT to explain an error message and help the user fix it. 
*/ import { CSSProperties } from "react"; -import HelpMeFix from "@cocalc/frontend/frame-editors/chatgpt/help-me-fix"; + import { useFrameContext } from "@cocalc/frontend/frame-editors/frame-tree/frame-context"; +import HelpMeFix from "@cocalc/frontend/frame-editors/llm/help-me-fix"; interface Props { style?: CSSProperties; diff --git a/src/packages/frontend/jupyter/chatgpt/explain.tsx b/src/packages/frontend/jupyter/chatgpt/explain.tsx index 50e753a9b3..40a388bc3d 100644 --- a/src/packages/frontend/jupyter/chatgpt/explain.tsx +++ b/src/packages/frontend/jupyter/chatgpt/explain.tsx @@ -12,12 +12,12 @@ import { Icon } from "@cocalc/frontend/components/icon"; import { LanguageModelVendorAvatar } from "@cocalc/frontend/components/language-model-icon"; import PopconfirmKeyboard from "@cocalc/frontend/components/popconfirm-keyboard"; import StaticMarkdown from "@cocalc/frontend/editors/slate/static-markdown"; +import { useFrameContext } from "@cocalc/frontend/frame-editors/frame-tree/frame-context"; import ModelSwitch, { LanguageModel, modelToMention, modelToName, -} from "@cocalc/frontend/frame-editors/chatgpt/model-switch"; -import { useFrameContext } from "@cocalc/frontend/frame-editors/frame-tree/frame-context"; +} from "@cocalc/frontend/frame-editors/llm/model-switch"; import { ProjectsStore } from "@cocalc/frontend/projects/store"; import type { JupyterActions } from "../browser-actions"; diff --git a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx index a188396d8c..b20fa35bb7 100644 --- a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx +++ b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx @@ -9,10 +9,10 @@ import { Paragraph } from "@cocalc/frontend/components"; import { Icon } from "@cocalc/frontend/components/icon"; import { LanguageModelVendorAvatar } from "@cocalc/frontend/components/language-model-icon"; import StaticMarkdown from "@cocalc/frontend/editors/slate/static-markdown"; +import { NotebookFrameActions } from "@cocalc/frontend/frame-editors/jupyter-editor/cell-notebook/actions"; import ModelSwitch, { modelToName, -} from "@cocalc/frontend/frame-editors/chatgpt/model-switch"; -import { NotebookFrameActions } from "@cocalc/frontend/frame-editors/jupyter-editor/cell-notebook/actions"; +} from "@cocalc/frontend/frame-editors/llm/model-switch"; import { splitCells } from "@cocalc/frontend/jupyter/chatgpt/split-cells"; import track from "@cocalc/frontend/user-tracking"; import { webapp_client } from "@cocalc/frontend/webapp-client"; diff --git a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx index cd4d8d32c2..04016fcb5e 100644 --- a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx +++ b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx @@ -34,11 +34,11 @@ import { LanguageModelVendorAvatar } from "@cocalc/frontend/components/language- import ProgressEstimate from "@cocalc/frontend/components/progress-estimate"; import SelectKernel from "@cocalc/frontend/components/run-button/select-kernel"; import StaticMarkdown from "@cocalc/frontend/editors/slate/static-markdown"; -import ModelSwitch, { - modelToName, -} from "@cocalc/frontend/frame-editors/chatgpt/model-switch"; import type { JupyterEditorActions } from "@cocalc/frontend/frame-editors/jupyter-editor/actions"; import { NotebookFrameActions } from 
"@cocalc/frontend/frame-editors/jupyter-editor/cell-notebook/actions"; +import ModelSwitch, { + modelToName, +} from "@cocalc/frontend/frame-editors/llm/model-switch"; import { splitCells } from "@cocalc/frontend/jupyter/chatgpt/split-cells"; import getKernelSpec from "@cocalc/frontend/jupyter/kernelspecs"; import { StartButton } from "@cocalc/frontend/project/start-button"; diff --git a/src/packages/frontend/projects/store.ts b/src/packages/frontend/projects/store.ts index f443a0b878..29c1adfc29 100644 --- a/src/packages/frontend/projects/store.ts +++ b/src/packages/frontend/projects/store.ts @@ -734,10 +734,22 @@ export class ProjectsStore extends Store { openAICache.clear(); } + public llmEnabledSummary(project_id: string = "global", tag?: string) { + const haveOpenAI = this.hasLanguageModelEnabled(project_id, tag, "openai"); + const haveGoogle = this.hasLanguageModelEnabled(project_id, tag, "google"); + const haveOllama = this.hasLanguageModelEnabled(project_id, tag, "ollama"); + + return { + openai: haveOpenAI, + google: haveGoogle, + ollama: haveOllama, + }; + } + hasLanguageModelEnabled( project_id: string = "global", tag?: string, - vendor: "openai" | "google" | "any" = "any", + vendor: "openai" | "google" | "ollama" | "any" = "any", ): boolean { // cache answer for a few seconds, in case this gets called a lot: @@ -769,17 +781,19 @@ export class ProjectsStore extends Store { private _hasLanguageModelEnabled( project_id: string | "global" = "global", courseLimited?: boolean, - vendor: "openai" | "google" | "any" = "any", + vendor: "openai" | "google" | "ollama" | "any" = "any", ): boolean { const customize = redux.getStore("customize"); const haveOpenAI = customize.get("openai_enabled"); const haveGoogle = customize.get("google_vertexai_enabled"); + const haveOllama = customize.get("ollama_enabled"); - if (!haveOpenAI && !haveGoogle) return false; // the vendor == "any" case + if (!haveOpenAI && !haveGoogle && !haveOllama) return false; // the vendor == "any" case if (vendor === "openai" && !haveOpenAI) return false; if (vendor === "google" && !haveGoogle) return false; + if (vendor === "ollama" && !haveOllama) return false; - // this customization accounts for disabling any language model vendor + // this customization parameter accounts for disabling **any** language model vendor const openai_disabled = redux .getStore("account") .getIn(["other_settings", "openai_disabled"]); diff --git a/src/packages/frontend/sagews/chatgpt.ts b/src/packages/frontend/sagews/chatgpt.ts index fe46bbd7ea..84313e3bc3 100644 --- a/src/packages/frontend/sagews/chatgpt.ts +++ b/src/packages/frontend/sagews/chatgpt.ts @@ -1,7 +1,8 @@ import { redux } from "@cocalc/frontend/app-framework"; -import { getHelp } from "@cocalc/frontend/frame-editors/chatgpt/help-me-fix"; +import { getHelp } from "@cocalc/frontend/frame-editors/llm/help-me-fix"; import { getValidLanguageModelName } from "@cocalc/util/db-schema/openai"; import { MARKERS } from "@cocalc/util/sagews"; +import { SETTINGS_LANGUAGE_MODEL_KEY } from "../account/useLanguageModelSetting"; export function isEnabled(project_id: string): boolean { return redux @@ -24,8 +25,15 @@ export function helpMeFix({ // use the currently set language model from the account store // https://github.com/sagemathinc/cocalc/pull/7278 const other_settings = redux.getStore("account").get("other_settings"); + + const projectsStore = redux.getStore("projects"); + const enabled = projectsStore.llmEnabledSummary(); + const ollama = 
redux.getStore("customize").get("ollama")?.toJS() ?? {}; + const model = getValidLanguageModelName( - other_settings?.get("language_model"), + other_settings?.get(SETTINGS_LANGUAGE_MODEL_KEY), + enabled, + Object.keys(ollama), ); getHelp({ project_id, diff --git a/src/packages/hub/servers/server-settings.ts b/src/packages/hub/servers/server-settings.ts index b1068bb212..ba3150098b 100644 --- a/src/packages/hub/servers/server-settings.ts +++ b/src/packages/hub/servers/server-settings.ts @@ -7,11 +7,13 @@ Synchronized table that tracks server settings. */ +import { isEmpty } from "lodash"; + import { once } from "@cocalc/util/async-utils"; import { EXTRAS as SERVER_SETTINGS_EXTRAS } from "@cocalc/util/db-schema/site-settings-extras"; +import { AllSiteSettings } from "@cocalc/util/db-schema/types"; import { startswith } from "@cocalc/util/misc"; import { site_settings_conf as SITE_SETTINGS_CONF } from "@cocalc/util/schema"; -import { isEmpty } from "lodash"; import { database } from "./database"; // Returns: @@ -22,16 +24,16 @@ import { database } from "./database"; // - table: the table, so you can watch for on change events... // These get automatically updated when the database changes. -interface ServerSettings { - all: object; +export interface ServerSettingsDynamic { + all: AllSiteSettings; pub: object; version: object; table: any; } -let serverSettings: ServerSettings | undefined = undefined; +let serverSettings: ServerSettingsDynamic | undefined = undefined; -export default async function getServerSettings(): Promise { +export default async function getServerSettings(): Promise { if (serverSettings != null) { return serverSettings; } diff --git a/src/packages/hub/webapp-configuration.ts b/src/packages/hub/webapp-configuration.ts index 9b01b8d21f..43a3fdb173 100644 --- a/src/packages/hub/webapp-configuration.ts +++ b/src/packages/hub/webapp-configuration.ts @@ -11,25 +11,29 @@ import { delay } from "awaiting"; import debug from "debug"; +import { isEmpty } from "lodash"; +import LRU from "lru-cache"; import type { PostgreSQL } from "@cocalc/database/postgres/types"; +import { get_passport_manager, PassportManager } from "@cocalc/server/hub/auth"; import { getSoftwareEnvironments } from "@cocalc/server/software-envs"; import { callback2 as cb2 } from "@cocalc/util/async-utils"; import { EXTRAS as SERVER_SETTINGS_EXTRAS } from "@cocalc/util/db-schema/site-settings-extras"; import { SoftwareEnvConfig } from "@cocalc/util/sanitize-software-envs"; import { site_settings_conf as SITE_SETTINGS_CONF } from "@cocalc/util/schema"; +import { OllamaPublic } from "@cocalc/util/types/llm"; import { parseDomain, ParseResultType } from "parse-domain"; -import { get_passport_manager, PassportManager } from "@cocalc/server/hub/auth"; -import getServerSettings from "./servers/server-settings"; +import getServerSettings, { + ServerSettingsDynamic, +} from "./servers/server-settings"; import { have_active_registration_tokens } from "./utils"; const L = debug("hub:webapp-config"); -import LRU from "lru-cache"; const CACHE = new LRU({ max: 1000, ttl: 60 * 1000 }); // 1 minutes export function clear_cache(): void { - CACHE.reset(); + CACHE.clear(); } type Theme = { [key: string]: string | boolean }; @@ -40,6 +44,7 @@ interface Config { registration: any; strategies: object; software: SoftwareEnvConfig | null; + ollama: { [key: string]: OllamaPublic }; } async function get_passport_manager_async(): Promise { @@ -53,7 +58,7 @@ async function get_passport_manager_async(): Promise { return pp_manager; } else { L( 
- `WARNING: Passport Manager not available yet -- trying again in ${ms}ms` + `WARNING: Passport Manager not available yet -- trying again in ${ms}ms`, ); await delay(ms); ms = Math.min(10000, 1.3 * ms); @@ -63,7 +68,7 @@ async function get_passport_manager_async(): Promise { export class WebappConfiguration { private readonly db: PostgreSQL; - private data?: any; + private data?: ServerSettingsDynamic; constructor({ db }) { this.db = db; @@ -168,14 +173,43 @@ export class WebappConfiguration { return strategies as object; } + // derives the public ollama model configuration from the private one + private get_ollama_public(): { [key: string]: OllamaPublic } { + if (this.data == null) { + throw new Error("server settings not yet initialized"); + } + const ollama = this.data.all.ollama_configuration; + if (isEmpty(ollama)) return {}; + + const public_ollama = {}; + for (const key in ollama) { + const conf = ollama[key]; + const cocalc = conf.cocalc ?? {}; + const model = conf.model ?? key; + public_ollama[key] = { + key, + model, + display: cocalc.display ?? `Ollama ${model}`, + icon: cocalc.icon, // fallback is the Ollama icon, frontend does that + }; + } + return public_ollama; + } + private async get_config({ country, host }): Promise { - const [configuration, registration, software] = await Promise.all([ + while (this.data == null) { + L.debug("waiting for server settings to be initialized"); + await delay(100); + } + + const [configuration, registration, software, ollama] = await Promise.all([ this.get_configuration({ host, country }), have_active_registration_tokens(this.db), getSoftwareEnvironments("webapp"), + this.get_ollama_public(), ]); const strategies = await this.get_strategies(); - return { configuration, registration, strategies, software }; + return { configuration, registration, strategies, software, ollama }; } // it returns a shallow copy, hence you can modify/add keys in the returned map! 
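To make the get_ollama_public() derivation above concrete, here is a minimal illustrative sketch; the key "llama2", the URL and the display name are invented for this example and are not part of this patch:

    // hypothetical entry of the private "Ollama Configuration" server setting
    { "llama2": { "baseUrl": "http://localhost:11434", "cocalc": { "display": "Llama 2", "enabled": true } } }
    // derived OllamaPublic entry returned by get_ollama_public() and shipped to the frontend as "ollama"
    { "llama2": { "key": "llama2", "model": "llama2", "display": "Llama 2", "icon": undefined } }

Since cocalc.icon is not set in this sketch, icon stays undefined and the frontend falls back to the bundled Ollama icon, as noted in the code above.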
diff --git a/src/packages/server/llm/call-chatgpt.ts b/src/packages/server/llm/call-llm.ts similarity index 98% rename from src/packages/server/llm/call-chatgpt.ts rename to src/packages/server/llm/call-llm.ts index 4763e260b6..a807d26237 100644 --- a/src/packages/server/llm/call-chatgpt.ts +++ b/src/packages/server/llm/call-llm.ts @@ -7,7 +7,7 @@ import { ChatOutput } from "@cocalc/util/types/llm"; import { Stream } from "openai/streaming"; import { totalNumTokens } from "./chatgpt-numtokens"; -const log = getLogger("llm:call-chatgpt"); +const log = getLogger("llm:call-llm"); interface CallChatGPTOpts { openai: OpenAI; diff --git a/src/packages/server/llm/client.ts b/src/packages/server/llm/client.ts index 1f50dab0c0..79a0bff122 100644 --- a/src/packages/server/llm/client.ts +++ b/src/packages/server/llm/client.ts @@ -5,13 +5,15 @@ You do not have to worry too much about throwing an exception, because they're c */ import OpenAI from "openai"; +import jsonStable from "json-stable-stringify"; +import { Ollama } from "@langchain/community/llms/ollama"; +import * as _ from "lodash"; import getLogger from "@cocalc/backend/logger"; import { getServerSettings } from "@cocalc/database/settings/server-settings"; import { LanguageModel, model2vendor } from "@cocalc/util/db-schema/openai"; import { unreachable } from "@cocalc/util/misc"; import { VertexAIClient } from "./vertex-ai-client"; -import { Ollama } from "@langchain/community/llms/ollama"; const log = getLogger("llm:client"); @@ -68,29 +70,59 @@ export async function getClient( const ollamaCache: { [key: string]: Ollama } = {}; +/** + * The idea here is: the ollama config contains all available endpoints and their configuration. + * The "model" is the unique key in the ollama_configuration mapping, it was prefixed by "ollama-". + * For the actual Ollama client instantitation, we pick the model parameter from the config or just use the unique model name as a fallback. + * In particular, this means you can query the same Ollama model with differnet parameters, or even have several ollama servers running. + * All other config parameters are passed to the Ollama constructor (e.g. topK, temperature, etc.). 
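+ * For illustration only (hypothetical names and values, not taken from this patch): given an entry such as
+ *   "llama2": { "baseUrl": "http://localhost:11434", "keepAlive": -1, "temperature": 0.7, "cocalc": { "display": "Llama 2" } }
+ * the client is cached under a key built from "llama2" plus the stable JSON of that config, and every field
+ * except baseUrl, model, keepAlive and cocalc (here only temperature) is forwarded to the Ollama constructor.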
+ */ export async function getOllama(model: string) { - // model is the unique key in the ServerSettings.ollama_configuration mapping - if (ollamaCache[model]) { - return ollamaCache[model]; + if (model.startsWith("ollama-")) { + throw new Error( + `At this point, the model name should no longer have the "ollama-" prefix`, + ); } const settings = await getServerSettings(); const config = settings.ollama_configuration?.[model]; if (!config) { throw new Error( - `Ollama model ${model} not configured – you have to create an entry {${model}: {url: "https://...", ...}} in the "Ollama Configuration" entry of the server settings`, + `Ollama model ${model} not configured – you have to create an entry {${model}: {baseUrl: "https://...", ...}} in the "Ollama Configuration" entry of the server settings!`, ); } - const baseUrl = config.url; + // the key is a hash of the model name and the specific config – such that changes in the config will invalidate the cache + const key = `${model}:${jsonStable(config)}`; + + // model is the unique key in the ServerSettings.ollama_configuration mapping + if (ollamaCache[key]) { + log.debug(`Using cached Ollama client for model ${model}`); + return ollamaCache[key]; + } + + const baseUrl = config.baseUrl; if (!baseUrl) { - throw new Error(`The url of the Ollama model ${model} is not configured`); + throw new Error( + `The "baseUrl" field of the Ollama model ${model} is not configured`, + ); } const keepAlive = config.keepAlive ?? -1; - const client = new Ollama({ baseUrl, model, keepAlive }); - ollamaCache[model] = client; + // extract all other properties from the config, except the url, model, keepAlive field and the "cocalc" field + const other = _.omit(config, ["baseUrl", "model", "keepAlive", "cocalc"]); + const ollamaConfig = { + baseUrl, + model: config.model ?? model, + keepAlive, + ...other, + }; + + log.debug("Instantiating Ollama client with config", ollamaConfig); + + const client = new Ollama(ollamaConfig); + ollamaCache[key] = client; return client; } diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts index 8f2800d53e..dcd4bbbc83 100644 --- a/src/packages/server/llm/index.ts +++ b/src/packages/server/llm/index.ts @@ -30,7 +30,7 @@ import { } from "@cocalc/util/db-schema/openai"; import { ChatOptions, ChatOutput, History } from "@cocalc/util/types/llm"; import { checkForAbuse } from "./abuse"; -import { callChatGPTAPI } from "./call-chatgpt"; +import { callChatGPTAPI } from "./call-llm"; import { getClient } from "./client"; import { saveResponse } from "./save-response"; import { VertexAIClient } from "./vertex-ai-client"; diff --git a/src/packages/util/db-schema/openai.ts b/src/packages/util/db-schema/openai.ts index 372f47be07..bd2b64ab32 100644 --- a/src/packages/util/db-schema/openai.ts +++ b/src/packages/util/db-schema/openai.ts @@ -43,16 +43,30 @@ export function isLanguageModel(model?: string): model is LanguageModel { export function getValidLanguageModelName( model: string | undefined, - filter: { google: boolean; openai: boolean } = { google: true, openai: true }, -): LanguageModel { - const dftl = filter.openai === true ? DEFAULT_MODEL : "chat-bison-001"; + filter: { google: boolean; openai: boolean; ollama: boolean } = { + google: true, + openai: true, + ollama: false, + }, + ollama: string[] = [], // keys of ollama models +): LanguageModel | string { + const dftl = + filter.openai === true + ? DEFAULT_MODEL + : filter.ollama && ollama?.length > 0 + ? 
toOllamaModel(ollama[0]) + : "chat-bison-001"; + console.log("getValidLanguageModelName", model, filter, ollama, dftl); if (model == null) { return dftl; } - if (!LANGUAGE_MODELS.includes(model as LanguageModel)) { - return dftl; + if (LANGUAGE_MODELS.includes(model as LanguageModel)) { + return model; } - return model as LanguageModel; + if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) { + return model; + } + return dftl; } export interface OpenAIMessage { @@ -129,6 +143,18 @@ export function model2vendor(model: LanguageModel): Vendor { } } +export function toOllamaModel(model: string) { + return `ollama-${model}`; +} + +export function fromOllamaModel(model: string) { + return model.replace(/^ollama-/, ""); +} + +export function isOllamaLLM(model: string) { + return model.startsWith("ollama-"); +} + const MODELS_OPENAI = [ "gpt-3.5-turbo", "gpt-3.5-turbo-16k", @@ -166,13 +192,14 @@ export const LLM_USERNAMES = { "gemini-pro": "Gemini Pro", } as const; -export function isFreeModel(model: Model) { +export function isFreeModel(model: string) { + if (!LANGUAGE_MODELS.includes(model as LanguageModel)) return false; return ( - model == "gpt-3.5-turbo" || - model == "text-bison-001" || - model == "chat-bison-001" || - model == "embedding-gecko-001" || - model == "gemini-pro" + (model as Model) == "gpt-3.5-turbo" || + (model as Model) == "text-bison-001" || + (model as Model) == "chat-bison-001" || + (model as Model) == "embedding-gecko-001" || + (model as Model) == "gemini-pro" ); } diff --git a/src/packages/util/db-schema/site-settings-extras.ts b/src/packages/util/db-schema/site-settings-extras.ts index b338ae5507..4557aa0a64 100644 --- a/src/packages/util/db-schema/site-settings-extras.ts +++ b/src/packages/util/db-schema/site-settings-extras.ts @@ -9,31 +9,31 @@ // You can use markdown in the descriptions below and it is rendered properly! 
+import { isValidUUID } from "@cocalc/util/misc"; import { Config, + SiteSettings, + displayJson, + from_json, is_email_enabled, - only_for_smtp, - only_for_sendgrid, - only_for_password_reset_smtp, - to_bool, - only_booleans, - to_int, - only_nonneg_int, - toFloat, onlyNonnegFloat, onlyPosFloat, - only_pos_int, - only_commercial, + only_booleans, only_cocalc_com, - from_json, + only_commercial, + only_for_password_reset_smtp, + only_for_sendgrid, + only_for_smtp, + only_nonneg_int, + only_pos_int, parsableJson, - displayJson, + toFloat, + to_bool, + to_int, to_trimmed_str, - SiteSettings, } from "./site-defaults"; -import { isValidUUID } from "@cocalc/util/misc"; -import { is_valid_email_address, expire_time } from "@cocalc/util/misc"; +import { expire_time, is_valid_email_address } from "@cocalc/util/misc"; export const pii_retention_parse = (retention: string): number | false => { if (retention == "never" || retention == null) return false; @@ -84,6 +84,80 @@ const neural_search_enabled = (conf: SiteSettings) => const jupyter_api_enabled = (conf: SiteSettings) => to_bool(conf.jupyter_api_enabled); +function ollama_valid(value: string): boolean { + if (!parsableJson(value)) { + return false; + } + const obj = from_json(value); + if (typeof obj !== "object") { + return false; + } + for (const key in obj) { + const val = obj[key] as any; + if (typeof val !== "object") { + return false; + } + if (typeof val.baseUrl !== "string") { + return false; + } + if (val.model && typeof val.model !== "string") { + return false; + } + const c = val.cocalc; + if (c != null) { + if (typeof c !== "object") { + return false; + } + if (c.display && typeof c.display !== "string") { + return false; + } + if (c.enabled && typeof c.enabled !== "boolean") { + return false; + } + } + } + return true; +} + +function ollama_display(value: string): string { + if (!parsableJson(value)) { + return "Ollama JSON not parseable. Must be {[key : string] : {model: string, baseUrL: string, cocalc: {display: string, ...}, ...}"; + } + const obj = from_json(value); + if (typeof obj !== "object") { + return "Ollama JSON must be an object"; + } + const ret: string[] = []; + for (const key in obj) { + const val = obj[key] as any; + if (typeof val !== "object") { + return `Ollama config ${key} must be an object`; + } + if (typeof val.baseUrl !== "string") { + return `Ollama config ${key} baseUrl field must be a string`; + } + if (val.model && typeof val.model !== "string") { + return `Ollama config ${key} model field must be a string`; + } + const c = val.cocalc; + if (c != null) { + if (typeof c !== "object") { + return `Ollama config ${key} cocalc field must be an object`; + } + if (c.display && typeof c.display !== "string") { + return `Ollama config ${key} cocalc.display field must be a string`; + } + if (c.enabled && typeof c.enabled !== "boolean") { + return `Ollama config ${key} cocalc.enabled field must be a boolean`; + } + } + ret.push( + `Olama ${key} at ${val.baseUrl} named ${c?.display ?? val.model ?? 
key}`, + ); + } + return `[${ret.join(", ")}]`; +} + export type SiteSettingsExtrasKeys = | "pii_retention" | "stripe_heading" @@ -189,7 +263,8 @@ export const EXTRAS: SettingsExtras = { multiline: 5, show: ollama_enabled, to_val: from_json, - valid: parsableJson, + valid: ollama_valid, + to_display: ollama_display, }, qdrant_section: { name: "Qdrant Configuration", diff --git a/src/packages/util/types/llm.ts b/src/packages/util/types/llm.ts index c028ea6e37..36c8d35668 100644 --- a/src/packages/util/types/llm.ts +++ b/src/packages/util/types/llm.ts @@ -30,3 +30,10 @@ export interface ChatOptions { stream?: (output?: string) => void; maxTokens?: number; } + +export interface OllamaPublic { + key: string; // the key in the dict + model: string; + display: string; + icon: string; +} From 71140d31868315f0d3d7faf175a2884ec0d085a7 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Fri, 23 Feb 2024 16:47:55 +0100 Subject: [PATCH 03/32] util/llm: refactoring to keep my sanity (and fixing circular imports) --- .../frontend/account/avatar/avatar.tsx | 2 +- src/packages/frontend/account/chatbot.ts | 2 +- .../frontend/account/other-settings.tsx | 2 +- .../account/useLanguageModelSetting.tsx | 2 +- src/packages/frontend/chat/actions.ts | 2 +- src/packages/frontend/client/openai.ts | 12 +- .../codemirror/extensions/ai-formula.tsx | 2 +- .../components/language-model-icon.tsx | 3 +- .../markdown-input/mentionable-users.tsx | 2 +- .../frontend/frame-editors/llm/create-chat.ts | 2 +- .../frame-editors/llm/model-switch.tsx | 15 +- .../frame-editors/llm/title-bar-button.tsx | 2 +- .../frontend/jupyter/chatgpt/explain.tsx | 2 +- .../jupyter/insert-cell/ai-cell-generator.tsx | 6 +- src/packages/frontend/misc/openai.ts | 10 +- .../page/home-page/ai-generate-jupyter.tsx | 2 +- src/packages/frontend/sagews/chatgpt.ts | 2 +- .../components/openai/vendor-status-check.tsx | 2 +- src/packages/server/llm/abuse.ts | 7 +- src/packages/server/llm/call-llm.ts | 2 +- src/packages/server/llm/client.ts | 6 +- src/packages/server/llm/index.ts | 7 +- src/packages/server/llm/vertex-ai-client.ts | 2 +- .../server/purchases/get-service-cost.ts | 2 +- .../server/purchases/is-purchase-allowed.ts | 2 +- src/packages/util/db-schema/llm.test.ts | 13 + src/packages/util/db-schema/llm.ts | 351 ++++++++++++++++++ src/packages/util/db-schema/openai.ts | 337 +---------------- .../util/db-schema/purchase-quotas.ts | 2 +- src/packages/util/types/llm.ts | 2 +- 30 files changed, 421 insertions(+), 384 deletions(-) create mode 100644 src/packages/util/db-schema/llm.test.ts create mode 100644 src/packages/util/db-schema/llm.ts diff --git a/src/packages/frontend/account/avatar/avatar.tsx b/src/packages/frontend/account/avatar/avatar.tsx index 089634f57f..dde3aa35e6 100644 --- a/src/packages/frontend/account/avatar/avatar.tsx +++ b/src/packages/frontend/account/avatar/avatar.tsx @@ -18,7 +18,7 @@ import { LanguageModelVendorAvatar } from "@cocalc/frontend/components/language- import { ProjectTitle } from "@cocalc/frontend/projects/project-title"; import { DEFAULT_COLOR } from "@cocalc/frontend/users/store"; import { webapp_client } from "@cocalc/frontend/webapp-client"; -import { service2model } from "@cocalc/util/db-schema/openai"; +import { service2model } from "@cocalc/util/db-schema/llm"; import { ensure_bound, startswith, trunc_middle } from "@cocalc/util/misc"; import { avatar_fontcolor } from "./font-color"; diff --git a/src/packages/frontend/account/chatbot.ts b/src/packages/frontend/account/chatbot.ts index d3f10280dd..4c237ec7fd 100644 
--- a/src/packages/frontend/account/chatbot.ts +++ b/src/packages/frontend/account/chatbot.ts @@ -13,7 +13,7 @@ import { MODELS, Vendor, model2vendor, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; // we either check if the prefix is one of the known ones (used in some circumstances) // or if the account id is exactly one of the language models (more precise) diff --git a/src/packages/frontend/account/other-settings.tsx b/src/packages/frontend/account/other-settings.tsx index e01d3ddbc9..27a7734179 100644 --- a/src/packages/frontend/account/other-settings.tsx +++ b/src/packages/frontend/account/other-settings.tsx @@ -29,7 +29,7 @@ import { getValidLanguageModelName, isFreeModel, model2vendor, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; import { VBAR_EXPLANATION, VBAR_KEY, diff --git a/src/packages/frontend/account/useLanguageModelSetting.tsx b/src/packages/frontend/account/useLanguageModelSetting.tsx index d6719d3627..e3c431c9eb 100644 --- a/src/packages/frontend/account/useLanguageModelSetting.tsx +++ b/src/packages/frontend/account/useLanguageModelSetting.tsx @@ -5,7 +5,7 @@ import { fromOllamaModel, getValidLanguageModelName, isOllamaLLM, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; export const SETTINGS_LANGUAGE_MODEL_KEY = "language_model"; diff --git a/src/packages/frontend/chat/actions.ts b/src/packages/frontend/chat/actions.ts index fb1a8f786a..bdbe2413e7 100644 --- a/src/packages/frontend/chat/actions.ts +++ b/src/packages/frontend/chat/actions.ts @@ -22,7 +22,7 @@ import { model2vendor, type LanguageModel, LANGUAGE_MODEL_PREFIXES, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; import { cmp, isValidUUID, parse_hashtags, uuid } from "@cocalc/util/misc"; import { getSortedDates } from "./chat-log"; import { message_to_markdown } from "./message"; diff --git a/src/packages/frontend/client/openai.ts b/src/packages/frontend/client/openai.ts index aff1bc4dd4..70b70bd935 100644 --- a/src/packages/frontend/client/openai.ts +++ b/src/packages/frontend/client/openai.ts @@ -8,20 +8,17 @@ import { EventEmitter } from "events"; import { redux } from "@cocalc/frontend/app-framework"; import type { History } from "@cocalc/frontend/misc/openai"; // do not import until needed -- it is HUGE! -import type { - EmbeddingData, - LanguageModel, -} from "@cocalc/util/db-schema/openai"; +import type { EmbeddingData } from "@cocalc/util/db-schema/openai"; import { MAX_EMBEDDINGS_TOKENS, MAX_REMOVE_LIMIT, MAX_SAVE_LIMIT, MAX_SEARCH_LIMIT, - isFreeModel, - model2service, } from "@cocalc/util/db-schema/openai"; import * as message from "@cocalc/util/message"; import type { WebappClient } from "./client"; +import { LanguageModel, LanguageService } from "@cocalc/util/db-schema/llm"; +import { isFreeModel, model2service } from "@cocalc/util/db-schema/llm"; const DEFAULT_SYSTEM_PROMPT = "ASSUME THAT I HAVE FULL ACCESS TO COCALC AND I AM USING COCALC RIGHT NOW. ENCLOSE ALL MATH IN $. INCLUDE THE LANGUAGE DIRECTLY AFTER THE TRIPLE BACKTICKS IN ALL MARKDOWN CODE BLOCKS. BE BRIEF."; @@ -98,7 +95,8 @@ export class LLMClient { } if (!isFreeModel(model)) { - const service = model2service(model); + // Ollama and others are treated as "free" + const service = model2service(model) as LanguageService; // when client gets non-free openai model request, check if allowed. If not, show quota modal. 
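// For example (illustrative, not part of this patch): model2service("gpt-4") yields "openai-gpt-4", a paid
// service that goes through this purchase check, while Ollama models never reach this branch because
// isFreeModel() treats them as free.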
const { allowed, reason } = await this.client.purchases_client.isPurchaseAllowed(service); diff --git a/src/packages/frontend/codemirror/extensions/ai-formula.tsx b/src/packages/frontend/codemirror/extensions/ai-formula.tsx index 71fc33ce5e..9f69441d06 100644 --- a/src/packages/frontend/codemirror/extensions/ai-formula.tsx +++ b/src/packages/frontend/codemirror/extensions/ai-formula.tsx @@ -21,7 +21,7 @@ import ModelSwitch, { import { show_react_modal } from "@cocalc/frontend/misc"; import track from "@cocalc/frontend/user-tracking"; import { webapp_client } from "@cocalc/frontend/webapp-client"; -import { isFreeModel, isLanguageModel } from "@cocalc/util/db-schema/openai"; +import { isFreeModel, isLanguageModel } from "@cocalc/util/db-schema/llm"; import { unreachable } from "@cocalc/util/misc"; type Mode = "tex" | "md"; diff --git a/src/packages/frontend/components/language-model-icon.tsx b/src/packages/frontend/components/language-model-icon.tsx index bef2211d43..80954715c9 100644 --- a/src/packages/frontend/components/language-model-icon.tsx +++ b/src/packages/frontend/components/language-model-icon.tsx @@ -1,6 +1,5 @@ -import { isLanguageModel, model2vendor } from "@cocalc/util/db-schema/openai"; - import { CSS } from "@cocalc/frontend/app-framework"; +import { isLanguageModel, model2vendor } from "@cocalc/util/db-schema/llm"; import { unreachable } from "@cocalc/util/misc"; import AIAvatar from "./ai-avatar"; import GoogleGeminiLogo from "./google-gemini-avatar"; diff --git a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx index 30f5293619..badbffa148 100644 --- a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx +++ b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx @@ -12,7 +12,7 @@ import { LLM_USERNAMES, USER_SELECTABLE_LANGUAGE_MODELS, model2service, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; import { cmp, timestamp_cmp, trunc_middle } from "@cocalc/util/misc"; import { Item } from "./complete"; diff --git a/src/packages/frontend/frame-editors/llm/create-chat.ts b/src/packages/frontend/frame-editors/llm/create-chat.ts index 0c3b66d5bd..8bc5847244 100644 --- a/src/packages/frontend/frame-editors/llm/create-chat.ts +++ b/src/packages/frontend/frame-editors/llm/create-chat.ts @@ -9,7 +9,7 @@ export interface Options { command: string; allowEmpty?: boolean; tag?: string; - model: LanguageModel; + model: LanguageModel | string; } export default async function createChat({ diff --git a/src/packages/frontend/frame-editors/llm/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx index a99d6f6621..5743ad309c 100644 --- a/src/packages/frontend/frame-editors/llm/model-switch.tsx +++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx @@ -6,10 +6,12 @@ import { LLM_USERNAMES, LanguageModel, USER_SELECTABLE_LANGUAGE_MODELS, + fromOllamaModel, isFreeModel, + isOllamaLLM, model2service, toOllamaModel, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; export { DEFAULT_MODEL }; export type { LanguageModel }; @@ -139,11 +141,18 @@ export default function ModelSwitch({ ); } -export function modelToName(model: LanguageModel): string { +export function modelToName(model: LanguageModel | string): string { + if (isOllamaLLM(model)) { + const ollama = redux.getStore("customize").get("ollama")?.toJS() ?? {}; + const om = ollama[fromOllamaModel(model)]; + if (om) { + return om.display ?? 
`Ollama ${model}`; + } + } return LLM_USERNAMES[model] ?? model; } -export function modelToMention(model: LanguageModel): string { +export function modelToMention(model: LanguageModel | string): string { return `@${modelToName(model)}`; diff --git a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx index 0c46b7b2b3..638a81d358 100644 --- a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx +++ b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx @@ -434,7 +434,7 @@ async function updateInput( actions: Actions, id, scope, - model: LanguageModel, + model: LanguageModel | string, ): Promise<{ input: string; inputOrig: string }> { if (scope == "none") { return { input: "", inputOrig: "" }; diff --git a/src/packages/frontend/jupyter/chatgpt/explain.tsx b/src/packages/frontend/jupyter/chatgpt/explain.tsx index 40a388bc3d..f53f8a6563 100644 --- a/src/packages/frontend/jupyter/chatgpt/explain.tsx +++ b/src/packages/frontend/jupyter/chatgpt/explain.tsx @@ -140,7 +140,7 @@ async function getExplanation({ actions: JupyterActions; project_id: string; path: string; - model: LanguageModel; + model: LanguageModel | string; }) { const message = createMessage({ id, actions, model, open: false }); if (!message) { diff --git a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx index b20fa35bb7..15ac5ef97b 100644 --- a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx +++ b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx @@ -20,7 +20,7 @@ import { LanguageModel, getVendorStatusCheckMD, model2vendor, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; import { COLORS } from "@cocalc/util/theme"; import { JupyterActions } from "../browser-actions"; import { insertCell } from "./util"; @@ -168,7 +168,7 @@ interface QueryLanguageModelProps { actions: JupyterActions; frameActions: React.MutableRefObject; id: string; - model: LanguageModel; + model: LanguageModel | string; path: string; position: "above" | "below"; project_id: string; @@ -316,7 +316,7 @@ interface GetInputProps { actions: JupyterActions; frameActions: React.MutableRefObject; id: string; - model: LanguageModel; + model: LanguageModel | string; position: "above" | "below"; prompt: string; } diff --git a/src/packages/frontend/misc/openai.ts b/src/packages/frontend/misc/openai.ts index b51688602e..07dee27848 100644 --- a/src/packages/frontend/misc/openai.ts +++ b/src/packages/frontend/misc/openai.ts @@ -1,8 +1,8 @@ // NOTE! This gpt-3-tokenizer is LARGE, e.g., 1.6MB, so be // sure to async load it by clients of this code. 
import GPT3Tokenizer from "gpt3-tokenizer"; -import type { Model } from "@cocalc/util/db-schema/openai"; -import { getMaxTokens } from "@cocalc/util/db-schema/openai"; +import type { Model } from "@cocalc/util/db-schema/llm"; +import { getMaxTokens } from "@cocalc/util/db-schema/llm"; export { getMaxTokens }; @@ -25,7 +25,7 @@ const tokenizer = new GPT3Tokenizer({ type: "gpt3" }); export function numTokensUpperBound( content: string, - maxTokens: number + maxTokens: number, ): number { return ( tokenizer.encode(content.slice(0, maxTokens * APPROX_CHARACTERS_PER_TOKEN)) @@ -64,7 +64,7 @@ export function truncateMessage(content: string, maxTokens: number): string { export function truncateHistory( history: History, maxTokens: number, - model: Model + model: Model, ): History { if (maxTokens <= 0) { return []; @@ -101,7 +101,7 @@ export function truncateHistory( const before = tokens[largestIndex].length; const toRemove = Math.max( 1, - Math.min(maxTokens - total, Math.ceil(tokens[largestIndex].length / 5)) + Math.min(maxTokens - total, Math.ceil(tokens[largestIndex].length / 5)), ); tokens[largestIndex] = tokens[largestIndex].slice(0, -toRemove); const after = tokens[largestIndex].length; diff --git a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx index 04016fcb5e..af1a2bd413 100644 --- a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx +++ b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx @@ -50,7 +50,7 @@ import { once } from "@cocalc/util/async-utils"; import { getVendorStatusCheckMD, model2vendor, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; import { field_cmp, to_iso_path } from "@cocalc/util/misc"; import { COLORS } from "@cocalc/util/theme"; import { ensure_project_running } from "../../project-start-warning"; diff --git a/src/packages/frontend/sagews/chatgpt.ts b/src/packages/frontend/sagews/chatgpt.ts index 84313e3bc3..33b3a72e99 100644 --- a/src/packages/frontend/sagews/chatgpt.ts +++ b/src/packages/frontend/sagews/chatgpt.ts @@ -1,6 +1,6 @@ import { redux } from "@cocalc/frontend/app-framework"; import { getHelp } from "@cocalc/frontend/frame-editors/llm/help-me-fix"; -import { getValidLanguageModelName } from "@cocalc/util/db-schema/openai"; +import { getValidLanguageModelName } from "@cocalc/util/db-schema/llm"; import { MARKERS } from "@cocalc/util/sagews"; import { SETTINGS_LANGUAGE_MODEL_KEY } from "../account/useLanguageModelSetting"; diff --git a/src/packages/next/components/openai/vendor-status-check.tsx b/src/packages/next/components/openai/vendor-status-check.tsx index 67a3ab0139..7b0192802c 100644 --- a/src/packages/next/components/openai/vendor-status-check.tsx +++ b/src/packages/next/components/openai/vendor-status-check.tsx @@ -1,4 +1,4 @@ -import { Vendor } from "@cocalc/util/db-schema/openai"; +import { Vendor } from "@cocalc/util/db-schema/llm"; import { unreachable } from "@cocalc/util/misc"; import A from "components/misc/A"; diff --git a/src/packages/server/llm/abuse.ts b/src/packages/server/llm/abuse.ts index 6b32d9583a..ae89cac4da 100644 --- a/src/packages/server/llm/abuse.ts +++ b/src/packages/server/llm/abuse.ts @@ -27,9 +27,10 @@ import { assertPurchaseAllowed } from "@cocalc/server/purchases/is-purchase-allo import { isFreeModel, LanguageModel, + LanguageService, model2service, MODELS, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; import { isValidUUID } from 
"@cocalc/util/misc"; const QUOTAS = { @@ -73,7 +74,7 @@ export async function checkForAbuse({ // This is a for-pay product, so let's make sure user can purchase it. await assertPurchaseAllowed({ account_id, - service: model2service(model), + service: model2service(model) as LanguageService, }); // We always allow usage of for pay models, since the user is paying for // them. Only free models need to be throttled. @@ -113,7 +114,7 @@ export async function checkForAbuse({ // This is a for-pay product, so let's make sure user can purchase it. await assertPurchaseAllowed({ account_id, - service: model2service(model), + service: model2service(model) as LanguageService, }); } } diff --git a/src/packages/server/llm/call-llm.ts b/src/packages/server/llm/call-llm.ts index a807d26237..f59ebdfe26 100644 --- a/src/packages/server/llm/call-llm.ts +++ b/src/packages/server/llm/call-llm.ts @@ -2,7 +2,7 @@ import { delay } from "awaiting"; import type OpenAI from "openai"; import getLogger from "@cocalc/backend/logger"; -import { ModelOpenAI, OpenAIMessages } from "@cocalc/util/db-schema/openai"; +import { ModelOpenAI, OpenAIMessages } from "@cocalc/util/db-schema/llm"; import { ChatOutput } from "@cocalc/util/types/llm"; import { Stream } from "openai/streaming"; import { totalNumTokens } from "./chatgpt-numtokens"; diff --git a/src/packages/server/llm/client.ts b/src/packages/server/llm/client.ts index 79a0bff122..4f0fa454ea 100644 --- a/src/packages/server/llm/client.ts +++ b/src/packages/server/llm/client.ts @@ -4,14 +4,14 @@ Get the client for the given LanguageModel. You do not have to worry too much about throwing an exception, because they're caught in ./index::evaluate */ -import OpenAI from "openai"; -import jsonStable from "json-stable-stringify"; import { Ollama } from "@langchain/community/llms/ollama"; +import jsonStable from "json-stable-stringify"; import * as _ from "lodash"; +import OpenAI from "openai"; import getLogger from "@cocalc/backend/logger"; import { getServerSettings } from "@cocalc/database/settings/server-settings"; -import { LanguageModel, model2vendor } from "@cocalc/util/db-schema/openai"; +import { LanguageModel, model2vendor } from "@cocalc/util/db-schema/llm"; import { unreachable } from "@cocalc/util/misc"; import { VertexAIClient } from "./vertex-ai-client"; diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts index dcd4bbbc83..53bdb8aecf 100644 --- a/src/packages/server/llm/index.ts +++ b/src/packages/server/llm/index.ts @@ -21,13 +21,14 @@ import { DEFAULT_MODEL, LLM_USERNAMES, LanguageModel, + LanguageService, OpenAIMessages, getLLMCost, isFreeModel, isValidModel, model2service, model2vendor, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; import { ChatOptions, ChatOutput, History } from "@cocalc/util/types/llm"; import { checkForAbuse } from "./abuse"; import { callChatGPTAPI } from "./call-llm"; @@ -136,9 +137,9 @@ async function evaluateImpl({ account_id, project_id, cost, - service: model2service(model), + service: model2service(model) as LanguageService, description: { - type: model2service(model), + type: model2service(model) as LanguageService, prompt_tokens, completion_tokens, }, diff --git a/src/packages/server/llm/vertex-ai-client.ts b/src/packages/server/llm/vertex-ai-client.ts index c80730af50..c7a8cf360c 100644 --- a/src/packages/server/llm/vertex-ai-client.ts +++ b/src/packages/server/llm/vertex-ai-client.ts @@ -5,7 +5,7 @@ */ import getLogger from "@cocalc/backend/logger"; -import { 
LanguageModel } from "@cocalc/util/db-schema/openai"; +import { LanguageModel } from "@cocalc/util/db-schema/llm"; import { ChatOutput, History } from "@cocalc/util/types/llm"; import { DiscussServiceClient, diff --git a/src/packages/server/purchases/get-service-cost.ts b/src/packages/server/purchases/get-service-cost.ts index f87432e134..5efb7daad3 100644 --- a/src/packages/server/purchases/get-service-cost.ts +++ b/src/packages/server/purchases/get-service-cost.ts @@ -9,7 +9,7 @@ import { getLLMCost, isLanguageModelService, service2model, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; import type { Service } from "@cocalc/util/db-schema/purchases"; import { unreachable } from "@cocalc/util/misc"; diff --git a/src/packages/server/purchases/is-purchase-allowed.ts b/src/packages/server/purchases/is-purchase-allowed.ts index a469637ea0..26f576f74e 100644 --- a/src/packages/server/purchases/is-purchase-allowed.ts +++ b/src/packages/server/purchases/is-purchase-allowed.ts @@ -5,7 +5,7 @@ import { getMaxCost, isLanguageModelService, service2model, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; import { QUOTA_SPEC, Service } from "@cocalc/util/db-schema/purchase-quotas"; import { MAX_COST } from "@cocalc/util/db-schema/purchases"; import { currency, round2up, round2down } from "@cocalc/util/misc"; diff --git a/src/packages/util/db-schema/llm.test.ts b/src/packages/util/db-schema/llm.test.ts new file mode 100644 index 0000000000..f77084ceee --- /dev/null +++ b/src/packages/util/db-schema/llm.test.ts @@ -0,0 +1,13 @@ +// this tests the wrongly named openai.ts file + +import { isFreeModel } from "./llm"; + +describe("openai/llm", () => { + test("isFreeModel", () => { + expect(isFreeModel("gpt-3")).toBe(true); + expect(isFreeModel("gpt-4")).toBe(false); + // WARNING: if the following breaks, and ollama becomes non-free, then a couple of assumptions are broken as well. + // search for model2service(...) as LanguageService in the codebase! + expect(isFreeModel("ollama-1")).toBe(true); + }); +}); diff --git a/src/packages/util/db-schema/llm.ts b/src/packages/util/db-schema/llm.ts new file mode 100644 index 0000000000..36a20a8e78 --- /dev/null +++ b/src/packages/util/db-schema/llm.ts @@ -0,0 +1,351 @@ +// this contains bits and pieces from the wrongly named openai.ts file + +import type { LLMService, Service } from "@cocalc/util/db-schema/purchases"; +import { unreachable } from "../misc"; + +export const LANGUAGE_MODELS = [ + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-4", + "gpt-4-32k", + // google's are taken from here – we use the generative AI client lib + // https://developers.generativeai.google/models/language + "text-bison-001", + "chat-bison-001", + "embedding-gecko-001", + "text-embedding-ada-002", + "gemini-pro", +] as const; + +// This hardcodes which models can be selected by users. +// Make sure to update this when adding new models. +// This is used in e.g. 
mentionable-users.tsx, model-switch.tsx and other-settings.tsx +export const USER_SELECTABLE_LANGUAGE_MODELS: Readonly = [ + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-4", + // "chat-bison-001", // PaLM2 is not good, replies with no response too often + "gemini-pro", +] as const; + +export type LanguageModel = (typeof LANGUAGE_MODELS)[number]; + +export function isLanguageModel(model?: string): model is LanguageModel { + return LANGUAGE_MODELS.includes(model as LanguageModel); +} + +export function getValidLanguageModelName( + model: string | undefined, + filter: { google: boolean; openai: boolean; ollama: boolean } = { + google: true, + openai: true, + ollama: false, + }, + ollama: string[] = [], // keys of ollama models +): LanguageModel | string { + const dftl = + filter.openai === true + ? DEFAULT_MODEL + : filter.ollama && ollama?.length > 0 + ? toOllamaModel(ollama[0]) + : "chat-bison-001"; + if (model == null) { + return dftl; + } + if (LANGUAGE_MODELS.includes(model as LanguageModel)) { + return model; + } + if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) { + return model; + } + return dftl; +} + +export interface OpenAIMessage { + role: "system" | "user" | "assistant"; + content: string; +} +export type OpenAIMessages = OpenAIMessage[]; + +export type LanguageService = + | "openai-gpt-3.5-turbo" + | "openai-gpt-3.5-turbo-16k" + | "openai-gpt-4" + | "openai-gpt-4-32k" + | "openai-text-embedding-ada-002" + | "google-text-bison-001" + | "google-chat-bison-001" + | "google-embedding-gecko-001" + | "google-gemini-pro"; + +const LANGUAGE_MODEL_VENDORS = ["openai", "google", "ollama"] as const; +export type Vendor = (typeof LANGUAGE_MODEL_VENDORS)[number]; + +// used e.g. for checking "account-id={string}" and other things like that +export const LANGUAGE_MODEL_PREFIXES = [ + "chatgpt", + ...LANGUAGE_MODEL_VENDORS.map((v) => `${v}-`), +] as const; + +export function model2service( + model: LanguageModel | string, +): LanguageService | string { + if (model === "text-embedding-ada-002") { + return `openai-${model}`; + } + if (isLanguageModel(model)) { + if ( + model === "text-bison-001" || + model === "chat-bison-001" || + model === "embedding-gecko-001" || + model === "gemini-pro" + ) { + return `google-${model}`; + } else { + return `openai-${model}`; + } + } + if (isOllamaLLM(model)) { + return toOllamaModel(model); + } + throw new Error(`unknown model: ${model}`); +} + +// inverse of model2service, but robust for chat avatars, which might not have a prefix +// TODO: fix the mess +export function service2model( + service: LanguageService | "chatgpt", +): LanguageModel { + if (service === "chatgpt") { + return "gpt-3.5-turbo"; + } + // split off the first part of service, e.g., "openai-" or "google-" + const s = service.split("-")[0]; + const hasPrefix = s === "openai" || s === "google"; + const m = hasPrefix ? 
service.split("-").slice(1).join("-") : service; + if (!LANGUAGE_MODELS.includes(m as LanguageModel)) { + // We don't throw an error, since the frontend would crash + // throw new Error(`unknown service: ${service}`); + console.warn(`service2model: unknown service: ${service}`); + return "gpt-3.5-turbo"; + } + return m as LanguageModel; +} + +// Note: this must be an OpenAI model – otherwise change the getValidLanguageModelName function +export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo"; + +export function model2vendor(model: LanguageModel | string): Vendor { + if (model.startsWith("gpt-")) { + return "openai"; + } else if (model.startsWith("ollama-")) { + return "ollama"; + } else { + return "google"; + } +} + +export function toOllamaModel(model: string) { + return `ollama-${model}`; +} + +export function fromOllamaModel(model: string) { + return model.replace(/^ollama-/, ""); +} + +export function isOllamaLLM(model: string) { + return model.startsWith("ollama-"); +} + +const MODELS_OPENAI = [ + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-4", + "gpt-4-32k", +] as const; + +export const MODELS = [ + ...MODELS_OPENAI, + "text-embedding-ada-002", + "text-bison-001", + "chat-bison-001", + "embedding-gecko-001", + "gemini-pro", +] as const; + +export type Model = (typeof MODELS)[number]; + +export type ModelOpenAI = (typeof MODELS_OPENAI)[number]; + +// Map from psuedo account_id to what should be displayed to user. +// This is used in various places in the frontend. +// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing +export const LLM_USERNAMES = { + chatgpt: "GPT-3.5", + chatgpt3: "GPT-3.5", + chatgpt4: "GPT-4", + "gpt-4": "GPT-4", + "gpt-4-32k": "GPT-4-32k", + "gpt-3.5-turbo": "GPT-3.5", + "gpt-3.5-turbo-16k": "GPT-3.5-16k", + "text-bison-001": "PaLM 2", + "chat-bison-001": "PaLM 2", + "embedding-gecko-001": "PaLM 2", + "gemini-pro": "Gemini Pro", +} as const; + +export function isFreeModel(model: string) { + if (isOllamaLLM(model)) return true; + if (LANGUAGE_MODELS.includes(model as LanguageModel)) { + // of these models, the following are free + return ( + (model as Model) == "gpt-3.5-turbo" || + (model as Model) == "text-bison-001" || + (model as Model) == "chat-bison-001" || + (model as Model) == "embedding-gecko-001" || + (model as Model) == "gemini-pro" + ); + } + // all others are free + return true; +} + +// this is used in purchases/get-service-cost +// we only need to check for the vendor prefixes, no special cases! 
+export function isLanguageModelService( + service: Service, +): service is LLMService { + for (const v of LANGUAGE_MODEL_VENDORS) { + if (service.startsWith(`${v}-`)) { + return true; + } + } + return false; +} + +export function getVendorStatusCheckMD(vendor: Vendor): string { + switch (vendor) { + case "openai": + return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`; + case "google": + return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`; + case "ollama": + return `No status information for Ollama available – you have to check with the particular backend for the model.`; + default: + unreachable(vendor); + } + return ""; +} + +export function llmSupportsStreaming(model: LanguageModel): boolean { + return model2vendor(model) === "openai" || model === "gemini-pro"; +} + +interface Cost { + prompt_tokens: number; + completion_tokens: number; + max_tokens: number; +} + +// This is the official published cost that openai charges. +// It changes over time, so this will sometimes need to be updated. +// Our cost is a configurable multiple of this. +// https://openai.com/pricing#language-models +// There appears to be no api that provides the prices, unfortunately. +const LLM_COST: { [name in LanguageModel]: Cost } = { + "gpt-4": { + prompt_tokens: 0.03 / 1000, + completion_tokens: 0.06 / 1000, + max_tokens: 8192, + }, + "gpt-4-32k": { + prompt_tokens: 0.06 / 1000, + completion_tokens: 0.12 / 1000, + max_tokens: 32768, + }, + "gpt-3.5-turbo": { + prompt_tokens: 0.0015 / 1000, + completion_tokens: 0.002 / 1000, + max_tokens: 4096, + }, + "gpt-3.5-turbo-16k": { + prompt_tokens: 0.003 / 1000, + completion_tokens: 0.004 / 1000, + max_tokens: 16384, + }, + "text-embedding-ada-002": { + prompt_tokens: 0.0001 / 1000, + completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings + max_tokens: 8191, + }, + // https://developers.generativeai.google/models/language + "text-bison-001": { + // we assume 5 characters is 1 token on average + prompt_tokens: (5 * 0.0005) / 1000, + completion_tokens: (5 * 0.0005) / 1000, + max_tokens: 8196, + }, + "chat-bison-001": { + // we assume 5 characters is 1 token on average + prompt_tokens: (5 * 0.0005) / 1000, + completion_tokens: (5 * 0.0005) / 1000, + max_tokens: 8196, + }, + "embedding-gecko-001": { + prompt_tokens: (5 * 0.0001) / 1000, + completion_tokens: 0, + max_tokens: 8196, // ??? + }, + "gemini-pro": { + // https://ai.google.dev/models/gemini + prompt_tokens: (5 * 0.0001) / 1000, + completion_tokens: 0, + max_tokens: 30720, + }, +} as const; + +export function isValidModel(model?: string): boolean { + if (model == null) return false; + if (model.startsWith("ollama-")) return true; + return LLM_COST[model ?? ""] != null; +} + +export function getMaxTokens(model?: Model | string): number { + return LLM_COST[model ?? ""]?.max_tokens ?? 
4096; +} + +export interface LLMCost { + prompt_tokens: number; + completion_tokens: number; +} + +export function getLLMCost( + model: Model, + markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3 +): LLMCost { + const x = LLM_COST[model]; + if (x == null) { + throw Error(`unknown model "${model}"`); + } + const { prompt_tokens, completion_tokens } = x; + if (markup_percentage < 0) { + throw Error("markup percentage can't be negative"); + } + const f = 1 + markup_percentage / 100; + return { + prompt_tokens: prompt_tokens * f, + completion_tokens: completion_tokens * f, + }; +} + +// The maximum cost for one single call using the given model. +// We can't know the cost until after it happens, so this bound is useful for +// ensuring user can afford to make a call. +export function getMaxCost(model: Model, markup_percentage: number): number { + const { prompt_tokens, completion_tokens } = getLLMCost( + model, + markup_percentage, + ); + const { max_tokens } = LLM_COST[model]; + return Math.max(prompt_tokens, completion_tokens) * max_tokens; +} diff --git a/src/packages/util/db-schema/openai.ts b/src/packages/util/db-schema/openai.ts index bd2b64ab32..8a1b8e5b3d 100644 --- a/src/packages/util/db-schema/openai.ts +++ b/src/packages/util/db-schema/openai.ts @@ -2,347 +2,12 @@ // Mentally, just ignore "openai" and instead focus on "gpt-*" or "codey" or whatever they are called. // TODO: refactor this, the names of the tables, etc. to be more generic. -import type { LLMService, Service } from "@cocalc/util/db-schema/purchases"; - import { History } from "@cocalc/util/types/llm"; -import { unreachable } from "../misc"; import { CREATED_BY, ID } from "./crm"; import { SCHEMA as schema } from "./index"; +import { LanguageModel } from "./llm"; import { Table } from "./types"; -export const LANGUAGE_MODELS = [ - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-4", - "gpt-4-32k", - // google's are taken from here – we use the generative AI client lib - // https://developers.generativeai.google/models/language - "text-bison-001", - "chat-bison-001", - "embedding-gecko-001", - "text-embedding-ada-002", - "gemini-pro", -] as const; - -// This hardcodes which models can be selected by users. -// Make sure to update this when adding new models. -// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx -export const USER_SELECTABLE_LANGUAGE_MODELS: Readonly = [ - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-4", - // "chat-bison-001", // PaLM2 is not good, replies with no response too often - "gemini-pro", -] as const; - -export type LanguageModel = (typeof LANGUAGE_MODELS)[number]; - -export function isLanguageModel(model?: string): model is LanguageModel { - return LANGUAGE_MODELS.includes(model as LanguageModel); -} - -export function getValidLanguageModelName( - model: string | undefined, - filter: { google: boolean; openai: boolean; ollama: boolean } = { - google: true, - openai: true, - ollama: false, - }, - ollama: string[] = [], // keys of ollama models -): LanguageModel | string { - const dftl = - filter.openai === true - ? DEFAULT_MODEL - : filter.ollama && ollama?.length > 0 - ? 
toOllamaModel(ollama[0]) - : "chat-bison-001"; - console.log("getValidLanguageModelName", model, filter, ollama, dftl); - if (model == null) { - return dftl; - } - if (LANGUAGE_MODELS.includes(model as LanguageModel)) { - return model; - } - if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) { - return model; - } - return dftl; -} - -export interface OpenAIMessage { - role: "system" | "user" | "assistant"; - content: string; -} -export type OpenAIMessages = OpenAIMessage[]; - -export type LanguageService = - | "openai-gpt-3.5-turbo" - | "openai-gpt-3.5-turbo-16k" - | "openai-gpt-4" - | "openai-gpt-4-32k" - | "openai-text-embedding-ada-002" - | "google-text-bison-001" - | "google-chat-bison-001" - | "google-embedding-gecko-001" - | "google-gemini-pro"; - -const LANGUAGE_MODEL_VENDORS = ["openai", "google", "ollama"] as const; -export type Vendor = (typeof LANGUAGE_MODEL_VENDORS)[number]; - -// used e.g. for checking "account-id={string}" and other things like that -export const LANGUAGE_MODEL_PREFIXES = [ - "chatgpt", - ...LANGUAGE_MODEL_VENDORS.map((v) => `${v}-`), -] as const; - -export function model2service(model: LanguageModel): LanguageService { - if (model === "text-embedding-ada-002") return `openai-${model}`; - if ( - model === "text-bison-001" || - model === "chat-bison-001" || - model === "embedding-gecko-001" || - model === "gemini-pro" - ) { - return `google-${model}`; - } else { - return `openai-${model}`; - } -} - -// inverse of model2service, but robust for chat avatars, which might not have a prefix -// TODO: fix the mess -export function service2model( - service: LanguageService | "chatgpt", -): LanguageModel { - if (service === "chatgpt") { - return "gpt-3.5-turbo"; - } - // split off the first part of service, e.g., "openai-" or "google-" - const s = service.split("-")[0]; - const hasPrefix = s === "openai" || s === "google"; - const m = hasPrefix ? service.split("-").slice(1).join("-") : service; - if (!LANGUAGE_MODELS.includes(m as LanguageModel)) { - // We don't throw an error, since the frontend would crash - // throw new Error(`unknown service: ${service}`); - console.warn(`service2model: unknown service: ${service}`); - return "gpt-3.5-turbo"; - } - return m as LanguageModel; -} - -// Note: this must be an OpenAI model – otherwise change the getValidLanguageModelName function -export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo"; - -export function model2vendor(model: LanguageModel): Vendor { - if (model.startsWith("gpt-")) { - return "openai"; - } else if (model.startsWith("ollama-")) { - return "ollama"; - } else { - return "google"; - } -} - -export function toOllamaModel(model: string) { - return `ollama-${model}`; -} - -export function fromOllamaModel(model: string) { - return model.replace(/^ollama-/, ""); -} - -export function isOllamaLLM(model: string) { - return model.startsWith("ollama-"); -} - -const MODELS_OPENAI = [ - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-4", - "gpt-4-32k", -] as const; - -export const MODELS = [ - ...MODELS_OPENAI, - "text-embedding-ada-002", - "text-bison-001", - "chat-bison-001", - "embedding-gecko-001", - "gemini-pro", -] as const; - -export type Model = (typeof MODELS)[number]; - -export type ModelOpenAI = (typeof MODELS_OPENAI)[number]; - -// Map from psuedo account_id to what should be displayed to user. -// This is used in various places in the frontend. 
-// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing -export const LLM_USERNAMES = { - chatgpt: "GPT-3.5", - chatgpt3: "GPT-3.5", - chatgpt4: "GPT-4", - "gpt-4": "GPT-4", - "gpt-4-32k": "GPT-4-32k", - "gpt-3.5-turbo": "GPT-3.5", - "gpt-3.5-turbo-16k": "GPT-3.5-16k", - "text-bison-001": "PaLM 2", - "chat-bison-001": "PaLM 2", - "embedding-gecko-001": "PaLM 2", - "gemini-pro": "Gemini Pro", -} as const; - -export function isFreeModel(model: string) { - if (!LANGUAGE_MODELS.includes(model as LanguageModel)) return false; - return ( - (model as Model) == "gpt-3.5-turbo" || - (model as Model) == "text-bison-001" || - (model as Model) == "chat-bison-001" || - (model as Model) == "embedding-gecko-001" || - (model as Model) == "gemini-pro" - ); -} - -// this is used in purchases/get-service-cost -// we only need to check for the vendor prefixes, no special cases! -export function isLanguageModelService( - service: Service, -): service is LLMService { - for (const v of LANGUAGE_MODEL_VENDORS) { - if (service.startsWith(`${v}-`)) { - return true; - } - } - return false; -} - -export function getVendorStatusCheckMD(vendor: Vendor): string { - switch (vendor) { - case "openai": - return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`; - case "google": - return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`; - case "ollama": - return `No status information for Ollama available – you have to check with the particular backend for the model.`; - default: - unreachable(vendor); - } - return ""; -} - -export function llmSupportsStreaming(model: LanguageModel): boolean { - return model2vendor(model) === "openai" || model === "gemini-pro"; -} - -interface Cost { - prompt_tokens: number; - completion_tokens: number; - max_tokens: number; -} - -// This is the official published cost that openai charges. -// It changes over time, so this will sometimes need to be updated. -// Our cost is a configurable multiple of this. -// https://openai.com/pricing#language-models -// There appears to be no api that provides the prices, unfortunately. -const LLM_COST: { [name in LanguageModel]: Cost } = { - "gpt-4": { - prompt_tokens: 0.03 / 1000, - completion_tokens: 0.06 / 1000, - max_tokens: 8192, - }, - "gpt-4-32k": { - prompt_tokens: 0.06 / 1000, - completion_tokens: 0.12 / 1000, - max_tokens: 32768, - }, - "gpt-3.5-turbo": { - prompt_tokens: 0.0015 / 1000, - completion_tokens: 0.002 / 1000, - max_tokens: 4096, - }, - "gpt-3.5-turbo-16k": { - prompt_tokens: 0.003 / 1000, - completion_tokens: 0.004 / 1000, - max_tokens: 16384, - }, - "text-embedding-ada-002": { - prompt_tokens: 0.0001 / 1000, - completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings - max_tokens: 8191, - }, - // https://developers.generativeai.google/models/language - "text-bison-001": { - // we assume 5 characters is 1 token on average - prompt_tokens: (5 * 0.0005) / 1000, - completion_tokens: (5 * 0.0005) / 1000, - max_tokens: 8196, - }, - "chat-bison-001": { - // we assume 5 characters is 1 token on average - prompt_tokens: (5 * 0.0005) / 1000, - completion_tokens: (5 * 0.0005) / 1000, - max_tokens: 8196, - }, - "embedding-gecko-001": { - prompt_tokens: (5 * 0.0001) / 1000, - completion_tokens: 0, - max_tokens: 8196, // ??? 
- }, - "gemini-pro": { - // https://ai.google.dev/models/gemini - prompt_tokens: (5 * 0.0001) / 1000, - completion_tokens: 0, - max_tokens: 30720, - }, -} as const; - -export function isValidModel(model?: string): boolean { - if (model == null) return false; - if (model.startsWith("ollama-")) return true; - return LLM_COST[model ?? ""] != null; -} - -export function getMaxTokens(model?: Model): number { - return LLM_COST[model ?? ""]?.max_tokens ?? 4096; -} - -export interface LLMCost { - prompt_tokens: number; - completion_tokens: number; -} - -export function getLLMCost( - model: Model, - markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3 -): LLMCost { - const x = LLM_COST[model]; - if (x == null) { - throw Error(`unknown model "${model}"`); - } - const { prompt_tokens, completion_tokens } = x; - if (markup_percentage < 0) { - throw Error("markup percentage can't be negative"); - } - const f = 1 + markup_percentage / 100; - return { - prompt_tokens: prompt_tokens * f, - completion_tokens: completion_tokens * f, - }; -} - -// The maximum cost for one single call using the given model. -// We can't know the cost until after it happens, so this bound is useful for -// ensuring user can afford to make a call. -export function getMaxCost(model: Model, markup_percentage: number): number { - const { prompt_tokens, completion_tokens } = getLLMCost( - model, - markup_percentage, - ); - const { max_tokens } = LLM_COST[model]; - return Math.max(prompt_tokens, completion_tokens) * max_tokens; -} - export interface ChatGPTLogEntry { id: number; account_id?: string; diff --git a/src/packages/util/db-schema/purchase-quotas.ts b/src/packages/util/db-schema/purchase-quotas.ts index a96b749e10..7932204824 100644 --- a/src/packages/util/db-schema/purchase-quotas.ts +++ b/src/packages/util/db-schema/purchase-quotas.ts @@ -2,7 +2,7 @@ import { Table } from "./types"; import { CREATED_BY, ID } from "./crm"; import { SCHEMA as schema } from "./index"; import type { Service } from "./purchases"; -import { isFreeModel } from "./openai"; +import { isFreeModel } from "./llm"; export type { Service }; diff --git a/src/packages/util/types/llm.ts b/src/packages/util/types/llm.ts index 36c8d35668..70cce04b8c 100644 --- a/src/packages/util/types/llm.ts +++ b/src/packages/util/types/llm.ts @@ -1,4 +1,4 @@ -import { LanguageModel } from "@cocalc/util/db-schema/openai"; +import { LanguageModel } from "@cocalc/util/db-schema/llm"; export type History = { role: "assistant" | "user" | "system"; From 34139326f13733060159c3d4e1251eeb02cbdf81 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Fri, 23 Feb 2024 18:43:38 +0100 Subject: [PATCH 04/32] server/ollama: some progress ... 
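
Note on configuration (not part of the diff below): getOllama() expects the admin-side
Ollama settings to be a map keyed by model name. Only baseUrl, model, keepAlive and
cocalc.disabled are read explicitly; every other field is forwarded unchanged to the
Ollama client constructor. The following is a minimal sketch of one entry, assuming a
hypothetical "llama2" model served on Ollama's default local port – field values and the
extra fields are illustrative, not prescribed by this patch:

    // hypothetical server setting for a single Ollama model, keyed by "llama2"
    const ollamaConfiguration = {
      llama2: {
        baseUrl: "http://localhost:11434", // assumed endpoint of the Ollama server
        model: "llama2", // name sent to Ollama (assumption: defaults to the key)
        keepAlive: "24h", // matches the new default used in getOllama()
        cocalc: {
          disabled: false, // getOllama() throws if this is set to true
        },
        // anything else (e.g. temperature, topK) is passed through to the client
      },
    };
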
--- src/packages/frontend/customize.tsx | 7 +- .../frame-editors/llm/model-switch.tsx | 2 - src/packages/hub/webapp-configuration.ts | 1 + src/packages/pnpm-lock.yaml | 290 +++++++++++++++++- src/packages/server/llm/abuse.ts | 4 +- src/packages/server/llm/client.ts | 16 +- src/packages/server/llm/index.ts | 11 +- src/packages/server/llm/ollama.ts | 64 +++- src/packages/server/package.json | 7 + src/packages/util/db-schema/llm.ts | 10 +- 10 files changed, 388 insertions(+), 24 deletions(-) diff --git a/src/packages/frontend/customize.tsx b/src/packages/frontend/customize.tsx index edfe62c02d..07f0c5a9a7 100644 --- a/src/packages/frontend/customize.tsx +++ b/src/packages/frontend/customize.tsx @@ -259,10 +259,9 @@ async function init_customize() { init_customize(); -function process_ollama(ollama) { - if (ollama) { - actions.setState({ ollama: fromJS(ollama) }); - } +function process_ollama(ollama?) { + if (!ollama) return; + actions.setState({ ollama: fromJS(ollama) }); } function process_kucalc(obj) { diff --git a/src/packages/frontend/frame-editors/llm/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx index 5743ad309c..046bf90aff 100644 --- a/src/packages/frontend/frame-editors/llm/model-switch.tsx +++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx @@ -120,8 +120,6 @@ export default function ModelSwitch({ }); } - console.log("model", model); - // all models selectable here must be in util/db-schema/openai::USER_SELECTABLE_LANGUAGE_MODELS return ( =6.9.0'} @@ -5671,7 +5693,6 @@ packages: resolution: {integrity: sha512-AMZ2UWx+woHNfM11PyAEQmfSxi05jm9OlkxczuHeEqmvwPkYj6MWv44gbzDPefYOLysTOFyI3ziiy2ONmUZfpA==} dependencies: undici-types: 5.26.5 - dev: true /@types/node@18.19.4: resolution: {integrity: sha512-xNzlUhzoHotIsnFoXmJB+yWmBvFZgKCI9TtPIEdYIMM1KWfwuY8zh7wvc1u1OAXlC7dlf6mZVx/s+Y5KfFz19A==} @@ -9467,6 +9488,10 @@ packages: jest-util: 29.7.0 dev: true + /expr-eval@2.0.2: + resolution: {integrity: sha512-4EMSHGOPSwAfBiibw3ndnP0AvjDWLsMvGOvWEZ2F96IGk0bIVdjQisOHxReSkE13mHcfbuCiXw+G4y0zv6N8Eg==} + dev: false + /express-rate-limit@6.7.0(express@4.18.2): resolution: {integrity: sha512-vhwIdRoqcYB/72TK3tRZI+0ttS8Ytrk24GfmsxDXK9o9IhHNO5bXRiXQSExPQ4GbaE5tvIS7j1SGrxsuWs+sGA==} engines: {node: '>= 12.9.0'} @@ -12582,7 +12607,6 @@ packages: hasBin: true dependencies: argparse: 2.0.1 - dev: true /jsesc@2.5.2: resolution: {integrity: sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==} @@ -12658,6 +12682,11 @@ packages: resolution: {integrity: sha512-trvBk1ki43VZptdBI5rIlG4YOzyeH/WefQt5rj1grasPn4iiZWKet8nkgc4GlsAylaztn0qZfUYOiTsASJFdNA==} dev: false + /jsonpointer@5.0.1: + resolution: {integrity: sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==} + engines: {node: '>=0.10.0'} + dev: false + /jsonwebtoken@9.0.2: resolution: {integrity: sha512-PRp66vJ865SSqOlgqS8hujT5U4AOgMfhrwYIuIhfKaoSCZcirrmASQr8CX7cUg+RMih+hgznrjp99o+W4pJLHQ==} engines: {node: '>=12', npm: '>=6'} @@ -12826,6 +12855,254 @@ packages: engines: {node: '>=18.0.0'} dev: false + /langchain@0.1.21(@google-ai/generativelanguage@1.1.0)(axios@1.6.7)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21): + resolution: {integrity: sha512-OOcCFIgx23WyyNS1VJBLbC3QL5plQBVfp2drXw1OJAarZ8yEY3cgJq8NbTY37sMnLoJ2olFEzMuAOdlTur4cwQ==} + engines: {node: '>=18'} + peerDependencies: + '@aws-sdk/client-s3': ^3.310.0 + '@aws-sdk/client-sagemaker-runtime': ^3.310.0 + '@aws-sdk/client-sfn': ^3.310.0 + 
'@aws-sdk/credential-provider-node': ^3.388.0 + '@azure/storage-blob': ^12.15.0 + '@gomomento/sdk': ^1.51.1 + '@gomomento/sdk-core': ^1.51.1 + '@gomomento/sdk-web': ^1.51.1 + '@google-ai/generativelanguage': ^0.2.1 + '@google-cloud/storage': ^6.10.1 || ^7.7.0 + '@notionhq/client': ^2.2.10 + '@pinecone-database/pinecone': '*' + '@supabase/supabase-js': ^2.10.0 + '@vercel/kv': ^0.2.3 + '@xata.io/client': ^0.28.0 + apify-client: ^2.7.1 + assemblyai: ^4.0.0 + axios: '*' + cheerio: ^1.0.0-rc.12 + chromadb: '*' + convex: ^1.3.1 + couchbase: ^4.2.10 + d3-dsv: ^2.0.0 + epub2: ^3.0.1 + faiss-node: '*' + fast-xml-parser: ^4.2.7 + google-auth-library: ^8.9.0 + handlebars: ^4.7.8 + html-to-text: ^9.0.5 + ignore: ^5.2.0 + ioredis: ^5.3.2 + jsdom: '*' + mammoth: ^1.6.0 + mongodb: '>=5.2.0' + node-llama-cpp: '*' + notion-to-md: ^3.1.0 + officeparser: ^4.0.4 + pdf-parse: 1.1.1 + peggy: ^3.0.2 + playwright: ^1.32.1 + puppeteer: ^19.7.2 + pyodide: ^0.24.1 + redis: ^4.6.4 + sonix-speech-recognition: ^2.1.1 + srt-parser-2: ^1.2.3 + typeorm: ^0.3.12 + weaviate-ts-client: '*' + web-auth-library: ^1.0.3 + ws: ^8.14.2 + youtube-transcript: ^1.0.6 + youtubei.js: ^5.8.0 + peerDependenciesMeta: + '@aws-sdk/client-s3': + optional: true + '@aws-sdk/client-sagemaker-runtime': + optional: true + '@aws-sdk/client-sfn': + optional: true + '@aws-sdk/credential-provider-node': + optional: true + '@azure/storage-blob': + optional: true + '@gomomento/sdk': + optional: true + '@gomomento/sdk-core': + optional: true + '@gomomento/sdk-web': + optional: true + '@google-ai/generativelanguage': + optional: true + '@google-cloud/storage': + optional: true + '@notionhq/client': + optional: true + '@pinecone-database/pinecone': + optional: true + '@supabase/supabase-js': + optional: true + '@vercel/kv': + optional: true + '@xata.io/client': + optional: true + apify-client: + optional: true + assemblyai: + optional: true + axios: + optional: true + cheerio: + optional: true + chromadb: + optional: true + convex: + optional: true + couchbase: + optional: true + d3-dsv: + optional: true + epub2: + optional: true + faiss-node: + optional: true + fast-xml-parser: + optional: true + google-auth-library: + optional: true + handlebars: + optional: true + html-to-text: + optional: true + ignore: + optional: true + ioredis: + optional: true + jsdom: + optional: true + mammoth: + optional: true + mongodb: + optional: true + node-llama-cpp: + optional: true + notion-to-md: + optional: true + officeparser: + optional: true + pdf-parse: + optional: true + peggy: + optional: true + playwright: + optional: true + puppeteer: + optional: true + pyodide: + optional: true + redis: + optional: true + sonix-speech-recognition: + optional: true + srt-parser-2: + optional: true + typeorm: + optional: true + weaviate-ts-client: + optional: true + web-auth-library: + optional: true + ws: + optional: true + youtube-transcript: + optional: true + youtubei.js: + optional: true + dependencies: + '@anthropic-ai/sdk': 0.9.1(encoding@0.1.13) + '@google-ai/generativelanguage': 1.1.0(encoding@0.1.13) + '@langchain/community': 0.0.32(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21) + '@langchain/core': 0.1.32 + '@langchain/openai': 0.0.14(encoding@0.1.13) + axios: 1.6.7 + binary-extensions: 2.2.0 + expr-eval: 2.0.2 + google-auth-library: 9.4.1(encoding@0.1.13) + js-tiktoken: 1.0.10 + js-yaml: 4.1.0 + jsonpointer: 5.0.1 + langchainhub: 0.0.8 + langsmith: 0.1.3 + ml-distance: 4.0.1 + openapi-types: 12.1.3 + p-retry: 4.6.2 + 
uuid: 9.0.1 + yaml: 2.3.4 + zod: 3.22.4 + zod-to-json-schema: 3.22.4(zod@3.22.4) + transitivePeerDependencies: + - '@aws-crypto/sha256-js' + - '@aws-sdk/client-bedrock-agent-runtime' + - '@aws-sdk/client-bedrock-runtime' + - '@aws-sdk/client-dynamodb' + - '@aws-sdk/client-kendra' + - '@aws-sdk/client-lambda' + - '@azure/search-documents' + - '@clickhouse/client' + - '@cloudflare/ai' + - '@datastax/astra-db-ts' + - '@elastic/elasticsearch' + - '@getmetal/metal-sdk' + - '@getzep/zep-js' + - '@gradientai/nodejs-sdk' + - '@huggingface/inference' + - '@mozilla/readability' + - '@opensearch-project/opensearch' + - '@planetscale/database' + - '@qdrant/js-client-rest' + - '@raycast/api' + - '@rockset/client' + - '@smithy/eventstream-codec' + - '@smithy/protocol-http' + - '@smithy/signature-v4' + - '@smithy/util-utf8' + - '@supabase/postgrest-js' + - '@tensorflow-models/universal-sentence-encoder' + - '@tensorflow/tfjs-converter' + - '@tensorflow/tfjs-core' + - '@upstash/redis' + - '@upstash/vector' + - '@vercel/postgres' + - '@writerai/writer-sdk' + - '@xenova/transformers' + - '@zilliz/milvus2-sdk-node' + - better-sqlite3 + - cassandra-driver + - closevector-common + - closevector-node + - closevector-web + - cohere-ai + - discord.js + - dria + - encoding + - firebase-admin + - googleapis + - hnswlib-node + - llmonitor + - lodash + - lunary + - mysql2 + - neo4j-driver + - pg + - pg-copy-streams + - pickleparser + - portkey-ai + - replicate + - typesense + - usearch + - vectordb + - voy-search + dev: false + + /langchainhub@0.0.8: + resolution: {integrity: sha512-Woyb8YDHgqqTOZvWIbm2CaFDGfZ4NTSyXV687AG4vXEfoNo7cGQp7nhl7wL3ehenKWmNEmcxCLgOZzW8jE6lOQ==} + dev: false + /langs@2.0.0: resolution: {integrity: sha512-v4pxOBEQVN1WBTfB1crhTtxzNLZU9HPWgadlwzWKISJtt6Ku/CnpBrwVy+jFv8StjxsPfwPFzO0CMwdZLJ0/BA==} dev: false @@ -14277,6 +14554,10 @@ packages: - encoding dev: false + /openapi-types@12.1.3: + resolution: {integrity: sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==} + dev: false + /opener@1.5.2: resolution: {integrity: sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A==} hasBin: true @@ -19395,6 +19676,11 @@ packages: resolution: {integrity: sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==} requiresBuild: true + /yaml@2.3.4: + resolution: {integrity: sha512-8aAvwVUSHpfEqTQ4w/KMlf3HcRdt50E5ODIQJBw1fQ5RL34xabzxtUlzTXVqc4rkZsPbvrXKWnABCD7kWSmocA==} + engines: {node: '>= 14'} + dev: false + /yargs-parser@18.1.3: resolution: {integrity: sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==} engines: {node: '>=6'} diff --git a/src/packages/server/llm/abuse.ts b/src/packages/server/llm/abuse.ts index ae89cac4da..fabe29c8c4 100644 --- a/src/packages/server/llm/abuse.ts +++ b/src/packages/server/llm/abuse.ts @@ -26,6 +26,7 @@ import getPool from "@cocalc/database/pool"; import { assertPurchaseAllowed } from "@cocalc/server/purchases/is-purchase-allowed"; import { isFreeModel, + isOllamaLLM, LanguageModel, LanguageService, model2service, @@ -66,7 +67,8 @@ export async function checkForAbuse({ // at least some amount of tracking. 
throw Error("at least one of account_id or analytics_cookie must be set"); } - if (!MODELS.includes(model)) { + + if (!MODELS.includes(model) && !isOllamaLLM(model)) { throw Error(`invalid model "${model}"`); } diff --git a/src/packages/server/llm/client.ts b/src/packages/server/llm/client.ts index 4f0fa454ea..6d6ab77f85 100644 --- a/src/packages/server/llm/client.ts +++ b/src/packages/server/llm/client.ts @@ -11,7 +11,11 @@ import OpenAI from "openai"; import getLogger from "@cocalc/backend/logger"; import { getServerSettings } from "@cocalc/database/settings/server-settings"; -import { LanguageModel, model2vendor } from "@cocalc/util/db-schema/llm"; +import { + LanguageModel, + isOllamaLLM, + model2vendor, +} from "@cocalc/util/db-schema/llm"; import { unreachable } from "@cocalc/util/misc"; import { VertexAIClient } from "./vertex-ai-client"; @@ -78,9 +82,9 @@ const ollamaCache: { [key: string]: Ollama } = {}; * All other config parameters are passed to the Ollama constructor (e.g. topK, temperature, etc.). */ export async function getOllama(model: string) { - if (model.startsWith("ollama-")) { + if (isOllamaLLM(model)) { throw new Error( - `At this point, the model name should no longer have the "ollama-" prefix`, + `At this point, the model name should be one of Ollama, but it was ${model}`, ); } @@ -92,6 +96,10 @@ export async function getOllama(model: string) { ); } + if (config.cocalc?.disabled) { + throw new Error(`Ollama model ${model} is disabled`); + } + // the key is a hash of the model name and the specific config – such that changes in the config will invalidate the cache const key = `${model}:${jsonStable(config)}`; @@ -109,7 +117,7 @@ export async function getOllama(model: string) { ); } - const keepAlive = config.keepAlive ?? -1; + const keepAlive: string = config.keepAlive ?? 
"24h"; // extract all other properties from the config, except the url, model, keepAlive field and the "cocalc" field const other = _.omit(config, ["baseUrl", "model", "keepAlive", "cocalc"]); diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts index 53bdb8aecf..ac93ea4a96 100644 --- a/src/packages/server/llm/index.ts +++ b/src/packages/server/llm/index.ts @@ -25,6 +25,7 @@ import { OpenAIMessages, getLLMCost, isFreeModel, + isOllamaLLM, isValidModel, model2service, model2vendor, @@ -33,9 +34,9 @@ import { ChatOptions, ChatOutput, History } from "@cocalc/util/types/llm"; import { checkForAbuse } from "./abuse"; import { callChatGPTAPI } from "./call-llm"; import { getClient } from "./client"; +import { evaluateOllama } from "./ollama"; import { saveResponse } from "./save-response"; import { VertexAIClient } from "./vertex-ai-client"; -import { evaluateOllama } from "./ollama"; const log = getLogger("llm"); @@ -91,11 +92,9 @@ async function evaluateImpl({ const start = Date.now(); await checkForAbuse({ account_id, analytics_cookie, model }); - const client = await getClient(model); - const { output, total_tokens, prompt_tokens, completion_tokens } = await (async () => { - if (model.startsWith("ollama-")) { + if (isOllamaLLM(model)) { return await evaluateOllama({ system, history, @@ -109,7 +108,6 @@ async function evaluateImpl({ system, history, input, - client, model, maxTokens, stream, @@ -179,11 +177,12 @@ async function evaluteCall({ system, history, input, - client, model, maxTokens, stream, }) { + const client = await getClient(model); + if (client instanceof VertexAIClient) { return await evaluateVertexAI({ system, diff --git a/src/packages/server/llm/ollama.ts b/src/packages/server/llm/ollama.ts index 91ad6317f2..dc059930a7 100644 --- a/src/packages/server/llm/ollama.ts +++ b/src/packages/server/llm/ollama.ts @@ -1,6 +1,15 @@ +import { + ChatPromptTemplate, + MessagesPlaceholder, +} from "@langchain/core/prompts"; +import { RunnableWithMessageHistory } from "@langchain/core/runnables"; +import { ChatMessageHistory } from "langchain/stores/message/in_memory"; + import getLogger from "@cocalc/backend/logger"; +import { fromOllamaModel, isOllamaLLM } from "@cocalc/util/db-schema/llm"; import { ChatOutput, History } from "@cocalc/util/types/llm"; import { getOllama } from "./client"; +import { AIMessage, HumanMessage } from "@langchain/core/messages"; const log = getLogger("llm:ollama"); @@ -17,10 +26,10 @@ interface OllamaOpts { export async function evaluateOllama( opts: Readonly, ): Promise { - if (!opts.model.startsWith("ollama-")) { + if (!isOllamaLLM(opts.model)) { throw new Error(`model ${opts.model} not supported`); } - const model = opts.model.slice("ollama-".length); + const model = fromOllamaModel(opts.model); const { system, history, input, maxTokens, stream } = opts; log.debug("evaluateOllama", { input, @@ -33,7 +42,56 @@ export async function evaluateOllama( const ollama = await getOllama(model); - const chunks = await ollama.stream(input); + const msgs: ["ai" | "human", string][] = []; + + if (history) { + let nextRole: "model" | "user" = "user"; + for (const { content } of history) { + if (nextRole === "user") { + msgs.push(["human", content]); + } else { + msgs.push(["ai", content]); + } + nextRole = nextRole === "user" ? "model" : "user"; + } + } + + const prompt = ChatPromptTemplate.fromMessages([ + ["system", system ?? 
""], + new MessagesPlaceholder("chat_history"), + ["human", "{input}"], + ]); + + const chain = prompt.pipe(ollama); + + const chainWithHistory = new RunnableWithMessageHistory({ + runnable: chain, + inputMessagesKey: "input", + historyMessagesKey: "chat_history", + getMessageHistory: async (_) => { + const chatHistory = new ChatMessageHistory(); + // await history.addMessage(new HumanMessage("be brief")); + // await history.addMessage(new AIMessage("ok")); + if (history) { + let nextRole: "model" | "user" = "user"; + for (const { content } of history) { + if (nextRole === "user") { + await chatHistory.addMessage(new HumanMessage(content)); + } else { + await chatHistory.addMessage(new AIMessage(content)); + } + nextRole = nextRole === "user" ? "model" : "user"; + } + } + + return chatHistory; + }, + }); + + const chunks = await chainWithHistory.stream( + { input }, + { configurable: { sessionId: "ignored" } }, + ); let output = ""; for await (const chunk of chunks) { diff --git a/src/packages/server/package.json b/src/packages/server/package.json index a167fa941a..a418403148 100644 --- a/src/packages/server/package.json +++ b/src/packages/server/package.json @@ -47,6 +47,7 @@ "@google/generative-ai": "^0.1.3", "@isaacs/ttlcache": "^1.2.1", "@langchain/community": "^0.0.32", + "@langchain/core": "^0.1.32", "@node-saml/passport-saml": "^4.0.4", "@passport-js/passport-twitter": "^1.0.8", "@passport-next/passport-google-oauth2": "^1.0.0", @@ -81,6 +82,7 @@ "json-stable-stringify": "^1.0.1", "jwt-decode": "^3.1.2", "lambda-cloud-node-api": "^1.0.1", + "langchain": "^0.1.21", "lodash": "^4.17.21", "lru-cache": "^7.14.1", "ms": "2.1.2", @@ -114,5 +116,10 @@ "devDependencies": { "@types/node": "^18.16.14", "expect": "^26.6.2" + }, + "pnpm": { + "overrides": { + "@langchain/core": "^0.1.32" + } } } diff --git a/src/packages/util/db-schema/llm.ts b/src/packages/util/db-schema/llm.ts index 36a20a8e78..38e46f7c60 100644 --- a/src/packages/util/db-schema/llm.ts +++ b/src/packages/util/db-schema/llm.ts @@ -146,11 +146,17 @@ export function model2vendor(model: LanguageModel | string): Vendor { } export function toOllamaModel(model: string) { + if (isOllamaLLM(model)) { + throw new Error(`already an ollama model: ${model}`); + } return `ollama-${model}`; } export function fromOllamaModel(model: string) { - return model.replace(/^ollama-/, ""); + if (!isOllamaLLM(model)) { + throw new Error(`not an ollama model: ${model}`); + } + return model.slice("ollama-".length); } export function isOllamaLLM(model: string) { @@ -306,7 +312,7 @@ const LLM_COST: { [name in LanguageModel]: Cost } = { export function isValidModel(model?: string): boolean { if (model == null) return false; - if (model.startsWith("ollama-")) return true; + if (isOllamaLLM(model)) return true; return LLM_COST[model ?? ""] != null; } From d6c763e3afdc46798c66dec52108351ba8682587 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Mon, 26 Feb 2024 15:11:38 +0100 Subject: [PATCH 05/32] frontend/ollama: various fixes, logo, starting chat integration (+bugfixes?) etc. 
--- src/packages/frontend/account/chatbot.ts | 14 +- .../frontend/account/other-settings.tsx | 2 +- .../account/useLanguageModelSetting.tsx | 12 +- src/packages/frontend/chat/actions.ts | 2 +- src/packages/frontend/chat/input.tsx | 8 +- .../components/language-model-icon.tsx | 21 ++- .../frontend/components/ollama-avatar.tsx | 43 +++++-- src/packages/frontend/components/ollama.png | Bin 18226 -> 0 bytes .../editors/markdown-input/component.tsx | 43 +++---- .../markdown-input/mentionable-users.tsx | 121 +++++++++++++----- .../editors/markdown-input/multimode.tsx | 8 -- .../editors/slate/editable-markdown.tsx | 13 +- .../frame-editors/llm/model-switch.tsx | 6 +- src/packages/frontend/project/context.tsx | 29 ++++- src/packages/frontend/projects/store.ts | 3 +- src/packages/frontend/sagews/chatgpt.ts | 2 +- src/packages/hub/webapp-configuration.ts | 3 +- src/packages/util/db-schema/llm.ts | 2 +- src/packages/util/types/llm.ts | 3 +- 19 files changed, 220 insertions(+), 115 deletions(-) delete mode 100644 src/packages/frontend/components/ollama.png diff --git a/src/packages/frontend/account/chatbot.ts b/src/packages/frontend/account/chatbot.ts index 4c237ec7fd..878765f1cf 100644 --- a/src/packages/frontend/account/chatbot.ts +++ b/src/packages/frontend/account/chatbot.ts @@ -7,20 +7,25 @@ When new models are added, e.g., Claude soon (!), they will go here. */ +import { redux } from "@cocalc/frontend/app-framework"; import { LANGUAGE_MODEL_PREFIXES, LLM_USERNAMES, MODELS, Vendor, + fromOllamaModel, + isOllamaLLM, model2vendor, } from "@cocalc/util/db-schema/llm"; // we either check if the prefix is one of the known ones (used in some circumstances) // or if the account id is exactly one of the language models (more precise) -export function isChatBot(account_id?: string) { +export function isChatBot(account_id?: string): boolean { + if (typeof account_id !== "string") return false; return ( LANGUAGE_MODEL_PREFIXES.some((prefix) => account_id?.startsWith(prefix)) || - MODELS.some((model) => account_id === model) + MODELS.some((model) => account_id === model) || + isOllamaLLM(account_id) ); } @@ -41,5 +46,10 @@ export function chatBotName(account_id?: string): string { if (account_id?.startsWith("google-")) { return LLM_USERNAMES[account_id.slice("google-".length)] ?? "Gemini"; } + if (typeof account_id === "string" && isOllamaLLM(account_id)) { + const ollama = redux.getStore("customize").get("ollama")?.toJS() ?? {}; + const key = fromOllamaModel(account_id); + return ollama[key]?.display ?? "Ollama"; + } return "ChatBot"; } diff --git a/src/packages/frontend/account/other-settings.tsx b/src/packages/frontend/account/other-settings.tsx index 27a7734179..220cc1d5ae 100644 --- a/src/packages/frontend/account/other-settings.tsx +++ b/src/packages/frontend/account/other-settings.tsx @@ -378,7 +378,7 @@ export class OtherSettings extends Component { render_language_model(): Rendered { const projectsStore = redux.getStore("projects"); - const enabled = projectsStore.llmEnabledSummary(); + const enabled = projectsStore.whichLLMareEnabled(); const ollama = redux.getStore("customize").get("ollama")?.toJS() ?? 
{}; const defaultModel = getValidLanguageModelName( diff --git a/src/packages/frontend/account/useLanguageModelSetting.tsx b/src/packages/frontend/account/useLanguageModelSetting.tsx index e3c431c9eb..e5e74f8254 100644 --- a/src/packages/frontend/account/useLanguageModelSetting.tsx +++ b/src/packages/frontend/account/useLanguageModelSetting.tsx @@ -6,28 +6,24 @@ import { getValidLanguageModelName, isOllamaLLM, } from "@cocalc/util/db-schema/llm"; +import { useProjectContext } from "../project/context"; export const SETTINGS_LANGUAGE_MODEL_KEY = "language_model"; +// ATTN: requires the project context export function useLanguageModelSetting(): [ LanguageModel | string, (llm: LanguageModel | string) => void, ] { const other_settings = useTypedRedux("account", "other_settings"); const ollama = useTypedRedux("customize", "ollama"); - const haveOpenAI = useTypedRedux("customize", "openai_enabled"); - const haveGoogle = useTypedRedux("customize", "google_vertexai_enabled"); - const haveOllama = useTypedRedux("customize", "ollama_enabled"); - const filter = useMemo(() => { - const projectsStore = redux.getStore("projects"); - return projectsStore.llmEnabledSummary(); - }, [haveOpenAI, haveGoogle, haveOllama]); + const { enabledLLMs } = useProjectContext(); const llm = useMemo(() => { return getValidLanguageModelName( other_settings?.get("language_model"), - filter, + enabledLLMs, Object.keys(ollama?.toJS() ?? {}), ); }, [other_settings]); diff --git a/src/packages/frontend/chat/actions.ts b/src/packages/frontend/chat/actions.ts index bdbe2413e7..496b69a5a7 100644 --- a/src/packages/frontend/chat/actions.ts +++ b/src/packages/frontend/chat/actions.ts @@ -735,8 +735,8 @@ function getReplyToRoot(message, messages): Date | undefined { return date ? new Date(date) : undefined; } +// We strip out any cased version of the string @chatgpt and also all mentions. function stripMentions(value: string): string { - // We strip out any cased version of the string @chatgpt and also all mentions. 
for (const name of ["@chatgpt4", "@chatgpt", "@palm"]) { while (true) { const i = value.toLowerCase().indexOf(name); diff --git a/src/packages/frontend/chat/input.tsx b/src/packages/frontend/chat/input.tsx index 50d92f9b15..413157d091 100644 --- a/src/packages/frontend/chat/input.tsx +++ b/src/packages/frontend/chat/input.tsx @@ -181,12 +181,6 @@ export default function ChatInput({ }} editBarStyle={editBarStyle} overflowEllipsis={true} - chatGPT={redux - .getStore("projects") - .hasLanguageModelEnabled(project_id, undefined, "openai")} - vertexAI={redux - .getStore("projects") - .hasLanguageModelEnabled(project_id, undefined, "google")} /> ); } @@ -194,7 +188,7 @@ export default function ChatInput({ function getPlaceholder(project_id, placeholder?: string): string { if (placeholder != null) return placeholder; if (redux.getStore("projects").hasLanguageModelEnabled(project_id)) { - return "Type a new message (use @chatgpt for ChatGPT)..."; + return "Type a new message (mention a LLM via @chatgpt, @gemini, …)..."; } return "Type a new message..."; } diff --git a/src/packages/frontend/components/language-model-icon.tsx b/src/packages/frontend/components/language-model-icon.tsx index 80954715c9..6bfe97c838 100644 --- a/src/packages/frontend/components/language-model-icon.tsx +++ b/src/packages/frontend/components/language-model-icon.tsx @@ -1,5 +1,9 @@ import { CSS } from "@cocalc/frontend/app-framework"; -import { isLanguageModel, model2vendor } from "@cocalc/util/db-schema/llm"; +import { + isLanguageModel, + isOllamaLLM, + model2vendor, +} from "@cocalc/util/db-schema/llm"; import { unreachable } from "@cocalc/util/misc"; import AIAvatar from "./ai-avatar"; import GoogleGeminiLogo from "./google-gemini-avatar"; @@ -19,12 +23,16 @@ export function LanguageModelVendorAvatar( const style: CSS = { marginRight: "5px", ...props.style, - }; + } as const; function fallback() { return ; } + if (model == null) { + return fallback(); + } + if (isLanguageModel(model)) { const vendor = model2vendor(model); switch (vendor) { @@ -40,6 +48,7 @@ export function LanguageModelVendorAvatar( return fallback(); } } + case "ollama": return ; @@ -47,7 +56,11 @@ export function LanguageModelVendorAvatar( unreachable(vendor); return fallback(); } - } else { - return fallback(); } + + if (isOllamaLLM(model)) { + return ; + } + + return fallback(); } diff --git a/src/packages/frontend/components/ollama-avatar.tsx b/src/packages/frontend/components/ollama-avatar.tsx index c9c33f93c1..3b25614446 100644 --- a/src/packages/frontend/components/ollama-avatar.tsx +++ b/src/packages/frontend/components/ollama-avatar.tsx @@ -1,15 +1,14 @@ -import { CSS } from "../app-framework"; -import ollamaPng from "./ollama.png"; +import { CSS } from "@cocalc/frontend/app-framework"; export default function OllamaAvatar({ size = 64, style, + backgroundColor = "transparent", }: { size: number; style?: CSS; + backgroundColor?: string; }) { - // render the ollamaPng (a square png image with transparent background) with the given size and background color - return (
- + > + + + + + + + + + + + + +
); } diff --git a/src/packages/frontend/components/ollama.png b/src/packages/frontend/components/ollama.png deleted file mode 100644 index 1f142c8d534e0165096cd7ae1387be9bf0e83068..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 18226 zcmeIaWl$vDvM!9fyVJP4H}3B4+PJ&ByE}us!(f99gElg_3^uq73@!r<3?J{i_c>?p zxcASA`0l^m5mjBaR_2qLPiC#C>dNRiH5FM@BqAgT2nbYpIVp|z@1DOl1eo{z_X?d= z2nYb1zm}e-hPe;G)!oI~&JhIg^m7FPK)!a?5D>oWl?D3QBz)=8Z>{krP)Fth1SrQ# zX#P(@5eYyyFT=Z{X%~?6CS-z!95!yQ_xDX9F z`b`uVY`R^{Ga!O2I&e7{~H$ zy$_O06#>gNuc_uUk=VlLp9_K8N`ItTQEIP(6!MyBlA6<60G|!2T|&BNj8S>sZpz5Z zZq9qJ&VFsp&QD&SrQa=5uDUdT4&08qIjiZbe7l(F2?@dv@fVG`C7f5SZw~qOA$Yqm zZ*)4VvdJ{#*^=>6u*q;!A>|^uRD1hTw_7?O1U^Jizh6FTzsvMx%REx9+u01pJev4t z|3wR-Me>owegT1x(&g6=1|)8Dqikm`-K~{rDyJh}pIeS`J}1%ijiY_u3NSdHpmViD z*<_~2{qw{0mukq4NtOh{K!_pJ7;ZUUfjPbV^CU=$db3%Y#Un$d3Y?i7O3?E@NcRQRnWesE3j5BEhw;+ zPv_e=Rj#bpiMe$ywK-HSuNb(LfBoQpQMvlv|FV1*Qx=MQs-`?iWGpwsM&&~2>Bj`D zM^pF0WPP``u3=CT58sdWTh`u-&fjDKAAA=eMOB2)@3>|@npLEOm~byYDveTw9#AB(OXEd8^#_+i&`A-(Urw-hT!t8-k`fBylsO8qAti~E@s_+ zYcD0++J24=s#8li#aEG z9^Vzuw!#G;AHRJWT-Tx}f03E2x^_aDBlx5*YFCG#(UfL}6?M5r3y`l2*^R=NEh1hf zHT~EQZ^8@`RlXd^bZjhQ!K3^mqTA#dSe!r?LDNPI^lDoAvS8Ys;s(c^w@_C6^~Q5N zvDI9QZR>n;b7QLZP^}Fi(B+ZMVYzegPj7=y``u$2M}dnUm=1tb{DD7*Iobk_4li|s zE9^d4@g2%ube=O0)lM_AFMXPE(RnKzo%ZkTSYv*%jg#I%C|C&;FOCqglWyNHZ?>8! z%2GH>XEw_eY($J$FiTT#ZqEwf{U%w5BbYJ0M0Ar1#UArDicKvhmKb&i-K(U-w;MI!r6HpO@}lf{ay-$$~Juv7w;)TUcO+<+qd>C{336b4e}3 z%)xTG`$*D!xzA*Cfnmdg!Ash@6Q6r1aHt#AfsmU#UMz5$w^s2;rw^0p-->p{fVk@? zfbdJW04oT7A05m<{?tf0ct6@s%G+)qk{PUykl2a)dyJ?Z-S6zu7sTHXZH~l911Lr5 z&^wBpXQ!i>d+{rK7)0v5dhiCD8nWXo6^!nWd&K%xAR z*^`XGr(lfI9XaCgZV9clb?1QgmP5^7Iq3{9{4B4!4Cv^UqjjI(c8x=8Eo62q8UHh{ z5!P%u14hcj2@lD=p7bC*XBLNq(~q5}VE-ONc59UFTIcLaWwRj6DH@Jfa@yw%YVt#B zP-=gqyaMKG-tDHPKn2V+2jbiwHh4qE+{(*p656&-HJF_*B|zjUHJ{6fzgv*H{gSJbX4A8*LX)B(eTehy9YHE>%vf z&qVJH^H6acdTM@EA;8n!_nfL9nr=@qH+W4q3~EEdJDkRzo7t?mj(~~WoREOJxCsB- zW~2JV!!G`GnYu~{Wyr6bx}6Os41&BVah)xc;BEP#^e=A7ES>s@BAHWg#dEGovas@_ z_MCjr=uE%u8rZI?yb4iJmvKhAjS6<{`DL#l67|gTr|Vg8Xi&t$mY^_((u#Y`P?Vl) z+C@+&VY86#B!RJ_kLsvEo)~dSybnI;LP1PcDBUPy>bC-L3dsl`C6d*$0xrDZf6=zj zL##-RV!E^IuAtiaYoHa2(3L0hns$=B(Tk!>O;Z`mJhUP3X4FBD%WPGzBi56_!rk`(dhSw||%yv8xW2bBzR* z2F?_on0{nR@G)ziVB|YM7{wa%=4u3s?Jz&OP<9YT zKoQs<;PsRM{8#;(ghJJHwJi9v($XElYMz(}>nmpN(@Jk*reo-@wf=CKxzyFwnc2`8 zn0T?#8BoEn{Q`n+u>Fu)(*l9PrNUK4de*9`X^;@&dA9&B_3VuCm|b8=l!&3R1(2dT z2v=95dI=H$2Tz`f4!t!}Bfrg3kg(Peos9Jb#*By~zJ|R_;nfk6HO;EXz(gReOtzzP zGc9nWw+(?W-uUN_zyp+%bB4$04>nm-dG zN5;{(qs77oEnwk0n<+d&#U7^c!iwKfvOl9xvud=}2+{0TNxD;0$ni_o;fW1Flpqm4 zSEe%ZLh6dIQ1z9b8B$Xk(s)Kq9Sx{K;d~+iL{&z=R5<>+`psbYE#SfRp7D!h(h>ttgH|IO<{ZInr$Vzw2B*gIH^2C~QkM);g^3>l@6K?%xE|(wf zIVdmkMGaD@nAxP#l5>c{?N=sNYENL~&t@tkUYckW2a7%o4LrwGzhy;@C2be5n6)7@ zF;YLl3dnj)d-+LtfeYJ&sl*IK)FYwlCTH4o<|QPGt>71+q>rGY3eA_^Nf7s2U9s(T2V2*VRZi5&cDm=&=Cfat!WqBsnz z3E7jzGZ@Dx4^`iQ0ra_`c5rJ>VPnMvmcSRn`v!DL@!@evls~q8G?uXBD5qwS6Huvr zHCWD2#Thil_3JU`k53kb_Ov3^zKn=Pp^e|nwDo~r*||d)`lgCb$xzG4x0I=#ODTz9HK*>d65dD zeD{N-nX%85Y$d!9Wc}X{ziiZo2WsU%MW|jA4T5{C6(@5-$HYf!%xhSa-$fT zuUz|xwDeicuXRBgnLX&+A|!_B4U~C@b(VcdHL#@JhBISv6b*BQyQbR3AEDA$T!CwU5OeX z`2I+?2%n5i6)SfX!qBfT8;Vhb^Zf{z8~H6Fp#c|*)JY*%qx_i-+qUPT0lpB!1!_G# zhZhiHj}vQ3c26kyX;wolzBa z+3d@D^_nVw4>6M7>rZ1iiQ;c&;jMJbRlx5$tr#Py>)V`k>HV=qpP&w)rDLU%y_*rk zau(oZB835PDmEMvD0GoI#IPcWg{t<$x+uhm(2c(QAeL;i&CG>;&0|JgV^uIi^=t0MGVj9YMRNLt$0yhch5)+(IM8U0*IWOFhM?8_VF{d zXOT*q<6Jo4{z+zEHxmMrqaZ+B@IhguO3~#y;cXzNUcbZ=vO}S>{n&P&Iz^LI@X_J% z$JPj+-L=v}keAv^m*ACjg21x)cwGOT1=x?y8fcJU7|KOc{#~ugC^U-7rH!IIv5fv} zbr+F^s2Z`BPbNrPPm=kzNRJfn-n{OQP0%kpejYx; 
zfgN_%_bGFSa6J@2=Y>EdU|;uf|2TX^aAzJgyd;1DZ#xo=Xl9?*I*V$P1bJfOM;}jG zLVzx(PFeF*NjjV)FE&(rq-R-{Ql&{iAtiIy$a^nxL%pCFfT!ZMrOap(0ni1ShY$R*N6&flraoi{hgT`lJ3n}N=5RXH-Dgxex z-$7;w#8)`fwD=U}8;_Bs=u{_;-+uC8fhwk%=w?4vt_**PJ{k5p5hX=LIN{RdgS|^~ zsaHJN58vgbA?n>UzI46yh#N)V3N=|fqgRZcY^;}4$(2FdS*RdCxJEC&B12WNdS=i> zNruw((P4h@%t|aaV^dE|d*0bP?eVZPiWv2d?7A5S$1OP|e&=nTS8h_Ud{R?<6C~A7 zLd`8{QS*kZqQp=dR;we;zW`HdEcZs?mBKy58GJ%(U%((*N*;%c5o^233k~%ryC1@6 z>=V^@O73HuYbAesWNS=p#Y3J>6Q-p5QJ2O!LtZgyD{@Wsddz@viKb)4?3k`2!=+K9g$0Do{-pe8XEkTRe%!P|p@>ci0-6b>8)2EDoiSi7%wr!Nc(16^Ka=JU$Z*dHga6@8D%=g>|T zyQz2xzAgwMYTG64gvTLURBYT%U8|)f6KP>2{yom$^+jFf(6q(0iUk~I=$CFReR{Q_91Tm|n6htdtF?wx_r&Fj$-TA0Bgo=uj#^g2h@%_v0_} zmJFwvu!HOU%OqV^l;tx|CqTB)X-C$d(R#GRZH^Bnm;&*A`ZZEYZgGa`Uy(~R`)iav z(cb8vs+y!B4SFZm`6Xg-uGY2Cd+oC=?N^bUcf)?mZRZxTOC29{(_mxUoAz=~Ccui& zGvBFGxFM6Ji$e_O;P{b5N6kprD+q`|{h)5andAp+_biq0)HM&oRlN1VzTTc`yy1N` z_mq!H>aE7)1vtx!0U}Fxlhe%4{U8FXmp(LgLJS znzbU!nhQm}OJPKt$mT6WxvcPlV0gP4Q_CzN>M0kYL-H!@3AfC&S0r}=h4at{A^zd~ zD4pVYA*k8+8!@N~c6ukOM)159rXtS&yRhc8GsM9b8^muVhEUBC51pKveU)3mUCRob z6gCc(dxjQ@3fd{t^Dtql#|z!>Wn;EWxuY%SX%AdAQgL z?CCY66lzvLM{(FIU*3dp2N`3s30*a4IPw~TI`pVA z=2v%%_r3vD#)9Y4f`(wTlo5I=ey1u@(w=&n!71$^p(fi|KZO=u8fQp~>!-%1LVP9F zuR5fH<)LL@emc5U^<{Ao8UqY7?H{JF(hAI4&4Dg3CKZY;*`Jy-L^O#RTw^F*0p|8( zFYDY#jbmzz3yFddampqys~k6FREj4Kn z^zc$=IRa=hqjTrlp`13gTvKr;00>k~Jy_tlnaWFq6NP!U3?WX-Rspz{9-0iI7V+IM zf}ul&1z!~Je+~pdtBG-fiZHAJ$gO4q6jSwRE<*Ulk-ZT}rhD+yr)Q|-KlB1cf1H?F zHl-??AyG2gsWu}SToDtzmbZ`bnz|?-TZ3Wc1Boon+ zCE>%ZZr0n?6D&Ajb&u?y$|GWUH-!f4Sy4|1A|j`~3yXZ@TF}1mMIv`5rm5VCEbH6= zfu`pzhQ3i4Qv#VaDq;_|#>2C})sYhW+kTp{e#-E^_XJ*)6zaW}?z^SO_+o-*Z#dJ@ zcFay=VGqT!cyN;~_>RS^Pa9TjbXjA35R^8QH?g=#PCXBBRAW;c`^u8NOJXy;Gtdc+ zxOy}_$;7!Wu}s9r(dhl$QPh#f9&r%6RRARep3%q_LN%dJ0^DL)*COoF1NMl^(ccAj zLFPziUOzT5%AWfq6G?wmUwe2a1ZokUSDG5g=5U;lFgu&qR`_vAXkdKgJb?l;<{XAMR4^9jlQ%jy^dMcsI7My8b zmr!9PqzDx}H0~cAVCtdUtUf*%KWM!Ln2UQ2Dn*?OG2iwM&`S=wkC&FiBTrO4kK*F= zRVSyZQ?{StFxjOPWzkJ&{wy{5(VlO^b_>RIl050AdRBidjU6IoUV3byv9f*Sfzi}_ zYhS+==9ju(3Ts;XQ^l72XC{`RrO_wE+ts9ZeLct@UtfEh^FiTMhK7cN^vs3$XW|bA z-Ye?t3db2>WOkp*kCsrc`WS^;I|nG!;g>c8xHT*|D~Dgq7V;uwm=tioK2xzGev&g# zxQXTj1L^TfS&Ou8LNRz%ndH4n8a-QfGH7;Ibfq>;qb7;`Sp)6+Sk3UCb@m_cD=UQ3 z*CFF59DK6YnwgS(2S&1uai1(HU{Kn}Z*akJGU(oz3c-|VFEksiE9on)3**)EzgfKG45t#38qltTxgWx+={aGpFXATd>&_@ zR4~0xD&Kd?n3r5BS4>=$F-dzev{|cr;MQ7jfq@x%mF=Yq!4H&+;Kv;o!n@^wIwv@@NKRqidnHr7sNV^r-HI?cj3WrRe{ORZ|A|J?vy-(o@6LEe&}d8m zl=^=5c_En)N{{Osqs9=!;XtX({p5#-95EC79};sOW$4v4AW|&H-=1lYo3NXZAz+zA zVjp7X5^4AbiD*3wHD+*gNc+L~>TK)xx@~cQ-*6+EV^tOLNuAD_`3b&yZNE^Q>AAi1 zo;%H_BmaI{>g&|R#VR@^mU?2lt+@PD9O54uAL`L@$^AZh)l$px&jfj4rM_6RV^SE# zS))%Q@h-514^9A4&>x&in4+?kYGOo8fn1TjcEGf%nC&;~fP3OghzhyblPJX6ZvE3i!*yu;}s8GppM%4m{GGWe%&!hyy9OedT3B_2Zf!V0c9=>Z6ac zz9tfNO>`;(lP?l(8@f(Oo|%J<>VjqVf+XjKw2&IR22Q^%N#&3RjS5`8riFC>WK!JY za&_0nLJxR$aSs)XaM-8Er<{C4Vp>L7;DFAg6;Vt2+UrtFe#jZWj#=gvc!YTDWmVSq zu;9|axuXy1^%_d&Ws#XWOO~+wDCz>NBNWHQ3k(Uylw+;>cIJZWq+Yn^b$~s5-sc~! 
z#6w3Kx|r?mD~HO_w~a(CTyYm1fHePl5q>X_UvfJ_ukAi})+K&utx&Gubc-QIDNv-o zIs>O>)D|pQ??CkfEgQtfO&ms5;o85#m(;q{0f^P@oycA6iYFwf1)3+ODk@JkoqwdP zHuqd@vVOR|(`twQ|MOi7l{IK7NIa+Ac5P)!CKMNI=nr z+pc{Et#J-VfAv?u(uY}HA{|rUN`3IfY&$^`5>-*XM#doxH`>TW!!&dKw?5Lllo50b zI)w$f7wYdx>EO1lbGqxL7vJ@bhUA3=kM|_+yA3gjph;FK#WMVm)AUMqTg&l3h}_gP ztKguzx6Wti!NhgH;m1Lh`lPJ{m4F0@!<`Z`1KOGZ)h8RUt(+q9xS9|ZkAhLV!w9r#l0$RE_ zv6@@CSb$i4om}5LF(4p>M15V&Ege9f01J?fowG3IW$$N7fSr{vr7n*$yRxe!$ktBI z-yNjsucBq??_eolMJXzRB;*Tx2XF#;nge{D9GyLYzQUA$;{xB0{~Bha1pF=H=^#w0 zr>q8$ba4j(xLLVb*;%B0?YucDMUVhO?pD@74Jny_LcE^|Q`&lZx&qnQe0+RZeYjX% z+-=x61Ox=w*g4raIa%H%SUmikJ|Eq?F)_cPW zn+C|k#mn6iB<&4y_N4k(2rJ9~)_3)CclN*#8!Omr_;+O1W5i{pFs# zlrZIA_JLL|mUdRazYjsyRvcWsJlrhS9M;w>++6H@Ean27d@L61{Cpq_Yd$VZEAxMW zl6Us-G{RL(94#feou;jE9;AP>oH0NO9=Hs?tF}LF9V&N6wv*zaI;^4I4 z<^LPX$`UB!;_hVro=!U_a~lwwtFz7D6Mqp76jzfMrsQO0|DO>xM{`f>cLiZeB|B#? z-~XA=vU37ydYb>mCI=roKQ9L-w}1dIFCRPCKMehirvq~Lc(262Fge&+x&DFsS6YDY z!Mqb|{#Tv81N;s99t%*?9c1q5;;!Z5;wVh{7X;uh&A-bVAoP!<$k}t_v`zg1@&L!cK?@T zSzB|O^Ke_Uzo%N@J>Q%h@5usztXTLstvNv)?C&=S)}Vh!_i(ZH^f7k_iQBwK`X0@D z0sS2ffc|eP8U9_`#}@P#PwbrBEbN>toSa%5JU}jPAddhOI|q=Ros#XJ0ki#eRsUDS zLTvvBCqjP<{L3=%uJ@0z_vPh%wPO44wm?-|4R5j)%72`{#OkAuY~_oUH`w)h4eoUJRs-yyC9$U$C(q~@SpFGLQocpvQiKb zbxBm5?`Mdva{3++5NN}HZII(GHCFF~@SgI@((orxcxaS@G&-ji5D)}h@>1ejzU${j zeu3J{9pj$``8=QyEhR!>hL|BSF{c3W@p9S?+T`}duFzj_f=}1~R!!*!-+S zl%$QwoAIJecYV{>(U??^v~tfK?3&JX6*d^UYR)X4a-G0f6fqm25C-M1SN5 zsOB#`%3vKnw9z2Su8!HobT|}pt04zX2y5V4ch`68wk@TQxHmOq@rS;=StSw=Vp2;t zAu*k38?i7ri+anY1=4e%Jpl!bv4nzYln01quHB0&{9Yu&6VU~#WA2Uuy4A4ygISD& zacSqs(>2%Sl4ydy?NoC*g{1G>1J}ZLdDFku#J>UsPCjge70o z3QBZO7xjJN0P+6tlx+u#>J0t_Lb%*WNcpA8zrjru+EakOEvz|!4xg=9IEP<17aSY(q zmT}JPA&@8h4%sd0GIBekRKiQOqqfZK0U7jMl1qlglP9`^MZ(m!wr?PytMl`hjRRgz zT#*n#sRSd13=nh1#<%Z(u($1RSc0uIH^z2zuZ8!>8R3f%14d=^5 z;z(Idr3*lgoRY=_cKL)ByA41Q)Gv-J>NdOA5;NX@kL+C9^Rv36NCzD_hGXfuV>KI)bgF7%hY&n#^80Me)Wu5L!$W#gh zmsJ{g>yItu+7Sw7iQut{P9I}or@D0Yfm5gC68bsj?^**d0lncu1z2bXW6$jKoCTOw z#fJg+jtQu&ZwqaHw5h7CdfU-l%&VQP0^s(wil0n2EnhhcEGD`_-Iz~MTs0bCi|SYs zl&a)jkMjEL2{-nlY|bjvtn)FjwOwE&Uof;~r@ca@~NeFQjY2w)jbHx&4Zl4IPdYYoyaO6!Yo^cZ7e!%p& zhHjCC%+-YUeSnT1;4~tP5}{R9049-^%G?xEpMO3Gc}mqlQAfbe zYADi#Chwer8CytNrQ;F7fP-E5oD@S!d#wBoDJx$~^{jBSr#jp^Cc>UADr^Y_rsG(HGlXYoNiqq@X53|e z1S<`uN^wryQRg_O+wQP0lT^mhHrrR+hA(bi;7nXY_XJE{rA=jkK^XgaV{#c1cJKq1 z)5kY74r`NSXbi+%ojZx4e8;)bn~!ET)E-3bxto7?Tf>zeBnB^0X z`ItGO>95t*E^Uyerjrf=b`GdHU-GNtg27y`3r;Dqf-|RINSV3IH@lHG7oGFIlJ|hm zb$r8{H@~-zCuWV#h=ReLz`NIP^t2%W1M7WEPq<)Gv% z(k1dyZ1N8a=8WSc5cD|Eyq5;Ok7~qo2JC*6=aE}!Hx~XD`bqQMmOJ6=$;q}zz#5!T zOh7~vF}ejBL#KAX$$9ILI_i_y`;TLDU$pO#!?dp~x#oGX??7hExe}I6&H~k*Fp}^& zlLw+sGxSNVt>!_UyDe_oz%C84veZ8kTpKF|IkpZo<}okmoWYy3nUa43o?tt8ljAh=X7DMLW*IrF*LOThv*G5rL0lO`#l#OZQ$ zQC1{k$jaOl)SOY<1q&E9!QZEx*IiSUu;M<>OJI-NX;B&**%VF%ZQ7M#-Hab#4%|Ta zazYbM)5~bei7rO)+8?qXa;p#OX_DOdk3&z}+fgSNJr_9U*QCSo-U{^6gNosAr&C8? 
zUq+-z4?d?>Z@~cwH1fu96A{I`^>?_}9Wzw*r_t6oIhF9bZXK|-Q$<-*+~tZ~q4bq@ z(Z=#w)Q!7(=lYnC*S@%U;YNSh9Pg*K2ZYF9xLIlm5{xUGv*KG9N9WrdYS(RD`J^u16rad1})U)}GxLr_~^e2*PJPMD&R8PhFj?p3Xr)&h|MlkVfV>xUSJi z1bEvv_$htR?pnBDw&;ul^}4JWb+ zM#(J<0DL2e)Hi9wK*eMKy*TT9yZG6c$b4lYcd#fE7L=o7sxVHm@o- z%u%;*ht1$Z%X!0fJ#uF-A3Y5poGae7oZ(@h;!C5NWp7itGPq0s6b(o~ngx&XrG&vc zG-?)ojn&cu`NqOqe?$RmF;2HG%N!3(oaL@V5&6=hMZZ2ty_Z9)|H2{>_0^uG6VR-q zK!r26I>L5KwE@zvtV^1%F_Ty@ zRhZTTv@rIhi!B%{!<~MXW?L>T%0O*_0Wo2TRiS^ZWf8X9UJnc>tvl)C-NAsav3i0^ zvTVWUaAO2m=Tj$ryB3CwLm|M1&_PNSr$Ln)MMPqAX@z0!AbaMn$c%)bdo4Qo0sV74 z8~2b9C3#B>FCdJ>`X*F2HoHy6+pke%32fuI;}<~6nd!;y;YHyESG z!SeAPP)mjI_J%`NGluECwaGF$H*WpZ~`?4l9s?nMO753H63u``Z?J6eF2= zle^ys{kqMLB$`v^KE$+(U2JCk$qFXIo$Gu)s~n_V^OcEEm5sB@8VyTJEHHHf%ECHo zhBcAE7-lk~kZ-tZ?vyIQF6So`v|3ShL`ADMn{rdaGdvOSZ+c5yG72_M)i@K*+AK*{ z-Qej(Bw0d#jt{Mg6`@9p8CU`;jFQt74+^k?YeO<<2KMvkyT_YjxGHPQF`GE(?=M?K zuP|!XWm~1w9gqI3g$tQ#zgGEn?{(*shopOaKFX4~4Hw#wzvryb1RqWVwtr$^*jH4& zexOT$J>1|l+#Z7F$2^97rN+g4(iT|5c|FU} z)=^-^KYGZT?N?MsRprX~`5=fZ&(fFEt8Aqj z>wqVhc|>2ahLTyzxTfl_${UrlM^lxCbV6@-P?@yMA*OP`qn48VX(J2-~2;l=qSoxHlM9Et)#;Nz1#zHE4erf>9Z?Ukkz zL(Z*V^gQe3uK1tch+mh~hV$K=xqE4#6@z3el!(`sZs_R1+=YYsyb#~MBSsJFY6q0O z%>|AyX-AI*L6X85sEyhm!uo?SbaNZu#)gtVJJT$3X2cootIK8TeQZS85D(>5F6GFU z2z2^+_ftQum~-^dt##9__FW=L1SP06duW|j(|UH# zeh%!v4c~TO=f~#HVA^E1&eR7l8x8Yl42!j=3%H|tjJ(SKcJ!!PghIcmDmV@(qp%L! z6Q4Y=*tjfqV%=tLFXX@7&K#!ve965TI9^+AL_6EdGVqF+vN$*~GWJ2sgXBx7T!asyAQVv%iV_==geAo$pfHC{J#cTAcADFKy=EPm5p7~kC8z&efa@s*!aEP5uuC_+9wymo$N9SJbUhe9?C|>{ zhk-JU{4dvwPukYqe#tJL%27El?=Qh;LT8QlXv?5QUX*kC2QoQwv~k6nM}e*Csp zsNR8-^l`pAX%yX(7j?O3+N}QU7yC8-7gs@jzpt_+oWN#Twz&PS=F!tN6uFmyPE|}N ztQzg@*L#(sV$>)h@FCgd_tl9f>foXYQHryrh`0$z+9Md4Cu)zz(|AV<<7%CZ0ge0^ zgJX!^!{|q)Cu0H3sjzh<+6<+pb-vc7|Bt7?IbNDiFGlH_DSyQZDkP3 zg|7vWs_#z=o#T=i6hHmCYTn3KBQr`QKard|x=@B-e%{_5u?t}eE1U5%lZ0QM_l)?W z-IvGwgtXF8^%#^>hXHz3j_hdB&+3%Eu^D6wWA@G4Z=XQ+3FxT$tNauDR?*LWZ3={C zg-G13Y79igI$+&NS)_yhp0#F%1K?}N&>8F8X<4R!mAQ3PLg^9uR@Gx#WXe;F6wZw( zQv$Eit}c0&I;(Nvl;74=`hYSX8VNtWMPE+wu@{3+{wjvdZ>@8dF;z#fAsj=b% zN{Eq4`*5Ed`)&(=>Y2m89Yzr^=6yXZtdA}aR+c{UarD6o=qbEB?z0CqxeNKEg3|VG z8XLO41x2eH*-OUd@7^JGwIrHO#X@MeVjfZGA90KB^X)*if2B`wwKEU0YAY~rU-&JU z<+9FL4jdm%7M`IDrhS48{ooSJnqV}O1qURJeN+Ltw=QTjQin=ygaS-U^=t%;oRDjH zFT4ekMaCSavm|+KykmQA>JmggeAyD=@o!turj=F4eIr9;Wjm$<-5G7L2(ogv8z;Wv z>3HC?nQcaToNC#ku({*c8%voW<+!$f^{LXBS}i!93=NbP02HhM#YE zAc=3nO`SSp#cECR+DB4|o8R9S=kr8|TBM*v>q-`aP}6rtlP&R3+h^@C8xFBIwMx&r zn1+*!w4un9HU3qYwFJ3w=~lF%gb#M1Smdru5bacsvIs8!hYFJy$pM76#tfviFSRS{ zXXKiObpV$FuYTtokC8$y#db}mGJiyTDEEp%o=q^t6iadq??Y?KJwdwM$XxgraV7~$ zI>D^o07qIbKd=@ocfEU_*>yoz+x)|;qr`xrz%Ql4rBGv$teYA_=7de01H62o)7l51 zE`j?aD<6ER#T~x}i#iXF);A^j_MV_@FeXi|HB6Xh?X(#MekzWd53eK^mmWB=M9ZGU zSpWk6H1NU*pBn6A*!BfQm=N>h%Mt$9z^a9fl{>7HuQ(sBu`mvb^_QoHAnvsybz8I; z=f4Go=Wh2I!?g}NrXD($Ev2aSa3c|pE79gEtdV_O!%0}K_EkD}#+Ly9Vj2~P!!)YP zBx%}*)TDVkFvZbSOek<#NR>mQ;!k2Y$g>Zd1XORK4b96-I&@;PVPG8S$X_6+2YX}< z;#8?w5iw-&@|9V&OxU82)u72IdqBn&0q_g-i#sWrHM5Ik=_fSrdWYC zLmZuo18@(SDPV%C%3|Q?#dsLZr70r%mMJHOm(uvdt+Ye(`#X=&C=b5IqnYm!-ARGG zl92YJ)Yw+(8Imbh1;t}HVno&BYSY;}v{F!b=+WA73Y4s|Z>*#pVqLJJZb^3|gmeW8 zu?p^&EF$V~vK2bkik7OFFRN@hu2K9tp!}-72CPN)#Yo%PR|+LluxVNlwhbfunI>?% z+*63KXH&Rz$lVlPNqjeD=<7}S;$~3j-m+G0#fOf-b`XJM7o6J0&!VDF3!k~==;}*B zZr!l(0yx{#YG~+!fg);iI*R)i-A4|K2QJW?{l<^OV)+%Qn3`ES*m=Cv`Q;6D6hm(+ z`+eBZsu48(C9m_+hWMs7e*W`p3`y*_a-N@>iLdQL=uhk(@fKmMhhXO_wMn+?0{Manbab5NfrZr!rSww z_(SqZ#9%0VPOdhAm)CA%0bwz2B}Ry=ZgbEm#gP?P*ndsz5il8c?D=DJm3t zK~<0THG?V$n?p4x6w{7wR1aJx2fi0MSPlfOsr2YzvNrb3)PeEf^&28zhz7o-^0P-G zosN0<6cW6MYVgfx*{Afei~sRy)fsY!(PH3Qw@r3`(01yLa>W3= 
zjimRP6P%QKX#bqYJur+^B(0|$J;YV%*c=$x^4&{l@PGf)21w~DSO~w4OeRR4_C1sv zeT7slKQVbTNU;G!`QfP>wHyInyH683g^;*uyEje>jq&pov&zVPSzy~KJ>(54=$@m2 zVzHNb81hvzcd)~g21j_+7{!4N1KfNLFE$2r_)FxT;Y8wM1HZ#nFbE<~y^{l2;i-Hk z`^KLrPh!?EAA?Gg-Xb}mDt~z5(5~a_srgA`%)}9283E#kYLY`+^*)e(Mc#pd9`a*; z8)}cvx^TkmKIaXuV%UrCBgN|a$7+n?%(5oYpQ(KGsnz2Q2S1Q<_#(+~V5AM5k~G1F z;r5d21Du*+eH1DbtEcXs$f5`Nfr6*HLF?bweLO~#p5`6`H@Hr7`2MUQ)Ybdil+RUu;1h void; onUploadEnd?: () => void; enableMentions?: boolean; - chatGPT?: boolean; - vertexAI?: boolean; submitMentionsRef?: MutableRefObject<(fragmentId?: FragmentId) => string>; style?: CSSProperties; onShiftEnter?: (value: string) => void; // also ctrl/alt/cmd-enter call this; see https://github.com/sagemathinc/cocalc/issues/1914 @@ -129,8 +128,6 @@ export function MarkdownInput(props: Props) { onUploadStart, onUploadEnd, enableMentions, - chatGPT, - vertexAI, submitMentionsRef, style, onChange, @@ -200,6 +197,8 @@ export function MarkdownInput(props: Props) { from: { line: number; ch: number }; }>(); + const mentionableUsers = useMentionableUsers(); + const focus = useCallback(() => { if (isFocusedRef.current) return; // already focused const ed = cm.current; @@ -781,7 +780,7 @@ export function MarkdownInput(props: Props) { if (project_id == null) { throw Error("project_id and path must be set if enableMentions is set."); } - const v = mentionableUsers(project_id, undefined, chatGPT, vertexAI); + const v = mentionableUsers(undefined); if (v.length == 0) { // nobody to mention (e.g., admin doesn't have this) return; diff --git a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx index badbffa148..671f888b07 100644 --- a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx +++ b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx @@ -3,61 +3,104 @@ * License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details */ +import { isEmpty } from "lodash"; + import { Avatar } from "@cocalc/frontend/account/avatar/avatar"; -import { redux } from "@cocalc/frontend/app-framework"; +import { redux, useMemo, useTypedRedux } from "@cocalc/frontend/app-framework"; import GoogleGeminiLogo from "@cocalc/frontend/components/google-gemini-avatar"; import GooglePalmLogo from "@cocalc/frontend/components/google-palm-avatar"; +import OllamaAvatar from "@cocalc/frontend/components/ollama-avatar"; import OpenAIAvatar from "@cocalc/frontend/components/openai-avatar"; +import { useProjectContext } from "@cocalc/frontend/project/context"; import { LLM_USERNAMES, USER_SELECTABLE_LANGUAGE_MODELS, model2service, + toOllamaModel, } from "@cocalc/util/db-schema/llm"; import { cmp, timestamp_cmp, trunc_middle } from "@cocalc/util/misc"; +import { OllamaPublic } from "@cocalc/util/types/llm"; import { Item } from "./complete"; -export function mentionableUsers( - project_id: string, - search: string | undefined, - chatGPT: boolean | undefined, - vertexAI: boolean | undefined, -): Item[] { +export function useMentionableUsers(): (search: string | undefined) => Item[] { + const { project_id, enabledLLMs } = useProjectContext(); + + const ollama = useTypedRedux("customize", "ollama"); + + return useMemo(() => { + return (search: string | undefined) => { + return mentionableUsers({ + search, + project_id, + enabledLLMs, + ollama: ollama?.toJS() ?? 
{}, + }); + }; + }, [project_id, JSON.stringify(enabledLLMs), ollama]); +} + +interface Props { + search: string | undefined; + project_id: string; + ollama: { [key: string]: OllamaPublic }; + enabledLLMs: { + openai: boolean; + google: boolean; + ollama: boolean; + }; +} + +function mentionableUsers({ + search, + project_id, + enabledLLMs, + ollama, +}: Props): Item[] { const users = redux .getStore("projects") .getIn(["project_map", project_id, "users"]); + const last_active = redux .getStore("projects") .getIn(["project_map", project_id, "last_active"]); + if (users == null || last_active == null) return []; // e.g., for an admin + const my_account_id = redux.getStore("account").get("account_id"); - const project_users: { - account_id: string; - last_active: Date | undefined; - }[] = []; - for (const [account_id] of users) { - project_users.push({ - account_id, - last_active: last_active.get(account_id), + + function getProjectUsers() { + const project_users: { + account_id: string; + last_active: Date | undefined; + }[] = []; + for (const [account_id] of users) { + project_users.push({ + account_id, + last_active: last_active.get(account_id), + }); + } + project_users.sort((a, b) => { + // always push self to bottom... + if (a.account_id == my_account_id) { + return 1; + } + if (b.account_id == my_account_id) { + return -1; + } + if (a == null || b == null) return cmp(a.account_id, b.account_id); + if (a == null && b != null) return 1; + if (a != null && b == null) return -1; + return timestamp_cmp(a, b, "last_active"); }); + return project_users; } - project_users.sort((a, b) => { - // always push self to bottom... - if (a.account_id == my_account_id) { - return 1; - } - if (b.account_id == my_account_id) { - return -1; - } - if (a == null || b == null) return cmp(a.account_id, b.account_id); - if (a == null && b != null) return 1; - if (a != null && b == null) return -1; - return timestamp_cmp(a, b, "last_active"); - }); + + const project_users = getProjectUsers(); const users_store = redux.getStore("users"); const v: Item[] = []; - if (chatGPT) { + if (enabledLLMs.openai) { if (USER_SELECTABLE_LANGUAGE_MODELS.includes("gpt-3.5-turbo")) { if (!search || "chatgpt3".includes(search)) { v.push({ @@ -103,8 +146,9 @@ export function mentionableUsers( } } - if (vertexAI) { + if (enabledLLMs.google) { if (USER_SELECTABLE_LANGUAGE_MODELS.includes("chat-bison-001")) { + // ATTN: palm is no longer supported, but have to keep this to avoid breaking old chats. if (!search || "palm".includes(search)) { v.push({ value: model2service("chat-bison-001"), @@ -133,6 +177,23 @@ export function mentionableUsers( } } + if (enabledLLMs.ollama && !isEmpty(ollama)) { + for (const [key, conf] of Object.entries(ollama)) { + if (!search || key.includes(search) || conf.display.includes(search)) { + const value = toOllamaModel(key); + v.push({ + value, + label: ( + + {conf.display} + + ), + search: value, + }); + } + } + } + for (const { account_id } of project_users) { const fullname = users_store.get_name(account_id) ?? 
""; const s = fullname.toLowerCase(); diff --git a/src/packages/frontend/editors/markdown-input/multimode.tsx b/src/packages/frontend/editors/markdown-input/multimode.tsx index 029d53b33a..5aa50fe1f6 100644 --- a/src/packages/frontend/editors/markdown-input/multimode.tsx +++ b/src/packages/frontend/editors/markdown-input/multimode.tsx @@ -79,8 +79,6 @@ interface Props { modeSwitchStyle?: CSSProperties; autoFocus?: boolean; // note - this is broken on safari for the slate editor, but works on chrome and firefox. enableMentions?: boolean; - chatGPT?: boolean; // if true, add @chatgpt as an option for @mentions. - vertexAI?: boolean; // if true, add @palm as an option for @mentions. enableUpload?: boolean; // whether to enable upload of files via drag-n-drop or paste. This is on by default! (Note: not possible to disable for slate editor mode anyways.) onUploadStart?: () => void; onUploadEnd?: () => void; @@ -152,8 +150,6 @@ export default function MultiMarkdownInput(props: Props) { style, autoFocus, enableMentions, - chatGPT, - vertexAI, enableUpload = true, onUploadStart, onUploadEnd, @@ -391,8 +387,6 @@ export default function MultiMarkdownInput(props: Props) { onUploadStart={onUploadStart} onUploadEnd={onUploadEnd} enableMentions={enableMentions} - chatGPT={chatGPT} - vertexAI={vertexAI} onShiftEnter={onShiftEnter} placeholder={placeholder ?? "Type markdown..."} fontSize={fontSize} @@ -503,8 +497,6 @@ export default function MultiMarkdownInput(props: Props) { unregisterEditor={unregisterEditor} placeholder={placeholder ?? "Type text..."} submitMentionsRef={submitMentionsRef} - chatGPT={chatGPT} - vertexAI={vertexAI} editBar2={editBar2} dirtyRef={dirtyRef} /> diff --git a/src/packages/frontend/editors/slate/editable-markdown.tsx b/src/packages/frontend/editors/slate/editable-markdown.tsx index fc00b9a584..e4a1842d56 100644 --- a/src/packages/frontend/editors/slate/editable-markdown.tsx +++ b/src/packages/frontend/editors/slate/editable-markdown.tsx @@ -23,7 +23,7 @@ import { useRef, useState, } from "@cocalc/frontend/app-framework"; -import { mentionableUsers } from "@cocalc/frontend/editors/markdown-input/mentionable-users"; +import { useMentionableUsers } from "@cocalc/frontend/editors/markdown-input/mentionable-users"; import { submit_mentions } from "@cocalc/frontend/editors/markdown-input/mentions"; import { EditorFunctions } from "@cocalc/frontend/editors/markdown-input/multimode"; import { SAVE_DEBOUNCE_MS } from "@cocalc/frontend/frame-editors/code-editor/const"; @@ -32,7 +32,7 @@ import { Path } from "@cocalc/frontend/frame-editors/frame-tree/path"; import { EditorState } from "@cocalc/frontend/frame-editors/frame-tree/types"; import { markdown_to_html } from "@cocalc/frontend/markdown"; import Fragment, { FragmentId } from "@cocalc/frontend/misc/fragment-id"; -import { createEditor, Descendant, Editor, Range, Transforms } from "slate"; +import { Descendant, Editor, Range, Transforms, createEditor } from "slate"; import { resetSelection } from "./control"; import { useBroadcastCursors, useCursorDecorate } from "./cursors"; import { EditBar, useLinkURL, useListProperties, useMarks } from "./edit-bar"; @@ -119,10 +119,8 @@ interface Props { unregisterEditor?: () => void; getValueRef?: MutableRefObject<() => string>; // see comment in src/packages/frontend/editors/markdown-input/multimode.tsx submitMentionsRef?: MutableRefObject<(fragmentId?: FragmentId) => string>; // when called this will submit all mentions in the document, and also returns current value of the document (for 
compat with markdown editor). If not set, mentions are submitted when you create them. This prop is used mainly for implementing chat, which has a clear "time of submission". - chatGPT?: boolean; editBar2?: MutableRefObject; dirtyRef?: MutableRefObject; - vertexAI?: boolean; minimal?: boolean; } @@ -161,8 +159,6 @@ export const EditableMarkdown: React.FC = React.memo((props: Props) => { submitMentionsRef, editBar2, dirtyRef, - chatGPT, - vertexAI, minimal, } = props; const { project_id, path, desc } = useFrameContext(); @@ -336,6 +332,8 @@ export const EditableMarkdown: React.FC = React.memo((props: Props) => { return estimateSize({ node, fontSize: font_size }); }, []); + const mentionableUsers = useMentionableUsers(); + const mentions = useMentions({ editor, insertMention: (editor, account_id) => { @@ -348,8 +346,7 @@ export const EditableMarkdown: React.FC = React.memo((props: Props) => { submit_mentions(project_id, path, [{ account_id, description: "" }]); } }, - matchingUsers: (search) => - mentionableUsers(project_id, search, chatGPT, vertexAI), + matchingUsers: (search) => mentionableUsers(search), }); const emojis = useEmojis({ diff --git a/src/packages/frontend/frame-editors/llm/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx index 046bf90aff..85d9b804e9 100644 --- a/src/packages/frontend/frame-editors/llm/model-switch.tsx +++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx @@ -111,10 +111,10 @@ export default function ModelSwitch({ if (!showOllama || !ollama) return null; return Object.entries(ollama.toJS()).map(([key, config]) => { - const title = config.display ?? `Ollama: ${key}`; + const { display } = config; return ( - - {title} + + {display} ); }); diff --git a/src/packages/frontend/project/context.tsx b/src/packages/frontend/project/context.tsx index d79c1f0817..2afed6d081 100644 --- a/src/packages/frontend/project/context.tsx +++ b/src/packages/frontend/project/context.tsx @@ -7,11 +7,16 @@ import { Context, createContext, useContext, useMemo, useState } from "react"; import { ProjectActions, + redux, useActions, useTypedRedux, } from "@cocalc/frontend/app-framework"; import { UserGroup } from "@cocalc/frontend/projects/store"; import { ProjectStatus } from "@cocalc/frontend/todo-types"; +import { + KUCALC_COCALC_COM, + KUCALC_DISABLED, +} from "@cocalc/util/db-schema/site-defaults"; import { useProject } from "./page/common"; import { init as INIT_PROJECT_STATE, @@ -20,10 +25,6 @@ import { import { useProjectStatus } from "./page/project-status-hook"; import { useProjectHasInternetAccess } from "./settings/has-internet-access-hook"; import { Project } from "./settings/types"; -import { - KUCALC_COCALC_COM, - KUCALC_DISABLED, -} from "@cocalc/util/db-schema/site-defaults"; export interface ProjectContextState { actions?: ProjectActions; @@ -38,6 +39,11 @@ export interface ProjectContextState { flipTabs: [number, React.Dispatch>]; onCoCalcCom: boolean; onCoCalcDocker: boolean; + enabledLLMs: { + openai: boolean; + google: boolean; + ollama: boolean; + }; } export const ProjectContext: Context = @@ -54,6 +60,11 @@ export const ProjectContext: Context = flipTabs: [0, () => {}], onCoCalcCom: true, onCoCalcDocker: false, + enabledLLMs: { + openai: false, + google: false, + ollama: false, + }, }); export function useProjectContext() { @@ -90,6 +101,15 @@ export function useProjectContextProvider( const onCoCalcCom = kucalc === KUCALC_COCALC_COM; const onCoCalcDocker = kucalc === KUCALC_DISABLED; + const haveOpenAI = 
useTypedRedux("customize", "openai_enabled"); + const haveGoogle = useTypedRedux("customize", "google_vertexai_enabled"); + const haveOllama = useTypedRedux("customize", "ollama_enabled"); + + const enabledLLMs = useMemo(() => { + const projectsStore = redux.getStore("projects"); + return projectsStore.whichLLMareEnabled(project_id); + }, [haveOpenAI, haveGoogle, haveOllama]); + return { actions, active_project_tab, @@ -103,5 +123,6 @@ export function useProjectContextProvider( flipTabs, onCoCalcCom, onCoCalcDocker, + enabledLLMs, }; } diff --git a/src/packages/frontend/projects/store.ts b/src/packages/frontend/projects/store.ts index 29c1adfc29..4a6489743f 100644 --- a/src/packages/frontend/projects/store.ts +++ b/src/packages/frontend/projects/store.ts @@ -734,7 +734,8 @@ export class ProjectsStore extends Store { openAICache.clear(); } - public llmEnabledSummary(project_id: string = "global", tag?: string) { + // ATTN: the useLanguageModelSetting hook computes this dynamically, with dependencies + public whichLLMareEnabled(project_id: string = "global", tag?: string) { const haveOpenAI = this.hasLanguageModelEnabled(project_id, tag, "openai"); const haveGoogle = this.hasLanguageModelEnabled(project_id, tag, "google"); const haveOllama = this.hasLanguageModelEnabled(project_id, tag, "ollama"); diff --git a/src/packages/frontend/sagews/chatgpt.ts b/src/packages/frontend/sagews/chatgpt.ts index 33b3a72e99..cf60e1f5f8 100644 --- a/src/packages/frontend/sagews/chatgpt.ts +++ b/src/packages/frontend/sagews/chatgpt.ts @@ -27,7 +27,7 @@ export function helpMeFix({ const other_settings = redux.getStore("account").get("other_settings"); const projectsStore = redux.getStore("projects"); - const enabled = projectsStore.llmEnabledSummary(); + const enabled = projectsStore.whichLLMareEnabled(); const ollama = redux.getStore("customize").get("ollama")?.toJS() ?? {}; const model = getValidLanguageModelName( diff --git a/src/packages/hub/webapp-configuration.ts b/src/packages/hub/webapp-configuration.ts index 94ad822742..8dd725d646 100644 --- a/src/packages/hub/webapp-configuration.ts +++ b/src/packages/hub/webapp-configuration.ts @@ -181,14 +181,13 @@ export class WebappConfiguration { const ollama = this.data.all.ollama_configuration; if (isEmpty(ollama)) return {}; - const public_ollama = {}; + const public_ollama: { [key: string]: OllamaPublic } = {}; for (const key in ollama) { const conf = ollama[key]; const cocalc = conf.cocalc ?? {}; if (cocalc.disabled) continue; const model = conf.model ?? key; public_ollama[key] = { - key, model, display: cocalc.display ?? 
`Ollama ${model}`, icon: cocalc.icon, // fallback is the Ollama icon, frontend does that diff --git a/src/packages/util/db-schema/llm.ts b/src/packages/util/db-schema/llm.ts index 38e46f7c60..c0c41cc21e 100644 --- a/src/packages/util/db-schema/llm.ts +++ b/src/packages/util/db-schema/llm.ts @@ -138,7 +138,7 @@ export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo"; export function model2vendor(model: LanguageModel | string): Vendor { if (model.startsWith("gpt-")) { return "openai"; - } else if (model.startsWith("ollama-")) { + } else if (isOllamaLLM(model)) { return "ollama"; } else { return "google"; diff --git a/src/packages/util/types/llm.ts b/src/packages/util/types/llm.ts index 70cce04b8c..f9395d166d 100644 --- a/src/packages/util/types/llm.ts +++ b/src/packages/util/types/llm.ts @@ -32,8 +32,7 @@ export interface ChatOptions { } export interface OllamaPublic { - key: string; // the key in the dict model: string; display: string; - icon: string; + icon?: string; // fallback to OllamaAvatar } From 35abc065610617b47bc2ad69339a0f806bf83bc6 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Tue, 27 Feb 2024 16:29:16 +0100 Subject: [PATCH 06/32] llm: make model selection a dropdown, show more of the explanations, add ollama description, and fix bug with ai-formula (no project context) --- .../account/useLanguageModelSetting.tsx | 14 +- .../codemirror/extensions/ai-formula.tsx | 39 ++--- .../frontend/components/ai-avatar.tsx | 2 +- src/packages/frontend/components/llm-name.tsx | 34 ++++ .../frame-editors/llm/help-me-fix.tsx | 5 +- .../frame-editors/llm/model-switch.tsx | 160 +++++++++++------- .../frame-editors/llm/title-bar-button.tsx | 8 +- .../frontend/jupyter/chatgpt/explain.tsx | 6 +- .../jupyter/insert-cell/ai-cell-generator.tsx | 10 +- src/packages/frontend/project/context.tsx | 11 +- .../page/home-page/ai-generate-jupyter.tsx | 4 +- src/packages/hub/webapp-configuration.ts | 1 + .../util/db-schema/site-settings-extras.ts | 18 +- src/packages/util/types/llm.ts | 3 +- 14 files changed, 200 insertions(+), 115 deletions(-) create mode 100644 src/packages/frontend/components/llm-name.tsx diff --git a/src/packages/frontend/account/useLanguageModelSetting.tsx b/src/packages/frontend/account/useLanguageModelSetting.tsx index e5e74f8254..c96a83b4ad 100644 --- a/src/packages/frontend/account/useLanguageModelSetting.tsx +++ b/src/packages/frontend/account/useLanguageModelSetting.tsx @@ -1,4 +1,5 @@ import { redux, useMemo, useTypedRedux } from "@cocalc/frontend/app-framework"; +import { EnabledLLMs } from "@cocalc/frontend/project/context"; import { LanguageModel, USER_SELECTABLE_LANGUAGE_MODELS, @@ -6,20 +7,17 @@ import { getValidLanguageModelName, isOllamaLLM, } from "@cocalc/util/db-schema/llm"; -import { useProjectContext } from "../project/context"; export const SETTINGS_LANGUAGE_MODEL_KEY = "language_model"; -// ATTN: requires the project context -export function useLanguageModelSetting(): [ - LanguageModel | string, - (llm: LanguageModel | string) => void, -] { +// ATTN: it is tempting to use the `useProjectContext` hook here, but it is not possible +// The "AI Formula" dialog is outside the project context (unfortunately) +export function useLanguageModelSetting( + enabledLLMs: EnabledLLMs, +): [LanguageModel | string, (llm: LanguageModel | string) => void] { const other_settings = useTypedRedux("account", "other_settings"); const ollama = useTypedRedux("customize", "ollama"); - const { enabledLLMs } = useProjectContext(); - const llm = useMemo(() => { return 
getValidLanguageModelName( other_settings?.get("language_model"), diff --git a/src/packages/frontend/codemirror/extensions/ai-formula.tsx b/src/packages/frontend/codemirror/extensions/ai-formula.tsx index 9f69441d06..a1171d8ef6 100644 --- a/src/packages/frontend/codemirror/extensions/ai-formula.tsx +++ b/src/packages/frontend/codemirror/extensions/ai-formula.tsx @@ -1,12 +1,7 @@ import { Button, Divider, Input, Modal, Space } from "antd"; import { useLanguageModelSetting } from "@cocalc/frontend/account/useLanguageModelSetting"; -import { - redux, - useEffect, - useState, - useTypedRedux, -} from "@cocalc/frontend/app-framework"; +import { redux, useEffect, useState } from "@cocalc/frontend/app-framework"; import { HelpIcon, Markdown, @@ -14,14 +9,13 @@ import { Text, Title, } from "@cocalc/frontend/components"; -import { LanguageModelVendorAvatar } from "@cocalc/frontend/components/language-model-icon"; -import ModelSwitch, { - modelToName, -} from "@cocalc/frontend/frame-editors/llm/model-switch"; +import AIAvatar from "@cocalc/frontend/components/ai-avatar"; +import { LLMModelName } from "@cocalc/frontend/components/llm-name"; +import ModelSwitch from "@cocalc/frontend/frame-editors/llm/model-switch"; import { show_react_modal } from "@cocalc/frontend/misc"; import track from "@cocalc/frontend/user-tracking"; import { webapp_client } from "@cocalc/frontend/webapp-client"; -import { isFreeModel, isLanguageModel } from "@cocalc/util/db-schema/llm"; +import { isFreeModel } from "@cocalc/util/db-schema/llm"; import { unreachable } from "@cocalc/util/misc"; type Mode = "tex" | "md"; @@ -47,12 +41,13 @@ interface Props extends Opts { } function AiGenFormula({ mode, text = "", project_id, cb }: Props) { - const [model, setModel] = useLanguageModelSetting(); + const projectsStore = redux.getStore("projects"); + const enabledLLMs = projectsStore.whichLLMareEnabled(project_id); + const [model, setModel] = useLanguageModelSetting(enabledLLMs); const [input, setInput] = useState(text); const [formula, setFormula] = useState(""); const [generating, setGenerating] = useState(false); const [error, setError] = useState(undefined); - const ollama = useTypedRedux("customize", "ollama"); const enabled = redux .getStore("projects") @@ -140,30 +135,18 @@ function AiGenFormula({ mode, text = "", project_id, cb }: Props) { } }, [text]); - function renderModel2Name(): string { - if (isLanguageModel(model)) { - return modelToName(model); - } - const om = ollama?.get(model); - if (om) { - return om.get("title") ?? `Ollama ${model}`; - } - return model; - } - function renderTitle() { return ( <> - <LanguageModelVendorAvatar model={model} /> Generate LaTeX Formula - using {renderModel2Name()} + <AIAvatar size={24} /> Generate LaTeX Formula using{" "} + <LLMModelName model={model} /> {enabled ? ( <> Select language model:{" "} @@ -213,7 +196,7 @@ function AiGenFormula({ mode, text = "", project_id, cb }: Props) { return ( - Use the selected AI language model to generate a LaTeX formula from a + The selected AI language model will generate a LaTeX formula the description. {help} diff --git a/src/packages/frontend/components/ai-avatar.tsx b/src/packages/frontend/components/ai-avatar.tsx index 9e8dfde346..7c83c17aae 100644 --- a/src/packages/frontend/components/ai-avatar.tsx +++ b/src/packages/frontend/components/ai-avatar.tsx @@ -3,7 +3,7 @@ import { CSSProperties } from "react"; import { CSS } from "@cocalc/frontend/app-framework"; interface Props { - size?; + size: number; // e.g. 
16 or 24 backgroundColor?; style?: CSSProperties; innerStyle?: CSSProperties; diff --git a/src/packages/frontend/components/llm-name.tsx b/src/packages/frontend/components/llm-name.tsx new file mode 100644 index 0000000000..d2286530f6 --- /dev/null +++ b/src/packages/frontend/components/llm-name.tsx @@ -0,0 +1,34 @@ +import { useTypedRedux } from "@cocalc/frontend/app-framework"; +import { modelToName } from "@cocalc/frontend/frame-editors/llm/model-switch"; +import { + fromOllamaModel, + isLanguageModel, + isOllamaLLM, +} from "@cocalc/util/db-schema/llm"; +import { LanguageModelVendorAvatar } from "./language-model-icon"; + +export function LLMModelName(props: Readonly<{ model: string }>) { + const { model } = props; + + const ollama = useTypedRedux("customize", "ollama"); + + function renderTitle() { + if (isLanguageModel(model)) { + return modelToName(model); + } + + if (isOllamaLLM(model)) { + const om = ollama?.get(fromOllamaModel(model)); + if (om) { + return om.get("display") ?? `Ollama ${model}`; + } + } + return model; + } + + return ( + <> + {renderTitle()} + + ); +} diff --git a/src/packages/frontend/frame-editors/llm/help-me-fix.tsx b/src/packages/frontend/frame-editors/llm/help-me-fix.tsx index 4d5ed71a59..6ebc23a178 100644 --- a/src/packages/frontend/frame-editors/llm/help-me-fix.tsx +++ b/src/packages/frontend/frame-editors/llm/help-me-fix.tsx @@ -51,7 +51,9 @@ export default function HelpMeFix({ const { redux, project_id, path } = useFrameContext(); const [gettingHelp, setGettingHelp] = useState(false); const [errorGettingHelp, setErrorGettingHelp] = useState(""); - const [model, setModel] = useLanguageModelSetting(); + const projectsStore = redux.getStore("projects"); + const enabledLLMs = projectsStore.whichLLMareEnabled(project_id); + const [model, setModel] = useLanguageModelSetting(enabledLLMs); if ( redux == null || !(redux.getStore("projects") as ProjectsStore).hasLanguageModelEnabled( @@ -69,7 +71,6 @@ export default function HelpMeFix({ <> Get Help from{" "} void; - size?; + size?: SizeType; style?: CSS; project_id: string; } @@ -32,7 +38,7 @@ export default function ModelSwitch({ style, model, setModel, - size, + size = "middle", project_id, }: Props) { // ATTN: you cannot use useProjectContext because this component is used outside a project context @@ -55,87 +61,125 @@ export default function ModelSwitch({ ); const ollama = useTypedRedux("customize", "ollama"); - function renderLLMButton(btnModel: LanguageModel, title: string) { - if (!USER_SELECTABLE_LANGUAGE_MODELS.includes(btnModel)) return; - const prefix = isFreeModel(btnModel) ? "FREE" : "NOT FREE"; - return ( - - - {modelToName(btnModel)} - {btnModel === model - ? !isFreeModel(btnModel) - ? " (not free)" - : " (free)" - : undefined} - - + function getPrice(btnModel): JSX.Element { + return isFreeModel(btnModel) ? 
( + free + ) : ( + paid ); } - function renderOpenAI() { - if (!showOpenAI) return null; - return ( + function makeLLMOption( + ret: NonNullable, + btnModel: LanguageModel, + title: string, + ) { + if (!USER_SELECTABLE_LANGUAGE_MODELS.includes(btnModel)) return; + + const display = ( <> - {renderLLMButton( - "gpt-3.5-turbo", - "OpenAI's fastest model, great for most everyday tasks (4k token context)", - )} - {renderLLMButton( - "gpt-3.5-turbo-16k", - `Same as ${modelToName( - "gpt-3.5-turbo", - )} but with much larger context size (16k token context)`, - )} - {renderLLMButton( - "gpt-4", - "OpenAI's most capable model, great for tasks that require creativity and advanced reasoning (8k token context)", - )} + {modelToName(btnModel)} {getPrice(btnModel)} ); + const text = ( + <> + {display}: {title} + + ); + ret.push({ + value: btnModel, + display, + label: ( + + {text} + + ), + }); } - function renderGoogle() { + function appendOpenAI(ret: NonNullable) { + if (!showOpenAI) return null; + + makeLLMOption( + ret, + "gpt-3.5-turbo", + "OpenAI's fastest model, great for most everyday tasks (4k token context)", + ); + makeLLMOption( + ret, + "gpt-3.5-turbo-16k", + `Same as ${modelToName( + "gpt-3.5-turbo", + )} but with much larger context size (16k token context)`, + ); + makeLLMOption( + ret, + "gpt-4", + "OpenAI's most capable model, great for tasks that require creativity and advanced reasoning (8k token context)", + ); + } + + function appendGoogle(ret: NonNullable) { if (!showGoogle) return null; return ( <> - {renderLLMButton( + {makeLLMOption( + ret, GOOGLE_GEMINI, - `Google's Gemini Pro Generative AI model ('${GOOGLE_GEMINI}', 30k token context)`, + `Google's Gemini Pro Generative AI model (30k token context)`, )} ); } - function renderOllama() { + function appendOllama(ret: NonNullable) { if (!showOllama || !ollama) return null; - return Object.entries(ollama.toJS()).map(([key, config]) => { - const { display } = config; - return ( - - {display} - + for (const [key, config] of Object.entries(ollama.toJS())) { + const { display, desc } = config; + const ollamaModel = toOllamaModel(key); + const text = ( + <> + {display} {getPrice(ollamaModel)}: {desc ?? "Ollama"} + ); - }); + ret.push({ + value: ollamaModel, + display: ( + <> + {modelToName(ollamaModel)} {getPrice(ollamaModel)} + + ), + label: ( + + {text} + + ), + }); + } + } + + function getOptions(): SelectProps["options"] { + const ret: NonNullable = []; + appendOpenAI(ret); + appendGoogle(ret); + appendOllama(ret); + return ret; } - // all models selectable here must be in util/db-schema/openai::USER_SELECTABLE_LANGUAGE_MODELS + // all models selectable here must be in util/db-schema/openai::USER_SELECTABLE_LANGUAGE_MODELS + the custom ones from the ollama configuration return ( - { - setModel(value); - }} - > - {renderOpenAI()} - {renderGoogle()} - {renderOllama()} - + onChange={setModel} + style={{ width: 300 }} + optionLabelProp={"display"} + popupMatchSelectWidth={false} + options={getOptions()} + /> ); } diff --git a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx index 638a81d358..5d5af34a04 100644 --- a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx +++ b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx @@ -11,7 +11,9 @@ to do the work. 
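// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the diff above). The Ollama entries in the
// dropdown are keyed by wrapping the site-setting key with toOllamaModel();
// fromOllamaModel() reverses it. A minimal round trip, assuming these helpers
// from @cocalc/util/db-schema/llm and an "ollama-<key>"-style prefix (the
// exact prefix is internal to that module):
import {
  fromOllamaModel,
  isOllamaLLM,
  toOllamaModel,
} from "@cocalc/util/db-schema/llm";

const key = "mistral"; // made-up key from the ollama_configuration setting
const service = toOllamaModel(key); // e.g. "ollama-mistral"
console.log(isOllamaLLM(service)); // true
console.log(fromOllamaModel(service) === key); // true
// ---------------------------------------------------------------------------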
import { Alert, Button, Input, Popover, Radio, Space, Tooltip } from "antd"; import { useEffect, useMemo, useRef, useState } from "react"; + import { useLanguageModelSetting } from "@cocalc/frontend/account/useLanguageModelSetting"; +import { redux } from "@cocalc/frontend/app-framework"; import { Icon, IconName, @@ -156,7 +158,10 @@ export default function LanguageModelTitleBarButton({ const scopeRef = useRef(null); const contextRef = useRef(null); const submitRef = useRef(null); - const [model, setModel] = useLanguageModelSetting(); + + const projectsStore = redux.getStore("projects"); + const enabledLLMs = projectsStore.whichLLMareEnabled(project_id); + const [model, setModel] = useLanguageModelSetting(enabledLLMs); useEffect(() => { if (showDialog) { @@ -275,7 +280,6 @@ export default function LanguageModelTitleBarButton({ Switch model:{" "} diff --git a/src/packages/frontend/jupyter/chatgpt/explain.tsx b/src/packages/frontend/jupyter/chatgpt/explain.tsx index f53f8a6563..5f5f9278d0 100644 --- a/src/packages/frontend/jupyter/chatgpt/explain.tsx +++ b/src/packages/frontend/jupyter/chatgpt/explain.tsx @@ -6,6 +6,7 @@ import { Alert, Button } from "antd"; import { CSSProperties, useState } from "react"; import { useLanguageModelSetting } from "@cocalc/frontend/account/useLanguageModelSetting"; +import { redux } from "@cocalc/frontend/app-framework"; import getChatActions from "@cocalc/frontend/chat/get-actions"; import AIAvatar from "@cocalc/frontend/components/ai-avatar"; import { Icon } from "@cocalc/frontend/components/icon"; @@ -31,7 +32,9 @@ export default function ChatGPTExplain({ actions, id, style }: Props) { const { project_id, path } = useFrameContext(); const [gettingExplanation, setGettingExplanation] = useState(false); const [error, setError] = useState(""); - const [model, setModel] = useLanguageModelSetting(); + const projectsStore = redux.getStore("projects"); + const enabledLLMs = projectsStore.whichLLMareEnabled(project_id); + const [model, setModel] = useLanguageModelSetting(enabledLLMs); if ( actions == null || @@ -49,7 +52,6 @@ export default function ChatGPTExplain({ actions, id, style }: Props) { Get explanation of this code from{" "} (false); const { project_id, path } = useFrameContext(); + + const projectsStore = redux.getStore("projects"); + const enabledLLMs = projectsStore.whichLLMareEnabled(project_id); + const [model, setModel] = useLanguageModelSetting(enabledLLMs); + const [querying, setQuerying] = useState(false); const [prompt, setPrompt] = useState(""); const input = useMemo(() => { if (!showChatGPT) return ""; @@ -70,7 +73,6 @@ export default function AIGenerateCodeCell({ cell using{" "} diff --git a/src/packages/frontend/project/context.tsx b/src/packages/frontend/project/context.tsx index 2afed6d081..727dd6fc37 100644 --- a/src/packages/frontend/project/context.tsx +++ b/src/packages/frontend/project/context.tsx @@ -26,6 +26,11 @@ import { useProjectStatus } from "./page/project-status-hook"; import { useProjectHasInternetAccess } from "./settings/has-internet-access-hook"; import { Project } from "./settings/types"; +export interface EnabledLLMs { + openai: boolean; + google: boolean; + ollama: boolean; +} export interface ProjectContextState { actions?: ProjectActions; active_project_tab?: string; @@ -39,11 +44,7 @@ export interface ProjectContextState { flipTabs: [number, React.Dispatch>]; onCoCalcCom: boolean; onCoCalcDocker: boolean; - enabledLLMs: { - openai: boolean; - google: boolean; - ollama: boolean; - }; + enabledLLMs: EnabledLLMs; } 
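// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the diff above). One way a component
// rendered inside a project could consume the enabledLLMs flags added to the
// context above; useAnyLLMEnabled is a hypothetical helper, not CoCalc code.
import { useProjectContext } from "@cocalc/frontend/project/context";

export function useAnyLLMEnabled(): boolean {
  const { enabledLLMs } = useProjectContext();
  return enabledLLMs.openai || enabledLLMs.google || enabledLLMs.ollama;
}
// ---------------------------------------------------------------------------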
export const ProjectContext: Context = diff --git a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx index af1a2bd413..f31733f1e5 100644 --- a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx +++ b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx @@ -86,7 +86,9 @@ export default function AIGenerateJupyterNotebook({ onSuccess, project_id, }: Props) { - const [model, setModel] = useLanguageModelSetting(); + const projectsStore = redux.getStore("projects"); + const enabledLLMs = projectsStore.whichLLMareEnabled(project_id); + const [model, setModel] = useLanguageModelSetting(enabledLLMs); const [kernelSpecs, setKernelSpecs] = useState( null, ); diff --git a/src/packages/hub/webapp-configuration.ts b/src/packages/hub/webapp-configuration.ts index 8dd725d646..ba72d40177 100644 --- a/src/packages/hub/webapp-configuration.ts +++ b/src/packages/hub/webapp-configuration.ts @@ -191,6 +191,7 @@ export class WebappConfiguration { model, display: cocalc.display ?? `Ollama ${model}`, icon: cocalc.icon, // fallback is the Ollama icon, frontend does that + desc: cocalc.desc ?? "", }; } return public_ollama; diff --git a/src/packages/util/db-schema/site-settings-extras.ts b/src/packages/util/db-schema/site-settings-extras.ts index 4557aa0a64..7092a46a48 100644 --- a/src/packages/util/db-schema/site-settings-extras.ts +++ b/src/packages/util/db-schema/site-settings-extras.ts @@ -34,6 +34,7 @@ import { } from "./site-defaults"; import { expire_time, is_valid_email_address } from "@cocalc/util/misc"; +import { isEmpty } from "lodash"; export const pii_retention_parse = (retention: string): number | false => { if (retention == "never" || retention == null) return false; @@ -85,7 +86,7 @@ const jupyter_api_enabled = (conf: SiteSettings) => to_bool(conf.jupyter_api_enabled); function ollama_valid(value: string): boolean { - if (!parsableJson(value)) { + if (isEmpty(value) || !parsableJson(value)) { return false; } const obj = from_json(value); @@ -111,6 +112,9 @@ function ollama_valid(value: string): boolean { if (c.display && typeof c.display !== "string") { return false; } + if (c.desc && typeof c.desc !== "string") { + return false; + } if (c.enabled && typeof c.enabled !== "boolean") { return false; } @@ -120,8 +124,13 @@ function ollama_valid(value: string): boolean { } function ollama_display(value: string): string { + const structure = + "Must be {[key : string] : {model: string, baseUrL: string, cocalc?: {display?: string, desc?: string, ...}, ...}"; + if (isEmpty(value)) { + return `Empty. ${structure}`; + } if (!parsableJson(value)) { - return "Ollama JSON not parseable. Must be {[key : string] : {model: string, baseUrL: string, cocalc: {display: string, ...}, ...}"; + return `Ollama JSON not parseable. 
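// ---------------------------------------------------------------------------
// Illustrative example (not part of the diff above). A value for the
// ollama_configuration site setting matching the structure string described
// above; the key, model, URL and texts are made up, and the field name
// "baseUrl" (the structure string spells it "baseUrL") is an assumption.
const exampleOllamaConfiguration = {
  mistral: {
    model: "mistral",
    baseUrl: "http://localhost:11434/",
    cocalc: {
      display: "Mistral 7B",
      desc: "Mistral 7B served by a local Ollama instance",
      enabled: true,
    },
  },
};
console.log(JSON.stringify(exampleOllamaConfiguration, null, 2));
// ---------------------------------------------------------------------------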
${structure}`; } const obj = from_json(value); if (typeof obj !== "object") { @@ -142,11 +151,14 @@ function ollama_display(value: string): string { const c = val.cocalc; if (c != null) { if (typeof c !== "object") { - return `Ollama config ${key} cocalc field must be an object`; + return `Ollama config ${key} cocalc field must be an object: {display?: string, desc?: string, enabled?: boolean, ...}`; } if (c.display && typeof c.display !== "string") { return `Ollama config ${key} cocalc.display field must be a string`; } + if (c.desc && typeof c.desc !== "string") { + return `Ollama config ${key} cocalc.desc field must be a (markdown) string`; + } if (c.enabled && typeof c.enabled !== "boolean") { return `Ollama config ${key} cocalc.enabled field must be a boolean`; } diff --git a/src/packages/util/types/llm.ts b/src/packages/util/types/llm.ts index f9395d166d..9cbd785746 100644 --- a/src/packages/util/types/llm.ts +++ b/src/packages/util/types/llm.ts @@ -33,6 +33,7 @@ export interface ChatOptions { export interface OllamaPublic { model: string; - display: string; + display: string; // name of the model + desc?: string; // additional description icon?: string; // fallback to OllamaAvatar } From a1ddbed97ddc84ce8ad97c50eb8260b1dfd87517 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Wed, 28 Feb 2024 12:49:33 +0100 Subject: [PATCH 07/32] frontend/jupyter: cleanup weird "chatgpt" object, fix codebar offset, box around cell number, etc --- src/packages/frontend/jupyter/cell-input.tsx | 35 ++++++++++--------- src/packages/frontend/jupyter/cell-list.tsx | 8 ++--- src/packages/frontend/jupyter/cell-output.tsx | 24 +++++++++---- src/packages/frontend/jupyter/cell.tsx | 26 ++++++++------ .../frontend/jupyter/codemirror-static.tsx | 1 + .../frontend/jupyter/history-viewer.tsx | 16 +++++---- .../frontend/jupyter/insert-cell/index.tsx | 6 ++-- src/packages/frontend/jupyter/main.tsx | 3 +- .../jupyter/output-messages/message.tsx | 30 ++++++++-------- 9 files changed, 85 insertions(+), 64 deletions(-) diff --git a/src/packages/frontend/jupyter/cell-input.tsx b/src/packages/frontend/jupyter/cell-input.tsx index dcd2ee12e4..0212d91203 100644 --- a/src/packages/frontend/jupyter/cell-input.tsx +++ b/src/packages/frontend/jupyter/cell-input.tsx @@ -6,13 +6,17 @@ /* React component that describes the input of a cell */ +import { Button, Tooltip } from "antd"; +import { delay } from "awaiting"; import { Map } from "immutable"; import { useCallback, useEffect, useRef, useState } from "react"; -import { Button, Tooltip } from "antd"; + import { React, Rendered } from "@cocalc/frontend/app-framework"; import { Icon } from "@cocalc/frontend/components"; import CopyButton from "@cocalc/frontend/components/copy-button"; +import { HiddenXS } from "@cocalc/frontend/components/hidden-visible"; import PasteButton from "@cocalc/frontend/components/paste-button"; +import ComputeServer from "@cocalc/frontend/compute/inline"; import MarkdownInput from "@cocalc/frontend/editors/markdown-input/multimode"; import MostlyStaticMarkdown from "@cocalc/frontend/editors/slate/mostly-static-markdown"; import { SAVE_DEBOUNCE_MS } from "@cocalc/frontend/frame-editors/code-editor/const"; @@ -24,12 +28,10 @@ import { JupyterActions } from "./browser-actions"; import { CellHiddenPart } from "./cell-hidden-part"; import CellTiming from "./cell-output-time"; import { CellToolbar } from "./cell-toolbar"; +import * as chatgpt from "./chatgpt"; import { CodeMirror } from "./codemirror-component"; import { InputPrompt } from 
"./prompt/input"; import { get_blob_url } from "./server-urls"; -import { delay } from "awaiting"; -import { HiddenXS } from "@cocalc/frontend/components/hidden-visible"; -import ComputeServer from "@cocalc/frontend/compute/inline"; function attachmentTransform( project_id: string | undefined, @@ -76,7 +78,7 @@ export interface CellInputProps { is_scrolling?: boolean; id: string; index: number; - chatgpt?; + showAItools: boolean; computeServerId?: number; } @@ -399,7 +401,7 @@ export const CellInput: React.FC = React.memo( style={{ position: "absolute", right: "2px", - top: "-20px", + top: "-21px", }} className="hidden-xs" > @@ -421,12 +423,9 @@ export const CellInput: React.FC = React.memo( /> )} - {props.chatgpt != null && ( - - )} + {props.showAItools ? ( + + ) : undefined} {/* Should only show formatter button if there is a way to format this code. */} {!props.is_readonly && props.actions != null && ( @@ -469,18 +468,20 @@ export const CellInput: React.FC = React.memo( } /> )} - {input && ( + {input ? (
{props.index + 1}
- )} + ) : undefined} ); diff --git a/src/packages/frontend/jupyter/cell-list.tsx b/src/packages/frontend/jupyter/cell-list.tsx index d6c0e2605c..53ad746f81 100644 --- a/src/packages/frontend/jupyter/cell-list.tsx +++ b/src/packages/frontend/jupyter/cell-list.tsx @@ -97,7 +97,7 @@ interface CellListProps { sel_ids?: immutable.Set; // set of selected cells trust?: boolean; use_windowed_list?: boolean; - chatgpt?; + showAItools: boolean; // if yes, it's a usual cell and we enable AI tools computeServerId?: number; } @@ -125,7 +125,7 @@ export const CellList: React.FC = (props: CellListProps) => { sel_ids, trust, use_windowed_list, - chatgpt, + showAItools, computeServerId, } = props; @@ -426,7 +426,7 @@ export const CellList: React.FC = (props: CellListProps) => { return ( = (props: CellListProps) => { trust={trust} is_scrolling={isScrolling} delayRendering={delayRendering} - chatgpt={chatgpt} + showAItools={showAItools} computeServerId={computeServerId} /> diff --git a/src/packages/frontend/jupyter/cell-output.tsx b/src/packages/frontend/jupyter/cell-output.tsx index f23452f3f8..122e612fe5 100644 --- a/src/packages/frontend/jupyter/cell-output.tsx +++ b/src/packages/frontend/jupyter/cell-output.tsx @@ -29,7 +29,7 @@ interface CellOutputProps { hidePrompt?: boolean; style?: React.CSSProperties; divRef?; - chatgpt?; + showAItools: boolean; } export function CellOutput({ @@ -45,7 +45,7 @@ export function CellOutput({ hidePrompt, divRef, style, - chatgpt, + showAItools, }: CellOutputProps) { const minHeight = complete ? "60vh" : undefined; @@ -88,12 +88,24 @@ export function CellOutput({ directory={directory} name={name} trust={trust} - chatgpt={chatgpt} + showAItools={showAItools} /> ); } +interface OutputColumnProps { + cell: ImmutableMap; + id: string; + actions?: JupyterActions; + more_output?: ImmutableMap; + project_id?: string; + directory?: string; + name?: string; + trust?: boolean; + showAItools: boolean; +} + function OutputColumn({ cell, id, @@ -103,8 +115,8 @@ function OutputColumn({ directory, name, trust, - chatgpt, -}) { + showAItools +}: OutputColumnProps) { if (cell.get("collapsed")) { return ; } @@ -138,7 +150,7 @@ function OutputColumn({ name={name} trust={trust} id={id} - chatgpt={chatgpt} + showAItools={showAItools} /> ); } diff --git a/src/packages/frontend/jupyter/cell.tsx b/src/packages/frontend/jupyter/cell.tsx index 463a7e4a3d..af1d4f38d9 100644 --- a/src/packages/frontend/jupyter/cell.tsx +++ b/src/packages/frontend/jupyter/cell.tsx @@ -4,22 +4,25 @@ */ /* -React component that describes a single cella +React component that describes a single cell */ import { Map } from "immutable"; -import { React, Rendered, useDelayedRender } from "../app-framework"; -import { clear_selection } from "../misc/clear-selection"; + +import { + React, + Rendered, + useDelayedRender, +} from "@cocalc/frontend/app-framework"; +import useNotebookFrameActions from "@cocalc/frontend/frame-editors/jupyter-editor/cell-notebook/hook"; import { COLORS } from "@cocalc/util/theme"; -import { INPUT_PROMPT_COLOR } from "./prompt/base"; import { Icon, Tip } from "../components"; +import { clear_selection } from "../misc/clear-selection"; +import { JupyterActions } from "./browser-actions"; import { CellInput } from "./cell-input"; import { CellOutput } from "./cell-output"; - -import { JupyterActions } from "./browser-actions"; -import useNotebookFrameActions from "@cocalc/frontend/frame-editors/jupyter-editor/cell-notebook/hook"; - import { NBGraderMetadata } from "./nbgrader/cell-metadata"; +import 
{ INPUT_PROMPT_COLOR } from "./prompt/base"; interface Props { cell: Map; // TODO: types @@ -44,7 +47,7 @@ interface Props { is_scrolling?: boolean; height?: number; // optional fixed height delayRendering?: number; - chatgpt?; + showAItools: boolean; computeServerId?: number; } @@ -67,6 +70,7 @@ function areEqual(props: Props, nextProps: Props): boolean { nextProps.is_scrolling !== props.is_scrolling || nextProps.height !== props.height || nextProps.computeServerId !== props.computeServerId || + nextProps.showAItools !== props.showAItools || (nextProps.complete !== props.complete && // only worry about complete when editing this cell (nextProps.is_current || props.is_current)) ); @@ -112,7 +116,7 @@ export const Cell: React.FC = React.memo((props) => { trust={props.trust} is_readonly={!is_editable()} is_scrolling={props.is_scrolling} - chatgpt={props.chatgpt} + showAItools={props.showAItools} computeServerId={props.computeServerId} /> ); @@ -136,7 +140,7 @@ export const Cell: React.FC = React.memo((props) => { more_output={props.more_output} trust={props.trust} complete={props.is_current && props.complete != null} - chatgpt={props.chatgpt} + showAItools={props.showAItools} /> ); } diff --git a/src/packages/frontend/jupyter/codemirror-static.tsx b/src/packages/frontend/jupyter/codemirror-static.tsx index 19c5abeab5..7b8925fa1e 100644 --- a/src/packages/frontend/jupyter/codemirror-static.tsx +++ b/src/packages/frontend/jupyter/codemirror-static.tsx @@ -21,6 +21,7 @@ nextjs friendly code editor to make it editable. (This is NOT codemirror.) */ import React, { ReactNode } from "react"; + import CodeMirror from "@cocalc/frontend/codemirror/static"; import CodeEditor from "@cocalc/frontend/components/code-editor"; diff --git a/src/packages/frontend/jupyter/history-viewer.tsx b/src/packages/frontend/jupyter/history-viewer.tsx index a418384604..def11b2ab8 100644 --- a/src/packages/frontend/jupyter/history-viewer.tsx +++ b/src/packages/frontend/jupyter/history-viewer.tsx @@ -8,25 +8,26 @@ History viewer for Jupyter notebooks */ import { fromJS, List, Map } from "immutable"; + +import { Redux, useTypedRedux } from "@cocalc/frontend/app-framework"; +import { ErrorDisplay } from "@cocalc/frontend/components"; +import * as cell_utils from "@cocalc/jupyter/util/cell-utils"; import { SyncDB } from "@cocalc/sync/editor/db/sync"; -import { Redux, useTypedRedux } from "../app-framework"; -import { createRoot } from "react-dom/client"; import { path_split } from "@cocalc/util/misc"; -import * as cell_utils from "@cocalc/jupyter/util/cell-utils"; +import { createRoot } from "react-dom/client"; import { CellList } from "./cell-list"; import { cm_options } from "./cm_options"; -import { ErrorDisplay } from "../components"; import { ERROR_STYLE } from "./main"; function get_cells( syncdb: SyncDB, - version?: Date + version?: Date, ): { cells: Map; cell_list: List } { let cells = Map(); const othercells = syncdb.version(version).get({ type: "cell" }); if (othercells != null) { othercells.forEach( - (cell: any) => (cells = cells.set(cell.get("id"), cell)) + (cell: any) => (cells = cells.set(cell.get("id"), cell)), ); } const cell_list = cell_utils.sorted_cell_list(cells); @@ -84,6 +85,7 @@ export const HistoryViewer: React.FC = ({ project_id={project_id} directory={directory} trust={false} + showAItools={false} /> ); @@ -115,7 +117,7 @@ export function jupyter_history_viewer_jquery_shim(syncdb: SyncDB) { root.render( - + , ); }, to_str(version) { diff --git a/src/packages/frontend/jupyter/insert-cell/index.tsx 
b/src/packages/frontend/jupyter/insert-cell/index.tsx index ea50875089..fcc0506720 100644 --- a/src/packages/frontend/jupyter/insert-cell/index.tsx +++ b/src/packages/frontend/jupyter/insert-cell/index.tsx @@ -34,7 +34,7 @@ export interface InsertCellProps { actions: JupyterActions; id: string; position: "above" | "below"; - chatgpt?; + showAItools: boolean; } export interface InsertCellState { @@ -43,13 +43,13 @@ export interface InsertCellState { export function InsertCell({ position, - chatgpt, + showAItools, actions, id, }: InsertCellProps) { const { project_id } = useFrameContext(); const haveChatGTP = - chatgpt && + showAItools && redux .getStore("projects") .hasLanguageModelEnabled(project_id, "generate-cell"); diff --git a/src/packages/frontend/jupyter/main.tsx b/src/packages/frontend/jupyter/main.tsx index 32c312c2df..0c01f69aa2 100644 --- a/src/packages/frontend/jupyter/main.tsx +++ b/src/packages/frontend/jupyter/main.tsx @@ -43,7 +43,6 @@ import { KernelSelector } from "./select-kernel"; import { Kernel } from "./status"; import { NotebookMode, Scroll } from "@cocalc/jupyter/types"; import { Kernels as KernelsType } from "@cocalc/jupyter/util/misc"; -import * as chatgpt from "./chatgpt"; import KernelWarning from "./kernel-warning"; import { ComputeServerDocStatus } from "@cocalc/frontend/compute/doc-status"; @@ -286,7 +285,7 @@ export const JupyterEditor: React.FC = React.memo((props: Props) => { sel_ids={sel_ids} trust={trust} use_windowed_list={useWindowedListRef.current} - chatgpt={chatgpt} + showAItools={true} computeServerId={computeServerId} /> ); diff --git a/src/packages/frontend/jupyter/output-messages/message.tsx b/src/packages/frontend/jupyter/output-messages/message.tsx index 1fa3fb134d..3707d727cd 100644 --- a/src/packages/frontend/jupyter/output-messages/message.tsx +++ b/src/packages/frontend/jupyter/output-messages/message.tsx @@ -7,19 +7,21 @@ Handling of output messages. 
*/ -import React from "react"; +import Anser from "anser"; import type { Map } from "immutable"; +import React from "react"; + import type { JupyterActions } from "@cocalc/jupyter/redux/actions"; -import { OUTPUT_STYLE, OUTPUT_STYLE_SCROLLED } from "./style"; -import { Stdout } from "./stdout"; -import { Stderr } from "./stderr"; -import { MoreOutput } from "./more-output"; +import * as chatgpt from "../chatgpt"; import { Input } from "./input"; import { InputDone } from "./input-done"; import { Data } from "./mime-types/data"; -import { Traceback } from "./traceback"; +import { MoreOutput } from "./more-output"; import { NotImplemented } from "./not-implemented"; -import Anser from "anser"; +import { Stderr } from "./stderr"; +import { Stdout } from "./stdout"; +import { OUTPUT_STYLE, OUTPUT_STYLE_SCROLLED } from "./style"; +import { Traceback } from "./traceback"; function messageComponent(message: Map): any { if (message.get("more_output") != null) { @@ -71,7 +73,7 @@ export const CellOutputMessage: React.FC = React.memo( id={props.id} /> ); - } + }, ); interface CellOutputMessagesProps { @@ -83,7 +85,7 @@ interface CellOutputMessagesProps { scrolled?: boolean; trust?: boolean; id?: string; - chatgpt?; + showAItools: boolean; } function shouldMemoize(prev, next) { @@ -104,11 +106,11 @@ export const CellOutputMessages: React.FC = React.memo( scrolled, trust, id, - chatgpt, + showAItools, }: CellOutputMessagesProps) => { const obj: Map[] = React.useMemo( () => messageList(output), - [output] + [output], ); const v: JSX.Element[] = []; @@ -139,12 +141,12 @@ export const CellOutputMessages: React.FC = React.memo( name={name} trust={trust} id={id} - /> + />, ); } } const help = - hasError && id && actions && chatgpt ? ( + hasError && id && actions && showAItools ? 
( = React.memo( ); }, - shouldMemoize + shouldMemoize, ); function numericallyOrderedKeys(obj: object): number[] { From eef5a852e05bd67997d01de5f929d796de60d17d Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Fri, 8 Mar 2024 20:12:47 +0100 Subject: [PATCH 08/32] llm: get rid of silly "string" fallback typing --- .../frontend/account/other-settings.tsx | 1 + .../account/useLanguageModelSetting.tsx | 8 +- src/packages/frontend/client/openai.ts | 10 +- .../components/language-model-icon.tsx | 3 +- src/packages/frontend/components/llm-name.tsx | 3 +- .../frontend/components/selector-input.tsx | 4 +- .../frontend/frame-editors/llm/create-chat.ts | 2 +- .../frame-editors/llm/model-switch.tsx | 13 ++- .../frame-editors/llm/title-bar-button.tsx | 2 +- .../frontend/jupyter/chatgpt/explain.tsx | 2 +- .../jupyter/insert-cell/ai-cell-generator.tsx | 4 +- src/packages/frontend/misc/openai.ts | 6 +- src/packages/jupyter/types/types.ts | 4 +- src/packages/server/llm/client.ts | 2 +- src/packages/util/db-schema/llm.test.ts | 13 ++- src/packages/util/db-schema/llm.ts | 93 +++++++++++++------ 16 files changed, 109 insertions(+), 61 deletions(-) diff --git a/src/packages/frontend/account/other-settings.tsx b/src/packages/frontend/account/other-settings.tsx index 220cc1d5ae..f31f233210 100644 --- a/src/packages/frontend/account/other-settings.tsx +++ b/src/packages/frontend/account/other-settings.tsx @@ -390,6 +390,7 @@ export class OtherSettings extends Component { const options: { value: string; display: JSX.Element }[] = []; for (const key of USER_SELECTABLE_LANGUAGE_MODELS) { + if (typeof key !== "string") continue const vendor = model2vendor(key); if (vendor === "google" && !enabled.google) continue; if (vendor === "openai" && !enabled.openai) continue; diff --git a/src/packages/frontend/account/useLanguageModelSetting.tsx b/src/packages/frontend/account/useLanguageModelSetting.tsx index 2029028659..01a7f78daa 100644 --- a/src/packages/frontend/account/useLanguageModelSetting.tsx +++ b/src/packages/frontend/account/useLanguageModelSetting.tsx @@ -1,7 +1,7 @@ import { redux, useMemo, useTypedRedux } from "@cocalc/frontend/app-framework"; import { EnabledLLMs } from "@cocalc/frontend/project/context"; import { - LanguageModel, + LanguageService, USER_SELECTABLE_LANGUAGE_MODELS, fromOllamaModel, getValidLanguageModelName, @@ -14,7 +14,7 @@ export const SETTINGS_LANGUAGE_MODEL_KEY = "language_model"; // The "AI Formula" dialog is outside the project context (unfortunately) export function useLanguageModelSetting( project_id?: string, -): [LanguageModel | string, (llm: LanguageModel | string) => void] { +): [LanguageService, (llm: LanguageService) => void] { const other_settings = useTypedRedux("account", "other_settings"); const ollama = useTypedRedux("customize", "ollama"); @@ -27,7 +27,7 @@ export function useLanguageModelSetting( return projectsStore.whichLLMareEnabled(project_id); }, [haveOpenAI, haveGoogle, haveOllama]); - const llm = useMemo(() => { + const llm: LanguageService = useMemo(() => { return getValidLanguageModelName( other_settings?.get("language_model"), enabledLLMs, @@ -35,7 +35,7 @@ export function useLanguageModelSetting( ); }, [other_settings]); - function setLLM(llm: LanguageModel | string) { + function setLLM(llm: LanguageService) { if (USER_SELECTABLE_LANGUAGE_MODELS.includes(llm as any)) { redux .getActions("account") diff --git a/src/packages/frontend/client/openai.ts b/src/packages/frontend/client/openai.ts index 70b70bd935..23e9528347 100644 --- 
a/src/packages/frontend/client/openai.ts +++ b/src/packages/frontend/client/openai.ts @@ -8,6 +8,11 @@ import { EventEmitter } from "events"; import { redux } from "@cocalc/frontend/app-framework"; import type { History } from "@cocalc/frontend/misc/openai"; // do not import until needed -- it is HUGE! +import { + LanguageModel, + isFreeModel, + model2service, +} from "@cocalc/util/db-schema/llm"; import type { EmbeddingData } from "@cocalc/util/db-schema/openai"; import { MAX_EMBEDDINGS_TOKENS, @@ -15,10 +20,9 @@ import { MAX_SAVE_LIMIT, MAX_SEARCH_LIMIT, } from "@cocalc/util/db-schema/openai"; +import { LLMService } from "@cocalc/util/db-schema/purchases"; import * as message from "@cocalc/util/message"; import type { WebappClient } from "./client"; -import { LanguageModel, LanguageService } from "@cocalc/util/db-schema/llm"; -import { isFreeModel, model2service } from "@cocalc/util/db-schema/llm"; const DEFAULT_SYSTEM_PROMPT = "ASSUME THAT I HAVE FULL ACCESS TO COCALC AND I AM USING COCALC RIGHT NOW. ENCLOSE ALL MATH IN $. INCLUDE THE LANGUAGE DIRECTLY AFTER THE TRIPLE BACKTICKS IN ALL MARKDOWN CODE BLOCKS. BE BRIEF."; @@ -96,7 +100,7 @@ export class LLMClient { if (!isFreeModel(model)) { // Ollama and others are treated as "free" - const service = model2service(model) as LanguageService; + const service = model2service(model) as LLMService; // when client gets non-free openai model request, check if allowed. If not, show quota modal. const { allowed, reason } = await this.client.purchases_client.isPurchaseAllowed(service); diff --git a/src/packages/frontend/components/language-model-icon.tsx b/src/packages/frontend/components/language-model-icon.tsx index 6bfe97c838..b5e28f6adc 100644 --- a/src/packages/frontend/components/language-model-icon.tsx +++ b/src/packages/frontend/components/language-model-icon.tsx @@ -1,5 +1,6 @@ import { CSS } from "@cocalc/frontend/app-framework"; import { + LanguageModel, isLanguageModel, isOllamaLLM, model2vendor, @@ -13,7 +14,7 @@ import OpenAIAvatar from "./openai-avatar"; export function LanguageModelVendorAvatar( props: Readonly<{ - model?: string; + model?: LanguageModel; size?: number; style?: CSS; }>, diff --git a/src/packages/frontend/components/llm-name.tsx b/src/packages/frontend/components/llm-name.tsx index d2286530f6..6cb9f8464f 100644 --- a/src/packages/frontend/components/llm-name.tsx +++ b/src/packages/frontend/components/llm-name.tsx @@ -1,13 +1,14 @@ import { useTypedRedux } from "@cocalc/frontend/app-framework"; import { modelToName } from "@cocalc/frontend/frame-editors/llm/model-switch"; import { + LanguageModel, fromOllamaModel, isLanguageModel, isOllamaLLM, } from "@cocalc/util/db-schema/llm"; import { LanguageModelVendorAvatar } from "./language-model-icon"; -export function LLMModelName(props: Readonly<{ model: string }>) { +export function LLMModelName(props: Readonly<{ model: LanguageModel }>) { const { model } = props; const ollama = useTypedRedux("customize", "ollama"); diff --git a/src/packages/frontend/components/selector-input.tsx b/src/packages/frontend/components/selector-input.tsx index 5945cc67e5..4dc9bb1a85 100644 --- a/src/packages/frontend/components/selector-input.tsx +++ b/src/packages/frontend/components/selector-input.tsx @@ -3,9 +3,11 @@ * License: AGPLv3 s.t. 
"Commons Clause" – see LICENSE.md for details */ +import { Form, Select } from "antd"; import React from "react"; + import * as misc from "@cocalc/util/misc"; -import { Form, Select } from "antd"; + const { Option } = Select; interface Props { diff --git a/src/packages/frontend/frame-editors/llm/create-chat.ts b/src/packages/frontend/frame-editors/llm/create-chat.ts index 8bc5847244..0c3b66d5bd 100644 --- a/src/packages/frontend/frame-editors/llm/create-chat.ts +++ b/src/packages/frontend/frame-editors/llm/create-chat.ts @@ -9,7 +9,7 @@ export interface Options { command: string; allowEmpty?: boolean; tag?: string; - model: LanguageModel | string; + model: LanguageModel; } export default async function createChat({ diff --git a/src/packages/frontend/frame-editors/llm/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx index 448230446a..74d5e6939a 100644 --- a/src/packages/frontend/frame-editors/llm/model-switch.tsx +++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx @@ -23,8 +23,8 @@ export type { LanguageModel }; type SizeType = ConfigProviderProps["componentSize"]; interface Props { - model: LanguageModel | string; - setModel: (model: LanguageModel | string) => void; + model: LanguageModel; + setModel: (model: LanguageModel) => void; size?: SizeType; style?: CSS; project_id: string; @@ -75,6 +75,7 @@ export default function ModelSwitch({ title: string, ) { if (!USER_SELECTABLE_LANGUAGE_MODELS.includes(btnModel)) return; + if (typeof btnModel !== "string") return; const display = ( <> @@ -183,18 +184,16 @@ export default function ModelSwitch({ ); } -export function modelToName(model: LanguageModel | string): string { +export function modelToName(model: LanguageModel): string { if (isOllamaLLM(model)) { const ollama = redux.getStore("customize").get("ollama")?.toJS() ?? {}; const om = ollama[fromOllamaModel(model)]; - if (om) { - return om.display ?? `Ollama ${model}`; - } + return om ? om.display : `Ollama ${model}`; } return LLM_USERNAMES[model] ?? 
model; } -export function modelToMention(model: LanguageModel | string): string { +export function modelToMention(model: LanguageModel): string { return `@${modelToName(model)}`; diff --git a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx index 9b4e82fc6b..9f5e78bfff 100644 --- a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx +++ b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx @@ -435,7 +435,7 @@ async function updateInput( actions: Actions, id, scope, - model: LanguageModel | string, + model: LanguageModel, ): Promise<{ input: string; inputOrig: string }> { if (scope == "none") { return { input: "", inputOrig: "" }; diff --git a/src/packages/frontend/jupyter/chatgpt/explain.tsx b/src/packages/frontend/jupyter/chatgpt/explain.tsx index 0037376232..4fe0cd2af8 100644 --- a/src/packages/frontend/jupyter/chatgpt/explain.tsx +++ b/src/packages/frontend/jupyter/chatgpt/explain.tsx @@ -134,7 +134,7 @@ async function getExplanation({ actions: JupyterActions; project_id: string; path: string; - model: LanguageModel | string; + model: LanguageModel; }) { const message = createMessage({ id, actions, model, open: false }); if (!message) { diff --git a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx index 0e86ba36d3..03c2157b2c 100644 --- a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx +++ b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx @@ -158,7 +158,7 @@ interface QueryLanguageModelProps { actions: JupyterActions; frameActions: React.MutableRefObject; id: string; - model: LanguageModel | string; + model: LanguageModel; path: string; position: "above" | "below"; project_id: string; @@ -306,7 +306,7 @@ interface GetInputProps { actions: JupyterActions; frameActions: React.MutableRefObject; id: string; - model: LanguageModel | string; + model: LanguageModel; position: "above" | "below"; prompt: string; } diff --git a/src/packages/frontend/misc/openai.ts b/src/packages/frontend/misc/openai.ts index 07dee27848..e6a5abb23b 100644 --- a/src/packages/frontend/misc/openai.ts +++ b/src/packages/frontend/misc/openai.ts @@ -1,8 +1,8 @@ // NOTE! This gpt-3-tokenizer is LARGE, e.g., 1.6MB, so be // sure to async load it by clients of this code. 
-import GPT3Tokenizer from "gpt3-tokenizer"; -import type { Model } from "@cocalc/util/db-schema/llm"; +import type { LanguageModel } from "@cocalc/util/db-schema/llm"; import { getMaxTokens } from "@cocalc/util/db-schema/llm"; +import GPT3Tokenizer from "gpt3-tokenizer"; export { getMaxTokens }; @@ -64,7 +64,7 @@ export function truncateMessage(content: string, maxTokens: number): string { export function truncateHistory( history: History, maxTokens: number, - model: Model, + model: LanguageModel, ): History { if (maxTokens <= 0) { return []; diff --git a/src/packages/jupyter/types/types.ts b/src/packages/jupyter/types/types.ts index ba6923f125..c731608b10 100644 --- a/src/packages/jupyter/types/types.ts +++ b/src/packages/jupyter/types/types.ts @@ -78,8 +78,8 @@ export type KernelMetadata = { }; export interface AiTools { - model: LanguageModel | string; - setModel: (llm: LanguageModel | string) => void; + model: LanguageModel ; + setModel: (llm: LanguageModel ) => void; toolComponents: { ChatGPTExplain; ChatGPTError; diff --git a/src/packages/server/llm/client.ts b/src/packages/server/llm/client.ts index 6d6ab77f85..598a96ada9 100644 --- a/src/packages/server/llm/client.ts +++ b/src/packages/server/llm/client.ts @@ -76,7 +76,7 @@ const ollamaCache: { [key: string]: Ollama } = {}; /** * The idea here is: the ollama config contains all available endpoints and their configuration. - * The "model" is the unique key in the ollama_configuration mapping, it was prefixed by "ollama-". + * The "model" is the unique key in the ollama_configuration mapping, it was prefixed by $OLLAMA_PREFIX. * For the actual Ollama client instantitation, we pick the model parameter from the config or just use the unique model name as a fallback. * In particular, this means you can query the same Ollama model with differnet parameters, or even have several ollama servers running. * All other config parameters are passed to the Ollama constructor (e.g. topK, temperature, etc.). diff --git a/src/packages/util/db-schema/llm.test.ts b/src/packages/util/db-schema/llm.test.ts index f77084ceee..e9d16c50fb 100644 --- a/src/packages/util/db-schema/llm.test.ts +++ b/src/packages/util/db-schema/llm.test.ts @@ -1,13 +1,20 @@ // this tests the wrongly named openai.ts file -import { isFreeModel } from "./llm"; +import { isFreeModel, LANGUAGE_MODELS, LLM_COST, OLLAMA_PREFIX } from "./llm"; -describe("openai/llm", () => { +describe("llm", () => { test("isFreeModel", () => { expect(isFreeModel("gpt-3")).toBe(true); expect(isFreeModel("gpt-4")).toBe(false); // WARNING: if the following breaks, and ollama becomes non-free, then a couple of assumptions are broken as well. // search for model2service(...) as LanguageService in the codebase! - expect(isFreeModel("ollama-1")).toBe(true); + expect(isFreeModel(`${OLLAMA_PREFIX}-1`)).toBe(true); + }); + + test("all keys in the LLM_COST object are valid model names", () => { + // ATTN: don't use isValidModel to test! 
+ for (const model in LLM_COST) { + expect(LANGUAGE_MODELS.includes(model as any)).toBe(true); + } }); }); diff --git a/src/packages/util/db-schema/llm.ts b/src/packages/util/db-schema/llm.ts index c0c41cc21e..ca946980ee 100644 --- a/src/packages/util/db-schema/llm.ts +++ b/src/packages/util/db-schema/llm.ts @@ -1,8 +1,9 @@ // this contains bits and pieces from the wrongly named openai.ts file import type { LLMService, Service } from "@cocalc/util/db-schema/purchases"; -import { unreachable } from "../misc"; +import { unreachable } from "@cocalc/util/misc"; +// the hardcoded list of available language models – there are also dynamic ones, like OllamaLLM objects export const LANGUAGE_MODELS = [ "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", @@ -28,12 +29,21 @@ export const USER_SELECTABLE_LANGUAGE_MODELS: Readonly = [ "gemini-pro", ] as const; -export type LanguageModel = (typeof LANGUAGE_MODELS)[number]; +export type OllamaLLM = string; -export function isLanguageModel(model?: string): model is LanguageModel { - return LANGUAGE_MODELS.includes(model as LanguageModel); +export type LanguageModel = (typeof LANGUAGE_MODELS)[number] | OllamaLLM; + +// we check if the given object is any known language model +export function isLanguageModel(model?: unknown): model is LanguageModel { + if (model == null) return false; + if (isOllamaLLM(model)) return true; + if (typeof model !== "string") return false; + return LANGUAGE_MODELS.includes(model as any); } +// this is used in initialization functions. e.g. to get a default model depending on the overall availability +// usually, this should just return the chatgpt3 model, but e.g. if neither google nor openai is available, +// then it might even fall back to an available ollama model. It needs to return a string, though, for the frontend, etc. export function getValidLanguageModelName( model: string | undefined, filter: { google: boolean; openai: boolean; ollama: boolean } = { google: true, openai: true, ollama: false, }, ollama: string[] = [], // keys of ollama models -): LanguageModel | string { +): LanguageModel { const dftl = filter.openai === true ?
DEFAULT_MODEL @@ -52,10 +62,10 @@ export function getValidLanguageModelName( if (model == null) { return dftl; } - if (LANGUAGE_MODELS.includes(model as LanguageModel)) { + if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) { return model; } - if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) { + if (typeof model === "string" && isLanguageModel(model)) { return model; } return dftl; @@ -67,6 +77,15 @@ export interface OpenAIMessage { } export type OpenAIMessages = OpenAIMessage[]; +export const OLLAMA_PREFIX = "ollama-"; + +export type OllamaService = string; + +export function isOllamaService(service: string): service is OllamaService { + return isOllamaLLM(service); +} + +// we encode the in the frontend and elsewhere with the service name as a prefix export type LanguageService = | "openai-gpt-3.5-turbo" | "openai-gpt-3.5-turbo-16k" @@ -76,7 +95,8 @@ export type LanguageService = | "google-text-bison-001" | "google-chat-bison-001" | "google-embedding-gecko-001" - | "google-gemini-pro"; + | "google-gemini-pro" + | OllamaService; const LANGUAGE_MODEL_VENDORS = ["openai", "google", "ollama"] as const; export type Vendor = (typeof LANGUAGE_MODEL_VENDORS)[number]; @@ -87,12 +107,14 @@ export const LANGUAGE_MODEL_PREFIXES = [ ...LANGUAGE_MODEL_VENDORS.map((v) => `${v}-`), ] as const; -export function model2service( - model: LanguageModel | string, -): LanguageService | string { +// we encode the in the frontend and elsewhere with the service name as a prefix +export function model2service(model: LanguageModel): LanguageService { if (model === "text-embedding-ada-002") { return `openai-${model}`; } + if (isOllamaLLM(model)) { + return toOllamaModel(model); + } if (isLanguageModel(model)) { if ( model === "text-bison-001" || @@ -105,9 +127,7 @@ export function model2service( return `openai-${model}`; } } - if (isOllamaLLM(model)) { - return toOllamaModel(model); - } + throw new Error(`unknown model: ${model}`); } @@ -119,11 +139,15 @@ export function service2model( if (service === "chatgpt") { return "gpt-3.5-turbo"; } + // split off the first part of service, e.g., "openai-" or "google-" const s = service.split("-")[0]; - const hasPrefix = s === "openai" || s === "google"; + const hasPrefix = s === "openai" || s === "google" || s === "ollama"; const m = hasPrefix ? 
service.split("-").slice(1).join("-") : service; - if (!LANGUAGE_MODELS.includes(m as LanguageModel)) { + if (hasPrefix && s === "ollama") { + return toOllamaModel(m); + } + if (!LANGUAGE_MODELS.includes(m as any)) { // We don't throw an error, since the frontend would crash // throw new Error(`unknown service: ${service}`); console.warn(`service2model: unknown service: ${service}`); @@ -135,32 +159,40 @@ export function service2model( // Note: this must be an OpenAI model – otherwise change the getValidLanguageModelName function export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo"; -export function model2vendor(model: LanguageModel | string): Vendor { - if (model.startsWith("gpt-")) { - return "openai"; - } else if (isOllamaLLM(model)) { +export function model2vendor(model): Vendor { + if (isOllamaLLM(model)) { return "ollama"; + } else if (model.startsWith("gpt-")) { + return "openai"; } else { return "google"; } } -export function toOllamaModel(model: string) { +// wraps the model name in an object that indicates that it's an ollama model +// TODO: maybe it will be necessary at some point to pass in the list of available ollama models +// TODO: in the future, this object will also contain info like the max tokens and other parameters (from the DB) +export function toOllamaModel(model: string): OllamaLLM { if (isOllamaLLM(model)) { throw new Error(`already an ollama model: ${model}`); } return `ollama-${model}`; } -export function fromOllamaModel(model: string) { +// unwraps the model name from an object that indicates that it's an ollama model +export function fromOllamaModel(model: OllamaLLM) { if (!isOllamaLLM(model)) { throw new Error(`not an ollama model: ${model}`); } - return model.slice("ollama-".length); + return model.slice(OLLAMA_PREFIX.length); } -export function isOllamaLLM(model: string) { - return model.startsWith("ollama-"); +export function isOllamaLLM(model: unknown): model is OllamaLLM { + return ( + typeof model === "string" && + model.startsWith(OLLAMA_PREFIX) && + model.length > OLLAMA_PREFIX.length + ); } const MODELS_OPENAI = [ @@ -180,7 +212,6 @@ export const MODELS = [ ] as const; export type Model = (typeof MODELS)[number]; - export type ModelOpenAI = (typeof MODELS_OPENAI)[number]; // Map from psuedo account_id to what should be displayed to user. @@ -200,9 +231,9 @@ export const LLM_USERNAMES = { "gemini-pro": "Gemini Pro", } as const; -export function isFreeModel(model: string) { +export function isFreeModel(model: unknown) { if (isOllamaLLM(model)) return true; - if (LANGUAGE_MODELS.includes(model as LanguageModel)) { + if (LANGUAGE_MODELS.includes(model as any)) { // of these models, the following are free return ( (model as Model) == "gpt-3.5-turbo" || @@ -258,7 +289,7 @@ interface Cost { // Our cost is a configurable multiple of this. // https://openai.com/pricing#language-models // There appears to be no api that provides the prices, unfortunately. -const LLM_COST: { [name in LanguageModel]: Cost } = { +export const LLM_COST: { [name in string]: Cost } = { "gpt-4": { prompt_tokens: 0.03 / 1000, completion_tokens: 0.06 / 1000, @@ -316,7 +347,9 @@ export function isValidModel(model?: string): boolean { return LLM_COST[model ?? ""] != null; } -export function getMaxTokens(model?: Model | string): number { +export function getMaxTokens(model?: LanguageModel): number { + // TODO: store max tokens in the model object itself, this is just a fallback + if (isOllamaLLM(model)) return 8192; return LLM_COST[model ?? ""]?.max_tokens ?? 
4096; } From ab08bfabfa99e91f65ba456674664b87adc451e3 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Mon, 11 Mar 2024 15:10:22 +0100 Subject: [PATCH 09/32] llm: start supporting mistral ai --- src/packages/frontend/account/chatbot.ts | 4 +- .../account/useLanguageModelSetting.tsx | 7 +- .../components/language-model-icon.tsx | 4 + .../frontend/components/mistral-avatar.tsx | 37 ++ src/packages/frontend/components/mistral.png | Bin 0 -> 1088 bytes src/packages/frontend/customize.tsx | 1 + .../markdown-input/mentionable-users.tsx | 14 +- .../frame-editors/llm/model-switch.tsx | 21 +- src/packages/frontend/project/context.tsx | 12 +- src/packages/frontend/projects/store.ts | 20 +- .../components/openai/vendor-status-check.tsx | 15 +- src/packages/pnpm-lock.yaml | 333 +++--------------- src/packages/server/llm/client.ts | 3 + src/packages/server/llm/index.ts | 11 + src/packages/server/llm/mistral.ts | 133 +++++++ src/packages/server/package.json | 7 +- src/packages/util/db-schema/llm.test.ts | 23 +- src/packages/util/db-schema/llm.ts | 104 +++++- src/packages/util/db-schema/site-defaults.ts | 10 +- .../util/db-schema/site-settings-extras.ts | 11 +- 20 files changed, 439 insertions(+), 331 deletions(-) create mode 100644 src/packages/frontend/components/mistral-avatar.tsx create mode 100644 src/packages/frontend/components/mistral.png create mode 100644 src/packages/server/llm/mistral.ts diff --git a/src/packages/frontend/account/chatbot.ts b/src/packages/frontend/account/chatbot.ts index 32bd267975..bc75425bbf 100644 --- a/src/packages/frontend/account/chatbot.ts +++ b/src/packages/frontend/account/chatbot.ts @@ -12,7 +12,7 @@ import { LANGUAGE_MODELS, LANGUAGE_MODEL_PREFIXES, LLM_USERNAMES, - Vendor, + LLMVendor, fromOllamaModel, isOllamaLLM, model2vendor, @@ -29,7 +29,7 @@ export function isChatBot(account_id?: string): boolean { ); } -export function getChatBotVendor(account_id?: string): Vendor { +export function getChatBotVendor(account_id?: string): LLMVendor { if (account_id == null) { return "openai"; } diff --git a/src/packages/frontend/account/useLanguageModelSetting.tsx b/src/packages/frontend/account/useLanguageModelSetting.tsx index 01a7f78daa..98cd6bfa3f 100644 --- a/src/packages/frontend/account/useLanguageModelSetting.tsx +++ b/src/packages/frontend/account/useLanguageModelSetting.tsx @@ -1,6 +1,6 @@ import { redux, useMemo, useTypedRedux } from "@cocalc/frontend/app-framework"; -import { EnabledLLMs } from "@cocalc/frontend/project/context"; import { + LLMServicesAvailable, LanguageService, USER_SELECTABLE_LANGUAGE_MODELS, fromOllamaModel, @@ -21,11 +21,12 @@ export function useLanguageModelSetting( const haveOpenAI = useTypedRedux("customize", "openai_enabled"); const haveGoogle = useTypedRedux("customize", "google_vertexai_enabled"); const haveOllama = useTypedRedux("customize", "ollama_enabled"); + const haveMistral = useTypedRedux("customize", "mistral_enabled"); - const enabledLLMs: EnabledLLMs = useMemo(() => { + const enabledLLMs: LLMServicesAvailable = useMemo(() => { const projectsStore = redux.getStore("projects"); return projectsStore.whichLLMareEnabled(project_id); - }, [haveOpenAI, haveGoogle, haveOllama]); + }, [haveOpenAI, haveGoogle, haveOllama, haveMistral]); const llm: LanguageService = useMemo(() => { return getValidLanguageModelName( diff --git a/src/packages/frontend/components/language-model-icon.tsx b/src/packages/frontend/components/language-model-icon.tsx index b5e28f6adc..aba06bae62 100644 --- 
a/src/packages/frontend/components/language-model-icon.tsx +++ b/src/packages/frontend/components/language-model-icon.tsx @@ -9,6 +9,7 @@ import { unreachable } from "@cocalc/util/misc"; import AIAvatar from "./ai-avatar"; import GoogleGeminiLogo from "./google-gemini-avatar"; import GooglePalmLogo from "./google-palm-avatar"; +import MistralAvatar from "./mistral-avatar"; import OllamaAvatar from "./ollama-avatar"; import OpenAIAvatar from "./openai-avatar"; @@ -50,6 +51,9 @@ export function LanguageModelVendorAvatar( } } + case "mistralai": + return ; + case "ollama": return ; diff --git a/src/packages/frontend/components/mistral-avatar.tsx b/src/packages/frontend/components/mistral-avatar.tsx new file mode 100644 index 0000000000..778fead56c --- /dev/null +++ b/src/packages/frontend/components/mistral-avatar.tsx @@ -0,0 +1,37 @@ +import { CSS } from "@cocalc/frontend/app-framework"; + +import MistralPNG from "./mistral.png"; + +export default function MistralAvatar({ + size = 64, + style, + backgroundColor = "transparent", +}: { + size: number; + style?: CSS; + backgroundColor?: string; +}) { + return ( +
+
+ +
+
+ ); +} diff --git a/src/packages/frontend/components/mistral.png b/src/packages/frontend/components/mistral.png new file mode 100644 index 0000000000000000000000000000000000000000..0298e5631be770730bb34b31a654976efd6a4630 GIT binary patch literal 1088 zcmeAS@N?(olHy`uVBq!ia0vp^CqS5k4M?tyST~P>frZ`E#WAE}&YQdT-7#OqkAEyK zfBrprvWeFt`LN!p393RHHf*@87Ura4R>?C>iEG)GEngH_l*+uas;#ZpPTSDx7879Z zJ>zb+p!ZLKbLC&o9eT|gJ@4=OpOi-0vx1MO`+ul zvzC2xTKn_&yC(~t+g0z1+flpa!prB!Onl8(y~<1fcJ6s;#pk)V?*Ca`#dQ5y=H>sF z%*n6^tNWTV=T%#;XrC5de|N6Tm-M@hIgrx89zS z`xABkf5HBBrD^fiOXc?b$ccS{h=Z(ScXSS~EegD6X>3d%N71_T}dhb2o z^ZkF6{uj&h&fkwsi+uK+-|mecUzN;>%G>YmUA_0t?!|^x-^4Apzv@2w|4*a44zsSFNmmbM>my`|n%y=E#<@^`$-VF-@cO^? zuATc|-ItC&rYuH!BEuTcO%tD9-M>YBzqDo9f#|(w-_=*|x3%2*ZDT`8hKuT7RAS?aAVaze^6BT(y5;?Y#ZB-gMv0T7LKSZo8R$+2vGrS!P_) qXNx()`y9#2QmDSIXJugc|G%%HEaAV) { + if (!showMistral) return null; + + return ( + <> + {makeLLMOption(ret, MISTRAL_MODELS[0], `Mistral's "small" model`)} + {makeLLMOption(ret, MISTRAL_MODELS[1], `Mistral's "medium" model`)} + {makeLLMOption(ret, MISTRAL_MODELS[2], `Mistral's "large" model`)} + + ); + } + function appendOllama(ret: NonNullable) { if (!showOllama || !ollama) return null; @@ -165,6 +183,7 @@ export default function ModelSwitch({ const ret: NonNullable = []; appendOpenAI(ret); appendGoogle(ret); + appendMistral(ret); appendOllama(ret); return ret; } diff --git a/src/packages/frontend/project/context.tsx b/src/packages/frontend/project/context.tsx index 727dd6fc37..5d7ff3c0aa 100644 --- a/src/packages/frontend/project/context.tsx +++ b/src/packages/frontend/project/context.tsx @@ -13,6 +13,7 @@ import { } from "@cocalc/frontend/app-framework"; import { UserGroup } from "@cocalc/frontend/projects/store"; import { ProjectStatus } from "@cocalc/frontend/todo-types"; +import { LLMServicesAvailable } from "@cocalc/util/db-schema/llm"; import { KUCALC_COCALC_COM, KUCALC_DISABLED, @@ -26,11 +27,6 @@ import { useProjectStatus } from "./page/project-status-hook"; import { useProjectHasInternetAccess } from "./settings/has-internet-access-hook"; import { Project } from "./settings/types"; -export interface EnabledLLMs { - openai: boolean; - google: boolean; - ollama: boolean; -} export interface ProjectContextState { actions?: ProjectActions; active_project_tab?: string; @@ -44,7 +40,7 @@ export interface ProjectContextState { flipTabs: [number, React.Dispatch>]; onCoCalcCom: boolean; onCoCalcDocker: boolean; - enabledLLMs: EnabledLLMs; + enabledLLMs: LLMServicesAvailable; } export const ProjectContext: Context = @@ -65,6 +61,7 @@ export const ProjectContext: Context = openai: false, google: false, ollama: false, + mistral: false, }, }); @@ -105,11 +102,12 @@ export function useProjectContextProvider( const haveOpenAI = useTypedRedux("customize", "openai_enabled"); const haveGoogle = useTypedRedux("customize", "google_vertexai_enabled"); const haveOllama = useTypedRedux("customize", "ollama_enabled"); + const haveMistral = useTypedRedux("customize", "mistral_enabled"); const enabledLLMs = useMemo(() => { const projectsStore = redux.getStore("projects"); return projectsStore.whichLLMareEnabled(project_id); - }, [haveOpenAI, haveGoogle, haveOllama]); + }, [haveOpenAI, haveGoogle, haveOllama, haveMistral]); return { actions, diff --git a/src/packages/frontend/projects/store.ts b/src/packages/frontend/projects/store.ts index 4a6489743f..427aa7ddb6 100644 --- a/src/packages/frontend/projects/store.ts +++ b/src/packages/frontend/projects/store.ts @@ -11,6 +11,7 @@ import { StudentProjectFunctionality } from 
"@cocalc/frontend/course/configurati import { CUSTOM_IMG_PREFIX } from "@cocalc/frontend/custom-software/util"; import { WebsocketState } from "@cocalc/frontend/project/websocket/websocket-state"; import { webapp_client } from "@cocalc/frontend/webapp-client"; +import { LLMServicesAvailable, LLMVendor } from "@cocalc/util/db-schema/llm"; import { cmp, coerce_codomain_to_numbers, @@ -735,22 +736,31 @@ export class ProjectsStore extends Store { } // ATTN: the useLanguageModelSetting hook computes this dynamically, with dependencies - public whichLLMareEnabled(project_id: string = "global", tag?: string) { + public whichLLMareEnabled( + project_id: string = "global", + tag?: string, + ): LLMServicesAvailable { const haveOpenAI = this.hasLanguageModelEnabled(project_id, tag, "openai"); const haveGoogle = this.hasLanguageModelEnabled(project_id, tag, "google"); const haveOllama = this.hasLanguageModelEnabled(project_id, tag, "ollama"); + const haveMistral = this.hasLanguageModelEnabled( + project_id, + tag, + "mistralai", + ); return { openai: haveOpenAI, google: haveGoogle, ollama: haveOllama, + mistral: haveMistral, }; } hasLanguageModelEnabled( project_id: string = "global", tag?: string, - vendor: "openai" | "google" | "ollama" | "any" = "any", + vendor: LLMVendor | "any" = "any", ): boolean { // cache answer for a few seconds, in case this gets called a lot: @@ -782,17 +792,19 @@ export class ProjectsStore extends Store { private _hasLanguageModelEnabled( project_id: string | "global" = "global", courseLimited?: boolean, - vendor: "openai" | "google" | "ollama" | "any" = "any", + vendor: LLMVendor | "any" = "any", ): boolean { const customize = redux.getStore("customize"); const haveOpenAI = customize.get("openai_enabled"); const haveGoogle = customize.get("google_vertexai_enabled"); const haveOllama = customize.get("ollama_enabled"); + const haveMistral = customize.get("mistral_enabled"); - if (!haveOpenAI && !haveGoogle && !haveOllama) return false; // the vendor == "any" case + if (!haveOpenAI && !haveGoogle && !haveOllama && !haveMistral) return false; // the vendor == "any" case if (vendor === "openai" && !haveOpenAI) return false; if (vendor === "google" && !haveGoogle) return false; if (vendor === "ollama" && !haveOllama) return false; + if (vendor === "mistralai" && !haveMistral) return false; // this customization parameter accounts for disabling **any** language model vendor const openai_disabled = redux diff --git a/src/packages/next/components/openai/vendor-status-check.tsx b/src/packages/next/components/openai/vendor-status-check.tsx index 7b0192802c..df532f887b 100644 --- a/src/packages/next/components/openai/vendor-status-check.tsx +++ b/src/packages/next/components/openai/vendor-status-check.tsx @@ -1,8 +1,12 @@ -import { Vendor } from "@cocalc/util/db-schema/llm"; +import { LLMVendor } from "@cocalc/util/db-schema/llm"; import { unreachable } from "@cocalc/util/misc"; import A from "components/misc/A"; -export function VendorStatusCheck({ vendor }: { vendor: Vendor }): JSX.Element { +export function VendorStatusCheck({ + vendor, +}: { + vendor: LLMVendor; +}): JSX.Element { switch (vendor) { case "openai": return ( @@ -29,6 +33,13 @@ export function VendorStatusCheck({ vendor }: { vendor: Vendor }): JSX.Element { try again later. ); + case "mistralai": + return ( + <> + This Mistral based API endpoint does not have a status page. If you + are experiencing issues, use another model or try again later. 
+ + ); default: unreachable(vendor); } diff --git a/src/packages/pnpm-lock.yaml b/src/packages/pnpm-lock.yaml index fdbfe84522..6b6b958fd2 100644 --- a/src/packages/pnpm-lock.yaml +++ b/src/packages/pnpm-lock.yaml @@ -1347,11 +1347,14 @@ importers: specifier: ^1.2.1 version: 1.2.1 '@langchain/community': - specifier: ^0.0.32 - version: 0.0.32(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21) + specifier: ^0.0.36 + version: 0.0.36(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21) '@langchain/core': - specifier: ^0.1.32 - version: 0.1.43 + specifier: ^0.1.44 + version: 0.1.44 + '@langchain/mistralai': + specifier: ^0.0.13 + version: 0.0.13(encoding@0.1.13) '@node-saml/passport-saml': specifier: ^4.0.4 version: 4.0.4 @@ -4037,8 +4040,8 @@ packages: - crypto dev: false - /@langchain/community@0.0.32(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21): - resolution: {integrity: sha512-jN4BxGKAmLbA87hqXH5Mx1IRMMVOgcn1TY1MLOVyBcBa12EvHFx8suogtXgA2ekfc8U8nIryVb1ftSupwUBv/A==} + /@langchain/community@0.0.36(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21): + resolution: {integrity: sha512-4OOKH6F9orGZZHgbvYvekMV6JesL89JC3cmJxNvAr5x7vr7wNNa4fuLq+H7Ew8tnsQOYZk+K57UMYkcwVuwySA==} engines: {node: '>=18'} peerDependencies: '@aws-crypto/sha256-js': ^5.0.0 @@ -4296,8 +4299,8 @@ packages: optional: true dependencies: '@google-ai/generativelanguage': 1.1.0(encoding@0.1.13) - '@langchain/core': 0.1.43 - '@langchain/openai': 0.0.18(encoding@0.1.13) + '@langchain/core': 0.1.44 + '@langchain/openai': 0.0.19(encoding@0.1.13) flat: 5.0.2 google-auth-library: 9.4.1(encoding@0.1.13) langsmith: 0.1.13 @@ -4308,279 +4311,8 @@ packages: - encoding dev: false - /@langchain/community@0.0.35(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21): - resolution: {integrity: sha512-xZGjiqlS7X0EDWM67s2PxSLg0Rz/Wfc741IPF0Ok/f4yFwFseWjtcWXwBwe0dVnapIstpKR82q+RDAa06xFxyw==} - engines: {node: '>=18'} - peerDependencies: - '@aws-crypto/sha256-js': ^5.0.0 - '@aws-sdk/client-bedrock-agent-runtime': ^3.485.0 - '@aws-sdk/client-bedrock-runtime': ^3.422.0 - '@aws-sdk/client-dynamodb': ^3.310.0 - '@aws-sdk/client-kendra': ^3.352.0 - '@aws-sdk/client-lambda': ^3.310.0 - '@aws-sdk/client-sagemaker-runtime': ^3.310.0 - '@aws-sdk/client-sfn': ^3.310.0 - '@aws-sdk/credential-provider-node': ^3.388.0 - '@azure/search-documents': ^12.0.0 - '@clickhouse/client': ^0.2.5 - '@cloudflare/ai': '*' - '@datastax/astra-db-ts': ^0.1.4 - '@elastic/elasticsearch': ^8.4.0 - '@getmetal/metal-sdk': '*' - '@getzep/zep-js': ^0.9.0 - '@gomomento/sdk': ^1.51.1 - '@gomomento/sdk-core': ^1.51.1 - '@google-ai/generativelanguage': ^0.2.1 - '@gradientai/nodejs-sdk': ^1.2.0 - '@huggingface/inference': ^2.6.4 - '@mozilla/readability': '*' - '@opensearch-project/opensearch': '*' - '@pinecone-database/pinecone': '*' - '@planetscale/database': ^1.8.0 - '@qdrant/js-client-rest': ^1.2.0 - '@raycast/api': ^1.55.2 - '@rockset/client': ^0.9.1 - '@smithy/eventstream-codec': ^2.0.5 - '@smithy/protocol-http': ^3.0.6 - '@smithy/signature-v4': ^2.0.10 - '@smithy/util-utf8': ^2.0.0 - '@supabase/postgrest-js': ^1.1.1 - '@supabase/supabase-js': ^2.10.0 - '@tensorflow-models/universal-sentence-encoder': '*' - '@tensorflow/tfjs-converter': '*' - '@tensorflow/tfjs-core': '*' - '@upstash/redis': ^1.20.6 - '@upstash/vector': ^1.0.2 - '@vercel/kv': ^0.2.3 - 
'@vercel/postgres': ^0.5.0 - '@writerai/writer-sdk': ^0.40.2 - '@xata.io/client': ^0.28.0 - '@xenova/transformers': ^2.5.4 - '@zilliz/milvus2-sdk-node': '>=2.2.7' - better-sqlite3: ^9.4.0 - cassandra-driver: ^4.7.2 - chromadb: '*' - closevector-common: 0.1.3 - closevector-node: 0.1.6 - closevector-web: 0.1.6 - cohere-ai: '*' - convex: ^1.3.1 - discord.js: ^14.14.1 - dria: ^0.0.3 - faiss-node: ^0.5.1 - firebase-admin: ^11.9.0 || ^12.0.0 - google-auth-library: ^8.9.0 - googleapis: ^126.0.1 - hnswlib-node: ^1.4.2 - html-to-text: ^9.0.5 - ioredis: ^5.3.2 - jsdom: '*' - llmonitor: ^0.5.9 - lodash: ^4.17.21 - lunary: ^0.6.11 - mongodb: '>=5.2.0' - mysql2: ^3.3.3 - neo4j-driver: '*' - node-llama-cpp: '*' - pg: ^8.11.0 - pg-copy-streams: ^6.0.5 - pickleparser: ^0.2.1 - portkey-ai: ^0.1.11 - redis: '*' - replicate: ^0.18.0 - typeorm: ^0.3.12 - typesense: ^1.5.3 - usearch: ^1.1.1 - vectordb: ^0.1.4 - voy-search: 0.6.2 - weaviate-ts-client: '*' - web-auth-library: ^1.0.3 - ws: ^8.14.2 - peerDependenciesMeta: - '@aws-crypto/sha256-js': - optional: true - '@aws-sdk/client-bedrock-agent-runtime': - optional: true - '@aws-sdk/client-bedrock-runtime': - optional: true - '@aws-sdk/client-dynamodb': - optional: true - '@aws-sdk/client-kendra': - optional: true - '@aws-sdk/client-lambda': - optional: true - '@aws-sdk/client-sagemaker-runtime': - optional: true - '@aws-sdk/client-sfn': - optional: true - '@aws-sdk/credential-provider-node': - optional: true - '@azure/search-documents': - optional: true - '@clickhouse/client': - optional: true - '@cloudflare/ai': - optional: true - '@datastax/astra-db-ts': - optional: true - '@elastic/elasticsearch': - optional: true - '@getmetal/metal-sdk': - optional: true - '@getzep/zep-js': - optional: true - '@gomomento/sdk': - optional: true - '@gomomento/sdk-core': - optional: true - '@google-ai/generativelanguage': - optional: true - '@gradientai/nodejs-sdk': - optional: true - '@huggingface/inference': - optional: true - '@mozilla/readability': - optional: true - '@opensearch-project/opensearch': - optional: true - '@pinecone-database/pinecone': - optional: true - '@planetscale/database': - optional: true - '@qdrant/js-client-rest': - optional: true - '@raycast/api': - optional: true - '@rockset/client': - optional: true - '@smithy/eventstream-codec': - optional: true - '@smithy/protocol-http': - optional: true - '@smithy/signature-v4': - optional: true - '@smithy/util-utf8': - optional: true - '@supabase/postgrest-js': - optional: true - '@supabase/supabase-js': - optional: true - '@tensorflow-models/universal-sentence-encoder': - optional: true - '@tensorflow/tfjs-converter': - optional: true - '@tensorflow/tfjs-core': - optional: true - '@upstash/redis': - optional: true - '@upstash/vector': - optional: true - '@vercel/kv': - optional: true - '@vercel/postgres': - optional: true - '@writerai/writer-sdk': - optional: true - '@xata.io/client': - optional: true - '@xenova/transformers': - optional: true - '@zilliz/milvus2-sdk-node': - optional: true - better-sqlite3: - optional: true - cassandra-driver: - optional: true - chromadb: - optional: true - closevector-common: - optional: true - closevector-node: - optional: true - closevector-web: - optional: true - cohere-ai: - optional: true - convex: - optional: true - discord.js: - optional: true - dria: - optional: true - faiss-node: - optional: true - firebase-admin: - optional: true - google-auth-library: - optional: true - googleapis: - optional: true - hnswlib-node: - optional: true - html-to-text: - optional: true 
- ioredis: - optional: true - jsdom: - optional: true - llmonitor: - optional: true - lodash: - optional: true - lunary: - optional: true - mongodb: - optional: true - mysql2: - optional: true - neo4j-driver: - optional: true - node-llama-cpp: - optional: true - pg: - optional: true - pg-copy-streams: - optional: true - pickleparser: - optional: true - portkey-ai: - optional: true - redis: - optional: true - replicate: - optional: true - typeorm: - optional: true - typesense: - optional: true - usearch: - optional: true - vectordb: - optional: true - voy-search: - optional: true - weaviate-ts-client: - optional: true - web-auth-library: - optional: true - ws: - optional: true - dependencies: - '@google-ai/generativelanguage': 1.1.0(encoding@0.1.13) - '@langchain/core': 0.1.43 - '@langchain/openai': 0.0.18(encoding@0.1.13) - flat: 5.0.2 - google-auth-library: 9.4.1(encoding@0.1.13) - langsmith: 0.1.13 - lodash: 4.17.21 - uuid: 9.0.1 - zod: 3.22.4 - transitivePeerDependencies: - - encoding - dev: false - - /@langchain/core@0.1.43: - resolution: {integrity: sha512-owE+UU38e4TsUq5yoaKCF+ag6u0ppwgdaqEt2Q57pdcr9nEcy8/PgTunxB10Vksq4fTJgnwWEYf/wMGZnFlRow==} + /@langchain/core@0.1.44: + resolution: {integrity: sha512-6kzRRf8X1TgkAfc8xOZ1qCHalasPDxdcNLnL++ZCGtWLyiKMFP7HcDwG/UewYsbqQd3sTQUPiP9+PHdPWXHd/Q==} engines: {node: '>=18'} dependencies: ansi-styles: 5.2.0 @@ -4596,11 +4328,36 @@ packages: zod-to-json-schema: 3.22.4(zod@3.22.4) dev: false + /@langchain/mistralai@0.0.13(encoding@0.1.13): + resolution: {integrity: sha512-0oNTICsukEnZLJ1HwtlCADZi5jqircK8B+svLrRbp+1HVue5hXPsU36b54mr0WEwhmY0QIXJ9CwEaGRSfEEZcg==} + engines: {node: '>=18'} + dependencies: + '@langchain/core': 0.1.44 + '@mistralai/mistralai': 0.1.3(encoding@0.1.13) + zod: 3.22.4 + zod-to-json-schema: 3.22.4(zod@3.22.4) + transitivePeerDependencies: + - encoding + dev: false + /@langchain/openai@0.0.18(encoding@0.1.13): resolution: {integrity: sha512-SBY1PlwiHIcjW185yVXHo4XXgTVAyGxw7IHpuEqs7201/EVjFW91HskzGRvduYm2td3/NV91BBVFgXhJQcvtmA==} engines: {node: '>=18'} dependencies: - '@langchain/core': 0.1.43 + '@langchain/core': 0.1.44 + js-tiktoken: 1.0.10 + openai: 4.27.0(encoding@0.1.13) + zod: 3.22.4 + zod-to-json-schema: 3.22.4(zod@3.22.4) + transitivePeerDependencies: + - encoding + dev: false + + /@langchain/openai@0.0.19(encoding@0.1.13): + resolution: {integrity: sha512-b1CHZCNVc2u4LZbFc1Ls0T7U4LWNGvMeQsrNidLZT5jbjg6VzzDZuVCDPGqCwUc8bzFWvPBO52oT1Wy+aCOX6w==} + engines: {node: '>=18'} + dependencies: + '@langchain/core': 0.1.44 js-tiktoken: 1.0.10 openai: 4.27.0(encoding@0.1.13) zod: 3.22.4 @@ -4801,6 +4558,14 @@ packages: - encoding dev: false + /@mistralai/mistralai@0.1.3(encoding@0.1.13): + resolution: {integrity: sha512-WUHxC2xdeqX9PTXJEqdiNY54vT2ir72WSJrZTTBKRnkfhX6zIfCYA24faRlWjUB5WTpn+wfdGsTMl3ArijlXFA==} + dependencies: + node-fetch: 2.6.7(encoding@0.1.13) + transitivePeerDependencies: + - encoding + dev: false + /@next/env@14.1.0: resolution: {integrity: sha512-Py8zIo+02ht82brwwhTg36iogzFqGLPXlRGKQw5s+qP/kMNc4MAyDeEwBKDijk6zTIbegEgu8Qy7C1LboslQAw==} dev: false @@ -13383,8 +13148,8 @@ packages: dependencies: '@anthropic-ai/sdk': 0.9.1(encoding@0.1.13) '@google-ai/generativelanguage': 1.1.0(encoding@0.1.13) - '@langchain/community': 0.0.35(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21) - '@langchain/core': 0.1.43 + '@langchain/community': 0.0.36(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21) + '@langchain/core': 0.1.44 
'@langchain/openai': 0.0.18(encoding@0.1.13) axios: 1.6.7 binary-extensions: 2.2.0 diff --git a/src/packages/server/llm/client.ts b/src/packages/server/llm/client.ts index 598a96ada9..7c5455195a 100644 --- a/src/packages/server/llm/client.ts +++ b/src/packages/server/llm/client.ts @@ -66,6 +66,9 @@ export async function getClient( case "ollama": throw new Error("Use the getOllama function instead"); + case "mistralai": + throw new Error("Use the getMistral function instead"); + default: unreachable(vendor); throw new Error(`unknown vendor: ${vendor}`); diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts index 93ef513ffb..7a97d03b1f 100644 --- a/src/packages/server/llm/index.ts +++ b/src/packages/server/llm/index.ts @@ -24,6 +24,7 @@ import { OpenAIMessages, getLLMCost, isFreeModel, + isMistralService, isOllamaLLM, isValidModel, model2service, @@ -33,6 +34,7 @@ import { ChatOptions, ChatOutput, History } from "@cocalc/util/types/llm"; import { checkForAbuse } from "./abuse"; import { callChatGPTAPI } from "./call-llm"; import { getClient } from "./client"; +import { evaluateMistral } from "./mistral"; import { evaluateOllama } from "./ollama"; import { saveResponse } from "./save-response"; import { VertexAIClient } from "./vertex-ai-client"; @@ -102,6 +104,15 @@ async function evaluateImpl({ maxTokens, stream, }); + } else if (isMistralService(model)) { + return await evaluateMistral({ + system, + history, + input, + model, + maxTokens, + stream, + }); } else { return await evaluteCall({ system, diff --git a/src/packages/server/llm/mistral.ts b/src/packages/server/llm/mistral.ts new file mode 100644 index 0000000000..5237c1e5af --- /dev/null +++ b/src/packages/server/llm/mistral.ts @@ -0,0 +1,133 @@ +import { + ChatPromptTemplate, + MessagesPlaceholder, +} from "@langchain/core/prompts"; +import { RunnableWithMessageHistory } from "@langchain/core/runnables"; +import { ChatMistralAI } from "@langchain/mistralai"; +import { ChatMessageHistory } from "langchain/stores/message/in_memory"; + +import getLogger from "@cocalc/backend/logger"; +import { getServerSettings } from "@cocalc/database/settings"; +import { + fromMistralService, + isMistralService, +} from "@cocalc/util/db-schema/llm"; +import { ChatOutput, History } from "@cocalc/util/types/llm"; +import { AIMessage, HumanMessage } from "@langchain/core/messages"; + +const log = getLogger("llm:mistral"); + +interface MistralOpts { + input: string; // new input that user types + system?: string; // extra setup that we add for relevance and context + history?: History; + model: string; // this must be ollama-[model] + stream?: (output?: string) => void; + maxTokens?: number; +} + +export async function evaluateMistral( + opts: Readonly, +): Promise { + if (!isMistralService(opts.model)) { + throw new Error(`model ${opts.model} not supported`); + } + const model = fromMistralService(opts.model); + const { system, history, input, maxTokens, stream } = opts; + + log.debug("evaluateMistral", { + input, + history, + system, + model, + stream: stream != null, + maxTokens, + }); + + const settings = await getServerSettings(); + const { mistral_enabled, mistral_api_key } = settings; + + if (!mistral_enabled) { + throw new Error(`Mistral is not enabled.`); + } + + if (!mistral_api_key) { + throw new Error(`Mistral api key is not configured.`); + } + + const msgs: ["ai" | "human", string][] = []; + + if (history) { + let nextRole: "model" | "user" = "user"; + for (const { content } of history) { + if (nextRole === 
"user") { + msgs.push(["human", content]); + } else { + msgs.push(["ai", content]); + } + nextRole = nextRole === "user" ? "model" : "user"; + } + } + + const mistral = new ChatMistralAI({ + apiKey: mistral_api_key, + modelName: model, + }); + + const prompt = ChatPromptTemplate.fromMessages([ + ["system", system ?? ""], + new MessagesPlaceholder("chat_history"), + ["human", "{input}"], + ]); + + const chain = prompt.pipe(mistral); + + const chainWithHistory = new RunnableWithMessageHistory({ + runnable: chain, + inputMessagesKey: "input", + historyMessagesKey: "chat_history", + getMessageHistory: async (_) => { + const chatHistory = new ChatMessageHistory(); + // await history.addMessage(new HumanMessage("be brief")); + // await history.addMessage(new AIMessage("ok")); + if (history) { + let nextRole: "model" | "user" = "user"; + for (const { content } of history) { + if (nextRole === "user") { + await chatHistory.addMessage(new HumanMessage(content)); + } else { + await chatHistory.addMessage(new AIMessage(content)); + } + nextRole = nextRole === "user" ? "model" : "user"; + } + } + + return chatHistory; + }, + }); + + const chunks = await chainWithHistory.stream( + { input }, + { configurable: { sessionId: "ignored" } }, + ); + + let output = ""; + for await (const chunk of chunks) { + if (typeof chunk !== "string") continue; + output += chunk; + opts.stream?.(chunk); + } + + // and an empty call when done + opts.stream?.(); + + const prompt_tokens = 10; + const completion_tokens = 10; + + return { + output, + total_tokens: prompt_tokens + completion_tokens, + completion_tokens, + prompt_tokens, + }; +} diff --git a/src/packages/server/package.json b/src/packages/server/package.json index 0b79072857..c3dc70c2a3 100644 --- a/src/packages/server/package.json +++ b/src/packages/server/package.json @@ -46,8 +46,9 @@ "@google-cloud/monitoring": "^4.0.0", "@google/generative-ai": "^0.1.3", "@isaacs/ttlcache": "^1.2.1", - "@langchain/community": "^0.0.32", - "@langchain/core": "^0.1.32", + "@langchain/community": "^0.0.36", + "@langchain/core": "^0.1.44", + "@langchain/mistralai": "^0.0.13", "@node-saml/passport-saml": "^4.0.4", "@passport-js/passport-twitter": "^1.0.8", "@passport-next/passport-google-oauth2": "^1.0.0", @@ -119,7 +120,7 @@ }, "pnpm": { "overrides": { - "@langchain/core": "^0.1.32" + "@langchain/core": "^0.1.44" } } } diff --git a/src/packages/util/db-schema/llm.test.ts b/src/packages/util/db-schema/llm.test.ts index e9d16c50fb..03d79ab78d 100644 --- a/src/packages/util/db-schema/llm.test.ts +++ b/src/packages/util/db-schema/llm.test.ts @@ -1,6 +1,13 @@ // this tests the wrongly named openai.ts file -import { isFreeModel, LANGUAGE_MODELS, LLM_COST, OLLAMA_PREFIX } from "./llm"; +import { + isFreeModel, + LANGUAGE_MODEL_VENDORS, + LANGUAGE_MODELS, + LLM_COST, + OLLAMA_PREFIX, + USER_SELECTABLE_LANGUAGE_MODELS, +} from "./llm"; describe("llm", () => { test("isFreeModel", () => { @@ -17,4 +24,18 @@ describe("llm", () => { expect(LANGUAGE_MODELS.includes(model as any)).toBe(true); } }); + + test("all user selectable ones are valid", () => { + for (const model of USER_SELECTABLE_LANGUAGE_MODELS) { + expect(LANGUAGE_MODELS.includes(model)).toBe(true); + } + }); + + test("none of the user selectable models start with any of the vendor prefixes", () => { + for (const model of USER_SELECTABLE_LANGUAGE_MODELS) { + for (const prefix of LANGUAGE_MODEL_VENDORS) { + expect(model.startsWith(prefix)).toBe(false); + } + } + }); }); diff --git a/src/packages/util/db-schema/llm.ts 
b/src/packages/util/db-schema/llm.ts index fa9fdcd11b..2269fd31e8 100644 --- a/src/packages/util/db-schema/llm.ts +++ b/src/packages/util/db-schema/llm.ts @@ -12,9 +12,24 @@ const MODELS_OPENAI = [ export type ModelOpenAI = (typeof MODELS_OPENAI)[number]; +// ATTN: when you modify this list, also change frontend/.../llm/model-switch.tsx! +export const MISTRAL_MODELS = [ + // yes, all 3 of them have an extra mistral-prefix, on top of the vendor prefix + "mistral-small-latest", + "mistral-medium-latest", + "mistral-large-latest", +] as const; + +export type MistralModel = (typeof MISTRAL_MODELS)[number]; + +export function isMistralModel(model: unknown): model is MistralModel { + return MISTRAL_MODELS.includes(model as any); +} + // the hardcoded list of available language models – there are also dynamic ones, like OllamaLLM objects export const LANGUAGE_MODELS = [ ...MODELS_OPENAI, + ...MISTRAL_MODELS, // google's are taken from here – we use the generative AI client lib // https://developers.generativeai.google/models/language "text-bison-001", @@ -27,12 +42,13 @@ export const LANGUAGE_MODELS = [ // This hardcodes which models can be selected by users. // Make sure to update this when adding new models. // This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx -export const USER_SELECTABLE_LANGUAGE_MODELS: Readonly = [ +export const USER_SELECTABLE_LANGUAGE_MODELS = [ "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", // "chat-bison-001", // PaLM2 is not good, replies with no response too often "gemini-pro", + ...MISTRAL_MODELS, ] as const; export type OllamaLLM = string; @@ -47,15 +63,23 @@ export function isLanguageModel(model?: unknown): model is LanguageModel { return LANGUAGE_MODELS.includes(model as any); } +export interface LLMServicesAvailable { + google: boolean; + openai: boolean; + ollama: boolean; + mistral: boolean; +} + // this is used in initialization functions. e.g. to get a default model depending on the overall availability // usually, this should just return the chatgpt3 model, but e.g. if neither google nor openai is available, // then it might even fall back to an available ollama model. It needs to return a string, though, for the frontend, etc.
export function getValidLanguageModelName( model: string | undefined, - filter: { google: boolean; openai: boolean; ollama: boolean } = { + filter: LLMServicesAvailable = { google: true, openai: true, ollama: false, + mistral: false, }, ollama: string[] = [], // keys of ollama models ): LanguageModel { @@ -84,13 +108,17 @@ export interface OpenAIMessage { export type OpenAIMessages = OpenAIMessage[]; export const OLLAMA_PREFIX = "ollama-"; - export type OllamaService = string; - export function isOllamaService(service: string): service is OllamaService { return isOllamaLLM(service); } +export const MISTRAL_PREFIX = "mistralai-"; +export type MistralService = string; +export function isMistralService(service: string): service is MistralService { + return service.startsWith(MISTRAL_PREFIX); +} + // we encode the in the frontend and elsewhere with the service name as a prefix // ATTN: don't change the encoding pattern of [vendor]-[model] // for whatever reason, it's also described that way in purchases/close.ts @@ -104,10 +132,16 @@ export type LanguageService = | "google-chat-bison-001" | "google-embedding-gecko-001" | "google-gemini-pro" - | OllamaService; - -const LANGUAGE_MODEL_VENDORS = ["openai", "google", "ollama"] as const; -export type Vendor = (typeof LANGUAGE_MODEL_VENDORS)[number]; + | OllamaService + | MistralService; + +export const LANGUAGE_MODEL_VENDORS = [ + "openai", + "google", + "ollama", + "mistralai", // the "*ai" is deliberately, because their model names start with "mistral-..." and we have to distinguish it from the prefix +] as const; +export type LLMVendor = (typeof LANGUAGE_MODEL_VENDORS)[number]; // used e.g. for checking "account-id={string}" and other things like that export const LANGUAGE_MODEL_PREFIXES = [ @@ -123,6 +157,9 @@ export function model2service(model: LanguageModel): LanguageService { if (isOllamaLLM(model)) { return toOllamaModel(model); } + if (isMistralModel(model)) { + return toMistralService(model); + } if (isLanguageModel(model)) { if ( model === "text-bison-001" || @@ -150,7 +187,8 @@ export function service2model( // split off the first part of service, e.g., "openai-" or "google-" const s = service.split("-")[0]; - const hasPrefix = s === "openai" || s === "google" || s === "ollama"; + const hasPrefix = + s === "openai" || s === "google" || s === "ollama" || s === "mistral"; const m = hasPrefix ? 
service.split("-").slice(1).join("-") : service; if (hasPrefix && s === "ollama") { return toOllamaModel(m); @@ -167,9 +205,11 @@ export function service2model( // Note: this must be an OpenAI model – otherwise change the getValidLanguageModelName function export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo"; -export function model2vendor(model): Vendor { +export function model2vendor(model): LLMVendor { if (isOllamaLLM(model)) { return "ollama"; + } else if (isMistralModel(model)) { + return "mistralai"; } else if (model.startsWith("gpt-")) { return "openai"; } else { @@ -184,7 +224,7 @@ export function toOllamaModel(model: string): OllamaLLM { if (isOllamaLLM(model)) { throw new Error(`already an ollama model: ${model}`); } - return `ollama-${model}`; + return `${OLLAMA_PREFIX}${model}`; } // unwraps the model name from an object that indicates that it's an ollama model @@ -203,10 +243,33 @@ export function isOllamaLLM(model: unknown): model is OllamaLLM { ); } +export function toMistralService(model: string): MistralService { + if (isMistralService(model)) { + throw new Error(`already a mistral model: ${model}`); + } + return `${MISTRAL_PREFIX}${model}`; +} + +export function fromMistralService(model: MistralService) { + if (!isMistralService(model)) { + throw new Error(`not a mistral model: ${model}`); + } + return model.slice(MISTRAL_PREFIX.length); +} + // Map from psuedo account_id to what should be displayed to user. // This is used in various places in the frontend. // Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing -export const LLM_USERNAMES = { +export const LLM_USERNAMES: { + [key in + | (typeof USER_SELECTABLE_LANGUAGE_MODELS)[number] + | "chatgpt" // some additional ones, backwards compatibility + | "chatgpt3" + | "chatgpt4" + | "gpt-4-32k" + | "text-bison-001" + | "chat-bison-001"]: string; +} = { chatgpt: "GPT-3.5", chatgpt3: "GPT-3.5", chatgpt4: "GPT-4", @@ -216,12 +279,15 @@ export const LLM_USERNAMES = { "gpt-3.5-turbo-16k": "GPT-3.5-16k", "text-bison-001": "PaLM 2", "chat-bison-001": "PaLM 2", - "embedding-gecko-001": "PaLM 2", "gemini-pro": "Gemini Pro", + "mistral-small-latest": "Mistral AI Small", + "mistral-medium-latest": "Mistral AI Medium", + "mistral-large-latest": "Mistral AI Large", } as const; export function isFreeModel(model: unknown) { if (isOllamaLLM(model)) return true; + if (isMistralModel(model)) return true; if (LANGUAGE_MODELS.includes(model as any)) { // of these models, the following are free return ( @@ -249,7 +315,7 @@ export function isLanguageModelService( return false; } -export function getVendorStatusCheckMD(vendor: Vendor): string { +export function getVendorStatusCheckMD(vendor: LLMVendor): string { switch (vendor) { case "openai": return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`; @@ -257,6 +323,8 @@ export function getVendorStatusCheckMD(vendor: Vendor): string { return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`; case "ollama": return `No status information for Ollama available – you have to check with the particular backend for the model.`; + case "mistralai": + return `No status information for Mistral AI available.`; default: unreachable(vendor); } @@ -264,7 +332,11 @@ export function getVendorStatusCheckMD(vendor: Vendor): string { } export function llmSupportsStreaming(model: LanguageModel): boolean { - return model2vendor(model) === "openai" || model === 
"gemini-pro"; + return ( + model2vendor(model) === "openai" || + model === "gemini-pro" || + model2vendor(model) === "mistralai" + ); } interface Cost { @@ -333,12 +405,14 @@ export const LLM_COST: { [name in string]: Cost } = { export function isValidModel(model?: string): boolean { if (model == null) return false; if (isOllamaLLM(model)) return true; + if (isMistralModel(model)) return true; return LLM_COST[model ?? ""] != null; } export function getMaxTokens(model?: LanguageModel): number { // TODO: store max tokens in the model object itself, this is just a fallback if (isOllamaLLM(model)) return 8192; + if (isMistralModel(model)) return 4096; // TODO: check with MistralAI return LLM_COST[model ?? ""]?.max_tokens ?? 4096; } diff --git a/src/packages/util/db-schema/site-defaults.ts b/src/packages/util/db-schema/site-defaults.ts index b976ef9464..dadc3a10f3 100644 --- a/src/packages/util/db-schema/site-defaults.ts +++ b/src/packages/util/db-schema/site-defaults.ts @@ -5,8 +5,8 @@ // Default settings to customize a given site, typically a private install of CoCalc. -import jsonic from "jsonic"; import { is_valid_email_address } from "@cocalc/util/misc"; +import jsonic from "jsonic"; export type ConfigValid = Readonly | ((val: string) => boolean); @@ -26,6 +26,7 @@ export type SiteSettingsKeys = | "policies" | "openai_enabled" | "google_vertexai_enabled" + | "mistral_enabled" | "ollama_enabled" | "neural_search_enabled" | "jupyter_api_enabled" @@ -596,6 +597,13 @@ export const site_settings_conf: SiteSettings = { valid: only_booleans, to_val: to_bool, }, + mistral_enabled: { + name: "Mistral AI UI", + desc: "Controls visibility of UI elements related to Mistral AI integration. You must **also set your Mistral API key** below for this functionality to work.", + default: "no", + valid: only_booleans, + to_val: to_bool, + }, ollama_enabled: { name: "Ollama LLM UI", desc: "Controls visibility of UI elements related to Ollama integration. 
To make this actually work, configure the list of API/model endpoints in the Ollama configuration.", diff --git a/src/packages/util/db-schema/site-settings-extras.ts b/src/packages/util/db-schema/site-settings-extras.ts index 7092a46a48..d8235fd66f 100644 --- a/src/packages/util/db-schema/site-settings-extras.ts +++ b/src/packages/util/db-schema/site-settings-extras.ts @@ -68,6 +68,7 @@ const pii_retention_display = (retention: string) => { const openai_enabled = (conf: SiteSettings) => to_bool(conf.openai_enabled); const vertexai_enabled = (conf: SiteSettings) => to_bool(conf.google_vertexai_enabled); +const mistral_enabled = (conf: SiteSettings) => to_bool(conf.mistral_enabled); const ollama_enabled = (conf: SiteSettings) => to_bool(conf.ollama_enabled); const any_llm_enabled = (conf: SiteSettings) => openai_enabled(conf) || vertexai_enabled(conf) || ollama_enabled(conf); @@ -192,6 +193,7 @@ export type SiteSettingsExtrasKeys = | "openai_api_key" | "google_vertexai_key" | "ollama_configuration" + | "mistral_api_key" | "qdrant_section" | "qdrant_api_key" | "qdrant_cluster_url" @@ -263,11 +265,18 @@ export const EXTRAS: SettingsExtras = { }, google_vertexai_key: { name: "Google Gemini Generative AI API Key", - desc: "Create an [API Key](https://aistudio.google.com/app/apikey) in [Google's AI Studio](https://aistudio.google.com/) and paste the content here.", + desc: "Create an [API Key](https://aistudio.google.com/app/apikey) in [Google's AI Studio](https://aistudio.google.com/) and paste it here.", default: "", password: true, show: vertexai_enabled, }, + mistral_api_key: { + name: "Mistral AI API Key", + desc: "Create an API Key in the [Mistral AI Console](https://console.mistral.ai/api-keys/) and paste it here.", + default: "", + password: true, + show: mistral_enabled, + }, ollama_configuration: { name: "Ollama Configuration", desc: "This is the configuration for the Ollama LLM API endpoints.", From 472a644d95f1902731e36f84e7ab1a8d18d5481b Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Tue, 12 Mar 2024 14:08:01 +0100 Subject: [PATCH 10/32] llm: adding mistral and more in-depth refactoring --- .../frontend/account/avatar/avatar.tsx | 2 +- src/packages/frontend/account/chatbot.ts | 2 +- .../frontend/account/other-settings.tsx | 2 +- .../account/useLanguageModelSetting.tsx | 2 +- src/packages/frontend/chat/actions.ts | 8 +- src/packages/frontend/client/client.ts | 2 +- .../frontend/client/{openai.ts => llm.ts} | 28 +- src/packages/frontend/client/types.ts | 6 + .../codemirror/extensions/ai-formula.tsx | 12 +- .../components/language-model-icon.tsx | 2 +- src/packages/frontend/components/llm-name.tsx | 2 +- .../markdown-input/mentionable-users.tsx | 2 +- .../frontend/frame-editors/llm/create-chat.ts | 2 +- .../frame-editors/llm/model-switch.tsx | 8 +- .../frame-editors/llm/title-bar-button.tsx | 2 +- .../jupyter/insert-cell/ai-cell-generator.tsx | 4 +- .../frontend/misc/{openai.ts => llm.ts} | 13 +- src/packages/frontend/project/context.tsx | 2 +- .../page/home-page/ai-generate-jupyter.tsx | 6 +- src/packages/frontend/projects/store.ts | 2 +- src/packages/frontend/sagews/chatgpt.ts | 2 +- src/packages/frontend/search/embeddings.ts | 15 +- src/packages/jupyter/types/types.ts | 2 +- .../components/openai/vendor-status-check.tsx | 2 +- src/packages/server/llm/abuse.ts | 2 +- src/packages/server/llm/call-llm.ts | 2 +- src/packages/server/llm/client.ts | 2 +- src/packages/server/llm/embeddings-api.ts | 26 +- src/packages/server/llm/index.ts | 6 +- src/packages/server/llm/mistral.ts | 45 +- 
src/packages/server/llm/ollama.ts | 24 +- src/packages/server/llm/save-response.ts | 6 +- src/packages/server/llm/vertex-ai-client.ts | 2 +- .../server/purchases/get-service-cost.ts | 2 +- .../server/purchases/is-purchase-allowed.ts | 2 +- src/packages/util/db-schema/index.ts | 4 +- .../{llm.test.ts => llm-utils.test.ts} | 2 +- src/packages/util/db-schema/llm-utils.ts | 456 ++++++++++++ src/packages/util/db-schema/llm.ts | 686 ++++++------------ src/packages/util/db-schema/openai.ts | 246 ------- .../util/db-schema/purchase-quotas.ts | 2 +- src/packages/util/db-schema/purchases.ts | 2 +- src/packages/util/types/llm.ts | 2 +- 43 files changed, 809 insertions(+), 840 deletions(-) rename src/packages/frontend/client/{openai.ts => llm.ts} (92%) create mode 100644 src/packages/frontend/client/types.ts rename src/packages/frontend/misc/{openai.ts => llm.ts} (94%) rename src/packages/util/db-schema/{llm.test.ts => llm-utils.test.ts} (98%) create mode 100644 src/packages/util/db-schema/llm-utils.ts delete mode 100644 src/packages/util/db-schema/openai.ts diff --git a/src/packages/frontend/account/avatar/avatar.tsx b/src/packages/frontend/account/avatar/avatar.tsx index dde3aa35e6..7e4677e682 100644 --- a/src/packages/frontend/account/avatar/avatar.tsx +++ b/src/packages/frontend/account/avatar/avatar.tsx @@ -18,7 +18,7 @@ import { LanguageModelVendorAvatar } from "@cocalc/frontend/components/language- import { ProjectTitle } from "@cocalc/frontend/projects/project-title"; import { DEFAULT_COLOR } from "@cocalc/frontend/users/store"; import { webapp_client } from "@cocalc/frontend/webapp-client"; -import { service2model } from "@cocalc/util/db-schema/llm"; +import { service2model } from "@cocalc/util/db-schema/llm-utils"; import { ensure_bound, startswith, trunc_middle } from "@cocalc/util/misc"; import { avatar_fontcolor } from "./font-color"; diff --git a/src/packages/frontend/account/chatbot.ts b/src/packages/frontend/account/chatbot.ts index bc75425bbf..747d405870 100644 --- a/src/packages/frontend/account/chatbot.ts +++ b/src/packages/frontend/account/chatbot.ts @@ -16,7 +16,7 @@ import { fromOllamaModel, isOllamaLLM, model2vendor, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; // we either check if the prefix is one of the known ones (used in some circumstances) // or if the account id is exactly one of the language models (more precise) diff --git a/src/packages/frontend/account/other-settings.tsx b/src/packages/frontend/account/other-settings.tsx index f31f233210..4d9a392b99 100644 --- a/src/packages/frontend/account/other-settings.tsx +++ b/src/packages/frontend/account/other-settings.tsx @@ -29,7 +29,7 @@ import { getValidLanguageModelName, isFreeModel, model2vendor, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import { VBAR_EXPLANATION, VBAR_KEY, diff --git a/src/packages/frontend/account/useLanguageModelSetting.tsx b/src/packages/frontend/account/useLanguageModelSetting.tsx index 98cd6bfa3f..e61fbd00b3 100644 --- a/src/packages/frontend/account/useLanguageModelSetting.tsx +++ b/src/packages/frontend/account/useLanguageModelSetting.tsx @@ -6,7 +6,7 @@ import { fromOllamaModel, getValidLanguageModelName, isOllamaLLM, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; export const SETTINGS_LANGUAGE_MODEL_KEY = "language_model"; diff --git a/src/packages/frontend/chat/actions.ts b/src/packages/frontend/chat/actions.ts index 496b69a5a7..47d843e9f6 100644 --- 
a/src/packages/frontend/chat/actions.ts +++ b/src/packages/frontend/chat/actions.ts @@ -6,23 +6,23 @@ import { fromJS, Map as immutableMap } from "immutable"; import { Actions, redux } from "@cocalc/frontend/app-framework"; +import { History as LanguageModelHistory } from "@cocalc/frontend/client/types"; import type { HashtagState, SelectedHashtags, } from "@cocalc/frontend/editors/task-editor/types"; import { open_new_tab } from "@cocalc/frontend/misc"; -import { History as LanguageModelHistory } from "@cocalc/frontend/misc/openai"; import enableSearchEmbeddings from "@cocalc/frontend/search/embeddings"; import track from "@cocalc/frontend/user-tracking"; import { webapp_client } from "@cocalc/frontend/webapp-client"; import { SyncDB } from "@cocalc/sync/editor/db"; import { + LANGUAGE_MODEL_PREFIXES, getVendorStatusCheckMD, model2service, model2vendor, type LanguageModel, - LANGUAGE_MODEL_PREFIXES, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import { cmp, isValidUUID, parse_hashtags, uuid } from "@cocalc/util/misc"; import { getSortedDates } from "./chat-log"; import { message_to_markdown } from "./message"; @@ -606,7 +606,7 @@ export class ChatActions extends Actions { setTimeout(() => { this.chatStreams.delete(id); }, 3 * 60 * 1000); - const chatStream = webapp_client.openai_client.languageModelStream({ + const chatStream = webapp_client.openai_client.queryStream({ input, history: reply_to ? this.getChatGPTHistory(reply_to) : undefined, project_id, diff --git a/src/packages/frontend/client/client.ts b/src/packages/frontend/client/client.ts index 0930aae55e..7af303e899 100644 --- a/src/packages/frontend/client/client.ts +++ b/src/packages/frontend/client/client.ts @@ -14,7 +14,7 @@ import { TimeClient } from "./time"; import { AccountClient } from "./account"; import { ProjectClient } from "./project"; import { AdminClient } from "./admin"; -import { LLMClient } from "./openai"; +import { LLMClient } from "./llm"; import { PurchasesClient } from "./purchases"; import { JupyterClient } from "./jupyter"; import { SyncClient } from "@cocalc/sync/client/sync-client"; diff --git a/src/packages/frontend/client/openai.ts b/src/packages/frontend/client/llm.ts similarity index 92% rename from src/packages/frontend/client/openai.ts rename to src/packages/frontend/client/llm.ts index a28f78bd58..2f81730461 100644 --- a/src/packages/frontend/client/openai.ts +++ b/src/packages/frontend/client/llm.ts @@ -7,24 +7,24 @@ import { delay } from "awaiting"; import { EventEmitter } from "events"; import { redux } from "@cocalc/frontend/app-framework"; -import type { History } from "@cocalc/frontend/misc/openai"; // do not import until needed -- it is HUGE! -import { - LanguageModel, - isFreeModel, - model2service, -} from "@cocalc/util/db-schema/llm"; -import type { EmbeddingData } from "@cocalc/util/db-schema/openai"; +import type { EmbeddingData } from "@cocalc/util/db-schema/llm"; import { MAX_EMBEDDINGS_TOKENS, MAX_REMOVE_LIMIT, MAX_SAVE_LIMIT, MAX_SEARCH_LIMIT, -} from "@cocalc/util/db-schema/openai"; +} from "@cocalc/util/db-schema/llm"; +import { + LanguageModel, + isFreeModel, + model2service, +} from "@cocalc/util/db-schema/llm-utils"; import * as message from "@cocalc/util/message"; import type { WebappClient } from "./client"; +import type { History } from "./types"; // do not import until needed -- it is HUGE! const DEFAULT_SYSTEM_PROMPT = - "ASSUME THAT I HAVE FULL ACCESS TO COCALC AND I AM USING COCALC RIGHT NOW. ENCLOSE ALL MATH IN $. 
INCLUDE THE LANGUAGE DIRECTLY AFTER THE TRIPLE BACKTICKS IN ALL MARKDOWN CODE BLOCKS. BE BRIEF."; + "Assume full access to CoCalc and using CoCalc right now. Enclose all math formulas in $. Include the language directly after the triple backticks in all markdown code blocks. Be brief."; interface EmbeddingsQuery { scope: string | string[]; @@ -42,11 +42,11 @@ export class LLMClient { this.client = client; } - public async chatgpt(opts): Promise { + public async query(opts): Promise { return await this.queryLanguageModel(opts); } - public languageModelStream(opts, startExplicitly = false): ChatStream { + public queryStream(opts, startExplicitly = false): ChatStream { const chatStream = new ChatStream(); (async () => { try { @@ -99,7 +99,7 @@ export class LLMClient { if (!isFreeModel(model)) { // Ollama and others are treated as "free" - const service = model2service(model) ; + const service = model2service(model); // when client gets non-free openai model request, check if allowed. If not, show quota modal. const { allowed, reason } = await this.client.purchases_client.isPurchaseAllowed(service); @@ -123,7 +123,7 @@ export class LLMClient { truncateHistory, truncateMessage, getMaxTokens, - } = await import("@cocalc/frontend/misc/openai"); + } = await import("@cocalc/frontend/misc/llm"); // We always leave some room for output: const maxTokens = getMaxTokens(model) - 1000; input = truncateMessage(input, maxTokens); @@ -226,7 +226,7 @@ export class LLMClient { data: EmbeddingData[]; }): Promise { this.assertHasNeuralSearch(); - const { truncateMessage } = await import("@cocalc/frontend/misc/openai"); + const { truncateMessage } = await import("@cocalc/frontend/misc/llm"); // Make data be data0, but without mutate data0 // and with any text truncated to fit in the diff --git a/src/packages/frontend/client/types.ts b/src/packages/frontend/client/types.ts new file mode 100644 index 0000000000..638b18ca0a --- /dev/null +++ b/src/packages/frontend/client/types.ts @@ -0,0 +1,6 @@ +export interface Message { + role: "assistant" | "user" | "system"; + content: string; +} + +export type History = Message[]; diff --git a/src/packages/frontend/codemirror/extensions/ai-formula.tsx b/src/packages/frontend/codemirror/extensions/ai-formula.tsx index c8f5886c35..7f10b3ba8b 100644 --- a/src/packages/frontend/codemirror/extensions/ai-formula.tsx +++ b/src/packages/frontend/codemirror/extensions/ai-formula.tsx @@ -15,7 +15,7 @@ import ModelSwitch from "@cocalc/frontend/frame-editors/llm/model-switch"; import { show_react_modal } from "@cocalc/frontend/misc"; import track from "@cocalc/frontend/user-tracking"; import { webapp_client } from "@cocalc/frontend/webapp-client"; -import { isFreeModel } from "@cocalc/util/db-schema/llm"; +import { isFreeModel } from "@cocalc/util/db-schema/llm-utils"; import { unreachable } from "@cocalc/util/misc"; type Mode = "tex" | "md"; @@ -53,11 +53,13 @@ function AiGenFormula({ mode, text = "", project_id, cb }: Props) { function getPrompt() { const description = input || text; + const p1 = `Convert the following plain-text description of a formula to a LaTeX formula`; + const p2 = `Return the LaTeX formula, and only the formula. Enclose the formula in a single snippet delimited by $. Do not add any explanations.`; switch (mode) { case "tex": - return `Convert the following plain-text description of a formula to a LaTeX formula in a *.tex file. Assume the package amsmath is available. Only return the LaTeX formula in a single code snippet, delimited by $ or $$. 
Do not add any explanations:\n\n${description}`; + return `${p1} in a *.tex file. Assume the package "amsmath" is available. ${p2}:\n\n${description}`; case "md": - return `Convert the following plain-text description of a formula to a LaTeX formula in a markdown file. Only return the LaTeX formula in a single code snippet, delimited by $ or $$. Do not add any explanations:\n\n${description}`; + return `${p1} in a markdown file. ${p2}\n\n${description}`; default: unreachable(mode); } @@ -111,12 +113,12 @@ function AiGenFormula({ mode, text = "", project_id, cb }: Props) { setGenerating(true); const tag = `generate-formula`; track("chatgpt", { project_id, tag, mode, type: "generate", model }); - const tex = await webapp_client.openai_client.chatgpt({ + const tex = await webapp_client.openai_client.query({ input: getPrompt(), project_id, tag, model, - system: null, + system: "", }); processFormula(tex); } catch (err) { diff --git a/src/packages/frontend/components/language-model-icon.tsx b/src/packages/frontend/components/language-model-icon.tsx index aba06bae62..0bebaf1dac 100644 --- a/src/packages/frontend/components/language-model-icon.tsx +++ b/src/packages/frontend/components/language-model-icon.tsx @@ -4,7 +4,7 @@ import { isLanguageModel, isOllamaLLM, model2vendor, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import { unreachable } from "@cocalc/util/misc"; import AIAvatar from "./ai-avatar"; import GoogleGeminiLogo from "./google-gemini-avatar"; diff --git a/src/packages/frontend/components/llm-name.tsx b/src/packages/frontend/components/llm-name.tsx index 6cb9f8464f..b3910d4cdb 100644 --- a/src/packages/frontend/components/llm-name.tsx +++ b/src/packages/frontend/components/llm-name.tsx @@ -5,7 +5,7 @@ import { fromOllamaModel, isLanguageModel, isOllamaLLM, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import { LanguageModelVendorAvatar } from "./language-model-icon"; export function LLMModelName(props: Readonly<{ model: LanguageModel }>) { diff --git a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx index cc6a302577..591a2dc9f2 100644 --- a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx +++ b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx @@ -18,7 +18,7 @@ import { USER_SELECTABLE_LANGUAGE_MODELS, model2service, toOllamaModel, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import { cmp, timestamp_cmp, trunc_middle } from "@cocalc/util/misc"; import { OllamaPublic } from "@cocalc/util/types/llm"; import { Item } from "./complete"; diff --git a/src/packages/frontend/frame-editors/llm/create-chat.ts b/src/packages/frontend/frame-editors/llm/create-chat.ts index 0c3b66d5bd..b4682c76b1 100644 --- a/src/packages/frontend/frame-editors/llm/create-chat.ts +++ b/src/packages/frontend/frame-editors/llm/create-chat.ts @@ -42,7 +42,7 @@ export default async function createChat({ } // Truncate input (also this MUST lazy import): const { truncateMessage, getMaxTokens } = await import( - "@cocalc/frontend/misc/openai" + "@cocalc/frontend/misc/llm" ); const maxTokens = getMaxTokens(model) - 1000; // 1000 tokens reserved for output and the prompt below. 
input = truncateMessage(input, maxTokens); diff --git a/src/packages/frontend/frame-editors/llm/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx index 26230a28ec..b5bcf5f3cb 100644 --- a/src/packages/frontend/frame-editors/llm/model-switch.tsx +++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx @@ -15,7 +15,7 @@ import { isOllamaLLM, model2service, toOllamaModel, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import type { OllamaPublic } from "@cocalc/util/types/llm"; export { DEFAULT_MODEL }; @@ -145,9 +145,9 @@ export default function ModelSwitch({ return ( <> - {makeLLMOption(ret, MISTRAL_MODELS[0], `Mistral's "small" model`)} - {makeLLMOption(ret, MISTRAL_MODELS[1], `Mistral's "medium" model`)} - {makeLLMOption(ret, MISTRAL_MODELS[2], `Mistral's "large" model`)} + {makeLLMOption(ret, MISTRAL_MODELS[0], `Mistral AI's "small" model`)} + {makeLLMOption(ret, MISTRAL_MODELS[1], `Mistral AI's "medium" model`)} + {makeLLMOption(ret, MISTRAL_MODELS[2], `Mistral AI's "large" model`)} ); } diff --git a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx index 9f5e78bfff..2958caeeff 100644 --- a/src/packages/frontend/frame-editors/llm/title-bar-button.tsx +++ b/src/packages/frontend/frame-editors/llm/title-bar-button.tsx @@ -445,7 +445,7 @@ async function updateInput( if (input.length > 2000) { // Truncate input (also this MUST be a lazy import): const { truncateMessage, getMaxTokens } = await import( - "@cocalc/frontend/misc/openai" + "@cocalc/frontend/misc/llm" ); const maxTokens = getMaxTokens(model) - 1000; // 1000 tokens reserved for output and the prompt below. input = truncateMessage(input, maxTokens); diff --git a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx index 03c2157b2c..3f6dc90986 100644 --- a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx +++ b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx @@ -20,7 +20,7 @@ import { LanguageModel, getVendorStatusCheckMD, model2vendor, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import { COLORS } from "@cocalc/util/theme"; import { JupyterActions } from "../browser-actions"; import { insertCell } from "./util"; @@ -230,7 +230,7 @@ async function queryLanguageModel({ let curCellPos = 0; let numCells = 1; - const reply = await webapp_client.openai_client.languageModelStream({ + const reply = await webapp_client.openai_client.queryStream({ input, project_id, path, diff --git a/src/packages/frontend/misc/openai.ts b/src/packages/frontend/misc/llm.ts similarity index 94% rename from src/packages/frontend/misc/openai.ts rename to src/packages/frontend/misc/llm.ts index e6a5abb23b..dd53caecc2 100644 --- a/src/packages/frontend/misc/openai.ts +++ b/src/packages/frontend/misc/llm.ts @@ -1,9 +1,11 @@ // NOTE! This gpt-3-tokenizer is LARGE, e.g., 1.6MB, so be // sure to async load it by clients of this code. 
-import type { LanguageModel } from "@cocalc/util/db-schema/llm"; -import { getMaxTokens } from "@cocalc/util/db-schema/llm"; import GPT3Tokenizer from "gpt3-tokenizer"; +import type { History } from "@cocalc/frontend/client/types"; +import type { LanguageModel } from "@cocalc/util/db-schema/llm-utils"; +import { getMaxTokens } from "@cocalc/util/db-schema/llm-utils"; + export { getMaxTokens }; // "For an average English text, it's reasonable to assume that each word is @@ -34,13 +36,6 @@ export function numTokensUpperBound( ); } -export interface Message { - role: "assistant" | "user" | "system"; - content: string; -} - -export type History = Message[]; - /* We truncate the message. For performance considerations (see WARNING by numTokensEstimate above), we may sometimes truncate too much text, since we first compute an estimate on the number diff --git a/src/packages/frontend/project/context.tsx b/src/packages/frontend/project/context.tsx index 5d7ff3c0aa..602f46dc7b 100644 --- a/src/packages/frontend/project/context.tsx +++ b/src/packages/frontend/project/context.tsx @@ -13,7 +13,7 @@ import { } from "@cocalc/frontend/app-framework"; import { UserGroup } from "@cocalc/frontend/projects/store"; import { ProjectStatus } from "@cocalc/frontend/todo-types"; -import { LLMServicesAvailable } from "@cocalc/util/db-schema/llm"; +import { LLMServicesAvailable } from "@cocalc/util/db-schema/llm-utils"; import { KUCALC_COCALC_COM, KUCALC_DISABLED, diff --git a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx index 4b045ab85f..f8c897db25 100644 --- a/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx +++ b/src/packages/frontend/project/page/home-page/ai-generate-jupyter.tsx @@ -19,7 +19,7 @@ import { useActions, useTypedRedux, } from "@cocalc/frontend/app-framework"; -import { ChatStream } from "@cocalc/frontend/client/openai"; +import { ChatStream } from "@cocalc/frontend/client/llm"; import { A, HelpIcon, @@ -50,7 +50,7 @@ import { once } from "@cocalc/util/async-utils"; import { getVendorStatusCheckMD, model2vendor, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import { field_cmp, to_iso_path } from "@cocalc/util/misc"; import { COLORS } from "@cocalc/util/theme"; import { ensure_project_running } from "../../project-start-warning"; @@ -158,7 +158,7 @@ export default function AIGenerateJupyterNotebook({ try { setQuerying(true); - const llmStream = webapp_client.openai_client.languageModelStream({ + const llmStream = webapp_client.openai_client.queryStream({ input, project_id, path: current_path, // mainly for analytics / metadata -- can't put the actual notebook path since the model outputs that. 
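// Editorial sketch (not part of the patch): the hunks above rename the client methods
// `chatgpt` -> `query` and `languageModelStream` -> `queryStream`. A minimal usage sketch,
// assuming it runs inside an async function with `project_id` in scope; the option names
// come from the hunks above, the concrete values are illustrative only.
//   const answer = await webapp_client.openai_client.query({
//     input: "Summarize what this notebook does.",
//     project_id,
//     model: "gpt-3.5-turbo",
//     tag: "example",
//   });
//   // queryStream() instead returns a ChatStream immediately rather than awaiting the
//   // full reply; how its incremental output is consumed is not shown in this excerpt.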
diff --git a/src/packages/frontend/projects/store.ts b/src/packages/frontend/projects/store.ts index 427aa7ddb6..78ea9d7a6e 100644 --- a/src/packages/frontend/projects/store.ts +++ b/src/packages/frontend/projects/store.ts @@ -11,7 +11,7 @@ import { StudentProjectFunctionality } from "@cocalc/frontend/course/configurati import { CUSTOM_IMG_PREFIX } from "@cocalc/frontend/custom-software/util"; import { WebsocketState } from "@cocalc/frontend/project/websocket/websocket-state"; import { webapp_client } from "@cocalc/frontend/webapp-client"; -import { LLMServicesAvailable, LLMVendor } from "@cocalc/util/db-schema/llm"; +import { LLMServicesAvailable, LLMVendor } from "@cocalc/util/db-schema/llm-utils"; import { cmp, coerce_codomain_to_numbers, diff --git a/src/packages/frontend/sagews/chatgpt.ts b/src/packages/frontend/sagews/chatgpt.ts index cf60e1f5f8..0a2cb46290 100644 --- a/src/packages/frontend/sagews/chatgpt.ts +++ b/src/packages/frontend/sagews/chatgpt.ts @@ -1,6 +1,6 @@ import { redux } from "@cocalc/frontend/app-framework"; import { getHelp } from "@cocalc/frontend/frame-editors/llm/help-me-fix"; -import { getValidLanguageModelName } from "@cocalc/util/db-schema/llm"; +import { getValidLanguageModelName } from "@cocalc/util/db-schema/llm-utils"; import { MARKERS } from "@cocalc/util/sagews"; import { SETTINGS_LANGUAGE_MODEL_KEY } from "../account/useLanguageModelSetting"; diff --git a/src/packages/frontend/search/embeddings.ts b/src/packages/frontend/search/embeddings.ts index 50c87e5e76..34e7a473e6 100644 --- a/src/packages/frontend/search/embeddings.ts +++ b/src/packages/frontend/search/embeddings.ts @@ -23,14 +23,15 @@ slightly temporary issues of too much or too little data in the search index are not "fatal data loss" for us, since this is just search. */ -import { webapp_client } from "@cocalc/frontend/webapp-client"; -import { debounce } from "lodash"; import jsonStable from "json-stable-stringify"; +import { debounce } from "lodash"; import sha1 from "sha1"; -import { close, copy_with, len, uuidsha1 } from "@cocalc/util/misc"; -import type { EmbeddingData } from "@cocalc/util/db-schema/openai"; + +import { webapp_client } from "@cocalc/frontend/webapp-client"; import type { SyncDB } from "@cocalc/sync/editor/db"; import type { Document } from "@cocalc/sync/editor/generic/types"; +import { EmbeddingData } from "@cocalc/util/db-schema/llm"; +import { close, copy_with, len, uuidsha1 } from "@cocalc/util/misc"; // How long until we update the index, if users stops using this file actively. const DEBOUNCE_MS = 7500; @@ -113,11 +114,11 @@ class Embeddings { } catch (err) { console.warn( `WARNING: issue syncing embeddings for "${this.path}:"`, - err + err, ); this.waitUntil = Date.now() + 60 * 1000; // wait a bit before trying again. } - }, debounceMs) + }, debounceMs), ); syncdb.once("closed", () => { close(this); @@ -141,7 +142,7 @@ class Embeddings { await this.sync(); } catch (err) { console.warn( - `WARNING: issue initializing embeddings for ${this.url}: ${err}` + `WARNING: issue initializing embeddings for ${this.url}: ${err}`, ); } } diff --git a/src/packages/jupyter/types/types.ts b/src/packages/jupyter/types/types.ts index c731608b10..75557a573e 100644 --- a/src/packages/jupyter/types/types.ts +++ b/src/packages/jupyter/types/types.ts @@ -3,7 +3,7 @@ * License: AGPLv3 s.t. 
"Commons Clause" – see LICENSE.md for details */ -import { LanguageModel } from "@cocalc/util/db-schema/llm"; +import { LanguageModel } from "@cocalc/util/db-schema/llm-utils"; import type * as immutable from "immutable"; export type NotebookMode = "edit" | "escape"; diff --git a/src/packages/next/components/openai/vendor-status-check.tsx b/src/packages/next/components/openai/vendor-status-check.tsx index df532f887b..cf9721ac09 100644 --- a/src/packages/next/components/openai/vendor-status-check.tsx +++ b/src/packages/next/components/openai/vendor-status-check.tsx @@ -1,4 +1,4 @@ -import { LLMVendor } from "@cocalc/util/db-schema/llm"; +import { LLMVendor } from "@cocalc/util/db-schema/llm-utils"; import { unreachable } from "@cocalc/util/misc"; import A from "components/misc/A"; diff --git a/src/packages/server/llm/abuse.ts b/src/packages/server/llm/abuse.ts index d1f7245b5d..05d5ee5f25 100644 --- a/src/packages/server/llm/abuse.ts +++ b/src/packages/server/llm/abuse.ts @@ -29,7 +29,7 @@ import { isFreeModel, isLanguageModel, model2service, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import { isValidUUID } from "@cocalc/util/misc"; const QUOTAS = { diff --git a/src/packages/server/llm/call-llm.ts b/src/packages/server/llm/call-llm.ts index f59ebdfe26..55def49201 100644 --- a/src/packages/server/llm/call-llm.ts +++ b/src/packages/server/llm/call-llm.ts @@ -2,7 +2,7 @@ import { delay } from "awaiting"; import type OpenAI from "openai"; import getLogger from "@cocalc/backend/logger"; -import { ModelOpenAI, OpenAIMessages } from "@cocalc/util/db-schema/llm"; +import { ModelOpenAI, OpenAIMessages } from "@cocalc/util/db-schema/llm-utils"; import { ChatOutput } from "@cocalc/util/types/llm"; import { Stream } from "openai/streaming"; import { totalNumTokens } from "./chatgpt-numtokens"; diff --git a/src/packages/server/llm/client.ts b/src/packages/server/llm/client.ts index 7c5455195a..5162f51e0f 100644 --- a/src/packages/server/llm/client.ts +++ b/src/packages/server/llm/client.ts @@ -15,7 +15,7 @@ import { LanguageModel, isOllamaLLM, model2vendor, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import { unreachable } from "@cocalc/util/misc"; import { VertexAIClient } from "./vertex-ai-client"; diff --git a/src/packages/server/llm/embeddings-api.ts b/src/packages/server/llm/embeddings-api.ts index fe5e251be5..b435537ef6 100644 --- a/src/packages/server/llm/embeddings-api.ts +++ b/src/packages/server/llm/embeddings-api.ts @@ -1,13 +1,13 @@ -import * as embeddings from "./embeddings"; -import { isValidUUID, is_array } from "@cocalc/util/misc"; import isCollaborator from "@cocalc/server/projects/is-collaborator"; -import type { EmbeddingData } from "@cocalc/util/db-schema/openai"; +import type { EmbeddingData } from "@cocalc/util/db-schema/llm"; import { - MAX_SEARCH_TEXT, - MAX_SEARCH_LIMIT, - MAX_SAVE_LIMIT, MAX_REMOVE_LIMIT, -} from "@cocalc/util/db-schema/openai"; + MAX_SAVE_LIMIT, + MAX_SEARCH_LIMIT, + MAX_SEARCH_TEXT, +} from "@cocalc/util/db-schema/llm"; +import { isValidUUID, is_array } from "@cocalc/util/misc"; +import * as embeddings from "./embeddings"; function validateSearchParams({ text, filter, limit, selector, offset }) { if (text != null) { @@ -48,7 +48,7 @@ function validateSearchParams({ text, filter, limit, selector, offset }) { if (selector != null) { if (typeof selector != "object") { throw Error( - "selector must object of the form { include?: string[]; exclude?: string[] }" + "selector must object of 
the form { include?: string[]; exclude?: string[] }", ); } } @@ -96,7 +96,7 @@ export async function search({ async function scopeFilter( account_id: string, scope: string | string[], - filter: object = {} + filter: object = {}, ): Promise { if (typeof scope != "string" && !is_array(scope)) { throw Error("scope must be a string or string[]"); @@ -123,7 +123,7 @@ async function scopeFilter( !(await isCollaborator({ project_id, account_id })) ) { throw Error( - `must be a collaborator on the project with id '${project_id}'` + `must be a collaborator on the project with id '${project_id}'`, ); } knownProjects.add(project_id); @@ -155,7 +155,7 @@ async function prepareData( project_id: string, path: string, data: EmbeddingData[], - needsText: boolean + needsText: boolean, ): Promise { if (!is_array(data)) { throw Error("data must be an array"); @@ -169,7 +169,7 @@ async function prepareData( for (const { id, text, meta, hash } of data) { if (!id || typeof id != "string") { throw Error( - "you must specify the id for each item and it must be a nonempty string" + "you must specify the id for each item and it must be a nonempty string", ); } if (needsText) { @@ -218,7 +218,7 @@ export async function save({ project_id, path, data, - true + true, ); return await embeddings.save(data2, account_id); } diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts index 7a97d03b1f..c59f276474 100644 --- a/src/packages/server/llm/index.ts +++ b/src/packages/server/llm/index.ts @@ -24,12 +24,12 @@ import { OpenAIMessages, getLLMCost, isFreeModel, - isMistralService, + isMistralModel, isOllamaLLM, isValidModel, model2service, model2vendor, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import { ChatOptions, ChatOutput, History } from "@cocalc/util/types/llm"; import { checkForAbuse } from "./abuse"; import { callChatGPTAPI } from "./call-llm"; @@ -104,7 +104,7 @@ async function evaluateImpl({ maxTokens, stream, }); - } else if (isMistralService(model)) { + } else if (isMistralModel(model)) { return await evaluateMistral({ system, history, diff --git a/src/packages/server/llm/mistral.ts b/src/packages/server/llm/mistral.ts index 5237c1e5af..bbd9478a73 100644 --- a/src/packages/server/llm/mistral.ts +++ b/src/packages/server/llm/mistral.ts @@ -8,10 +8,7 @@ import { ChatMessageHistory } from "langchain/stores/message/in_memory"; import getLogger from "@cocalc/backend/logger"; import { getServerSettings } from "@cocalc/database/settings"; -import { - fromMistralService, - isMistralService, -} from "@cocalc/util/db-schema/llm"; +import { isMistralModel } from "@cocalc/util/db-schema/llm-utils"; import { ChatOutput, History } from "@cocalc/util/types/llm"; import { AIMessage, HumanMessage } from "@langchain/core/messages"; @@ -29,11 +26,10 @@ interface MistralOpts { export async function evaluateMistral( opts: Readonly, ): Promise { - if (!isMistralService(opts.model)) { + if (!isMistralModel(opts.model)) { throw new Error(`model ${opts.model} not supported`); } - const model = fromMistralService(opts.model); - const { system, history, input, maxTokens, stream } = opts; + const { system, history, input, maxTokens, stream, model } = opts; log.debug("evaluateMistral", { input, @@ -55,20 +51,6 @@ export async function evaluateMistral( throw new Error(`Mistral api key is not configured.`); } - const msgs: ["ai" | "human", string][] = []; - - if (history) { - let nextRole: "model" | "user" = "user"; - for (const { content } of history) { - if (nextRole === 
"user") { - msgs.push(["human", content]); - } else { - msgs.push(["ai", content]); - } - nextRole = nextRole === "user" ? "model" : "user"; - } - } - const mistral = new ChatMistralAI({ apiKey: mistral_api_key, modelName: model, @@ -76,7 +58,7 @@ export async function evaluateMistral( const prompt = ChatPromptTemplate.fromMessages([ ["system", system ?? ""], - new MessagesPlaceholder("chat_history"), + new MessagesPlaceholder("history"), ["human", "{input}"], ]); @@ -84,12 +66,11 @@ export async function evaluateMistral( const chainWithHistory = new RunnableWithMessageHistory({ runnable: chain, + config: { configurable: { sessionId: "ignored" } }, inputMessagesKey: "input", - historyMessagesKey: "chat_history", + historyMessagesKey: "history", getMessageHistory: async (_) => { const chatHistory = new ChatMessageHistory(); - // await history.addMessage(new HumanMessage("be brief")); - // await history.addMessage(new AIMessage("ok")); if (history) { let nextRole: "model" | "user" = "user"; for (const { content } of history) { @@ -106,16 +87,16 @@ export async function evaluateMistral( }, }); - const chunks = await chainWithHistory.stream( - { input }, - { configurable: { sessionId: "ignored" } }, - ); + const chunks = await chainWithHistory.stream({ input }); let output = ""; for await (const chunk of chunks) { - if (typeof chunk !== "string") continue; - output += chunk; - opts.stream?.(chunk); + const { content } = chunk; + log.debug(typeof chunk, { content, chunk }); + + if (typeof content !== "string") continue; + output += content; + opts.stream?.(content); } // and an empty call when done diff --git a/src/packages/server/llm/ollama.ts b/src/packages/server/llm/ollama.ts index dc059930a7..e7b6d206a1 100644 --- a/src/packages/server/llm/ollama.ts +++ b/src/packages/server/llm/ollama.ts @@ -6,10 +6,10 @@ import { RunnableWithMessageHistory } from "@langchain/core/runnables"; import { ChatMessageHistory } from "langchain/stores/message/in_memory"; import getLogger from "@cocalc/backend/logger"; -import { fromOllamaModel, isOllamaLLM } from "@cocalc/util/db-schema/llm"; +import { fromOllamaModel, isOllamaLLM } from "@cocalc/util/db-schema/llm-utils"; import { ChatOutput, History } from "@cocalc/util/types/llm"; -import { getOllama } from "./client"; import { AIMessage, HumanMessage } from "@langchain/core/messages"; +import { getOllama } from "./client"; const log = getLogger("llm:ollama"); @@ -42,20 +42,6 @@ export async function evaluateOllama( const ollama = await getOllama(model); - const msgs: ["ai" | "human", string][] = []; - - if (history) { - let nextRole: "model" | "user" = "user"; - for (const { content } of history) { - if (nextRole === "user") { - msgs.push(["human", content]); - } else { - msgs.push(["ai", content]); - } - nextRole = nextRole === "user" ? "model" : "user"; - } - } - const prompt = ChatPromptTemplate.fromMessages([ ["system", system ?? 
""], new MessagesPlaceholder("chat_history"), @@ -66,6 +52,7 @@ export async function evaluateOllama( const chainWithHistory = new RunnableWithMessageHistory({ runnable: chain, + config: { configurable: { sessionId: "ignored" } }, inputMessagesKey: "input", historyMessagesKey: "chat_history", getMessageHistory: async (_) => { @@ -88,10 +75,7 @@ export async function evaluateOllama( }, }); - const chunks = await chainWithHistory.stream( - { input }, - { configurable: { sessionId: "ignored" } }, - ); + const chunks = await chainWithHistory.stream({ input }); let output = ""; for await (const chunk of chunks) { diff --git a/src/packages/server/llm/save-response.ts b/src/packages/server/llm/save-response.ts index 11a4a489ad..7abb6be462 100644 --- a/src/packages/server/llm/save-response.ts +++ b/src/packages/server/llm/save-response.ts @@ -2,12 +2,12 @@ import getLogger from "@cocalc/backend/logger"; import getPool from "@cocalc/database/pool"; import { pii_retention_to_future } from "@cocalc/database/postgres/pii"; import { getServerSettings } from "@cocalc/database/settings/server-settings"; -import { ChatGPTLogEntry } from "@cocalc/util/db-schema/openai"; +import { LLMLogEntry } from "@cocalc/util/db-schema/llm"; const log = getLogger("llm:save-response"); // time, id is set by the database, and expire in the saveResponse function -type SaveResponseProps = Omit; +type SaveResponseProps = Omit; // Save the response to the database. @@ -29,7 +29,7 @@ export async function saveResponse({ total_time_s, total_tokens, }: SaveResponseProps) { - const expire: ChatGPTLogEntry["expire"] = await getExpiration(account_id); + const expire: LLMLogEntry["expire"] = await getExpiration(account_id); const pool = getPool(); try { await pool.query( diff --git a/src/packages/server/llm/vertex-ai-client.ts b/src/packages/server/llm/vertex-ai-client.ts index c7a8cf360c..ad35007713 100644 --- a/src/packages/server/llm/vertex-ai-client.ts +++ b/src/packages/server/llm/vertex-ai-client.ts @@ -5,7 +5,7 @@ */ import getLogger from "@cocalc/backend/logger"; -import { LanguageModel } from "@cocalc/util/db-schema/llm"; +import { LanguageModel } from "@cocalc/util/db-schema/llm-utils"; import { ChatOutput, History } from "@cocalc/util/types/llm"; import { DiscussServiceClient, diff --git a/src/packages/server/purchases/get-service-cost.ts b/src/packages/server/purchases/get-service-cost.ts index 5efb7daad3..a895c7727d 100644 --- a/src/packages/server/purchases/get-service-cost.ts +++ b/src/packages/server/purchases/get-service-cost.ts @@ -9,7 +9,7 @@ import { getLLMCost, isLanguageModelService, service2model, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import type { Service } from "@cocalc/util/db-schema/purchases"; import { unreachable } from "@cocalc/util/misc"; diff --git a/src/packages/server/purchases/is-purchase-allowed.ts b/src/packages/server/purchases/is-purchase-allowed.ts index aa36e951d2..4149b2afbe 100644 --- a/src/packages/server/purchases/is-purchase-allowed.ts +++ b/src/packages/server/purchases/is-purchase-allowed.ts @@ -5,7 +5,7 @@ import { getMaxCost, isLanguageModelService, service2model, -} from "@cocalc/util/db-schema/llm"; +} from "@cocalc/util/db-schema/llm-utils"; import { QUOTA_SPEC, Service } from "@cocalc/util/db-schema/purchase-quotas"; import { MAX_COST } from "@cocalc/util/db-schema/purchases"; import { currency, round2up, round2down } from "@cocalc/util/misc"; diff --git a/src/packages/util/db-schema/index.ts b/src/packages/util/db-schema/index.ts index 
ec3c8ac4c5..ea4eaee6b5 100644 --- a/src/packages/util/db-schema/index.ts +++ b/src/packages/util/db-schema/index.ts @@ -36,10 +36,10 @@ import "./hub-servers"; import "./instances"; // probably deprecated import "./jupyter"; import "./listings"; +import "./llm"; import "./lti"; import "./mentions"; import "./news"; -import "./openai"; import "./organizations"; import "./password-reset"; import "./pg-system"; @@ -50,8 +50,8 @@ import "./project-status"; import "./projects"; import "./public-path-stars"; import "./public-paths"; -import "./purchases"; import "./purchase-quotas"; +import "./purchases"; import "./registration-tokens"; import "./retention"; import "./server-settings"; diff --git a/src/packages/util/db-schema/llm.test.ts b/src/packages/util/db-schema/llm-utils.test.ts similarity index 98% rename from src/packages/util/db-schema/llm.test.ts rename to src/packages/util/db-schema/llm-utils.test.ts index 03d79ab78d..9e5817dffa 100644 --- a/src/packages/util/db-schema/llm.test.ts +++ b/src/packages/util/db-schema/llm-utils.test.ts @@ -7,7 +7,7 @@ import { LLM_COST, OLLAMA_PREFIX, USER_SELECTABLE_LANGUAGE_MODELS, -} from "./llm"; +} from "./llm-utils"; describe("llm", () => { test("isFreeModel", () => { diff --git a/src/packages/util/db-schema/llm-utils.ts b/src/packages/util/db-schema/llm-utils.ts new file mode 100644 index 0000000000..2269fd31e8 --- /dev/null +++ b/src/packages/util/db-schema/llm-utils.ts @@ -0,0 +1,456 @@ +// this contains bits and pieces from the wrongly named openai.ts file + +import type { Service } from "@cocalc/util/db-schema/purchases"; +import { unreachable } from "@cocalc/util/misc"; + +const MODELS_OPENAI = [ + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-4", + "gpt-4-32k", +] as const; + +export type ModelOpenAI = (typeof MODELS_OPENAI)[number]; + +// ATTN: when you modify this list, also change frontend/.../llm/model-switch.tsx! +export const MISTRAL_MODELS = [ + // yes, all 3 of them have an extra mistral-prefix, on top of the vendor prefix + "mistral-small-latest", + "mistral-medium-latest", + "mistral-large-latest", +] as const; + +export type MistralModel = (typeof MISTRAL_MODELS)[number]; + +export function isMistralModel(model: unknown): model is MistralModel { + return MISTRAL_MODELS.includes(model as any); +} + +// the hardcoded list of available language models – there are also dynamic ones, like OllamaLLM objects +export const LANGUAGE_MODELS = [ + ...MODELS_OPENAI, + ...MISTRAL_MODELS, + // google's are taken from here – we use the generative AI client lib + // https://developers.generativeai.google/models/language + "text-bison-001", + "chat-bison-001", + "embedding-gecko-001", + "text-embedding-ada-002", + "gemini-pro", +] as const; + +// This hardcodes which models can be selected by users. +// Make sure to update this when adding new models. +// This is used in e.g. 
mentionable-users.tsx, model-switch.tsx and other-settings.tsx +export const USER_SELECTABLE_LANGUAGE_MODELS = [ + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-4", + // "chat-bison-001", // PaLM2 is not good, replies with no response too often + "gemini-pro", + ...MISTRAL_MODELS, +] as const; + +export type OllamaLLM = string; + +export type LanguageModel = (typeof LANGUAGE_MODELS)[number] | OllamaLLM; + +// we check if the given object is any known language model +export function isLanguageModel(model?: unknown): model is LanguageModel { + if (model == null) return false; + if (typeof model !== "string") return false; + if (isOllamaLLM(model)) return true; + return LANGUAGE_MODELS.includes(model as any); +} + +export interface LLMServicesAvailable { + google: boolean; + openai: boolean; + ollama: boolean; + mistral: boolean; +} + +// this is used in initialization functions. e.g. to get a default model depending on the overall availability +// usually, this should just return the chatgpt3 model, but e.g. if neither google or openai is available, +// then it might even falls back to an available ollama model. It needs to return a string, though, for the frontend, etc. +export function getValidLanguageModelName( + model: string | undefined, + filter: LLMServicesAvailable = { + google: true, + openai: true, + ollama: false, + mistral: false, + }, + ollama: string[] = [], // keys of ollama models +): LanguageModel { + const dftl = + filter.openai === true + ? DEFAULT_MODEL + : filter.ollama && ollama?.length > 0 + ? toOllamaModel(ollama[0]) + : "chat-bison-001"; + if (model == null) { + return dftl; + } + if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) { + return model; + } + if (typeof model === "string" && isLanguageModel(model)) { + return model; + } + return dftl; +} + +export interface OpenAIMessage { + role: "system" | "user" | "assistant"; + content: string; +} +export type OpenAIMessages = OpenAIMessage[]; + +export const OLLAMA_PREFIX = "ollama-"; +export type OllamaService = string; +export function isOllamaService(service: string): service is OllamaService { + return isOllamaLLM(service); +} + +export const MISTRAL_PREFIX = "mistralai-"; +export type MistralService = string; +export function isMistralService(service: string): service is MistralService { + return service.startsWith(MISTRAL_PREFIX); +} + +// we encode the in the frontend and elsewhere with the service name as a prefix +// ATTN: don't change the encoding pattern of [vendor]-[model] +// for whatever reason, it's also described that way in purchases/close.ts +export type LanguageService = + | "openai-gpt-3.5-turbo" + | "openai-gpt-3.5-turbo-16k" + | "openai-gpt-4" + | "openai-gpt-4-32k" + | "openai-text-embedding-ada-002" + | "google-text-bison-001" + | "google-chat-bison-001" + | "google-embedding-gecko-001" + | "google-gemini-pro" + | OllamaService + | MistralService; + +export const LANGUAGE_MODEL_VENDORS = [ + "openai", + "google", + "ollama", + "mistralai", // the "*ai" is deliberately, because their model names start with "mistral-..." and we have to distinguish it from the prefix +] as const; +export type LLMVendor = (typeof LANGUAGE_MODEL_VENDORS)[number]; + +// used e.g. 
for checking "account-id={string}" and other things like that +export const LANGUAGE_MODEL_PREFIXES = [ + "chatgpt", + ...LANGUAGE_MODEL_VENDORS.map((v) => `${v}-`), +] as const; + +// we encode the in the frontend and elsewhere with the service name as a prefix +export function model2service(model: LanguageModel): LanguageService { + if (model === "text-embedding-ada-002") { + return `openai-${model}`; + } + if (isOllamaLLM(model)) { + return toOllamaModel(model); + } + if (isMistralModel(model)) { + return toMistralService(model); + } + if (isLanguageModel(model)) { + if ( + model === "text-bison-001" || + model === "chat-bison-001" || + model === "embedding-gecko-001" || + model === "gemini-pro" + ) { + return `google-${model}`; + } else { + return `openai-${model}`; + } + } + + throw new Error(`unknown model: ${model}`); +} + +// inverse of model2service, but robust for chat avatars, which might not have a prefix +// TODO: fix the mess +export function service2model( + service: LanguageService | "chatgpt", +): LanguageModel { + if (service === "chatgpt") { + return "gpt-3.5-turbo"; + } + + // split off the first part of service, e.g., "openai-" or "google-" + const s = service.split("-")[0]; + const hasPrefix = + s === "openai" || s === "google" || s === "ollama" || s === "mistral"; + const m = hasPrefix ? service.split("-").slice(1).join("-") : service; + if (hasPrefix && s === "ollama") { + return toOllamaModel(m); + } + if (!LANGUAGE_MODELS.includes(m as any)) { + // We don't throw an error, since the frontend would crash + // throw new Error(`unknown service: ${service}`); + console.warn(`service2model: unknown service: ${service}`); + return "gpt-3.5-turbo"; + } + return m as LanguageModel; +} + +// Note: this must be an OpenAI model – otherwise change the getValidLanguageModelName function +export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo"; + +export function model2vendor(model): LLMVendor { + if (isOllamaLLM(model)) { + return "ollama"; + } else if (isMistralModel(model)) { + return "mistralai"; + } else if (model.startsWith("gpt-")) { + return "openai"; + } else { + return "google"; + } +} + +// wraps the model name in an object that indicates that it's an ollama model +// TODO: maybe it will be necessary at some point to pass in the list of available ollama models +// TODO: in the future, this object will also contain info like the max tokens and other parameters (from the DB) +export function toOllamaModel(model: string): OllamaLLM { + if (isOllamaLLM(model)) { + throw new Error(`already an ollama model: ${model}`); + } + return `${OLLAMA_PREFIX}${model}`; +} + +// unwraps the model name from an object that indicates that it's an ollama model +export function fromOllamaModel(model: OllamaLLM) { + if (!isOllamaLLM(model)) { + throw new Error(`not an ollama model: ${model}`); + } + return model.slice(OLLAMA_PREFIX.length); +} + +export function isOllamaLLM(model: unknown): model is OllamaLLM { + return ( + typeof model === "string" && + model.startsWith(OLLAMA_PREFIX) && + model.length > OLLAMA_PREFIX.length + ); +} + +export function toMistralService(model: string): MistralService { + if (isMistralService(model)) { + throw new Error(`already a mistral model: ${model}`); + } + return `${MISTRAL_PREFIX}${model}`; +} + +export function fromMistralService(model: MistralService) { + if (!isMistralService(model)) { + throw new Error(`not a mistral model: ${model}`); + } + return model.slice(MISTRAL_PREFIX.length); +} + +// Map from psuedo account_id to what should be 
displayed to user. +// This is used in various places in the frontend. +// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing +export const LLM_USERNAMES: { + [key in + | (typeof USER_SELECTABLE_LANGUAGE_MODELS)[number] + | "chatgpt" // some additional ones, backwards compatibility + | "chatgpt3" + | "chatgpt4" + | "gpt-4-32k" + | "text-bison-001" + | "chat-bison-001"]: string; +} = { + chatgpt: "GPT-3.5", + chatgpt3: "GPT-3.5", + chatgpt4: "GPT-4", + "gpt-4": "GPT-4", + "gpt-4-32k": "GPT-4-32k", + "gpt-3.5-turbo": "GPT-3.5", + "gpt-3.5-turbo-16k": "GPT-3.5-16k", + "text-bison-001": "PaLM 2", + "chat-bison-001": "PaLM 2", + "gemini-pro": "Gemini Pro", + "mistral-small-latest": "Mistral AI Small", + "mistral-medium-latest": "Mistral AI Medium", + "mistral-large-latest": "Mistral AI Large", +} as const; + +export function isFreeModel(model: unknown) { + if (isOllamaLLM(model)) return true; + if (isMistralModel(model)) return true; + if (LANGUAGE_MODELS.includes(model as any)) { + // of these models, the following are free + return ( + (model as LanguageModel) == "gpt-3.5-turbo" || + (model as LanguageModel) == "text-bison-001" || + (model as LanguageModel) == "chat-bison-001" || + (model as LanguageModel) == "embedding-gecko-001" || + (model as LanguageModel) == "gemini-pro" + ); + } + // all others are free + return true; +} + +// this is used in purchases/get-service-cost +// we only need to check for the vendor prefixes, no special cases! +export function isLanguageModelService( + service: Service, +): service is LanguageService { + for (const v of LANGUAGE_MODEL_VENDORS) { + if (service.startsWith(`${v}-`)) { + return true; + } + } + return false; +} + +export function getVendorStatusCheckMD(vendor: LLMVendor): string { + switch (vendor) { + case "openai": + return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`; + case "google": + return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`; + case "ollama": + return `No status information for Ollama available – you have to check with the particular backend for the model.`; + case "mistralai": + return `No status information for Mistral AI available.`; + default: + unreachable(vendor); + } + return ""; +} + +export function llmSupportsStreaming(model: LanguageModel): boolean { + return ( + model2vendor(model) === "openai" || + model === "gemini-pro" || + model2vendor(model) === "mistralai" + ); +} + +interface Cost { + prompt_tokens: number; + completion_tokens: number; + max_tokens: number; +} + +// This is the official published cost that openai charges. +// It changes over time, so this will sometimes need to be updated. +// Our cost is a configurable multiple of this. +// https://openai.com/pricing#language-models +// There appears to be no api that provides the prices, unfortunately. 
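// Illustrative worked example (editorial, not from the patch): with the gpt-4 rates below,
// a call using 1,000 prompt tokens and 500 completion tokens costs roughly
//   1000 * (0.03 / 1000) + 500 * (0.06 / 1000) = $0.03 + $0.03 = $0.06
// before the configurable markup is applied.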
+export const LLM_COST: { [name in string]: Cost } = { + "gpt-4": { + prompt_tokens: 0.03 / 1000, + completion_tokens: 0.06 / 1000, + max_tokens: 8192, + }, + "gpt-4-32k": { + prompt_tokens: 0.06 / 1000, + completion_tokens: 0.12 / 1000, + max_tokens: 32768, + }, + "gpt-3.5-turbo": { + prompt_tokens: 0.0015 / 1000, + completion_tokens: 0.002 / 1000, + max_tokens: 4096, + }, + "gpt-3.5-turbo-16k": { + prompt_tokens: 0.003 / 1000, + completion_tokens: 0.004 / 1000, + max_tokens: 16384, + }, + "text-embedding-ada-002": { + prompt_tokens: 0.0001 / 1000, + completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings + max_tokens: 8191, + }, + // https://developers.generativeai.google/models/language + "text-bison-001": { + // we assume 5 characters is 1 token on average + prompt_tokens: (5 * 0.0005) / 1000, + completion_tokens: (5 * 0.0005) / 1000, + max_tokens: 8196, + }, + "chat-bison-001": { + // we assume 5 characters is 1 token on average + prompt_tokens: (5 * 0.0005) / 1000, + completion_tokens: (5 * 0.0005) / 1000, + max_tokens: 8196, + }, + "embedding-gecko-001": { + prompt_tokens: (5 * 0.0001) / 1000, + completion_tokens: 0, + max_tokens: 8196, // ??? + }, + "gemini-pro": { + // https://ai.google.dev/models/gemini + prompt_tokens: (5 * 0.0001) / 1000, + completion_tokens: 0, + max_tokens: 30720, + }, +} as const; + +export function isValidModel(model?: string): boolean { + if (model == null) return false; + if (isOllamaLLM(model)) return true; + if (isMistralModel(model)) return true; + return LLM_COST[model ?? ""] != null; +} + +export function getMaxTokens(model?: LanguageModel): number { + // TODO: store max tokens in the model object itself, this is just a fallback + if (isOllamaLLM(model)) return 8192; + if (isMistralModel(model)) return 4096; // TODO: check with MistralAI + return LLM_COST[model ?? ""]?.max_tokens ?? 4096; +} + +export interface LLMCost { + prompt_tokens: number; + completion_tokens: number; +} + +export function getLLMCost( + model: LanguageModel, + markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3 +): LLMCost { + const x = LLM_COST[model]; + if (x == null) { + throw Error(`unknown model "${model}"`); + } + const { prompt_tokens, completion_tokens } = x; + if (markup_percentage < 0) { + throw Error("markup percentage can't be negative"); + } + const f = 1 + markup_percentage / 100; + return { + prompt_tokens: prompt_tokens * f, + completion_tokens: completion_tokens * f, + }; +} + +// The maximum cost for one single call using the given model. +// We can't know the cost until after it happens, so this bound is useful for +// ensuring user can afford to make a call. 
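// Illustrative worked example (editorial, not from the patch): for "gpt-4" with a 30% markup,
//   getMaxCost("gpt-4", 30) = max(0.03, 0.06) / 1000 * 1.3 * 8192 ≈ $0.64,
// i.e. the worst case in which all 8192 tokens were billed at the marked-up completion rate.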
+export function getMaxCost( + model: LanguageModel, + markup_percentage: number, +): number { + const { prompt_tokens, completion_tokens } = getLLMCost( + model, + markup_percentage, + ); + const { max_tokens } = LLM_COST[model]; + return Math.max(prompt_tokens, completion_tokens) * max_tokens; +} diff --git a/src/packages/util/db-schema/llm.ts b/src/packages/util/db-schema/llm.ts index 2269fd31e8..ee72184661 100644 --- a/src/packages/util/db-schema/llm.ts +++ b/src/packages/util/db-schema/llm.ts @@ -1,456 +1,246 @@ -// this contains bits and pieces from the wrongly named openai.ts file - -import type { Service } from "@cocalc/util/db-schema/purchases"; -import { unreachable } from "@cocalc/util/misc"; - -const MODELS_OPENAI = [ - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-4", - "gpt-4-32k", -] as const; - -export type ModelOpenAI = (typeof MODELS_OPENAI)[number]; - -// ATTN: when you modify this list, also change frontend/.../llm/model-switch.tsx! -export const MISTRAL_MODELS = [ - // yes, all 3 of them have an extra mistral-prefix, on top of the vendor prefix - "mistral-small-latest", - "mistral-medium-latest", - "mistral-large-latest", -] as const; - -export type MistralModel = (typeof MISTRAL_MODELS)[number]; - -export function isMistralModel(model: unknown): model is MistralModel { - return MISTRAL_MODELS.includes(model as any); -} - -// the hardcoded list of available language models – there are also dynamic ones, like OllamaLLM objects -export const LANGUAGE_MODELS = [ - ...MODELS_OPENAI, - ...MISTRAL_MODELS, - // google's are taken from here – we use the generative AI client lib - // https://developers.generativeai.google/models/language - "text-bison-001", - "chat-bison-001", - "embedding-gecko-001", - "text-embedding-ada-002", - "gemini-pro", -] as const; - -// This hardcodes which models can be selected by users. -// Make sure to update this when adding new models. -// This is used in e.g. mentionable-users.tsx, model-switch.tsx and other-settings.tsx -export const USER_SELECTABLE_LANGUAGE_MODELS = [ - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-4", - // "chat-bison-001", // PaLM2 is not good, replies with no response too often - "gemini-pro", - ...MISTRAL_MODELS, -] as const; - -export type OllamaLLM = string; - -export type LanguageModel = (typeof LANGUAGE_MODELS)[number] | OllamaLLM; - -// we check if the given object is any known language model -export function isLanguageModel(model?: unknown): model is LanguageModel { - if (model == null) return false; - if (typeof model !== "string") return false; - if (isOllamaLLM(model)) return true; - return LANGUAGE_MODELS.includes(model as any); -} - -export interface LLMServicesAvailable { - google: boolean; - openai: boolean; - ollama: boolean; - mistral: boolean; -} - -// this is used in initialization functions. e.g. to get a default model depending on the overall availability -// usually, this should just return the chatgpt3 model, but e.g. if neither google or openai is available, -// then it might even falls back to an available ollama model. It needs to return a string, though, for the frontend, etc. -export function getValidLanguageModelName( - model: string | undefined, - filter: LLMServicesAvailable = { - google: true, - openai: true, - ollama: false, - mistral: false, - }, - ollama: string[] = [], // keys of ollama models -): LanguageModel { - const dftl = - filter.openai === true - ? DEFAULT_MODEL - : filter.ollama && ollama?.length > 0 - ? 
toOllamaModel(ollama[0]) - : "chat-bison-001"; - if (model == null) { - return dftl; - } - if (isOllamaLLM(model) && ollama.includes(fromOllamaModel(model))) { - return model; - } - if (typeof model === "string" && isLanguageModel(model)) { - return model; - } - return dftl; -} - -export interface OpenAIMessage { - role: "system" | "user" | "assistant"; - content: string; -} -export type OpenAIMessages = OpenAIMessage[]; - -export const OLLAMA_PREFIX = "ollama-"; -export type OllamaService = string; -export function isOllamaService(service: string): service is OllamaService { - return isOllamaLLM(service); -} - -export const MISTRAL_PREFIX = "mistralai-"; -export type MistralService = string; -export function isMistralService(service: string): service is MistralService { - return service.startsWith(MISTRAL_PREFIX); -} - -// we encode the in the frontend and elsewhere with the service name as a prefix -// ATTN: don't change the encoding pattern of [vendor]-[model] -// for whatever reason, it's also described that way in purchases/close.ts -export type LanguageService = - | "openai-gpt-3.5-turbo" - | "openai-gpt-3.5-turbo-16k" - | "openai-gpt-4" - | "openai-gpt-4-32k" - | "openai-text-embedding-ada-002" - | "google-text-bison-001" - | "google-chat-bison-001" - | "google-embedding-gecko-001" - | "google-gemini-pro" - | OllamaService - | MistralService; - -export const LANGUAGE_MODEL_VENDORS = [ - "openai", - "google", - "ollama", - "mistralai", // the "*ai" is deliberately, because their model names start with "mistral-..." and we have to distinguish it from the prefix -] as const; -export type LLMVendor = (typeof LANGUAGE_MODEL_VENDORS)[number]; - -// used e.g. for checking "account-id={string}" and other things like that -export const LANGUAGE_MODEL_PREFIXES = [ - "chatgpt", - ...LANGUAGE_MODEL_VENDORS.map((v) => `${v}-`), -] as const; - -// we encode the in the frontend and elsewhere with the service name as a prefix -export function model2service(model: LanguageModel): LanguageService { - if (model === "text-embedding-ada-002") { - return `openai-${model}`; - } - if (isOllamaLLM(model)) { - return toOllamaModel(model); - } - if (isMistralModel(model)) { - return toMistralService(model); - } - if (isLanguageModel(model)) { - if ( - model === "text-bison-001" || - model === "chat-bison-001" || - model === "embedding-gecko-001" || - model === "gemini-pro" - ) { - return `google-${model}`; - } else { - return `openai-${model}`; - } - } - - throw new Error(`unknown model: ${model}`); -} - -// inverse of model2service, but robust for chat avatars, which might not have a prefix -// TODO: fix the mess -export function service2model( - service: LanguageService | "chatgpt", -): LanguageModel { - if (service === "chatgpt") { - return "gpt-3.5-turbo"; - } - - // split off the first part of service, e.g., "openai-" or "google-" - const s = service.split("-")[0]; - const hasPrefix = - s === "openai" || s === "google" || s === "ollama" || s === "mistral"; - const m = hasPrefix ? 
service.split("-").slice(1).join("-") : service; - if (hasPrefix && s === "ollama") { - return toOllamaModel(m); - } - if (!LANGUAGE_MODELS.includes(m as any)) { - // We don't throw an error, since the frontend would crash - // throw new Error(`unknown service: ${service}`); - console.warn(`service2model: unknown service: ${service}`); - return "gpt-3.5-turbo"; - } - return m as LanguageModel; -} - -// Note: this must be an OpenAI model – otherwise change the getValidLanguageModelName function -export const DEFAULT_MODEL: LanguageModel = "gpt-3.5-turbo"; - -export function model2vendor(model): LLMVendor { - if (isOllamaLLM(model)) { - return "ollama"; - } else if (isMistralModel(model)) { - return "mistralai"; - } else if (model.startsWith("gpt-")) { - return "openai"; - } else { - return "google"; - } -} - -// wraps the model name in an object that indicates that it's an ollama model -// TODO: maybe it will be necessary at some point to pass in the list of available ollama models -// TODO: in the future, this object will also contain info like the max tokens and other parameters (from the DB) -export function toOllamaModel(model: string): OllamaLLM { - if (isOllamaLLM(model)) { - throw new Error(`already an ollama model: ${model}`); - } - return `${OLLAMA_PREFIX}${model}`; -} - -// unwraps the model name from an object that indicates that it's an ollama model -export function fromOllamaModel(model: OllamaLLM) { - if (!isOllamaLLM(model)) { - throw new Error(`not an ollama model: ${model}`); - } - return model.slice(OLLAMA_PREFIX.length); -} - -export function isOllamaLLM(model: unknown): model is OllamaLLM { - return ( - typeof model === "string" && - model.startsWith(OLLAMA_PREFIX) && - model.length > OLLAMA_PREFIX.length - ); -} - -export function toMistralService(model: string): MistralService { - if (isMistralService(model)) { - throw new Error(`already a mistral model: ${model}`); - } - return `${MISTRAL_PREFIX}${model}`; -} - -export function fromMistralService(model: MistralService) { - if (!isMistralService(model)) { - throw new Error(`not a mistral model: ${model}`); - } - return model.slice(MISTRAL_PREFIX.length); -} - -// Map from psuedo account_id to what should be displayed to user. -// This is used in various places in the frontend. 
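A quick sketch of the `[vendor]-[model]` encoding that the helpers above implement may help when reading the rest of the series. This is illustrative only; the return values are what the function bodies above suggest, and the import path assumes the helpers end up in llm-utils.ts as in the later commits of this series.

```ts
import {
  model2service,
  service2model,
  toOllamaModel,
  fromOllamaModel,
  toMistralService,
} from "@cocalc/util/db-schema/llm-utils";

// OpenAI and Google models get the plain vendor prefix:
model2service("gpt-4");      // "openai-gpt-4"
model2service("gemini-pro"); // "google-gemini-pro"

// Mistral models end up with a double "mistral" in the name, since the
// model ids themselves already start with "mistral-":
toMistralService("mistral-large-latest"); // "mistralai-mistral-large-latest"

// Ollama models are wrapped and unwrapped explicitly:
const ollama = toOllamaModel("llama2"); // "ollama-llama2"
fromOllamaModel(ollama);                // "llama2"

// service2model is the lenient inverse used for chat avatars:
service2model("openai-gpt-4"); // "gpt-4"
service2model("chatgpt");      // "gpt-3.5-turbo" (legacy fallback)
```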
-// Google PaLM: https://cloud.google.com/vertex-ai/docs/generative-ai/pricing -export const LLM_USERNAMES: { - [key in - | (typeof USER_SELECTABLE_LANGUAGE_MODELS)[number] - | "chatgpt" // some additional ones, backwards compatibility - | "chatgpt3" - | "chatgpt4" - | "gpt-4-32k" - | "text-bison-001" - | "chat-bison-001"]: string; -} = { - chatgpt: "GPT-3.5", - chatgpt3: "GPT-3.5", - chatgpt4: "GPT-4", - "gpt-4": "GPT-4", - "gpt-4-32k": "GPT-4-32k", - "gpt-3.5-turbo": "GPT-3.5", - "gpt-3.5-turbo-16k": "GPT-3.5-16k", - "text-bison-001": "PaLM 2", - "chat-bison-001": "PaLM 2", - "gemini-pro": "Gemini Pro", - "mistral-small-latest": "Mistral AI Small", - "mistral-medium-latest": "Mistral AI Medium", - "mistral-large-latest": "Mistral AI Large", -} as const; - -export function isFreeModel(model: unknown) { - if (isOllamaLLM(model)) return true; - if (isMistralModel(model)) return true; - if (LANGUAGE_MODELS.includes(model as any)) { - // of these models, the following are free - return ( - (model as LanguageModel) == "gpt-3.5-turbo" || - (model as LanguageModel) == "text-bison-001" || - (model as LanguageModel) == "chat-bison-001" || - (model as LanguageModel) == "embedding-gecko-001" || - (model as LanguageModel) == "gemini-pro" - ); - } - // all others are free - return true; -} - -// this is used in purchases/get-service-cost -// we only need to check for the vendor prefixes, no special cases! -export function isLanguageModelService( - service: Service, -): service is LanguageService { - for (const v of LANGUAGE_MODEL_VENDORS) { - if (service.startsWith(`${v}-`)) { - return true; - } - } - return false; -} - -export function getVendorStatusCheckMD(vendor: LLMVendor): string { - switch (vendor) { - case "openai": - return `OpenAI [status](https://status.openai.com) and [downdetector](https://downdetector.com/status/openai).`; - case "google": - return `Google [status](https://status.cloud.google.com) and [downdetector](https://downdetector.com/status/google-cloud).`; - case "ollama": - return `No status information for Ollama available – you have to check with the particular backend for the model.`; - case "mistralai": - return `No status information for Mistral AI available.`; - default: - unreachable(vendor); - } - return ""; -} - -export function llmSupportsStreaming(model: LanguageModel): boolean { - return ( - model2vendor(model) === "openai" || - model === "gemini-pro" || - model2vendor(model) === "mistralai" - ); -} - -interface Cost { +// NOTE: this is not just OpenAI, but also includes other models that we use +// Mentally, just ignore "openai" and instead focus on "gpt-*" or "codey" or whatever they are called. +// TODO: refactor this, the names of the tables, etc. to be more generic. + +import { History } from "@cocalc/util/types/llm"; +import { CREATED_BY, ID } from "./crm"; +import { SCHEMA as schema } from "./index"; +import { LanguageModel } from "./llm-utils"; +import { Table } from "./types"; + +export interface LLMLogEntry { + id: number; + account_id?: string; + analytics_cookie?: string; // at least one of analytics_cookie or account_id will be set + expire?: Date; + history?: History; + input: string; + model?: LanguageModel; + output: string; + path?: string; + project_id?: string; prompt_tokens: number; - completion_tokens: number; - max_tokens: number; -} - -// This is the official published cost that openai charges. -// It changes over time, so this will sometimes need to be updated. -// Our cost is a configurable multiple of this. 
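For orientation, here is a rough truth table for the helpers removed in this hunk (they move to llm-utils.ts). The concrete values below are assumptions read off the function bodies above, not part of the patch itself.

```ts
import {
  isFreeModel,
  isLanguageModelService,
  llmSupportsStreaming,
} from "@cocalc/util/db-schema/llm-utils";

isFreeModel("gpt-3.5-turbo"); // true  – explicitly in the free list
isFreeModel("gpt-4");         // false – not in the hardcoded free list
isFreeModel("ollama-llama2"); // true  – all Ollama models are treated as free

isLanguageModelService("openai-gpt-4"); // true  – matches the "openai-" vendor prefix
isLanguageModelService("credit");       // false – no vendor prefix, so not an LLM service

llmSupportsStreaming("gpt-4");          // true  – OpenAI models stream
llmSupportsStreaming("chat-bison-001"); // false – PaLM 2 is not streamed here
```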
-// https://openai.com/pricing#language-models -// There appears to be no api that provides the prices, unfortunately. -export const LLM_COST: { [name in string]: Cost } = { - "gpt-4": { - prompt_tokens: 0.03 / 1000, - completion_tokens: 0.06 / 1000, - max_tokens: 8192, - }, - "gpt-4-32k": { - prompt_tokens: 0.06 / 1000, - completion_tokens: 0.12 / 1000, - max_tokens: 32768, + system?: string; + tag?: string; // useful for keeping track of where queries come frome when doing analytics later + time: Date; + total_time_s: number; // how long the request took in s + total_tokens: number; +} + +Table({ + name: "openai_chatgpt_log", // historically a wrong name, don't change it + fields: { + id: ID, + time: { type: "timestamp", desc: "When this particular chat happened." }, + analytics_cookie: { + title: "Analytics Cookie", + type: "string", + desc: "The analytics cookie for the user that asked this question.", + }, + account_id: CREATED_BY, + system: { + title: "System Context", + type: "string", + desc: "System context prompt.", + render: { + type: "markdown", + }, + }, + input: { + title: "Input", + type: "string", + desc: "Input text that was sent to chatgpt", + render: { + type: "markdown", + }, + }, + output: { + title: "Output", + type: "string", + desc: "Output text that was returned from chatgpt", + render: { + type: "markdown", + }, + }, + history: { + title: "History", + type: "array", + pg_type: "JSONB[]", + desc: "Historical context for this thread of discussion", + render: { + type: "json", + }, + }, + total_tokens: { + type: "integer", + desc: "The total number of tokens involved in this API call.", + }, + prompt_tokens: { + type: "integer", + desc: "The number of tokens in the prompt.", + }, + total_time_s: { + type: "number", + desc: "Total amount of time the API call took in seconds.", + }, + project_id: { + type: "uuid", + render: { type: "project_link" }, + }, + path: { + type: "string", + }, + expire: { + type: "timestamp", + desc: "optional future date, when the entry will be deleted", + }, + model: { + type: "string", + }, + tag: { + type: "string", + desc: "A string that the client can include that is useful for analytics later", + }, }, - "gpt-3.5-turbo": { - prompt_tokens: 0.0015 / 1000, - completion_tokens: 0.002 / 1000, - max_tokens: 4096, + rules: { + desc: "Language Model Log", + primary_key: "id", + pg_indexes: ["account_id", "analytics_cookie", "time"], + user_query: { + get: { + pg_where: [{ "account_id = $::UUID": "account_id" }], + fields: { + id: null, + time: null, + account_id: null, + input: null, + system: null, + output: null, + total_tokens: null, + prompt_tokens: null, + total_time_s: null, + project_id: null, + path: null, + history: null, + expire: null, + model: null, + tag: null, + }, + }, + set: { + // this is so that a user can expire any chats they wanted to have expunged from + // the system completely. 
+ fields: { + account_id: "account_id", + id: true, + expire: true, + }, + }, + }, }, - "gpt-3.5-turbo-16k": { - prompt_tokens: 0.003 / 1000, - completion_tokens: 0.004 / 1000, - max_tokens: 16384, +}); + +Table({ + name: "crm_openai_chatgpt_log", + rules: { + virtual: "openai_chatgpt_log", + primary_key: "id", + user_query: { + get: { + pg_where: [], + admin: true, + fields: { + id: null, + time: null, + account_id: null, + analytics_cookie: null, + input: null, + system: null, + output: null, + total_tokens: null, + prompt_tokens: null, + total_time_s: null, + project_id: null, + path: null, + history: null, + model: null, + tag: null, + }, + }, + }, }, - "text-embedding-ada-002": { - prompt_tokens: 0.0001 / 1000, - completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings - max_tokens: 8191, + fields: schema.openai_chatgpt_log.fields, +}); + +export interface EmbeddingData { + id: string; // fragment id, i.e., exactly what is after the # in the url + text?: string; // test that is embedded using a model + meta?: object; // extra metadata + hash?: string; // hash that is used to know when we need to update the point; e.g., hash of text and meta. +} + +// *technical* limit is 8K tokens, but there's no good reason for a search to be really longthere's no good reason for a search to be really long, +// and it could be costly. +export const MAX_SEARCH_TEXT = 4000; +// Limit on the number of outputs when doing a search. This should stay under 10MB total, +// to avoid message size limits. Use paging for more, which app client automatically does. +export const MAX_SEARCH_LIMIT = 200; + +// Maximum number of distinct embeddings that a single client can save at once. +// The app client itself will automatically chunk the saves at this size. +export const MAX_SAVE_LIMIT = 50; +// Similar limit on removing items; can be larger since no vector embedding computation, etc. +export const MAX_REMOVE_LIMIT = 100; +// See https://platform.openai.com/docs/guides/embeddings/what-are-embeddings +export const MAX_EMBEDDINGS_TOKENS = 8191; + +Table({ + name: "openai_embedding_log", + fields: { + id: ID, + time: { type: "timestamp", desc: "When this particular chat happened." }, + account_id: CREATED_BY, + tokens: { + type: "integer", + desc: "The total number of tokens of the input.", + }, + model: { + type: "string", + desc: "The model that was used; if left blank it is assumed to be text-embedding-ada-002.", + }, }, - // https://developers.generativeai.google/models/language - "text-bison-001": { - // we assume 5 characters is 1 token on average - prompt_tokens: (5 * 0.0005) / 1000, - completion_tokens: (5 * 0.0005) / 1000, - max_tokens: 8196, + rules: { + desc: "OpenAI Vector Embedding Log. This logs who is responsible for calls to openai. 
It is used to avoid abuse, have good analytics, and may eventually be used for pay-as-you-go, etc.", + primary_key: "id", + pg_indexes: ["((tokens IS NOT NULL))"], }, - "chat-bison-001": { - // we assume 5 characters is 1 token on average - prompt_tokens: (5 * 0.0005) / 1000, - completion_tokens: (5 * 0.0005) / 1000, - max_tokens: 8196, +}); + +Table({ + name: "openai_embedding_cache", + fields: { + input_sha1: { + title: "Sha1 hash of input", + type: "string", + pg_type: "char(40)", + }, + vector: { + type: "array", + pg_type: "double precision[]", + desc: "The vector obtained from openai.", + }, + model: { + type: "string", + desc: "The model that was used; if left blank it is assumed to be text-embedding-ada-002.", + }, + expire: { + type: "timestamp", + desc: "Date when the cache entry will be deleted. Some entries correspond to queries users type, so may be very frequent, or content in shared notebooks (e.g., students in class), so caching is very valuable when it is actively happening. Others don't get accessed, so we free up the space.", + }, }, - "embedding-gecko-001": { - prompt_tokens: (5 * 0.0001) / 1000, - completion_tokens: 0, - max_tokens: 8196, // ??? + rules: { + desc: "OpenAI Vector Embedding Cache. This is a cache of embeddings that we computed using openai. It helps us avoid having to recompute embeddings, which costs money and takes time. It is only used as a CACHE by our system. This entire table could be deleted at any time, and the only impact is that some things may be slower and we may have to pay to recompute embeddings, but nothing should *break*.", + primary_key: "input_sha1", + pg_indexes: ["((vector IS NOT NULL))"], }, - "gemini-pro": { - // https://ai.google.dev/models/gemini - prompt_tokens: (5 * 0.0001) / 1000, - completion_tokens: 0, - max_tokens: 30720, - }, -} as const; - -export function isValidModel(model?: string): boolean { - if (model == null) return false; - if (isOllamaLLM(model)) return true; - if (isMistralModel(model)) return true; - return LLM_COST[model ?? ""] != null; -} - -export function getMaxTokens(model?: LanguageModel): number { - // TODO: store max tokens in the model object itself, this is just a fallback - if (isOllamaLLM(model)) return 8192; - if (isMistralModel(model)) return 4096; // TODO: check with MistralAI - return LLM_COST[model ?? ""]?.max_tokens ?? 4096; -} - -export interface LLMCost { - prompt_tokens: number; - completion_tokens: number; -} - -export function getLLMCost( - model: LanguageModel, - markup_percentage: number, // a number like "30" would mean that we increase the wholesale price by multiplying by 1.3 -): LLMCost { - const x = LLM_COST[model]; - if (x == null) { - throw Error(`unknown model "${model}"`); - } - const { prompt_tokens, completion_tokens } = x; - if (markup_percentage < 0) { - throw Error("markup percentage can't be negative"); - } - const f = 1 + markup_percentage / 100; - return { - prompt_tokens: prompt_tokens * f, - completion_tokens: completion_tokens * f, - }; -} - -// The maximum cost for one single call using the given model. -// We can't know the cost until after it happens, so this bound is useful for -// ensuring user can afford to make a call. 
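To make the pricing helpers in this hunk concrete, here is a small worked example. It is a sketch only: the figures come from the LLM_COST entries above together with a hypothetical 30% markup, and the helpers are assumed to keep their names after the move to llm-utils.ts.

```ts
import { getLLMCost, getMaxCost } from "@cocalc/util/db-schema/llm-utils";

// A markup of 30 multiplies the wholesale per-token prices by 1.3:
const cost = getLLMCost("gpt-4", 30);
// cost.prompt_tokens     ≈ (0.03 / 1000) * 1.3 = 0.000039  ($ per prompt token)
// cost.completion_tokens ≈ (0.06 / 1000) * 1.3 = 0.000078  ($ per completion token)

// getMaxCost bounds a single call by pricing all of the model's max_tokens
// at the more expensive of the two rates:
const bound = getMaxCost("gpt-4", 30);
// ≈ 0.000078 * 8192 ≈ $0.64 – enough to check that the user can afford the call
```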
-export function getMaxCost( - model: LanguageModel, - markup_percentage: number, -): number { - const { prompt_tokens, completion_tokens } = getLLMCost( - model, - markup_percentage, - ); - const { max_tokens } = LLM_COST[model]; - return Math.max(prompt_tokens, completion_tokens) * max_tokens; -} +}); diff --git a/src/packages/util/db-schema/openai.ts b/src/packages/util/db-schema/openai.ts deleted file mode 100644 index 8a1b8e5b3d..0000000000 --- a/src/packages/util/db-schema/openai.ts +++ /dev/null @@ -1,246 +0,0 @@ -// NOTE: this is not just OpenAI, but also includes other models that we use -// Mentally, just ignore "openai" and instead focus on "gpt-*" or "codey" or whatever they are called. -// TODO: refactor this, the names of the tables, etc. to be more generic. - -import { History } from "@cocalc/util/types/llm"; -import { CREATED_BY, ID } from "./crm"; -import { SCHEMA as schema } from "./index"; -import { LanguageModel } from "./llm"; -import { Table } from "./types"; - -export interface ChatGPTLogEntry { - id: number; - account_id?: string; - analytics_cookie?: string; // at least one of analytics_cookie or account_id will be set - expire?: Date; - history?: History; - input: string; - model?: LanguageModel; - output: string; - path?: string; - project_id?: string; - prompt_tokens: number; - system?: string; - tag?: string; // useful for keeping track of where queries come frome when doing analytics later - time: Date; - total_time_s: number; // how long the request took in s - total_tokens: number; -} - -Table({ - name: "openai_chatgpt_log", - fields: { - id: ID, - time: { type: "timestamp", desc: "When this particular chat happened." }, - analytics_cookie: { - title: "Analytics Cookie", - type: "string", - desc: "The analytics cookie for the user that asked this question.", - }, - account_id: CREATED_BY, - system: { - title: "System Context", - type: "string", - desc: "System context prompt.", - render: { - type: "markdown", - }, - }, - input: { - title: "Input", - type: "string", - desc: "Input text that was sent to chatgpt", - render: { - type: "markdown", - }, - }, - output: { - title: "Output", - type: "string", - desc: "Output text that was returned from chatgpt", - render: { - type: "markdown", - }, - }, - history: { - title: "History", - type: "array", - pg_type: "JSONB[]", - desc: "Historical context for this thread of discussion", - render: { - type: "json", - }, - }, - total_tokens: { - type: "integer", - desc: "The total number of tokens involved in this API call.", - }, - prompt_tokens: { - type: "integer", - desc: "The number of tokens in the prompt.", - }, - total_time_s: { - type: "number", - desc: "Total amount of time the API call took in seconds.", - }, - project_id: { - type: "uuid", - render: { type: "project_link" }, - }, - path: { - type: "string", - }, - expire: { - type: "timestamp", - desc: "optional future date, when the entry will be deleted", - }, - model: { - type: "string", - }, - tag: { - type: "string", - desc: "A string that the client can include that is useful for analytics later", - }, - }, - rules: { - desc: "Language Model Log", - primary_key: "id", - pg_indexes: ["account_id", "analytics_cookie", "time"], - user_query: { - get: { - pg_where: [{ "account_id = $::UUID": "account_id" }], - fields: { - id: null, - time: null, - account_id: null, - input: null, - system: null, - output: null, - total_tokens: null, - prompt_tokens: null, - total_time_s: null, - project_id: null, - path: null, - history: null, - expire: null, - model: null, - 
tag: null, - }, - }, - set: { - // this is so that a user can expire any chats they wanted to have expunged from - // the system completely. - fields: { - account_id: "account_id", - id: true, - expire: true, - }, - }, - }, - }, -}); - -Table({ - name: "crm_openai_chatgpt_log", - rules: { - virtual: "openai_chatgpt_log", - primary_key: "id", - user_query: { - get: { - pg_where: [], - admin: true, - fields: { - id: null, - time: null, - account_id: null, - analytics_cookie: null, - input: null, - system: null, - output: null, - total_tokens: null, - prompt_tokens: null, - total_time_s: null, - project_id: null, - path: null, - history: null, - model: null, - tag: null, - }, - }, - }, - }, - fields: schema.openai_chatgpt_log.fields, -}); - -export interface EmbeddingData { - id: string; // fragment id, i.e., exactly what is after the # in the url - text?: string; // test that is embedded using a model - meta?: object; // extra metadata - hash?: string; // hash that is used to know when we need to update the point; e.g., hash of text and meta. -} - -// *technical* limit is 8K tokens, but there's no good reason for a search to be really longthere's no good reason for a search to be really long, -// and it could be costly. -export const MAX_SEARCH_TEXT = 4000; -// Limit on the number of outputs when doing a search. This should stay under 10MB total, -// to avoid message size limits. Use paging for more, which app client automatically does. -export const MAX_SEARCH_LIMIT = 200; - -// Maximum number of distinct embeddings that a single client can save at once. -// The app client itself will automatically chunk the saves at this size. -export const MAX_SAVE_LIMIT = 50; -// Similar limit on removing items; can be larger since no vector embedding computation, etc. -export const MAX_REMOVE_LIMIT = 100; -// See https://platform.openai.com/docs/guides/embeddings/what-are-embeddings -export const MAX_EMBEDDINGS_TOKENS = 8191; - -Table({ - name: "openai_embedding_log", - fields: { - id: ID, - time: { type: "timestamp", desc: "When this particular chat happened." }, - account_id: CREATED_BY, - tokens: { - type: "integer", - desc: "The total number of tokens of the input.", - }, - model: { - type: "string", - desc: "The model that was used; if left blank it is assumed to be text-embedding-ada-002.", - }, - }, - rules: { - desc: "OpenAI Vector Embedding Log. This logs who is responsible for calls to openai. It is used to avoid abuse, have good analytics, and may eventually be used for pay-as-you-go, etc.", - primary_key: "id", - pg_indexes: ["((tokens IS NOT NULL))"], - }, -}); - -Table({ - name: "openai_embedding_cache", - fields: { - input_sha1: { - title: "Sha1 hash of input", - type: "string", - pg_type: "char(40)", - }, - vector: { - type: "array", - pg_type: "double precision[]", - desc: "The vector obtained from openai.", - }, - model: { - type: "string", - desc: "The model that was used; if left blank it is assumed to be text-embedding-ada-002.", - }, - expire: { - type: "timestamp", - desc: "Date when the cache entry will be deleted. Some entries correspond to queries users type, so may be very frequent, or content in shared notebooks (e.g., students in class), so caching is very valuable when it is actively happening. Others don't get accessed, so we free up the space.", - }, - }, - rules: { - desc: "OpenAI Vector Embedding Cache. This is a cache of embeddings that we computed using openai. It helps us avoid having to recompute embeddings, which costs money and takes time. 
It is only used as a CACHE by our system. This entire table could be deleted at any time, and the only impact is that some things may be slower and we may have to pay to recompute embeddings, but nothing should *break*.", - primary_key: "input_sha1", - pg_indexes: ["((vector IS NOT NULL))"], - }, -}); diff --git a/src/packages/util/db-schema/purchase-quotas.ts b/src/packages/util/db-schema/purchase-quotas.ts index 7932204824..520269ac69 100644 --- a/src/packages/util/db-schema/purchase-quotas.ts +++ b/src/packages/util/db-schema/purchase-quotas.ts @@ -2,7 +2,7 @@ import { Table } from "./types"; import { CREATED_BY, ID } from "./crm"; import { SCHEMA as schema } from "./index"; import type { Service } from "./purchases"; -import { isFreeModel } from "./llm"; +import { isFreeModel } from "./llm-utils"; export type { Service }; diff --git a/src/packages/util/db-schema/purchases.ts b/src/packages/util/db-schema/purchases.ts index 610e562e63..869a11ad07 100644 --- a/src/packages/util/db-schema/purchases.ts +++ b/src/packages/util/db-schema/purchases.ts @@ -16,7 +16,7 @@ import { PurchaseInfo } from "@cocalc/util/licenses/purchase/types"; import * as computeServers from "./compute-servers"; import { CREATED_BY, ID } from "./crm"; import { SCHEMA as schema } from "./index"; -import { LanguageService } from "./llm"; +import { LanguageService } from "./llm-utils"; import type { CourseInfo } from "./projects"; import { Table } from "./types"; diff --git a/src/packages/util/types/llm.ts b/src/packages/util/types/llm.ts index 9cbd785746..50174138f1 100644 --- a/src/packages/util/types/llm.ts +++ b/src/packages/util/types/llm.ts @@ -1,4 +1,4 @@ -import { LanguageModel } from "@cocalc/util/db-schema/llm"; +import { LanguageModel } from "@cocalc/util/db-schema/llm-utils"; export type History = { role: "assistant" | "user" | "system"; From e8509d50daf88c3326c5120aca520ca4dcf33d46 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Tue, 12 Mar 2024 14:25:07 +0100 Subject: [PATCH 11/32] frontend/latex/llm: improve latex formula postprocessing --- .../frontend/codemirror/extensions/ai-formula.tsx | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/packages/frontend/codemirror/extensions/ai-formula.tsx b/src/packages/frontend/codemirror/extensions/ai-formula.tsx index 7f10b3ba8b..5b764d6be4 100644 --- a/src/packages/frontend/codemirror/extensions/ai-formula.tsx +++ b/src/packages/frontend/codemirror/extensions/ai-formula.tsx @@ -104,6 +104,21 @@ function AiGenFormula({ mode, text = "", project_id, cb }: Props) { if (!tex) { tex = formula; } + // convert tex to be on a single line + tex = tex.replace(/\n/g, " ").trim(); + // if there is "\[" and "\]" in the formula, replace both by $$ + if (tex.includes("\\[") && tex.includes("\\]")) { + tex = tex.replace(/\\\[|\\\]/g, "$$"); + } + // if there are at least two $$ or $ in the tex, we extract the part between the first and second $ or $$ + // This is necessary, because despite the prompt, some LLM return stuff like: "Here is the LaTeX formula: $$ ... $$." 
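The comment above describes the clean-up step that the loop right below performs. As a sketch of the intended effect (the helper and the sample reply are hypothetical; the steps mirror this hunk):

```ts
// Hypothetical helper mirroring the post-processing in ai-formula.tsx:
function extractFormula(reply: string): string {
  let tex = reply.replace(/\n/g, " ").trim();
  if (tex.includes("\\[") && tex.includes("\\]")) {
    tex = tex.replace(/\\\[|\\\]/g, "$$");
  }
  for (const delimiter of ["$$", "$"]) {
    const parts = tex.split(delimiter);
    if (parts.length >= 3) {
      tex = parts[1];
      break;
    }
  }
  return tex;
}

extractFormula("Here is the LaTeX formula: $$ \\frac{a}{b} $$.");
// → " \\frac{a}{b} "  – only the content between the first pair of $$ survives
```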
+ for (const delimiter of ["$$", "$"]) { + const parts = tex.split(delimiter); + if (parts.length >= 3) { + tex = parts[1]; + break; + } + } setFormula(tex); } From 2b0861ca9875a5f6db652452515b71719307b743 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Tue, 12 Mar 2024 15:08:50 +0100 Subject: [PATCH 12/32] frontend/llm: more towards mistral --- src/packages/frontend/account/chatbot.ts | 23 +++++------- src/packages/frontend/chat/actions.ts | 5 ++- src/packages/frontend/chat/message.tsx | 16 ++++---- .../frontend/components/mistral-avatar.tsx | 10 ++++- src/packages/frontend/components/mistral.png | Bin 1088 -> 0 bytes .../markdown-input/mentionable-users.tsx | 35 +++++++++--------- src/packages/frontend/users/store.ts | 15 ++++---- src/packages/util/db-schema/llm-utils.ts | 5 +-- 8 files changed, 57 insertions(+), 52 deletions(-) delete mode 100644 src/packages/frontend/components/mistral.png diff --git a/src/packages/frontend/account/chatbot.ts b/src/packages/frontend/account/chatbot.ts index 747d405870..fdf0e1a26b 100644 --- a/src/packages/frontend/account/chatbot.ts +++ b/src/packages/frontend/account/chatbot.ts @@ -12,10 +12,10 @@ import { LANGUAGE_MODELS, LANGUAGE_MODEL_PREFIXES, LLM_USERNAMES, - LLMVendor, + fromMistralService, fromOllamaModel, + isMistralService, isOllamaLLM, - model2vendor, } from "@cocalc/util/db-schema/llm-utils"; // we either check if the prefix is one of the known ones (used in some circumstances) @@ -29,24 +29,21 @@ export function isChatBot(account_id?: string): boolean { ); } -export function getChatBotVendor(account_id?: string): LLMVendor { - if (account_id == null) { - return "openai"; - } - return model2vendor(account_id as any); -} - export function chatBotName(account_id?: string): string { - if (account_id?.startsWith("chatgpt")) { + if (typeof account_id !== "string") return "ChatBot"; + if (account_id.startsWith("chatgpt")) { return LLM_USERNAMES[account_id] ?? "ChatGPT"; } - if (account_id?.startsWith("openai-")) { + if (account_id.startsWith("openai-")) { return LLM_USERNAMES[account_id.slice("openai-".length)] ?? "ChatGPT"; } - if (account_id?.startsWith("google-")) { + if (account_id.startsWith("google-")) { return LLM_USERNAMES[account_id.slice("google-".length)] ?? "Gemini"; } - if (typeof account_id === "string" && isOllamaLLM(account_id)) { + if (isMistralService(account_id)) { + return LLM_USERNAMES[fromMistralService(account_id)] ?? "Mistral"; + } + if (isOllamaLLM(account_id)) { const ollama = redux.getStore("customize").get("ollama")?.toJS() ?? {}; const key = fromOllamaModel(account_id); return ollama[key]?.display ?? "Ollama"; diff --git a/src/packages/frontend/chat/actions.ts b/src/packages/frontend/chat/actions.ts index 47d843e9f6..04e54159a6 100644 --- a/src/packages/frontend/chat/actions.ts +++ b/src/packages/frontend/chat/actions.ts @@ -737,7 +737,7 @@ function getReplyToRoot(message, messages): Date | undefined { // We strip out any cased version of the string @chatgpt and also all mentions. 
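Based on the chatBotName branches above, the pseudo account ids should resolve to display names roughly as follows. These are assumed values, read off LLM_USERNAMES and the Mistral/Ollama branches shown in this hunk, not output captured from the app.

```ts
chatBotName("chatgpt4");                       // "GPT-4"  (legacy id, direct lookup)
chatBotName("openai-gpt-3.5-turbo");           // "GPT-3.5"
chatBotName("google-gemini-pro");              // "Gemini Pro"
chatBotName("mistralai-mistral-large-latest"); // "Mistral AI Large"
chatBotName("ollama-llama2");                  // display name from the "ollama" customize store, else "Ollama"
chatBotName(undefined);                        // "ChatBot"
```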
function stripMentions(value: string): string { - for (const name of ["@chatgpt4", "@chatgpt", "@palm"]) { + for (const name of ["@chatgpt4", "@chatgpt"]) { while (true) { const i = value.toLowerCase().indexOf(name); if (i == -1) break; @@ -780,7 +780,8 @@ function getLanguageModel(input?: string): false | LanguageModel { return "gpt-3.5-turbo"; } // these prefexes should come from util/db-schema/openai::model2service - for (const prefix of ["account-id=openai-", "account-id=google-"]) { + for (const vendorprefix of LANGUAGE_MODEL_PREFIXES) { + const prefix = `account-id=${vendorprefix}`; const i = x.indexOf(prefix); if (i != -1) { const j = x.indexOf(">", i); diff --git a/src/packages/frontend/chat/message.tsx b/src/packages/frontend/chat/message.tsx index e8ea4e8dd2..7cccf89b5f 100644 --- a/src/packages/frontend/chat/message.tsx +++ b/src/packages/frontend/chat/message.tsx @@ -3,7 +3,7 @@ * License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details */ -import { Button, Col, Row, Popconfirm, Tooltip } from "antd"; +import { Button, Col, Popconfirm, Row, Tooltip } from "antd"; import { Map } from "immutable"; import { CSSProperties } from "react"; @@ -125,7 +125,7 @@ export default function Message(props: Props) { const is_viewers_message = sender_is_viewer(props.account_id, props.message); const verb = show_history ? "Hide" : "Show"; - const isChatGPTThread = useMemo( + const isLLMThread = useMemo( () => props.actions?.isLanguageModelThread(props.message.get("date")), [props.message], ); @@ -629,9 +629,9 @@ export default function Message(props: Props) { {!generating && ( Reply - {isChatGPTThread ? ` to ${modelToName(isChatGPTThread)}` : ""} - {isChatGPTThread && ( + {isLLMThread ? ` to ${modelToName(isLLMThread)}` : ""} + {isLLMThread && ( @@ -654,7 +654,7 @@ export default function Message(props: Props) { )} {!generating && - isChatGPTThread && + isLLMThread && props.actions && Date.now() - date <= regenerateCutoff && ( {formula ? ( - <> - {formula} - - Preview - - {fullReply ? ( - <> - Full reply - - - ) : undefined} - - + {formula}, + }, + { + key: "2", + label: "Preview", + children: , + }, + ...(fullReply + ? [ + { + key: "3", + label: "Full reply", + children: , + }, + ] + : []), + ]} + /> ) : undefined} {error ? {error} : undefined} {mode === "tex" ? 
( From 2c507e67b637d97bb9f0e58abcaec48766b6bfff Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Wed, 13 Mar 2024 18:31:57 +0100 Subject: [PATCH 15/32] frontend/chat/llm: untangling ollama-[model] and other model names -- to be fixed later --- src/packages/frontend/chat/actions.ts | 16 ++++++++++++++-- .../frame-editors/llm/model-switch.tsx | 18 +++++++++++++++--- src/packages/util/db-schema/llm-utils.ts | 2 +- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/packages/frontend/chat/actions.ts b/src/packages/frontend/chat/actions.ts index 04e54159a6..fb874079b0 100644 --- a/src/packages/frontend/chat/actions.ts +++ b/src/packages/frontend/chat/actions.ts @@ -21,6 +21,7 @@ import { getVendorStatusCheckMD, model2service, model2vendor, + toOllamaModel, type LanguageModel, } from "@cocalc/util/db-schema/llm-utils"; import { cmp, isValidUUID, parse_hashtags, uuid } from "@cocalc/util/misc"; @@ -553,7 +554,13 @@ export class ChatActions extends Actions { input = stripMentions(input); // also important to strip details, since they tend to confuse chatgpt: //input = stripDetails(input); - const sender_id = model2service(model); + const sender_id = (function () { + try { + return model2service(model); + } catch { + return model; + } + })(); let date: string = this.send_reply({ message, reply: ":robot: Thinking...", @@ -785,7 +792,12 @@ function getLanguageModel(input?: string): false | LanguageModel { const i = x.indexOf(prefix); if (i != -1) { const j = x.indexOf(">", i); - return x.slice(i + prefix.length, j).trim() as any; + const model = x.slice(i + prefix.length, j).trim() as LanguageModel; + // for now, ollama must be prefixed – in the future, all model names will have a vendor prefix! + if (vendorprefix.startsWith("ollama")) { + return toOllamaModel(model); + } + return model; } } return false; diff --git a/src/packages/frontend/frame-editors/llm/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx index b5bcf5f3cb..eb0f01db5d 100644 --- a/src/packages/frontend/frame-editors/llm/model-switch.tsx +++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx @@ -145,9 +145,21 @@ export default function ModelSwitch({ return ( <> - {makeLLMOption(ret, MISTRAL_MODELS[0], `Mistral AI's "small" model`)} - {makeLLMOption(ret, MISTRAL_MODELS[1], `Mistral AI's "medium" model`)} - {makeLLMOption(ret, MISTRAL_MODELS[2], `Mistral AI's "large" model`)} + {makeLLMOption( + ret, + MISTRAL_MODELS[0], + "Fast, simple queries, short answers, less capabilities", + )} + {makeLLMOption( + ret, + MISTRAL_MODELS[1], + "Intermediate tasks, summarizing, generating documents, etc.", + )} + {makeLLMOption( + ret, + MISTRAL_MODELS[2], + "Slowest, most powerful, large reasoning capabilities", + )} ); } diff --git a/src/packages/util/db-schema/llm-utils.ts b/src/packages/util/db-schema/llm-utils.ts index e49e048d05..6f7df255b5 100644 --- a/src/packages/util/db-schema/llm-utils.ts +++ b/src/packages/util/db-schema/llm-utils.ts @@ -154,7 +154,7 @@ export function model2service(model: LanguageModel): LanguageService { return `openai-${model}`; } if (isOllamaLLM(model)) { - return toOllamaModel(model); + return model; // already has the ollama prefix } if (isMistralModel(model)) { return toMistralService(model); From fabb771778d11576a6df8b266e82a26913d8bfd1 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Thu, 14 Mar 2024 13:02:25 +0100 Subject: [PATCH 16/32] npm: update langchain --- src/packages/pnpm-lock.yaml | 32 ++++++++++++++++++-------------- 
src/packages/server/package.json | 6 +++--- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/packages/pnpm-lock.yaml b/src/packages/pnpm-lock.yaml index d7d26f3739..b80d7b4319 100644 --- a/src/packages/pnpm-lock.yaml +++ b/src/packages/pnpm-lock.yaml @@ -1350,11 +1350,11 @@ importers: specifier: ^1.2.1 version: 1.2.1 '@langchain/community': - specifier: ^0.0.36 - version: 0.0.36(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21) + specifier: ^0.0.38 + version: 0.0.38(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21) '@langchain/core': - specifier: ^0.1.44 - version: 0.1.44 + specifier: ^0.1.46 + version: 0.1.46 '@langchain/mistralai': specifier: ^0.0.13 version: 0.0.13(encoding@0.1.13) @@ -4052,8 +4052,8 @@ packages: - crypto dev: false - /@langchain/community@0.0.36(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21): - resolution: {integrity: sha512-4OOKH6F9orGZZHgbvYvekMV6JesL89JC3cmJxNvAr5x7vr7wNNa4fuLq+H7Ew8tnsQOYZk+K57UMYkcwVuwySA==} + /@langchain/community@0.0.38(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21): + resolution: {integrity: sha512-JIPgVYjDecG7iQh7VejLwm9QihA51ptvPHyV5Acx3+wyO1tSRLNTWUCcuPEbqLsB5zM0Gl+A6Z35DbC00gMhoA==} engines: {node: '>=18'} peerDependencies: '@aws-crypto/sha256-js': ^5.0.0 @@ -4119,6 +4119,7 @@ packages: html-to-text: ^9.0.5 ioredis: ^5.3.2 jsdom: '*' + jsonwebtoken: ^9.0.2 llmonitor: ^0.5.9 lodash: ^4.17.21 lunary: ^0.6.11 @@ -4267,6 +4268,8 @@ packages: optional: true jsdom: optional: true + jsonwebtoken: + optional: true llmonitor: optional: true lodash: @@ -4311,7 +4314,7 @@ packages: optional: true dependencies: '@google-ai/generativelanguage': 1.1.0(encoding@0.1.13) - '@langchain/core': 0.1.44 + '@langchain/core': 0.1.46 '@langchain/openai': 0.0.19(encoding@0.1.13) flat: 5.0.2 google-auth-library: 9.4.1(encoding@0.1.13) @@ -4323,8 +4326,8 @@ packages: - encoding dev: false - /@langchain/core@0.1.44: - resolution: {integrity: sha512-6kzRRf8X1TgkAfc8xOZ1qCHalasPDxdcNLnL++ZCGtWLyiKMFP7HcDwG/UewYsbqQd3sTQUPiP9+PHdPWXHd/Q==} + /@langchain/core@0.1.46: + resolution: {integrity: sha512-UmPukG3rrYKdTWvMtXpF6o3HG6uV5DlbdMdmfT/Bne4rMXwrFmUMVw40R1q2kI+CXOuh9SMmYxpz0jS3I7maog==} engines: {node: '>=18'} dependencies: ansi-styles: 5.2.0 @@ -4344,7 +4347,7 @@ packages: resolution: {integrity: sha512-0oNTICsukEnZLJ1HwtlCADZi5jqircK8B+svLrRbp+1HVue5hXPsU36b54mr0WEwhmY0QIXJ9CwEaGRSfEEZcg==} engines: {node: '>=18'} dependencies: - '@langchain/core': 0.1.44 + '@langchain/core': 0.1.46 '@mistralai/mistralai': 0.1.3(encoding@0.1.13) zod: 3.22.4 zod-to-json-schema: 3.22.4(zod@3.22.4) @@ -4356,7 +4359,7 @@ packages: resolution: {integrity: sha512-SBY1PlwiHIcjW185yVXHo4XXgTVAyGxw7IHpuEqs7201/EVjFW91HskzGRvduYm2td3/NV91BBVFgXhJQcvtmA==} engines: {node: '>=18'} dependencies: - '@langchain/core': 0.1.44 + '@langchain/core': 0.1.46 js-tiktoken: 1.0.10 openai: 4.27.0(encoding@0.1.13) zod: 3.22.4 @@ -4369,7 +4372,7 @@ packages: resolution: {integrity: sha512-b1CHZCNVc2u4LZbFc1Ls0T7U4LWNGvMeQsrNidLZT5jbjg6VzzDZuVCDPGqCwUc8bzFWvPBO52oT1Wy+aCOX6w==} engines: {node: '>=18'} dependencies: - '@langchain/core': 0.1.44 + '@langchain/core': 0.1.46 js-tiktoken: 1.0.10 openai: 4.27.0(encoding@0.1.13) zod: 3.22.4 @@ -13157,8 +13160,8 @@ packages: dependencies: '@anthropic-ai/sdk': 0.9.1(encoding@0.1.13) '@google-ai/generativelanguage': 1.1.0(encoding@0.1.13) - '@langchain/community': 
0.0.36(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21) - '@langchain/core': 0.1.44 + '@langchain/community': 0.0.38(@google-ai/generativelanguage@1.1.0)(encoding@0.1.13)(google-auth-library@9.4.1)(lodash@4.17.21) + '@langchain/core': 0.1.46 '@langchain/openai': 0.0.18(encoding@0.1.13) axios: 1.6.7 binary-extensions: 2.2.0 @@ -13224,6 +13227,7 @@ packages: - firebase-admin - googleapis - hnswlib-node + - jsonwebtoken - llmonitor - lodash - lunary diff --git a/src/packages/server/package.json b/src/packages/server/package.json index e5ae44b259..4c1867d3ab 100644 --- a/src/packages/server/package.json +++ b/src/packages/server/package.json @@ -46,8 +46,8 @@ "@google-cloud/monitoring": "^4.0.0", "@google/generative-ai": "^0.1.3", "@isaacs/ttlcache": "^1.2.1", - "@langchain/community": "^0.0.36", - "@langchain/core": "^0.1.44", + "@langchain/community": "^0.0.38", + "@langchain/core": "^0.1.46", "@langchain/mistralai": "^0.0.13", "@node-saml/passport-saml": "^4.0.4", "@passport-js/passport-twitter": "^1.0.8", @@ -123,7 +123,7 @@ }, "pnpm": { "overrides": { - "@langchain/core": "^0.1.44" + "@langchain/core": "^0.1.46" } } } From 91821ab7f2a82341bad34455779c0766f1127077 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Thu, 14 Mar 2024 14:23:41 +0100 Subject: [PATCH 17/32] llm: do not cache ollama, tweak model selector, count tokens, etc. --- .../frame-editors/llm/model-switch.tsx | 18 ++++++++++-------- src/packages/server/llm/client.ts | 16 +++------------- src/packages/server/llm/index.ts | 3 ++- src/packages/server/llm/mistral.ts | 7 +++++-- src/packages/server/llm/ollama.ts | 9 +++++---- 5 files changed, 25 insertions(+), 28 deletions(-) diff --git a/src/packages/frontend/frame-editors/llm/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx index eb0f01db5d..6fe8bdc6f2 100644 --- a/src/packages/frontend/frame-editors/llm/model-switch.tsx +++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx @@ -83,24 +83,25 @@ export default function ModelSwitch({ if (!USER_SELECTABLE_LANGUAGE_MODELS.includes(btnModel as any)) return; if (typeof btnModel !== "string") return; - const display = ( + const model = ( <> {modelToName(btnModel)} {getPrice(btnModel)} ); - const text = ( + const tooltip = ( + <> + {model}: {title} + + ); + const display = ( <> - {display}: {title} + {tooltip} ); ret.push({ value: btnModel, display, - label: ( - - {text} - - ), + label: {display}, }); } @@ -179,6 +180,7 @@ export default function ModelSwitch({ value: ollamaModel, display: ( <> + {" "} {modelToName(ollamaModel)} {getPrice(ollamaModel)} ), diff --git a/src/packages/server/llm/client.ts b/src/packages/server/llm/client.ts index 5162f51e0f..6dd9ddbeb2 100644 --- a/src/packages/server/llm/client.ts +++ b/src/packages/server/llm/client.ts @@ -5,7 +5,6 @@ You do not have to worry too much about throwing an exception, because they're c */ import { Ollama } from "@langchain/community/llms/ollama"; -import jsonStable from "json-stable-stringify"; import * as _ from "lodash"; import OpenAI from "openai"; @@ -75,14 +74,14 @@ export async function getClient( } } -const ollamaCache: { [key: string]: Ollama } = {}; - /** * The idea here is: the ollama config contains all available endpoints and their configuration. * The "model" is the unique key in the ollama_configuration mapping, it was prefixed by $OLLAMA_PREFIX. 
* For the actual Ollama client instantitation, we pick the model parameter from the config or just use the unique model name as a fallback. * In particular, this means you can query the same Ollama model with differnet parameters, or even have several ollama servers running. * All other config parameters are passed to the Ollama constructor (e.g. topK, temperature, etc.). + * + * ATTN: do not cache the Ollama instance, we don't know if there are side effects */ export async function getOllama(model: string) { if (isOllamaLLM(model)) { @@ -103,15 +102,6 @@ export async function getOllama(model: string) { throw new Error(`Ollama model ${model} is disabled`); } - // the key is a hash of the model name and the specific config – such that changes in the config will invalidate the cache - const key = `${model}:${jsonStable(config)}`; - - // model is the unique key in the ServerSettings.ollama_configuration mapping - if (ollamaCache[key]) { - log.debug(`Using cached Ollama client for model ${model}`); - return ollamaCache[key]; - } - const baseUrl = config.baseUrl; if (!baseUrl) { @@ -120,6 +110,7 @@ export async function getOllama(model: string) { ); } + // this means the model is kept in the GPU memory for 24 hours – by default its only a few minutes or so const keepAlive: string = config.keepAlive ?? "24h"; // extract all other properties from the config, except the url, model, keepAlive field and the "cocalc" field @@ -134,6 +125,5 @@ export async function getOllama(model: string) { log.debug("Instantiating Ollama client with config", ollamaConfig); const client = new Ollama(ollamaConfig); - ollamaCache[key] = client; return client; } diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts index c59f276474..32591cb8a9 100644 --- a/src/packages/server/llm/index.ts +++ b/src/packages/server/llm/index.ts @@ -52,7 +52,8 @@ export async function evaluate(opts: ChatOptions): Promise { return await evaluateImpl(opts); } catch (err) { // We want to avoid leaking any information about the error to the client - log.debug("error calling AI language model", err); + log.debug("error calling AI language model", err, err.stack); + if (DEBUG_THROW_LLM_ERROR) throw err; throw new Error( `There is a problem calling ${ LLM_USERNAMES[model] ?? model diff --git a/src/packages/server/llm/mistral.ts b/src/packages/server/llm/mistral.ts index bbd9478a73..d1e8481e61 100644 --- a/src/packages/server/llm/mistral.ts +++ b/src/packages/server/llm/mistral.ts @@ -11,6 +11,7 @@ import { getServerSettings } from "@cocalc/database/settings"; import { isMistralModel } from "@cocalc/util/db-schema/llm-utils"; import { ChatOutput, History } from "@cocalc/util/types/llm"; import { AIMessage, HumanMessage } from "@langchain/core/messages"; +import { totalNumTokens } from "./chatgpt-numtokens"; const log = getLogger("llm:mistral"); @@ -102,8 +103,10 @@ export async function evaluateMistral( // and an empty call when done opts.stream?.(); - const prompt_tokens = 10; - const completion_tokens = 10; + // we use that GPT3 tokenizer to get an approximate number of tokens + const prompt_tokens = + totalNumTokens(history ?? 
[]) + totalNumTokens([{ content: input }]); + const completion_tokens = totalNumTokens([{ content: output }]); return { output, diff --git a/src/packages/server/llm/ollama.ts b/src/packages/server/llm/ollama.ts index e7b6d206a1..b3d1651785 100644 --- a/src/packages/server/llm/ollama.ts +++ b/src/packages/server/llm/ollama.ts @@ -9,6 +9,7 @@ import getLogger from "@cocalc/backend/logger"; import { fromOllamaModel, isOllamaLLM } from "@cocalc/util/db-schema/llm-utils"; import { ChatOutput, History } from "@cocalc/util/types/llm"; import { AIMessage, HumanMessage } from "@langchain/core/messages"; +import { totalNumTokens } from "./chatgpt-numtokens"; import { getOllama } from "./client"; const log = getLogger("llm:ollama"); @@ -57,8 +58,6 @@ export async function evaluateOllama( historyMessagesKey: "chat_history", getMessageHistory: async (_) => { const chatHistory = new ChatMessageHistory(); - // await history.addMessage(new HumanMessage("be brief")); - // await history.addMessage(new AIMessage("ok")); if (history) { let nextRole: "model" | "user" = "user"; for (const { content } of history) { @@ -86,8 +85,10 @@ export async function evaluateOllama( // and an empty call when done opts.stream?.(); - const prompt_tokens = 10; - const completion_tokens = 10; + // we use that GPT3 tokenizer to get an approximate number of tokens + const prompt_tokens = + totalNumTokens(history ?? []) + totalNumTokens([{ content: input }]); + const completion_tokens = totalNumTokens([{ content: output }]); return { output, From f0d64e7b1b1261db5601981f3c7baf098b8c3f31 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Thu, 14 Mar 2024 15:38:17 +0100 Subject: [PATCH 18/32] =?UTF-8?q?frontend:=20refactor=20more=20chatgpt=20?= =?UTF-8?q?=E2=86=92=20llm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/packages/frontend/jupyter/cell-input.tsx | 20 +++++++++++-------- src/packages/frontend/jupyter/cell-list.tsx | 8 ++++---- src/packages/frontend/jupyter/cell-output.tsx | 14 ++++++------- src/packages/frontend/jupyter/cell.tsx | 12 +++++------ .../frontend/jupyter/chatgpt/index.ts | 5 ----- .../jupyter/insert-cell/ai-cell-generator.tsx | 2 +- .../frontend/jupyter/insert-cell/index.tsx | 10 +++++----- .../jupyter/{chatgpt => llm}/error.tsx | 2 +- .../jupyter/{chatgpt => llm}/explain.tsx | 15 +++++++++----- src/packages/frontend/jupyter/llm/index.ts | 5 +++++ .../jupyter/{chatgpt => llm}/split-cells.ts | 0 src/packages/frontend/jupyter/main.tsx | 10 +++++----- .../jupyter/output-messages/message.tsx | 10 +++++----- .../page/home-page/ai-generate-jupyter.tsx | 2 +- src/packages/jupyter/types/types.ts | 10 +++++----- 15 files changed, 67 insertions(+), 58 deletions(-) delete mode 100644 src/packages/frontend/jupyter/chatgpt/index.ts rename src/packages/frontend/jupyter/{chatgpt => llm}/error.tsx (91%) rename src/packages/frontend/jupyter/{chatgpt => llm}/explain.tsx (94%) create mode 100644 src/packages/frontend/jupyter/llm/index.ts rename src/packages/frontend/jupyter/{chatgpt => llm}/split-cells.ts (100%) diff --git a/src/packages/frontend/jupyter/cell-input.tsx b/src/packages/frontend/jupyter/cell-input.tsx index 560a209a2b..095e013183 100644 --- a/src/packages/frontend/jupyter/cell-input.tsx +++ b/src/packages/frontend/jupyter/cell-input.tsx @@ -23,15 +23,15 @@ import MostlyStaticMarkdown from "@cocalc/frontend/editors/slate/mostly-static-m import { SAVE_DEBOUNCE_MS } from "@cocalc/frontend/frame-editors/code-editor/const"; import useNotebookFrameActions from 
"@cocalc/frontend/frame-editors/jupyter-editor/cell-notebook/hook"; import { FileContext, useFileContext } from "@cocalc/frontend/lib/file-context"; -import { AiTools } from "@cocalc/jupyter/types"; +import { LLMTools } from "@cocalc/jupyter/types"; import { filename_extension, startswith } from "@cocalc/util/misc"; import { COLORS } from "@cocalc/util/theme"; import { JupyterActions } from "./browser-actions"; import { CellHiddenPart } from "./cell-hidden-part"; import CellTiming from "./cell-output-time"; import { CellToolbar } from "./cell-toolbar"; -import { ChatGPTExplain } from "./chatgpt"; import { CodeMirror } from "./codemirror-component"; +import { LLMExplainCell } from "./llm"; import { InputPrompt } from "./prompt/input"; import { get_blob_url } from "./server-urls"; @@ -80,7 +80,7 @@ export interface CellInputProps { is_scrolling?: boolean; id: string; index: number; - aiTools?: AiTools; + llmTools?: LLMTools; computeServerId?: number; setShowChatGPT?; } @@ -172,7 +172,7 @@ export const CellInput: React.FC = React.memo( unregisterEditor={() => { frameActions.current?.unregister_input_editor(props.cell.get("id")); }} - setShowChatGPT={props.aiTools ? props.setShowChatGPT : undefined} + setShowChatGPT={props.llmTools ? props.setShowChatGPT : undefined} /> ); } @@ -428,9 +428,13 @@ export const CellInput: React.FC = React.memo( /> )} - {props.aiTools && ( - - )} + {props.llmTools ? ( + + ) : undefined} {/* Should only show formatter button if there is a way to format this code. */} {!props.is_readonly && props.actions != null && ( @@ -556,7 +560,7 @@ export const CellInput: React.FC = React.memo( next.is_readonly !== cur.is_readonly || next.is_scrolling !== cur.is_scrolling || next.cell_toolbar !== cur.cell_toolbar || - (next.aiTools?.model ?? "") !== (cur.aiTools?.model ?? "") || + (next.llmTools?.model ?? "") !== (cur.llmTools?.model ?? 
"") || next.index !== cur.index || next.computeServerId != cur.computeServerId || (next.cell_toolbar === "slideshow" && diff --git a/src/packages/frontend/jupyter/cell-list.tsx b/src/packages/frontend/jupyter/cell-list.tsx index 4f84b9f137..180b5c7bc1 100644 --- a/src/packages/frontend/jupyter/cell-list.tsx +++ b/src/packages/frontend/jupyter/cell-list.tsx @@ -34,7 +34,7 @@ import { import useVirtuosoScrollHook from "@cocalc/frontend/components/virtuoso-scroll-hook"; import useNotebookFrameActions from "@cocalc/frontend/frame-editors/jupyter-editor/cell-notebook/hook"; import { FileContext, useFileContext } from "@cocalc/frontend/lib/file-context"; -import { AiTools, NotebookMode, Scroll } from "@cocalc/jupyter/types"; +import { LLMTools, NotebookMode, Scroll } from "@cocalc/jupyter/types"; import { JupyterActions } from "./browser-actions"; import { Cell } from "./cell"; import HeadingTagComponent from "./heading-tag"; @@ -96,7 +96,7 @@ interface CellListProps { sel_ids?: immutable.Set; // set of selected cells trust?: boolean; use_windowed_list?: boolean; - aiTools?: AiTools; + llmTools?: LLMTools; computeServerId?: number; } @@ -125,7 +125,7 @@ export const CellList: React.FC = (props: CellListProps) => { sel_ids, trust, use_windowed_list, - aiTools, + llmTools, computeServerId, } = props; @@ -475,7 +475,7 @@ export const CellList: React.FC = (props: CellListProps) => { trust={trust} is_scrolling={isScrolling} delayRendering={delayRendering} - aiTools={aiTools} + llmTools={llmTools} computeServerId={computeServerId} isFirst={isFirst} isLast={isLast} diff --git a/src/packages/frontend/jupyter/cell-output.tsx b/src/packages/frontend/jupyter/cell-output.tsx index 0434c1cbe3..98e3816956 100644 --- a/src/packages/frontend/jupyter/cell-output.tsx +++ b/src/packages/frontend/jupyter/cell-output.tsx @@ -11,7 +11,7 @@ import { Alert } from "antd"; import type { Map as ImmutableMap } from "immutable"; import React from "react"; -import { AiTools } from "@cocalc/jupyter/types"; +import { LLMTools } from "@cocalc/jupyter/types"; import type { JupyterActions } from "./browser-actions"; import { CellHiddenPart } from "./cell-hidden-part"; import { CollapsedOutput, OutputToggle } from "./cell-output-toggle"; @@ -31,7 +31,7 @@ interface CellOutputProps { hidePrompt?: boolean; style?: React.CSSProperties; divRef?; - aiTools?: AiTools; + llmTools?: LLMTools; } export function CellOutput({ @@ -47,7 +47,7 @@ export function CellOutput({ hidePrompt, divRef, style, - aiTools, + llmTools, }: CellOutputProps) { const minHeight = complete ? 
"60vh" : undefined; @@ -90,7 +90,7 @@ export function CellOutput({ directory={directory} name={name} trust={trust} - aiTools={aiTools} + llmTools={llmTools} /> ); @@ -105,7 +105,7 @@ interface OutputColumnProps { directory?: string; name?: string; trust?: boolean; - aiTools?; + llmTools?; } function OutputColumn({ @@ -117,7 +117,7 @@ function OutputColumn({ directory, name, trust, - aiTools, + llmTools, }: OutputColumnProps) { if (cell.get("collapsed")) { return ; @@ -152,7 +152,7 @@ function OutputColumn({ name={name} trust={trust} id={id} - aiTools={aiTools} + llmTools={llmTools} /> ); } diff --git a/src/packages/frontend/jupyter/cell.tsx b/src/packages/frontend/jupyter/cell.tsx index bbf5aab66d..574fedb4fc 100644 --- a/src/packages/frontend/jupyter/cell.tsx +++ b/src/packages/frontend/jupyter/cell.tsx @@ -16,7 +16,7 @@ import { } from "@cocalc/frontend/app-framework"; import { IS_TOUCH } from "@cocalc/frontend/feature"; import useNotebookFrameActions from "@cocalc/frontend/frame-editors/jupyter-editor/cell-notebook/hook"; -import { AiTools } from "@cocalc/jupyter/types"; +import { LLMTools } from "@cocalc/jupyter/types"; import { COLORS } from "@cocalc/util/theme"; import { useState } from "react"; import { Icon, Tip } from "../components"; @@ -51,7 +51,7 @@ interface Props { is_scrolling?: boolean; height?: number; // optional fixed height delayRendering?: number; - aiTools?: AiTools; + llmTools?: LLMTools; computeServerId?: number; is_visible?: boolean; isFirst?: boolean; @@ -80,7 +80,7 @@ function areEqual(props: Props, nextProps: Props): boolean { nextProps.isFirst !== props.isFirst || nextProps.isLast !== props.isLast || nextProps.computeServerId !== props.computeServerId || - (nextProps.aiTools?.model ?? "") !== (props.aiTools?.model ?? "") || + (nextProps.llmTools?.model ?? "") !== (props.llmTools?.model ?? "") || (nextProps.complete !== props.complete && // only worry about complete when editing this cell (nextProps.is_current || props.is_current)) ); @@ -129,7 +129,7 @@ export const Cell: React.FC = React.memo((props) => { trust={props.trust} is_readonly={!is_editable()} is_scrolling={props.is_scrolling} - aiTools={props.aiTools} + llmTools={props.llmTools} computeServerId={props.computeServerId} setShowChatGPT={setShowChatGPT} /> @@ -154,7 +154,7 @@ export const Cell: React.FC = React.memo((props) => { more_output={props.more_output} trust={props.trust} complete={props.is_current && props.complete != null} - aiTools={props.aiTools} + llmTools={props.llmTools} /> ); } @@ -320,7 +320,7 @@ export const Cell: React.FC = React.memo((props) => { Paste - {aiTools && ( + {llmTools && ( (false); const [error, setError] = useState(""); - if (actions == null || aiTools == null) { + if (actions == null || llmTools == null) { return null; } - const { model, setModel } = aiTools; + const { model, setModel } = llmTools; return (
= React.memo((props: Props) => { const [model, setModel] = useLanguageModelSetting(project_id); // ATTN: if you add values here, make sure to check the memoize check functions in the components – // otherwise they will not re-render as expected. - const aiTools: AiTools = { + const llmTools: LLMTools = { model, setModel, toolComponents, @@ -300,11 +300,11 @@ export const JupyterEditor: React.FC = React.memo((props: Props) => { sel_ids={sel_ids} trust={trust} use_windowed_list={useWindowedListRef.current} - aiTools={ + llmTools={ redux .getStore("projects") .hasLanguageModelEnabled(project_id, "generate-cell") - ? aiTools + ? llmTools : undefined } computeServerId={computeServerId} diff --git a/src/packages/frontend/jupyter/output-messages/message.tsx b/src/packages/frontend/jupyter/output-messages/message.tsx index 64a03ceabb..db319142d3 100644 --- a/src/packages/frontend/jupyter/output-messages/message.tsx +++ b/src/packages/frontend/jupyter/output-messages/message.tsx @@ -12,6 +12,7 @@ import type { Map } from "immutable"; import React from "react"; import type { JupyterActions } from "@cocalc/jupyter/redux/actions"; +import { LLMTools } from "@cocalc/jupyter/types"; import { Input } from "./input"; import { InputDone } from "./input-done"; import { Data } from "./mime-types/data"; @@ -21,7 +22,6 @@ import { Stderr } from "./stderr"; import { Stdout } from "./stdout"; import { OUTPUT_STYLE, OUTPUT_STYLE_SCROLLED } from "./style"; import { Traceback } from "./traceback"; -import { AiTools } from "@cocalc/jupyter/types"; function messageComponent(message: Map): any { if (message.get("more_output") != null) { @@ -85,7 +85,7 @@ interface CellOutputMessagesProps { scrolled?: boolean; trust?: boolean; id?: string; - aiTools?: AiTools; + llmTools?: LLMTools; } function shouldMemoize(prev, next) { @@ -106,7 +106,7 @@ export const CellOutputMessages: React.FC = React.memo( scrolled, trust, id, - aiTools, + llmTools, }: CellOutputMessagesProps) => { const obj: Map[] = React.useMemo( () => messageList(output), @@ -146,8 +146,8 @@ export const CellOutputMessages: React.FC = React.memo( } } const help = - hasError && id && actions && aiTools ? ( - void; +export interface LLMTools { + model: LanguageModel; + setModel: (llm: LanguageModel) => void; toolComponents: { - ChatGPTExplain; - ChatGPTError; + LLMExplainCell; + LLMError; }; } From 1e51d9bb633875e7efb713bda39255e7de6f3115 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Thu, 14 Mar 2024 16:58:15 +0100 Subject: [PATCH 19/32] =?UTF-8?q?llm:=20dynamic=20standard=20system=20prom?= =?UTF-8?q?pt=20=E2=80=93=20otherwise=20mistral=20too=20confused?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/packages/frontend/client/llm.ts | 8 ++-- .../jupyter/insert-cell/ai-cell-generator.tsx | 2 +- src/packages/util/db-schema/llm-utils.ts | 46 +++++++++++++++++-- 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/src/packages/frontend/client/llm.ts b/src/packages/frontend/client/llm.ts index 2f81730461..02135ca82a 100644 --- a/src/packages/frontend/client/llm.ts +++ b/src/packages/frontend/client/llm.ts @@ -16,6 +16,7 @@ import { } from "@cocalc/util/db-schema/llm"; import { LanguageModel, + getSystemPrompt, isFreeModel, model2service, } from "@cocalc/util/db-schema/llm-utils"; @@ -23,9 +24,6 @@ import * as message from "@cocalc/util/message"; import type { WebappClient } from "./client"; import type { History } from "./types"; // do not import until needed -- it is HUGE! 
-const DEFAULT_SYSTEM_PROMPT = - "Assume full access to CoCalc and using CoCalc right now. Enclose all math formulas in $. Include the language directly after the triple backticks in all markdown code blocks. Be brief."; - interface EmbeddingsQuery { scope: string | string[]; limit: number; // client automatically deals with large limit by making multiple requests (i.e., there is no limit on the limit) @@ -64,7 +62,7 @@ export class LLMClient { private async queryLanguageModel({ input, model, - system = DEFAULT_SYSTEM_PROMPT, + system, // if not set, a default system prompt is used – disable by setting to "" history, project_id, path, @@ -81,6 +79,8 @@ export class LLMClient { tag?: string; startStreamExplicitly?: boolean; }): Promise { + system ??= getSystemPrompt(model, path); + if (!redux.getStore("projects").hasLanguageModelEnabled(project_id, tag)) { return `Language model support is not currently enabled ${ project_id ? "in this project" : "on this server" diff --git a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx index 09b6a66738..a3e185a863 100644 --- a/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx +++ b/src/packages/frontend/jupyter/insert-cell/ai-cell-generator.tsx @@ -344,7 +344,7 @@ function getInput({ return { input: `Create a new code cell for a Jupyter Notebook.\n\nKernel: "${kernel_name}".\n\nProgramming language: "${lang}".\n\The entire code cell must be in a single code block. Enclose this block in triple backticks. Do not say what the output will be. Add comments as code comments. ${prevCode}\n\nThe new cell should do the following:\n\n${prompt}`, - system: `Return a single code block in the language "${lang}".`, + system: `Return a single code block in the language "${lang}". 
Be brief.`, }; } diff --git a/src/packages/util/db-schema/llm-utils.ts b/src/packages/util/db-schema/llm-utils.ts index 6f7df255b5..2d5c975c61 100644 --- a/src/packages/util/db-schema/llm-utils.ts +++ b/src/packages/util/db-schema/llm-utils.ts @@ -3,6 +3,8 @@ import type { Service } from "@cocalc/util/db-schema/purchases"; import { unreachable } from "@cocalc/util/misc"; +const OPENAI_PREFIX = "openai-"; + const MODELS_OPENAI = [ "gpt-3.5-turbo", "gpt-3.5-turbo-16k", @@ -118,6 +120,8 @@ export function isMistralService(service: string): service is MistralService { return service.startsWith(MISTRAL_PREFIX); } +const GOOGLE_PREFIX = "google-"; + // we encode the in the frontend and elsewhere with the service name as a prefix // ATTN: don't change the encoding pattern of [vendor]-[model] // for whatever reason, it's also described that way in purchases/close.ts @@ -151,7 +155,7 @@ export const LANGUAGE_MODEL_PREFIXES = [ // we encode the in the frontend and elsewhere with the service name as a prefix export function model2service(model: LanguageModel): LanguageService { if (model === "text-embedding-ada-002") { - return `openai-${model}`; + return `${OPENAI_PREFIX}${model}`; } if (isOllamaLLM(model)) { return model; // already has the ollama prefix @@ -166,9 +170,9 @@ export function model2service(model: LanguageModel): LanguageService { model === "embedding-gecko-001" || model === "gemini-pro" ) { - return `google-${model}`; + return `${GOOGLE_PREFIX}${model}`; } else { - return `openai-${model}`; + return `${OPENAI_PREFIX}${model}`; } } @@ -453,3 +457,39 @@ export function getMaxCost( const { max_tokens } = LLM_COST[model]; return Math.max(prompt_tokens, completion_tokens) * max_tokens; } + +/** + * Initially, we just had one system promt for all LLMs. + * This was tuned for the ChatGPTs by OpenAI, but breaks down for others. + * For example, Gemini and Mistral are confused by mentioning "CoCalc" and insert code cells for all kinds of questions. + */ +export function getSystemPrompt( + model: LanguageModel, + _path: string | undefined, +) { + // TODO: for now, path is ignored. We might want to use it to customize the prompt in the future. + const common = "Be brief."; + const math = "Enclose any math formulas in $."; + + if (model2vendor(model) === "openai" || model.startsWith(OPENAI_PREFIX)) { + const mdCode = + "Include the language directly after the triple backticks in all markdown code blocks."; + return `Assume full access to CoCalc and using CoCalc right now.\n${mdCode}\n${math}\n${common}`; + } + + // mistral stupidly inserts anything mentioned in the prompt as examples, always. + if (model2vendor(model) === "mistralai" || model.startsWith(MISTRAL_PREFIX)) { + return common; + } + + if (model2vendor(model) === "google" || model.startsWith(GOOGLE_PREFIX)) { + return `${math}\n${common}`; + } + + if (model2vendor(model) === "ollama" || model.startsWith(OLLAMA_PREFIX)) { + return `${math}\n${common}`; + } + + const mdCode = `Any code blocks in triple backticks should mention the language after the first backticks. 
For example \`\`\`python\nprint("Hello, World!")\n\`\`\``; + return `${mdCode}\n${math}\n${common}`; +} From 3c706bf975cce0d0874a0827984d18599c27dc17 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Fri, 15 Mar 2024 11:17:59 +0100 Subject: [PATCH 20/32] llm: add support for OpenAI GPT-4 Turbo --- .../markdown-input/mentionable-users.tsx | 30 +++++++++++++++++-- .../frame-editors/llm/model-switch.tsx | 7 ++++- src/packages/frontend/projects/store.ts | 20 ++++++++----- src/packages/frontend/purchases/purchases.tsx | 4 +-- src/packages/util/db-schema/llm-utils.ts | 9 ++++++ .../util/db-schema/purchase-quotas.ts | 4 +++ 6 files changed, 61 insertions(+), 13 deletions(-) diff --git a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx index 4eb7e905f9..ba1b8e3148 100644 --- a/src/packages/frontend/editors/markdown-input/mentionable-users.tsx +++ b/src/packages/frontend/editors/markdown-input/mentionable-users.tsx @@ -129,6 +129,7 @@ function mentionableUsers({ }); } } + if (USER_SELECTABLE_LANGUAGE_MODELS.includes("gpt-4")) { if (!search || "chatgpt4".includes(search)) { v.push({ @@ -142,6 +143,20 @@ function mentionableUsers({ }); } } + + if (USER_SELECTABLE_LANGUAGE_MODELS.includes("gpt-4-turbo-preview")) { + if (!search || "chatgpt4turbo".includes(search)) { + v.push({ + value: "openai-gpt-4-turbo-preview", + label: ( + + {LLM_USERNAMES["gpt-4-turbo-preview"]} + + ), + search: "chatgpt4turbo", + }); + } + } } if (enabledLLMs.google) { @@ -162,8 +177,13 @@ function mentionableUsers({ if (enabledLLMs.ollama && !isEmpty(ollama)) { for (const [key, conf] of Object.entries(ollama)) { - if (!search || key.includes(search) || conf.display.toLowerCase().includes(search)) { - const value = toOllamaModel(key); + const value = toOllamaModel(key); + if ( + !search || + key.includes(search) || + value.includes(search) || + conf.display.toLowerCase().includes(search) + ) { v.push({ value, label: ( @@ -181,7 +201,11 @@ function mentionableUsers({ for (const m of MISTRAL_MODELS) { if (!USER_SELECTABLE_LANGUAGE_MODELS.includes(m)) continue; const name = LLM_USERNAMES[m] ?? m; - if (!search || m.includes(search) || name.toLowerCase().includes(search)) { + if ( + !search || + m.includes(search) || + name.toLowerCase().includes(search) + ) { v.push({ value: model2service(m), label: ( diff --git a/src/packages/frontend/frame-editors/llm/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx index 6fe8bdc6f2..720b068543 100644 --- a/src/packages/frontend/frame-editors/llm/model-switch.tsx +++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx @@ -123,7 +123,12 @@ export default function ModelSwitch({ makeLLMOption( ret, "gpt-4", - "OpenAI's most capable model, great for tasks that require creativity and advanced reasoning (8k token context)", + "GPT-4 can follow complex instructions in natural language and solve difficult problems with accuracy. (8k token context)", + ); + makeLLMOption( + ret, + "gpt-4-turbo-preview", + "GPT-4 Turbo is more powerful, has fresher knowledge and offered at a lower price than GPT-4. 
(128k token context)", ); } diff --git a/src/packages/frontend/projects/store.ts b/src/packages/frontend/projects/store.ts index 78ea9d7a6e..2b8bd6f224 100644 --- a/src/packages/frontend/projects/store.ts +++ b/src/packages/frontend/projects/store.ts @@ -11,7 +11,10 @@ import { StudentProjectFunctionality } from "@cocalc/frontend/course/configurati import { CUSTOM_IMG_PREFIX } from "@cocalc/frontend/custom-software/util"; import { WebsocketState } from "@cocalc/frontend/project/websocket/websocket-state"; import { webapp_client } from "@cocalc/frontend/webapp-client"; -import { LLMServicesAvailable, LLMVendor } from "@cocalc/util/db-schema/llm-utils"; +import { + LLMServicesAvailable, + LLMVendor, +} from "@cocalc/util/db-schema/llm-utils"; import { cmp, coerce_codomain_to_numbers, @@ -31,7 +34,10 @@ import { Upgrades } from "@cocalc/util/upgrades/types"; export type UserGroup = "admin" | "owner" | "collaborator" | "public"; -const openAICache = new LRU({ max: 50, ttl: 1000 * 60 }); +const aiCapabilitiesCache = new LRU({ + max: 50, + ttl: 1000 * 60, +}); const ZERO_QUOTAS = fromPairs( Object.keys(PROJECT_UPGRADES.params).map((x) => [x, 0]), @@ -732,7 +738,7 @@ export class ProjectsStore extends Store { } clearOpenAICache() { - openAICache.clear(); + aiCapabilitiesCache.clear(); } // ATTN: the useLanguageModelSetting hook computes this dynamically, with dependencies @@ -777,15 +783,15 @@ export class ProjectsStore extends Store { courseLimited = true; } const key = `${project_id}-${courseLimited}-${vendor}`; - if (openAICache.has(key)) { - return !!openAICache.get(key); + if (aiCapabilitiesCache.has(key)) { + return !!aiCapabilitiesCache.get(key); } const value = this._hasLanguageModelEnabled( project_id, courseLimited, vendor, ); - openAICache.set(key, value); + aiCapabilitiesCache.set(key, value); return value; } @@ -813,7 +819,7 @@ export class ProjectsStore extends Store { if (openai_disabled) { return false; } - if (project_id != "global") { + if (project_id !== "global") { const s = this.getIn([ "project_map", project_id, diff --git a/src/packages/frontend/purchases/purchases.tsx b/src/packages/frontend/purchases/purchases.tsx index 723e05165f..2ce442b98d 100644 --- a/src/packages/frontend/purchases/purchases.tsx +++ b/src/packages/frontend/purchases/purchases.tsx @@ -587,7 +587,7 @@ function Description({ description, period_end, service }) { if (description == null) { return null; } - if (service == "openai-gpt-4") { + if (service === "openai-gpt-4" || service === "openai-gpt-4-turbo-preview") { return ( ( @@ -598,7 +598,7 @@ function Description({ description, period_end, service }) {
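Background for the purchases changes in this commit: purchase records and quotas are keyed by the vendor-prefixed service name, while the display strings (LLM_USERNAMES, LLM_COST) are keyed by the bare model name. A small sketch of the round trip, using the existing helpers from "@cocalc/util/db-schema/llm-utils" (the model name is just an example):

// Example only (not part of the patch):
import { model2service, service2model } from "@cocalc/util/db-schema/llm-utils";

const service = model2service("gpt-4-turbo-preview"); // "openai-gpt-4-turbo-preview"
const model = service2model(service); // back to "gpt-4-turbo-preview"
// QUOTA_SPEC and the purchases table use the prefixed service name;
// LLM_USERNAMES and LLM_COST look entries up by the bare model name.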
)} > - GPT-4 + GPT-4 {service === "openai-gpt-4-turbo-preview" ? " Turbo" : ""}
); } diff --git a/src/packages/util/db-schema/llm-utils.ts b/src/packages/util/db-schema/llm-utils.ts index 2d5c975c61..2f30108e22 100644 --- a/src/packages/util/db-schema/llm-utils.ts +++ b/src/packages/util/db-schema/llm-utils.ts @@ -10,6 +10,7 @@ const MODELS_OPENAI = [ "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k", + "gpt-4-turbo-preview", ] as const; export type ModelOpenAI = (typeof MODELS_OPENAI)[number]; @@ -47,6 +48,7 @@ export const LANGUAGE_MODELS = [ export const USER_SELECTABLE_LANGUAGE_MODELS = [ "gpt-3.5-turbo", "gpt-3.5-turbo-16k", + "gpt-4-turbo-preview", "gpt-4", "gemini-pro", ...MISTRAL_MODELS, @@ -130,6 +132,7 @@ export type LanguageService = | "openai-gpt-3.5-turbo-16k" | "openai-gpt-4" | "openai-gpt-4-32k" + | "openai-gpt-4-turbo-preview" | "openai-text-embedding-ada-002" | "google-text-bison-001" | "google-chat-bison-001" @@ -280,6 +283,7 @@ export const LLM_USERNAMES: { "gpt-4-32k": "GPT-4-32k", "gpt-3.5-turbo": "GPT-3.5", "gpt-3.5-turbo-16k": "GPT-3.5-16k", + "gpt-4-turbo-preview": "GPT-4 Turbo", "text-bison-001": "PaLM 2", "chat-bison-001": "PaLM 2", "gemini-pro": "Gemini Pro", @@ -374,6 +378,11 @@ export const LLM_COST: { [name in string]: Cost } = { completion_tokens: 0.004 / 1000, max_tokens: 16384, }, + "gpt-4-turbo-preview": { + prompt_tokens: 0.01 / 1000, + completion_tokens: 0.03 / 1000, + max_tokens: 128000, + }, "text-embedding-ada-002": { prompt_tokens: 0.0001 / 1000, completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings diff --git a/src/packages/util/db-schema/purchase-quotas.ts b/src/packages/util/db-schema/purchase-quotas.ts index 520269ac69..d0d7399fc4 100644 --- a/src/packages/util/db-schema/purchase-quotas.ts +++ b/src/packages/util/db-schema/purchase-quotas.ts @@ -37,6 +37,10 @@ export const QUOTA_SPEC: QuotaSpec = { color: "#10a37f", noSet: true, // because this is not user visible yet }, + "openai-gpt-4-turbo-preview": { + display: "OpenAI GPT-4 Turbo", + color: "#10a37f", + }, "project-upgrade": { display: "Project Upgrade", color: "#5bc0de" }, "compute-server": { display: "Compute Server", color: "#2196f3" }, "compute-server-network-usage": { From 99285c9338d615031e9df6a929d210ab5fc0e23f Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Fri, 15 Mar 2024 11:57:36 +0100 Subject: [PATCH 21/32] llm: make Mistral Large a paid model --- src/packages/frontend/purchases/purchases.tsx | 101 +++++++++++------- src/packages/util/db-schema/llm-utils.ts | 36 +++++-- .../util/db-schema/purchase-quotas.ts | 8 +- 3 files changed, 97 insertions(+), 48 deletions(-) diff --git a/src/packages/frontend/purchases/purchases.tsx b/src/packages/frontend/purchases/purchases.tsx index 2ce442b98d..b01065b0be 100644 --- a/src/packages/frontend/purchases/purchases.tsx +++ b/src/packages/frontend/purchases/purchases.tsx @@ -1,7 +1,6 @@ -import { CSSProperties, useEffect, useState } from "react"; import { - Checkbox, Button, + Checkbox, Popover, Space, Spin, @@ -9,45 +8,54 @@ import { Tag, Tooltip, } from "antd"; +import { CSSProperties, useEffect, useState } from "react"; + +import { Avatar } from "@cocalc/frontend/account/avatar/avatar"; import { useTypedRedux } from "@cocalc/frontend/app-framework"; +import { A } from "@cocalc/frontend/components/A"; +import ShowError from "@cocalc/frontend/components/error"; +import { Icon } from "@cocalc/frontend/components/icon"; +import Next from "@cocalc/frontend/components/next"; import { SettingBox } from "@cocalc/frontend/components/setting-box"; -import * as api from "./api"; -import type { Service } from 
"@cocalc/util/db-schema/purchase-quotas"; -import type { Purchase, Description } from "@cocalc/util/db-schema/purchases"; -import { getAmountStyle } from "@cocalc/util/db-schema/purchases"; -import { ProjectTitle } from "@cocalc/frontend/projects/project-title"; import { TimeAgo } from "@cocalc/frontend/components/time-ago"; -import { Icon } from "@cocalc/frontend/components/icon"; -import ServiceTag from "./service"; +import { + ComputeServerDescription, + ComputeServerNetworkUsageDescription, +} from "@cocalc/frontend/compute/purchases"; +import StaticMarkdown from "@cocalc/frontend/editors/slate/static-markdown"; +import { load_target } from "@cocalc/frontend/history"; +import { open_new_tab } from "@cocalc/frontend/misc/open-browser-tab"; +import { ProjectTitle } from "@cocalc/frontend/projects/project-title"; +import { SiteLicensePublicInfo as License } from "@cocalc/frontend/site-licenses/site-license-public-info-component"; +import getSupportURL from "@cocalc/frontend/support/url"; +import { + LLM_USERNAMES, + MISTRAL_PREFIX, + service2model, +} from "@cocalc/util/db-schema/llm-utils"; +import type { + ProjectQuota, + Service, +} from "@cocalc/util/db-schema/purchase-quotas"; +import type { Purchase } from "@cocalc/util/db-schema/purchases"; +import { getAmountStyle } from "@cocalc/util/db-schema/purchases"; +import { describeQuotaFromInfo } from "@cocalc/util/licenses/describe-quota"; +import type { PurchaseInfo } from "@cocalc/util/licenses/purchase/types"; import { capitalize, + currency, plural, round1, round2down, round4, } from "@cocalc/util/misc"; -import { SiteLicensePublicInfo as License } from "@cocalc/frontend/site-licenses/site-license-public-info-component"; -import Next from "@cocalc/frontend/components/next"; -import { open_new_tab } from "@cocalc/frontend/misc/open-browser-tab"; -import { currency } from "@cocalc/util/misc"; +import AdminRefund from "./admin-refund"; +import * as api from "./api"; +import EmailStatement from "./email-statement"; +import Export from "./export"; import DynamicallyUpdatingCost from "./pay-as-you-go/dynamically-updating-cost"; -import type { ProjectQuota } from "@cocalc/util/db-schema/purchase-quotas"; -import { load_target } from "@cocalc/frontend/history"; -import { describeQuotaFromInfo } from "@cocalc/util/licenses/describe-quota"; -import type { PurchaseInfo } from "@cocalc/util/licenses/purchase/types"; import Refresh from "./refresh"; -import ShowError from "@cocalc/frontend/components/error"; -import Export from "./export"; -import EmailStatement from "./email-statement"; -import StaticMarkdown from "@cocalc/frontend/editors/slate/static-markdown"; -import { Avatar } from "@cocalc/frontend/account/avatar/avatar"; -import AdminRefund from "./admin-refund"; -import { A } from "@cocalc/frontend/components/A"; -import getSupportURL from "@cocalc/frontend/support/url"; -import { - ComputeServerDescription, - ComputeServerNetworkUsageDescription, -} from "@cocalc/frontend/compute/purchases"; +import ServiceTag from "./service"; const DEFAULT_LIMIT = 150; @@ -587,6 +595,10 @@ function Description({ description, period_end, service }) { if (description == null) { return null; } + if (typeof service !== "string") { + // service should be DescriptionType["type"] + return null; + } if (service === "openai-gpt-4" || service === "openai-gpt-4-turbo-preview") { return ( ); } + if (service.startsWith(MISTRAL_PREFIX)) { + return ( + ( +
+ Prompt tokens: {description.prompt_tokens} +
+ Completion tokens: {description.completion_tokens} +
+ )} + > + {LLM_USERNAMES[service2model(service)] ?? service} +
+ ); + } //
{JSON.stringify(description, undefined, 2)}
- if (service == "license") { + if (service === "license") { const { license_id } = description; return ( ); } - if (service == "credit") { + if (service === "credit") { return ( @@ -644,7 +671,7 @@ function Description({ description, period_end, service }) { ); } - if (service == "refund") { + if (service === "refund") { const { notes, reason, purchase_id } = description; return ( @@ -674,7 +701,7 @@ function Description({ description, period_end, service }) { ); } - if (service == "compute-server") { + if (service === "compute-server") { return ( @@ -702,7 +729,7 @@ function Description({ description, period_end, service }) { ); } - if (service == "edit-license") { + if (service === "edit-license") { const { license_id } = description; return ( Date: Fri, 15 Mar 2024 12:24:08 +0100 Subject: [PATCH 22/32] frontend/account/other-settings: convert to functional component, and use the ModelSwitch component --- .../frontend/account/other-settings.tsx | 330 +++++++----------- .../frame-editors/llm/model-switch.tsx | 2 +- 2 files changed, 136 insertions(+), 196 deletions(-) diff --git a/src/packages/frontend/account/other-settings.tsx b/src/packages/frontend/account/other-settings.tsx index 4d9a392b99..f48b5b981c 100644 --- a/src/packages/frontend/account/other-settings.tsx +++ b/src/packages/frontend/account/other-settings.tsx @@ -7,7 +7,7 @@ import { Card, InputNumber } from "antd"; import { Map } from "immutable"; import { Checkbox, Panel } from "@cocalc/frontend/antd-bootstrap"; -import { Component, Rendered, redux } from "@cocalc/frontend/app-framework"; +import { Rendered, redux } from "@cocalc/frontend/app-framework"; import { A, Icon, @@ -19,17 +19,11 @@ import { } from "@cocalc/frontend/components"; import AIAvatar from "@cocalc/frontend/components/ai-avatar"; import { IS_MOBILE, IS_TOUCH } from "@cocalc/frontend/feature"; +import ModelSwitch from "@cocalc/frontend/frame-editors/llm/model-switch"; import { NewFilenameFamilies } from "@cocalc/frontend/project/utils"; import track from "@cocalc/frontend/user-tracking"; import { webapp_client } from "@cocalc/frontend/webapp-client"; import { DEFAULT_NEW_FILENAMES, NEW_FILENAMES } from "@cocalc/util/db-schema"; -import { - LLM_USERNAMES, - USER_SELECTABLE_LANGUAGE_MODELS, - getValidLanguageModelName, - isFreeModel, - model2vendor, -} from "@cocalc/util/db-schema/llm-utils"; import { VBAR_EXPLANATION, VBAR_KEY, @@ -38,7 +32,7 @@ import { } from "../project/page/vbar"; import { dark_mode_mins, get_dark_mode_config } from "./dark-mode"; import Tours from "./tours"; -import { SETTINGS_LANGUAGE_MODEL_KEY } from "./useLanguageModelSetting"; +import { useLanguageModelSetting } from "./useLanguageModelSetting"; interface Props { other_settings: Map; @@ -46,37 +40,39 @@ interface Props { kucalc: string; } -export class OtherSettings extends Component { - private on_change(name: string, value: any): void { +export function OtherSettings(props: Readonly): Rendered { + const [model, setModel] = useLanguageModelSetting(); + + function on_change(name: string, value: any): void { redux.getActions("account").set_other_settings(name, value); } - private toggle_global_banner(val: boolean): void { + function toggle_global_banner(val: boolean): void { if (val) { // this must be "null", not "undefined" – otherwise the data isn't stored in the DB. 
- this.on_change("show_global_info2", null); + on_change("show_global_info2", null); } else { - this.on_change("show_global_info2", webapp_client.server_time()); + on_change("show_global_info2", webapp_client.server_time()); } } // private render_first_steps(): Rendered { - // if (this.props.kucalc !== KUCALC_COCALC_COM) return; + // if (props.kucalc !== KUCALC_COCALC_COM) return; // return ( // this.on_change("first_steps", e.target.checked)} + // checked={!!props.other_settings.get("first_steps")} + // onChange={(e) => on_change("first_steps", e.target.checked)} // > // Offer the First Steps guide // // ); // } - private render_global_banner(): Rendered { + function render_global_banner(): Rendered { return ( this.toggle_global_banner(e.target.checked)} + checked={!props.other_settings.get("show_global_info2")} + onChange={(e) => toggle_global_banner(e.target.checked)} > Show announcement banner: only shows up if there is a message @@ -84,11 +80,11 @@ export class OtherSettings extends Component { ); } - private render_time_ago_absolute(): Rendered { + function render_time_ago_absolute(): Rendered { return ( this.on_change("time_ago_absolute", e.target.checked)} + checked={!!props.other_settings.get("time_ago_absolute")} + onChange={(e) => on_change("time_ago_absolute", e.target.checked)} > Display timestamps as absolute points in time instead of relative to the current time @@ -96,12 +92,12 @@ export class OtherSettings extends Component { ); } - private render_confirm(): Rendered { + function render_confirm(): Rendered { if (!IS_MOBILE) { return ( this.on_change("confirm_close", e.target.checked)} + checked={!!props.other_settings.get("confirm_close")} + onChange={(e) => on_change("confirm_close", e.target.checked)} > Confirm Close: always ask for confirmation before closing the browser window @@ -110,11 +106,11 @@ export class OtherSettings extends Component { } } - private render_katex(): Rendered { + function render_katex(): Rendered { return ( this.on_change("katex", e.target.checked)} + checked={!!props.other_settings.get("katex")} + onChange={(e) => on_change("katex", e.target.checked)} > KaTeX: attempt to render formulas with{" "} KaTeX (much faster, but missing @@ -123,28 +119,28 @@ export class OtherSettings extends Component { ); } - private render_standby_timeout(): Rendered { + function render_standby_timeout(): Rendered { if (IS_TOUCH) { return; } return ( this.on_change("standby_timeout_m", n)} + on_change={(n) => on_change("standby_timeout_m", n)} min={1} max={180} unit="minutes" - number={this.props.other_settings.get("standby_timeout_m")} + number={props.other_settings.get("standby_timeout_m")} /> ); } - private render_mask_files(): Rendered { + function render_mask_files(): Rendered { return ( this.on_change("mask_files", e.target.checked)} + checked={!!props.other_settings.get("mask_files")} + onChange={(e) => on_change("mask_files", e.target.checked)} > Mask files: grey out files in the files viewer that you probably do not want to open @@ -152,13 +148,11 @@ export class OtherSettings extends Component { ); } - private render_hide_project_popovers(): Rendered { + function render_hide_project_popovers(): Rendered { return ( - this.on_change("hide_project_popovers", e.target.checked) - } + checked={!!props.other_settings.get("hide_project_popovers")} + onChange={(e) => on_change("hide_project_popovers", e.target.checked)} > Hide Project Tab Popovers: do not show the popovers over the project tabs @@ -166,11 +160,11 @@ export class OtherSettings extends Component { ); 
} - private render_hide_file_popovers(): Rendered { + function render_hide_file_popovers(): Rendered { return ( this.on_change("hide_file_popovers", e.target.checked)} + checked={!!props.other_settings.get("hide_file_popovers")} + onChange={(e) => on_change("hide_file_popovers", e.target.checked)} > Hide File Tab Popovers: do not show the popovers over file tabs @@ -178,13 +172,11 @@ export class OtherSettings extends Component { ); } - private render_hide_button_tooltips(): Rendered { + function render_hide_button_tooltips(): Rendered { return ( - this.on_change("hide_button_tooltips", e.target.checked) - } + checked={!!props.other_settings.get("hide_button_tooltips")} + onChange={(e) => on_change("hide_button_tooltips", e.target.checked)} > Hide Button Tooltips: hides some button tooltips (this is only partial) @@ -192,57 +184,57 @@ export class OtherSettings extends Component { ); } - private render_default_file_sort(): Rendered { + function render_default_file_sort(): Rendered { return ( this.on_change("default_file_sort", value)} + on_change={(value) => on_change("default_file_sort", value)} /> ); } - private render_new_filenames(): Rendered { + function render_new_filenames(): Rendered { const selected = - this.props.other_settings.get(NEW_FILENAMES) ?? DEFAULT_NEW_FILENAMES; + props.other_settings.get(NEW_FILENAMES) ?? DEFAULT_NEW_FILENAMES; return ( this.on_change(NEW_FILENAMES, value)} + on_change={(value) => on_change(NEW_FILENAMES, value)} /> ); } - private render_page_size(): Rendered { + function render_page_size(): Rendered { return ( this.on_change("page_size", n)} + on_change={(n) => on_change("page_size", n)} min={1} max={10000} - number={this.props.other_settings.get("page_size")} + number={props.other_settings.get("page_size")} /> ); } - private render_no_free_warnings(): Rendered { + function render_no_free_warnings(): Rendered { let extra; - if (!this.props.is_stripe_customer) { + if (!props.is_stripe_customer) { extra = (only available to customers); } else { extra = (thanks for being a customer); } return ( this.on_change("no_free_warnings", e.target.checked)} + disabled={!props.is_stripe_customer} + checked={!!props.other_settings.get("no_free_warnings")} + onChange={(e) => on_change("no_free_warnings", e.target.checked)} > Hide free warnings: do{" "} @@ -253,15 +245,15 @@ export class OtherSettings extends Component { ); } - private render_dark_mode(): Rendered { - const checked = !!this.props.other_settings.get("dark_mode"); - const config = get_dark_mode_config(this.props.other_settings.toJS()); + function render_dark_mode(): Rendered { + const checked = !!props.other_settings.get("dark_mode"); + const config = get_dark_mode_config(props.other_settings.toJS()); const label_style = { width: "100px", display: "inline-block" } as const; return (
this.on_change("dark_mode", e.target.checked)} + onChange={(e) => on_change("dark_mode", e.target.checked)} style={{ color: "rgba(229, 224, 216)", backgroundColor: "rgb(36, 37, 37)", @@ -286,7 +278,7 @@ export class OtherSettings extends Component { min={dark_mode_mins.brightness} max={100} value={config.brightness} - onChange={(x) => this.on_change("dark_mode_brightness", x)} + onChange={(x) => on_change("dark_mode_brightness", x)} />
Contrast @@ -294,7 +286,7 @@ export class OtherSettings extends Component { min={dark_mode_mins.contrast} max={100} value={config.contrast} - onChange={(x) => this.on_change("dark_mode_contrast", x)} + onChange={(x) => on_change("dark_mode_contrast", x)} />
Sepia @@ -302,7 +294,7 @@ export class OtherSettings extends Component { min={dark_mode_mins.sepia} max={100} value={config.sepia} - onChange={(x) => this.on_change("dark_mode_sepia", x)} + onChange={(x) => on_change("dark_mode_sepia", x)} />
Grayscale @@ -310,7 +302,7 @@ export class OtherSettings extends Component { min={dark_mode_mins.grayscale} max={100} value={config.grayscale} - onChange={(x) => this.on_change("dark_mode_grayscale", x)} + onChange={(x) => on_change("dark_mode_grayscale", x)} /> )} @@ -318,30 +310,30 @@ export class OtherSettings extends Component { ); } - private render_antd(): Rendered { + function render_antd(): Rendered { return ( <> this.on_change("antd_rounded", e.target.checked)} + checked={props.other_settings.get("antd_rounded", true)} + onChange={(e) => on_change("antd_rounded", e.target.checked)} > Rounded Design: use rounded corners for buttons, etc. this.on_change("antd_animate", e.target.checked)} + checked={props.other_settings.get("antd_animate", true)} + onChange={(e) => on_change("antd_animate", e.target.checked)} > Animations: briefly animate some aspects, e.g. buttons this.on_change("antd_brandcolors", e.target.checked)} + checked={props.other_settings.get("antd_brandcolors", false)} + onChange={(e) => on_change("antd_brandcolors", e.target.checked)} > Color Scheme: use brand colors instead of default colors this.on_change("antd_compact", e.target.checked)} + checked={props.other_settings.get("antd_compact", false)} + onChange={(e) => on_change("antd_compact", e.target.checked)} > Compact Design: use a more compact design @@ -349,10 +341,8 @@ export class OtherSettings extends Component { ); } - render_vertical_fixed_bar_options(): Rendered { - const selected = getValidVBAROption( - this.props.other_settings.get(VBAR_KEY), - ); + function render_vertical_fixed_bar_options(): Rendered { + const selected = getValidVBAROption(props.other_settings.get(VBAR_KEY)); return (
@@ -361,7 +351,7 @@ export class OtherSettings extends Component { selected={selected} options={VBAR_OPTIONS} on_change={(value) => { - this.on_change(VBAR_KEY, value); + on_change(VBAR_KEY, value); track("flyout", { aspect: "layout", how: "account", value }); }} /> @@ -376,46 +366,7 @@ export class OtherSettings extends Component { ); } - render_language_model(): Rendered { - const projectsStore = redux.getStore("projects"); - const enabled = projectsStore.whichLLMareEnabled(); - const ollama = redux.getStore("customize").get("ollama")?.toJS() ?? {}; - - const defaultModel = getValidLanguageModelName( - this.props.other_settings.get(SETTINGS_LANGUAGE_MODEL_KEY), - enabled, - Object.keys(ollama), - ); - - const options: { value: string; display: JSX.Element }[] = []; - - for (const key of USER_SELECTABLE_LANGUAGE_MODELS) { - if (typeof key !== "string") continue - const vendor = model2vendor(key); - if (vendor === "google" && !enabled.google) continue; - if (vendor === "openai" && !enabled.openai) continue; - - const txt = isFreeModel(key) ? " (free)" : ""; - const display = ( - <> - {LLM_USERNAMES[key]} {txt} - - ); - options.push({ value: key, display }); - } - - if (enabled.ollama) { - for (const key in ollama) { - const title = ollama[key].display ?? key; - const display = ( - <> - {title} (Ollama) - - ); - options.push({ value: key, display }); - } - } - + function render_language_model(): Rendered { return ( { } > - { - this.on_change(SETTINGS_LANGUAGE_MODEL_KEY, value); - }} - /> + ); } - render() { - if (this.props.other_settings == null) { - return ; - } - return ( - <> - - Theme - - } - > - {this.render_dark_mode()} - {this.render_antd()} - + if (props.other_settings == null) { + return ; + } + return ( + <> + + Theme + + } + > + {render_dark_mode()} + {render_antd()} + - - Other - - } - > - {this.render_confirm()} - {this.render_katex()} - {this.render_time_ago_absolute()} - {this.render_global_banner()} - {this.render_mask_files()} - {this.render_hide_project_popovers()} - {this.render_hide_file_popovers()} - {this.render_hide_button_tooltips()} - {this.render_no_free_warnings()} - {redux.getStore("customize").get("openai_enabled") && ( - { - this.on_change("openai_disabled", e.target.checked); - redux.getStore("projects").clearOpenAICache(); - }} - > - Disable all AI integrations, e.g., code - generation or explanation buttons in Jupyter, @chatgpt mentions, - etc. - - )} - {this.render_language_model()} + + Other + + } + > + {render_confirm()} + {render_katex()} + {render_time_ago_absolute()} + {render_global_banner()} + {render_mask_files()} + {render_hide_project_popovers()} + {render_hide_file_popovers()} + {render_hide_button_tooltips()} + {render_no_free_warnings()} + {redux.getStore("customize").get("openai_enabled") && ( { - this.on_change("disable_markdown_codebar", e.target.checked); + on_change("openai_disabled", e.target.checked); + redux.getStore("projects").clearOpenAICache(); }} > - Disable the markdown code bar in all markdown - documents. Checking this hides the extra run, copy, and explain - buttons in fenced code blocks. + Disable all AI integrations, e.g., code generation + or explanation buttons in Jupyter, @chatgpt mentions, etc. - {this.render_vertical_fixed_bar_options()} - {this.render_new_filenames()} - {this.render_default_file_sort()} - {this.render_page_size()} - {this.render_standby_timeout()} -
- - - - ); - } + )} + {render_language_model()} + { + on_change("disable_markdown_codebar", e.target.checked); + }} + > + Disable the markdown code bar in all markdown + documents. Checking this hides the extra run, copy, and explain + buttons in fenced code blocks. + + {render_vertical_fixed_bar_options()} + {render_new_filenames()} + {render_default_file_sort()} + {render_page_size()} + {render_standby_timeout()} +
+ + + + ); } diff --git a/src/packages/frontend/frame-editors/llm/model-switch.tsx b/src/packages/frontend/frame-editors/llm/model-switch.tsx index 720b068543..eda078644b 100644 --- a/src/packages/frontend/frame-editors/llm/model-switch.tsx +++ b/src/packages/frontend/frame-editors/llm/model-switch.tsx @@ -28,7 +28,7 @@ interface Props { setModel: (model: LanguageModel) => void; size?: SizeType; style?: CSS; - project_id: string; + project_id?: string; } // The tooltips below are adopted from chat.openai.com From 73b40f694972131294f31e391fb003d57088a492 Mon Sep 17 00:00:00 2001 From: Harald Schilly Date: Fri, 15 Mar 2024 12:35:09 +0100 Subject: [PATCH 23/32] frontend/latex/ai formula: add button to insert full reply, since sometimes it is interesting --- .../frontend/codemirror/extensions/ai-formula.tsx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/packages/frontend/codemirror/extensions/ai-formula.tsx b/src/packages/frontend/codemirror/extensions/ai-formula.tsx index a00b124783..bb5e794c87 100644 --- a/src/packages/frontend/codemirror/extensions/ai-formula.tsx +++ b/src/packages/frontend/codemirror/extensions/ai-formula.tsx @@ -210,7 +210,8 @@ function AiGenFormula({ mode, text = "", project_id, cb }: Props) { Once you're happy, click the "Insert formula" button and the generated - LaTeX formula will be inserted at the current cursor position. + LaTeX formula will be inserted at the current cursor position. The + "Insert fully reply" button will, well, insert the entire answer. Prior to opening this dialog, you can even select a portion of your @@ -292,6 +293,13 @@ function AiGenFormula({ mode, text = "", project_id, cb }: Props) { return (
+
)} > - GPT-4 {service === "openai-gpt-4-turbo-preview" ? " Turbo" : ""} + GPT-4 {extra} ); } diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts index b01c6c5cfe..80f71973e8 100644 --- a/src/packages/server/llm/index.ts +++ b/src/packages/server/llm/index.ts @@ -285,6 +285,11 @@ async function evaluateOpenAI({ maxTokens, stream, }): Promise { + // the *-8k variant is artificial – the input is already limited/truncated to 8k + if (model === "gpt-4-turbo-preview-8k") { + model = "gpt-4-turbo-preview"; + } + const messages: OpenAIMessages = []; if (system) { messages.push({ role: "system", content: system }); diff --git a/src/packages/util/db-schema/llm-utils.ts b/src/packages/util/db-schema/llm-utils.ts index 973a84405e..4ad2986973 100644 --- a/src/packages/util/db-schema/llm-utils.ts +++ b/src/packages/util/db-schema/llm-utils.ts @@ -11,6 +11,7 @@ const MODELS_OPENAI = [ "gpt-4", "gpt-4-32k", "gpt-4-turbo-preview", + "gpt-4-turbo-preview-8k", // like above, but artificially limited to 8k tokens ] as const; export type ModelOpenAI = (typeof MODELS_OPENAI)[number]; @@ -49,6 +50,7 @@ export const USER_SELECTABLE_LANGUAGE_MODELS = [ "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4-turbo-preview", + "gpt-4-turbo-preview-8k", // like above, but artificially limited to 8k tokens "gpt-4", "gemini-pro", ...MISTRAL_MODELS, @@ -133,6 +135,7 @@ export type LanguageService = | "openai-gpt-4" | "openai-gpt-4-32k" | "openai-gpt-4-turbo-preview" + | "openai-gpt-4-turbo-preview-8k" | "openai-text-embedding-ada-002" | "google-text-bison-001" | "google-chat-bison-001" @@ -285,7 +288,8 @@ export const LLM_USERNAMES: LLM2String = { "gpt-4-32k": "GPT-4-32k", "gpt-3.5-turbo": "GPT-3.5", "gpt-3.5-turbo-16k": "GPT-3.5-16k", - "gpt-4-turbo-preview": "GPT-4 Turbo", + "gpt-4-turbo-preview": "GPT-4 Turbo 128k", + "gpt-4-turbo-preview-8k": "GPT-4 Turbo 8k", "text-bison-001": "PaLM 2", "chat-bison-001": "PaLM 2", "gemini-pro": "Gemini Pro", @@ -306,8 +310,9 @@ export const LLM_DESCR: LLM2String = { "gpt-4-32k": "", "gpt-3.5-turbo": "Fast, great for everyday tasks. (OpenAI, 4k token context)", "gpt-3.5-turbo-16k": `Same as ${LLM_USERNAMES["gpt-3.5-turbo"]} but with larger 16k token context`, - "gpt-4-turbo-preview": - "More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 128k token context)", + "gpt-4-turbo-preview-8k": + "More powerful, fresher knowledge, and lower price than GPT-4. (OpenAI, 8k token context)", + "gpt-4-turbo-preview": "Like GPT-4 Turob 8k, but with up to 128k token context", "text-bison-001": "", "chat-bison-001": "", "gemini-pro": "Google's Gemini Pro Generative AI model (30k token context)", @@ -413,6 +418,12 @@ export const LLM_COST: { [name in string]: Cost } = { completion_tokens: 0.03 / 1000, // $30.00 / 1M tokens max_tokens: 128000, // This is a lot: blows up the "max cost" calculation → requires raising the minimum balance and quota limit }, + // like above, but we limit the tokens to reduce how much money user has to commit to + "gpt-4-turbo-preview-8k": { + prompt_tokens: 0.01 / 1000, // $10.00 / 1M tokens + completion_tokens: 0.03 / 1000, // $30.00 / 1M tokens + max_tokens: 8192, // the actual reply is 8k, and we use this to truncate the input prompt! 
+ }, "text-embedding-ada-002": { prompt_tokens: 0.0001 / 1000, completion_tokens: 0.0001 / 1000, // NOTE: this isn't a thing with embeddings diff --git a/src/packages/util/db-schema/purchase-quotas.ts b/src/packages/util/db-schema/purchase-quotas.ts index 7be2755688..80b5a476df 100644 --- a/src/packages/util/db-schema/purchase-quotas.ts +++ b/src/packages/util/db-schema/purchase-quotas.ts @@ -38,7 +38,11 @@ export const QUOTA_SPEC: QuotaSpec = { noSet: true, // because this is not user visible yet }, "openai-gpt-4-turbo-preview": { - display: "OpenAI GPT-4 Turbo", + display: "OpenAI GPT-4 Turbo 128k", + color: "#10a37f", + }, + "openai-gpt-4-turbo-preview-8k": { + display: "OpenAI GPT-4 Turbo 8k", color: "#10a37f", }, "mistralai-mistral-large-latest": {