From 5a562d12938da08b53e5b0e69b34ecc5b6baa573 Mon Sep 17 00:00:00 2001 From: Sweta Vooda Date: Thu, 25 Jan 2024 20:15:12 -0500 Subject: [PATCH] added open-ai integration --- .gitignore | 6 + README.md | 51 ++++- langcache/__init__.py | 0 langcache/adapter/openai.py | 24 ++ langcache/core.py | 7 +- langcache/evadb_data/evadb.db | Bin 0 -> 135168 bytes langcache/statistics/__init__.py | 0 requirements.txt | 131 +++++++++++ setup.py | 27 +++ test/open-ai-integration.ipynb | 379 +++++++++++++++++++++++++++++++ 10 files changed, 621 insertions(+), 4 deletions(-) create mode 100644 langcache/__init__.py create mode 100644 langcache/adapter/openai.py create mode 100644 langcache/evadb_data/evadb.db create mode 100644 langcache/statistics/__init__.py create mode 100644 requirements.txt create mode 100644 setup.py create mode 100644 test/open-ai-integration.ipynb diff --git a/.gitignore b/.gitignore index 68bc17f..e7449a9 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,11 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +langcache-venv/* +langcache/.DS_Store +test/.DS_Store +.DS_Store +.vscode/* # PyInstaller # Usually these files are written by a python script from a template @@ -50,6 +55,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +test/evadb_data/* # Translations *.mo diff --git a/README.md b/README.md index cb1ab55..895b477 100644 --- a/README.md +++ b/README.md @@ -1 +1,50 @@ -# langcache \ No newline at end of file +# langcache +LangCache is a semantic caching library developed for Large Language Model (LLM) queries. Its primary purpose is to address the cost concerns associated with LLM API calls and to improve the speed of LLM applications. + +### Installation of langcache for developer setup +To install langcache, we recommend using the pip package manager. + +1. Create a new virtual environment called langcache-venv. +```python -m venv langcache-venv``` + +2. Now, activate the virtual environment: +```source langcache-venv/bin/activate``` + +3. 
Install the dependencies +```pip install .``` + +### OpenAI ChatGPT 3.5 ChatCompletion API usage with langcache enabled + +Before running the example, make sure the `OPENAI_API_KEY` environment variable is set by executing `echo $OPENAI_API_KEY`. +If it is not already set, it can be set by using `export OPENAI_API_KEY=YOUR_API_KEY` on Unix/Linux/MacOS systems or `set OPENAI_API_KEY=YOUR_API_KEY` on Windows systems. + +#### OpenAI API original usage +``` +import openai + +question = "What is ChatGPT?" +completion = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content":"You are an helpful assistant to answer all my questions within 15 words limit"}, + {"role": "user", "content": question} + ] + ) +``` +#### OpenAI API + LangCache, similar search cache +``` +from langcache.adapter.openai import OpenAI +from langcache.core import Cache + +cache = Cache(tune_frequency=5, tune_policy="recall") +client = OpenAI(cache) + +question = "What is ChatGPT?" 
+completion = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content":"You are an helpful assistant to answer all my questions within 15 words limit"}, + {"role": "user", "content": question} + ] + ) +``` diff --git a/langcache/__init__.py b/langcache/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/langcache/adapter/openai.py b/langcache/adapter/openai.py new file mode 100644 index 0000000..025792e --- /dev/null +++ b/langcache/adapter/openai.py @@ -0,0 +1,24 @@ +import openai +from ..core import Cache + +class LangCacheCompletions(openai.resources.chat.completions.Completions): + # TODO: Add remaining args + def create(self, messages, model): + # Get the last message with role "user" + last_user_message = [message for message in messages if message["role"] == "user"][-1]["content"] + cached_value = self._client.cache.get(last_user_message) + + if cached_value is None: + llm_output = openai.resources.chat.completions.Completions.create(self, messages=messages, model=model).choices[0].message.content + self._client.cache.put(last_user_message, llm_output) + return llm_output + else: + return cached_value + + +class OpenAI(openai.OpenAI): + # TODO: Add remaining args + def __init__(self, cache): + super(OpenAI, self).__init__() + self.cache = cache + self.chat.completions = LangCacheCompletions(self) diff --git a/langcache/core.py b/langcache/core.py index 35b7621..7107ffe 100644 --- a/langcache/core.py +++ b/langcache/core.py @@ -1,6 +1,6 @@ import os import evadb -import openai +from openai import OpenAI import string import random import multiprocessing as mp @@ -17,6 +17,7 @@ class Cache: def __init__(self, name=None, tune_frequency=5, tune_policy="precision"): self.cursor = evadb.connect().cursor() + self.client = OpenAI() # Setup needed functions. self.cursor.query( @@ -72,7 +73,7 @@ def _evaluate_and_tune(self, key: str, ret_key: str, distance: float, **kwargs): # LLM tuning. 
self.llm_msg[1]["content"] = f""" "{ret_key}" , "{key}" """ response = ( - openai.ChatCompletion.create( + self.client.chat.completions.create( model="gpt-3.5-turbo", messages=self.llm_msg, ) @@ -165,4 +166,4 @@ def put(self, key: str, value: str): f""" INSERT INTO {self.cache_name} (key, value) VALUES ("{key}", "{value}") """ - ).df() + ).df() \ No newline at end of file diff --git a/langcache/evadb_data/evadb.db b/langcache/evadb_data/evadb.db new file mode 100644 index 0000000000000000000000000000000000000000..a35eb7ed5c83034045bafc11b344da19eb3d07d2 GIT binary patch literal 135168 zcmeI531A!5nSeDqJ+`EY?bxzoLS&l|i#Ul6%d%ok62(@6i7h8SLXsAyV`*e7u_Z-E z5{J-W#|b1GpcK;r%~Fn~T!pewwtzL+blZkNTe^k9(gH0n()DAMkS9BAW*hB!-PegTgVhG1$ip_#@F^c$o39QFbuc z&zL3=>P81B5 zA(>9_d-ZMZ9@k1&SG~TF46s9-eyy{sc1oypu}z(QQ6enoqyvj-nB;ow##bC%xS z*`s&&wzuQ7)m?2J&aUT&+Mw@pt#Ea@+%2x|EL#&` zG8*)qZhfn(-G!>v;_Pm5wz_a(**dY^CsCHjlBcQAFnV5zN@p+-W1TsuS$vDgzsT$J^qMv>R>a}$au_fKv% z?5dhg+;B}6WTOCN3p*Ig>HsP9X*)CR{u6hFlt%;urrRZjhBd`X9k=wP4#-8qWPFjE zR5HqF6CGq&aEm867#l((QKFZMk0_A?#A*2*#F&5{9S&uf+XO%OBTOJRIEaTn{qoMv zc9+wQfwSQ-JIsjkCx)m*g4aL91tMse%DG@N4SJA z2`)3dpYJ3i;k22*sk= zj!&65{T|HHoae;sv}`}7<&(IwMWfW6Z#r53%Qz>~@iH=Pf00Tzzm^!$BvuUpZd;TI z#{!58T2$cWLb70W(vNNKR@XW`BhH6s&8)m=4!~+6Xx8z@201TQXc%2bs&uur#MoA$ z?PO)odRr)JEm;XuHj~XUF6`&hy->79h(y_Nl!>CQs&7RLm7cZ^VK(OiUM3n5iNain zW230jQ7()I=IqInMXRALqM;$baw1)xFj9pojn=B!rN%;NUdb1q@>FXmG*}Cjy0+S> z8bTJu!~!ts(V~&HDwS@*0%DXAJc6gaJ~qOoJf3@8Ao5La>XZ)R0{#9#X3#YyN8%70 zW{1*ltDC;12ja1`K&k6qFcptn%NbLygdFKq2utEoS|H;I27LbhSeQ+$i&L}FBtoT# z(WK{LP#UZxHCKQm{7SVM(Sck9 zLzFaSYw`Kf+!+fG<}H`}UM>*z`}|xOZA|d0ATLLv0MU$XHVXsoDG?MF^MwYxT&e4r zHx)opAA;J5w_gad~(UHlYb~Q$O!mE3;op2O2j24L<6*E zQzSN&W@$A|AF-v<6ZOK}dSxc3R9liI_R$p@4SGY8+a=T@?FP*?>gUy!Md8AS3w1~Y zFCYK}fB+Bx0zd!=0D);GFfxl&%^&HNbV$0<>YEFAxD{w`8w+!bdLvvovM90@g-f>t zgI;#g)?j#3B*c2SMT2ahzXeB}g?B>uHS68Ng;@uwMIh3Y)X(@51uP5=dpWDcYw2t9 zdN@;~hclb(oY&W2v$34f?(=#rBl-fBZX||`Yh?!qU7y&b1zWhlmPLd9zD1$o=s+-F 
zS!gmXilB=r3s7}7vHe_R5x0f)_ASa->qP}}$izllBj@$nJPkIV*KBXJHQCJ;ua`4g z8?08=*JK{Cs#Nnwi!eW7>EG)2`C>R~_*oi@q98aR=}nc5?6&yqW~<%oVNIOZ=(GFm z_&wWbH#M5gtjTO1IZLUk8W~1*w+yh+m8*L`OM5-2$o^0inc2izn;NZ64P2kq;xSqp zZC;KuvJH(kJ8A*8#)c86LRCFV%}03sL;gWF?2itAmX-#*LNFLLmQ?A;Xm>)mB6%6j zX0=&OM!VT&YhW#QZy#rFY%-cXsBtvnCT$zB%2j2fGE|3_a4__FSnomQ=G$%Z`K(r> zsnKGz*nNFA)@<`NSdD#_#s=1C_L=)emdg03ihZEK9U43O`m7CRv)RJ-aaN>iKFWA)f9T$9maGFn+vA)3=jYCu9Aq^_qPpmtEdr2b3|P~W5WOzUI^%76e6 z00KY&2mk>f00e*l5C8%|00{iQAW$lsPvpdTNm8aGgz##CkSdO1s)}S)L^49C)XJ)f zOc+j4AS)wqL`#n4nGlg|mds8}9&C~7WL9#rkOjW}FV%h^p~lcJcmV+*00e*l5C8%| z00;m9AOHk_01!AW35>|3D$9sdGGD?C^`X0s{DJEf_H!W!o(_*2;Pt zn#^{O-Qu--8c~TR6Nl`v*;&q(Q#QW-FQtAbp?*ibby~U@Dg^|901yBIKmZ5;0U!Vb zfB+Bx0zlxjCZLm9h)JRvQjM&d%mfJgphluo}0< zteLIRsE@0Us-IRLQQxP&UcE;hRkP~z)D7q)cmV+*00e*l5C8%|00;m9AaGg{D48Xn zTP*2lYhT;j&GdAxa=B}2W>(4#X<6%X#_jBI)zp+#$mge}tDW85=XZ9s*3=a1-QIrIs6JOA>-1 zZb%SXjEPrq5!N{r^l~*}ZjeI{e5@%il-E{C*idLN`4RIP;ei)5Um+E9M&=252hsB* z!Zn}#aZOKGHLg915Vt|ajY>uGa)mAa9NQ%_N^QbDSQDyKBmJ=D1rOUbpbQTw%TQkR@o z107Tb2mk>f00e*l5C8%|00;m9AOHkD8G#wHxkPr@XjX~LkP;+gMwR$u@5wx=fKde{ zt4+ypv`B|P;-1XY+tpT%)n>q`@L_07#8b#le>%Y=2g?r%)rCBP0kx9uk|MilN}` zc>yjd8ADNF99ba>oUX*Dr$ZD9e3T9<$SY)4l$IX{kl|9Yg8nVp;KRUfB+Bx0zd!=00AHX1b_e#00KY&2%P=|;QIgR?_;PT5C8%|00;m9 zAOHk_01yBIKmZ5;0oeaT8vp`800;m9AOHk_01yBIKmZ5;0U&Vt6TtWX&nh}Dp`M`* zQCCo#sO40Z_J6d$)lO*lYQLsETk}`VgPQ%CK~0CIUPG$?r2eV;YIR7xRDBjY30^<| z2mk>f00e*l5C8%|00?|Z2-M9YXIB+e%`PA$K8}sX!d%4N>g?)ru3ypK>FlwX>rC}} zOH+e^tgIxh;p8)Ckh689qzW0~^`kFh3wSsIb)CgrueTW# z=TM}*jFeO&{VNh4VO-myVb&83h8Z_I#Pzt=_0*Y+#(IOoswEdxk-GUfZx|&p&MYbaX(m0bGsFc>bXt>|wBXW^%xvVG`l8j~gB7MRp!lzmJH(sC)OLv5z4fJ`VA znuxjHa5jPZELl}clG%MWRXNlu%uV>mkof*TPC~8JzDNCz3Q(1lk-CZc28sr-+9#=h zXrtOUv>bH_b(H$0_OSL&?G@S=wLj8+N4t}{|4TB^ftf%62mk>f00e*l5C8%|00;m9 zAn;ic(8^~ML@LNc&MK6b5yJP=k(EM9Dsog%p_W$?L?SL!pcAF3%Cpi`Wui3Y%&atJ zsYYH!5Xk_IvZP3^BM2cHqbwHAN`+n&GtQ9P31ad{i-IbbFF=Vop%sNTPmDt+3QeWl zLJ*ULM&xS2v_xP)UQ{fv6)BY$Qt~+jkq%$T$Qkl#g2)6cWCg-0Sz!v9O3;{yNJy1g z>ZJQBDJZ0nrLBTUC?oTf 
z;p_iO>LCgBKhzV{L)7ck5$c66IeRc42mk>f00e*l5C8%|00;m9AOHk_!0AIkxR)Vy zPlZ%T%1ed&2&4+(_J?H5Kq?om`3qqJS%JKi#J3tqi{w=#naly-|9AQ{38)DW00KY& z2mk>f00e*l5C8%|00;nqsR+RH|ED4Yav%T%fB+Bx0zd!=00AHX1b_e#00O5E0l5Bu z`uGxR0tA2n5C8%|00;m9AOHk_01yBIKwv5YaQ%NOG9U*6KmZ5;0U!VbfB+Bx0zd!= z00AIy`Vhd^{|V|v3HlE&AOHk_01yBIKmZ5;0U!VbfB+Bx0zlx)N?f00e*l5C8%nodDeb|It}N zP9OjTfB+Bx0zd!=00AHX1b_e#00LiD0vhTaiAJ?gqCG_YNOPrjw>qq8En25Gk%x<_ z$qQ8b$O^^lst$RN;#%oravi}C6-33CwXPq>ZdbWdx86cX`~fewEwXvgALSS}77Yr= z%*J3JGvJRzgW+Mu!$#S`U_WD;M5r4bn5oj)Y{XbF%JvO%zF5E$^#=otCm4yQ&YVGk3)LD9WXOG_9+un}TR(G{^IJ?&CSGm^f>yjA-6)QTsTx~1eIH4}9 zXoJ4XwZhfqa<{m;vusU($!O4by7jHDb{DEvi?h4M+3LcDW$VOtpF~+AOP;1e!{~V> zDxJYVjCJOqW<3L0;PFxE6mW4GraCFUtoo*)Ve>usBqqd02hzaJ)<%IA8}5&!g(jo9 zL6CkT@V)M~HN7r<9k#4KgV^u=RNPW%px%*1It^1b|1^q3JO$|lrPbU z+?1dGxw;DZ-X93XqS=m5nK=C(%+j3a#O$Kyl4)IGJqOPiMMGKXl zwhm!7=K@|P8WD-YT!>?%sM1j`j0WcH$&*E^p)I1JA-{4WU7j#fg(}TPMdDZp%`5rh zQySQXg+`&lTBy{u)lStAvM44NfJu)QjjUCvbPE;`qm1AYJni+d5iaHN+~Wd~Z*o(o zbPyNl_XjeAt|>VZhuAPXly+O)^esIMg@&aCN?rGYsd(gC&X{r~>`dBN1Q)(eXNeD@sQmMPZHWiXd4aKAXBvn+6L}V)6 zym`c~xdI&FSE|j34&)*jqNFKXi_eed&RBRbYhjd_RsCLcbGYB<=fY@Xf>#B3IT8hk zW^A)r7-&z4V6uIT=95cqo%}CRw6DbAsV1Pn#`iL!+o~Re*)+;kPrP`7V?S%{QepsV=LsCqP zNwha;SQaHx(c%T<@jHz*#E9+c0K8c`N_eM$sI3}>oz77H=jOO#@?VZcsiRL|&FF$Xoa(cNl12 zlfCQ}?>jR4m;CTdp}~H}iAH7-N5&*NoHM$vQl;zYAjWD1-+0j~5UmZ9g|p~w3Je;P z@M88P>1T*Mc(UBsskp4%jfS|)oYWM{UU|{%F3;Izi;nv^;O4IBhQVR48>@*PO(qX*=>Akap?`@>HSuOPKVhDY(6> zyiAz4c6TS{g8ZO3-X7(|$;(CUV3G{j@mMT&IhNqnVrsxdo0`PhmD|R7V#3~GsvT)M zgRnQv-LQU!JIX|@>h+VEoHuqSGK-oa7Ui|T$wqCs>d>DVF7pB9c3NQq=6gVNJ18H{B6)xR4=Qu%w%F&WbWE&l{aiF(wX| zEW*L9sbyzsU4w_Kdc82^rjoK(mFa}+5iKE{#69r&|4(UC4{Ct`5C8%|00;m9AOHk_ z01yBIKmZ7Qfe66o|Gz-a08@Yf5C8%|00;m9AOHk_01yBIKmZ7QN(A8g|EB~N)B*t@ z00e*l5C8%|00;m9AOHk_01)^B5rFIeUm$0IDL?=S00AHX1b_e#00KY&2mk>f00cfI z0&xBRQvwTWfdCKy0zd!=00AHX1b_e#00KY&2z-GE!1ezxkTbv(AOHk_01yBIKmZ5; z0U!VbfB+Bx0-q8AeEnZW-6)}6pbnuw@B#ur00;m9AOHk_01yBIKmZ5;0U!VbJ_iD4 
z%H(qmk~Td%q>m18`c2$$`n~Y&>1Rq$l1`jB9q<1Ma+3u8hZhh40zd!=00AHX1c1Qw z61cpCP|UqvD_YE5qPS*@GEvO_l0a-IG`!Fg408+7TQ2PPjPEaEeBt0wU(Dy@!sAzu z*L_Qp%x?L|R^B}R%*2Mr7xO(OAH;w9mv+8f_wdBV_qWl?yZarDuN-*T_u7W$n}_D{ zFWj%^M=yG9;-P0(#bsUB(N**Bo;b|BxA@Yl_Bw{=s}2U5OnmFJU*TOtQitK6E{K2g zOB?xpZ_kWw-oG&mmSg4v5R&5Eyv~bkDq%WuKjQgZR@<>@&3Acx?S;m$4|bumY;Lx z9)8Z)8}v7|w@&=Q<&VE|$CdHThdI9ekK=URpAS1Wx0Tbscx^M?<~GG+4_wNhzke}* z&ypbh^?QpQzj)#hef*wX@fR-odGm*_$sRuQl6iE=npf!6s)dd}9J<%>-PW`C7n=6+ z$LgPS+_3LD{`No4;V(T@#;=sWx!3;vv-z(XBJ{zhuA|@ByMcf6(IVch8K)l)m!4UcO?L zFTV4fEfc}6`y4-g*h@Emvuxs8j%@zCNk=PUbirT-$XDm3g`S$Oe%b!`2#7}uDPM)3CEG^zDuurN=Yk^-OL}f6+60*uXFso;XcQO%Q*f}rpg-iF z-2Tqui?6zW;^jZhq?hdcC%Ra)-Z3|R-+@0*G)fBSj{9r)_SNslUz&J?{;&4= zbg1n+@oU~a>{$Eg1if|PmHgK)RmNZ1GtNK4-NLW=zHZ5dzqu^_NdHmC^`1VOd92bg z{$L?rSKUhACfiHz_})$Q&U=FN@!Fs9cg#N0yyFLNHUD+x_52OTN8&#k4K?5Xhl9;O znDqm`eEYw|H|(_0=iI0|c+QcZ@Ga+DMK9?t_H&10psrcs>UKmlMf{cDy1*f?*b}#}@8kcn>~4PBb#?T*>vrF#+_;Y`ldJYSj_iCQ{+_A3vrr|E-Fzia==-rF4yzVv*&w(|G!N50?0GtbTBuay2J{zJZU z;>BIX^cvM8&G!Fz)FHX|u8EHAU*#*v2k5amj)S+}`Cs%uzj7wMS95?q_G>G@o>1`b zyxADP>UWp%wL4#S9KU#Z^DoP5Cw^N|yyW{|Uq)ZQ