From 61e345a9613829ac9ca812f44b26e29d721367d1 Mon Sep 17 00:00:00 2001 From: bvn13 Date: Tue, 29 Oct 2024 00:17:54 +0300 Subject: [PATCH] initial commit --- .gitignore | 4 ++ .idea/.gitignore | 3 ++ README.md | 105 +++++++++++++++++++++++++++++++++++++++ docs/c4model.png | Bin 0 -> 8619 bytes docs/sequence.png | Bin 0 -> 27962 bytes pyproject.toml | 26 ++++++++++ src/__init__.py | 0 src/app.py | 27 ++++++++++ src/dataset/__init__.py | 0 src/dataset/preparer.py | 5 ++ src/logging/__init__.py | 0 src/logging/logger.py | 5 ++ src/model/__init__.py | 0 src/model/trainer.py | 40 +++++++++++++++ src/model/updater.py | 5 ++ src/translate-dataset.py | 75 ++++++++++++++++++++++++++++ src/web/__init__.py | 0 src/web/server.py | 42 ++++++++++++++++ version | 1 + 19 files changed, 338 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/.gitignore create mode 100644 README.md create mode 100644 docs/c4model.png create mode 100644 docs/sequence.png create mode 100644 pyproject.toml create mode 100644 src/__init__.py create mode 100644 src/app.py create mode 100644 src/dataset/__init__.py create mode 100644 src/dataset/preparer.py create mode 100644 src/logging/__init__.py create mode 100644 src/logging/logger.py create mode 100644 src/model/__init__.py create mode 100644 src/model/trainer.py create mode 100644 src/model/updater.py create mode 100644 src/translate-dataset.py create mode 100644 src/web/__init__.py create mode 100644 src/web/server.py create mode 100644 version diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1d1fbfd --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +env +env/** +.idea +.idea/** diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/README.md b/README.md new file mode 100644 index 0000000..82bda57 --- /dev/null +++ b/README.md @@ -0,0 +1,105 @@ +# AntiSpam 
complex + +Комплекс ПО для борьбы со спамом + +## Компоненты + +### Диаграмма компонентов + +![c4model.png](docs/c4model.png) + +### Dataset + +- необходим первоначальный датасет (ham/spam) в формате CSV + +### Model + +- использует датасет +- модели генерируются https://pypi.org/project/spam-detector-ai/ + - naive_bayes_model.joblib + - random_forest_model.joblib + - svm_model.joblib + - logistic_regression_model.joblib + - xgb_model.joblib + +### Decision Maker + +- компонент, который принимает решения +- представлен в виде Web-сервера +- реализует API + - POST `/check-spam` - открытый для пользователей + - auth: none + - request + - body `{ "text": "SOME TEXT" }` + - response + - status code 200 + - body `{ "is_spam": true }` + - POST `/update-model` - служебный, добавляет текст в датасет + - auth: TOKEN + - request + - body: `{ "text": "SOME TEXT", "is_spam": true }` + - response + - status code 200 + - body `{ "status": "OK" }` + - POST `/restart` - служебный, перезапускает данный компонент + - auth: TOKEN + - request + - body: none + - response + - status code 200 + - body `{ "status": "OK" }` + +#### Sequence diagram + +![sequence](docs/sequence.png) + +### Model Updater + +- добавляет в датасет новые тексты из базы дообучения +- запускается по расписанию и выполняет следующее: + - делает бэкап модели + - запускает дообучение + - при успешном результате дообучения + - вызывает POST `/restart` компонента Decision Maker + +### Transport + +- используется [RabbitMQ](https://rabbitmq.com/) + +## Use cases + +### Телеграм-бот + +- приходит сообщение в группу в Телеграме +- бот его читает (не входит в данный комплекс) +- отправляет на проверку +- получает результат: спам/не спам +- если спам - бот удаляет сообщение + +### На почте + +- та же схема, только должен быть работающий клиент почты, который читает каждое сообщение и, если обнаружился спам, помечает его как спам (помещает в категорию «спам») + +## Дообучение с учителем + +### Сбор доп.текстов через Телеграм + +- владелец 
бота в телеге встречает сообщение, которое является спамом, но не было удалено +- это сообщение отправляется в ТГ бот спам-определителя (не реализовано) +- сообщение помещается в базу для дообучения + +### Сбор доп.текстов через почту + +- спам-письмо можно отправить на почту, которую читает бот (не реализовано) +- бот кладет текст письма в базу дообучения + +### Дообучение + +- 1 р/сут происходит вычитывание всех сообщений из базы дообучения за день +- если в базе дообучения за день что-то есть, то + - датасет обновляется + - запускается процесс дообучения + - перезапускается модель + + + diff --git a/docs/c4model.png b/docs/c4model.png new file mode 100644 index 0000000000000000000000000000000000000000..78edc87c40fd80ced925a820c5a279e83cc37431 GIT binary patch literal 8619 zcmXY11yoeu*9GY=X_%qAOS(fsx*I`2L^=kL24MsVX(SXdfT5)uMoMa=8-|p0=;nL= z{@<*{%vv+=zWeUI=j^@DK0x(#HHh%(@X^rFh%}$78Uml+fxk@LN5K0}3uAdSG-e-7 zRV8DH?ST!Bxv|E~y+R6{rr2PAE8&s)Pg_=|cIau6;p3E!)0XN}TQ#zDTTJ@+_;yU; zFkvz=VT9{`EX^xp_;T&TFWWkSi{EoLfvFc#WAb&r&VepRGTXya7o&?e;luK8(k~T! 
z)WGt@xWcF^+xFn|{W%IMkX-QTYIk(V#o9&xJ5 zTCdpHUs;01pb?MdHmGOU#bRf8XH88_Y3ZLo6W_)P6~a--pC6xi4#<%@1NZ*&<;&Nv zU+?el!_dP{$5*>nyFTel%gJVMsSrT%Uch68k-f^~y)>E*%$g3}gARudwo0XmU6Uyh zllBkBEuLizXG1A-;DpoH zuowLA=AAy9S||Cs#vdTeX^%SJ&*whx!sp^owvwfHyNRS2>4-aAP zsr%opJ#F3ndH#P!d3oEq`49h|ujpcN_8=5afc@Oj(WFeLl+9mdi~5xTuhb@eRWqt} zV({PD@uKt!<=;c07UnM%*-O(zT=KD7`8K5!<|B+cPOi(!X|3-9_mU4@+{7^y04Kw0R zG005yOx?7 z#N33x$N5cAE{~x{opY4|#@+hC*FpDbMi)`6jAH*UZ22xl*oSwowEq}bk{hvZO86Tk06q=fuJsh?k4KR``;@1Lehf2=Jaw#{&C1Rc&2v39h zJeEHqzAp}>{d#}>B2ZTdIp5C_btx+?otvM}%*yJ7prRISBI8I&?MzMIuJf|87JvFA zU%R=wN=ix5)<$%^aKg117959z zCB3@3yOq?`*qN99}pf4*JhQ;`GXG2bO5bW2BZlaP@3`uYa6Q}p^P zldcaL9l7shLV3Vo{8Ku3p%hX)7G(%0VU=;(Y%OOv9D$f$I87i$O@V{?x68<+W`5ioze{`yMT1sQ>aH-Yd;;&}B^vV-*dt3ojd=8mh zzLf7<2t5OXj?cFSha^T@ll9Cu>*7y50_nD^jJ$6zy)iJNH-BZ>wLwy06tcKnC;%tz z84)5D-RK{2H|NgxbBi7rn zt%>KG{|&(kr8fm>94akv@$pUm{;j3GXCH7GLCS=DxWDm?@f;l;1#q2n&ocT&o7@VY zt+XnKljDqp@#N%Wc5d$V)fJv|6DJpZrO6xP(EH~*N+-32anzF>orlbSV58NfGk%Th zLn7T%Vbv(%v1o2+37#%*Hc1 zYMe%#mQ!!(Kh2Y2!E56Dm3!=|L2fEcj8)S+xTxn8g5@eK)0UPUPEd0^vA*#+zu;3u zrsxMsm#;iyI%eG$!-K{j>vSDGD!-;L@ICZ&EY({7RCu@r0`g;{=gJ34#z!SHr3I!Ve1S+C_z2HhGzjadfxu#e-5(`*V^VM#C!Ysf5kOSk1@w?kM|m392GG9>PDhUO%dtPj2`1Z25;>8+4r*73>7VEv-HpWhFr71ii%c%igE zKC^&+pvEd#+avJUDYd}Yz^_!kOM4eh)-hf&$T*0u**NZ!AD_iiQpA^$Gm>UguIrG-;BZ`HNO zm#IE_2>{K9qwojTJqUII1t7T8UQrdu1}RVDvHhNyNZqHTrcR@j?aLK^B_=L@b>7;> zeqYsjulxaj`YPx9DFdMIObiVE?AAvYV7vtFgPP?nx-tHyK-fyN#iOI52Jjua3d8S9 zD=Oa4WqJN#M<12}m?7I65-OZ!24~giUj?3wg*OKO>8ZGw*kzmF^=Q5fE)>|yAi6_T z45Ud*^;cBeeaXpFEiFT1-&iHS*2phdzB+{ML(my4_9>v@3{R=!+_ zgn)p@-St_%H65IPV{BxtXvSok#NKZ${5`2BNU8u>Urx&(Z7`!=@VOC|! 
zKGQ%19{?n2_E~dlSi&nf>mAmfxVX4eY(#f&FNP==aeF}lXdC)F=wyrEx{1`0nwv}G zgt#l<9v8mjC;eVMEh8h=S6#sO&+jjpuRj_X8nSS6L$k>oNG)b0a&vPR7Z*tnj^^s( zV0!{hDsSA~VY#_>wvQBG-g9+me+Oo7e5xdDkajD`?x6l9|cT z;N~mvmFbZjA)}Xidzc=K7k#x5;R6VNooOsP8tIOAgO)x%8v|t7_`n3Zj~L%t@pEx8 z@wOxtkSDKyFE0<-9HOjMW{s*T#e+w3 z{1FCv$(DtM1@0*+&j)`-2Y?&WI_~*ajM9yiv*Jr;8^1@!2$C`5&;6h}dr5qRkC;B9 z`@thDQB==^XfgIo*Ppw{^)oEw^z0UF%QIt6>tnda;+}U z&NiimU|z=`$pIzX-;)y+XBrSlciN!q39>N``mA6aC|V+4g4PP&zbEoJJg*b%AX{-k z-KzYDRP_}V{NAILCSLiM8k>dsymn_~V=h2QqRE3#>pTZEl}Vrqd9q&GPYIAanZ4gf zY=Pgy?T1M4@Vx!}tXl*4{Y$+`pkt_ek^VjHQ993wwMply>-YzzuJppRG>k)xM~_Ae z6~;=nbae6r@379e$b5=RI-?Kc6@27<#ohBqRpUOJtntSaP?;xWT_4#$Uq9`~h5(8s zMDdxtK%TbyQccC6*|H-MI`)||4 zxR%nx2`~1#O)DtEC2Pcy0PT-?_7QzE~{zovj=q@?E>6)bR3hZ*-7)Yp6Dar?L|Hzu-Us~2fT`13Mi zdaUgklG6z4*^i7K$sMDohP*R0>2x{k9bg(Fkw}<=xAQg%f<;kXHIDs7MMaBCOKE9o zN6YOCNk8ql_dBD0o_@K#XEyBxd0ep#t21mJI!fFI8-!aOr2hJ$UNAtDvMYOfxr?rE z;g)0#mYf|c@<&<)o>gB$*R5(GPcID-E*a1Je_vAuL)FdS(fxulXzp&8av~j>j=k#4 zRZ`7weGszATMpj!p9cvaR|ISU4*YYIPp)GkH{*tX3Q zqlkXqg^$uGrmvkezHRWx$xok#dj3KRZk*Q5xNebCg-e$gE~DE$h9$ zt8K9>wG-VdXHY|kOsl7R@O*eip?1S$luFP3oc`7dMG*?8U^>K`B1W+VSs}>TMS1qEA^f2mQ}~Z(Uv+pEYyM5k*=o`r z3%~Hj`tZ6H)*npb9-o*Ut~MBX9fEGo7=@=Cn0;BueuroEjh1MIL1gFttKRc{IP>3I zy@s%!qzy7O^|^Pfqy0>Z+H7|-4~#@hjgZ*Vhe2;@e$-2{sxLAP(1p2?Ri(Gr+a!Ko|u)k*IB_Jh@Z{Dn;2Dnp=V>)XC-y`}eY6r8)=!pMiR zze*fD#X@ues$r3-ks{j;p7{{-w#gZO-K!5J5}ym-m&NaIJiyHpSOV?#I4n1obfjuq ztJzu3cyBCt5@-hBO@2A^o$?^w7q&0811ef;x%<&Eg@;5K6y^#gZmjX2{j|+QMna%2 z?NKGlGHJ=?5g;=vhuJW9Ii^ALPtl3Z>yrL447{4<-WHV`RVB! 
z`}>Jin~QuiyX~=aMEaj5{zOJbhJ}SqPfy$0+JZnJF}jhTKYuPSFL!sNR{;zP7;#U; z#p`QpEo+^i&CSg~9RP(wKY#u#PXGwU1ZqJEA)&6zX+SPGI*jCsA5bK(5>rUjchIwN z!GV&_&d|_jD4ko9G=Yl0r$*nv0Aa6WV`F1vL{SAtFdvGP!(fTnLwu{aJT;1KYV>dL z!I2B&)6)Wif`NX1m@D>NoSbokHSZ5RrziAg8wIdZX=z?0aYt8;m{zXNI^SZV3i z{ml_je;{ZaAA4X&Q`*_;>oYyc2WxcrBOP?!aDX(%fB!Z#HKl^zTwXFbp{o;40rcv* z*o4m82^3gB`Ea<>NdiVIegmBE$@Z8-IOJ}-FnsnQ65mUcFDQbuAN9i1QO0+RFb`9f zXccfZQtbnh&{KFqu%VLSaAa-4^pOgnFam{Qf)^Alc8RJ@da}N{Pz^0ajSt(s<4p| zmV1Iz&q{YP5Bpq(Yw%IK(n2zHHQPXCwsS$_HclfU>_(4ZGelD;st%4vV>T358!Us)=z(p-C zpc4}lfEV-1)s<=Qy31s8Y6=JdZk;8 z0<`FEAlk@oWoQUkNuL7N)7O%c6iPXaz<8J10=-X_m7^I8u=B|GfB$+_cld7W))w`z z(xJKlQ(A}wbyYghP6LEFZsCHZ^t6gs8iZqy8}#<=TU}ki@33i0*S}Q$th_016K)bfryjv+MDJFmTP7<6L9{>i$IpGV9-(zr!d(mc`VtL zN}WO4@2QztmM8BRL>rj*Y=<$XJea87naGbey4GYz;huLn-nV4n{H(p&LijL6tShxgW)TtWA@LF(#nBO{fZoHl?|jE;_mY9uk| z4*vSJ?w*)iTPp(C36j3&_i|3#0Mfv*&Hz*is7;E74(;x`ofB83r$4@h11{J<*Z7!3 z;WmOT$M=_AXs8^qBx?UhWswY=V7-^FISN?zbNEB=Bf`gX_}4kv+40_8PD^!Erx79N ztlL1&PfbpqA1nl}cA=l&w*w{?vdUr6-u9b(Fzs?Wq^JApWIJ`l(TTKCF_vC2B9T}~ zs^RuC2@U?z!rBxgvh}pOFDfdky1E*OU?qsT<5=Vf{M&Cg%IVq;)oF)$=@-in)3tLp$J zho7HV7#YubrUF9>0ItaF>cF4?kPSzNHpweYz&0Gm7SL7%O%L$PLjFcnE69JU_y)PZ z-m8*a1wykz{<=EUFXvJ~4o1e5NwP{woT5SU3QpaFvCb(XD(b*a6ZN<8uUKqsEN-e$ zkAB~3bQz4@J{}M{tR~1hNG#SmrSRiEUikum4l^>;vRA?3w}_(F9;g7t3Kb+cv*s>N z+|A8Rz}mg?;~R6%&(7Yb0td70^THuN90S}mOa$@gN+>M8P`0RX78#B_(Rfm1C|GN# z;8q@H{6%TfG(Sb;;FEIkj#lwbMIR!AL)hw>BQH^}*2r_-5ywX5(?`?%&lWxMTNWfj zAKneCB<&@AuZZuuWW6Y+=t6am#}zX1%9A#!-{N5Eq-EOj=lG#>VIo?ROaL2XsjlxOZ(AcSV{Wxl`>uf%!_E3yB3Y8mS}_J^S$oGd(@M-5i9e zfsM^;S`)2*J(Dr!_Owb6cVbl3BXZqZWg(_`AP4b20_T#SzXY_n2Bq->4(4A_nd~VM$tce)*8cNr`dOV06#J^f*i2IGolf_2Z1zdtQ_Y1Isf%}f?f_cyj~cPTSfaV zVP<9q;K(`Dsyk*CxH>k3o0}VOYYzlxRvJAE(d7#NIj6sC#mdS9eSKxTk(g=z9o>C> z&VcJ6w>UI}^Un<@Wr+;Y$$`%$t0hn|Dum_Lp~ugVASqDfRH&PaqwU+XogG)e732VX zTfi!ei^Eyb-8{!sR+LRUyQGQ<$vy@w60c7-*4C2s0AEhwZBU>d#Py!rNxZ17ocZhs zs0duGA3uHoW}ikUCS8vbfM^C?Lqqr+vHHYPdPMj7S4oKCNLubWRZxz(+Gm~SL*faf>lDocvgPgW?hXwNRi6RsLq$a;5p8r2_22x?CB? 
zY||v4Est@4z~C~<(?7E>?aG3Z%v+pc+XYp${c*c7VIz`CVqMf8bJK0}XvJQDdfUNJ z|6EqabKKa})YR9f?lXf$`R}ai)#(wfRaJt|xf0QEaNi3+N@lU67A@-c5A5|!4? zk5)hch_boeUZMc;3ZyHK5r9+6S@S_G(L1!4{wYtM_F-ctoPuM7y9Edl zrz&;KbYUv>wC>2xaf$x_=!Tj8IX|L`pHYneG3(9&i%iDQTFL6_u7 z4PD8T2}Gsv1o8yvAH_G>s#=HP`x@EFgXV`4RQRbVnfLC<$9#Q!dKP)D7z9RKEZ`1iGfLih)g!r#rzmXQrLHVa40 z**ipV*5ty~!EY3jy(7P^r^Sx!r~^*1kpu(;?2C=hbXujD&%Qp6Ejwqss@kHY(tN^3 zQI)LMBqD!^yv+7pFCGose7I{G5M3`Gd!O9SL>6wrf|3ql?(<48X?VHVnI-msppO^i z=B}x(#w8>ynfL>=BA5x)aaq&Z9h6Q^0>S`F3rd4@C`uy+U80Cc z^V_3-<9XiqoXS0xub3<*s-8OKbXrA07ID{jJsZ~#U+oTE3@!X<~U1Wd1e5bIsIjS&p7rPQui+#NLY$0;(=w#M+R*luhtvhulb0EvR zIrsZF(eGJSLjwX!hl}aylp5B-bU49;XIW=u2r*>|UxXp}00;an3)~yY;3WVMWm#p zR8;U@Ez^0T(Vxbxm64c8|E$h&vWi7LqjC3hOk!dp0gZ%d`QkwO+ggY5goK2=ygWT) z?}g+WQfB33jMrjKot6eOBg7s(dbBoHE`Kw&+_=f7{O0q3qR3>rQ1pTyA5h@+AsX*~ zd-o2(;u3S2J4tnNPJxz|R=4xvRlAGP_Tx{vg@YUJEUBtb5GyMuK6%0_1ebENRJYvx zRWv;Tbt=l+*LTi@l$3O=<6~RMWI(2{%a<=-jLQuLiw)a?&uU>dB+c&8i-i`drSqRm zS0*P{R|_cUO}XE>^YZRDhEg@cF1QX}v3OePD|%0V$RO=IA^MtAz7bJ+IL9R1Ojf3Ng!`gwRk-@7g-2>8iobP6eSPd`clxVz<#+ z2fe_+K+HR~{n2SWCPN$oNQbvY_|u^|X4N-xr5uwL1KzxOrMUoySR*4IxR-SM=U+`}}(S)v%JZhd}J zy{J?klN?FAlmH{wkF7zu0sB!4Z!i@jXD3)nC}fM}(Z7&3@G!CDZ15AEbhR9bbn@h+ zq=6ecf-2Fg?@B^6H`gbtZza!l$3Nh{x%1xZgW^|Ga`Ijpk~m6X4U}%4%;}nq(Na=h z&B;7jj7uAn)e#=kh1W?Vlef#y{rv^6Uu%{fH#awLwXnLuNs$XXCu$WCSNzo8YjbvE zs+RLD$CENz;VKrEDr^UM2WO59*gbUbb0UpdEw z!|)R_fI{<|0+nQbt3Etxu@oLU9EKtgdOfJ zqhuC;_5p=1g@?SSm;6Wrvo2V7$7Nr|-pHvq74YrVTT&TO_{9$&^2AXKq^mt=6TMnk zl#|K7;BdU&59e#N(Oa6GQhP+%u*UXPl~w=U=XO}i8Fz`jdy`VVy@&M54QowL56VU` z-O4iY7Yvi08|zI)-KVB3q&m&4R8U+TbvXHeh+0x*GBl}3+ua>+Q)rNooV=W>V)kvr zo#*D}TVJwnWQiDvVyW(^Mo#PsIA`A~F;wWpT=>E+%1|OslJUe*PDRIVYn}(bIsEoq zXQSlN=a@$EB|biPo0Zn90fz!Jc?qZ$#zfSLWFl(8p8-kCl8C~6s64uzqddAL1^-S~ z9>r8Eu=lx9VDFu)JQ}J}V1I^1Td;{$yS5QtYiH6HJWX7ZF-u%xILoRnXqKdl*jS&; zq!eOeVxiHxydbjiA<(=(P13D@aqy+rx^kM(a8_@t1_AA{(WGw9wfhu5-+SFj*>QZk z@>cHw&xzIQyG>Fq{lpaaTUGItq~CvxSEqO@72HB)a5qOr@7r(9yQF&l*e=6%CMA$c 
zK2Xz@c=+Ipz>4PR`e*jH0%Dxg=RJmVI(QkJrnc7-8ExwGLqm`H0`x~BQI+3VS#wh; zb+kv!QgUzHOuC=^Zt6!?LPBTu&3b~4Ry#diT@|4V9dRn0s5;v>_N=T=`cjWl6-71l zAKtokD~`6MRVcB|da|lnJM2ECm-&;mu_Gn80}5ehe%sfsg+}QChqqPI`Hg94Xgt=& z2=L0jeAX^@n$}onu$`<@ZwbKAuf0HROv`IpQPpsNWl;Dao`$-l<~sWWlywY`nAext zY8#V6BT31%py^j3buWi4--md*=k{U|tZMvob5Od3D;#bNC(+OhKiXb+aKg-v87(JA z!Fgb&!iWl&HJ=#Fe4a+hH*V@UQqWMg!1QSU=qF`g z-InE?+QG_`T)yMuqYufAai`r^-*HPGz3AO9Ob&ThW>5;{UeE4<_7&vdWs`}!g^T=? zuR?@g3>U1*ioY1P48itzV(c%%dEt;_7mo``b$(N?*z zTTrTBak1VoZZA2HQ_NC_lJwA?$hrE+UMW6@`&K=U^_iKSE=rfa7^!{Rf#)BFazt+1 zH~HXG2sy%%d}tGKnZ03Vw%Jpvp0zs|K^uTtpB#Sn*>}^wb?YH_lh2Qinsk0Emg?4Q z^~`1tWR9>4`HnfIX`S%jW|%W{HW!pB8<7P*m};5vw}9E4Jd6cwi!C(WRE) zm9!C1E$N!foTbF8Rid4`v+~&ldaP`lQi)#ixXLFX*PY%$0@N-wV`c8pw(3VpAFuAynNW<*zI!a2jRKe&mOGBDVYR2LjV#(^StWh8 z6XO$@)r%>*B5rcbbeoa18sUCXFJQPk-u;BjxepF?Z#Qi`w|>RPBxZ-7UnIjRuddi# z^VzRm%H^S^mQF%i65(H#yylP{`grNXDMY`OI6h;=*-!V7DHE<_X-(@Z%(_^VP7~1 z$HxWe7gTLCm=T|n;bZ(~jbmw!$c1iy`=s6B-j1926oEH7J&jGemL+ zT*zRv|A#L8gSL>C%8V_H=bra&^)+ET-OgRHqw&-InO;Vl$4&F@F=}B}hb1%Hsw&`- z+sJ5>@8&rY4y&7;%<3f++l;&wV%4TE0B|Zapv;I_@rC0=u0LzuH|$Lst|m?6NJcDV zEvbznBntUsD=i+=-;KFko+Igd`)HpYpQ5pIy-(#4zv=o^e?MhM4xfc{UEqi=F-o^o z*JwtYKIHq4nLDrMpIy0qO2lwwhdjXQ!abfs0l_$b8r5L_5fuB1SnJjlUt}8Bo zTw9CCSI-Oxix_(rFMOm&`b_%7uVh&2U`e~faebnav)=-5pPF{)OIH>jf$U-UiW2V@!uDR7;{@Z8fuK1JSQ5AK`2!;mKr^T2BK2tU- zIpVq~MeP2`?G@=Nb&b(7>O2WJnsxQV+gAysN)Oaf*t-aVRjpCCP4?u$A%24fP4&yj z*7uU;!B+i3hX)^s$a=qiqP?!@qg7i|(+~N{X{P?p&o7ts#@izZM&ZfZ}FWHhsX^u4|wd7kvC zB{ANE>yPA~Mnx&z){9#Umt3~7Sbn?2J#)$Q!pDoa_)fazhWK`PTS`>*Z(7wQ_Y*g9 zeiAL4jg_69IaQFdQB_=J;(+a3Wu`t*i8A>9Dvr80ZEZ7mvzu3=0oB{B^UN4yuas!U z?K9T>1yN+R=e?~IUxKO6Qg7c%?3R5ID53eNBU!A3>~DIaCM4cJ6=q*me8w}{ch7C1 z4;wpzuFQ<5&LGhkF=!d(-_h~m6}$cW7umgAp3#$DUnK5Xbdx7!Y%jlg;{AOL4%$)H zlK$wgKu)+obLjN-Bhh_|reLeK57LC{i^C>E(euf2{;d;&*2(W$_!nAGO-DO)Ha0d2 zkGYU#`s5KW9L8mJr$pAKEwcP~TP>q-U7rwAOD>F?5Ii~_mUhwG8A!@Cc||XO?K7e( zg1+eF%1itw8rfkEXO|KU#>x)}G#H$Pp-!_hE4(hdWYN07>{0LY!y2KXqViyKVfWj4 
zk?4478cyLpox1+~zJPM-`rdFszHXHDAnCfX@8+z9wtAK%tXV{Kc>U$VK}b~v2de1v zMfbpMMfT>5jZX%4?x;V@D25*j`$w^T(JkLT%W8D$V?nNHv-8{C7$tO0TtORn z%RH%sA~RP1X8{Xjco|j%OUb_I^CJ7m%Tv!c8J?>>{L1`=OyAFX5Kfc~%IM&g^`PPB zF>EiUZ{H4X-8u(NC4(+g?xj_XMe8*Fg=&<6aa3x1Qr0pk|6j>d+UOw(|E+Ebx>^pw-YJ z(%~#^GrrL{nsf<#ZY3A@XM=;N^vW)=V(@js6%3Wt3)(t7jk$P**9$G z-G}qy^VQ(JjkaX#6CcedsI==h(jD)l^@Z7Mr(Q{ANZvu<;p}JucKaAQE6vX*zea6LpTtilwiOlFZ>cH#N*I zcQ!gZJBvI5Jf}PgH74rg4IlcbgUDAg;}sHJ_bBB(cWx5|Q;s+WKE-5hdsa^@doy|A z4R8Jybo1;j>kpR7H}n=-Z^wkz&b0&O+MZXuUeBINb(r znqX>KDjD_9Vemk~>ZJ4v*_4p_uA{ot z&!wJrFdEGec6nc{#b#qkF%)o+Uv-rZu4=Gju7Q~KikoB)vG2_0#s;H-tK27oGLtQn z;XL9Q#Ebg+6t+)M-jRV8=FQfMWEhPXFCku9_AY&*$2?e;ZD1^Ln%>FhU%hnY7UoMf zqZhH;FXbHi^J|Y#RJ+ewXoB5jhO#X;4HwvLP(@#2jt*|n@pma2G~UIH<>twiSseL; z{)Shj>L3T9vo2!PZG}DAX-K<86IpP*u%|!;natXNqL#RO4m%u>yQ7ZJzK1QI;qA5%ChkceRV%XYxb!+_b;N9$Wu_aiX>tJ42|zqCt43Io^xB7O z0&;E%D6PI~OHAZj!;x&%f{TxRYf8MzGnDS*Yvrn`sN%zw(~Qu3{MD2Ur%cstY`;;m zNb%%mYC=&krz)U|ek!2SenuhGA1a_&Oc{`GeHoDb2k>t-XnJ`6-!wqpPwXwaqeaQD zKFoj2`pw3q%O>~DM%b8+@zaW7Rm=I4;}@aID>Dt#CINfQfyEar+A$6Bs2{#_Src_D zv?pc_@LqqYB6>!3N4+~f_pe^D%5o+6?4Mq-@$ReD(Nb^d*m{#&F1j%A7`@=&8QvQy z?En1v_~-Lg_DNyrG7nZcYtJSy>Y+S=0D~V4jb@N2n9mDk+bDfj^bzshMw(OHa~)9! z9qlC&l4^(XE3;;XXY|HNP5ekmn1>X)D(8fpEZ9%y^|NgA{`gno4%-=s8uJeo0H2vQh&5n|ciDxRW?*08ucu(xj14lTp z<{7o8h+O~B zV&ZJ$+SH5O*#EJn#GNI2C)_11@iyFZK|w)o;Ia2$hTxVYjdTE&`juPtc~k?v$*fxL zUuHz>qJk=q3o#NC#6ijkYU{Q83{BBg_w!b;O00#C9h-%PiKU{@-|VrMJ=D6|kx zR~xD9@AW*q#p%pD7B{r>-O)>VbUn`VvW~XkEEA!pZmrQv&(UWlAJ~&2R0PD6^PBw- z(zpzgNRAFZoLDQu`W7Qg)7}-p{f=;?JGmsG%mz2!zZBj|< z>%;m~0toEyJJT)})&2)Nj>IusLhs+dXUAdjWwg|)K!kBl%H~S>Bi6~s2(S+nW6xd(M1YM?(W&~}akAaefbt#{S>ZW4E-t)l zWs9=1vPqRdDn~^{l}U*Q9Uc7S(aOi<7qA&hd7(`%?Qr%136F6WNz?Zsacy}AC7U^K zPt0{nmj*53YJgE6Cm4R?h^tye! 
z?Y(eD1w)xXr*4_4BDZ0U=F0uiT;S5-&;;S>8ZYnKvtCfL)0oFtJ0=6s%C9b9E=gug?0M_%+DPhssOx4(dTp*(*Bhw0G zASt(Ds4EIlqFr&-Zp0GU&Gp&#@$m;?_sVXtUUwXSW+IJ;TvfF?EFZ;%$7~D@DOMe48`GWmkvin}-f@S#XMm7O zcgn9yR+QEsEGS?aONb&A4}t{z;Ag8Q`b>;kX5t)IM16ed^!vV-PBvNt5(;qR+mPyx z>m~Lke&5cn!a}C-^xS#_?VtEO5q3rG$^(-CS;DR@s+XP{Rb!c#*zUzkA3x+wZRId| zdQ0@3H^?@Xs#Ur;w)|xT;exkbxU&m!B2k^jrLY-_v*nmKoeWrCWFw|^wJu34GDyD^d`j&S^JuF~?p zHwVd;iFZqn?nNAHRqY^u!0ckrm!>mNfVSrE-st#Qv1sx1X4f( zGILh3FxhH1`+3jH%gf)Ejw3wX&w0SnY3QP@Cw;U{9D~cEk;O*&>cn)dDkkQ-rRDtc zo5zW_tJFtNT}UU-&dx>)Ka&d+RZ}P`>dQw(>X~v$L`NTAXK_v4+1s*m@N|g4@IQx# z=U%0JW{9Ax_1X|Z-Rcxo7RspcZY4-z6?AUxUk;Cs61!W}E+7$uQ@aBEHyd8OfMfF# zBp#c&nHL@o;izgy(czKVJGd$tAU6_|`JyO9+?QWFJZsXeFqN&dHhXX8NkT8``cVdi zt#g;yT4VLA+dDdtKFMB!HbYdpVpe^Z#uU!1J*#myx2u!X#jqL9u`-#db$)Ni?>13C zWhnjvBpx0f9&1d{gUpQ#s!OScKx#4ZSz#_#97Q1~F;pu24?Km)956n9L~wVEI3=^F z>vLX>uq-Mls9+#LRtp`+4(MuL*r^~IdDV>Ft0^f)Ieg~}y-Ghu&G*D$hz#*KS4>Y` zK`c4UvM8l^nuYUaAuK8?DiElB5f-A$>Sw4qq>U=i?l7M*$7!Zh(ieYGQ&ZFA`;$h> z@8N;O`wYqHjvz(UWn$tvOWb%h79cunPfMhJUmBj9uZ~}LVx9hW-LNMhqf1IGHF@LB z0zQ+ScZjDHn{YkKuA3;Xl^#{(Kq!NWjXl>24Hp7ekS5=_mGjY?=(Xp`= zE5=+-X1%fKU>g#6v}W>SZM@rdFW4{ah$)eY+vW9;xhkqF6Z7*5s3zv}Noh8=Hhj#; zIrTG-itN{QTIuD(nQ%HG>3b7D;@cWg+QzQjy7&)B3Cci`&wK;5AMb6$~vRw_0ohEl}U^zK~|RkYT~V+42y;7NOXdsi%ZwcXny*r3V)k^KDp`EsVk zq3q1Gw62>1-W#>!y3dRUZ{`xcbWgG=-i>@MIQ6Y8AtrOf7MA!PVuRMO)*cD@4S++- z-XdC$w4JWofYmW=@mSLh!NNzu@ifQ~^RTzIEq?y|XQf5Ae(BFy-{Jm#P6h^ZC`Fqs zC(WQ^F@4=o<3JP>5)!s6H^|KoblhkzEB^pX$=3P`s-prftCh z7m~%i4|gk5u%ERYi)Hk4Q$YmzETrlcs9#l@sRQ%@X@~DTF%#Pw$m>HnlANQMIOmd+ zlZlCmH)k3cpWj>yUD*84LT~_>>F7p?+G`Fesbf&q(aLrjr{2?#WY0`mauO0;7YE3_ ztsKosHNE!etM|#t$=SpLk0k`qa4uxPGk!AvGo#$NZFjZY73aFg)Jh^}v}*$sN+~cq zCMQ9gVg?z|!^0z6z$UJ7F^ZTqi~r<`6hBCR$q)LxQ7AZdHz-B`53ao{nOj;Kud&nV z3%G%}u$(6<%z*0!r@#=01>Wna*49=iNDY8@0U07TLxEl`0dK*lPoHqu z4E(WDaOHn|`{w;)RPRkAuqOj?;Sh?~I-{B{aT~gC&G!z_T)wOedE0t0<56+I(Y~u?!u#)sKmd;}jPcx7d7BwgZ=#ML83e z+oFwO{tldGvI(fY;_l1$bXZ4og`6n*8Ph-~$lLx{3F176*nIuO3diO(uLd;}P)8VP 
zi-3J)=>c?koE-aGcXGrhw;t8DViVAah>H(r3g2+X1aaQu_HBb2+eAw(y;=|xl(jA} zopY*AJ0acUtOTjvjF*gtL!Q^r%4#@Ak|SFtycAj5m4Bscr11KC0a@ke-rpBNpnloV z0G0>4$to-Dj>&49>*}yO!qx+1mV}w`t}s9UMT+p`u?jQ!lIE*ZP>Z&?YF~NhwJ@sL zVm{_}4i84>mfjqhzBUeN+t+jrR>y~Xgaj+>SyZ7uTOY3(jRIs1J{7Y7ntn+2pnsxS zUA}!;)wXDOs46Sj9esU}csvlz&#&HWW)9LM;Xp)6epis+Bks=-bQsDIT!Z}k8nN=7 z909HCcG#QC!t%1H9fC0g8bCjN`tqfli^`p?EecV$g2}CKifU>D3;hoa20>P& z4^KWBQ&${~qmhgb(9exUR{^6J?t5#`S`s3xE3l&`Q50Lyx+!Yw&~+WTJV=+p0fSHP)=RI=F0G19jyDwm4%DQe6)-ALPCQR~B}>wGjQzV2 zBbH8)8acG{CS*>x1ujA3o;qFtyb-z?MX#R_w&l-d4 z%OTFeHNmA@rtj`v)A%SFrKqTgPsS_b%$KePP+IZi`1p8&R7c4Hh1CG28EOt}Ow1HE ztuwM@2UI5|*)s8v7W>X-h;SFjT7)jr3*1>$vwr5ZcRDvWm$qG1Sok^GATjwy1?WoH zf2;$rS!|TrL)_QkIy$z(2^0OHxLbAfbLi(9cQV;a7 zRiPST&Dv|JuD|Mv|45`oEgf*{xGwgaS~f4?>e2DRX5>U(c2Z)=J9MX-k-_0l)(ZqU z2S>m3{Yp~N9iqf|=qa$hMC-AkaiTRzi~3jcp+oiyCnYBr(uzA=_Fhl5vF%ps6bRCs z9Uq4Vu?oCKm6er!uhsu`vA`bKH=Z0H;Ns#oj zEL*8pPyO|l|Gfg)-aH!$yde?W+r(sOv)cM8CQxxv8Th|n`kT-x5MOqQSw8)OHp@r&&Fk6QJcUsC+nqCsBl3{d|?61|DD3A;7psDy@qzo{m}8O(6B zU#>$oCbJcm7t`$TYsJHc8?g{U_AfY$fKvzRs0bUI?e5o4e(95qP_n^hLo4pdb>&K& zftbf?4IJ#U+N+^%tz-3fzkSXY_qtn}t>5g|(9#kZae*34BQ`^|@i>G3g^3yt3p~J@ zm6MZWXJ-dr@v(nk5nO4eLB>^~k=MT-)-!6Yv?(JE%v9!!XyS)Y?E4cXhz5yIcL$PCLaYKCt_GB!DU5D`c zr^tcyqYsVV#B*k8kUdl+vL>d3yk_40_B{XGV zqoQ{iRA9eYeAq!C{g6BU3b14zw6wY4537Y*zBA zgg1Y6L|7PTSd|9V)@&$~7O8n96fF&naf6$cZAEG68gM_AxK>-=zjw4Gz2?Afbe!aW zXXzbvLu#*ca1j5cOlP=c2KNzT$n9ou076m;E`*?iB4hP~-mczl;=>Ec+n*Jn2ro@3gjo+va%_HXjT{>Rw>%(Jj7Xb0q5EV%&T8r`kA{xZfAipxHoYG9ayW9Y|B zj=n?|wTE<0auxv8;_xgjprn6&^Sh+KMj<&%j%)1R>IVQSI(zTo;nJI<<9z~b~fDXeR2V;bXeE% zKgD~jRF7w6g>97wVEF=IW1_4g8!Rx$FeVucWXsNhWBIfZO&i^_WL>)q~^lQnBVExxmA35i>(=$dG8|;cqff zPQiiT0jSP4{2c*@h{kAeNJ!yQ+TuE5@WY2d;NQ4%v$6&d8w2SAJ<7SI#UY&KvLRTF zsdQAo^W=`>z(YPpCZ^EmhDzFkWt*%waxAZfo`C6QGJw#woC1N_MwlD{AdQq=dmg;8 z&@hLE06sl4SEBO-0AaE(@&H_o1rP4HiFvMzn0Ml=2wX2jc>s0fZ+%Ac%WRXlj-aCu zKyWzQ>-Zm_2O?aN^4V6N)jUJQdSDnzM}_|@VRwbpid?z!1=wc4ptB0f93Nyj5)t?p z7gkmbAW?u~M`y9IkX381nFvkQ!QLLSMElj?;6^uXpydvjA1?!u2u)W7G<6a`(cQ5Q 
z1mdhcxxT4sUTLWVI0*+QOTbJS{rvfJpoAYh&_f_n3hYUFj2*|HW=BLQ$;ik=U1SEx zYO$e*4?n&JUPBjV9Bk8pSA%qDY-^nSW%@Zm-|4W5ozK`1_h*tUxTiF+%L7i1yh6=K zup=guoCJ#)p!@N%x99kIG!KiWC2AvI#RJ!*cKv$)PA><;5M=FY7m;$J%a?@!zg45+ zaf}}SXPer0!;nb3v#TpSJRI88vGH-eBZ6D9S}46|ae1NsWr;Ry7L1AdEdi1YqMb!J zoP(2lk%SokDodueNQ<^~Nc&4e+0fG9m1po-gqJ-u zMN{aK`3+IV|GIoGk&xIX$db@3{ea3&@QKw@iXPqX6xhp&27t#BGWag|r-9B3-i;l+ zYGPudoZY9ypkU0fQ|kENzs*g^d1j)~`%aNlx?>fA_}I%Y<(D$DGRCRf8z!~PXSoG zZ%79%@jxcdeYAMAarbh6Mj~=>*<^O_x$;xa_`I6sajUa zG3HPi36UEbeE$ede?yDER4E`J(EiY0MgD)9M*$5~nj}Y`{#z|VqlP~)L@s9($6P!Z zasM^d|0AIZKDxb|u?@&Gh~DLRVJp{9f!9E8xMyvkGjxQxhkobrWFPJf$tzepz|cF#_mDxN#pp^$ZhKT29Uv z2ec*L&p=lrN5Xsg^`o~>ZhSxM9k*fe^yc#`C^VJ9(-XZxy?CJVx*26>vqSz{Y66t_ zc8;}tuh1UMA61PyUQ<<8=S|X5AwKdAPmYiFf+f0l_#Rv_q8Nof{1vI?tDOtqkC<#t zMr#BP@SfVETWvbz+tZ>twZOMigRcw-2F<2b+7VKyR?x$?Va!+X2qo$su7bp*bc1p(|s3$ z5rdv^RFh?3ES1*djd3imQJU{;y201xD&O$qSuQl_EH!JPxH2JkL3xmJ2_>cw|TVNh{=?%t)- zsdxPZnLyb7U8Eo$F0Pqs>?<)gHs#fdKN1jjDqks{yy7TAY@ou8rb-fbjN_!5^`#sN z9mb&KDUZ6#I3qSdl>*gXT_SHo@SIZh@mGf~A0?U3vTD z@f5tri6}mG03SRjZm)|%;AZN#0eenPL=+S-ab zcRxjd$mm{WtK#bVHIj&Ng5K0Cl6|t3kU%|KtQJU@o`d~;V-WDH239*Nz{o;fbq*GH zK}ECVaCm6_oU{7tlcz>6ch4&e*xKwQqQ0>NK_*!FCTQpUtFtB7Mc~Kqlt$HKXS%?) 
zgBe<1@bYWOVLXn>+{^l3TA<|t6@h@9zW^$J#dD|Wv>&Za+Hw_P>ZGI(po<5FfQO$y z>BpVbckH^d7jy(|$+k4PbOi(j1sMfi@69LQcpa6jxS>w~=ctV0o!q~!b8JW+h7QV* zb>hGZ$}OSe^BE2IqwGnW-|B48Cxm+Z{Iya2GS9CcW$Ui?BR8g9TC4|BO4>Nse^)FPGxY5)n=NV7NB@`?`wh5yD72bk7Tey|& zH(i>T{Ye<~1oiaBy*~Iq2lBGa7FY*;KYu^J1cGztDEQ1VV+69e&WpVzfgSAW>4EKK zoMXC~_l7`7Qc@CPO)B8o{KXQ}t;S7-~u&WmenYs<;Wy~gZl`yHm} zq^$kwNnL@(@Hj;OhN5`&@JCb;nl?t?FyAg76V6Nd zJF1`*xStx739jI|`UE;|b?n#h#(~^_z0u{0D(W6DxRx2jA!P`YBX`e>u);+rl1hRs zf8~lZ>;mw@Y)#(ZiyGYldV^GV{``4L0qX(KyG*^emxV)WN#{AX+h8YEP34FY;Z>)S>#TivC)jU=;nlx%WZ@!0dA^kfUV5klM0|&$HS?Kt-Kel1?+SW<6J9??3ioDRAlMF;- ztPlp$k4aCEs~$QXsFV%9Kc7O=4px(SK^R*JN)BhHIWPHJS8$U+jsnHieGLg?STJ?R z&lrj=S*?^ENe8U&{fC>-{#-GvS)T7rF03O12lFeUAbKh)5!bu|ax*hC$Y+jYW#oq7 z2%+`Cngb#J^Wq}b`PIk4lWuf`>#eOa+n{l;fNrlU+23{w)JhObsHmu7t1Eqe*gMM6 z9UUHi<-sKIfC6g*`RHAl)?+Mtv=~@hDh|81_8XBsgpinUg2Netm;VPG~u};~Awle-A z4-AOD(#Oky4UQwhk4h0X@AzmluzG(JRNWY!=E&pn`gAOy=7E~bXAqGFM^x$h+8U?-evl&hi!gmm!AEBLpBDs2k@!bI5N9tS z5l-!)xZ1>QZ-K}jOtZOH%qZD>T}>?ofnQbWG%Zw55`5qkPjuUCY02~s2m5S^DG&izT0v5^ zznFuxD@Ln1y>K1}(oS;$a;^>LZyVaO?!T^tSx<(0;TAtY#^K(rmL5aON3}+ocOKbK z1udpP|FLo^Bj{qe&1bJiy1+hxEb{Rv%z&qsI%DE*Eey}yOd1=i@zd} zV8HnBdNk}j>BGb@+ld+>xm2TM6p`WYfjFbPuJXPfPX`TJl<6%i2;ncTF*o}05qPwjE$9%7XlH@#3! zWJn4?kJ7F`TYuaOc7U#{bFioy>6>RBaa#4IBqGY;P)A-Ue8!LoP!hPBop!vA$oTjr znq2-Il)Y^X8+zigCoh6V(x>8?*r4t;+qz~4kQe&@tk?gAv=Dk46|z8i`zC}0WDx_P z1(ug>pO*hED@>`WsK}m3Nl7J$UFt=GJ-!WkR-APhT?#n-rc}}kn2GK#86|}|BFS3D z9qc5AFmQV{s8|_Q2gwDJL!>*Jx=5q70*3v9(j`JdO(+Av_qx(92*8E+6ciPWp^$43 zDR;p*^T;)FR{yL6V@=(!YyaH|zOxnJ?Us_0NX*Fz_}q!RlA75GJZEZJnkNh#154

c6h#Pnz`ECZeaurF;xoQG2# zfU8(i`zCAblAk;Q*W46j_lFD6KR^X?nr#$K5pgg&-CJq15%*^CQpA7k5Jce%w}t2w zT0V0HAe;&YArXOyPmODbEY-&iUWnl`X%3w-9be5j~h3v422#~_T~ zur39=GT3+|=l!r8I{L+(q zx)DquU^J+6`QSV_Bt-s?nPTQHq=vf+s9pqqK90&wm_Ki1>4q9aDdgA#-veINrsMs! zMx@C4XM>lfHKMN`{jz2VQk&M0KlG)Qjs7J(M>-kTyPjtK3ce5Rvo~%G139l>eLd5< zpHcOlsM~zx3=ZLF8xI!&vf~Tvim=OUR_)~*e{_q|NeR1=0?2@0bwBkq*=&UUwSEBI z@4c}$z2c9iw1Ch6N^loESp!?}?1wqbqM{;I4?jOY>){-y!7EO|)Z(6balMvFltIs) zxqv&VUw#y-*m)9;8ps`Gwa4_5lKx=sL8syIUH*PjPLPAMc+J`Y-t6)R^%UA{ME>=~ zznLq0xIs!<;Eala`-U9&(qk>n5<;IvBd>9qK`|O~etU+|6ehs^VziaL;TsUtfGS50 z1a>ely5X3iXHw?{nb9Hvps!*>KR*c*Yax>t?zk4t@WcR8_H);qB^*p*|D?XZBC8I~ zN=faO1^?_cRfc?8-z~EcD)#O7?uk%j3FjHX-vj1w9uVOfT0<8>s`%Cwn2k|t>J=cb1?A$FXRbaiHCSMgm`}{=Moq_ zHa0dsed6<6*DuaPaOer%DXM_5Q>{`S$PnzG5>5b9^+I`Q$KUxeFA4o9hgYwRn*Gvw zG#4O*U;{=r2XgEmgpB+V(`g^g-vj(DLk-*4IZ0&YMj(nCfo}T#-r3?WkHJ~8v;2mU zmsc}B_xx7`ThSU}nsAU#Yq_&@fkDw~n}uo8N63WtKeO_@U+Bj&hw-OSIzn>x_V*d8-qaS5 zlWz=2{Z~R>WMmf!N%A(3iM*Cn1optdK z@c<3&>Kmm{ZV=1=D#d?bfe?nc$Le-1-!kMn7lu$7@tjwV{8gwGbgczy;A^cS5C9cC zARov<3&i9)cFDx|=WT#^{(m=K0bhoGXu~h>;QxGR1Kj>! z8vXA<4l@xX@jj_@`DJa$|KF(jUmq1LAxnr~?*~gTwnGSTk6SG5VrUNj zM9`EqoPc&H(>?C+h7X0tC{DYrssH?mzNBpAE8(h>0QAErFeCoZ@7cUC-%;<_lR$rX z!Y{+7d93FRPnn<>Q5&t$wI&oZ19{KYKu+$==5@KY@#vGWEcAz2TM!%2i$Zx7^WVQ} zVoKTIn1eC$_N_m+&Ko_=PAo$G>kUJghp{wq%d-bN0=Uzcq0+z} zLBC8S#CHH)Eupm8;QN-qmc{^8)xK~rfH=?Go$n#CmIZw{!?Xnrn)H?lyC4<-6~G+f zF!})}t{?9;Vmrs{d!`PG7O+Gv708;0f^`6=u7byV0iu!?42>_lG_{nBw*fzViZsSUBE(dxMdI;dKXa z>0p8nWni0x*@X1lzoiosSiIki`aSJ;!=CW~uF|SRv2_GNt|pK^tN`Xvw#IqsecN|3Wm0Y$fkUV3aEPWMBZ6XS((=^1eSt19~{? zfu~#n`!y6xinwAJdb7*y0gyf5!U-`2hBh5wJd{HQM#l70z=`6WNd26-L+kgo4dng# z8>=ZiU34U9Pbe4*BJjwyUwS&FFbr8J_DOPN-r+@tgwqo=n9r%Gs}J_zos-_1C!Xgs zWR3X<_~=|Fnl5MQOvR_Epj!u%rdg(@phI`R*zIBzKE+j>Y|vG+yb6v8?V{mXAU`q? 
zh#sXK=5ADYs$3QN;WXT9RFSN75@F{^lDK{i2DJBT8yXsdJIoY4WI%7{NQ#rBU*Z-o zgqi}HI6|TZ+uO^_OBbdJg4Hp-$38JO~E~yhDFtfs7e;KQE+m0MFZ|!PAU1tgd+&K9f4L% zc0yZNp>S{wOS#EzcL_bRU+pfXDmUt(R3XL#mlH3QrSI!N^#yx?)X9Tgi(CFygOUjv2e5`3_J z5!UBb3@N0B7g@gzpAF8u~_ci4v>l7WyA%S$yI@@vDfnRdaPsocr#o$ zmbTy{vDPu=f;CM~iDom5}R z7I#Nyt?|LtYJGHWWb^v&RWR@2!IV>*3y*fCw65{ z^sk&+JYJA5CsoDMXP0Y&9wbzb(=?t%+r@Y(`s{8Hw&yiAs*K{uxu(3$p$nDu_6AlZ z^ds97Nm-nbtt}RK&QY6|hHo$-*201lRIwGo5_B?*ahqBKL*P2#t?rwx=!pwOV>F(I zhf+YA3Xh0*ybT@d)(Eg8Ft^0T#fA2TfLa3?ooFrM7pblKFH-xD#cb_uMd2tMC@{)` zmz?QH&h)>!F@v5;#;cKATSB0`Z>yC4>q;EK^8~)C`y_`j*2txmuK=kySHkw@63 z=bfkP)K@VR@Ba_$y#6r&vt_~{Qz+4v9gc6}=jZ34MqJ4c?&|FBrWJ9$$36%Z5^}B} z%%%LF!mc}<3jS{+L|G>(**h~T3Xi?Fj1wZ+hm-7x$X;ddm8^_|kVG=F31xJmD0`Ik zgjDAHIXqAGJip)jzJHvn>(b>M=X<`Nb${;rK7`sOtfIiNf;l0(VU@MZ!+CXD)tZkJ z8tta0raG*BNX)nS{=n*&e;p}&(x5KwY~U{Nxf5&o`X!RwWpZ(`&Z;X8)>pN)wXago zE1RxC;Se>nRf6@yzew^zY;E942V^%8-6GJT^)i~kGy_@i%mLS|-(E*^JzN}2MR42* zh&|H3pzDwts*00denfjfp~Z3Egx&(M{kUM1XmjF8`G+`c2&fwzw zN9ev+;Ps^{h-DG$6Uvt%A(WSsqoSVFFdNvcjE3*ACe6tE~_qJYdDqo`&f%HUjI% zQ>?745SB_Tg}YT@D%^sOP+=5`8Cb{J1O&*$rw)>|dn~mo&1M-@#`5>&h5&cYT#3 zLy5mFkEG-dB#Z&Ri{XvCn|uNSxgmmt*IE?pnyc=f;V`G2_@Ui>LYdBW??IXD^fZWY^V^7ogdE26%a=*$*!c{Sm)?=@2@I$&}S-& zEv+4$HEKmeH3Ik%24nkRatew^&}FruJwx`N>}SL($&9`Q7U>4nId1N-pvX@AUjZW$ zEa6scSiwe0|3%}sE5tauTSdQaJ)wqB$AL^4ZM)N|D{pbM4N2= zeLOfzggEwu&E`yjdMomLj3Dca0Us}B>RDdqqY0!Xl-o=fX9M@OVEG5^S_^A z#0kPsNnUJ;RE~E4MPx~$T;5X$3H^F6r%BAAf7zc~U)q()f;vRpd9oF}*cPDns;Q~P zodoRG7A6`T_Xajh)9zo{!eK1|*bX^^sE~vNL%}4pCc6Fq1r6qRf#ou+$H&1i6=v=8 z-LN*=+1UZzj}vstB{#$CROm~Gvi_Y(ZYlX}SO% z(!euURI71kiZ$jb46rcj!3%VaxBz36`0(L1z~R`~2)ufXt0E6x@Ao#rJQtOli=*v3 zJSF@a60QWFjRF~e0uX{32*%a?+Jm=)9A(_TJWT_e6&CXZ%Hhjmo>Vbc8z4P+0#@rV zS~GOb_kJ~+o}MxL=)W$QvBXgx4CurNSf(Ke{bHflk>UNizHU-(=I<4&psRcAGlQ6m zf&*dLXD6+m6Udf^130(`t8dd{wI8Jq~6d_sVJ%Ld@#%u_ie}`G&?G1qXVd5<@DEkPV zYRU{;P1{yMnyo2DihVAOHJKd{c zJx(lrn1U0kzi)7_nwSh%e;*iP!wKYo%mA@4BO(T+0{iraF`VaAD^QO8s*iaF{~8lY 
zzW`KJUS1CMS0>|;QO%tlaXz~9HX6W&hL$nDKH1bu75z|`13x>!g!QfkGpuI0ygWT4 zL7xnAiyoLy6h%jC+uCvyT(X|ezpm``*Hb)rvOo6WDlDrX>fLWh za6ZwAGbSYXN_}skR%4LGF4^gLQ8D|;5$92JFm`lSdXiFK0IK!1Yh!S?fIWKc$cOzL z{CgzhzxG1>I4pN@dtqfIx-aruq{F|a7kHX3SZVNHF5yRDq9@0j&As8DB&Q<$l&8NmiklC%8A! zl=|YgWUmms`!p;Seme-d!TqA}dkERT_xSwg)55k5$e8NFJ1G@h2+!E(0+#sKp?TlX zfgTE^T5^N90*0eo)8kwyE+T&T--?Rai=O+x?R+8|6#ArolB5BN#W|86D6Ia&t{f2I zzscYSFmAIGhHK+=>2G#G|Hq5{69L};)$6*y!NuS;fpY~GdWNim2#;iD7IB`mQln6d zKkZxLGNZOz#|KrbsVB8Ep_gaIF-_QRwZy1$objVMoh4{Pskunb#jY)6SVj+*xkx0q z#M_(T%}lF-F2#f2@Xz=zw|jB&gOhE1Ku2wR;B)F5^l)b<$G{x@bm9%3ncHewT3g>5 zm7A37Mug?%nzfp^R{amzG5cl|%rR_j{@D_3MP!fwfDCV8fjWrv<$6ar;W3G>g?qH!nK+$@9Fgl)w`KFn-zHM%qvoAi|@b8 zeMz{**c26uir#~NaiS)-9j|7ZaH>bT4eiA1u)^TlcADc^N09iN!G@3sE_lpmBUsDk6fQ7fSA|qs=i+gruybDkS&> zCrqC&)NUF0E-fTz8*S(eo|n|9?ZC|W+^Q|EEpEV6H+VIE&ZN-AZc`Sa@ejd+qzLUE zsxPu9kGb+f26F{Z4wLqQ71_HNgE=7t55E|UjfVl`8*^Wwgo@Bgpjd4C0__HJ?%sr5hfi~s^JbV-pj2Koty!NtW|lfX zEY!+Q>rI8_>FiG@1mxu9(sF=NUXUV}1tdt;&tBa8IcT9{HXZFz=RgufgaHy+3PUj`*9X9 zVa{Y*hXDlFR2bm}un;W_eR#g7GuGXbRj@STLA&ufwbHMG;?U$|JNAvY_Ig3oNm{Ax zXmE`%m^+`MFT!p@n^D9yK}}3Ppd3WD86#!kCp%Ju_8_2Z28?WOx31G26fJu>J(I&PpoydBA0 z(?fv?xHo6o652hZ=IPq7hJu0{lL(sZlCJJUhwcd2&H3MHS_H2D#)`CUONhpud|f?o z_a8bLymQ!Rwae5b0$Jj*LaXo+TuUqx|709m$Re0MC9J}XI5*GZqtF0r8`iETJBK_sztTK)3)#{s(+|bztW8gAP7a^2 z-gusZZ%@6Rh{npdu`x_n!i$LQZOeI<%fzGSJeK)bVrQ?52WK`tvEvslv}z9GsS7@y zbV+15H0Fpi!)1I0&M-|w#4xY{8Dx#z-5o3@AH%|TW_B0IxeFwH!-ElNX}+PsRT}EX zWsgqfhCfVVvRntv(rP<>7uXn55B0%xKUrFxG^ksSQ-Rp~>+Izys={ImbWswO_)%ZB z*K#N1YFe&)FkgUcp@Vs)O{rtT$qZwj-Sp{AQ6-{0Iq^fj!Rp_q zD#z^HkyWsc*rA{^?~B6NWb#;~@aIVD&yP$ zLyNX$u{yPKORn{}W@nqP2S0qFN@TfwAMDkSrBv7((Gfa+TRsGqC7*^I`~2{=`@F04 zNXV21q2u*`mwv$~fRVzHHJDRhK)jYxj+eo#F71L(nD5PSimYmWZ{o59Jf|)t^xEvw zeB+^e`}~N1MxU zZ7{BU*z(VO^%<1aP~2`twGsT>x;`DA~1F?{x#V~m9u=8>(tAN zO6sE7OtDnn{g$+3!7XotGA;U>Ts9^`->vIsqrDiX%`;`$*AX6MUTVw;_dbC@cYTpH z4Md9-;(oZA3C3@p>2^R}UBJaLlOyV;zLK#t@4)P*xbY~d9r$ha|5&4GY9fExy&{Bc 
z!D~J>cIP%xJyXHN#FFXlB~AbMyB(v~k%LxhW+4Rg$I-6hg8h0Qd0L5cKb^VCCi0-@ z(whd%eF>w8h6?6OZ5`TkC?@e2ThbPLbD8vtCX~w+q*-bg+~)PZd^!I{Uz1_T7$yVoeAALS{}ysk%|uaO#@g(v05bIyMFPlpdVDdgMi6ohmnpLHn5 zs3u9?#H2pcGspXNOlk8~5UM(chM&`X8+Y%Jxvyf{N2saK`}v9l#8BhYNnS2YGd=8? zg6bpjX=Mz|gq+3b)}6(O?3K`K6*BUoPOQ<#k7P^oUGLx6*`jQI8qzSFuL#Sf$@g2p z(PowxwrG72wy<=(SCF?hpuv)GSb&;KBRrCk)k*hD%I#8gng(eP-piK`f+pYF$D4d# zJnpNy&fx%{nzX>=eK~ufw2+Gxo7h4hWV+$-KAw$7t#s8R7V+YeDQ#)KGEmQ@2dw0>@d(Hn z=G9~jqO>DixX3Wk#)1)G_|owQf!)K0%q8;)Z4#0`23@`{=9)S?g;$_~y;VR(Z+{cn z{qm*%*X^N~cuzBKjD%E%$vE0^3pP?SjAnru&-J%;Wsg#uzSc;Si261J%$mI(sd2U> zvz7Iazk0!@N5Ot~_^4_t;`YY&eWn&-!VtH0L?86 zo{mXV%?~^KxRn~oay5X>LKPW!=g@k}URCFOzvaZ`^*`z&>8oWF=e6euwRmY=C6R}$ z3s0pVCUmDA&dAsXvO{?_DM9{v4|tIn_mFQ^N`(REwnax;Pb3&}>(g3HTYD6(#H|#z zc(mxwhGMxep0o3moLt&vq)%?4|6!lev3yPAq(FQkX2ZtsWvEM}0cxQw{dJy4zq9Vb zlzK=YfZXh%atj$@&HuTIZ~pr;*O^Nc zot})#SNSM$?_N<=mBzh2B>^Du&>Oqn1a@(=<%agWnxk7no#&EjqaxXaKr@8V;@Mj^eZ#E-}SRCjV2 z`H>?b#8%e(R$heK;&3WY-UQy8Y|I^mLcS&GH;lK{BolWEEJl=NF)4 zx+Hf-Y#R?tups0Aq`yOnu@M>8!lF+)T-2x{d5*cFukWUyfNFZc^j%W^XV*WAfF4&} z#W3epHNheVeT+Z_S@}mE<_EJbmVg)@RJzzjiX!xGsD727zkU^7kiN)s`0YJbq<|R< zQUDKThvgW|tgj04DD8J`Y?44cX7;p9bwciIPFsjdQg!p*v?{gt{PB61RBc@>6Zduj znumu^WG^Bh$kAU`OP#vteZFQYd^^ePUUYK#@s}?#K$*?S>pjN<_DbVoE{bgezEPew zHYc{2YdO7-YP>=gZ$Ct9p>(uf3HoLlP2x}N?(EIX_;_tB8qLhRegZ+gNu6lLlP6bv zo-35**42rhJxj{KV^^uc%g_J5YGJ5x^(0"] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.12" +scikit-learn = "^1.5.2" +numpy = "^2.1.2" +spam-detector-ai = "^2.1.18" +nltk = "^3.9.1" +joblib = "^1.4.2" +xgboost = "^2.1.2" +imblearn = "^0.0" +requests = "^2.32.3" +googletrans = "^4.0.0rc1" +tqdm = "^4.66.5" +tornado = "^6.4.1" +asyncio = "^3.4.3" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/app.py b/src/app.py new file mode 100644 index 0000000..c3585dc --- /dev/null +++ b/src/app.py @@ -0,0 
+1,27 @@ +import argparse +import os +from dataset.preparer import download_words +from model.trainer import train +import web.server +import model.updater + + +parser = argparse.ArgumentParser(prog='Antispam complex') +parser.add_argument('-i', '--init', action=argparse.BooleanOptionalAction, help='Initializing, must be run beforehand, --dataset is required') +parser.add_argument('-m', '--decision-maker', action=argparse.BooleanOptionalAction, help='Start as Decision maker') +parser.add_argument('-d', '--dataset', required=False, help='Path to CSV (ham/spam) dataset') +parser.add_argument('-u', '--model-updater', required=False, help='Start as Model updater') +args = parser.parse_args() + + +_port = 8080 if os.getenv('PORT') is None else os.getenv('PORT') + +if __name__ == '__main__': + if args.init: + assert args.dataset is not None, "Dataset is required, show --help" + download_words() + train(args.dataset) + elif args.decision_maker: + web.server.start(port=_port) + elif args.model_updater: + model.updater.start() diff --git a/src/dataset/__init__.py b/src/dataset/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/dataset/preparer.py b/src/dataset/preparer.py new file mode 100644 index 0000000..f3d23f1 --- /dev/null +++ b/src/dataset/preparer.py @@ -0,0 +1,5 @@ +import nltk + +def download_words() -> None: + nltk.download('wordnet') + nltk.download('stopwords') \ No newline at end of file diff --git a/src/logging/__init__.py b/src/logging/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/logging/logger.py b/src/logging/logger.py new file mode 100644 index 0000000..9ae5088 --- /dev/null +++ b/src/logging/logger.py @@ -0,0 +1,5 @@ +import logging + +logging.basicConfig(format="%(asctime)s | %(name)s | %(levelname)s | %(message)s") +logger = logging.getLogger(__package__) +logger.setLevel(logging.INFO) diff --git a/src/model/__init__.py b/src/model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
# ---- src/model/trainer.py ----
# spam_detector_ai/trainer.py
import os
import sys
from pathlib import Path
from sklearn.model_selection import train_test_split
from spam_detector_ai.classifiers.classifier_types import ClassifierType
from spam_detector_ai.training.train_models import ModelTrainer
from src.logging.logger import logger


def _train_model(classifier_type, model_filename, vectoriser_filename, X_train, y_train):
    """Fit one classifier on an already prepared training split and save it.

    Args:
        classifier_type: ``ClassifierType`` member selecting the model to fit.
        model_filename: file name handed to ``save_model`` for the model.
        vectoriser_filename: file name handed to ``save_model`` for the vectoriser.
        X_train: training texts (already preprocessed by the caller).
        y_train: labels matching ``X_train``.
    """
    logger.info(f'Training {classifier_type}')
    # No data source is given: the split prepared by the caller is used as is.
    model_trainer = ModelTrainer(logger=logger, classifier_type=classifier_type, data=None)
    model_trainer.train(X_train, y_train)
    model_trainer.save_model(model_filename, vectoriser_filename)


def train(data_path: str) -> None:
    """Train and save every supported classifier from one CSV dataset.

    The dataset is loaded and preprocessed once and split once (20% held out,
    ``random_state=0``); only the training part is used, and the same part is
    given to each classifier.

    Args:
        data_path: path to the CSV dataset.
    """
    frame = ModelTrainer(data_path=data_path, logger=logger).preprocess_data_()

    # train_test_split returns [X_train, X_test, y_train, y_test]; the test
    # halves are not needed here.
    split = train_test_split(
        frame['processed_text'],
        frame['label'],
        test_size=0.2,
        random_state=0,
    )
    train_texts, train_labels = split[0], split[2]

    # (classifier, file-name stem); the stem yields '<stem>_model.joblib' and
    # '<stem>_vectoriser.joblib'. Order is the training order.
    model_stems = (
        (ClassifierType.SVM, 'svm'),
        (ClassifierType.NAIVE_BAYES, 'naive_bayes'),
        (ClassifierType.RANDOM_FOREST, 'random_forest'),
        (ClassifierType.XGB, 'xgb'),
        (ClassifierType.LOGISTIC_REGRESSION, 'logistic_regression'),
    )

    logger.info("Train each model with the pre-split data\n")
    for kind, stem in model_stems:
        _train_model(
            kind,
            f'{stem}_model.joblib',
            f'{stem}_vectoriser.joblib',
            train_texts,
            train_labels,
        )
# ---- src/model/updater.py ----
from src.logging.logger import logger


def start() -> None:
    """Entry point of the Model Updater; at present it only logs a start message."""
    logger.info("Starting...")


# ---- src/translate-dataset.py ----
# https://thepythoncode.com/article/translate-text-in-python
#
# Translate the second column of a (label, text) CSV into Russian.
#
# If the output file already exists it is first copied to "<output>.bup"; as
# many data rows as that copy holds are then taken from it instead of being
# translated again, so an interrupted run can be resumed.
from os import close
from contextlib import ExitStack

from googletrans import Translator
import csv
from tqdm import tqdm
import argparse
import sys
import os.path
import shutil

csv.field_size_limit(sys.maxsize)

parser = argparse.ArgumentParser(prog='translate-dataset')
parser.add_argument('-i', '--input', help='Source file')
parser.add_argument('-o', '--output', help='Destination file')
args = parser.parse_args()
#/home/bvn13/develop/spam-detector-1/spam.csv

translator = Translator()

# Smoke test of the translation service before the long run starts.
translation = translator.translate("Hola Mundo", dest="ru")
print(f"{translation.origin} ({translation.src}) --> {translation.text} ({translation.dest})")


def _count_rows(path):
    """Return the number of CSV rows (header included) stored in *path*."""
    # newline='' is what the csv module asks for on files it reads or writes.
    with open(path, "r", newline='') as rows_file:
        return sum(1 for _ in csv.reader(rows_file))


total = _count_rows(args.input)
skip = 0
bup = None
if os.path.exists(args.output):
    # The output is reopened with "w" (truncated) below, so keep a copy to
    # read the already translated rows from.
    bup = f"{args.output}.bup"
    shutil.copyfile(args.output, bup)
    skip = _count_rows(args.output)

progress = tqdm(total=total, unit='row', unit_scale=2)

# ExitStack closes every opened file on exit. The previous cleanup called
# os.close() with a file object, which raises TypeError (it takes a descriptor).
with ExitStack() as stack:
    f = stack.enter_context(open(args.input, "r", newline=''))
    tf = stack.enter_context(open(args.output, "w", newline=''))
    bupcsv = None
    if bup is not None:
        bupf = stack.enter_context(open(bup, "r", newline=''))
        bupcsv = csv.reader(bupf)
        # Drop the backup's header row; the default keeps an empty backup
        # from raising StopIteration here.
        next(bupcsv, None)
    try:
        reader = csv.reader(f)
        progress.update(1)  # accounts for the header row
        ru = csv.writer(tf, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        ru.writerow(['label', 'text'])
        next(reader)  # the input header is replaced by the one written above
        # `skip` counts the backup's header too, hence the start at 1.
        skipped = 1
        for row in reader:
            progress.update(1)
            decision = row[0]
            text = row[1]
            if skipped < skip:
                skipped += 1
                ru.writerow(next(bupcsv))
            else:
                try:
                    translated_text = translator.translate(text, dest='ru')
                    ru.writerow([decision, translated_text.text])
                except Exception as e:
                    # Best effort: a row that cannot be translated is left out.
                    # NOTE(review): resuming pairs backup rows with input rows
                    # by position, so a row dropped here shifts that pairing on
                    # the next run -- confirm whether this matters.
                    print(f"Skipping line: {e}")
    except Exception as e:
        print(e)


# ---- src/web/server.py ----
import asyncio
import json
import os
import tornado
# Imported explicitly so that ``tornado.web`` does not depend on the bare
# ``import tornado`` exposing the submodule.
import tornado.web
from spam_detector_ai.prediction.predict import VotingSpamDetector
from src.logging.logger import logger


# Created once at import time and shared by all requests.
_spam_detector = VotingSpamDetector()


def _json(data) -> str:
    """Serialize *data* to a JSON string."""
    return json.dumps(data)


def start(port: int) -> None:
    """Serve ``/check-spam`` on *port*; does not return while the server runs.

    Request body: ``{"text": "SOME TEXT"}``; response: ``{"is_spam": <bool>}``.
    A body that is not a JSON object with a ``text`` key gets a 400 response
    of the form ``{"error": "..."}``.
    """
    logger.info("Starting...")

    class CheckSpamHandler(tornado.web.RequestHandler):
        def set_default_headers(self):
            self.set_header("Access-Control-Allow-Origin", "*")

        def _reply(self, status, payload):
            """Send *payload* as a JSON response with the given HTTP status."""
            self.set_status(status)
            self.set_header("Content-Type", "application/json; charset=UTF-8")
            self.write(_json(payload))

        def post(self):
            # Status and body are set explicitly: write_error() is the
            # overridable error-page hook, and its default implementation does
            # not use a ``body`` keyword.
            try:
                body = json.loads(self.request.body)
            except ValueError:
                # Covers an empty body, malformed JSON and undecodable bytes.
                self._reply(400, {"error": "request body is not valid JSON"})
                return
            if not isinstance(body, dict) or 'text' not in body:
                self._reply(400, {"error": "text is not specified"})
                return
            # bool() keeps the verdict JSON-serializable whatever boolean-like
            # type the detector returns.
            self._reply(200, {"is_spam": bool(_spam_detector.is_spam(body['text']))})

        # The endpoint is documented as POST; GET is kept as an alias so that
        # clients written against the earlier GET-only handler keep working.
        get = post

    async def start_web_server():
        logger.info(f"Starting web server on port {port}")
        app = tornado.web.Application(
            [
                (r"/check-spam", CheckSpamHandler),
            ],
            template_path=os.path.join(os.path.dirname(__file__), "templates"),
            static_path=os.path.join(os.path.dirname(__file__), "static"),
        )
        app.listen(port)
        # Wait on an event that is never set: keeps the loop alive.
        await asyncio.Event().wait()

    asyncio.run(start_web_server())


# ---- version ----
# 0.0.1-beta