Compare commits

..

2 Commits

Author SHA1 Message Date
bvn13
c6ab8791ca working web server and updater 2024-11-01 17:06:38 +03:00
bvn13
3ad7a72515 updater and web server are separated in two docker containers 2024-11-01 15:19:44 +03:00
24 changed files with 6380 additions and 46 deletions

2
.gitignore vendored
View File

@ -2,3 +2,5 @@ env
env/** env/**
.idea .idea
.idea/** .idea/**
models
models/**

30
.idea/csv-editor.xml Normal file
View File

@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CsvFileAttributes">
<option name="attributeMap">
<map>
<entry key="/spam-ru.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
<entry key="/spam.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
<entry key="/spam_Emails_data-ru.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
</map>
</option>
</component>
</project>

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

7
.idea/misc.xml Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Poetry (spam-detector-1)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Poetry (spam-detector-1)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/spam-detector-1.iml" filepath="$PROJECT_DIR$/.idea/spam-detector-1.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

53
Dockerfile Normal file
View File

@ -0,0 +1,53 @@
# https://stackoverflow.com/a/57374374/2798461
#######################################################################################################################
FROM python:3.10-alpine AS dist
ENV PYTHONUNBUFFERED=1
WORKDIR /app
COPY lib /app/lib
COPY requirements.txt /app
COPY src /app/src
#######################################################################################################################
FROM dist AS installation
RUN apk update
RUN pip install --upgrade pip
RUN pip install --no-index --find-links /app/lib nvidia-nccl-cu12
RUN apk add curl bash gcc g++ cmake make musl-dev
#######################################################################################################################
FROM installation AS building
RUN pip install -r requirements.txt
RUN apk del gcc g++ cmake make musl-dev
#######################################################################################################################
FROM building AS runner
RUN apk add libgomp libstdc++
RUN mkdir /app/nltk_data
RUN ln -s /app/nltk_data /root/nltk_data
ENV PYTHONPATH=/app
ARG DATASET
ARG PORT
ARG WEB_API_URL
ENV PORT=${PORT} \
DATASET=${DATASET} \
WORKING_DIR=/app/nltk_data \
FUCKING_DIR=/usr/local/lib/python3.10/site-packages/spam_detector_ai/models \
MODELS_DIR=/app/models \
WEB_API_URL=${WEB_API_URL}
RUN python3 -m src.preparer
ENTRYPOINT [ "bash", "-c", "while true; do sleep 1; done" ]

3
before-build.sh Executable file
View File

@ -0,0 +1,3 @@
#!/usr/bin/env bash
poetry export -f requirements.txt > requirements.txt

40
docker-compose.yaml Normal file
View File

@ -0,0 +1,40 @@
networks:
spam-detector-internal:
driver: bridge
services:
decision-maker:
build: ./
container_name: spam-detector-decision-maker
environment:
- PORT=${PORT:-8080}
- TOKEN=token12345
entrypoint: [ "python", "-m", "src.app", "-m" ]
volumes:
- type: bind
source: $DATASET
target: /app/dataset.csv
- ${MODELS_DIR}:/app/models
restart: always
ports:
- "8080:${PORT:-8080}"
networks:
- spam-detector-internal
model-updater:
build: ./
container_name: spam-detector-model-updater
environment:
- WEB_API_URL=http://spam-detector-decision-maker:${PORT:-8080}
- TOKEN=token12345
entrypoint: [ "python", "-m", "src.app", "-u", "-d", "/app/dataset.csv" ]
# entrypoint: [ "bash", "-c", "while true; do sleep 1; done" ]
volumes:
- type: bind
source: $DATASET
target: /app/dataset.csv
- ${MODELS_DIR}:/app/models/
networks:
- spam-detector-internal

Binary file not shown.

View File

@ -1,12 +1,15 @@
[tool.poetry] [tool.poetry]
name = "spam-detector-1" name = "spam-detector"
version = "0.1.0" version = "0.1.0"
description = "" description = ""
authors = ["bvn13 <from.github@bvn13.me>"] authors = ["bvn13 <from.github@bvn13.me>"]
readme = "README.md" readme = "README.md"
packages = [
{ include = "src" }
]
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.12" python = "^3.10"
scikit-learn = "^1.5.2" scikit-learn = "^1.5.2"
numpy = "^2.1.2" numpy = "^2.1.2"
spam-detector-ai = "^2.1.18" spam-detector-ai = "^2.1.18"
@ -19,7 +22,11 @@ googletrans = "^4.0.0rc1"
tqdm = "^4.66.5" tqdm = "^4.66.5"
tornado = "^6.4.1" tornado = "^6.4.1"
asyncio = "^3.4.3" asyncio = "^3.4.3"
exceptiongroup = "^1.0.0rc8"
scheduler = "^0.8.7"
[tool.poetry.scripts]
app = "src.app:start"
[build-system] [build-system]
requires = ["poetry-core"] requires = ["poetry-core"]

498
requirements.txt Normal file
View File

@ -0,0 +1,498 @@
asyncio==3.4.3 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:83360ff8bc97980e4ff25c964c7bd3923d333d177aa4f7fb736b019f26c7cb41 \
--hash=sha256:b62c9157d36187eca799c378e572c969f0da87cd5fc42ca372d92cdb06e7e1de \
--hash=sha256:c46a87b48213d7464f22d9a497b9eef8c1928b68320a2fa94240f969f6fec08c \
--hash=sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d
certifi==2024.8.30 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \
--hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9
chardet==3.0.4 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae \
--hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691
charset-normalizer==3.4.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:0099d79bdfcf5c1f0c2c72f91516702ebf8b0b8ddd8905f97a8aecf49712c621 \
--hash=sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6 \
--hash=sha256:07afec21bbbbf8a5cc3651aa96b980afe2526e7f048fdfb7f1014d84acc8b6d8 \
--hash=sha256:0b309d1747110feb25d7ed6b01afdec269c647d382c857ef4663bbe6ad95a912 \
--hash=sha256:0d99dd8ff461990f12d6e42c7347fd9ab2532fb70e9621ba520f9e8637161d7c \
--hash=sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b \
--hash=sha256:1110e22af8ca26b90bd6364fe4c763329b0ebf1ee213ba32b68c73de5752323d \
--hash=sha256:130272c698667a982a5d0e626851ceff662565379baf0ff2cc58067b81d4f11d \
--hash=sha256:136815f06a3ae311fae551c3df1f998a1ebd01ddd424aa5603a4336997629e95 \
--hash=sha256:14215b71a762336254351b00ec720a8e85cada43b987da5a042e4ce3e82bd68e \
--hash=sha256:1db4e7fefefd0f548d73e2e2e041f9df5c59e178b4c72fbac4cc6f535cfb1565 \
--hash=sha256:1ffd9493de4c922f2a38c2bf62b831dcec90ac673ed1ca182fe11b4d8e9f2a64 \
--hash=sha256:2006769bd1640bdf4d5641c69a3d63b71b81445473cac5ded39740a226fa88ab \
--hash=sha256:20587d20f557fe189b7947d8e7ec5afa110ccf72a3128d61a2a387c3313f46be \
--hash=sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e \
--hash=sha256:27623ba66c183eca01bf9ff833875b459cad267aeeb044477fedac35e19ba907 \
--hash=sha256:285e96d9d53422efc0d7a17c60e59f37fbf3dfa942073f666db4ac71e8d726d0 \
--hash=sha256:2de62e8801ddfff069cd5c504ce3bc9672b23266597d4e4f50eda28846c322f2 \
--hash=sha256:2f6c34da58ea9c1a9515621f4d9ac379871a8f21168ba1b5e09d74250de5ad62 \
--hash=sha256:309a7de0a0ff3040acaebb35ec45d18db4b28232f21998851cfa709eeff49d62 \
--hash=sha256:35c404d74c2926d0287fbd63ed5d27eb911eb9e4a3bb2c6d294f3cfd4a9e0c23 \
--hash=sha256:3710a9751938947e6327ea9f3ea6332a09bf0ba0c09cae9cb1f250bd1f1549bc \
--hash=sha256:3d59d125ffbd6d552765510e3f31ed75ebac2c7470c7274195b9161a32350284 \
--hash=sha256:40d3ff7fc90b98c637bda91c89d51264a3dcf210cade3a2c6f838c7268d7a4ca \
--hash=sha256:425c5f215d0eecee9a56cdb703203dda90423247421bf0d67125add85d0c4455 \
--hash=sha256:43193c5cda5d612f247172016c4bb71251c784d7a4d9314677186a838ad34858 \
--hash=sha256:44aeb140295a2f0659e113b31cfe92c9061622cadbc9e2a2f7b8ef6b1e29ef4b \
--hash=sha256:47334db71978b23ebcf3c0f9f5ee98b8d65992b65c9c4f2d34c2eaf5bcaf0594 \
--hash=sha256:4796efc4faf6b53a18e3d46343535caed491776a22af773f366534056c4e1fbc \
--hash=sha256:4a51b48f42d9358460b78725283f04bddaf44a9358197b889657deba38f329db \
--hash=sha256:4b67fdab07fdd3c10bb21edab3cbfe8cf5696f453afce75d815d9d7223fbe88b \
--hash=sha256:4ec9dd88a5b71abfc74e9df5ebe7921c35cbb3b641181a531ca65cdb5e8e4dea \
--hash=sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6 \
--hash=sha256:54b6a92d009cbe2fb11054ba694bc9e284dad30a26757b1e372a1fdddaf21920 \
--hash=sha256:55f56e2ebd4e3bc50442fbc0888c9d8c94e4e06a933804e2af3e89e2f9c1c749 \
--hash=sha256:5726cf76c982532c1863fb64d8c6dd0e4c90b6ece9feb06c9f202417a31f7dd7 \
--hash=sha256:5d447056e2ca60382d460a604b6302d8db69476fd2015c81e7c35417cfabe4cd \
--hash=sha256:5ed2e36c3e9b4f21dd9422f6893dec0abf2cca553af509b10cd630f878d3eb99 \
--hash=sha256:5ff2ed8194587faf56555927b3aa10e6fb69d931e33953943bc4f837dfee2242 \
--hash=sha256:62f60aebecfc7f4b82e3f639a7d1433a20ec32824db2199a11ad4f5e146ef5ee \
--hash=sha256:63bc5c4ae26e4bc6be6469943b8253c0fd4e4186c43ad46e713ea61a0ba49129 \
--hash=sha256:6b40e8d38afe634559e398cc32b1472f376a4099c75fe6299ae607e404c033b2 \
--hash=sha256:6b493a043635eb376e50eedf7818f2f322eabbaa974e948bd8bdd29eb7ef2a51 \
--hash=sha256:6dba5d19c4dfab08e58d5b36304b3f92f3bd5d42c1a3fa37b5ba5cdf6dfcbcee \
--hash=sha256:6fd30dc99682dc2c603c2b315bded2799019cea829f8bf57dc6b61efde6611c8 \
--hash=sha256:707b82d19e65c9bd28b81dde95249b07bf9f5b90ebe1ef17d9b57473f8a64b7b \
--hash=sha256:7706f5850360ac01d80c89bcef1640683cc12ed87f42579dab6c5d3ed6888613 \
--hash=sha256:7782afc9b6b42200f7362858f9e73b1f8316afb276d316336c0ec3bd73312742 \
--hash=sha256:79983512b108e4a164b9c8d34de3992f76d48cadc9554c9e60b43f308988aabe \
--hash=sha256:7f683ddc7eedd742e2889d2bfb96d69573fde1d92fcb811979cdb7165bb9c7d3 \
--hash=sha256:82357d85de703176b5587dbe6ade8ff67f9f69a41c0733cf2425378b49954de5 \
--hash=sha256:84450ba661fb96e9fd67629b93d2941c871ca86fc38d835d19d4225ff946a631 \
--hash=sha256:86f4e8cca779080f66ff4f191a685ced73d2f72d50216f7112185dc02b90b9b7 \
--hash=sha256:8cda06946eac330cbe6598f77bb54e690b4ca93f593dee1568ad22b04f347c15 \
--hash=sha256:8ce7fd6767a1cc5a92a639b391891bf1c268b03ec7e021c7d6d902285259685c \
--hash=sha256:8ff4e7cdfdb1ab5698e675ca622e72d58a6fa2a8aa58195de0c0061288e6e3ea \
--hash=sha256:9289fd5dddcf57bab41d044f1756550f9e7cf0c8e373b8cdf0ce8773dc4bd417 \
--hash=sha256:92a7e36b000bf022ef3dbb9c46bfe2d52c047d5e3f3343f43204263c5addc250 \
--hash=sha256:92db3c28b5b2a273346bebb24857fda45601aef6ae1c011c0a997106581e8a88 \
--hash=sha256:95c3c157765b031331dd4db3c775e58deaee050a3042fcad72cbc4189d7c8dca \
--hash=sha256:980b4f289d1d90ca5efcf07958d3eb38ed9c0b7676bf2831a54d4f66f9c27dfa \
--hash=sha256:9ae4ef0b3f6b41bad6366fb0ea4fc1d7ed051528e113a60fa2a65a9abb5b1d99 \
--hash=sha256:9c98230f5042f4945f957d006edccc2af1e03ed5e37ce7c373f00a5a4daa6149 \
--hash=sha256:9fa2566ca27d67c86569e8c85297aaf413ffab85a8960500f12ea34ff98e4c41 \
--hash=sha256:a14969b8691f7998e74663b77b4c36c0337cb1df552da83d5c9004a93afdb574 \
--hash=sha256:a8aacce6e2e1edcb6ac625fb0f8c3a9570ccc7bfba1f63419b3769ccf6a00ed0 \
--hash=sha256:a8e538f46104c815be19c975572d74afb53f29650ea2025bbfaef359d2de2f7f \
--hash=sha256:aa41e526a5d4a9dfcfbab0716c7e8a1b215abd3f3df5a45cf18a12721d31cb5d \
--hash=sha256:aa693779a8b50cd97570e5a0f343538a8dbd3e496fa5dcb87e29406ad0299654 \
--hash=sha256:ab22fbd9765e6954bc0bcff24c25ff71dcbfdb185fcdaca49e81bac68fe724d3 \
--hash=sha256:ab2e5bef076f5a235c3774b4f4028a680432cded7cad37bba0fd90d64b187d19 \
--hash=sha256:ab973df98fc99ab39080bfb0eb3a925181454d7c3ac8a1e695fddfae696d9e90 \
--hash=sha256:af73657b7a68211996527dbfeffbb0864e043d270580c5aef06dc4b659a4b578 \
--hash=sha256:b197e7094f232959f8f20541ead1d9862ac5ebea1d58e9849c1bf979255dfac9 \
--hash=sha256:b295729485b06c1a0683af02a9e42d2caa9db04a373dc38a6a58cdd1e8abddf1 \
--hash=sha256:b8831399554b92b72af5932cdbbd4ddc55c55f631bb13ff8fe4e6536a06c5c51 \
--hash=sha256:b8dcd239c743aa2f9c22ce674a145e0a25cb1566c495928440a181ca1ccf6719 \
--hash=sha256:bcb4f8ea87d03bc51ad04add8ceaf9b0f085ac045ab4d74e73bbc2dc033f0236 \
--hash=sha256:bd7af3717683bea4c87acd8c0d3d5b44d56120b26fd3f8a692bdd2d5260c620a \
--hash=sha256:bf4475b82be41b07cc5e5ff94810e6a01f276e37c2d55571e3fe175e467a1a1c \
--hash=sha256:c3e446d253bd88f6377260d07c895816ebf33ffffd56c1c792b13bff9c3e1ade \
--hash=sha256:c57516e58fd17d03ebe67e181a4e4e2ccab1168f8c2976c6a334d4f819fe5944 \
--hash=sha256:c94057af19bc953643a33581844649a7fdab902624d2eb739738a30e2b3e60fc \
--hash=sha256:cab5d0b79d987c67f3b9e9c53f54a61360422a5a0bc075f43cab5621d530c3b6 \
--hash=sha256:ce031db0408e487fd2775d745ce30a7cd2923667cf3b69d48d219f1d8f5ddeb6 \
--hash=sha256:cee4373f4d3ad28f1ab6290684d8e2ebdb9e7a1b74fdc39e4c211995f77bec27 \
--hash=sha256:d5b054862739d276e09928de37c79ddeec42a6e1bfc55863be96a36ba22926f6 \
--hash=sha256:dbe03226baf438ac4fda9e2d0715022fd579cb641c4cf639fa40d53b2fe6f3e2 \
--hash=sha256:dc15e99b2d8a656f8e666854404f1ba54765871104e50c8e9813af8a7db07f12 \
--hash=sha256:dcaf7c1524c0542ee2fc82cc8ec337f7a9f7edee2532421ab200d2b920fc97cf \
--hash=sha256:dd4eda173a9fcccb5f2e2bd2a9f423d180194b1bf17cf59e3269899235b2a114 \
--hash=sha256:dd9a8bd8900e65504a305bf8ae6fa9fbc66de94178c420791d0293702fce2df7 \
--hash=sha256:de7376c29d95d6719048c194a9cf1a1b0393fbe8488a22008610b0361d834ecf \
--hash=sha256:e7fdd52961feb4c96507aa649550ec2a0d527c086d284749b2f582f2d40a2e0d \
--hash=sha256:e91f541a85298cf35433bf66f3fab2a4a2cff05c127eeca4af174f6d497f0d4b \
--hash=sha256:e9e3c4c9e1ed40ea53acf11e2a386383c3304212c965773704e4603d589343ed \
--hash=sha256:ee803480535c44e7f5ad00788526da7d85525cfefaf8acf8ab9a310000be4b03 \
--hash=sha256:f09cb5a7bbe1ecae6e87901a2eb23e0256bb524a79ccc53eb0b7629fbe7677c4 \
--hash=sha256:f19c1585933c82098c2a520f8ec1227f20e339e33aca8fa6f956f6691b784e67 \
--hash=sha256:f1a2f519ae173b5b6a2c9d5fa3116ce16e48b3462c8b96dfdded11055e3d6365 \
--hash=sha256:f28f891ccd15c514a0981f3b9db9aa23d62fe1a99997512b0491d2ed323d229a \
--hash=sha256:f3e73a4255342d4eb26ef6df01e3962e73aa29baa3124a8e824c5d3364a65748 \
--hash=sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b \
--hash=sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079 \
--hash=sha256:ffc519621dce0c767e96b9c53f09c5d215578e10b02c285809f76509a3931482
click==8.1.7 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28 \
--hash=sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de
colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0" and (platform_system == "Windows" or sys_platform == "win32") \
--hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
--hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6
exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b \
--hash=sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc
googletrans==4.0.0rc1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:74df47b092e2d566522019d149e3f1d75732570ad76eaf8e14aebeffc126c372
h11==0.9.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:33d4bca7be0fa039f4e84d50ab00531047e53d6ee8ffbc83501ea602c169cae1 \
--hash=sha256:4bc6d6a1238b7615b266ada57e0618568066f57dd6fa967d1290ec9309b2f2f1
h2==3.2.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:61e0f6601fa709f35cdb730863b4e5ec7ad449792add80d1410d4174ed139af5 \
--hash=sha256:875f41ebd6f2c44781259005b157faed1a5031df3ae5aa7bcb4628a6c0782f14
hpack==3.0.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:0edd79eda27a53ba5be2dfabf3b15780928a0dff6eb0c60a3d6767720e970c89 \
--hash=sha256:8eec9c1f4bfae3408a3f30500261f7e6a65912dc138526ea054f9ad98892e9d2
hstspreload==2024.10.1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:2859a6b52253743ddddad468d8c9570ba650170ca49ac416336826915ee409b8 \
--hash=sha256:3ab481036cbdff095cb411dafe33ee7924492319cf6ddaf4e776a159537541b3
httpcore==0.9.1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:9850fe97a166a794d7e920590d5ec49a05488884c9fc8b5dba8561effab0c2a0 \
--hash=sha256:ecc5949310d9dae4de64648a4ce529f86df1f232ce23dcfefe737c24d21dfbe9
httpx==0.13.3 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:32d930858eab677bc29a742aaa4f096de259f1c78c68a90ad11f5c3c04f08335 \
--hash=sha256:3642bd13e90b80ba8a243a730275eb10a4c26ec96f5fc16b87e458d4ab21efae
hyperframe==5.2.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:5187962cb16dcc078f23cb5a4b110098d546c3f41ff2d4038a9896893bbd0b40 \
--hash=sha256:a9f5c17f2cc3c719b917c4f33ed1c61bd1f8dfac4b1bd23b7c80b3400971b41f
idna==2.10 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6 \
--hash=sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0
imbalanced-learn==0.12.4 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:8153ba385d296b07d97e0901a2624a86c06b48c94c2f92da3a5354827697b7a3 \
--hash=sha256:d47fc599160d3ea882e712a3a6b02bdd353c1a6436d8d68d41b1922e6ee4a703
imblearn==0.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:d42c2d709d22c00d2b9a91e638d57240a8b79b4014122d92181fcd2549a2f79a \
--hash=sha256:d8fbb662919c1b16f438ad91a8256220e53bcf6815c9ad5502c518b798de34f2
iniconfig==2.0.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \
--hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
joblib==1.4.2 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \
--hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e
nltk==3.9.1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1 \
--hash=sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868
numpy==2.1.2 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:05b2d4e667895cc55e3ff2b56077e4c8a5604361fc21a042845ea3ad67465aa8 \
--hash=sha256:12edb90831ff481f7ef5f6bc6431a9d74dc0e5ff401559a71e5e4611d4f2d466 \
--hash=sha256:13311c2db4c5f7609b462bc0f43d3c465424d25c626d95040f073e30f7570e35 \
--hash=sha256:13532a088217fa624c99b843eeb54640de23b3414b14aa66d023805eb731066c \
--hash=sha256:13602b3174432a35b16c4cfb5de9a12d229727c3dd47a6ce35111f2ebdf66ff4 \
--hash=sha256:1600068c262af1ca9580a527d43dc9d959b0b1d8e56f8a05d830eea39b7c8af6 \
--hash=sha256:1b8cde4f11f0a975d1fd59373b32e2f5a562ade7cde4f85b7137f3de8fbb29a0 \
--hash=sha256:1c193d0b0238638e6fc5f10f1b074a6993cb13b0b431f64079a509d63d3aa8b7 \
--hash=sha256:1ebec5fd716c5a5b3d8dfcc439be82a8407b7b24b230d0ad28a81b61c2f4659a \
--hash=sha256:242b39d00e4944431a3cd2db2f5377e15b5785920421993770cddb89992c3f3a \
--hash=sha256:259ec80d54999cc34cd1eb8ded513cb053c3bf4829152a2e00de2371bd406f5e \
--hash=sha256:2abbf905a0b568706391ec6fa15161fad0fb5d8b68d73c461b3c1bab6064dd62 \
--hash=sha256:2cbba4b30bf31ddbe97f1c7205ef976909a93a66bb1583e983adbd155ba72ac2 \
--hash=sha256:2ffef621c14ebb0188a8633348504a35c13680d6da93ab5cb86f4e54b7e922b5 \
--hash=sha256:30d53720b726ec36a7f88dc873f0eec8447fbc93d93a8f079dfac2629598d6ee \
--hash=sha256:32e16a03138cabe0cb28e1007ee82264296ac0983714094380b408097a418cfe \
--hash=sha256:43cca367bf94a14aca50b89e9bc2061683116cfe864e56740e083392f533ce7a \
--hash=sha256:456e3b11cb79ac9946c822a56346ec80275eaf2950314b249b512896c0d2505e \
--hash=sha256:4d6ec0d4222e8ffdab1744da2560f07856421b367928026fb540e1945f2eeeaf \
--hash=sha256:5006b13a06e0b38d561fab5ccc37581f23c9511879be7693bd33c7cd15ca227c \
--hash=sha256:675c741d4739af2dc20cd6c6a5c4b7355c728167845e3c6b0e824e4e5d36a6c3 \
--hash=sha256:6cdb606a7478f9ad91c6283e238544451e3a95f30fb5467fbf715964341a8a86 \
--hash=sha256:6d95f286b8244b3649b477ac066c6906fbb2905f8ac19b170e2175d3d799f4df \
--hash=sha256:76322dcdb16fccf2ac56f99048af32259dcc488d9b7e25b51e5eca5147a3fb98 \
--hash=sha256:7c1c60328bd964b53f8b835df69ae8198659e2b9302ff9ebb7de4e5a5994db3d \
--hash=sha256:860ec6e63e2c5c2ee5e9121808145c7bf86c96cca9ad396c0bd3e0f2798ccbe2 \
--hash=sha256:8e00ea6fc82e8a804433d3e9cedaa1051a1422cb6e443011590c14d2dea59146 \
--hash=sha256:9c6c754df29ce6a89ed23afb25550d1c2d5fdb9901d9c67a16e0b16eaf7e2550 \
--hash=sha256:a26ae94658d3ba3781d5e103ac07a876b3e9b29db53f68ed7df432fd033358a8 \
--hash=sha256:a65acfdb9c6ebb8368490dbafe83c03c7e277b37e6857f0caeadbbc56e12f4fb \
--hash=sha256:a7d80b2e904faa63068ead63107189164ca443b42dd1930299e0d1cb041cec2e \
--hash=sha256:a84498e0d0a1174f2b3ed769b67b656aa5460c92c9554039e11f20a05650f00d \
--hash=sha256:ab4754d432e3ac42d33a269c8567413bdb541689b02d93788af4131018cbf366 \
--hash=sha256:ad369ed238b1959dfbade9018a740fb9392c5ac4f9b5173f420bd4f37ba1f7a0 \
--hash=sha256:b1d0fcae4f0949f215d4632be684a539859b295e2d0cb14f78ec231915d644db \
--hash=sha256:b42a1a511c81cc78cbc4539675713bbcf9d9c3913386243ceff0e9429ca892fe \
--hash=sha256:bd33f82e95ba7ad632bc57837ee99dba3d7e006536200c4e9124089e1bf42426 \
--hash=sha256:bdd407c40483463898b84490770199d5714dcc9dd9b792f6c6caccc523c00952 \
--hash=sha256:c6eef7a2dbd0abfb0d9eaf78b73017dbfd0b54051102ff4e6a7b2980d5ac1a03 \
--hash=sha256:c82af4b2ddd2ee72d1fc0c6695048d457e00b3582ccde72d8a1c991b808bb20f \
--hash=sha256:d666cb72687559689e9906197e3bec7b736764df6a2e58ee265e360663e9baf7 \
--hash=sha256:d7bf0a4f9f15b32b5ba53147369e94296f5fffb783db5aacc1be15b4bf72f43b \
--hash=sha256:d82075752f40c0ddf57e6e02673a17f6cb0f8eb3f587f63ca1eaab5594da5b17 \
--hash=sha256:da65fb46d4cbb75cb417cddf6ba5e7582eb7bb0b47db4b99c9fe5787ce5d91f5 \
--hash=sha256:e2b49c3c0804e8ecb05d59af8386ec2f74877f7ca8fd9c1e00be2672e4d399b1 \
--hash=sha256:e585c8ae871fd38ac50598f4763d73ec5497b0de9a0ab4ef5b69f01c6a046142 \
--hash=sha256:e8d3ca0a72dd8846eb6f7dfe8f19088060fcb76931ed592d29128e0219652884 \
--hash=sha256:ef444c57d664d35cac4e18c298c47d7b504c66b17c2ea91312e979fcfbdfb08a \
--hash=sha256:f1eb068ead09f4994dec71c24b2844f1e4e4e013b9629f812f292f04bd1510d9 \
--hash=sha256:f2ded8d9b6f68cc26f8425eda5d3877b47343e68ca23d0d0846f4d312ecaa445 \
--hash=sha256:f751ed0a2f250541e19dfca9f1eafa31a392c71c832b6bb9e113b10d050cb0f1 \
--hash=sha256:faa88bc527d0f097abdc2c663cddf37c05a1c2f113716601555249805cf573f1 \
--hash=sha256:fc44e3c68ff00fd991b59092a54350e6e4911152682b4782f68070985aa9e648
nvidia-nccl-cu12==2.23.4 ; platform_system == "Linux" and platform_machine != "aarch64" and python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:aa946c8327e22ced28e7cef508a334673abc42064ec85f02d005ba1785ea4cec \
--hash=sha256:b097258d9aab2fa9f686e33c6fe40ae57b27df60cedbd15d139701bb5509e0c1
packaging==24.1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \
--hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124
pandas==2.2.3 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a \
--hash=sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d \
--hash=sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5 \
--hash=sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4 \
--hash=sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0 \
--hash=sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32 \
--hash=sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea \
--hash=sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28 \
--hash=sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f \
--hash=sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348 \
--hash=sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18 \
--hash=sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468 \
--hash=sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5 \
--hash=sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e \
--hash=sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667 \
--hash=sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645 \
--hash=sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13 \
--hash=sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30 \
--hash=sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3 \
--hash=sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d \
--hash=sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb \
--hash=sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3 \
--hash=sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039 \
--hash=sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8 \
--hash=sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd \
--hash=sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761 \
--hash=sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659 \
--hash=sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57 \
--hash=sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c \
--hash=sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c \
--hash=sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4 \
--hash=sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a \
--hash=sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9 \
--hash=sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42 \
--hash=sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2 \
--hash=sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39 \
--hash=sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc \
--hash=sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698 \
--hash=sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed \
--hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \
--hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \
--hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319
pluggy==1.5.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \
--hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669
pytest==8.3.3 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181 \
--hash=sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2
python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \
--hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
pytz==2024.2 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a \
--hash=sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725
regex==2024.9.11 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:01c2acb51f8a7d6494c8c5eafe3d8e06d76563d8a8a4643b37e9b2dd8a2ff623 \
--hash=sha256:02087ea0a03b4af1ed6ebab2c54d7118127fee8d71b26398e8e4b05b78963199 \
--hash=sha256:040562757795eeea356394a7fb13076ad4f99d3c62ab0f8bdfb21f99a1f85664 \
--hash=sha256:042c55879cfeb21a8adacc84ea347721d3d83a159da6acdf1116859e2427c43f \
--hash=sha256:079400a8269544b955ffa9e31f186f01d96829110a3bf79dc338e9910f794fca \
--hash=sha256:07f45f287469039ffc2c53caf6803cd506eb5f5f637f1d4acb37a738f71dd066 \
--hash=sha256:09d77559e80dcc9d24570da3745ab859a9cf91953062e4ab126ba9d5993688ca \
--hash=sha256:0cbff728659ce4bbf4c30b2a1be040faafaa9eca6ecde40aaff86f7889f4ab39 \
--hash=sha256:0e12c481ad92d129c78f13a2a3662317e46ee7ef96c94fd332e1c29131875b7d \
--hash=sha256:0ea51dcc0835eea2ea31d66456210a4e01a076d820e9039b04ae8d17ac11dee6 \
--hash=sha256:0ffbcf9221e04502fc35e54d1ce9567541979c3fdfb93d2c554f0ca583a19b35 \
--hash=sha256:1494fa8725c285a81d01dc8c06b55287a1ee5e0e382d8413adc0a9197aac6408 \
--hash=sha256:16e13a7929791ac1216afde26f712802e3df7bf0360b32e4914dca3ab8baeea5 \
--hash=sha256:18406efb2f5a0e57e3a5881cd9354c1512d3bb4f5c45d96d110a66114d84d23a \
--hash=sha256:18e707ce6c92d7282dfce370cd205098384b8ee21544e7cb29b8aab955b66fa9 \
--hash=sha256:220e92a30b426daf23bb67a7962900ed4613589bab80382be09b48896d211e92 \
--hash=sha256:23b30c62d0f16827f2ae9f2bb87619bc4fba2044911e2e6c2eb1af0161cdb766 \
--hash=sha256:23f9985c8784e544d53fc2930fc1ac1a7319f5d5332d228437acc9f418f2f168 \
--hash=sha256:297f54910247508e6e5cae669f2bc308985c60540a4edd1c77203ef19bfa63ca \
--hash=sha256:2b08fce89fbd45664d3df6ad93e554b6c16933ffa9d55cb7e01182baaf971508 \
--hash=sha256:2cce2449e5927a0bf084d346da6cd5eb016b2beca10d0013ab50e3c226ffc0df \
--hash=sha256:313ea15e5ff2a8cbbad96ccef6be638393041b0a7863183c2d31e0c6116688cf \
--hash=sha256:323c1f04be6b2968944d730e5c2091c8c89767903ecaa135203eec4565ed2b2b \
--hash=sha256:35f4a6f96aa6cb3f2f7247027b07b15a374f0d5b912c0001418d1d55024d5cb4 \
--hash=sha256:3b37fa423beefa44919e009745ccbf353d8c981516e807995b2bd11c2c77d268 \
--hash=sha256:3ce4f1185db3fbde8ed8aa223fc9620f276c58de8b0d4f8cc86fd1360829edb6 \
--hash=sha256:46989629904bad940bbec2106528140a218b4a36bb3042d8406980be1941429c \
--hash=sha256:4838e24ee015101d9f901988001038f7f0d90dc0c3b115541a1365fb439add62 \
--hash=sha256:49b0e06786ea663f933f3710a51e9385ce0cba0ea56b67107fd841a55d56a231 \
--hash=sha256:4db21ece84dfeefc5d8a3863f101995de646c6cb0536952c321a2650aa202c36 \
--hash=sha256:54c4a097b8bc5bb0dfc83ae498061d53ad7b5762e00f4adaa23bee22b012e6ba \
--hash=sha256:54d9ff35d4515debf14bc27f1e3b38bfc453eff3220f5bce159642fa762fe5d4 \
--hash=sha256:55b96e7ce3a69a8449a66984c268062fbaa0d8ae437b285428e12797baefce7e \
--hash=sha256:57fdd2e0b2694ce6fc2e5ccf189789c3e2962916fb38779d3e3521ff8fe7a822 \
--hash=sha256:587d4af3979376652010e400accc30404e6c16b7df574048ab1f581af82065e4 \
--hash=sha256:5b513b6997a0b2f10e4fd3a1313568e373926e8c252bd76c960f96fd039cd28d \
--hash=sha256:5ddcd9a179c0a6fa8add279a4444015acddcd7f232a49071ae57fa6e278f1f71 \
--hash=sha256:6113c008a7780792efc80f9dfe10ba0cd043cbf8dc9a76ef757850f51b4edc50 \
--hash=sha256:635a1d96665f84b292e401c3d62775851aedc31d4f8784117b3c68c4fcd4118d \
--hash=sha256:64ce2799bd75039b480cc0360907c4fb2f50022f030bf9e7a8705b636e408fad \
--hash=sha256:69dee6a020693d12a3cf892aba4808fe168d2a4cef368eb9bf74f5398bfd4ee8 \
--hash=sha256:6a2644a93da36c784e546de579ec1806bfd2763ef47babc1b03d765fe560c9f8 \
--hash=sha256:6b41e1adc61fa347662b09398e31ad446afadff932a24807d3ceb955ed865cc8 \
--hash=sha256:6c188c307e8433bcb63dc1915022deb553b4203a70722fc542c363bf120a01fd \
--hash=sha256:6edd623bae6a737f10ce853ea076f56f507fd7726bee96a41ee3d68d347e4d16 \
--hash=sha256:73d6d2f64f4d894c96626a75578b0bf7d9e56dcda8c3d037a2118fdfe9b1c664 \
--hash=sha256:7a22ccefd4db3f12b526eccb129390942fe874a3a9fdbdd24cf55773a1faab1a \
--hash=sha256:7fb89ee5d106e4a7a51bce305ac4efb981536301895f7bdcf93ec92ae0d91c7f \
--hash=sha256:846bc79ee753acf93aef4184c040d709940c9d001029ceb7b7a52747b80ed2dd \
--hash=sha256:85ab7824093d8f10d44330fe1e6493f756f252d145323dd17ab6b48733ff6c0a \
--hash=sha256:8dee5b4810a89447151999428fe096977346cf2f29f4d5e29609d2e19e0199c9 \
--hash=sha256:8e5fb5f77c8745a60105403a774fe2c1759b71d3e7b4ca237a5e67ad066c7199 \
--hash=sha256:98eeee2f2e63edae2181c886d7911ce502e1292794f4c5ee71e60e23e8d26b5d \
--hash=sha256:9d4a76b96f398697fe01117093613166e6aa8195d63f1b4ec3f21ab637632963 \
--hash=sha256:9e8719792ca63c6b8340380352c24dcb8cd7ec49dae36e963742a275dfae6009 \
--hash=sha256:a0b2b80321c2ed3fcf0385ec9e51a12253c50f146fddb2abbb10f033fe3d049a \
--hash=sha256:a4cc92bb6db56ab0c1cbd17294e14f5e9224f0cc6521167ef388332604e92679 \
--hash=sha256:a738b937d512b30bf75995c0159c0ddf9eec0775c9d72ac0202076c72f24aa96 \
--hash=sha256:a8f877c89719d759e52783f7fe6e1c67121076b87b40542966c02de5503ace42 \
--hash=sha256:a906ed5e47a0ce5f04b2c981af1c9acf9e8696066900bf03b9d7879a6f679fc8 \
--hash=sha256:ae2941333154baff9838e88aa71c1d84f4438189ecc6021a12c7573728b5838e \
--hash=sha256:b0d0a6c64fcc4ef9c69bd5b3b3626cc3776520a1637d8abaa62b9edc147a58f7 \
--hash=sha256:b5b029322e6e7b94fff16cd120ab35a253236a5f99a79fb04fda7ae71ca20ae8 \
--hash=sha256:b7aaa315101c6567a9a45d2839322c51c8d6e81f67683d529512f5bcfb99c802 \
--hash=sha256:be1c8ed48c4c4065ecb19d882a0ce1afe0745dfad8ce48c49586b90a55f02366 \
--hash=sha256:c0256beda696edcf7d97ef16b2a33a8e5a875affd6fa6567b54f7c577b30a137 \
--hash=sha256:c157bb447303070f256e084668b702073db99bbb61d44f85d811025fcf38f784 \
--hash=sha256:c57d08ad67aba97af57a7263c2d9006d5c404d721c5f7542f077f109ec2a4a29 \
--hash=sha256:c69ada171c2d0e97a4b5aa78fbb835e0ffbb6b13fc5da968c09811346564f0d3 \
--hash=sha256:c94bb0a9f1db10a1d16c00880bdebd5f9faf267273b8f5bd1878126e0fbde771 \
--hash=sha256:cb130fccd1a37ed894824b8c046321540263013da72745d755f2d35114b81a60 \
--hash=sha256:ced479f601cd2f8ca1fd7b23925a7e0ad512a56d6e9476f79b8f381d9d37090a \
--hash=sha256:d05ac6fa06959c4172eccd99a222e1fbf17b5670c4d596cb1e5cde99600674c4 \
--hash=sha256:d552c78411f60b1fdaafd117a1fca2f02e562e309223b9d44b7de8be451ec5e0 \
--hash=sha256:dd4490a33eb909ef5078ab20f5f000087afa2a4daa27b4c072ccb3cb3050ad84 \
--hash=sha256:df5cbb1fbc74a8305b6065d4ade43b993be03dbe0f8b30032cced0d7740994bd \
--hash=sha256:e28f9faeb14b6f23ac55bfbbfd3643f5c7c18ede093977f1df249f73fd22c7b1 \
--hash=sha256:e464b467f1588e2c42d26814231edecbcfe77f5ac414d92cbf4e7b55b2c2a776 \
--hash=sha256:e4c22e1ac1f1ec1e09f72e6c44d8f2244173db7eb9629cc3a346a8d7ccc31142 \
--hash=sha256:e53b5fbab5d675aec9f0c501274c467c0f9a5d23696cfc94247e1fb56501ed89 \
--hash=sha256:e93f1c331ca8e86fe877a48ad64e77882c0c4da0097f2212873a69bbfea95d0c \
--hash=sha256:e997fd30430c57138adc06bba4c7c2968fb13d101e57dd5bb9355bf8ce3fa7e8 \
--hash=sha256:e9a091b0550b3b0207784a7d6d0f1a00d1d1c8a11699c1a4d93db3fbefc3ad35 \
--hash=sha256:eab4bb380f15e189d1313195b062a6aa908f5bd687a0ceccd47c8211e9cf0d4a \
--hash=sha256:eb1ae19e64c14c7ec1995f40bd932448713d3c73509e82d8cd7744dc00e29e86 \
--hash=sha256:ecea58b43a67b1b79805f1a0255730edaf5191ecef84dbc4cc85eb30bc8b63b9 \
--hash=sha256:ee439691d8c23e76f9802c42a95cfeebf9d47cf4ffd06f18489122dbb0a7ad64 \
--hash=sha256:eee9130eaad130649fd73e5cd92f60e55708952260ede70da64de420cdcad554 \
--hash=sha256:f47cd43a5bfa48f86925fe26fbdd0a488ff15b62468abb5d2a1e092a4fb10e85 \
--hash=sha256:f6fff13ef6b5f29221d6904aa816c34701462956aa72a77f1f151a8ec4f56aeb \
--hash=sha256:f745ec09bc1b0bd15cfc73df6fa4f726dcc26bb16c23a03f9e3367d357eeedd0 \
--hash=sha256:f8404bf61298bb6f8224bb9176c1424548ee1181130818fcd2cbffddc768bed8 \
--hash=sha256:f9268774428ec173654985ce55fc6caf4c6d11ade0f6f914d48ef4719eb05ebb \
--hash=sha256:faa3c142464efec496967359ca99696c896c591c56c53506bac1ad465f66e919
requests==2.32.3 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \
--hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6
rfc3986==1.5.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835 \
--hash=sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97
scheduler==0.8.7 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:0917fe659a4e62f3d306af7c36f77c5161ef73a7a645bf231db6bd19722b9a3e \
--hash=sha256:ab54d6474649650c5d040b0cc0ae314c636b110fd0dd83254a0481bc93415ee5
scikit-learn==1.5.2 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:03b6158efa3faaf1feea3faa884c840ebd61b6484167c711548fce208ea09445 \
--hash=sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3 \
--hash=sha256:1ff45e26928d3b4eb767a8f14a9a6efbf1cbff7c05d1fb0f95f211a89fd4f5de \
--hash=sha256:299406827fb9a4f862626d0fe6c122f5f87f8910b86fe5daa4c32dcd742139b6 \
--hash=sha256:2d4cad1119c77930b235579ad0dc25e65c917e756fe80cab96aa3b9428bd3fb0 \
--hash=sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6 \
--hash=sha256:3a686885a4b3818d9e62904d91b57fa757fc2bed3e465c8b177be652f4dd37c8 \
--hash=sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1 \
--hash=sha256:3bed4909ba187aca80580fe2ef370d9180dcf18e621a27c4cf2ef10d279a7efe \
--hash=sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1 \
--hash=sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1 \
--hash=sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8 \
--hash=sha256:6c16d84a0d45e4894832b3c4d0bf73050939e21b99b01b6fd59cbb0cf39163b6 \
--hash=sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9 \
--hash=sha256:8c412ccc2ad9bf3755915e3908e677b367ebc8d010acbb3f182814524f2e5540 \
--hash=sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908 \
--hash=sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d \
--hash=sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f \
--hash=sha256:c15b1ca23d7c5f33cc2cb0a0d6aaacf893792271cddff0edbd6a40e8319bc113 \
--hash=sha256:ca64b3089a6d9b9363cd3546f8978229dcbb737aceb2c12144ee3f70f95684b7 \
--hash=sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5 \
--hash=sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd \
--hash=sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12 \
--hash=sha256:f763897fe92d0e903aa4847b0aec0e68cadfff77e8a0687cabd946c89d17e675 \
--hash=sha256:f8b0ccd4a902836493e026c03256e8b206656f91fbcc4fde28c57a5b752561f1 \
--hash=sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a
scipy==1.14.1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e \
--hash=sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79 \
--hash=sha256:278266012eb69f4a720827bdd2dc54b2271c97d84255b2faaa8f161a158c3b37 \
--hash=sha256:2843f2d527d9eebec9a43e6b406fb7266f3af25a751aa91d62ff416f54170bc5 \
--hash=sha256:2da0469a4ef0ecd3693761acbdc20f2fdeafb69e6819cc081308cc978153c675 \
--hash=sha256:2ff0a7e01e422c15739ecd64432743cf7aae2b03f3084288f399affcefe5222d \
--hash=sha256:2ff38e22128e6c03ff73b6bb0f85f897d2362f8c052e3b8ad00532198fbdae3f \
--hash=sha256:30ac8812c1d2aab7131a79ba62933a2a76f582d5dbbc695192453dae67ad6310 \
--hash=sha256:3a1b111fac6baec1c1d92f27e76511c9e7218f1695d61b59e05e0fe04dc59617 \
--hash=sha256:4079b90df244709e675cdc8b93bfd8a395d59af40b72e339c2287c91860deb8e \
--hash=sha256:5149e3fd2d686e42144a093b206aef01932a0059c2a33ddfa67f5f035bdfe13e \
--hash=sha256:5a275584e726026a5699459aa72f828a610821006228e841b94275c4a7c08417 \
--hash=sha256:631f07b3734d34aced009aaf6fedfd0eb3498a97e581c3b1e5f14a04164a456d \
--hash=sha256:716e389b694c4bb564b4fc0c51bc84d381735e0d39d3f26ec1af2556ec6aad94 \
--hash=sha256:8426251ad1e4ad903a4514712d2fa8fdd5382c978010d1c6f5f37ef286a713ad \
--hash=sha256:8475230e55549ab3f207bff11ebfc91c805dc3463ef62eda3ccf593254524ce8 \
--hash=sha256:8bddf15838ba768bb5f5083c1ea012d64c9a444e16192762bd858f1e126196d0 \
--hash=sha256:8e32dced201274bf96899e6491d9ba3e9a5f6b336708656466ad0522d8528f69 \
--hash=sha256:8f9ea80f2e65bdaa0b7627fb00cbeb2daf163caa015e59b7516395fe3bd1e066 \
--hash=sha256:97c5dddd5932bd2a1a31c927ba5e1463a53b87ca96b5c9bdf5dfd6096e27efc3 \
--hash=sha256:a49f6ed96f83966f576b33a44257d869756df6cf1ef4934f59dd58b25e0327e5 \
--hash=sha256:af29a935803cc707ab2ed7791c44288a682f9c8107bc00f0eccc4f92c08d6e07 \
--hash=sha256:b05d43735bb2f07d689f56f7b474788a13ed8adc484a85aa65c0fd931cf9ccd2 \
--hash=sha256:b28d2ca4add7ac16ae8bb6632a3c86e4b9e4d52d3e34267f6e1b0c1f8d87e389 \
--hash=sha256:b99722ea48b7ea25e8e015e8341ae74624f72e5f21fc2abd45f3a93266de4c5d \
--hash=sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84 \
--hash=sha256:c0ee987efa6737242745f347835da2cc5bb9f1b42996a4d97d5c7ff7928cb6f2 \
--hash=sha256:d0d2821003174de06b69e58cef2316a6622b60ee613121199cb2852a873f8cf3 \
--hash=sha256:e0cf28db0f24a38b2a0ca33a85a54852586e43cf6fd876365c86e0657cfe7d73 \
--hash=sha256:e4f5a7c49323533f9103d4dacf4e4f07078f360743dec7f7596949149efeec06 \
--hash=sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc \
--hash=sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1 \
--hash=sha256:fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2
six==1.16.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \
--hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
sniffio==1.3.1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2 \
--hash=sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc
spam-detector-ai==2.1.18 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:315c789255db8966bd3d6b2510affd6e229401c31349c00435fa2bec0dd4fee4 \
--hash=sha256:809dda6a5edee72d4cd7c22129d1fea92d94d706772ff135051abe7b40839de7
threadpoolctl==3.5.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107 \
--hash=sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467
tomli==2.0.2 ; python_version >= "3.10" and python_version < "3.11" \
--hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \
--hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed
tornado==6.4.1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:163b0aafc8e23d8cdc3c9dfb24c5368af84a81e3364745ccb4427669bf84aec8 \
--hash=sha256:25486eb223babe3eed4b8aecbac33b37e3dd6d776bc730ca14e1bf93888b979f \
--hash=sha256:454db8a7ecfcf2ff6042dde58404164d969b6f5d58b926da15e6b23817950fc4 \
--hash=sha256:613bf4ddf5c7a95509218b149b555621497a6cc0d46ac341b30bd9ec19eac7f3 \
--hash=sha256:6d5ce3437e18a2b66fbadb183c1d3364fb03f2be71299e7d10dbeeb69f4b2a14 \
--hash=sha256:8ae50a504a740365267b2a8d1a90c9fbc86b780a39170feca9bcc1787ff80842 \
--hash=sha256:92d3ab53183d8c50f8204a51e6f91d18a15d5ef261e84d452800d4ff6fc504e9 \
--hash=sha256:a02a08cc7a9314b006f653ce40483b9b3c12cda222d6a46d4ac63bb6c9057698 \
--hash=sha256:b24b8982ed444378d7f21d563f4180a2de31ced9d8d84443907a0a64da2072e7 \
--hash=sha256:d9a566c40b89757c9aa8e6f032bcdb8ca8795d7c1a9762910c722b1635c9de4d \
--hash=sha256:e2e20b9113cd7293f164dc46fffb13535266e713cdb87bd2d15ddb336e96cfc4
tqdm==4.66.6 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63 \
--hash=sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090
typeguard==4.4.0 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:463bd8697a65a4aa576a63767c369b1ecfba8a5ba735edfe3223127b6ecfa28c \
--hash=sha256:8ca34c14043f53b2caae7040549ba431770869bcd6287cfa8239db7ecb882b4a
typing-extensions==4.12.2 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
--hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
tzdata==2024.2 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc \
--hash=sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd
urllib3==2.2.3 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \
--hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9
xgboost==2.1.2 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:151071c22d0f5637739ccaf057f57d05983caaba557cc3d8191c2cb8ce8ae659 \
--hash=sha256:30a23db30a7c958917fce9f0467eab8920480d064a4bb74de693e0680528163f \
--hash=sha256:69f1e1038e594d28d6f74252685495eba9de4983e95c87d1e93706fa5f720b02 \
--hash=sha256:93782c35e8a2c2eb124eb68186c3238f270ee868c654f65b47f0d594376b25d6 \
--hash=sha256:9efbdb262cce5e6d71123748ee8c0c4e6cf66d9c9207dae71f645116717c1f00 \
--hash=sha256:a8abdcc48d68cad22812df43a16cfc8314dcf771064b63ada9d73399129c8fdc \
--hash=sha256:bfa7e6cecf7b62319379f0d4b04b966e84a933adcde0405061b9ddb0d0aef6b1 \
--hash=sha256:ec49df8017e729244e08ffb4904659f3f0b6f3c8ca11c90a5ca501ab9cc8c8ce

5575
spam.csv Normal file

File diff suppressed because it is too large Load Diff

3
src/__main__.py Normal file
View File

@ -0,0 +1,3 @@
import src.app
src.app.start()

View File

@ -1,27 +1,39 @@
import argparse import argparse
import os import os
from dataset.preparer import download_words import src.model.trainer as model_trainer
from model.trainer import train import src.web.server as web_server
import web.server import src.model.updater as model_updater
import model.updater
parser = argparse.ArgumentParser(prog='Antispam complex') parser = argparse.ArgumentParser(prog='app.py')
parser.add_argument('-i', '--init', action=argparse.BooleanOptionalAction, help='Initializing, must be run beforehand, --dataset is required') parser.add_argument('-i', '--init', action=argparse.BooleanOptionalAction, help='Initializing, must be run beforehand, --dataset is required')
parser.add_argument('-m', '--decision-maker', action=argparse.BooleanOptionalAction, help='Start as Decision maker') parser.add_argument('-m', '--decision-maker', action=argparse.BooleanOptionalAction, help='Start as Decision maker')
parser.add_argument('-d', '--dataset', required=False, help='Path to CSV (ham/spam) dataset') parser.add_argument('-d', '--dataset', required=False, help='Path to CSV (ham/spam) dataset')
parser.add_argument('-u', '--model-updater', required=False, help='Start as Model updater') parser.add_argument('-u', '--model-updater', action=argparse.BooleanOptionalAction, help='Start as Model updater')
args = parser.parse_args() args = parser.parse_args()
assert (args.init is not None
or args.decision_maker is not None
or args.model_updater is not None), "No mode set. Run --help"
_port = 8080 if os.getenv('PORT') is None else os.getenv('PORT') _port = 8080 if os.getenv('PORT') is None else os.getenv('PORT')
_models_dir = os.getenv("MODELS_DIR")
_fucking_dir = os.getenv("FUCKING_DIR")
_web_api_url = os.getenv("WEB_API_URL")
_token = os.getenv("TOKEN")
def start():
if args.init:
assert args.dataset is not None, "Dataset is required, run --help"
model_trainer.train(args.dataset, fucking_path=_fucking_dir, backup_path=_models_dir)
elif args.decision_maker:
model_trainer.apply_latest(fucking_path=_fucking_dir, backup_path=_models_dir)
web_server.start(port=_port, token=_token, fucking_path=_fucking_dir, backup_path=_models_dir)
elif args.model_updater:
assert args.dataset is not None, "Dataset is required, run --help"
model_updater.start(fucking_path=_fucking_dir, models_dir=_models_dir, dataset_path=args.dataset, web_api_url=_web_api_url, token=_token)
if __name__ == '__main__': if __name__ == '__main__':
if args.init: start()
assert args.dataset is not None, "Dataset is required, show --help"
download_words()
train(args.dataset)
elif args.decision_maker:
web.server.start(port=_port)
elif args.model_updater:
model.updater.start()

View File

@ -1,5 +0,0 @@
import nltk
def download_words() -> None:
nltk.download('wordnet')
nltk.download('stopwords')

View File

@ -1,12 +1,27 @@
# spam_detector_ai/trainer.py # spam_detector_ai/trainer.py
import os import os
import sys import shutil
from pathlib import Path from os.path import isfile, join
from src.l.logger import logger
def _does_file_exist_in_dir(path):
return any(isfile(join(path, i)) for i in os.listdir(path))
def apply_latest(fucking_path: str, backup_path: str) -> None:
logger.info("Applying models...")
if _does_file_exist_in_dir(backup_path):
files = os.listdir(fucking_path)
for file in files:
shutil.rmtree(os.path.join(fucking_path, file))
shutil.copytree(backup_path, fucking_path,
copy_function=shutil.copy2,
dirs_exist_ok=True)
def train(dataset_path: str, fucking_path: str, backup_path: str) -> None:
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from spam_detector_ai.classifiers.classifier_types import ClassifierType from spam_detector_ai.classifiers.classifier_types import ClassifierType
from spam_detector_ai.training.train_models import ModelTrainer from spam_detector_ai.training.train_models import ModelTrainer
from src.logging.logger import logger
def _train_model(classifier_type, model_filename, vectoriser_filename, X_train, y_train): def _train_model(classifier_type, model_filename, vectoriser_filename, X_train, y_train):
logger.info(f'Training {classifier_type}') logger.info(f'Training {classifier_type}')
@ -14,11 +29,10 @@ def _train_model(classifier_type, model_filename, vectoriser_filename, X_train,
trainer_.train(X_train, y_train) trainer_.train(X_train, y_train)
trainer_.save_model(model_filename, vectoriser_filename) trainer_.save_model(model_filename, vectoriser_filename)
logger.info(f"Starting to train using data_path={dataset_path}")
def train(data_path: str) -> None:
# Load and preprocess data once # Load and preprocess data once
# data_path = os.path.join(project_root, 'spam.csv') # data_path = os.path.join(project_root, 'spam.csv')
initial_trainer = ModelTrainer(data_path=data_path, logger=logger) initial_trainer = ModelTrainer(data_path=dataset_path, logger=logger)
processed_data = initial_trainer.preprocess_data_() processed_data = initial_trainer.preprocess_data_()
# Split the data once # Split the data once
@ -38,3 +52,12 @@ def train(data_path: str) -> None:
logger.info(f"Train each model with the pre-split data\n") logger.info(f"Train each model with the pre-split data\n")
for ct, mf, vf in configurations: for ct, mf, vf in configurations:
_train_model(ct, mf, vf, X__train, y__train) _train_model(ct, mf, vf, X__train, y__train)
logger.info("Backing up...")
files = os.listdir(backup_path)
for file in files:
shutil.rmtree(os.path.join(backup_path, file))
shutil.copytree(fucking_path, backup_path,
copy_function=shutil.copy2,
dirs_exist_ok=True)
logger.info("Backing up - done")

View File

@ -1,5 +1,40 @@
from src.logging.logger import logger import datetime as dt
from src.l.logger import logger
import src.model.trainer as trainer
from scheduler import Scheduler
from os import listdir
import time
import requests
def start() -> None: def _does_file_exist_in_dir(path):
return len(listdir(path)) > 0
def start(fucking_path: str, models_dir: str, dataset_path: str, web_api_url: str, token: str) -> None:
logger.info("Starting...") logger.info("Starting...")
def _restart_web_api() -> None:
headers = { 'Authorization': f"Bearer {token}" }
response = requests.post(
f"{web_api_url}/admin/restart",
json={},
headers=headers,
timeout=3
)
if response.status_code > 399:
logger.warn(f"Unable to restart Web API server: {response.status_code}, {response.text}")
def _train() -> None:
trainer.train(dataset_path=dataset_path, fucking_path=fucking_path, backup_path=models_dir)
_restart_web_api()
tz_moscow = dt.timezone(dt.timedelta(hours=3))
scheduler = Scheduler(tzinfo=dt.timezone.utc)
if not _does_file_exist_in_dir(models_dir):
logger.info("Will be updated in 5 seconds...")
scheduler.once(dt.timedelta(seconds=5), _train)
scheduler.daily(dt.time(hour=3, minute=0, tzinfo=tz_moscow), _train)
print(scheduler)
while True:
scheduler.exec_jobs()
time.sleep(1)

7
src/preparer.py Normal file
View File

@ -0,0 +1,7 @@
import nltk
import os
_working_dir = os.getenv("WORKING_DIR")
nltk.download('wordnet', download_dir=_working_dir)
nltk.download('stopwords', download_dir=_working_dir)

View File

@ -15,7 +15,7 @@ parser = argparse.ArgumentParser(prog='translate-dataset')
parser.add_argument('-i', '--input', help='Source file') parser.add_argument('-i', '--input', help='Source file')
parser.add_argument('-o', '--output', help='Destination file') parser.add_argument('-o', '--output', help='Destination file')
args = parser.parse_args() args = parser.parse_args()
#/home/bvn13/develop/spam-detector-1/spam.csv #/home/bvn13/develop/spam-detector/spam.csv
translator = Translator() translator = Translator()

View File

@ -2,24 +2,30 @@ import asyncio
import json import json
import os import os
import tornado import tornado
from spam_detector_ai.prediction.predict import VotingSpamDetector import src.model.trainer as model_trainer
from src.logging.logger import logger from src.l.logger import logger
_spam_detector = VotingSpamDetector() _spam_detector = None
def _json(data) -> str: def _json(data) -> str:
return json.dumps(data) return json.dumps(data)
def start(port: int) -> None: def start(port: int, token: str, fucking_path: str, backup_path: str) -> None:
global _spam_detector
logger.info("Starting...") logger.info("Starting...")
class CheckSpamHandler(tornado.web.RequestHandler): def _create_spam_detector():
def set_default_headers(self): model_trainer.apply_latest(fucking_path=fucking_path, backup_path=backup_path)
self.set_header("Access-Control-Allow-Origin", "*") from spam_detector_ai.prediction.predict import VotingSpamDetector
return VotingSpamDetector()
def get(self): _spam_detector = _create_spam_detector()
class CheckSpamHandler(tornado.web.RequestHandler):
@tornado.gen.coroutine
def post(self):
body = json.loads(self.request.body) body = json.loads(self.request.body)
if not 'text' in body: if not 'text' in body:
self.write_error(400, body=_json({"error": "text is not specified"})) self.write_error(400, body=_json({"error": "text is not specified"}))
@ -27,11 +33,29 @@ def start(port: int) -> None:
r = json.dumps({"is_spam": _spam_detector.is_spam(body['text'])}) r = json.dumps({"is_spam": _spam_detector.is_spam(body['text'])})
self.write(r) self.write(r)
class AdminRestartHandler(tornado.web.RequestHandler):
@tornado.gen.coroutine
def post(self):
global _spam_detector
auth_header = self.request.headers.get('Authorization')
if auth_header:
auth_token = auth_header.split(" ")[1]
else:
auth_token = ''
if auth_token == token:
_spam_detector = _create_spam_detector()
else:
self.set_status(403)
self.write({'status': 'fail', 'message': 'Invalid authentication token'})
self.finish()
return
async def start_web_server(): async def start_web_server():
logger.info(f"Starting web server on port {port}") logger.info(f"Starting web server on port {port}")
app = tornado.web.Application( app = tornado.web.Application(
[ [
(r"/check-spam", CheckSpamHandler), (r"/check-spam", CheckSpamHandler),
(r"/admin/restart", AdminRestartHandler)
], ],
template_path=os.path.join(os.path.dirname(__file__), "templates"), template_path=os.path.join(os.path.dirname(__file__), "templates"),
static_path=os.path.join(os.path.dirname(__file__), "static"), static_path=os.path.join(os.path.dirname(__file__), "static"),