Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ requires = [
"cymem>=2.0.2,<2.1.0",
"preshed>=3.0.2,<3.1.0",
"murmurhash>=0.28.0,<1.1.0",
"thinc>=8.3.12,<8.4.0",
"blis @ file:///mnt/d/Kodland/M6U4/cython-blis",
"thinc @ file:///mnt/d/Kodland/M6U4/thinc-8.2.4",
"numpy>=2.0.0,<3.0.0"
]
build-backend = "setuptools.build_meta"
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ spacy-legacy>=3.0.11,<3.1.0
spacy-loggers>=1.0.0,<2.0.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.3.12,<8.4.0
thinc @ file:///mnt/d/Kodland/M6U4//thinc-8.2.4
ml_datasets>=0.2.1,<0.3.0
murmurhash>=0.28.0,<1.1.0
wasabi>=0.9.1,<1.2.0
Expand Down Expand Up @@ -34,4 +34,4 @@ types-requests
types-setuptools>=57.0.0
ruff>=0.9.0
cython-lint>=0.15.0
confection>=1.1.0,<2.0.0
confection>=1.3.2,<2.0.0
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ setup_requires =
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
murmurhash>=0.28.0,<1.1.0
thinc>=8.3.12,<8.4.0
thinc @ file:///mnt/d/Kodland/M6U4//thinc-8.2.4
install_requires =
# Our libraries
spacy-legacy>=3.0.11,<3.1.0
spacy-loggers>=1.0.0,<2.0.0
murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.3.12,<8.4.0
thinc @ file:///mnt/d/Kodland/M6U4//thinc-8.2.4
wasabi>=0.9.1,<1.2.0
srsly>=2.5.3,<3.0.0
catalogue>=2.0.6,<2.1.0
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@
"mingw32": ["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"],
"other": ["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"],
}
LINK_OPTIONS = {"msvc": ["-std=c++11"], "mingw32": ["-std=c++11"], "other": []}
COMPILER_DIRECTIVES = {
LINK_OPTIONS: dict[str, list[str]] = {"msvc": ["-std=c++11"], "mingw32": ["-std=c++11"], "other": []}
COMPILER_DIRECTIVES: dict[str, int | bool] = {
"language_level": -3,
"embedsignature": True,
"annotation_typing": False,
Expand Down
14 changes: 6 additions & 8 deletions spacy/ml/parser_model.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ cdef struct SizesC:


cdef struct WeightsC:
const float* feat_weights
const float* feat_bias
const float* hidden_bias
const float* hidden_weights
const float* seen_classes
float* feat_weights
float* feat_bias
float* hidden_bias
float* hidden_weights
float* seen_classes


cdef struct ActivationsC:
Expand All @@ -40,9 +40,7 @@ cdef ActivationsC alloc_activations(SizesC n) nogil

cdef void free_activations(const ActivationsC* A) nogil

cdef void predict_states(
CBlas cblas, ActivationsC* A, StateC** states, const WeightsC* W, SizesC n
) nogil
cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states, WeightsC* W, SizesC n) nogil

cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) nogil

Expand Down
49 changes: 27 additions & 22 deletions spacy/ml/parser_model.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ cdef WeightsC get_c_weights(model) except *:
cdef WeightsC output
cdef precompute_hiddens state2vec = model.state2vec
output.feat_weights = state2vec.get_feat_weights()
output.feat_bias = <const float*>state2vec.bias.data
output.feat_bias = <float*>state2vec.bias.data
cdef np.ndarray vec2scores_W
cdef np.ndarray vec2scores_b
if model.vec2scores is None:
Expand All @@ -31,10 +31,10 @@ cdef WeightsC get_c_weights(model) except *:
else:
vec2scores_W = model.vec2scores.get_param("W")
vec2scores_b = model.vec2scores.get_param("b")
output.hidden_weights = <const float*>vec2scores_W.data
output.hidden_bias = <const float*>vec2scores_b.data
output.hidden_weights = <float*>vec2scores_W.data
output.hidden_bias = <float*>vec2scores_b.data
cdef np.ndarray class_mask = model._class_mask
output.seen_classes = <const float*>class_mask.data
output.seen_classes = <float*>class_mask.data
return output


Expand Down Expand Up @@ -98,9 +98,7 @@ cdef void resize_activations(ActivationsC* A, SizesC n) noexcept nogil:
A._curr_size = n.states


cdef void predict_states(
CBlas cblas, ActivationsC* A, StateC** states, const WeightsC* W, SizesC n
) noexcept nogil:
cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states, WeightsC* W, SizesC n) noexcept nogil:
resize_activations(A, n)
for i in range(n.states):
states[i].set_context_tokens(&A.token_ids[i*n.feats], n.feats)
Expand Down Expand Up @@ -132,8 +130,8 @@ cdef void predict_states(
# Compute hidden-to-output
sgemm(cblas)(
False, True, n.states, n.classes, n.hiddens,
1.0, <const float *>A.hiddens, n.hiddens,
<const float *>W.hidden_weights, n.hiddens,
1.0, <float *>A.hiddens, n.hiddens,
<float *>W.hidden_weights, n.hiddens,
0.0, A.scores, n.classes
)
# Add bias
Expand Down Expand Up @@ -173,7 +171,7 @@ cdef void sum_state_features(
else:
idx = token_ids[f] * id_stride + f*O
feature = &cached[idx]
saxpy(cblas)(O, one, <const float*>feature, 1, &output[b*O], 1)
saxpy(cblas)(O, one, <float*>feature, 1, &output[b*O], 1)
token_ids += F


Expand Down Expand Up @@ -338,6 +336,14 @@ class ParserStepModel(Model):
NUMPY_OPS = NumpyOps()


def _backprop_parser_step(d_scores, model, token_ids, get_d_vector, get_d_tokvecs, mask):
d_scores *= model._class_mask
d_vector = get_d_vector(d_scores)
if mask is not None:
d_vector *= mask
model.backprop_step(token_ids, d_vector, get_d_tokvecs)
return None

def step_forward(model: ParserStepModel, states, is_train):
token_ids = model.get_token_ids(states)
vector, get_d_tokvecs = model.state2vec(token_ids, is_train)
Expand All @@ -350,19 +356,18 @@ def step_forward(model: ParserStepModel, states, is_train):
scores, get_d_vector = model.vec2scores(vector, is_train)
else:
scores = NumpyOps().asarray(vector)
get_d_vector = lambda d_scores: d_scores # no-cython-lint: E731
# If the class is unseen, make sure its score is minimum
get_d_vector = lambda d_scores: d_scores
scores[:, model._class_mask == 0] = numpy.nanmin(scores)

def backprop_parser_step(d_scores):
# Zero vectors for unseen classes
d_scores *= model._class_mask
d_vector = get_d_vector(d_scores)
if mask is not None:
d_vector *= mask
model.backprop_step(token_ids, d_vector, get_d_tokvecs)
return None
return scores, backprop_parser_step
    # Return a partially applied backprop function instead of a closure
from functools import partial
backprop = partial(_backprop_parser_step,
model=model,
token_ids=token_ids,
get_d_vector=get_d_vector,
get_d_tokvecs=get_d_tokvecs,
mask=mask)
return scores, backprop


cdef class precompute_hiddens:
Expand Down Expand Up @@ -423,7 +428,7 @@ cdef class precompute_hiddens:
self._cached = cached
self._bp_hiddens = bp_features

cdef const float* get_feat_weights(self) except NULL:
cdef float* get_feat_weights(self) except NULL:
if not self._is_synchronized and self._cuda_stream is not None:
self._cuda_stream.synchronize()
self._is_synchronized = True
Expand Down
147 changes: 147 additions & 0 deletions spacy/tests/package/test.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
[metadata]
description = Industrial-strength Natural Language Processing (NLP) in Python
url = https://spacy.io
author = Explosion
author_email = contact@explosion.ai
license = MIT
long_description = file: README.md
long_description_content_type = text/markdown
classifiers =
Development Status :: 5 - Production/Stable
Environment :: Console
Intended Audience :: Developers
Intended Audience :: Science/Research
License :: OSI Approved :: MIT License
Operating System :: POSIX :: Linux
Operating System :: MacOS :: MacOS X
Operating System :: Microsoft :: Windows
Programming Language :: Cython
Programming Language :: Python :: 3
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: 3.13
Programming Language :: Python :: 3.14
Topic :: Scientific/Engineering
project_urls =
Release notes = https://github.com/explosion/spaCy/releases
Source = https://github.com/explosion/spaCy

[options]
zip_safe = false
include_package_data = true
python_requires = >=3.9,<3.15
# NOTE: This section is superseded by pyproject.toml and will be removed in
# spaCy v4
setup_requires =
cython>=3.0,<4.0
numpy>=2.0.0,<3.0.0; python_version < "3.9"
numpy>=2.0.0,<3.0.0; python_version >= "3.9"
# We also need our Cython packages here to compile against
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
murmurhash>=0.28.0,<1.1.0
thinc @ file:///mnt/d/Kodland/M6U4//thinc-8.2.4
install_requires =
# Our libraries
spacy-legacy>=3.0.11,<3.1.0
spacy-loggers>=1.0.0,<2.0.0
murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc @ file:///mnt/d/Kodland/M6U4//thinc-8.2.4
wasabi>=0.9.1,<1.2.0
srsly>=2.5.3,<3.0.0
catalogue>=2.0.6,<2.1.0
weasel>=1.0.0,<2.0.0
confection>=1.3.2,<2.0.0
# Third-party dependencies
typer>=0.3.0,<1.0.0
tqdm>=4.38.0,<5.0.0
numpy>=1.15.0; python_version < "3.9"
numpy>=1.19.0; python_version >= "3.9"
requests>=2.13.0,<3.0.0
pydantic>=2.0.0,<3.0.0
jinja2
# Official Python utilities
setuptools
packaging>=20.0

[options.entry_points]
console_scripts =
spacy = spacy.cli:setup_cli

[options.extras_require]
lookups =
spacy_lookups_data>=1.0.3,<1.1.0
transformers =
spacy_transformers>=1.1.2,<1.4.0
cuda =
cupy>=5.0.0b4,<13.0.0
cuda80 =
cupy-cuda80>=5.0.0b4,<13.0.0
cuda90 =
cupy-cuda90>=5.0.0b4,<13.0.0
cuda91 =
cupy-cuda91>=5.0.0b4,<13.0.0
cuda92 =
cupy-cuda92>=5.0.0b4,<13.0.0
cuda100 =
cupy-cuda100>=5.0.0b4,<13.0.0
cuda101 =
cupy-cuda101>=5.0.0b4,<13.0.0
cuda102 =
cupy-cuda102>=5.0.0b4,<13.0.0
cuda110 =
cupy-cuda110>=5.0.0b4,<13.0.0
cuda111 =
cupy-cuda111>=5.0.0b4,<13.0.0
cuda112 =
cupy-cuda112>=5.0.0b4,<13.0.0
cuda113 =
cupy-cuda113>=5.0.0b4,<13.0.0
cuda114 =
cupy-cuda114>=5.0.0b4,<13.0.0
cuda115 =
cupy-cuda115>=5.0.0b4,<13.0.0
cuda116 =
cupy-cuda116>=5.0.0b4,<13.0.0
cuda117 =
cupy-cuda117>=5.0.0b4,<13.0.0
cuda11x =
cupy-cuda11x>=11.0.0,<13.0.0
cuda12x =
cupy-cuda12x>=11.5.0,<13.0.0
cuda-autodetect =
cupy-wheel>=11.0.0,<13.0.0
apple =
thinc-apple-ops>=1.0.0,<2.0.0
# Language tokenizers with external dependencies
ja =
sudachipy>=0.5.2,!=0.6.1
sudachidict_core>=20211220
ko =
natto-py>=0.9.0
th =
pythainlp>=2.0

[bdist_wheel]
universal = false

[sdist]
formats = gztar

[tool:pytest]
markers =
slow: mark a test as slow
issue: reference specific issue
filterwarnings =
error
ignore:Core Pydantic V1:UserWarning:pydantic

[mypy]
ignore_missing_imports = True
no_implicit_optional = True
plugins = pydantic.mypy, thinc.mypy
allow_redefinition = True
Loading