mirror of
https://git.in.rschanz.org/ryan77627/guix.git
synced 2025-01-24 19:49:25 -05:00
gnu: Add r-doc2vec.
* gnu/packages/cran.scm (r-doc2vec): New variable.
This commit is contained in:
parent
6b34433c90
commit
49711f621b
1 changed file with 32 additions and 0 deletions
|
@@ -17314,6 +17314,38 @@ (define-public r-rvcheck
|
|||
R packages (on CRAN, Bioconductor or Github).")
|
||||
(license license:artistic2.0)))
|
||||
|
||||
;; Guix package definition for the CRAN package "doc2vec", version 0.2.0.
;; Exported as the public variable `r-doc2vec'.
(define-public r-doc2vec
  (package
    (name "r-doc2vec")
    (version "0.2.0")
    ;; The source is downloaded with `url-fetch' from the location computed
    ;; by `cran-uri' for the upstream name and VERSION, and is checked
    ;; against the base32-encoded SHA-256 hash below.
    (source (origin
              (method url-fetch)
              (uri (cran-uri "doc2vec" version))
              (sha256
               (base32
                "0249hm0103kxxsi4gks4h20wf6p00gbrk9jf8c148mbja1l56f6v"))))
    ;; Records the upstream name ("doc2vec"), which differs from the Guix
    ;; package name ("r-doc2vec").
    ;; NOTE(review): presumably consulted by the CRAN importer/updater --
    ;; confirm against the Guix manual.
    (properties `((upstream-name . "doc2vec")))
    (build-system r-build-system)
    ;; r-rcpp is propagated, i.e. it is installed alongside this package
    ;; rather than being only a build-time input.
    (propagated-inputs (list r-rcpp))
    (home-page "https://github.com/bnosac/doc2vec")
    (synopsis "Distributed representations of sentences, documents and topics")
    ;; Continuation lines of the string start at column 0 so that no
    ;; indentation ends up inside the description text.  Sentences are
    ;; separated by two spaces, per the Texinfo convention used by Guix.
    (description
     "Learn vector representations of sentences, paragraphs or documents by
using the Paragraph Vector algorithms, namely the distributed bag of
words (PV-DBOW) and the distributed memory (PV-DM) model.  Top2vec finds
clusters in text documents by combining techniques to embed documents and
words and density-based clustering.  It does this by embedding documents in
the semantic space as defined by the doc2vec algorithm.  Next it maps these
document embeddings to a lower-dimensional space using the Uniform Manifold
Approximation and Projection (UMAP) clustering algorithm and finds dense areas
in that space using a Hierarchical Density-Based Clustering
technique (HDBSCAN).  These dense areas are the topic clusters which can be
represented by the corresponding topic vector which is an aggregate of the
document embeddings of the documents which are part of that topic cluster.  In
the same semantic space similar words can be found which are representative of
the topic.")
    (license license:expat)))
|
||||
|
||||
(define-public r-docopt
|
||||
(package
|
||||
(name "r-docopt")
|
||||
|
|
Loading…
Reference in a new issue