Open-sourcing Representation Manager

Representation Manager (RMS) serves as a centralized embedding management system, providing SimClusters or other embeddings as a facade over the underlying storage or services.
This commit is contained in:
twitter-team 2023-04-18 13:04:42 -07:00
parent 197bf2c563
commit 43cdcf2ed6
42 changed files with 3439 additions and 0 deletions

View File

@ -0,0 +1 @@
# This prevents SQ query from grabbing //:all since it traverses up once to find a BUILD

View File

@ -0,0 +1,4 @@
# Representation Manager #
**Representation Manager** (RMS) serves as a centralized embedding management system, providing SimClusters or other embeddings as a facade over the underlying storage or services.

View File

@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Runs the //relevance-platform/src/main/python/deploy target through `bazel run` with
# JOB=representation-manager set in its environment, forwarding every command-line argument
# of this script to that target. Bazel's info/stdout/stderr UI events and its progress
# display are filtered out.
JOB=representation-manager bazel run --ui_event_filters=-info,-stdout,-stderr --noshow_progress \
//relevance-platform/src/main/python/deploy -- "$@"

View File

@ -0,0 +1,17 @@
# Scala library target compiled with the "fatal_warnings" option set on the java8 platform.
# No name or sources are given, so the build tool's defaults apply
# (presumably the Scala sources in this directory - confirm against the build rules).
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"finatra/inject/inject-thrift-client",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/readablestore",
"representation-manager/client/src/main/scala/com/twitter/representation_manager/config",
"representation-manager/server/src/main/thrift:thrift-scala",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"stitch/stitch-storehaus",
"strato/src/main/scala/com/twitter/strato/client",
],
)

View File

@ -0,0 +1,208 @@
package com.twitter.representation_manager
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.representation_manager.config.ClientConfig
import com.twitter.representation_manager.config.DisabledInMemoryCacheParams
import com.twitter.representation_manager.config.EnabledInMemoryCacheParams
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.LocaleEntityId
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.strato.thrift.ScroogeConvImplicits._
/**
 * Builds [[ReadableStore]]s that serve SimClusters embeddings for a given
 * [[SimClustersEmbeddingView]] (i.e. embeddingType and modelVersion).
 *
 * Every store fetches from a Strato column under `recommendations/representation_manager/`,
 * converts the thrift embedding into a [[SimClustersEmbedding]], and is then wrapped according
 * to the supplied [[ClientConfig]]: in an [[ObservedCachedReadableStore]] when in-memory caching
 * is enabled for the view's (embeddingType, modelVersion), or in a plain
 * [[ObservedReadableStore]] otherwise.
 *
 * @param clientConfig    per-service client settings; only its in-memory cache config is read here
 * @param stratoClient    Strato client used to fetch from the RMS columns
 * @param memCachedClient memcached client; it is not referenced anywhere in this class at present
 * @param globalStats     root stats receiver; stores report under
 *                        `representation_manager_client.<simple class name>`
 */
class StoreBuilder(
  clientConfig: ClientConfig,
  stratoClient: StratoClient,
  memCachedClient: MemcachedClient,
  globalStats: StatsReceiver,
) {
  private val stats =
    globalStats.scope("representation_manager_client").scope(this.getClass.getSimpleName)

  // Column consts
  private val ColPathPrefix = "recommendations/representation_manager/"
  private val SimclustersTweetColPath = ColPathPrefix + "simClustersEmbedding.Tweet"
  private val SimclustersUserColPath = ColPathPrefix + "simClustersEmbedding.User"
  private val SimclustersTopicIdColPath = ColPathPrefix + "simClustersEmbedding.TopicId"
  private val SimclustersLocaleEntityIdColPath =
    ColPathPrefix + "simClustersEmbedding.LocaleEntityId"

  // `windowSize` handed to ObservedCachedReadableStore for every cached store
  private val InMemCacheWindowSize = 10000L

  /** Uncached store over the Tweet column, keyed by tweet id. */
  private def rawTweetStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersTweetColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  /** Uncached store over the User column, keyed by user id. */
  private def rawUserStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersUserColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  /** Uncached store over the TopicId column, keyed by [[TopicId]]. */
  private def rawTopicIdStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[TopicId, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[TopicId, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersTopicIdColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  /** Uncached store over the LocaleEntityId column, keyed by [[LocaleEntityId]]. */
  private def rawLocaleEntityIdStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[LocaleEntityId, SimClustersEmbedding] =
    StratoFetchableStore
      .withView[LocaleEntityId, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        SimclustersLocaleEntityIdColPath,
        embeddingColumnView)
      .mapValues(SimClustersEmbedding(_))

  /** Tweet embedding store keyed by tweet id, wrapped per the client config. */
  def buildSimclustersTweetEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    addCacheLayer(rawTweetStore(embeddingColumnView), embeddingColumnView)

  /** User embedding store keyed by user id, wrapped per the client config. */
  def buildSimclustersUserEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[Long, SimClustersEmbedding] =
    addCacheLayer(rawUserStore(embeddingColumnView), embeddingColumnView)

  /** Topic embedding store keyed by [[TopicId]], wrapped per the client config. */
  def buildSimclustersTopicIdEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[TopicId, SimClustersEmbedding] =
    addCacheLayer(rawTopicIdStore(embeddingColumnView), embeddingColumnView)

  /** Locale-entity embedding store keyed by [[LocaleEntityId]], wrapped per the client config. */
  def buildSimclustersLocaleEntityIdEmbeddingStore(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[LocaleEntityId, SimClustersEmbedding] =
    addCacheLayer(rawLocaleEntityIdStore(embeddingColumnView), embeddingColumnView)

  /**
   * Tweet embedding store keyed by [[SimClustersEmbeddingId]].
   *
   * Only the tweet id inside the key is used for the fetch; the key's other two fields are
   * ignored. The key mapping is partial: a key whose internal id is not an
   * `InternalId.TweetId` fails with a `scala.MatchError`.
   */
  def buildSimclustersTweetEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      rawTweetStore(embeddingColumnView).composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.TweetId(tweetId)) =>
          tweetId
      }
    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  /**
   * User embedding store keyed by [[SimClustersEmbeddingId]].
   *
   * Only the user id inside the key is used for the fetch. The key mapping is partial: a key
   * whose internal id is not an `InternalId.UserId` fails with a `scala.MatchError`.
   */
  def buildSimclustersUserEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      rawUserStore(embeddingColumnView).composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) =>
          userId
      }
    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  /**
   * Same store as [[buildSimclustersTopicIdEmbeddingStoreWithEmbeddingIdAsKey]]; both names are
   * kept so existing callers of either keep working.
   */
  def buildSimclustersTopicEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    buildSimclustersTopicIdEmbeddingStoreWithEmbeddingIdAsKey(embeddingColumnView)

  /**
   * Topic embedding store keyed by [[SimClustersEmbeddingId]].
   *
   * Only the topic id inside the key is used for the fetch. The key mapping is partial: a key
   * whose internal id is not an `InternalId.TopicId` fails with a `scala.MatchError`.
   */
  def buildSimclustersTopicIdEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      rawTopicIdStore(embeddingColumnView).composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.TopicId(topicId)) =>
          topicId
      }
    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  /**
   * Locale-entity embedding store keyed by [[SimClustersEmbeddingId]].
   *
   * Only the locale entity id inside the key is used for the fetch. The key mapping is partial:
   * a key whose internal id is not an `InternalId.LocaleEntityId` fails with a
   * `scala.MatchError`.
   */
  def buildSimclustersLocaleEntityIdEmbeddingStoreWithEmbeddingIdAsKey(
    embeddingColumnView: SimClustersEmbeddingView
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val embeddingIdAsKeyStore =
      rawLocaleEntityIdStore(embeddingColumnView).composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.LocaleEntityId(localeEntityId)) =>
          localeEntityId
      }
    addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView)
  }

  /**
   * Wraps `rawStore` according to the in-memory cache setup that the client config holds for the
   * view's (embeddingType, modelVersion).
   *
   * Stats are scoped by embedding type name and then model version name in both cases.
   *
   * @param rawStore            store to wrap
   * @param embeddingColumnView view whose embeddingType and modelVersion select the cache setup
   * @return an [[ObservedCachedReadableStore]] when caching is enabled for the view, otherwise
   *         an [[ObservedReadableStore]]
   */
  private def addCacheLayer[K](
    rawStore: ReadableStore[K, SimClustersEmbedding],
    embeddingColumnView: SimClustersEmbeddingView,
  ): ReadableStore[K, SimClustersEmbedding] = {
    // Add in-memory caching based on ClientConfig
    val inMemCacheParams = clientConfig.inMemoryCacheConfig
      .getCacheSetup(embeddingColumnView.embeddingType, embeddingColumnView.modelVersion)

    val statsPerStore = stats
      .scope(embeddingColumnView.embeddingType.name)
      .scope(embeddingColumnView.modelVersion.name)

    inMemCacheParams match {
      case DisabledInMemoryCacheParams =>
        ObservedReadableStore(
          store = rawStore
        )(statsPerStore)
      case EnabledInMemoryCacheParams(ttl, maxKeys, cacheName) =>
        ObservedCachedReadableStore.from[K, SimClustersEmbedding](
          rawStore,
          ttl = ttl,
          maxKeys = maxKeys,
          cacheName = cacheName,
          windowSize = InMemCacheWindowSize
        )(statsPerStore)
    }
  }
}

View File

@ -0,0 +1,12 @@
# Scala library target compiled with the "fatal_warnings" option set on the java8 platform.
# No name or sources are given, so the build tool's defaults apply
# (presumably the Scala sources in this directory - confirm against the build rules).
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"finatra/inject/inject-thrift-client",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
"representation-manager/server/src/main/thrift:thrift-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"strato/src/main/scala/com/twitter/strato/client",
],
)

View File

@ -0,0 +1,25 @@
package com.twitter.representation_manager.config
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.ModelVersion
/*
 * RMS client configuration.
 * For now the only supported customisation is the in-memory cache setup per
 * (EmbeddingType, ModelVersion); other knobs (e.g. request timeout) are expected to follow.
 *
 * --------------------------------------------
 * PLEASE NOTE:
 * An in-memory cache is not necessarily a free performance win. Anyone considering it should
 * investigate before enabling it.
 * */
class ClientConfig(
  inMemCacheParamsOverrides: Map[
    (EmbeddingType, ModelVersion),
    InMemoryCacheParams
  ] = Map.empty) {

  // In-memory cache params per embedding: the defaults come first, so an override replaces the
  // default entry that has the same key
  val inMemCacheParams: Map[(EmbeddingType, ModelVersion), InMemoryCacheParams] =
    DefaultInMemoryCacheConfig.cacheParamsMap ++ inMemCacheParamsOverrides

  // Lookup wrapper over the merged params
  val inMemoryCacheConfig: InMemoryCacheConfig =
    new InMemoryCacheConfig(inMemCacheParams)
}
/* ClientConfig built with no overrides, i.e. with only the default in-memory cache params. */
object DefaultClientConfig extends ClientConfig

View File

@ -0,0 +1,53 @@
package com.twitter.representation_manager.config
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.util.Duration
/*
* --------------------------------------------
* PLEASE NOTE:
* Having in-memory cache is not necessarily a free performance win, anyone considering it should
* investigate rather than blindly enabling it
* --------------------------------------------
* */
/*
 * In-memory cache setting for a single embedding store: either EnabledInMemoryCacheParams or
 * DisabledInMemoryCacheParams.
 */
sealed trait InMemoryCacheParams
/*
 * Holds the params that are required to set up an in-memory cache for a single embedding store.
 *
 * ttl:       cache time-to-live (presumably per cached entry - confirm against the cache used)
 * maxKeys:   key-count limit of the cache (presumably its maximum size - confirm)
 * cacheName: name given to the cache
 */
case class EnabledInMemoryCacheParams(
ttl: Duration,
maxKeys: Int,
cacheName: String)
extends InMemoryCacheParams
/* Marker value meaning "no in-memory cache" for an embedding store. */
object DisabledInMemoryCacheParams extends InMemoryCacheParams
/*
 * In-memory cache config: a lookup from (EmbeddingType, ModelVersion) to the cache params of
 * that embedding. Clients can pass their own cacheParamsMap to create a new InMemoryCacheConfig
 * instead of using the DefaultInMemoryCacheConfig object below.
 * */
class InMemoryCacheConfig(
  cacheParamsMap: Map[
    (EmbeddingType, ModelVersion),
    InMemoryCacheParams
  ] = Map.empty) {

  /*
   * Returns the cache params configured for the given embedding type and model version, or
   * DisabledInMemoryCacheParams when that pair has no entry.
   */
  def getCacheSetup(
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): InMemoryCacheParams = {
    val lookupKey = (embeddingType, modelVersion)
    // An unknown (embeddingType, modelVersion) pair means "caching disabled"
    cacheParamsMap.get(lookupKey).getOrElse(DisabledInMemoryCacheParams)
  }
}
/*
 * Default config for the in-memory cache: no embedding has in-memory caching enabled.
 * Clients can directly import and use this one if they don't want to set up a customised config.
 *
 * Note that the `cacheParamsMap` val below is a separate member from the constructor argument
 * of InMemoryCacheConfig that getCacheSetup reads; this object relies on that argument's empty
 * default. The val is what ClientConfig merges caller overrides into.
 * */
object DefaultInMemoryCacheConfig extends InMemoryCacheConfig {
  // set default to no in-memory caching
  // The type is spelled out because a bare `Map.empty` is inferred as Map[Nothing, Nothing],
  // which cannot be looked up by key or passed on as an InMemoryCacheConfig argument.
  val cacheParamsMap: Map[(EmbeddingType, ModelVersion), InMemoryCacheParams] = Map.empty
}

View File

@ -0,0 +1,21 @@
# Runnable JVM binary named "representation-manager"; its entry point is
# com.twitter.representation_manager.RepresentationManagerFedServerMain.
jvm_binary(
name = "bin",
basename = "representation-manager",
main = "com.twitter.representation_manager.RepresentationManagerFedServerMain",
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"finatra/inject/inject-logback/src/main/scala",
"loglens/loglens-logback/src/main/scala/com/twitter/loglens/logback",
"representation-manager/server/src/main/resources",
"representation-manager/server/src/main/scala/com/twitter/representation_manager",
"twitter-server/logback-classic/src/main/scala",
],
)
# Aurora Workflows build phase convention requires a jvm_app named with ${project-name}-app
# This app bundles the ":bin" binary target as a zip archive.
jvm_app(
name = "representation-manager-app",
archive = "zip",
binary = ":bin",
)

View File

@ -0,0 +1,7 @@
# Resource target covering the XML files in this directory and the YAML files under config/.
resources(
sources = [
"*.xml",
"config/*.yml",
],
tags = ["bazel-compatible"],
)

View File

@ -0,0 +1,219 @@
# ---------- traffic percentage by embedding type and model version ----------
# Decider strings are built dynamically following the rule below
# i.e. s"enable_${embeddingType.name}_${modelVersion.name}"
# Hence this should be updated accordingly if usage is changed in the embedding stores
# Tweet embeddings
"enable_LogFavBasedTweet_Model20m145k2020":
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavBasedTweet - Model20m145k2020. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_LogFavBasedTweet_Model20m145kUpdated":
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavBasedTweet - Model20m145kUpdated. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_LogFavLongestL2EmbeddingTweet_Model20m145k2020":
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavLongestL2EmbeddingTweet - Model20m145k2020. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_LogFavLongestL2EmbeddingTweet_Model20m145kUpdated":
comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavLongestL2EmbeddingTweet - Model20m145kUpdated. 0 means return EMPTY for all requests."
default_availability: 10000
# Topic embeddings
"enable_FavTfgTopic_Model20m145k2020":
comment: "Enable the read traffic to FavTfgTopic - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_LogFavBasedKgoApeTopic_Model20m145k2020":
comment: "Enable the read traffic to LogFavBasedKgoApeTopic - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
# User embeddings - KnownFor
"enable_FavBasedProducer_Model20m145kUpdated":
comment: "Enable the read traffic to FavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_FavBasedProducer_Model20m145k2020":
comment: "Enable the read traffic to FavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_FollowBasedProducer_Model20m145k2020":
comment: "Enable the read traffic to FollowBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_AggregatableFavBasedProducer_Model20m145kUpdated":
comment: "Enable the read traffic to AggregatableFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_AggregatableFavBasedProducer_Model20m145k2020":
comment: "Enable the read traffic to AggregatableFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_AggregatableLogFavBasedProducer_Model20m145kUpdated":
comment: "Enable the read traffic to AggregatableLogFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_AggregatableLogFavBasedProducer_Model20m145k2020":
comment: "Enable the read traffic to AggregatableLogFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_RelaxedAggregatableLogFavBasedProducer_Model20m145kUpdated:
comment: "Enable the read traffic to RelaxedAggregatableLogFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
enable_RelaxedAggregatableLogFavBasedProducer_Model20m145k2020:
comment: "Enable the read traffic to RelaxedAggregatableLogFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
# User embeddings - InterestedIn
"enable_LogFavBasedUserInterestedInFromAPE_Model20m145k2020":
comment: "Enable the read traffic to LogFavBasedUserInterestedInFromAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_FollowBasedUserInterestedInFromAPE_Model20m145k2020":
comment: "Enable the read traffic to FollowBasedUserInterestedInFromAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_FavBasedUserInterestedIn_Model20m145kUpdated":
comment: "Enable the read traffic to FavBasedUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_FavBasedUserInterestedIn_Model20m145k2020":
comment: "Enable the read traffic to FavBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_FollowBasedUserInterestedIn_Model20m145k2020":
comment: "Enable the read traffic to FollowBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_LogFavBasedUserInterestedIn_Model20m145k2020":
comment: "Enable the read traffic to LogFavBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_FavBasedUserInterestedInFromPE_Model20m145kUpdated":
comment: "Enable the read traffic to FavBasedUserInterestedInFromPE - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_FilteredUserInterestedIn_Model20m145kUpdated":
comment: "Enable the read traffic to FilteredUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_FilteredUserInterestedIn_Model20m145k2020":
comment: "Enable the read traffic to FilteredUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_FilteredUserInterestedInFromPE_Model20m145kUpdated":
comment: "Enable the read traffic to FilteredUserInterestedInFromPE - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_UnfilteredUserInterestedIn_Model20m145kUpdated":
comment: "Enable the read traffic to UnfilteredUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_UnfilteredUserInterestedIn_Model20m145k2020":
comment: "Enable the read traffic to UnfilteredUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_UserNextInterestedIn_Model20m145k2020":
comment: "Enable the read traffic to UserNextInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
comment: "Enable the read traffic to LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_LogFavBasedUserInterestedAverageAddressBookFromIIAPE_Model20m145k2020":
comment: "Enable the read traffic to LogFavBasedUserInterestedAverageAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
comment: "Enable the read traffic to LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
comment: "Enable the read traffic to LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
comment: "Enable the read traffic to LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
"enable_LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE_Model20m145k2020":
comment: "Enable the read traffic to LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
default_availability: 10000
# ---------- load shedding by caller id ----------
# To create a new decider, add here with the same format and caller's details :
# "representation-manager_load_shed_by_caller_id_twtr:{{role}}:{{name}}:{{environment}}:{{cluster}}"
# All the deciders below are generated by this script:
# ./strato/bin/fed deciders representation-manager --service-role=representation-manager --service-name=representation-manager
# If you need to run the script and paste the output, add ONLY the prod deciders here.
"representation-manager_load_shed_by_caller_id_all":
comment: "Reject all traffic from caller id: all"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:cr-mixer:cr-mixer:prod:atla":
comment: "Reject all traffic from caller id: twtr:svc:cr-mixer:cr-mixer:prod:atla"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:cr-mixer:cr-mixer:prod:pdxa":
comment: "Reject all traffic from caller id: twtr:svc:cr-mixer:cr-mixer:prod:pdxa"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-1:prod:atla":
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-1:prod:atla"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-1:prod:pdxa":
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-1:prod:pdxa"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-3:prod:atla":
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-3:prod:atla"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-3:prod:pdxa":
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-3:prod:pdxa"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-4:prod:atla":
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-4:prod:atla"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-4:prod:pdxa":
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-4:prod:pdxa"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:atla":
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:atla"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:pdxa":
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:pdxa"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann:prod:atla":
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann:prod:atla"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann:prod:pdxa":
comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann:prod:pdxa"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoapi:prod:atla":
comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoapi:prod:atla"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoserver:prod:atla":
comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoserver:prod:atla"
default_availability: 0
"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoserver:prod:pdxa":
comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoserver:prod:pdxa"
default_availability: 0
# ---------- Dark Traffic Proxy ----------
representation-manager_forward_dark_traffic:
comment: "Defines the percentage of traffic to forward to diffy-proxy. Set to 0 to disable dark traffic forwarding"
default_availability: 0

View File

@ -0,0 +1,165 @@
<configuration>
<shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/>
<!-- ===================================================== -->
<!-- Service Config -->
<!-- ===================================================== -->
<property name="DEFAULT_SERVICE_PATTERN"
value="%-16X{traceId} %-12X{clientId:--} %-16X{method} %-25logger{0} %msg"/>
<property name="DEFAULT_ACCESS_PATTERN"
value="%msg"/>
<!-- ===================================================== -->
<!-- Common Config -->
<!-- ===================================================== -->
<!-- JUL/JDK14 to Logback bridge -->
<contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
<resetJUL>true</resetJUL>
</contextListener>
<!-- ====================================================================================== -->
<!-- NOTE: The following appenders use a simple TimeBasedRollingPolicy configuration. -->
<!-- You may want to consider using a more advanced SizeAndTimeBasedRollingPolicy. -->
<!-- See: https://logback.qos.ch/manual/appenders.html#SizeAndTimeBasedRollingPolicy -->
<!-- ====================================================================================== -->
<!-- Service Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
<appender name="SERVICE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${log.service.output}</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>${log.service.output}.%d.gz</fileNamePattern>
<!-- the maximum total size of all the log files -->
<totalSizeCap>3GB</totalSizeCap>
<!-- keep maximum 21 days' worth of history -->
<maxHistory>21</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
</encoder>
</appender>
<!-- Access Log (rollover daily, keep maximum of 7 days of gzip compressed logs) -->
<appender name="ACCESS" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${log.access.output}</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>${log.access.output}.%d.gz</fileNamePattern>
<!-- the maximum total size of all the log files -->
<totalSizeCap>100MB</totalSizeCap>
<!-- keep maximum 7 days' worth of history -->
<maxHistory>7</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>${DEFAULT_ACCESS_PATTERN}%n</pattern>
</encoder>
</appender>
<!--LogLens -->
<appender name="LOGLENS" class="com.twitter.loglens.logback.LoglensAppender">
<mdcAdditionalContext>true</mdcAdditionalContext>
<category>${log.lens.category}</category>
<index>${log.lens.index}</index>
<tag>${log.lens.tag}/service</tag>
<encoder>
<pattern>%msg</pattern>
</encoder>
</appender>
<!-- LogLens Access -->
<appender name="LOGLENS-ACCESS" class="com.twitter.loglens.logback.LoglensAppender">
<mdcAdditionalContext>true</mdcAdditionalContext>
<category>${log.lens.category}</category>
<index>${log.lens.index}</index>
<tag>${log.lens.tag}/access</tag>
<encoder>
<pattern>%msg</pattern>
</encoder>
</appender>
<!-- Pipeline Execution Logs -->
<appender name="ALLOW-LISTED-PIPELINE-EXECUTIONS" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>allow_listed_pipeline_executions.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>allow_listed_pipeline_executions.log.%d.gz</fileNamePattern>
<!-- the maximum total size of all the log files -->
<totalSizeCap>100MB</totalSizeCap>
<!-- keep maximum 7 days' worth of history -->
<maxHistory>7</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
</encoder>
</appender>
<!-- ===================================================== -->
<!-- Primary Async Appenders -->
<!-- ===================================================== -->
<property name="async_queue_size" value="${queue.size:-50000}"/>
<property name="async_max_flush_time" value="${max.flush.time:-0}"/>
<appender name="ASYNC-SERVICE" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="SERVICE"/>
</appender>
<appender name="ASYNC-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="ACCESS"/>
</appender>
<appender name="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="ALLOW-LISTED-PIPELINE-EXECUTIONS"/>
</appender>
<appender name="ASYNC-LOGLENS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="LOGLENS"/>
</appender>
<appender name="ASYNC-LOGLENS-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="LOGLENS-ACCESS"/>
</appender>
<!-- ===================================================== -->
<!-- Package Config -->
<!-- ===================================================== -->
<!-- Per-Package Config -->
<logger name="com.twitter" level="INHERITED"/>
<logger name="com.twitter.wilyns" level="INHERITED"/>
<logger name="com.twitter.configbus.client.file" level="INHERITED"/>
<logger name="com.twitter.finagle.mux" level="INHERITED"/>
<logger name="com.twitter.finagle.serverset2" level="INHERITED"/>
<logger name="com.twitter.logging.ScribeHandler" level="INHERITED"/>
<logger name="com.twitter.zookeeper.client.internal" level="INHERITED"/>
<!-- Root Config -->
<!-- For all logs except access logs, disable logging below log_level level by default. This can be overridden in the per-package loggers, and dynamically in the admin panel of individual instances. -->
<root level="${log_level:-INFO}">
<appender-ref ref="ASYNC-SERVICE"/>
<appender-ref ref="ASYNC-LOGLENS"/>
</root>
<!-- Access Logging -->
<!-- Access logs are turned off by default -->
<logger name="com.twitter.finatra.thrift.filters.AccessLoggingFilter" level="OFF" additivity="false">
<appender-ref ref="ASYNC-ACCESS"/>
<appender-ref ref="ASYNC-LOGLENS-ACCESS"/>
</logger>
</configuration>

View File

@ -0,0 +1,13 @@
# Library target whose dependencies are the topic, tweet and user column packages, the
# Strato fed / fed-server libraries and Finatra's inject-thrift-client.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"finatra/inject/inject-thrift-client",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user",
"strato/src/main/scala/com/twitter/strato/fed",
"strato/src/main/scala/com/twitter/strato/fed/server",
],
)

View File

@ -0,0 +1,40 @@
package com.twitter.representation_manager
import com.google.inject.Module
import com.twitter.inject.thrift.modules.ThriftClientIdModule
import com.twitter.representation_manager.columns.topic.LocaleEntityIdSimClustersEmbeddingCol
import com.twitter.representation_manager.columns.topic.TopicIdSimClustersEmbeddingCol
import com.twitter.representation_manager.columns.tweet.TweetSimClustersEmbeddingCol
import com.twitter.representation_manager.columns.user.UserSimClustersEmbeddingCol
import com.twitter.representation_manager.modules.CacheModule
import com.twitter.representation_manager.modules.InterestsThriftClientModule
import com.twitter.representation_manager.modules.LegacyRMSConfigModule
import com.twitter.representation_manager.modules.StoreModule
import com.twitter.representation_manager.modules.TimerModule
import com.twitter.representation_manager.modules.UttClientModule
import com.twitter.strato.fed._
import com.twitter.strato.fed.server._
/** Concrete singleton of [[RepresentationManagerFedServer]] (presumably the process entry point). */
object RepresentationManagerFedServerMain extends RepresentationManagerFedServer

/**
 * Strato federation server definition for Representation Manager.
 *
 * It declares three things: the destination path of the service, the Guice modules that are
 * installed, and the Strato column classes that are served.
 */
trait RepresentationManagerFedServer extends StratoFedServer {

  override def dest: String = "/s/representation-manager/representation-manager"

  // Guice modules installed into the server.
  override val modules: Seq[Module] = Seq(
    CacheModule,
    InterestsThriftClientModule,
    LegacyRMSConfigModule,
    StoreModule,
    ThriftClientIdModule,
    TimerModule,
    UttClientModule
  )

  // SimClusters embedding columns served: tweet, user and the two topic-keyed variants.
  override def columns: Seq[Class[_ <: StratoFed.Column]] = Seq(
    classOf[TweetSimClustersEmbeddingCol],
    classOf[UserSimClustersEmbeddingCol],
    classOf[TopicIdSimClustersEmbeddingCol],
    classOf[LocaleEntityIdSimClustersEmbeddingCol]
  )
}

View File

@ -0,0 +1,9 @@
# Library target that depends only on the Strato fed and fed-server libraries.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"strato/src/main/scala/com/twitter/strato/fed",
"strato/src/main/scala/com/twitter/strato/fed/server",
],
)

View File

@ -0,0 +1,26 @@
package com.twitter.representation_manager.columns
import com.twitter.strato.access.Access.LdapGroup
import com.twitter.strato.config.ContactInfo
import com.twitter.strato.config.FromColumns
import com.twitter.strato.config.Has
import com.twitter.strato.config.Prefix
import com.twitter.strato.config.ServiceIdentifierPattern
/**
 * Shared Strato column configuration: two permission lists (both empty here) and the
 * contact information record.
 */
object ColumnConfigBase {

  /* ---------------- Internal permissions ---------------- */
  val recosPermissions: Seq[com.twitter.strato.config.Policy] = Seq.empty

  /* ---------------- External permissions ---------------- */
  // Used to grant limited access to members outside of the RP team.
  val externalPermissions: Seq[com.twitter.strato.config.Policy] = Seq.empty

  // Contact details attached to columns that reference this object.
  val contactInfo: ContactInfo = ContactInfo(
    description = "Please contact Relevance Platform for more details",
    contactEmail = "no-reply@twitter.com",
    ldapGroup = "ldap",
    jiraProject = "JIRA",
    links = Seq("http://go/rms-runbook")
  )
}

View File

@ -0,0 +1,14 @@
# Library target depending on the shared columns package, the RMS modules and store packages,
# the server thrift definitions, Finatra inject-core and the Strato fed / fed-server libraries.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"finatra/inject/inject-core/src/main/scala",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/modules",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
"representation-manager/server/src/main/thrift:thrift-scala",
"strato/src/main/scala/com/twitter/strato/fed",
"strato/src/main/scala/com/twitter/strato/fed/server",
],
)

View File

@ -0,0 +1,77 @@
package com.twitter.representation_manager.columns.topic
import com.twitter.representation_manager.columns.ColumnConfigBase
import com.twitter.representation_manager.store.TopicSimClustersEmbeddingStore
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclusters_v2.thriftscala.LocaleEntityId
import com.twitter.stitch
import com.twitter.stitch.Stitch
import com.twitter.stitch.storehaus.StitchOfReadableStore
import com.twitter.strato.catalog.OpMetadata
import com.twitter.strato.config.AnyOf
import com.twitter.strato.config.ContactInfo
import com.twitter.strato.config.FromColumns
import com.twitter.strato.config.Policy
import com.twitter.strato.config.Prefix
import com.twitter.strato.data.Conv
import com.twitter.strato.data.Description.PlainText
import com.twitter.strato.data.Lifecycle
import com.twitter.strato.fed._
import com.twitter.strato.thrift.ScroogeConv
import javax.inject.Inject
/**
 * Strato column that serves topic SimClusters embeddings keyed by [[LocaleEntityId]].
 *
 * The embedding type and model version come from the [[SimClustersEmbeddingView]]; the lookup
 * itself is delegated to the injected [[TopicSimClustersEmbeddingStore]].
 */
class LocaleEntityIdSimClustersEmbeddingCol @Inject() (
  embeddingStore: TopicSimClustersEmbeddingStore)
    extends StratoFed.Column(
      "recommendations/representation_manager/simClustersEmbedding.LocaleEntityId")
    with StratoFed.Fetch.Stitch {

  override type Key = LocaleEntityId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  // Stitch-based lookup over the topic store, with values converted to their thrift form.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] = {
    val thriftStore = embeddingStore.topicSimClustersEmbeddingStore.mapValues(_.toThrift)
    StitchOfReadableStore(thriftStore)
  }

  // Shared permissions plus access for columns under the ml/featureStore/simClusters prefix.
  val colPermissions: Seq[com.twitter.strato.config.Policy] = {
    val featureStoreColumns = FromColumns(Set(Prefix("ml/featureStore/simClusters")))
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+ featureStoreColumns
  }

  override val policy: Policy = AnyOf(colPermissions)

  override val keyConv: Conv[Key] = ScroogeConv.fromStruct[LocaleEntityId]
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = OpMetadata(
    lifecycle = Some(Lifecycle.Production),
    description = Some(
      PlainText(
        "The Topic SimClusters Embedding Endpoint in Representation Management Service with LocaleEntityId." +
          " TDD: http://go/rms-tdd"))
  )

  /**
   * Looks up the embedding identified by (view.embeddingType, view.modelVersion, key).
   *
   * A `stitch.NotFound` failure from the store is turned into `missing`; no other failure is
   * handled here.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val id = SimClustersEmbeddingId(
      view.embeddingType,
      view.modelVersion,
      InternalId.LocaleEntityId(key)
    )
    val lookup = storeStitch(id)
    lookup
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}

View File

@ -0,0 +1,74 @@
package com.twitter.representation_manager.columns.topic
import com.twitter.representation_manager.columns.ColumnConfigBase
import com.twitter.representation_manager.store.TopicSimClustersEmbeddingStore
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.stitch
import com.twitter.stitch.Stitch
import com.twitter.stitch.storehaus.StitchOfReadableStore
import com.twitter.strato.catalog.OpMetadata
import com.twitter.strato.config.AnyOf
import com.twitter.strato.config.ContactInfo
import com.twitter.strato.config.FromColumns
import com.twitter.strato.config.Policy
import com.twitter.strato.config.Prefix
import com.twitter.strato.data.Conv
import com.twitter.strato.data.Description.PlainText
import com.twitter.strato.data.Lifecycle
import com.twitter.strato.fed._
import com.twitter.strato.thrift.ScroogeConv
import javax.inject.Inject
/**
 * Strato column that serves topic SimClusters embeddings keyed by [[TopicId]].
 *
 * The embedding type and model version come from the [[SimClustersEmbeddingView]]; the lookup
 * itself is delegated to the injected [[TopicSimClustersEmbeddingStore]].
 */
class TopicIdSimClustersEmbeddingCol @Inject() (embeddingStore: TopicSimClustersEmbeddingStore)
    extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.TopicId")
    with StratoFed.Fetch.Stitch {

  override type Key = TopicId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  // Stitch-based lookup over the topic store, with values converted to their thrift form.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] = {
    val thriftStore = embeddingStore.topicSimClustersEmbeddingStore.mapValues(_.toThrift)
    StitchOfReadableStore(thriftStore)
  }

  // Shared permissions plus access for columns under the ml/featureStore/simClusters prefix.
  val colPermissions: Seq[com.twitter.strato.config.Policy] = {
    val featureStoreColumns = FromColumns(Set(Prefix("ml/featureStore/simClusters")))
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+ featureStoreColumns
  }

  override val policy: Policy = AnyOf(colPermissions)

  override val keyConv: Conv[Key] = ScroogeConv.fromStruct[TopicId]
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = OpMetadata(
    lifecycle = Some(Lifecycle.Production),
    description = Some(
      PlainText(
        "The Topic SimClusters Embedding Endpoint in Representation Management Service with TopicId." +
          " TDD: http://go/rms-tdd"))
  )

  /**
   * Looks up the embedding identified by (view.embeddingType, view.modelVersion, key).
   *
   * A `stitch.NotFound` failure from the store is turned into `missing`; no other failure is
   * handled here.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val id = SimClustersEmbeddingId(
      view.embeddingType,
      view.modelVersion,
      InternalId.TopicId(key)
    )
    val lookup = storeStitch(id)
    lookup
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}

View File

@ -0,0 +1,14 @@
# Library target depending on the shared columns package, the RMS modules and store packages,
# the server thrift definitions, Finatra inject-core and the Strato fed / fed-server libraries.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"finatra/inject/inject-core/src/main/scala",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/modules",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
"representation-manager/server/src/main/thrift:thrift-scala",
"strato/src/main/scala/com/twitter/strato/fed",
"strato/src/main/scala/com/twitter/strato/fed/server",
],
)

View File

@ -0,0 +1,73 @@
package com.twitter.representation_manager.columns.tweet
import com.twitter.representation_manager.columns.ColumnConfigBase
import com.twitter.representation_manager.store.TweetSimClustersEmbeddingStore
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.stitch
import com.twitter.stitch.Stitch
import com.twitter.stitch.storehaus.StitchOfReadableStore
import com.twitter.strato.catalog.OpMetadata
import com.twitter.strato.config.AnyOf
import com.twitter.strato.config.ContactInfo
import com.twitter.strato.config.FromColumns
import com.twitter.strato.config.Policy
import com.twitter.strato.config.Prefix
import com.twitter.strato.data.Conv
import com.twitter.strato.data.Description.PlainText
import com.twitter.strato.data.Lifecycle
import com.twitter.strato.fed._
import com.twitter.strato.thrift.ScroogeConv
import javax.inject.Inject
/**
 * Strato column that serves tweet SimClusters embeddings keyed by tweet id.
 *
 * The embedding type and model version come from the [[SimClustersEmbeddingView]]; the lookup
 * itself is delegated to the injected [[TweetSimClustersEmbeddingStore]].
 */
class TweetSimClustersEmbeddingCol @Inject() (embeddingStore: TweetSimClustersEmbeddingStore)
    extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.Tweet")
    with StratoFed.Fetch.Stitch {

  override type Key = Long // TweetId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  // Stitch-based lookup over the tweet store, with values converted to their thrift form.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] = {
    val thriftStore = embeddingStore.tweetSimClustersEmbeddingStore.mapValues(_.toThrift)
    StitchOfReadableStore(thriftStore)
  }

  // Shared permissions plus access for columns under the ml/featureStore/simClusters prefix.
  val colPermissions: Seq[com.twitter.strato.config.Policy] = {
    val featureStoreColumns = FromColumns(Set(Prefix("ml/featureStore/simClusters")))
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+ featureStoreColumns
  }

  override val policy: Policy = AnyOf(colPermissions)

  override val keyConv: Conv[Key] = Conv.long
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = OpMetadata(
    lifecycle = Some(Lifecycle.Production),
    description = Some(
      PlainText(
        "The Tweet SimClusters Embedding Endpoint in Representation Management Service." +
          " TDD: http://go/rms-tdd"))
  )

  /**
   * Looks up the embedding identified by (view.embeddingType, view.modelVersion, key).
   *
   * A `stitch.NotFound` failure from the store is turned into `missing`; no other failure is
   * handled here.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val id = SimClustersEmbeddingId(
      view.embeddingType,
      view.modelVersion,
      InternalId.TweetId(key)
    )
    val lookup = storeStitch(id)
    lookup
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}

View File

@ -0,0 +1,14 @@
# Library target depending on the shared columns package, the RMS modules and store packages,
# the server thrift definitions, Finatra inject-core and the Strato fed / fed-server libraries.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"finatra/inject/inject-core/src/main/scala",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/columns",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/modules",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
"representation-manager/server/src/main/thrift:thrift-scala",
"strato/src/main/scala/com/twitter/strato/fed",
"strato/src/main/scala/com/twitter/strato/fed/server",
],
)

View File

@ -0,0 +1,73 @@
package com.twitter.representation_manager.columns.user
import com.twitter.representation_manager.columns.ColumnConfigBase
import com.twitter.representation_manager.store.UserSimClustersEmbeddingStore
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.stitch
import com.twitter.stitch.Stitch
import com.twitter.stitch.storehaus.StitchOfReadableStore
import com.twitter.strato.catalog.OpMetadata
import com.twitter.strato.config.AnyOf
import com.twitter.strato.config.ContactInfo
import com.twitter.strato.config.FromColumns
import com.twitter.strato.config.Policy
import com.twitter.strato.config.Prefix
import com.twitter.strato.data.Conv
import com.twitter.strato.data.Description.PlainText
import com.twitter.strato.data.Lifecycle
import com.twitter.strato.fed._
import com.twitter.strato.thrift.ScroogeConv
import javax.inject.Inject
/**
 * Strato column that serves user SimClusters embeddings keyed by user id.
 *
 * The embedding type and model version come from the [[SimClustersEmbeddingView]]; the lookup
 * itself is delegated to the injected [[UserSimClustersEmbeddingStore]].
 */
class UserSimClustersEmbeddingCol @Inject() (embeddingStore: UserSimClustersEmbeddingStore)
    extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.User")
    with StratoFed.Fetch.Stitch {

  override type Key = Long // UserId
  override type View = SimClustersEmbeddingView
  override type Value = SimClustersEmbedding

  // Stitch-based lookup over the user store, with values converted to their thrift form.
  private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] = {
    val thriftStore = embeddingStore.userSimClustersEmbeddingStore.mapValues(_.toThrift)
    StitchOfReadableStore(thriftStore)
  }

  // Shared permissions plus access for columns under the ml/featureStore/simClusters prefix.
  val colPermissions: Seq[com.twitter.strato.config.Policy] = {
    val featureStoreColumns = FromColumns(Set(Prefix("ml/featureStore/simClusters")))
    (ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions) :+ featureStoreColumns
  }

  override val policy: Policy = AnyOf(colPermissions)

  override val keyConv: Conv[Key] = Conv.long
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding]

  override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo

  override val metadata: OpMetadata = OpMetadata(
    lifecycle = Some(Lifecycle.Production),
    description = Some(
      PlainText(
        "The User SimClusters Embedding Endpoint in Representation Management Service." +
          " TDD: http://go/rms-tdd"))
  )

  /**
   * Looks up the embedding identified by (view.embeddingType, view.modelVersion, key).
   *
   * A `stitch.NotFound` failure from the store is turned into `missing`; no other failure is
   * handled here.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val id = SimClustersEmbeddingId(
      view.embeddingType,
      view.modelVersion,
      InternalId.UserId(key)
    )
    val lookup = storeStitch(id)
    lookup
      .map(found(_))
      .handle { case stitch.NotFound => missing }
  }
}

View File

@ -0,0 +1,13 @@
# Library target depending on decider, finagle-memcached, hermit's common store helpers,
# relevance-platform injections and the SimClusters v2 common + thrift libraries.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"decider/src/main/scala",
"finagle/finagle-memcached",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
],
)

View File

@ -0,0 +1,153 @@
package com.twitter.representation_manager.common
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.memcached.Client
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hashing.KeyHasher
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.SimClustersEmbeddingIdCacheKeyBuilder
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Duration
/*
 * NOTE - ALL the cache configs here are just placeholders; NONE of them is used anywhere in RMS yet.
 */
// Per-store memcache setting: either EnabledMemCacheParams or DisabledMemCacheParams.
sealed trait MemCacheParams
// Trait sharing its name with the MemCacheConfig object; it declares no members.
sealed trait MemCacheConfig
/*
 * Holds the params that are required to set up a memcache cache for a single embedding store.
 */
case class EnabledMemCacheParams(ttl: Duration) extends MemCacheParams
// Marks a store for which no memcache layer is configured.
object DisabledMemCacheParams extends MemCacheParams
/*
 * MemCacheConfig is the single source used to set up the memcache for all RMS use cases.
 * NO OVERRIDE FROM CLIENT
 */
object MemCacheConfig {
  val keyHasher: KeyHasher = KeyHasher.FNV1A_64
  val hashKeyPrefix: String = "RMS"
  val simclustersEmbeddingCacheKeyBuilder =
    SimClustersEmbeddingIdCacheKeyBuilder(keyHasher.hashKey, hashKeyPrefix)

  // TTL presets shared by the entries of cacheParamsMap.
  private val cachedFor10Minutes: MemCacheParams = EnabledMemCacheParams(ttl = 10.minutes)
  private val cachedFor30Minutes: MemCacheParams = EnabledMemCacheParams(ttl = 30.minutes)
  private val cachedFor12Hours: MemCacheParams = EnabledMemCacheParams(ttl = 12.hours)

  val cacheParamsMap: Map[(EmbeddingType, ModelVersion), MemCacheParams] = Map(
    // Tweet Embeddings
    (LogFavBasedTweet, Model20m145kUpdated) -> cachedFor10Minutes,
    (LogFavBasedTweet, Model20m145k2020) -> cachedFor10Minutes,
    (LogFavLongestL2EmbeddingTweet, Model20m145kUpdated) -> cachedFor10Minutes,
    (LogFavLongestL2EmbeddingTweet, Model20m145k2020) -> cachedFor10Minutes,
    // User - KnownFor Embeddings
    (FavBasedProducer, Model20m145kUpdated) -> cachedFor12Hours,
    (FavBasedProducer, Model20m145k2020) -> cachedFor12Hours,
    (FollowBasedProducer, Model20m145k2020) -> cachedFor12Hours,
    (AggregatableLogFavBasedProducer, Model20m145k2020) -> cachedFor12Hours,
    (RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated) -> cachedFor12Hours,
    (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020) -> cachedFor12Hours,
    // User - InterestedIn Embeddings
    (LogFavBasedUserInterestedInFromAPE, Model20m145k2020) -> cachedFor12Hours,
    (FollowBasedUserInterestedInFromAPE, Model20m145k2020) -> cachedFor12Hours,
    (FavBasedUserInterestedIn, Model20m145kUpdated) -> cachedFor12Hours,
    (FavBasedUserInterestedIn, Model20m145k2020) -> cachedFor12Hours,
    (FollowBasedUserInterestedIn, Model20m145k2020) -> cachedFor12Hours,
    (LogFavBasedUserInterestedIn, Model20m145k2020) -> cachedFor12Hours,
    (FavBasedUserInterestedInFromPE, Model20m145kUpdated) -> cachedFor12Hours,
    (FilteredUserInterestedIn, Model20m145kUpdated) -> cachedFor12Hours,
    (FilteredUserInterestedIn, Model20m145k2020) -> cachedFor12Hours,
    (FilteredUserInterestedInFromPE, Model20m145kUpdated) -> cachedFor12Hours,
    (UnfilteredUserInterestedIn, Model20m145kUpdated) -> cachedFor12Hours,
    (UnfilteredUserInterestedIn, Model20m145k2020) -> cachedFor12Hours,
    // embedding is updated every 2 hours, keeping it lower to avoid staleness
    (UserNextInterestedIn, Model20m145k2020) -> cachedFor30Minutes,
    (LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      cachedFor12Hours,
    (LogFavBasedUserInterestedAverageAddressBookFromIIAPE, Model20m145k2020) ->
      cachedFor12Hours,
    (LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      cachedFor12Hours,
    (LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      cachedFor12Hours,
    (LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      cachedFor12Hours,
    (LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      cachedFor12Hours,
    // Topic Embeddings
    (FavTfgTopic, Model20m145k2020) -> cachedFor12Hours,
    (LogFavBasedKgoApeTopic, Model20m145k2020) -> cachedFor12Hours
  )

  /**
   * Returns the cache params registered for (embeddingType, modelVersion), or
   * DisabledMemCacheParams when the pair is not present in cacheParamsMap.
   */
  def getCacheSetup(
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): MemCacheParams =
    cacheParamsMap.get((embeddingType, modelVersion)) match {
      case Some(params) => params
      case None => DisabledMemCacheParams
    }

  /** Memcache key prefix built from the numeric values of the embedding type and model version. */
  def getCacheKeyPrefix(embeddingType: EmbeddingType, modelVersion: ModelVersion): String =
    s"${embeddingType.value}_${modelVersion.value}_"

  /** Stats scope name built from the names of the embedding type and model version. */
  def getStatsName(embeddingType: EmbeddingType, modelVersion: ModelVersion): String =
    s"${embeddingType.name}_${modelVersion.name}_mem_cache"

  /**
   * Build a ReadableStore based on MemCacheConfig.
   *
   * With memcache disabled for the (embeddingType, modelVersion) pair, rawStore is used as is.
   * With memcache enabled, rawStore is wrapped in an ObservedMemcachedReadableStore configured
   * from the EnabledMemCacheParams. In both cases the values are converted to
   * SimClustersEmbedding.
   */
  def buildMemCacheStoreForSimClustersEmbedding(
    rawStore: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding],
    cacheClient: Client,
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion,
    stats: StatsReceiver
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val thriftStore = getCacheSetup(embeddingType, modelVersion) match {
      case EnabledMemCacheParams(ttl) =>
        val keyPrefix = getCacheKeyPrefix(embeddingType, modelVersion)
        val scopedStats = stats.scope(getStatsName(embeddingType, modelVersion))
        ObservedMemcachedReadableStore.fromCacheClient(
          backingStore = rawStore,
          cacheClient = cacheClient,
          ttl = ttl
        )(
          valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
          statsReceiver = scopedStats,
          keyToString = { id => keyPrefix + id.toString }
        )
      case DisabledMemCacheParams => rawStore
    }
    thriftStore.mapValues(SimClustersEmbedding(_))
  }
}

View File

@ -0,0 +1,25 @@
package com.twitter.representation_manager.common
import com.twitter.decider.Decider
import com.twitter.decider.RandomRecipient
import com.twitter.decider.Recipient
import com.twitter.simclusters_v2.common.DeciderGateBuilderWithIdHashing
import javax.inject.Inject
/**
 * Wrapper over a [[Decider]] that also exposes a [[DeciderGateBuilderWithIdHashing]] built
 * from the same decider.
 */
case class RepresentationManagerDecider @Inject() (decider: Decider) {

  val deciderGateBuilder = new DeciderGateBuilderWithIdHashing(decider)

  /** Delegates to `decider.isAvailable` with the given recipient. */
  def isAvailable(feature: String, recipient: Option[Recipient]): Boolean =
    decider.isAvailable(feature, recipient)

  /**
   * With useRandomRecipient = false no recipient is passed, so the decider is either
   * completely on or off. With useRandomRecipient = true a RandomRecipient is passed, so the
   * decider is on for the specified % of traffic.
   */
  def isAvailable(feature: String, useRandomRecipient: Boolean = true): Boolean = {
    val recipient: Option[Recipient] =
      if (useRandomRecipient) Some(RandomRecipient) else None
    isAvailable(feature, recipient)
  }
}

View File

@ -0,0 +1,25 @@
# Library target pulling in the content-recommender representation-manager deps, frigate /
# hermit / relevance-platform store helpers, the RMS common and store packages, the SimClusters
# v2 libraries, the Manhattan storehaus clients, socialgraph thrift and tweetypie utils.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"content-recommender/server/src/main/scala/com/twitter/contentrecommender:representation-manager-deps",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/readablestore",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/store",
"src/scala/com/twitter/ml/api/embedding",
"src/scala/com/twitter/simclusters_v2/common",
"src/scala/com/twitter/simclusters_v2/score",
"src/scala/com/twitter/simclusters_v2/summingbird/stores",
"src/scala/com/twitter/storehaus_internal/manhattan",
"src/scala/com/twitter/storehaus_internal/util",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"src/thrift/com/twitter/socialgraph:thrift-scala",
"storage/clients/manhattan/client/src/main/scala",
"tweetypie/src/scala/com/twitter/tweetypie/util",
],
)

View File

@ -0,0 +1,846 @@
package com.twitter.representation_manager.migration
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.contentrecommender.store.ApeEntityEmbeddingStore
import com.twitter.contentrecommender.store.InterestsOptOutStore
import com.twitter.contentrecommender.store.SemanticCoreTopicSeedStore
import com.twitter.contentrecommender.twistly
import com.twitter.conversions.DurationOps._
import com.twitter.decider.Decider
import com.twitter.escherbird.util.uttclient.CacheConfigV2
import com.twitter.escherbird.util.uttclient.CachedUttClientV2
import com.twitter.escherbird.util.uttclient.UttClientCacheConfigsV2
import com.twitter.escherbird.utt.strato.thriftscala.Environment
import com.twitter.finagle.ThriftMux
import com.twitter.finagle.memcached.Client
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.finagle.mtls.client.MtlsStackClient.MtlsThriftMuxClientSyntax
import com.twitter.finagle.mux.ClientDiscardedRequestException
import com.twitter.finagle.service.ReqRep
import com.twitter.finagle.service.ResponseClass
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.thrift.ClientId
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.frigate.common.util.SeqLongInjection
import com.twitter.hashing.KeyHasher
import com.twitter.hermit.store.common.DeciderableReadableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.interests.thriftscala.InterestsThriftService
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.readablestore.ReadableStoreWithTimeout
import com.twitter.representation_manager.common.RepresentationManagerDecider
import com.twitter.representation_manager.store.DeciderConstants
import com.twitter.representation_manager.store.DeciderKey
import com.twitter.simclusters_v2.common.ModelVersions
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.SimClustersEmbeddingIdCacheKeyBuilder
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore
import com.twitter.simclusters_v2.summingbird.stores.ProducerClusterEmbeddingReadableStores
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore
import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.simclusters_v2.thriftscala.ModelVersion.Model20m145k2020
import com.twitter.simclusters_v2.thriftscala.ModelVersion.Model20m145kUpdated
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclusters_v2.thriftscala.SimClustersMultiEmbedding
import com.twitter.simclusters_v2.thriftscala.SimClustersMultiEmbeddingId
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Athena
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.strato.client.Strato
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.strato.thrift.ScroogeConvImplicits._
import com.twitter.tweetypie.util.UserId
import com.twitter.util.Duration
import com.twitter.util.Future
import com.twitter.util.Throw
import com.twitter.util.Timer
import javax.inject.Inject
import javax.inject.Named
import scala.reflect.ClassTag
class LegacyRMS @Inject() (
serviceIdentifier: ServiceIdentifier,
cacheClient: Client,
stats: StatsReceiver,
decider: Decider,
clientId: ClientId,
timer: Timer,
@Named("cacheHashKeyPrefix") val cacheHashKeyPrefix: String = "RMS",
@Named("useContentRecommenderConfiguration") val useContentRecommenderConfiguration: Boolean =
false) {
// mTLS parameters for Manhattan KV clients, derived from this service's identifier.
private val mhMtlsParams: ManhattanKVClientMtlsParams = ManhattanKVClientMtlsParams(
serviceIdentifier)
// RMS decider wrapper around the injected decider.
private val rmsDecider = RepresentationManagerDecider(decider)
val keyHasher: KeyHasher = KeyHasher.FNV1A_64
// Cache key builder using keyHasher.hashKey and the injected cacheHashKeyPrefix.
private val embeddingCacheKeyBuilder =
SimClustersEmbeddingIdCacheKeyBuilder(keyHasher.hashKey, cacheHashKeyPrefix)
// All stats in this class are reported under the "representation_management" scope.
private val statsReceiver = stats.scope("representation_management")
// Strato client, default timeout = 280ms
val stratoClient: StratoClient =
Strato.client
.withMutualTls(serviceIdentifier)
.build()
// Builds the ThriftMux client builder for the Content-Recommender service: client id, mTLS,
// request timeout, a "clnt" stats scope, and a classifier that marks requests failing with
// ClientDiscardedRequestException as Ignorable.
private def makeThriftClientBuilder(
  requestTimeout: Duration
): ThriftMux.Client = {
  val discardedRequestsAreIgnorable: PartialFunction[ReqRep, ResponseClass] = {
    case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable
  }

  ThriftMux.client
    .withClientId(clientId)
    .withMutualTls(serviceIdentifier)
    .withRequestTimeout(requestTimeout)
    .withStatsReceiver(statsReceiver.scope("clnt"))
    .withResponseClassifier(discardedRequestsAreIgnorable)
}
// Builds a typed thrift client for `dest`, labelled `label`, on top of
// makeThriftClientBuilder. The request timeout defaults to 450 ms.
private def makeThriftClient[ThriftServiceType: ClassTag](
  dest: String,
  label: String,
  requestTimeout: Duration = 450.milliseconds
): ThriftServiceType = {
  val clientBuilder = makeThriftClientBuilder(requestTimeout)
  clientBuilder.build[ThriftServiceType](dest, label)
}
/** *** SimCluster Embedding Stores ******/
// Implicit binary thrift codecs (thrift struct <-> Array[Byte]) for the SimClusters embedding
// id and value types, single and multi.
implicit val simClustersEmbeddingIdInjection: Injection[SimClustersEmbeddingId, Array[Byte]] =
BinaryScalaCodec(SimClustersEmbeddingId)
implicit val simClustersEmbeddingInjection: Injection[ThriftSimClustersEmbedding, Array[Byte]] =
BinaryScalaCodec(ThriftSimClustersEmbedding)
implicit val simClustersMultiEmbeddingInjection: Injection[SimClustersMultiEmbedding, Array[
Byte
]] =
BinaryScalaCodec(SimClustersMultiEmbedding)
implicit val simClustersMultiEmbeddingIdInjection: Injection[SimClustersMultiEmbeddingId, Array[
Byte
]] =
BinaryScalaCodec(SimClustersMultiEmbeddingId)
// Opens a read-only Manhattan store of thrift SimClusters embeddings for the given dataset,
// under the "content_recommender_athena" application on the Athena cluster.
//
// mhMtlsParams: mTLS parameters for the Manhattan client (shadows the class-level field of
//               the same name, so the caller's value is the one used)
// datasetName:  Manhattan dataset to read from
def getEmbeddingsDataset(
  mhMtlsParams: ManhattanKVClientMtlsParams,
  datasetName: String
): ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding] = {
  val readOnlyConfig = ManhattanROConfig(
    HDFSPath(""), // not needed
    ApplicationID("content_recommender_athena"),
    DatasetName(datasetName), // this should be correct
    Athena
  )
  ManhattanRO.getReadableStoreWithMtls[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
    readOnlyConfig,
    mhMtlsParams
  )
}
// Tweet embeddings for keys of the form
// (LogFavLongestL2EmbeddingTweet, Model20m145k2020, InternalId.TweetId).
//
// Layers, outermost first: ObservedCachedReadableStore (12 minute TTL, 1048575 keys) ->
// memcache (15 minute TTL, LZ4-compressed thrift) -> the longest-L2-norm persistent tweet
// embedding store on Manhattan. Keys of any other shape are not covered by the single `case`
// in the key mapping.
//
// NOTE(review): the memcache key string embeds LogFavBasedTweet rather than
// LogFavLongestL2EmbeddingTweet; only the "scez_l2" prefix separates these keys from the
// "scez" ones. Presumably intentional - changing it would orphan existing cache entries.
lazy val logFavBasedLongestL2Tweet20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val persistentStore =
    PersistentTweetEmbeddingStore
      .longestL2NormTweetEmbeddingStoreManhattan(
        mhMtlsParams,
        PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
        statsReceiver,
        maxLength = 10
      ).mapValues(_.toThrift)

  val tweetIdKeyedStore = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = persistentStore,
    cacheClient = cacheClient,
    ttl = 15.minutes
  )(
    valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
    statsReceiver =
      statsReceiver.scope("log_fav_based_longest_l2_tweet_embedding_20m145k2020_mem_cache"),
    keyToString = { k =>
      s"scez_l2:${LogFavBasedTweet}_${ModelVersions.Model20M145K2020}_$k"
    }
  )

  // Re-key by SimClustersEmbeddingId and convert the thrift values back to SimClustersEmbedding.
  val embeddingIdKeyedStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    tweetIdKeyedStore
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(
              LogFavLongestL2EmbeddingTweet,
              Model20m145k2020,
              InternalId.TweetId(tweetId)) =>
          tweetId
      }
      .mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    embeddingIdKeyedStore,
    ttl = 12.minute,
    maxKeys = 1048575,
    cacheName = "log_fav_based_longest_l2_tweet_embedding_20m145k2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_longest_l2_tweet_embedding_20m145k2020_store"))
}
// Tweet embeddings for keys of the form
// (LogFavBasedTweet, Model20m145kUpdated, InternalId.TweetId).
//
// Layers, outermost first: ObservedCachedReadableStore (5 minute TTL, 1048575 keys) ->
// memcache (10 minute TTL, LZ4-compressed thrift) -> the most-recent persistent tweet
// embedding store on Manhattan. Keys of any other shape are not covered by the single `case`
// in the key mapping.
lazy val logFavBased20M145KUpdatedTweetEmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val persistentStore =
    PersistentTweetEmbeddingStore
      .mostRecentTweetEmbeddingStoreManhattan(
        mhMtlsParams,
        PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset,
        statsReceiver
      ).mapValues(_.toThrift)

  val tweetIdKeyedStore = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = persistentStore,
    cacheClient = cacheClient,
    ttl = 10.minutes
  )(
    valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
    statsReceiver = statsReceiver.scope("log_fav_based_tweet_embedding_mem_cache"),
    keyToString = { k =>
      // SimClusters_embedding_LZ4/embeddingType_modelVersion_tweetId
      s"scez:${LogFavBasedTweet}_${ModelVersions.Model20M145KUpdated}_$k"
    }
  )

  // Re-key by SimClustersEmbeddingId and convert the thrift values back to SimClustersEmbedding.
  val embeddingIdKeyedStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    tweetIdKeyedStore
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(
              LogFavBasedTweet,
              Model20m145kUpdated,
              InternalId.TweetId(tweetId)) =>
          tweetId
      }
      .mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    embeddingIdKeyedStore,
    ttl = 5.minute,
    maxKeys = 1048575, // 200MB
    cacheName = "log_fav_based_tweet_embedding_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_tweet_embedding_store"))
}
// Tweet embeddings for keys of the form
// (LogFavBasedTweet, Model20m145k2020, InternalId.TweetId).
//
// Layers, outermost first: ObservedCachedReadableStore (12 minute TTL, 16777215 keys) ->
// memcache (15 minute TTL, LZ4-compressed thrift) -> the most-recent persistent tweet
// embedding store on Manhattan (maxLength = 10). Keys of any other shape are not covered by
// the single `case` in the key mapping.
lazy val logFavBased20M145K2020TweetEmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val persistentStore =
    PersistentTweetEmbeddingStore
      .mostRecentTweetEmbeddingStoreManhattan(
        mhMtlsParams,
        PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
        statsReceiver,
        maxLength = 10
      ).mapValues(_.toThrift)

  val tweetIdKeyedStore = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = persistentStore,
    cacheClient = cacheClient,
    ttl = 15.minutes
  )(
    valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
    statsReceiver = statsReceiver.scope("log_fav_based_tweet_embedding_20m145k2020_mem_cache"),
    keyToString = { k =>
      // SimClusters_embedding_LZ4/embeddingType_modelVersion_tweetId
      s"scez:${LogFavBasedTweet}_${ModelVersions.Model20M145K2020}_$k"
    }
  )

  // Re-key by SimClustersEmbeddingId and convert the thrift values back to SimClustersEmbedding.
  val embeddingIdKeyedStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    tweetIdKeyedStore
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(
              LogFavBasedTweet,
              Model20m145k2020,
              InternalId.TweetId(tweetId)) =>
          tweetId
      }
      .mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    embeddingIdKeyedStore,
    ttl = 12.minute,
    maxKeys = 16777215,
    cacheName = "log_fav_based_tweet_embedding_20m145k2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_tweet_embedding_20m145k2020_store"))
}
// Topic embeddings fetched from the Strato column
// "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020", converted with
// truncate = 50 and fronted by an ObservedCachedReadableStore (12 hour TTL, 262143 keys).
// There is no memcache layer in front of this store.
lazy val favBasedTfgTopicEmbedding2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore =
    StratoFetchableStore
      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
        stratoClient,
        "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020")

  val embeddingStore = thriftStore.mapValues { thriftEmbedding =>
    SimClustersEmbedding(thriftEmbedding, truncate = 50)
  }

  val observedBackingStore = ObservedReadableStore(embeddingStore)(
    statsReceiver.scope("fav_tfg_topic_embedding_2020_cache_backing_store"))

  ObservedCachedReadableStore.from(
    observedBackingStore,
    ttl = 12.hours,
    maxKeys = 262143, // 200MB
    cacheName = "fav_tfg_topic_embedding_2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("fav_tfg_topic_embedding_2020_cache"))
}
// Aggregatable producer embeddings fetched from the Strato column
// "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020".
//
// The key mapping is a guard only: it accepts keys of the form
// (AggregatableLogFavBasedProducer, Model20m145k2020, <any internal id>) and forwards the
// matched key unchanged instead of rebuilding an identical SimClustersEmbeddingId on every
// lookup. Keys of any other shape are not covered by the single `case`.
// Values are converted with truncate = 50. No cache sits in front of this store.
lazy val logFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  ObservedReadableStore(
    StratoFetchableStore
      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
        stratoClient,
        "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020")
      .composeKeyMapping[SimClustersEmbeddingId] {
        case embeddingId @ SimClustersEmbeddingId(
              AggregatableLogFavBasedProducer,
              Model20m145k2020,
              _) =>
          embeddingId
      }
      .mapValues(embedding => SimClustersEmbedding(embedding, truncate = 50))
  )(statsReceiver.scope("aggregatable_producer_embeddings_by_logfav_score_2020"))
}
// Thrift client for the interests service, built with makeThriftClient's default timeout.
val interestService: InterestsThriftService.MethodPerEndpoint =
  makeThriftClient[InterestsThriftService.MethodPerEndpoint](
    dest = "/s/interests-thrift-service/interests-thrift-service",
    label = "interests_thrift_service"
  )

// Opt-out store constructed from the interests service client above.
val interestsOptOutStore: InterestsOptOutStore = InterestsOptOutStore(interestService)
// Save 2 ^ 18 UTTs (262143 = 2^18 - 1). Promising 100% cache rate
lazy val defaultCacheConfigV2: CacheConfigV2 = CacheConfigV2(262143)

// The same cache config is applied to every UTT client endpoint.
lazy val uttClientCacheConfigsV2: UttClientCacheConfigsV2 =
  UttClientCacheConfigsV2(
    getTaxonomyConfig = defaultCacheConfigV2,
    getUttTaxonomyConfig = defaultCacheConfigV2,
    getLeafIds = defaultCacheConfigV2,
    getLeafUttEntities = defaultCacheConfigV2
  )

// CachedUttClient to use StratoClient; always targets Environment.Prod.
lazy val cachedUttClientV2: CachedUttClientV2 =
  new CachedUttClientV2(
    stratoClient = stratoClient,
    env = Environment.Prod,
    cacheConfigs = uttClientCacheConfigsV2,
    statsReceiver = statsReceiver.scope("cached_utt_client")
  )
// Seed user ids per SemanticCoreTopicSeedStore.Key.
//
// Layers, outermost first: ObservedCachedReadableStore (6 hour TTL, 20000 keys) ->
// memcache (12 hour TTL, key "tpss:<entityId>_<languageCode>") -> SemanticCoreTopicSeedStore
// built from the cached UTT client and the interests opt-out store.
lazy val semanticCoreTopicSeedStore: ReadableStore[
  SemanticCoreTopicSeedStore.Key,
  Seq[UserId]
] = {
  /*
    Sizing (worst case), raw id payload only:
    up to 1000 Long seeds per topic/language = 62.5 kbit (~8 KB) per topic/language;
    assume ~10k active topic/languages ~= 650 Mbit (~80 MB).
    (The original figures "62.5kb" / "650MB" only work out when read as bits.)
   */
  val seedStore = new SemanticCoreTopicSeedStore(cachedUttClientV2, interestsOptOutStore)(
    statsReceiver.scope("semantic_core_topic_seed_store"))

  val remoteCachedStore = ObservedMemcachedReadableStore.fromCacheClient(
    backingStore = seedStore,
    cacheClient = cacheClient,
    ttl = 12.hours
  )(
    valueInjection = SeqLongInjection,
    statsReceiver = statsReceiver.scope("topic_producer_seed_store_mem_cache"),
    keyToString = { k => s"tpss:${k.entityId}_${k.languageCode}" }
  )

  ObservedCachedReadableStore.from[SemanticCoreTopicSeedStore.Key, Seq[UserId]](
    store = remoteCachedStore,
    ttl = 6.hours,
    maxKeys = 20e3.toInt,
    cacheName = "topic_producer_seed_store_cache",
    windowSize = 5000
  )(statsReceiver.scope("topic_producer_seed_store_cache"))
}
// Entity embedding store assembled from the topic seed store and the aggregatable producer
// embedding store. The producer store is re-keyed by UserId: each user id is wrapped as
// (AggregatableLogFavBasedProducer, Model20m145k2020, InternalId.UserId(id)).
lazy val logFavBasedApeEntity20M145K2020EmbeddingStore: ApeEntityEmbeddingStore = {
  val producerEmbeddingByUserId =
    logFavBasedApe20M145K2020EmbeddingStore.composeKeyMapping[UserId] { userId =>
      SimClustersEmbeddingId(
        AggregatableLogFavBasedProducer,
        Model20m145k2020,
        InternalId.UserId(userId))
    }

  new ApeEntityEmbeddingStore(
    semanticCoreSeedStore = semanticCoreTopicSeedStore,
    aggregatableProducerEmbeddingStore = producerEmbeddingByUserId,
    statsReceiver = statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_store"))
}
// Cached, decider-gated view of logFavBasedApeEntity20M145K2020EmbeddingStore.
//
// Layers, outermost first: DeciderableReadableStore gated per key by
// DeciderKey.enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore ->
// ObservedCachedReadableStore (6 hour TTL, 262143 keys) -> memcache (12 hour TTL,
// LZ4-compressed thrift) -> the entity store with values truncated to 50.
lazy val logFavBasedApeEntity20M145K2020EmbeddingCachedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore =
    logFavBasedApeEntity20M145K2020EmbeddingStore.mapValues(_.truncate(50).toThrift)

  val remoteCachedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftStore,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_mem_cache"),
      keyToString = { k => embeddingCacheKeyBuilder.apply(k) }
    ).mapValues(SimClustersEmbedding(_))

  val locallyCachedStore =
    ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
      remoteCachedStore,
      ttl = 6.hours,
      maxKeys = 262143,
      cacheName = "log_fav_based_ape_entity_2020_embedding_cache",
      windowSize = 10000L
    )(statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_cached_store"))

  DeciderableReadableStore(
    locallyCachedStore,
    rmsDecider.deciderGateBuilder.idGateWithHashing[SimClustersEmbeddingId](
      DeciderKey.enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore),
    statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_deciderable_store")
  )
}
// Relaxed-threshold aggregatable producer embeddings fetched from the Strato column
// "recommendations/simclusters_v2/embeddings/logFavBasedAPERelaxedFavEngagementThreshold20M145K2020".
//
// The key mapping is a guard only: it accepts keys of the form
// (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020, <any internal id>) and forwards
// the matched key unchanged instead of rebuilding an identical SimClustersEmbeddingId on
// every lookup. Keys of any other shape are not covered by the single `case`.
// Values are converted to SimClustersEmbedding and truncated to 50.
lazy val relaxedLogFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  ObservedReadableStore(
    StratoFetchableStore
      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
        stratoClient,
        "recommendations/simclusters_v2/embeddings/logFavBasedAPERelaxedFavEngagementThreshold20M145K2020")
      .composeKeyMapping[SimClustersEmbeddingId] {
        case embeddingId @ SimClustersEmbeddingId(
              RelaxedAggregatableLogFavBasedProducer,
              Model20m145k2020,
              _) =>
          embeddingId
      }
      .mapValues(embedding => SimClustersEmbedding(embedding).truncate(50))
  )(statsReceiver.scope(
    "aggregatable_producer_embeddings_by_logfav_score_relaxed_fav_engagement_threshold_2020"))
}
// Cached view of relaxedLogFavBasedApe20M145K2020EmbeddingStore.
//
// Layers, outermost first: ObservedCachedReadableStore (6 hour TTL, 262143 keys) ->
// memcache (12 hour TTL, LZ4-compressed thrift) -> the relaxed producer store.
//
// NOTE(review): values are truncated to 50 here and again inside the underlying store;
// the second truncation looks redundant but is kept as-is.
lazy val relaxedLogFavBasedApe20M145K2020EmbeddingCachedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore =
    relaxedLogFavBasedApe20M145K2020EmbeddingStore.mapValues(_.truncate(50).toThrift)

  val remoteCachedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftStore,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver =
        statsReceiver.scope("relaxed_log_fav_based_ape_entity_2020_embedding_mem_cache"),
      keyToString = { k: SimClustersEmbeddingId => embeddingCacheKeyBuilder.apply(k) }
    ).mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    remoteCachedStore,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "relaxed_log_fav_based_ape_entity_2020_embedding_cache",
    windowSize = 10000L
  )(statsReceiver.scope("relaxed_log_fav_based_ape_entity_2020_embedding_cache_store"))
}
// Producer embeddings for keys of the form
// (FavBasedProducer, Model20m145k2020, InternalId.UserId).
//
// Layers, outermost first: ObservedCachedReadableStore (12 hour TTL, 16777215 keys) ->
// memcache (24 hour TTL, LZ4-compressed thrift) -> the producer top-K SimClusters 2020
// store, keeping only the first 10 entries of topClusters per producer. Keys of any other
// shape are not covered by the single `case` in the key mapping.
lazy val favBasedProducer20M145K2020EmbeddingStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore = ProducerClusterEmbeddingReadableStores
    .getProducerTopKSimClusters2020EmbeddingsStore(
      mhMtlsParams
    ).composeKeyMapping[SimClustersEmbeddingId] {
      case SimClustersEmbeddingId(
            FavBasedProducer,
            Model20m145k2020,
            InternalId.UserId(userId)) =>
        userId
    }.mapValues { topClustersWithScore =>
      ThriftSimClustersEmbedding(topClustersWithScore.topClusters.take(10))
    }

  // same memcache config as for favBasedUserInterestedIn20M145K2020Store
  val remoteCachedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftStore,
      cacheClient = cacheClient,
      ttl = 24.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("fav_based_producer_embedding_20M_145K_2020_mem_cache"),
      keyToString = { k => embeddingCacheKeyBuilder.apply(k) }
    ).mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    remoteCachedStore,
    ttl = 12.hours,
    maxKeys = 16777215,
    cacheName = "fav_based_producer_embedding_20M_145K_2020_embedding_cache",
    windowSize = 10000L
  )(statsReceiver.scope("fav_based_producer_embedding_20M_145K_2020_embedding_store"))
}
// Production
// Default InterestedIn store (UserId -> ClustersUserIsInterestedIn) for Model20M145KUpdated.
lazy val interestedIn20M145KUpdatedStore: ReadableStore[UserId, ClustersUserIsInterestedIn] =
  UserInterestedInReadableStore.defaultStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145KUpdated
  )
// Production
// Default InterestedIn store (UserId -> ClustersUserIsInterestedIn) for Model20M145K2020.
lazy val interestedIn20M145K2020Store: ReadableStore[UserId, ClustersUserIsInterestedIn] =
  UserInterestedInReadableStore.defaultStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145K2020
  )
// Production
// Default IIPE store (UserId -> ClustersUserIsInterestedIn) for Model20M145KUpdated.
lazy val InterestedInFromPE20M145KUpdatedStore: ReadableStore[
  UserId,
  ClustersUserIsInterestedIn
] =
  UserInterestedInReadableStore.defaultIIPEStoreWithMtls(
    mhMtlsParams,
    modelVersion = ModelVersions.Model20M145KUpdated)
// Routes (UserId, ModelVersion) lookups to the InterestedIn store of the requested model
// version. Model versions other than Model20m145kUpdated and Model20m145k2020 resolve to
// Future.None.
lazy val simClustersInterestedInStore: ReadableStore[
  (UserId, ModelVersion),
  ClustersUserIsInterestedIn
] =
  new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
    override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] = {
      val (userId, modelVersion) = k
      modelVersion match {
        case Model20m145kUpdated => interestedIn20M145KUpdatedStore.get(userId)
        case Model20m145k2020 => interestedIn20M145K2020Store.get(userId)
        case _ => Future.None
      }
    }
  }
// Routes (UserId, ModelVersion) lookups to the InterestedIn-from-producer-embeddings store.
// Only Model20m145kUpdated is served; every other model version resolves to Future.None.
lazy val simClustersInterestedInFromProducerEmbeddingsStore: ReadableStore[
  (UserId, ModelVersion),
  ClustersUserIsInterestedIn
] =
  new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
    override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] = {
      val (userId, modelVersion) = k
      modelVersion match {
        case ModelVersion.Model20m145kUpdated =>
          InterestedInFromPE20M145KUpdatedStore.get(userId)
        case _ =>
          Future.None
      }
    }
  }
// Embedding store built on the two model-version-routing stores above; it reports stats
// directly under this class's root scope (no sub-scope is added here).
// It is registered in underlyingStores for the Filtered*/Unfiltered* InterestedIn types.
lazy val userInterestedInStore =
new twistly.interestedin.EmbeddingStore(
interestedInStore = simClustersInterestedInStore,
interestedInFromProducerEmbeddingStore = simClustersInterestedInFromProducerEmbeddingsStore,
statsReceiver = statsReceiver
)
// Production
// FavBasedUserInterestedIn embeddings for Model20m145kUpdated.
// Layers, outermost first: ObservedCachedReadableStore (6 hour TTL, 262143 keys) ->
// memcache (12 hour TTL, LZ4-compressed thrift) -> default SimClusters embedding store.
lazy val favBasedUserInterestedIn20M145KUpdatedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore =
    UserInterestedInReadableStore
      .defaultSimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.FavBasedUserInterestedIn,
        ModelVersion.Model20m145kUpdated)
      .mapValues(_.toThrift)

  val remoteCachedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftStore,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("fav_based_user_interested_in_mem_cache"),
      keyToString = { k => embeddingCacheKeyBuilder.apply(k) }
    ).mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    remoteCachedStore,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "fav_based_user_interested_in_cache",
    windowSize = 10000L
  )(statsReceiver.scope("fav_based_user_interested_in_store"))
}
// Production
// LogFavBasedUserInterestedInFromAPE embeddings for Model20m145k2020.
// Layers, outermost first: ObservedCachedReadableStore (6 hour TTL, 262143 keys) ->
// memcache (12 hour TTL, LZ4-compressed thrift) -> default IIAPE SimClusters embedding store.
lazy val LogFavBasedInterestedInFromAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore =
    UserInterestedInReadableStore
      .defaultIIAPESimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.LogFavBasedUserInterestedInFromAPE,
        ModelVersion.Model20m145k2020)
      .mapValues(_.toThrift)

  val remoteCachedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftStore,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("log_fav_based_user_interested_in_from_ape_mem_cache"),
      keyToString = { k => embeddingCacheKeyBuilder.apply(k) }
    ).mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    remoteCachedStore,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "log_fav_based_user_interested_in_from_ape_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_user_interested_in_from_ape_store"))
}
// Production
// FollowBasedUserInterestedInFromAPE embeddings for Model20m145k2020.
// Layers, outermost first: ObservedCachedReadableStore (6 hour TTL, 262143 keys) ->
// memcache (12 hour TTL, LZ4-compressed thrift) -> default IIAPE SimClusters embedding store.
lazy val FollowBasedInterestedInFromAPE20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore =
    UserInterestedInReadableStore
      .defaultIIAPESimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.FollowBasedUserInterestedInFromAPE,
        ModelVersion.Model20m145k2020)
      .mapValues(_.toThrift)

  val remoteCachedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftStore,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("follow_based_user_interested_in_from_ape_mem_cache"),
      keyToString = { k => embeddingCacheKeyBuilder.apply(k) }
    ).mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    remoteCachedStore,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "follow_based_user_interested_in_from_ape_cache",
    windowSize = 10000L
  )(statsReceiver.scope("follow_based_user_interested_in_from_ape_store"))
}
// production
// FavBasedUserInterestedIn embeddings for Model20m145k2020.
// Only a memcache layer (12 hour TTL, LZ4-compressed thrift) fronts the default SimClusters
// embedding store; unlike its sibling stores there is no ObservedCachedReadableStore on top.
lazy val favBasedUserInterestedIn20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding] =
    UserInterestedInReadableStore
      .defaultSimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.FavBasedUserInterestedIn,
        ModelVersion.Model20m145k2020).mapValues(_.toThrift)

  ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftStore,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("fav_based_user_interested_in_2020_mem_cache"),
      keyToString = { k => embeddingCacheKeyBuilder.apply(k) }
    ).mapValues(SimClustersEmbedding(_))
}
// Production
// LogFavBasedUserInterestedIn embeddings for Model20m145k2020.
// Layers, outermost first: ObservedCachedReadableStore (6 hour TTL, 262143 keys) ->
// memcache (12 hour TTL, LZ4-compressed thrift) -> default SimClusters embedding store.
//
// NOTE(review): the memcache layer and the local cache both report into the stats scope
// "log_fav_based_user_interested_in_2020_store", whereas sibling stores use a separate
// "..._mem_cache" scope for memcache. Looks like a copy/paste slip; left unchanged because
// renaming the scope changes emitted metric names - confirm before fixing.
lazy val logFavBasedUserInterestedIn20M145K2020Store: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val embeddingStore =
    UserInterestedInReadableStore
      .defaultSimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.LogFavBasedUserInterestedIn,
        ModelVersion.Model20m145k2020)

  val remoteCachedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = embeddingStore.mapValues(_.toThrift),
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("log_fav_based_user_interested_in_2020_store"),
      keyToString = { k => embeddingCacheKeyBuilder.apply(k) }
    ).mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    remoteCachedStore,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "log_fav_based_user_interested_in_2020_cache",
    windowSize = 10000L
  )(statsReceiver.scope("log_fav_based_user_interested_in_2020_store"))
}
// Production
// FavBasedUserInterestedInFromPE embeddings for Model20m145kUpdated.
// Layers, outermost first: ObservedCachedReadableStore (6 hour TTL, 262143 keys) ->
// memcache (12 hour TTL, LZ4-compressed thrift) -> default IIPE SimClusters embedding store.
lazy val favBasedUserInterestedInFromPE20M145KUpdatedStore: ReadableStore[
  SimClustersEmbeddingId,
  SimClustersEmbedding
] = {
  val thriftStore =
    UserInterestedInReadableStore
      .defaultIIPESimClustersEmbeddingStoreWithMtls(
        mhMtlsParams,
        EmbeddingType.FavBasedUserInterestedInFromPE,
        ModelVersion.Model20m145kUpdated)
      .mapValues(_.toThrift)

  val remoteCachedStore = ObservedMemcachedReadableStore
    .fromCacheClient(
      backingStore = thriftStore,
      cacheClient = cacheClient,
      ttl = 12.hours
    )(
      valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)),
      statsReceiver = statsReceiver.scope("fav_based_user_interested_in_from_pe_mem_cache"),
      keyToString = { k => embeddingCacheKeyBuilder.apply(k) }
    ).mapValues(SimClustersEmbedding(_))

  ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding](
    remoteCachedStore,
    ttl = 6.hours,
    maxKeys = 262143,
    cacheName = "fav_based_user_interested_in_from_pe_cache",
    windowSize = 10000L
  )(statsReceiver.scope("fav_based_user_interested_in_from_pe_cache"))
}
// Routing table from (EmbeddingType, ModelVersion) to the store that serves it.
// This is a strict val, so every lazy store referenced here is initialised when the map is
// built, in the order the entries are listed.
private val underlyingStores: Map[
  (EmbeddingType, ModelVersion),
  ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
] = Map(
  // Tweet Embeddings
  (LogFavBasedTweet, Model20m145kUpdated) -> logFavBased20M145KUpdatedTweetEmbeddingStore,
  (LogFavBasedTweet, Model20m145k2020) -> logFavBased20M145K2020TweetEmbeddingStore,
  (LogFavLongestL2EmbeddingTweet, Model20m145k2020) ->
    logFavBasedLongestL2Tweet20M145K2020EmbeddingStore,
  // Entity Embeddings
  (FavTfgTopic, Model20m145k2020) -> favBasedTfgTopicEmbedding2020Store,
  (LogFavBasedKgoApeTopic, Model20m145k2020) ->
    logFavBasedApeEntity20M145K2020EmbeddingCachedStore,
  // KnownFor Embeddings
  (FavBasedProducer, Model20m145k2020) -> favBasedProducer20M145K2020EmbeddingStore,
  (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020) ->
    relaxedLogFavBasedApe20M145K2020EmbeddingCachedStore,
  // InterestedIn Embeddings
  (LogFavBasedUserInterestedInFromAPE, Model20m145k2020) ->
    LogFavBasedInterestedInFromAPE20M145K2020Store,
  (FollowBasedUserInterestedInFromAPE, Model20m145k2020) ->
    FollowBasedInterestedInFromAPE20M145K2020Store,
  (FavBasedUserInterestedIn, Model20m145kUpdated) -> favBasedUserInterestedIn20M145KUpdatedStore,
  (FavBasedUserInterestedIn, Model20m145k2020) -> favBasedUserInterestedIn20M145K2020Store,
  (LogFavBasedUserInterestedIn, Model20m145k2020) -> logFavBasedUserInterestedIn20M145K2020Store,
  (FavBasedUserInterestedInFromPE, Model20m145kUpdated) ->
    favBasedUserInterestedInFromPE20M145KUpdatedStore,
  // The Filtered*/Unfiltered* types are all served by the shared userInterestedInStore.
  (FilteredUserInterestedIn, Model20m145kUpdated) -> userInterestedInStore,
  (FilteredUserInterestedIn, Model20m145k2020) -> userInterestedInStore,
  (FilteredUserInterestedInFromPE, Model20m145kUpdated) -> userInterestedInStore,
  (UnfilteredUserInterestedIn, Model20m145kUpdated) -> userInterestedInStore,
  (UnfilteredUserInterestedIn, Model20m145k2020) -> userInterestedInStore
)
// Single entry point for SimClusters embedding lookups.
// Layers, outermost first: ObservedReadableStore ("simClusters_embeddings_store") ->
// ReadableStoreWithTimeout (enablement and timeout value both read from deciders) ->
// the decider-aware store built from underlyingStores.
val simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
  val routedStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    SimClustersEmbeddingStore.buildWithDecider(
      underlyingStores = underlyingStores,
      decider = rmsDecider.decider,
      statsReceiver = statsReceiver.scope("simClusters_embeddings_store_deciderable")
    )

  val timeLimitedStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
    new ReadableStoreWithTimeout(
      rs = routedStore,
      decider = rmsDecider.decider,
      enableTimeoutDeciderKey = DeciderConstants.enableSimClustersEmbeddingStoreTimeouts,
      timeoutValueKey = DeciderConstants.simClustersEmbeddingStoreTimeoutValueMillis,
      timer = timer,
      statsReceiver = statsReceiver.scope("simClusters_embedding_store_timeouts")
    )

  ObservedReadableStore(
    store = timeLimitedStore
  )(statsReceiver.scope("simClusters_embeddings_store"))
}
}

View File

@ -0,0 +1,18 @@
# Scala library target compiled for java8 with warnings treated as errors.
# Dependencies are kept in alphabetical order.
# NOTE(review): presumably the target for the com.twitter.representation_manager.modules
# sources that follow in this change - confirm against the BUILD file's path.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
"finagle/finagle-stats",
"finatra/inject/inject-core/src/main/scala",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
"interests-service/thrift/src/main/thrift:thrift-scala",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
"servo/util",
"src/scala/com/twitter/storehaus_internal/manhattan",
"src/scala/com/twitter/storehaus_internal/memcache",
"src/scala/com/twitter/storehaus_internal/util",
"strato/src/main/scala/com/twitter/strato/client",
],
)

View File

@ -0,0 +1,34 @@
package com.twitter.representation_manager.modules
import com.google.inject.Provides
import com.twitter.finagle.memcached.Client
import javax.inject.Singleton
import com.twitter.conversions.DurationOps._
import com.twitter.inject.TwitterModule
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.storehaus_internal.memcache.MemcacheStore
import com.twitter.storehaus_internal.util.ClientName
import com.twitter.storehaus_internal.util.ZkEndPoint
/**
 * Guice module providing the singleton memcached [[Client]] used by the service.
 *
 * The destination, timeout (interpreted as milliseconds) and retry count come from the
 * `cache_module.dest`, `memcache.timeout` and `memcache.retries` flags. None of the flags
 * declares a default value here.
 */
object CacheModule extends TwitterModule {

  private val cacheDest = flag[String]("cache_module.dest", "Path to memcache service")
  private val timeout = flag[Int]("memcache.timeout", "Memcache client timeout")
  private val retries = flag[Int]("memcache.retries", "Memcache timeout retries")

  /**
   * Builds the memcached client named "memcache_representation_manager".
   *
   * @param serviceIdentifier service identity passed through to the memcached client
   * @param stats             root stats receiver; the client reports under "cache_client"
   */
  @Singleton
  @Provides
  def providesCache(
    serviceIdentifier: ServiceIdentifier,
    stats: StatsReceiver
  ): Client = {
    // Values are read in the same order the original argument list evaluated them.
    val clientName = ClientName("memcache_representation_manager")
    val endpoint = ZkEndPoint(cacheDest())
    val requestTimeout = timeout().milliseconds
    val maxRetries = retries()
    val clientStats = stats.scope("cache_client")

    MemcacheStore.memcachedClient(
      name = clientName,
      dest = endpoint,
      timeout = requestTimeout,
      retries = maxRetries,
      statsReceiver = clientStats,
      serviceIdentifier = serviceIdentifier
    )
  }
}

View File

@ -0,0 +1,40 @@
package com.twitter.representation_manager.modules
import com.google.inject.Provides
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.ThriftMux
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.finagle.mtls.client.MtlsStackClient.MtlsThriftMuxClientSyntax
import com.twitter.finagle.mux.ClientDiscardedRequestException
import com.twitter.finagle.service.ReqRep
import com.twitter.finagle.service.ResponseClass
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.thrift.ClientId
import com.twitter.inject.TwitterModule
import com.twitter.interests.thriftscala.InterestsThriftService
import com.twitter.util.Throw
import javax.inject.Singleton
/**
 * Guice module providing the singleton Thrift client for the interests service.
 */
object InterestsThriftClientModule extends TwitterModule {

  /**
   * Builds a ThriftMux client for "/s/interests-thrift-service/interests-thrift-service".
   *
   * The client uses a 450 millisecond request timeout, reports under the
   * "InterestsThriftClient" stats scope, and classifies requests that failed with
   * ClientDiscardedRequestException as ignorable.
   *
   * @param clientId          client id attached to outgoing requests
   * @param serviceIdentifier service identity used for mutual TLS
   * @param statsReceiver     root stats receiver
   */
  @Singleton
  @Provides
  def providesInterestsThriftClient(
    clientId: ClientId,
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver
  ): InterestsThriftService.MethodPerEndpoint = {
    val configuredClient = ThriftMux.client
      .withClientId(clientId)
      .withMutualTls(serviceIdentifier)
      .withRequestTimeout(450.milliseconds)
      .withStatsReceiver(statsReceiver.scope("InterestsThriftClient"))
      .withResponseClassifier {
        case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable
      }

    configuredClient.build[InterestsThriftService.MethodPerEndpoint](
      dest = "/s/interests-thrift-service/interests-thrift-service",
      label = "interests_thrift_service"
    )
  }
}

View File

@ -0,0 +1,18 @@
package com.twitter.representation_manager.modules
import com.google.inject.Provides
import com.twitter.inject.TwitterModule
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module binding the two named configuration values
 * ("cacheHashKeyPrefix" and "useContentRecommenderConfiguration") to fixed constants.
 */
object LegacyRMSConfigModule extends TwitterModule {

  /** Value bound to the "cacheHashKeyPrefix" name; always "RMS". */
  @Singleton
  @Provides
  @Named("cacheHashKeyPrefix")
  def providesCacheHashKeyPrefix: String =
    "RMS"

  /** Value bound to the "useContentRecommenderConfiguration" name; always false. */
  @Singleton
  @Provides
  @Named("useContentRecommenderConfiguration")
  def providesUseContentRecommenderConfiguration: Boolean =
    false
}

View File

@ -0,0 +1,24 @@
package com.twitter.representation_manager.modules
import com.google.inject.Provides
import javax.inject.Singleton
import com.twitter.inject.TwitterModule
import com.twitter.decider.Decider
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.representation_manager.common.RepresentationManagerDecider
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
/**
 * Guice module providing the singleton Manhattan mTLS parameters and the
 * RepresentationManagerDecider wrapper.
 */
object StoreModule extends TwitterModule {

  /** Wraps the service identifier in [[ManhattanKVClientMtlsParams]]. */
  @Singleton
  @Provides
  def providesMhMtlsParams(
    serviceIdentifier: ServiceIdentifier
  ): ManhattanKVClientMtlsParams = {
    ManhattanKVClientMtlsParams(serviceIdentifier)
  }

  /** Wraps the injected [[Decider]] in a [[RepresentationManagerDecider]]. */
  @Singleton
  @Provides
  def providesRmsDecider(
    decider: Decider
  ): RepresentationManagerDecider = {
    RepresentationManagerDecider(decider)
  }
}

View File

@ -0,0 +1,13 @@
package com.twitter.representation_manager.modules
import com.google.inject.Provides
import com.twitter.finagle.util.DefaultTimer
import com.twitter.inject.TwitterModule
import com.twitter.util.Timer
import javax.inject.Singleton
/**
 * Guice module binding [[Timer]] to Finagle's DefaultTimer as a singleton.
 */
object TimerModule extends TwitterModule {

  /** Provides Finagle's DefaultTimer wherever a [[Timer]] is injected. */
  @Singleton
  @Provides
  def providesTimer: Timer = {
    DefaultTimer
  }
}

View File

@ -0,0 +1,39 @@
package com.twitter.representation_manager.modules
import com.google.inject.Provides
import com.twitter.escherbird.util.uttclient.CacheConfigV2
import com.twitter.escherbird.util.uttclient.CachedUttClientV2
import com.twitter.escherbird.util.uttclient.UttClientCacheConfigsV2
import com.twitter.escherbird.utt.strato.thriftscala.Environment
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.strato.client.{Client => StratoClient}
import javax.inject.Singleton
/**
 * Guice module providing the singleton [[CachedUttClientV2]].
 */
object UttClientModule extends TwitterModule {

  /**
   * Builds a cached UTT client on top of the Strato client, always targeting
   * Environment.Prod and reporting under the "cached_utt_client" stats scope.
   *
   * @param stratoClient  Strato client the UTT client delegates to
   * @param statsReceiver root stats receiver
   */
  @Singleton
  @Provides
  def providesUttClient(
    stratoClient: StratoClient,
    statsReceiver: StatsReceiver
  ): CachedUttClientV2 = {
    // Save 2 ^ 18 UTTs (262143 = 2^18 - 1). Promising 100% cache rate
    val sharedCacheConfig: CacheConfigV2 = CacheConfigV2(262143)

    // The same config is applied to every UTT endpoint cache.
    val endpointCacheConfigs: UttClientCacheConfigsV2 = UttClientCacheConfigsV2(
      getTaxonomyConfig = sharedCacheConfig,
      getUttTaxonomyConfig = sharedCacheConfig,
      getLeafIds = sharedCacheConfig,
      getLeafUttEntities = sharedCacheConfig
    )

    // CachedUttClient to use StratoClient
    new CachedUttClientV2(
      stratoClient = stratoClient,
      env = Environment.Prod,
      cacheConfigs = endpointCacheConfigs,
      statsReceiver = statsReceiver.scope("cached_utt_client")
    )
  }
}

View File

@ -0,0 +1,16 @@
# Scala library target compiled for java8 with warnings treated as errors.
# Dependencies are kept in alphabetical order.
# NOTE(review): presumably the target for the com.twitter.representation_manager.store
# sources that follow in this change - confirm against the BUILD file's path.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"content-recommender/server/src/main/scala/com/twitter/contentrecommender:representation-manager-deps",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/common",
"src/scala/com/twitter/simclusters_v2/stores",
"src/scala/com/twitter/simclusters_v2/summingbird/stores",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"storage/clients/manhattan/client/src/main/scala",
"tweetypie/src/scala/com/twitter/tweetypie/util",
],
)

View File

@ -0,0 +1,39 @@
package com.twitter.representation_manager.store
import com.twitter.servo.decider.DeciderKeyEnum
/**
 * Names of the decider keys used by the stores in this package.
 */
object DeciderConstants {
  // Deciders inherited from CR and RSX and only used in LegacyRMS
  // Their value are manipulated by CR and RSX's yml file and their decider dashboard
  // We will remove them after migration completed
  val enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore: String =
    "enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore"

  val enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore: String =
    "enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore"

  // Timeout switch / timeout value (milliseconds, per the key name) for the 2020 tweet store.
  val enablelogFavBased20M145K2020TweetEmbeddingStoreTimeouts: String =
    "enable_log_fav_based_tweet_embedding_20m145k2020_timeouts"
  val logFavBased20M145K2020TweetEmbeddingStoreTimeoutValueMillis: String =
    "log_fav_based_tweet_embedding_20m145k2020_timeout_value_millis"

  // Timeout switch / timeout value (milliseconds, per the key name) for the Updated tweet store.
  val enablelogFavBased20M145KUpdatedTweetEmbeddingStoreTimeouts: String =
    "enable_log_fav_based_tweet_embedding_20m145kUpdated_timeouts"
  val logFavBased20M145KUpdatedTweetEmbeddingStoreTimeoutValueMillis: String =
    "log_fav_based_tweet_embedding_20m145kUpdated_timeout_value_millis"

  // Timeout switch / timeout value (milliseconds, per the key name) for the combined store.
  val enableSimClustersEmbeddingStoreTimeouts: String =
    "enable_sim_clusters_embedding_store_timeouts"
  val simClustersEmbeddingStoreTimeoutValueMillis: String =
    "sim_clusters_embedding_store_timeout_value_millis"
}
/**
 * Enumeration-style wrapper around the decider names in [[DeciderConstants]].
 *
 * Necessary for using servo Gates (original authors' note): each entry registers a `Value`
 * whose name is the corresponding string constant.
 *
 * NOTE(review): only the two "CachedStore" switches are registered here; the timeout-related
 * constants in DeciderConstants have no DeciderKey counterpart in this file - confirm that is
 * intentional.
 */
object DeciderKey extends DeciderKeyEnum {
// Keep declaration order stable: values are created in the order they appear here.
val enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore: Value = Value(
DeciderConstants.enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore
)
val enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore: Value = Value(
DeciderConstants.enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore
)
}

View File

@ -0,0 +1,198 @@
package com.twitter.representation_manager.store
import com.twitter.contentrecommender.store.ApeEntityEmbeddingStore
import com.twitter.contentrecommender.store.InterestsOptOutStore
import com.twitter.contentrecommender.store.SemanticCoreTopicSeedStore
import com.twitter.conversions.DurationOps._
import com.twitter.escherbird.util.uttclient.CachedUttClientV2
import com.twitter.finagle.memcached.Client
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.frigate.common.util.SeqLongInjection
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.interests.thriftscala.InterestsThriftService
import com.twitter.representation_manager.common.MemCacheConfig
import com.twitter.representation_manager.common.RepresentationManagerDecider
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.simclusters_v2.thriftscala.LocaleEntityId
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.tweetypie.util.UserId
import javax.inject.Inject
/**
 * Assembles the topic-keyed SimClusters embedding stores and exposes them behind a single
 * [[ReadableStore]], `topicSimClustersEmbeddingStore`.
 *
 * Each underlying store is registered under an `(EmbeddingType, ModelVersion)` pair and handed
 * to `SimClustersEmbeddingStore.buildWithDecider` together with the RMS decider.
 *
 * NOTE(review): `mhMtlsParams` is injected but never read inside this class.
 */
class TopicSimClustersEmbeddingStore @Inject() (
  stratoClient: StratoClient,
  cacheClient: Client,
  globalStats: StatsReceiver,
  mhMtlsParams: ManhattanKVClientMtlsParams,
  rmsDecider: RepresentationManagerDecider,
  interestService: InterestsThriftService.MethodPerEndpoint,
  uttClient: CachedUttClientV2) {

  private val stats = globalStats.scope(this.getClass.getSimpleName)
  private val interestsOptOutStore = InterestsOptOutStore(interestService)

  /**
   * Note this is NOT an embedding store. It is a list of author account ids we use to represent
   * topics.
   *
   * Layering, outermost first: in-process cache (6h TTL, at most 20k keys), memcache (12h TTL),
   * then the SemanticCoreTopicSeedStore itself.
   */
  private val semanticCoreTopicSeedStore: ReadableStore[
    SemanticCoreTopicSeedStore.Key,
    Seq[UserId]
  ] = {
    /*
      Sizing note: up to 1000 Long seeds per topic/language is 1000 x 8 bytes, i.e. roughly 8 KB
      (= 62.5 kilobits) of raw payload per key in the worst case. With ~10k active
      topic/languages that is on the order of 80 MB of raw payload (about 610 megabits),
      excluding any per-object overhead.
     */
    val seedStats = stats.scope("semantic_core_topic_seed_store")
    val seedSource = new SemanticCoreTopicSeedStore(uttClient, interestsOptOutStore)(seedStats)

    val memcachedSeeds = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = seedSource,
      cacheClient = cacheClient,
      ttl = 12.hours)(
      valueInjection = SeqLongInjection,
      statsReceiver = stats.scope("topic_producer_seed_store_mem_cache"),
      keyToString = { key => s"tpss:${key.entityId}_${key.languageCode}" }
    )

    ObservedCachedReadableStore.from[SemanticCoreTopicSeedStore.Key, Seq[UserId]](
      store = memcachedSeeds,
      ttl = 6.hours,
      maxKeys = 20000,
      cacheName = "topic_producer_seed_store_cache",
      windowSize = 5000
    )(stats.scope("topic_producer_seed_store_cache"))
  }

  /** FavTfgTopic / Model20m145k2020: Strato column, truncated to 50 clusters, keyed by locale entity. */
  private val favBasedTfgTopicEmbedding20m145k2020Store: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val stratoStore = StratoFetchableStore
      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
        stratoClient,
        "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020")
    val truncatedStore =
      stratoStore.mapValues(thrift => SimClustersEmbedding(thrift, truncate = 50).toThrift)
    // Pin the embedding type and model version; callers of this raw store only supply the entity.
    val byLocaleEntityId = truncatedStore.composeKeyMapping[LocaleEntityId] { localeEntityId =>
      SimClustersEmbeddingId(
        FavTfgTopic,
        Model20m145k2020,
        InternalId.LocaleEntityId(localeEntityId))
    }

    observeAndCache(byLocaleEntityId, FavTfgTopic, Model20m145k2020) {
      case SimClustersEmbeddingId(_, _, InternalId.LocaleEntityId(localeEntityId)) =>
        localeEntityId
    }
  }

  /**
   * LogFavBasedKgoApeTopic / Model20m145k2020: built by ApeEntityEmbeddingStore from the topic
   * seed accounts and the per-producer APE embeddings read from Strato.
   */
  private val logFavBasedApeEntity20M145K2020EmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val producerEmbeddings = StratoFetchableStore
      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
        stratoClient,
        "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020")
      .mapValues(thrift => SimClustersEmbedding(thrift, truncate = 50))
      .composeKeyMapping[UserId] { userId =>
        SimClustersEmbeddingId(
          AggregatableLogFavBasedProducer,
          Model20m145k2020,
          InternalId.UserId(userId))
      }

    val entityEmbeddings = new ApeEntityEmbeddingStore(
      semanticCoreSeedStore = semanticCoreTopicSeedStore,
      aggregatableProducerEmbeddingStore = producerEmbeddings,
      statsReceiver = stats.scope("log_fav_based_ape_entity_2020_embedding_store"))

    val byTopicId = entityEmbeddings
      .mapValues(embedding => SimClustersEmbedding(embedding.toThrift, truncate = 50).toThrift)
      .composeKeyMapping[TopicId] { topicId =>
        SimClustersEmbeddingId(
          LogFavBasedKgoApeTopic,
          Model20m145k2020,
          InternalId.TopicId(topicId))
      }

    observeAndCache(byTopicId, LogFavBasedKgoApeTopic, Model20m145k2020) {
      case SimClustersEmbeddingId(_, _, InternalId.TopicId(topicId)) =>
        topicId
    }
  }

  /**
   * Shared tail of every store above: wrap `rawStore` in an ObservedReadableStore scoped by
   * embedding type and model version, re-key it by SimClustersEmbeddingId via `toRawKey`, and
   * pass the result to MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding.
   *
   * `toRawKey` is normally a pattern-matching literal that only accepts one InternalId variant;
   * a key carrying any other variant is not matched by it.
   */
  private def observeAndCache[RawKey](
    rawStore: ReadableStore[RawKey, ThriftSimClustersEmbedding],
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  )(
    toRawKey: SimClustersEmbeddingId => RawKey
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val scopedStats = stats.scope(embeddingType.name).scope(modelVersion.name)
    val observedStore: ObservedReadableStore[RawKey, ThriftSimClustersEmbedding] =
      ObservedReadableStore(store = rawStore)(scopedStats)
    val byEmbeddingId = observedStore.composeKeyMapping[SimClustersEmbeddingId](toRawKey)

    MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding(
      byEmbeddingId,
      cacheClient,
      embeddingType,
      modelVersion,
      stats
    )
  }

  private val underlyingStores: Map[
    (EmbeddingType, ModelVersion),
    ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
  ] = Map(
    // Topic Embeddings
    (FavTfgTopic, Model20m145k2020) -> favBasedTfgTopicEmbedding20m145k2020Store,
    (LogFavBasedKgoApeTopic, Model20m145k2020) -> logFavBasedApeEntity20M145K2020EmbeddingStore
  )

  /** Single entry point over all topic embedding stores registered in `underlyingStores`. */
  val topicSimClustersEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    SimClustersEmbeddingStore.buildWithDecider(
      underlyingStores = underlyingStores,
      decider = rmsDecider.decider,
      statsReceiver = stats
    )
}

View File

@ -0,0 +1,141 @@
package com.twitter.representation_manager.store
import com.twitter.finagle.memcached.Client
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.representation_manager.common.MemCacheConfig
import com.twitter.representation_manager.common.RepresentationManagerDecider
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import javax.inject.Inject
/**
 * Assembles the tweet-keyed SimClusters embedding stores and exposes them behind a single
 * [[ReadableStore]], `tweetSimClustersEmbeddingStore`.
 *
 * All four underlying stores come from PersistentTweetEmbeddingStore: two "longest L2 norm"
 * variants and two "most recent" variants, one per model version / dataset.
 */
class TweetSimClustersEmbeddingStore @Inject() (
  cacheClient: Client,
  globalStats: StatsReceiver,
  mhMtlsParams: ManhattanKVClientMtlsParams,
  rmsDecider: RepresentationManagerDecider) {

  private val stats = globalStats.scope(this.getClass.getSimpleName)

  /** LogFavLongestL2EmbeddingTweet / Model20m145kUpdated. */
  val logFavBasedLongestL2Tweet20M145KUpdatedEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val manhattanStore = PersistentTweetEmbeddingStore.longestL2NormTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset,
      stats
    )
    val thriftStore = manhattanStore.mapValues(_.toThrift)
    buildMemCacheStore(thriftStore, LogFavLongestL2EmbeddingTweet, Model20m145kUpdated)
  }

  /** LogFavLongestL2EmbeddingTweet / Model20m145k2020. */
  val logFavBasedLongestL2Tweet20M145K2020EmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val manhattanStore = PersistentTweetEmbeddingStore.longestL2NormTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
      stats
    )
    val thriftStore = manhattanStore.mapValues(_.toThrift)
    buildMemCacheStore(thriftStore, LogFavLongestL2EmbeddingTweet, Model20m145k2020)
  }

  /** LogFavBasedTweet / Model20m145kUpdated. */
  val logFavBased20M145KUpdatedTweetEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val manhattanStore = PersistentTweetEmbeddingStore.mostRecentTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset,
      stats
    )
    val thriftStore = manhattanStore.mapValues(_.toThrift)
    buildMemCacheStore(thriftStore, LogFavBasedTweet, Model20m145kUpdated)
  }

  /** LogFavBasedTweet / Model20m145k2020. */
  val logFavBased20M145K2020TweetEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] = {
    val manhattanStore = PersistentTweetEmbeddingStore.mostRecentTweetEmbeddingStoreManhattan(
      mhMtlsParams,
      PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset,
      stats
    )
    val thriftStore = manhattanStore.mapValues(_.toThrift)
    buildMemCacheStore(thriftStore, LogFavBasedTweet, Model20m145k2020)
  }

  /**
   * Wraps a tweet-id-keyed thrift store in an ObservedReadableStore (stats scoped by embedding
   * type and model version), re-keys it by SimClustersEmbeddingId, and passes it to
   * MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding.
   *
   * The key mapping only matches ids whose InternalId is a TweetId.
   */
  private def buildMemCacheStore(
    rawStore: ReadableStore[TweetId, ThriftSimClustersEmbedding],
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
    val scopedStats = stats.scope(embeddingType.name).scope(modelVersion.name)
    val observed: ObservedReadableStore[TweetId, ThriftSimClustersEmbedding] =
      ObservedReadableStore(store = rawStore)(scopedStats)
    val byEmbeddingId = observed.composeKeyMapping[SimClustersEmbeddingId] {
      case SimClustersEmbeddingId(_, _, InternalId.TweetId(tweetId)) => tweetId
    }

    MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding(
      byEmbeddingId,
      cacheClient,
      embeddingType,
      modelVersion,
      stats
    )
  }

  private val underlyingStores: Map[
    (EmbeddingType, ModelVersion),
    ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
  ] = Map(
    // Tweet Embeddings
    (LogFavBasedTweet, Model20m145kUpdated) ->
      logFavBased20M145KUpdatedTweetEmbeddingStore,
    (LogFavBasedTweet, Model20m145k2020) ->
      logFavBased20M145K2020TweetEmbeddingStore,
    (LogFavLongestL2EmbeddingTweet, Model20m145kUpdated) ->
      logFavBasedLongestL2Tweet20M145KUpdatedEmbeddingStore,
    (LogFavLongestL2EmbeddingTweet, Model20m145k2020) ->
      logFavBasedLongestL2Tweet20M145K2020EmbeddingStore
  )

  /** Single entry point over all tweet embedding stores registered in `underlyingStores`. */
  val tweetSimClustersEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    SimClustersEmbeddingStore.buildWithDecider(
      underlyingStores = underlyingStores,
      decider = rmsDecider.decider,
      statsReceiver = stats
    )
}

View File

@ -0,0 +1,602 @@
package com.twitter.representation_manager.store
import com.twitter.contentrecommender.twistly
import com.twitter.finagle.memcached.Client
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.representation_manager.common.MemCacheConfig
import com.twitter.representation_manager.common.RepresentationManagerDecider
import com.twitter.simclusters_v2.common.ModelVersions
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore
import com.twitter.simclusters_v2.summingbird.stores.ProducerClusterEmbeddingReadableStores
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.getStore
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.modelVersionToDatasetMap
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.knownModelVersions
import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.toSimClustersEmbedding
import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.EmbeddingType._
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.simclusters_v2.thriftscala.ModelVersion._
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanCluster
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.strato.thrift.ScroogeConvImplicits._
import com.twitter.tweetypie.util.UserId
import com.twitter.util.Future
import javax.inject.Inject
/**
 * Assembles the user-keyed SimClusters embedding stores (producer embeddings and the various
 * InterestedIn embeddings) and exposes them behind a single [[ReadableStore]],
 * `userSimClustersEmbeddingStore`.
 *
 * Each underlying store is registered under an `(EmbeddingType, ModelVersion)` pair in
 * `underlyingStores` and handed to `SimClustersEmbeddingStore.buildWithDecider`.
 */
class UserSimClustersEmbeddingStore @Inject() (
  stratoClient: StratoClient,
  cacheClient: Client,
  globalStats: StatsReceiver,
  mhMtlsParams: ManhattanKVClientMtlsParams,
  rmsDecider: RepresentationManagerDecider) {

  // Shorthand used on private members only; the public store below spells its type out in full.
  private type SimClustersStore = ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]

  private val stats = globalStats.scope(this.getClass.getSimpleName)

  // ---------------------------------------------------------------------------------------------
  // Producer embeddings: Manhattan stores keyed by user id, re-keyed by SimClustersEmbeddingId.
  // The key mappings below only match ids whose InternalId is a UserId.
  // ---------------------------------------------------------------------------------------------

  private val favBasedProducer20M145KUpdatedEmbeddingStore: SimClustersStore = {
    val byUserId =
      ProducerClusterEmbeddingReadableStores.getProducerTopKSimClustersEmbeddingsStore(mhMtlsParams)
    val byEmbeddingId = byUserId
      .mapValues(top => ThriftSimClustersEmbedding(top.topClusters))
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) => userId
      }
    buildMemCacheStore(byEmbeddingId, FavBasedProducer, Model20m145kUpdated)
  }

  private val favBasedProducer20M145K2020EmbeddingStore: SimClustersStore = {
    val byUserId = ProducerClusterEmbeddingReadableStores
      .getProducerTopKSimClusters2020EmbeddingsStore(mhMtlsParams)
    val byEmbeddingId = byUserId
      .mapValues(top => ThriftSimClustersEmbedding(top.topClusters))
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) => userId
      }
    buildMemCacheStore(byEmbeddingId, FavBasedProducer, Model20m145k2020)
  }

  private val followBasedProducer20M145K2020EmbeddingStore: SimClustersStore = {
    val byUserId = ProducerClusterEmbeddingReadableStores
      .getProducerTopKSimClustersEmbeddingsByFollowStore(mhMtlsParams)
    val byEmbeddingId = byUserId
      .mapValues(top => ThriftSimClustersEmbedding(top.topClusters))
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) => userId
      }
    buildMemCacheStore(byEmbeddingId, FollowBasedProducer, Model20m145k2020)
  }

  // ---------------------------------------------------------------------------------------------
  // Aggregatable producer embeddings (APE), read from Strato and truncated to 50 clusters.
  // ---------------------------------------------------------------------------------------------

  private val logFavBasedApe20M145K2020EmbeddingStore: SimClustersStore = {
    val stratoStore = StratoFetchableStore
      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
        stratoClient,
        "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020")
    val truncated =
      stratoStore.mapValues(thrift => SimClustersEmbedding(thrift, truncate = 50).toThrift)
    buildMemCacheStore(truncated, AggregatableLogFavBasedProducer, Model20m145k2020)
  }

  // Shared by both "relaxed" stores below; the Strato column only exists for Model20m145k2020.
  private val rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    ThriftSimClustersEmbedding
  ] = {
    val stratoStore = StratoFetchableStore
      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](
        stratoClient,
        "recommendations/simclusters_v2/embeddings/logFavBasedAPERelaxedFavEngagementThreshold20M145K2020")
    stratoStore.mapValues(thrift => SimClustersEmbedding(thrift, truncate = 50).toThrift)
  }

  private val relaxedLogFavBasedApe20M145K2020EmbeddingStore: SimClustersStore =
    buildMemCacheStore(
      rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore,
      RelaxedAggregatableLogFavBasedProducer,
      Model20m145k2020)

  // Serves Model20m145kUpdated requests from the Model20m145k2020 data by rewriting the key.
  // Only ids with exactly (RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated) match.
  private val relaxedLogFavBasedApe20m145kUpdatedEmbeddingStore: SimClustersStore = {
    val rekeyed = rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(
              RelaxedAggregatableLogFavBasedProducer,
              Model20m145kUpdated,
              internalId) =>
          SimClustersEmbeddingId(
            RelaxedAggregatableLogFavBasedProducer,
            Model20m145k2020,
            internalId)
      }
    buildMemCacheStore(rekeyed, RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated)
  }

  // ---------------------------------------------------------------------------------------------
  // InterestedIn embeddings backed by UserInterestedInReadableStore factories.
  // ---------------------------------------------------------------------------------------------

  private val logFavBasedInterestedInFromAPE20M145K2020Store: SimClustersStore =
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultIIAPESimClustersEmbeddingStoreWithMtls,
      LogFavBasedUserInterestedInFromAPE,
      Model20m145k2020)

  private val followBasedInterestedInFromAPE20M145K2020Store: SimClustersStore =
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultIIAPESimClustersEmbeddingStoreWithMtls,
      FollowBasedUserInterestedInFromAPE,
      Model20m145k2020)

  private val favBasedUserInterestedIn20M145KUpdatedStore: SimClustersStore =
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
      FavBasedUserInterestedIn,
      Model20m145kUpdated)

  private val favBasedUserInterestedIn20M145K2020Store: SimClustersStore =
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
      FavBasedUserInterestedIn,
      Model20m145k2020)

  private val followBasedUserInterestedIn20M145K2020Store: SimClustersStore =
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
      FollowBasedUserInterestedIn,
      Model20m145k2020)

  private val logFavBasedUserInterestedIn20M145K2020Store: SimClustersStore =
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls,
      LogFavBasedUserInterestedIn,
      Model20m145k2020)

  private val favBasedUserInterestedInFromPE20M145KUpdatedStore: SimClustersStore =
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultIIPESimClustersEmbeddingStoreWithMtls,
      FavBasedUserInterestedInFromPE,
      Model20m145kUpdated)

  /**
   * Twistly InterestedIn store. It is fed by two `(UserId, ModelVersion)`-keyed stores that
   * dispatch to a per-model-version store and answer `Future.None` for any other model version.
   */
  private val twistlyUserInterestedInStore: ReadableStore[
    SimClustersEmbeddingId,
    ThriftSimClustersEmbedding
  ] = {
    val interestedIn20M145KUpdatedStore =
      UserInterestedInReadableStore.defaultStoreWithMtls(
        mhMtlsParams,
        modelVersion = ModelVersions.Model20M145KUpdated
      )
    val interestedIn20M145K2020Store =
      UserInterestedInReadableStore.defaultStoreWithMtls(
        mhMtlsParams,
        modelVersion = ModelVersions.Model20M145K2020
      )
    val interestedInFromPE20M145KUpdatedStore =
      UserInterestedInReadableStore.defaultIIPEStoreWithMtls(
        mhMtlsParams,
        modelVersion = ModelVersions.Model20M145KUpdated)

    val simClustersInterestedInStore: ReadableStore[
      (UserId, ModelVersion),
      ClustersUserIsInterestedIn
    ] =
      new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
        override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] =
          k match {
            case (userId, Model20m145kUpdated) => interestedIn20M145KUpdatedStore.get(userId)
            case (userId, Model20m145k2020) => interestedIn20M145K2020Store.get(userId)
            case _ => Future.None
          }
      }

    val simClustersInterestedInFromProducerEmbeddingsStore: ReadableStore[
      (UserId, ModelVersion),
      ClustersUserIsInterestedIn
    ] =
      new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] {
        override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] =
          k match {
            case (userId, ModelVersion.Model20m145kUpdated) =>
              interestedInFromPE20M145KUpdatedStore.get(userId)
            case _ => Future.None
          }
      }

    val twistlyStore = new twistly.interestedin.EmbeddingStore(
      interestedInStore = simClustersInterestedInStore,
      interestedInFromProducerEmbeddingStore = simClustersInterestedInFromProducerEmbeddingsStore,
      statsReceiver = stats
    )
    twistlyStore.mapValues(_.toThrift)
  }

  private val userNextInterestedIn20m145k2020Store: SimClustersStore =
    buildUserInterestedInStore(
      UserInterestedInReadableStore.defaultNextInterestedInStoreWithMtls,
      UserNextInterestedIn,
      Model20m145k2020)

  // The filtered / unfiltered variants all read through the same twistly store; they differ only
  // in the (EmbeddingType, ModelVersion) pair used for stats scoping and cache configuration.

  private val filteredUserInterestedIn20m145kUpdatedStore: SimClustersStore =
    buildMemCacheStore(twistlyUserInterestedInStore, FilteredUserInterestedIn, Model20m145kUpdated)

  private val filteredUserInterestedIn20m145k2020Store: SimClustersStore =
    buildMemCacheStore(twistlyUserInterestedInStore, FilteredUserInterestedIn, Model20m145k2020)

  private val filteredUserInterestedInFromPE20m145kUpdatedStore: SimClustersStore =
    buildMemCacheStore(
      twistlyUserInterestedInStore,
      FilteredUserInterestedInFromPE,
      Model20m145kUpdated)

  private val unfilteredUserInterestedIn20m145kUpdatedStore: SimClustersStore =
    buildMemCacheStore(
      twistlyUserInterestedInStore,
      UnfilteredUserInterestedIn,
      Model20m145kUpdated)

  private val unfilteredUserInterestedIn20m145k2020Store: SimClustersStore =
    buildMemCacheStore(twistlyUserInterestedInStore, UnfilteredUserInterestedIn, Model20m145k2020)

  // ---------------------------------------------------------------------------------------------
  // [Experimental] User InterestedIn, generated by aggregating IIAPE embedding from AddressBook
  // ---------------------------------------------------------------------------------------------

  private val logFavBasedInterestedMaxpoolingAddressBookFromIIAPE20M145K2020Store
    : SimClustersStore =
    buildAddressBookIIAPEStore(
      "addressbook_sims_embedding_iiape_maxpooling",
      LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE)

  private val logFavBasedInterestedAverageAddressBookFromIIAPE20M145K2020Store
    : SimClustersStore =
    buildAddressBookIIAPEStore(
      "addressbook_sims_embedding_iiape_average",
      LogFavBasedUserInterestedAverageAddressBookFromIIAPE)

  private val logFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE20M145K2020Store
    : SimClustersStore =
    buildAddressBookIIAPEStore(
      "addressbook_sims_embedding_iiape_booktype_maxpooling",
      LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE)

  private val logFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE20M145K2020Store
    : SimClustersStore =
    buildAddressBookIIAPEStore(
      "addressbook_sims_embedding_iiape_largestdim_maxpooling",
      LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE)

  private val logFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE20M145K2020Store
    : SimClustersStore =
    buildAddressBookIIAPEStore(
      "addressbook_sims_embedding_iiape_louvain_maxpooling",
      LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE)

  private val logFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE20M145K2020Store
    : SimClustersStore =
    buildAddressBookIIAPEStore(
      "addressbook_sims_embedding_iiape_connected_maxpooling",
      LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE)

  /**
   * Builds one of the experimental address-book stores. They all use Model20m145k2020, the
   * "wtf_embedding_apollo" app id and the Apollo Manhattan cluster; only the dataset name and
   * the embedding type vary.
   */
  private def buildAddressBookIIAPEStore(
    datasetName: String,
    embeddingType: EmbeddingType
  ): SimClustersStore =
    buildUserInterestedInStoreGeneric(
      simClustersEmbeddingStoreWithMtls,
      embeddingType,
      Model20m145k2020,
      datasetName = datasetName,
      appId = "wtf_embedding_apollo",
      manhattanCluster = Apollo
    )

  /**
   * Helper to build a readable store for some UserInterestedIn embeddings from
   *  1. a storeFunc from UserInterestedInReadableStore,
   *  2. an EmbeddingType,
   *  3. a ModelVersion.
   * The store returned by `storeFunc` is converted to thrift values and then observed and
   * wrapped exactly like every other store in this class (see `buildMemCacheStore`).
   */
  private def buildUserInterestedInStore(
    storeFunc: (ManhattanKVClientMtlsParams, EmbeddingType, ModelVersion) => SimClustersStore,
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): SimClustersStore = {
    val thriftStore = storeFunc(mhMtlsParams, embeddingType, modelVersion).mapValues(_.toThrift)
    buildMemCacheStore(thriftStore, embeddingType, modelVersion)
  }

  /**
   * Same as `buildUserInterestedInStore`, for store factories that additionally need a dataset
   * name, an app id and a Manhattan cluster.
   */
  private def buildUserInterestedInStoreGeneric(
    storeFunc: (
      ManhattanKVClientMtlsParams,
      EmbeddingType,
      ModelVersion,
      String,
      String,
      ManhattanCluster
    ) => SimClustersStore,
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion,
    datasetName: String,
    appId: String,
    manhattanCluster: ManhattanCluster
  ): SimClustersStore = {
    val thriftStore =
      storeFunc(mhMtlsParams, embeddingType, modelVersion, datasetName, appId, manhattanCluster)
        .mapValues(_.toThrift)
    buildMemCacheStore(thriftStore, embeddingType, modelVersion)
  }

  /**
   * Store factory for an arbitrary Manhattan dataset keyed by user id.
   *
   * Throws IllegalArgumentException when `modelVersion` has no entry in
   * `modelVersionToDatasetMap`. The returned store only matches ids whose embedding type and
   * model version equal the ones given here and whose InternalId is a UserId.
   */
  private def simClustersEmbeddingStoreWithMtls(
    mhMtlsParams: ManhattanKVClientMtlsParams,
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion,
    datasetName: String,
    appId: String,
    manhattanCluster: ManhattanCluster
  ): SimClustersStore = {
    val isKnownModelVersion =
      modelVersionToDatasetMap.contains(ModelVersions.toKnownForModelVersion(modelVersion))
    if (!isKnownModelVersion) {
      throw new IllegalArgumentException(
        s"Unknown model version: ${modelVersion}. Known model versions: ${knownModelVersions}")
    }

    val byUserId = getStore(appId, mhMtlsParams, datasetName, manhattanCluster)
    byUserId
      .composeKeyMapping[SimClustersEmbeddingId] {
        case SimClustersEmbeddingId(requestedType, requestedVersion, InternalId.UserId(userId))
            if requestedType == embeddingType && requestedVersion == modelVersion =>
          userId
      }
      .mapValues(toSimClustersEmbedding(_, embeddingType))
  }

  /**
   * Common tail of every store in this class: wrap `rawStore` in an ObservedReadableStore whose
   * stats are scoped by embedding type and model version, then pass it to
   * MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding.
   */
  private def buildMemCacheStore(
    rawStore: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding],
    embeddingType: EmbeddingType,
    modelVersion: ModelVersion
  ): SimClustersStore = {
    val scopedStats = stats.scope(embeddingType.name).scope(modelVersion.name)
    val observedStore = ObservedReadableStore(store = rawStore)(scopedStats)

    MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding(
      observedStore,
      cacheClient,
      embeddingType,
      modelVersion,
      stats
    )
  }

  private val underlyingStores: Map[(EmbeddingType, ModelVersion), SimClustersStore] = Map(
    // KnownFor Embeddings
    (FavBasedProducer, Model20m145kUpdated) ->
      favBasedProducer20M145KUpdatedEmbeddingStore,
    (FavBasedProducer, Model20m145k2020) ->
      favBasedProducer20M145K2020EmbeddingStore,
    (FollowBasedProducer, Model20m145k2020) ->
      followBasedProducer20M145K2020EmbeddingStore,
    (AggregatableLogFavBasedProducer, Model20m145k2020) ->
      logFavBasedApe20M145K2020EmbeddingStore,
    (RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated) ->
      relaxedLogFavBasedApe20m145kUpdatedEmbeddingStore,
    (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020) ->
      relaxedLogFavBasedApe20M145K2020EmbeddingStore,
    // InterestedIn Embeddings
    (LogFavBasedUserInterestedInFromAPE, Model20m145k2020) ->
      logFavBasedInterestedInFromAPE20M145K2020Store,
    (FollowBasedUserInterestedInFromAPE, Model20m145k2020) ->
      followBasedInterestedInFromAPE20M145K2020Store,
    (FavBasedUserInterestedIn, Model20m145kUpdated) ->
      favBasedUserInterestedIn20M145KUpdatedStore,
    (FavBasedUserInterestedIn, Model20m145k2020) ->
      favBasedUserInterestedIn20M145K2020Store,
    (FollowBasedUserInterestedIn, Model20m145k2020) ->
      followBasedUserInterestedIn20M145K2020Store,
    (LogFavBasedUserInterestedIn, Model20m145k2020) ->
      logFavBasedUserInterestedIn20M145K2020Store,
    (FavBasedUserInterestedInFromPE, Model20m145kUpdated) ->
      favBasedUserInterestedInFromPE20M145KUpdatedStore,
    (FilteredUserInterestedIn, Model20m145kUpdated) ->
      filteredUserInterestedIn20m145kUpdatedStore,
    (FilteredUserInterestedIn, Model20m145k2020) ->
      filteredUserInterestedIn20m145k2020Store,
    (FilteredUserInterestedInFromPE, Model20m145kUpdated) ->
      filteredUserInterestedInFromPE20m145kUpdatedStore,
    (UnfilteredUserInterestedIn, Model20m145kUpdated) ->
      unfilteredUserInterestedIn20m145kUpdatedStore,
    (UnfilteredUserInterestedIn, Model20m145k2020) ->
      unfilteredUserInterestedIn20m145k2020Store,
    (UserNextInterestedIn, Model20m145k2020) ->
      userNextInterestedIn20m145k2020Store,
    (LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      logFavBasedInterestedMaxpoolingAddressBookFromIIAPE20M145K2020Store,
    (LogFavBasedUserInterestedAverageAddressBookFromIIAPE, Model20m145k2020) ->
      logFavBasedInterestedAverageAddressBookFromIIAPE20M145K2020Store,
    (LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      logFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE20M145K2020Store,
    (LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      logFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE20M145K2020Store,
    (LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      logFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE20M145K2020Store,
    (LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE, Model20m145k2020) ->
      logFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE20M145K2020Store
  )

  /** Single entry point over all user embedding stores registered in `underlyingStores`. */
  val userSimClustersEmbeddingStore: ReadableStore[
    SimClustersEmbeddingId,
    SimClustersEmbedding
  ] =
    SimClustersEmbeddingStore.buildWithDecider(
      underlyingStores = underlyingStores,
      decider = rmsDecider.decider,
      statsReceiver = stats
    )
}

View File

@ -0,0 +1,18 @@
# Thrift library targets for the Representation Manager service IDL (service.thrift).
# Bindings are generated for Java, Scala and Strato; the IDL depends on the simclusters_v2
# thrift definitions listed under dependency_roots.
create_thrift_libraries(
base_name = "thrift",
sources = [
"com/twitter/representation_manager/service.thrift",
],
platform = "java8",
tags = [
"bazel-compatible",
],
dependency_roots = [
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift",
],
generate_languages = [
"java",
"scala",
"strato",
],
)

View File

@ -0,0 +1,14 @@
namespace java com.twitter.representation_manager.thriftjava
#@namespace scala com.twitter.representation_manager.thriftscala
#@namespace strato com.twitter.representation_manager
include "com/twitter/simclusters_v2/online_store.thrift"
include "com/twitter/simclusters_v2/identifier.thrift"
/**
 * A uniform column view for all kinds of SimClusters-based embeddings.
 *
 * Both fields are required; together they select which embedding is requested.
 * NOTE(review): presumably used as the view of the Strato column(s) served by this
 * service - confirm against the column definitions.
 **/
struct SimClustersEmbeddingView {
// Kind of embedding requested (see identifier.thrift).
1: required identifier.EmbeddingType embeddingType
// SimClusters model version the embedding belongs to (see online_store.thrift).
2: required online_store.ModelVersion modelVersion
}(persisted = 'false', hasPersonalData = 'false')