Le code source comme document d’infrastructure: le cas de Yandex

Pierre Depaz - NYU Berlin - pierre.depaz@nyu.edu


Les compréhensions esthétiques du code source (source.enframed.net)

Software Studies et Critical Code Studies: appliquer les humanités au numérique

Un champ naissant et des études de texte restreintes


Un corpus

Forum post announcing the Yandex code leak

Une plateforme dominante

Un acteur dominant de l'économie des plateformes en Russie et dans les pays russophones, cas d'étude de la gouvernance algorithmique (Daucé et Loveluck, 2021; Dovbysh et al., 2022)


Méthodologie

Un monolithe composé de:

Première passe par eeefff


Approche par niveaux (Cayley, 2002)


Exploration de la base de code avec Open Semantic Search, Zoekt et VS Code.


Maps

Localisation et navigation


Dépendances internationales

        
            // Create a geosearch control whose provider is OpenStreetMap, call
            // addTo(map) on it, and keep the value returned by addTo() in `geosearch`.
            geosearch = new L.Control.GeoSearch({
                provider: new L.GeoSearch.Provider.OpenStreetMap()
            }).addTo(map);
        
    
maps/renderer/tools/vmap2js/js/main.js:244

        
            2000-10-14  Dov Grobgeld  
            * Released fribidi-0.1.14.
            * CreateGetType.pl, fribidi_tables.i: Applied a patch received from Roozbeh Pournader  (who has got quite some courage sending an email from Iran to Israel) which updates the character tables according to UnicodeData-3.0.1.txt .
            * fribidi_char_sets.c: Cleaned up the mess and made it compile!
        
    
contrib/fribidi/ChangeLog.old

Opérations de conversion

        
            def fill_graph(builder: TaskGraphBuilder, unused_regions=None):
                """Add the OSM-to-YT tasks of every configured region to ``builder``.

                For each entry of ``osm_src_defs.REGIONS`` a region/vendor-scoped
                builder is derived from ``builder`` and passed, in this order, to the
                ``copy_resources``, ``extract_regions`` and ``osm_to_yt`` graph fillers.

                ``unused_regions`` is accepted but never read in this function.
                """
                for current_region in osm_src_defs.REGIONS:
                    borders_src = osm_borders_src_defs.OSM_BORDERS_SRC
                    # Border resources that are passed to the scoped builder as
                    # "external" resources.
                    border_resources = [
                        borders_src.resource_name(resource_id)
                        for resource_id in (
                            osm_borders_src_defs.COUNTRIES_GEOM_TABLE,
                            osm_borders_src_defs.COUNTRIES_COVERAGE_FILE,
                            osm_borders_src_defs.REGIONS_COVERAGE_FILE,
                            osm_borders_src_defs.WATER_REGIONS_COVERAGE_FILE,
                            osm_borders_src_defs.REGIONS_GEOM_TABLE,
                        )
                    ]

                    # Per the original author's note: region_vendor_mutagen adds the
                    # suffix "_{region}_{vendor}" to resource names and adds a property
                    # propagator to all tasks for "shipping_date", "region", "vendor".
                    scoped_builder = mutagen.create_region_vendor_mutagen(
                        graph_builder=builder,
                        region=current_region,
                        vendor=osm_src_defs.VENDOR,
                        external_resources=border_resources,
                    )

                    copy_resources.fill_graph(scoped_builder)
                    extract_regions.fill_graph(scoped_builder, current_region)
                    osm_to_yt.fill_graph(scoped_builder)
        
    
garden/modules/osm_to_yt/graph.py:10

Adapter les frontières

        
            // Constructor: registers two groups of two-letter country codes through
            // addBorders() -- first the Schengen members together with the
            // non-members listed as having open borders with them, then the
            // Israel/Palestine pair.
            // NOTE(review): addBorders() is not visible in this excerpt; presumably
            // each call declares one group of countries with free borders between
            // them -- confirm against its definition.
            FreeCountryBorders() {
                // Members of Schengen Area
                addBorders({
                    "AT", // Austria
                    "BE", // Belgium
                    "CZ", // Czech Republic
                    "DK", // Denmark
                    "EE", // Estonia
                    "FI", // Finland
                    "FR", // France
                    "DE", // Germany
                    "GR", // Greece
                    "HU", // Hungary
                    "IS", // Iceland
                    "IT", // Italy
                    "LV", // Latvia
                    "LI", // Liechtenstein
                    "LT", // Lithuania
                    "LU", // Luxembourg
                    "MT", // Malta
                    "NL", // Netherlands
                    "NO", // Norway
                    "PL", // Poland
                    "PT", // Portugal
                    "SK", // Slovakia
                    "SI", // Slovenia
                    "ES", // Spain
                    "SE", // Sweden
                    "CH", // Switzerland
        
                    // Non-members of Schengen Area having open borders with it
                    "AD", // Andorra
                    "MC", // Monaco
                    "SM", // San Marino
                    "VA", // Vatican City
                });
        
                // Israel and Palestine (see https://st.yandex-team.ru/MAPSNAVI-5024)
                addBorders({
                    "IL", // Israel
                    "PS", // Palestine
                });
            }
        
    

Personnaliser les frontières

        const std::set& disputedViews()
            {
                static const std::set result = {
                    Region::_001,
                    Region::Ru,
                    Region::Ua,
                    Region::Tr,
                    Region::Il,
                };
                return result;
            }
        
    
libs/locale/impl/region_groups.cpp

        /// Maps a region to the disputed-territories group used for it.
        ///
        /// The table below is scanned linearly for the first entry whose region
        /// equals `region`; its group is returned. Regions that are not listed
        /// fall back to `region_groups::Disputed::_001`.
        ///
        /// NOTE(review): `Element` had lost its template arguments in this excerpt
        /// (`std::pair;`). They are restored from the parameter type (`Region`)
        /// and the return type (`region_groups::Disputed`) -- confirm against the
        /// upstream file.
        ///
        /// @param region region to look up
        /// @return the group paired with `region`, or the default group
        region_groups::Disputed disputedGroupId(Region region)
            {
                const auto DISPUTED_DEFAULT = region_groups::Disputed::_001;
                using Element = std::pair<Region, region_groups::Disputed>;
                // The array size is deduced from the initializer list.
                static const std::array disputedGroups = {
                    /* Default. Currently equal to Russia */
                    Element{Region::_001, region_groups::Disputed::_001},
                    /* United Nations.
                       Ukrainian Crimea, Donetsk and Lugansk, Kosovo je Serbia,
                       Jerusalem, Syrian Golan, United Cyprus */
                    Element{Region::Un, region_groups::Disputed::Un},
                    Element{Region::Ua, region_groups::Disputed::Un},
                    Element{Region::Lv, region_groups::Disputed::Un},
                    Element{Region::Ee, region_groups::Disputed::Un},
                    Element{Region::Lt, region_groups::Disputed::Un},
                    Element{Region::Ge, region_groups::Disputed::Un},
                    Element{Region::Am, region_groups::Disputed::Un},
                    Element{Region::Md, region_groups::Disputed::Un},
                    Element{Region::Fi, region_groups::Disputed::Un},
                    Element{Region::Kz, region_groups::Disputed::Un},
                    Element{Region::Uz, region_groups::Disputed::Un},
                    Element{Region::By, region_groups::Disputed::Un},
                    Element{Region::Kg, region_groups::Disputed::Un},
                    Element{Region::Fr, region_groups::Disputed::Un},
                    Element{Region::Gb, region_groups::Disputed::Un},
                    Element{Region::Ae, region_groups::Disputed::Un},
                    Element{Region::Rs, region_groups::Disputed::Un},
                    /* Russia.
                       Russian Crimea, Independent DPR, LPR, Kosovo je Serbia,
                       Jerusalem, Syrian Golan, United Cyprus*/
                    Element{Region::Ru, region_groups::Disputed::Ru},
                    Element{Region::Su, region_groups::Disputed::Ru},
                    Element{Region::Xd, region_groups::Disputed::Ru},
                    Element{Region::Xl, region_groups::Disputed::Ru},
                    Element{Region::Ab, region_groups::Disputed::Ru},
                    Element{Region::Os, region_groups::Disputed::Ru},
                    /* Turkey.
                       Ukrainian Crimea, Donetsk and Lugansk, Kosovo,
                       Tel Aviv, Syrian Golan, Northern Cyprus, no armenian in Artsakh */
                    Element{Region::Tr, region_groups::Disputed::Tr},
                    Element{Region::Xn, region_groups::Disputed::Tr},
                    Element{Region::Az, region_groups::Disputed::Tr},
                    /* Israel.
                       Ukrainian Crimea, Donetsk and Lugansk, Kosovo je Serbia,
                       Jerusalem, Israeli Golan, United Cyprus */
                    Element{Region::Il, region_groups::Disputed::Il},
                };
                const auto it = std::find_if(
                    disputedGroups.begin(),
                    disputedGroups.end(),
                    [region](const auto& el){ return el.first == region; }
                );
                return it != disputedGroups.end()
                    ? it->second
                    : DISPUTED_DEFAULT;
            }
        
    
libs/locale/impl/region_groups.cpp

        
            /**
             * Builds the descriptor of a politics file.
             *
             * The four arguments are echoed back in the result, together with a
             * `fileName` made of the same four values joined with '_' (in the order
             * disputedBorders, region, language, quality) and suffixed with '.json'.
             *
             * @param disputedBorders disputed-borders view code
             * @param region region code
             * @param language language code
             * @param quality quality marker
             * @returns {{region, language, disputedBorders, quality, fileName: string}}
             */
            const createPoliticsResponse = function (disputedBorders, region, language, quality) {
                const fileName = [disputedBorders, region, language, quality].join('_') + '.json';

                return {region, language, disputedBorders, quality, fileName};
            };
        
    
front/services/regions-service/src/lib/utils.js
        
            const {createPoliticsResponse} = require('../lib/utils');

            // For each disputed-borders view code, the country codes for which
            // handleCountry() keeps that view; any other view/country combination
            // is served with the 'UN' view instead.
            const disputedBordersToRegions = {
                RU: ['RU', 'UA'],
                UA: ['RU', 'UA'],
                UN: ['RU', 'UA', 'BY', 'KZ', 'TR']
            };

            /**
             * Politics descriptor for the whole world: the region is fixed to '001'
             * and the requested disputed-borders view is passed through unchanged.
             */
            const handleWorld = function (language, disputedBorders, quality) {
                const worldRegion = '001';
                return createPoliticsResponse(disputedBorders, worldRegion, language, quality);
            };

            /**
             * Politics descriptor for a single country.
             *
             * The requested disputed-borders view is kept only when
             * `disputedBordersToRegions` has an entry for it and that entry lists
             * `region`; otherwise the 'UN' view is used.
             */
            const handleCountry = function (region, language, disputedBorders, quality) {
                const allowedRegions = disputedBordersToRegions[disputedBorders];
                const viewIsAvailable = Boolean(allowedRegions) && allowedRegions.includes(region);
                const effectiveBorders = viewIsAvailable ? disputedBorders : 'UN';

                return createPoliticsResponse(effectiveBorders, region, language, quality);
            };

            /**
             * Politics descriptor for Antarctica: region 'AQ', always with the 'UN'
             * disputed-borders view.
             */
            const handleAntarctic = function (language, quality) {
                const antarcticRegion = 'AQ';
                const antarcticBorders = 'UN';
                return createPoliticsResponse(antarcticBorders, antarcticRegion, language, quality);
            };
        
    
front/services/regions-service/src/lib/politics.v2.js

Spécificité de la Crimée

        
            /**
             * Returns the cookie that is responsible for the status of Crimea.
             *
             * The value is read from the `cr` entry of the parsed `yp` cookie.
             * Yields `undefined` when there is no `yp` cookie; when `yp` is present
             * but has no truthy `cr` entry, that falsy `cr` value is returned as is.
             *
             * NOTE(review): `Record` appears without type arguments in this excerpt;
             * they were probably lost in extraction -- confirm against upstream.
             *
             * @see https://st.yandex-team.ru/MAPSUI-720
             */
            function getCrimeaStatusCookie(cookies: Record): string | undefined {
                const ypCookie = cookies.yp;
                if (ypCookie) {
                    const {cr} = yandexYCookie.parseYpCookie(ypCookie);
                    return cr && cr.value;
                }
                return undefined;
            }
        
    

        
            // Returns the built-in table that pairs a numeric geo id with a
            // db::FeaturePrivacy level (Public, Restricted or Secret).
            // NOTE(review): `std::map` appears without template arguments in this
            // excerpt; they were probably lost in extraction. The mapped type is
            // db::FeaturePrivacy; the key type cannot be determined from here.
            std::map loadDefaultGeoIdToPrivacyMap()
            {
                return {
                    {168, db::FeaturePrivacy::Public}, // Armenia
                    {149, db::FeaturePrivacy::Public}, // Belarus
                    {169, db::FeaturePrivacy::Public}, // Georgia
                    {159, db::FeaturePrivacy::Public}, // Kazakhstan
                    {207, db::FeaturePrivacy::Public}, // Kirgizia
                    {208, db::FeaturePrivacy::Public}, // Moldova
                    {225, db::FeaturePrivacy::Public}, // Russia
                    // Fixme: because of a bug in Russias boundary in coverage MAPSMRC-2071
                    // we added one of the regions explicitly
                    {10174, db::FeaturePrivacy::Public}, // SAINT_PETERSBURG_AND_LENINGRAD_OBLAST
                    {209, db::FeaturePrivacy::Public}, // Tadzikistan

                    // MAPSMRC-3882 mark all regions adjacent to Ukraine Restricted privacy
                    {11004, db::FeaturePrivacy::Restricted}, // Adygea
                    {10645, db::FeaturePrivacy::Restricted}, // Belgorod region
                    {29632, db::FeaturePrivacy::Restricted}, // Brest region
                    {10650, db::FeaturePrivacy::Restricted}, // Bryanks region
                    {977, db::FeaturePrivacy::Restricted}, // Crimea
                    {959, db::FeaturePrivacy::Restricted}, // Sevastopol
                    {29631, db::FeaturePrivacy::Restricted}, // Gomel region
                    {10995, db::FeaturePrivacy::Restricted}, // Krasnodarskij region
                    {10705, db::FeaturePrivacy::Restricted}, // Kursk region
                    {11029, db::FeaturePrivacy::Restricted}, // Rostovskij region
                    {10672, db::FeaturePrivacy::Restricted}, // Voronej region

                    // The only entry in this table marked Secret.
                    {181, db::FeaturePrivacy::Secret}, // Israel
                };
            }
        
    
wikimap/mapspro/services/mrc/libs/privacy/impl/region_privacy.cpp

Géopolitique en filigrane


Search

Moteur de recherche (Search), combiné avec un fil d'actualités (News)


Zen, ou l'art du réarrangement

        
            // Looks for the first "good" Zen document among the leading documents of
            // `mg` and, if it is not already first, copies it into slot 0.
            //
            // Positions 0..RearrangeDepth() (inclusive) are inspected, bounded by
            // mg.ItemCount(). A document qualifies when its Zen snippet status is
            // good and, for any position other than 0, ZenInOrganic() is false for
            // its URL. When a qualifying position > 0 is found, the rule is recorded
            // as worked with that position; the copy itself is skipped when
            // OnlyDumpCanRearrangeOrganic() is set.
            void RearrangeOrganic(TRearrangeParams& rearrangeParams, TMetaGroup& mg) {
                const size_t lastInspectedPos = TypedScheme().RearrangeDepth();
                size_t chosenPos = 0;

                for (size_t pos = 0; pos < mg.ItemCount() && pos <= lastInspectedPos; ++pos) {
                    const TMergedDoc& candidate = mg.MetaDocs[pos];
                    const NSc::TValue& zenSnippet = candidate.SnippetValue()[ZenSnippetName];

                    if (!IsZenDocumentStatusIsGood(zenSnippet["SerpData"]["extra_data"])) {
                        continue;
                    }
                    if (pos != 0 && ZenInOrganic(rearrangeParams, candidate.Url())) {
                        continue;
                    }

                    chosenPos = pos;
                    break;
                }

                // Nothing to do when no document qualified or the first one did.
                if (chosenPos == 0) {
                    return;
                }

                rearrangeParams.InsertWorkedRule("rearrange_organic", ToString(chosenPos));
                if (!TypedScheme().OnlyDumpCanRearrangeOrganic()) {
                    // Plain assignment: the previous occupant of slot 0 is
                    // overwritten, not swapped with the chosen document.
                    mg.MetaDocs[0] = mg.MetaDocs[chosenPos];
                }
            }
        
    
web/rearrange/zen/zen.cpp

        
            // Parameter object derived from TContextParams. It holds a pointer to the
            // merged result, the id of the current grouping and fetch counters, and
            // declares accessors for groupings, document counts, error state and
            // fetch scheduling.
            // NOTE(review): several types in this excerpt (TMaybe, TSet, THashSet,
            // THashMap) appear without their template arguments; they were probably
            // lost in extraction -- confirm against upstream.
            class TRearrangeParams: public TContextParams {
                public:
                    TMergedRes* Result = nullptr;
                    const TMetaGroupingId Current;
                    size_t GroupsRequested = 0;
                    TMaybe ToFetchGroupsCount = Nothing();
                public:
                    TRearrangeParams(TMergedRes* result, const TMetaGroupingId& current);
                    TMetaGrouping* GetRearrangedAlias(const TMetaGroupingId& gId, TAliasRearrOpts opts);
                    virtual TMetaGrouping* GetMergedGrouping(const TMetaGroupingId& gId);
                    size_t GetGroupingDocCount(int prior, const TGroupingIndex& gi) const;
                    size_t GetGroupingDocCount(int prior, const TGroupingIndex& gi, const TSet& actualSources) const;
                    size_t GetGroupingDocCount(int prior, const TMetaGroupingId& gId, const TSet& actualSources) const;
                    //everything from here to "private:" is strange, deprecated or actually doesn't work
                    const IArchiveDocInfo* GetArchiveAccessor(int nGroup, int nDoc, const TGroupingIndex& gi) const;
                    ui64 GetGroupDocCount(int prior, int nGroup, const TGroupingIndex& gi) const;
                    ui64 GetGroupingGroupCount(int prior, const TGroupingIndex& gi) const;
                    //for audiomatch
                    int GetErrorCode() const;
                    void SetErrorCode(yxErrorCode code);
                    void SetErrorText(const TStringBuf& text);
                    //for fasttier and fusion second top
                    void ScheduleFetch(const TMetaGroupingId& gId);
                    void SetNewSnippets(TMergedDoc& doc, const NMetaProtocol::TArchiveInfo& archiveInfo);
                    const TRankModelsMapFactory* GetRankModelsMapFactory() const;
                    void RegisterInitGrouping(TMetaGrouping* grg);
                    // Flushes document data for `cur`, then for every grouping held
                    // in InitGroupings_, and finally empties InitGroupings_.
                    void FlushDocumentsData(TMetaGrouping* cur, EFlushDocData options) override {
                        DoFlushDocData(DocMarkers(), cur, options);
                        for (auto& grg: InitGroupings_) {
                            DoFlushDocData(DocMarkers(), grg, options);
                        }
                        InitGroupings_.clear();
                    }
                    void FillDocExtraInfo(TMergedDoc& doc) const override;
                private:
                    // Groupings pending a flush; drained by FlushDocumentsData().
                    // NOTE(review): presumably filled by RegisterInitGrouping() -- its
                    // definition is not visible here.
                    THashSet InitGroupings_;
                    THashMap> CurrentRuleInitGroupings_;
                };
        
    
meta/rearrange/rearrange.h

Tests de fonctions et scénarios idéals

        
            // Runs the lingboost rule checker twice at search level L2 with the same
            // user request and checks the "qbundles" / "qbundle" client form fields
            // of the resulting search request:
            //   1. without a locale: "qbundles" must be longer than 1024 characters;
            //   2. with relev_locale=ua: "qbundles" must be empty.
            // In both runs "qbundle" must equal "xxx", a search request is expected
            // and a factor request is not.
            void TestL2() {
                TLingBoostChecker ruleChecker{Search};
        
                ruleChecker.IsEnabled = true;
        
                // Rule configuration: both Begemot bundle flags are switched on, and
                // "RestrictJson" carries a "default" list of two facet entries, the
                // first disabled and the second (Expansion "XfDtShow") enabled.
                ruleChecker.DoInitRule = [](TLingBoostTestCtx& ctx, TLingBoostRuleCtx&) {
                    ctx.LocalScheme()["UseLightBegemotBundle"] = 1;
                    ctx.LocalScheme()["UseBegemotBundle"] = 1;
                    ctx.LocalScheme()["RestrictJson"] = NSc::TValue::FromJson(
                        "{"
                            "\"default\": ["
                                "{\"Facet\": {}, \"Enabled\": false},"
                                "{\"Facet\": {\"Expansion\": \"XfDtShow\"}, \"Enabled\": true}"
                            "]"
                        "}");
                };
        
                // First run: expect a large "qbundles" value and an untouched "qbundle".
                ruleChecker.DoCheckSearch = [](const TMetaRequestAdjusterRef& adjuster) {
                    TStringBuf qbundle{adjuster->ClientFormField("qbundles", 0)};
                    UNIT_ASSERT(qbundle.size() > 1024);
                    UNIT_ASSERT_VALUES_EQUAL(TStringBuf{adjuster->ClientFormField("qbundle", 0)}, TStringBuf("xxx"));
                };
        
                ruleChecker.ExpectSearchRequest = true;
                ruleChecker.ExpectFactorRequest = false;
        
                ruleChecker.Run(TStringBuilder{}
                    << "&user_request=путин"
                    << "&qtree=" << TestData.GetQtree(TestData.PutinQtree)
                    << "&relev=lbqbundle=" << TestData.GetQbundle(TestData.PutinLbQbundle)
                    << "&relev=norm=путин+норм"
                    << "&relev=wizqbundle=xxx",
                    ESearchLevel::L2);
        
                // ua (unknown restrict)
        
                // Second run: the check is replaced so that "qbundles" must be empty.
                ruleChecker.DoCheckSearch = [](const TMetaRequestAdjusterRef& adjuster) {
                    TStringBuf qbundles{adjuster->ClientFormField("qbundles", 0)};
                    UNIT_ASSERT(qbundles.empty());
                    UNIT_ASSERT_VALUES_EQUAL(TStringBuf{adjuster->ClientFormField("qbundle", 0)}, TStringBuf("xxx"));
                };
        
                ruleChecker.ExpectSearchRequest = true;
                ruleChecker.ExpectFactorRequest = false;
        
                ruleChecker.Run(TStringBuilder{}
                    << "&user_request=путин"
                    << "&relev=relev_locale%3Dua"
                    << "&qtree=" << TestData.GetQtree(TestData.PutinQtree)
                    << "&relev=lbqbundle=yyy"
                    << "&relev=norm=путин+норм"
                    << "&relev=wizqbundle=xxx",
                    ESearchLevel::L2);
             }
        
    
web/rearrange/lingboost/ut/lingboost_ut.cpp

Sources d'information préférentielles

        
            // Rearrange rule that carries a hard-coded list of news host names
            // ("HighQualityNewsmakers") and passes it to every TStaticNewsContext it
            // constructs.
            // NOTE(review): `THashSet` appears without a template argument, and the
            // class body is not followed by ';' in this excerpt -- both look like
            // extraction/trimming artefacts; confirm against upstream.
            class TStaticNewsRule : public IRearrangeRule {
                public:
                    // Host names treated as high-quality news sources by this rule.
                    THashSet HighQualityNewsmakers = {
                        "ria.ru",
                        "rbc.ru",
                        "tass.ru",
                        "kommersant.ru",
                        "rt.com",
                        "iz.ru",
                        "interfax.ru",
                        "lenta.ru",
                        "ura.news",
                        "smotrim.ru",
                        "echo.msk.ru",
                        "sport-express.ru",
                        "kp.ru",
                        "rg.ru",
                        "gazeta.ru",
                        "fontanka.ru",
                        "championat.com",
                        "360tv.ru",
                        "tvrain.ru",
                        "vedomosti.ru",
                        "govoritmoskva.ru",
                        "matchtv.ru",
                        "mk.ru",
                        "tsargrad.tv",
                        "life.ru",
                        "nsn.fm",
                        "sports.ru",
                        "tvc.ru",
                        "m24.ru",
                        "ntv.ru",
                        "ren.tv",
                        "vz.ru",
                        "forbes.ru",
                        "tvzvezda.ru",
                        "rambler.ru",
                        "news.ru",
                        "riafan.ru",
                        "aif.ru",
                        "regnum.ru",
                        "znak.com",
                        "rusfootball.info",
                        "spbdnevnik.ru",
                        "tatar-inform.ru",
                        "sport.ru",
                        "sport24.ru",
                        "metaratings.ru",
                        "rosbalt.ru",
                        "otr-online.ru",
                        "1tv.ru",
                        "business-gazeta.ru"
                    };
                    
                    public:
                    TStaticNewsRule() {
                    }
                    private:
                        // Creates the rule context, handing it the host list above.
                        IRearrangeRuleContext* DoConstructContext() const override {
                            return new TStaticNewsContext(HighQualityNewsmakers);
                        }
            }
        
    
web/rearrange/stable/stable_news.cpp
        
            // Reorders `Docs` so that, near the top, documents from the same agency
            // are not repeated.
            //
            // `Docs` is first sorted with TTimeAdjustedMdsWeightGreater (which is
            // given forcedAgencies, forceAnnotation and pessimizeDups). Then, in
            // rounds, the first document of each not-yet-seen agency is appended to
            // the ranked list and the remaining ones are kept for the next round.
            // Rounds stop once at least topDocsPerStory documents are ranked or no
            // documents remain; leftovers are appended in their current order.
            // NOTE(review): TVector, THashSet and TDenseHashMapStaticMarker appear
            // without template arguments in this excerpt; they were probably lost in
            // extraction -- confirm against upstream.
            void RankDocs(size_t topDocsPerStory,
                      const THashSet* forcedAgencies = nullptr,
                      const bool forceAnnotation = true,
                      const bool pessimizeDups = true)
        {
            TVector rankedDocs;
            rankedDocs.reserve(Docs.size());
            // We need to resort docs by TimeAdjustedMdsWeight because it might have changed
            // thanks to AddWeightsFrom()
            ::Sort(Docs.begin(), Docs.end(), TDoc::TTimeAdjustedMdsWeightGreater(forcedAgencies, forceAnnotation, pessimizeDups));

            // all that matters are 3 top docs (because we don't show more)
            // We want docs with max tail weight but without agency repetitions

            // FIXME: This is a very-very dumb algorithm
            while (rankedDocs.size() < topDocsPerStory && !Docs.empty()) {
                TVector dupDocs; // dups by agency in current iteration
                dupDocs.reserve(Docs.size());
                // Agencies already picked in this round (reset every round).
                NFH::TDenseHashMapStaticMarker chosenAgencies;
                for (size_t i = 0; i < Docs.size(); ++i) {
                    if (!chosenAgencies.Value(Docs[i]->Agency, false)) {
                        chosenAgencies[Docs[i]->Agency] = true;
                        rankedDocs.push_back(Docs[i]);
                    } else {
                        dupDocs.push_back(Docs[i]);
                    }
                }
                // Next round only looks at the documents skipped in this one.
                Docs.swap(dupDocs);
            }
            rankedDocs.insert(rankedDocs.end(), Docs.begin(), Docs.end());
            Docs.swap(rankedDocs);
        }
        
    
news/base/search/rubric.h:224

Modulations géographiques de l'information

        
            // Fills the news ranking factors of one document.
            //
            // Nothing is written in fast mode. Otherwise the static factors are
            // copied from the document's erf record; story rank, in-story agency
            // weight and agency quality are taken from region-specific fields when
            // the relevance region is Ukraine or Belarus (for Belarus no story-rank
            // factor is written here). Text factors are computed only outside
            // internal search.
            void TNewsRelevance::CalcFactors(TCalcFactorsContext& ctx) {
                if (ctx.Fast) {
                    return;
                }

                TFactorStorage& out = *ctx.Factors;
                const SNewsErfInfo& docErf = ErfManager.GetErf(ctx.DocId);

                // Region-independent static factors.
                out[FI_NEWS_DUPLICATE] = docErf.IsDuplicate;
                out[FI_NEWS_UCP] = docErf.UCP;
                out[FI_NEWS_TAIL_SELECTED] = docErf.TailSelected;

                // Region-dependent static factors.
                const TCateg region = RP.RelevRegion;
                if (region == COUNTRY_UKRAINE) {
                    out[FI_NEWS_STORY_RANK] = docErf.StoryRankUA;
                    out[FI_NEWS_IN_STORY_AGENCY_WEIGHT] = docErf.InStoryAgencyWeightUA;
                    out[FI_NEWS_AGENCY_QUALITY] = docErf.AgenQualityUA;
                } else if (region == COUNTRY_BELARUS) {
                    out[FI_NEWS_IN_STORY_AGENCY_WEIGHT] = docErf.InStoryAgencyWeightBY;
                    out[FI_NEWS_AGENCY_QUALITY] = docErf.AgenQualityBY;
                } else {
                    out[FI_NEWS_STORY_RANK] = docErf.StoryRank;
                    out[FI_NEWS_IN_STORY_AGENCY_WEIGHT] = docErf.InStoryAgencyWeight;
                    out[FI_NEWS_AGENCY_QUALITY] = docErf.AgenQuality;
                }

                if (!IsInternalSearch) {
                    TextFactorsCalcer->CalcFactors(ctx.DocId, ctx.TextHits, out);
                    out[FI_NEWS_NO_SELECTIONS] = !(out[FI_NEWS_TAIL_SELECTED] || out[FI_NEWS_WORDS_IN_TITLE]);
                    return;
                }

                // Internal search: dynamic (text) factors are skipped because they
                // are a little bit expensive and not really needed here.
                out[FI_NEWS_NO_SELECTIONS] = !out[FI_NEWS_TAIL_SELECTED];
            }
        
    
extsearch/news/base/search/relevance.cpp

À la recherche de l'actualité


Autres pistes


Questions